diff --git a/src/metax_api/management/commands/fix_file_counts.py b/src/metax_api/management/commands/fix_file_counts.py
index 121f968e..8bdc7a53 100644
--- a/src/metax_api/management/commands/fix_file_counts.py
+++ b/src/metax_api/management/commands/fix_file_counts.py
@@ -8,9 +8,18 @@
 
 class Command(BaseCommand):
     def handle(self, *args, **options):
-        dirs_with_no_files = Directory.objects.filter(file_count=0, parent_directory=None)
-        logger.info(f"fix_file_counts command found {dirs_with_no_files.count()} directories with file_count=0")
-        for dir in dirs_with_no_files:
-            dir.calculate_byte_size_and_file_count()
-            logger.info(f"folder has {dir.file_count} files after recalculation")
+        """Recalculate byte size and file count for every directory."""
+        directories = Directory.objects_unfiltered.all()
+        dir_sum = directories.count()
+        logger.info(f"fix_file_counts command found {dir_sum} directories")
+        # start=1 so the progress counter in the logs reads 1/N .. N/N
+        for i, directory in enumerate(directories, start=1):
+            try:
+                directory.calculate_byte_size_and_file_count()
+            except Exception:
+                # best effort: keep the traceback and move on, rather than
+                # reporting a file count that was never recalculated
+                logger.exception(f"can't fix filecount for directory {i}/{dir_sum}")
+                continue
+            logger.info(f"folder {i}/{dir_sum} has {directory.file_count} files after recalculation")
         logger.info(f"fix_file_counts command executed successfully")
\ No newline at end of file
diff --git a/src/metax_api/management/commands/fix_file_counts_cr.py b/src/metax_api/management/commands/fix_file_counts_cr.py
new file mode 100644
index 00000000..6768ffc7
--- /dev/null
+++ b/src/metax_api/management/commands/fix_file_counts_cr.py
@@ -0,0 +1,21 @@
+import logging
+
+from django.core.management.base import BaseCommand
+
+from metax_api.models import CatalogRecord
+
+logger = logging.getLogger(__name__)
+
+
+class Command(BaseCommand):
+    """Recalculate directory byte sizes and file counts for every catalog record."""
+
+    def handle(self, *args, **options):
+        catalog_records = CatalogRecord.objects.all()
+        crs_sum = catalog_records.count()
+        logger.info(f"fix_file_counts_cr command found {crs_sum} catalog records")
+        # start=1 so the progress counter in the logs reads 1/N .. N/N
+        for i, catalog_record in enumerate(catalog_records, start=1):
+            logger.info(f"Calculating {i}/{crs_sum} {catalog_record.identifier} ")
+            catalog_record.calculate_directory_byte_sizes_and_file_counts()
+        logger.info("fix_file_counts_cr command executed successfully")
diff --git a/src/metax_api/models/catalog_record.py b/src/metax_api/models/catalog_record.py
index 3722c0ce..6d829952 100755
--- a/src/metax_api/models/catalog_record.py
+++ b/src/metax_api/models/catalog_record.py
@@ -2097,9 +2097,13 @@ def calculate_directory_byte_sizes_and_file_counts(self):
         if self.research_dataset.get('directories', None):
             dir_identifiers = [d['identifier'] for d in self.research_dataset['directories']]
 
+        file_dir_identifiers = []
         if self.research_dataset.get('files', None):
-            file_dir_identifiers = [File.objects.get(identifier=f['identifier']).parent_directory.identifier
-                                    for f in self.research_dataset['files']]
+            try:
+                file_dir_identifiers = [File.objects.get(identifier=f['identifier']).parent_directory.identifier
+                                        for f in self.research_dataset['files']]
+            except Exception as e:
+                _logger.error(e)
 
         if not dir_identifiers and not file_dir_identifiers:
             return
@@ -2108,6 +2112,9 @@ def calculate_directory_byte_sizes_and_file_counts(self):
 
         highest_level_dirs_by_project = self._get_top_level_parent_dirs_by_project(dir_identifiers)
 
+        if len(highest_level_dirs_by_project) == 0:
+            return
+
         directory_data = {}
 
         for project_identifier, dir_paths in highest_level_dirs_by_project.items():
diff --git a/src/metax_api/services/file_service.py b/src/metax_api/services/file_service.py
index 4fc3ee32..8ecb1bf1 100755
--- a/src/metax_api/services/file_service.py
+++ b/src/metax_api/services/file_service.py
@@ -4,7 +4,6 @@
 #
 # :author: CSC - IT Center for Science Ltd., Espoo Finland
 # :license: MIT
-
 import logging
 from collections import defaultdict
 from os import getpid