From d5e4b58fcbe65842d01a4792a9858af082d6c3a9 Mon Sep 17 00:00:00 2001 From: imatiushin Date: Tue, 5 Mar 2024 13:18:22 +0300 Subject: [PATCH 01/10] integrity checker fix --- deker/integrity.py | 79 +++++++++---------- .../test_integrity/test_integrity_checker.py | 14 +++- 2 files changed, 47 insertions(+), 46 deletions(-) diff --git a/deker/integrity.py b/deker/integrity.py index d199ecc..fe88c24 100644 --- a/deker/integrity.py +++ b/deker/integrity.py @@ -26,12 +26,12 @@ from deker.errors import ( DekerBaseApplicationError, DekerCollectionNotExistsError, - DekerIntegrityError, + DekerIntegrityError, DekerMetaDataError, ) +from deker.managers import ArrayManager, VArrayManager from deker.tools import get_main_path, get_symlink_path from deker.types.private.enums import LocksExtensions - if TYPE_CHECKING: from deker.client import Client @@ -64,7 +64,7 @@ class PathsChecker(BaseChecker): CHECKER_LEVEL = 3 def _validate_path_array_without_key_attributes( - self, main_path: Path, symlink_path: Path, files: list + self, main_path: Path, symlink_path: Path, files: list ) -> None: """Validate symlinks in array without key attributes. @@ -170,38 +170,42 @@ def check_arrays_locks(self, collection: Collection) -> None: if self.stop_on_error and self.errors: raise DekerIntegrityError(self._parse_errors()) - def check(self, collection: Collection) -> None: - """Check if Arrays or VArray in Collection are valid. + def _check_v_arrays(self, collection: Collection, v_arrays: ArrayManager | VArrayManager): + """Check if Arrays or VArray in Collection are initializing. :param collection: Collection to be checked """ - if self.level < self.CHECKER_LEVEL: - return - self.check_arrays_locks(collection) - - for array in collection.arrays: - try: - if self.next_checker: - self.next_checker.check(array, collection) - except DekerBaseApplicationError as e: - if not self.stop_on_error: - self.errors[f"Collection {collection.name} arrays integrity errors:"].append( - str(e) - ) - else: - raise DekerIntegrityError(str(e)) - if collection.varray_schema: - for varray in collection.varrays: + try: + for array in v_arrays: try: if self.next_checker: - self.next_checker.check(varray, collection) + self.next_checker.check(array, collection) except DekerBaseApplicationError as e: if not self.stop_on_error: - self.errors[ - f"Collection {collection.name} varrays integrity errors:" - ].append(str(e)) + self.errors[f"Collection {collection.name} arrays integrity errors:"].append( + str(e) + ) else: raise DekerIntegrityError(str(e)) + except DekerMetaDataError as e: + if not self.stop_on_error: + self.errors[f"Collection {collection.name} (V)Arrays initialization errors:"].append(str(e)) + else: + raise e + + def check(self, collection: Collection) -> None: + """Check if Arrays or VArrays and their locks in Collection are valid. + + :param collection: Collection to be checked + """ + if self.level < self.CHECKER_LEVEL: + return + self.check_arrays_locks(collection) + + self._check_v_arrays(collection, collection.arrays) + if collection.varray_schema: + self._check_v_arrays(collection, collection.varrays) + return class CollectionsChecker(BaseChecker): @@ -216,7 +220,7 @@ def check_collections(self) -> list: for directory in Path(self.root_path).iterdir(): try: if directory.is_file() and directory.name.endswith( - LocksExtensions.collection_lock.value + LocksExtensions.collection_lock.value ): locks.append(directory.name[: -len(LocksExtensions.collection_lock.value)]) collection = self.client.get_collection(directory.name) @@ -247,23 +251,14 @@ def check(self, collection_name: Optional[str] = None) -> None: :param collection_name: optional collection to be checked """ if collection_name: - # skipping collections checker - self.next_checker: Optional[BaseChecker] = ( - self.next_checker.next_checker if self.next_checker else None - ) - try: - collection: Collection = self.client.get_collection(collection_name) - if not collection: - raise DekerCollectionNotExistsError( - f"Collection {collection_name} does not exist at this storage" - ) + collection: Collection = self.client.get_collection(collection_name) + if not collection: + raise DekerCollectionNotExistsError( + f"Collection {collection_name} does not exist at this storage" + ) + if self.level > self.CHECKER_LEVEL: if self.next_checker: self.next_checker.check(collection) - except DekerCollectionNotExistsError: - raise - except Exception as e: - self.errors["Collections initialization errors:"].append(str(e)) - return collections = self.check_collections() if self.level > self.CHECKER_LEVEL: collections_pbar = tqdm.tqdm(collections) diff --git a/tests/test_cases/test_integrity/test_integrity_checker.py b/tests/test_cases/test_integrity/test_integrity_checker.py index 3cef4a1..8e0dbd6 100644 --- a/tests/test_cases/test_integrity/test_integrity_checker.py +++ b/tests/test_cases/test_integrity/test_integrity_checker.py @@ -75,8 +75,9 @@ def test_check_collection_does_not_exist( with pytest.raises(DekerCollectionNotExistsError): integrity_checker.check("collection_does_not_exist") + @pytest.mark.parametrize("check_params", [None, "test_integrity_locks"]) def test_check_locks( - self, client: Client, root_path: Path, array_schema: ArraySchema, ctx: CTX + self, client: Client, root_path: Path, array_schema: ArraySchema, ctx: CTX, check_params: str ): """Tests if function returns error if lock is not found.""" @@ -89,7 +90,7 @@ def test_check_locks( try: filename = collection.path.parent / (collection.name + ".lock") os.remove(filename) - errors = integrity_checker.check() + errors = integrity_checker.check(check_params) assert ( errors == f"Collections locks errors:\n\t- BaseLock for {collection.name} not found\n" @@ -127,6 +128,7 @@ def test_check_extra_locks( os.remove(filename) collection.delete() + @pytest.mark.parametrize("check_params", [None, "test_return"]) def test_check_return( self, array_schema_with_attributes: ArraySchema, @@ -135,6 +137,7 @@ def test_check_return( ctx: CTX, uri: Uri, storage_adapter: Type[BaseStorageAdapter], + check_params: str ): """Tests if function returns errors.""" integrity_checker = IntegrityChecker( @@ -169,7 +172,7 @@ def test_check_return( Path.unlink(symlink_path / files[0]) try: - errors = integrity_checker.check() + errors = integrity_checker.check(check_params) error_1 = f"Symlink {symlink_path} not found\n" error_2 = f"Array {array_1.id} data is corrupted: Index (9) out of range for (0-1)\n" @@ -177,6 +180,7 @@ def test_check_return( finally: collection.delete() + @pytest.mark.parametrize("check_params", [None, "test_check_array_raises_on_init"]) def test_check_array_raises_on_init( self, array_schema_with_attributes: ArraySchema, @@ -185,6 +189,7 @@ def test_check_array_raises_on_init( ctx: CTX, uri: Uri, storage_adapter: Type[BaseStorageAdapter], + check_params: str ): """Tests if function raises exception if array file is incorrect.""" collection = client.create_collection( @@ -220,8 +225,9 @@ def test_check_array_raises_on_init( f.flush() try: with pytest.raises(DekerMetaDataError): - assert integrity_checker.check() + integrity_checker.check(check_params) finally: + collection.delete() array.delete() From c13a988632099dee555d287573b516f4991e537c Mon Sep 17 00:00:00 2001 From: imatiushin Date: Tue, 5 Mar 2024 13:18:39 +0300 Subject: [PATCH 02/10] uuid5 -> uuid4 --- deker/ABC/base_array.py | 2 +- deker/tools/array.py | 47 +++-------------------------------------- 2 files changed, 4 insertions(+), 45 deletions(-) diff --git a/deker/ABC/base_array.py b/deker/ABC/base_array.py index cfb65df..a435d59 100644 --- a/deker/ABC/base_array.py +++ b/deker/ABC/base_array.py @@ -287,7 +287,7 @@ def __init__( self.__is_deleted = False self._validate(id_, primary_attributes, custom_attributes) self.__collection: "Collection" = collection - self.__id: str = id_ if id_ else get_id(self) + self.__id: str = id_ if id_ else get_id() self.__adapter = adapter self.__array_adapter = array_adapter diff --git a/deker/tools/array.py b/deker/tools/array.py index 59f33c4..3e527e5 100644 --- a/deker/tools/array.py +++ b/deker/tools/array.py @@ -110,47 +110,6 @@ def check_memory(shape: tuple, dtype: type, mem_limit_from_settings: int) -> Non ) -def generate_uid(array_type: ArrayType) -> str: - """Generate uuid5 for given array_type. - - :param array_type: Either array or varray - """ - if not isinstance(array_type, ArrayType): - raise TypeError("Invalid argument type. Array type is required") - - namespace = uuid.NAMESPACE_X500 if array_type == ArrayType.array else uuid.NAMESPACE_OID - return str(uuid.uuid5(namespace, array_type.value + get_utc().isoformat())) - - -def get_id(array: Any) -> str: - """Generate unique id by object type and datetime. - - :param array: any object - """ - from deker.arrays import Array, VArray - - @singledispatch - def generate_id(arr: Any) -> str: - """Generate unique id by object type and datetime. - - :param arr: any object - """ - raise TypeError(f"Invalid object type: {type(arr)}") - - @generate_id.register(Array) - def array_id(arr: Array) -> str: # noqa[ARG001] - """Generate id for Array. - - :param arr: Array type - """ - return generate_uid(ArrayType.array) - - @generate_id.register(VArray) - def varray_id(arr: VArray) -> str: # noqa[ARG001] - """Generate id for VArray. - - :param arr: VArray type - """ - return generate_uid(ArrayType.varray) - - return generate_id(array) +def get_id() -> str: + """Generate unique id with uuid4.""" + return str(uuid.uuid4()) From f92e9e2276a0d7aac570498ccf0a0aefda9c5573 Mon Sep 17 00:00:00 2001 From: imatiushin Date: Tue, 5 Mar 2024 13:26:20 +0300 Subject: [PATCH 03/10] uuid5 -> uuid4 --- deker/integrity.py | 1 + deker/tools/array.py | 5 +---- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/deker/integrity.py b/deker/integrity.py index fe88c24..a1291e2 100644 --- a/deker/integrity.py +++ b/deker/integrity.py @@ -174,6 +174,7 @@ def _check_v_arrays(self, collection: Collection, v_arrays: ArrayManager | VArra """Check if Arrays or VArray in Collection are initializing. :param collection: Collection to be checked + :param v_arrays: DataManager to get arrays or varrays from collection """ try: for array in v_arrays: diff --git a/deker/tools/array.py b/deker/tools/array.py index 3e527e5..5613ae0 100644 --- a/deker/tools/array.py +++ b/deker/tools/array.py @@ -16,17 +16,14 @@ import uuid -from functools import singledispatch -from typing import Any, Dict, List, Tuple, Union +from typing import Dict, List, Tuple, Union import numpy as np from deker_tools.data import convert_size_to_human -from deker_tools.time import get_utc from psutil import swap_memory, virtual_memory from deker.errors import DekerMemoryError, DekerValidationError -from deker.types.private.enums import ArrayType def calculate_total_cells_in_array(seq: Union[Tuple[int, ...], List[int]]) -> int: From 63de26901a53deefe1fd484758f99c7de9c1675c Mon Sep 17 00:00:00 2001 From: imatiushin Date: Tue, 5 Mar 2024 13:37:30 +0300 Subject: [PATCH 04/10] linters --- deker/integrity.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deker/integrity.py b/deker/integrity.py index a1291e2..961e112 100644 --- a/deker/integrity.py +++ b/deker/integrity.py @@ -170,7 +170,7 @@ def check_arrays_locks(self, collection: Collection) -> None: if self.stop_on_error and self.errors: raise DekerIntegrityError(self._parse_errors()) - def _check_v_arrays(self, collection: Collection, v_arrays: ArrayManager | VArrayManager): + def _check_v_arrays(self, collection: Collection, v_arrays: ArrayManager | Optional[VArrayManager]): """Check if Arrays or VArray in Collection are initializing. :param collection: Collection to be checked From d96b94267fa9ce5625ff1ffca4debabc8d39e78d Mon Sep 17 00:00:00 2001 From: imatiushin Date: Tue, 5 Mar 2024 13:43:29 +0300 Subject: [PATCH 05/10] linters --- deker/integrity.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deker/integrity.py b/deker/integrity.py index 961e112..183157b 100644 --- a/deker/integrity.py +++ b/deker/integrity.py @@ -170,7 +170,7 @@ def check_arrays_locks(self, collection: Collection) -> None: if self.stop_on_error and self.errors: raise DekerIntegrityError(self._parse_errors()) - def _check_v_arrays(self, collection: Collection, v_arrays: ArrayManager | Optional[VArrayManager]): + def _check_v_arrays(self, collection: Collection, v_arrays: ArrayManager | Optional[VArrayManager]) -> None: """Check if Arrays or VArray in Collection are initializing. :param collection: Collection to be checked From 309a63feae74bc5a5e91fcead393841675428f2c Mon Sep 17 00:00:00 2001 From: imatiushin Date: Tue, 5 Mar 2024 13:47:01 +0300 Subject: [PATCH 06/10] linters --- deker/integrity.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deker/integrity.py b/deker/integrity.py index 183157b..306adc5 100644 --- a/deker/integrity.py +++ b/deker/integrity.py @@ -170,7 +170,7 @@ def check_arrays_locks(self, collection: Collection) -> None: if self.stop_on_error and self.errors: raise DekerIntegrityError(self._parse_errors()) - def _check_v_arrays(self, collection: Collection, v_arrays: ArrayManager | Optional[VArrayManager]) -> None: + def _check_v_arrays(self, collection: Collection, v_arrays: Union[ArrayManager, Optional[VArrayManager]]) -> None: """Check if Arrays or VArray in Collection are initializing. :param collection: Collection to be checked From 323bfdd323d4d0763ac77d5e8f2d078e80b71dfc Mon Sep 17 00:00:00 2001 From: imatiushin Date: Tue, 5 Mar 2024 13:49:21 +0300 Subject: [PATCH 07/10] tests --- tests/test_cases/test_tools/test_tools.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tests/test_cases/test_tools/test_tools.py b/tests/test_cases/test_tools/test_tools.py index 33a3105..7e0eb2b 100644 --- a/tests/test_cases/test_tools/test_tools.py +++ b/tests/test_cases/test_tools/test_tools.py @@ -11,7 +11,6 @@ from deker.collection import Collection from deker.errors import DekerInstanceNotExistsError, DekerMemoryError, DekerValidationError from deker.tools import check_memory, convert_human_memory_to_bytes -from deker.tools.array import generate_uid from deker.tools.time import convert_datetime_attrs_to_iso, convert_iso_attrs_to_datetime @@ -221,12 +220,6 @@ def test_convert_isoformat_attrs_raises(attrs): assert convert_iso_attrs_to_datetime(attrs) -@pytest.mark.parametrize("array_type_arg", (list(), set(), tuple(), dict(), 1, "2", 3.4)) -def test_generate_id_raises(array_type_arg): - with pytest.raises(TypeError): - generate_uid(array_type_arg) - - @pytest.mark.parametrize( "params,result,error", ( From 7d66513df2d0699e587c1053303426b2bccd5bf1 Mon Sep 17 00:00:00 2001 From: imatiushin Date: Tue, 5 Mar 2024 14:06:53 +0300 Subject: [PATCH 08/10] tests --- tests/test_cases/test_client/test_client_methods.py | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/tests/test_cases/test_client/test_client_methods.py b/tests/test_cases/test_client/test_client_methods.py index 2939a69..2eb2921 100644 --- a/tests/test_cases/test_client/test_client_methods.py +++ b/tests/test_cases/test_client/test_client_methods.py @@ -379,16 +379,11 @@ def test_client_check_integrity_collection( f.seek(0) json.dump(data, f, indent=4) f.truncate() - - client.check_integrity(2, stop_on_error=False, collection=collection_1.name) + try: + client.check_integrity(2, stop_on_error=False, collection=collection_1.name) + except Exception as e: + assert str(e) == f"Collection \"{collection_1.name}\" metadata is invalid/corrupted: 'test'" errors = capsys.readouterr().out - assert all( - s in errors - for s in ( - "Integrity check is running...\n", - f"Collection \"{collection_1.name}\" metadata is invalid/corrupted: 'test'\n\n", - ) - ) collection_1.delete() collection_2.delete() for root, _, files in os.walk(os.path.curdir): From a1fd5cfdc249ab9d80dddcf3ce50f7e5b437f5cd Mon Sep 17 00:00:00 2001 From: imatiushin Date: Tue, 5 Mar 2024 15:14:22 +0300 Subject: [PATCH 09/10] review --- deker/integrity.py | 33 ++++++++++--------- .../test_integrity/test_integrity_checker.py | 11 +++++-- 2 files changed, 26 insertions(+), 18 deletions(-) diff --git a/deker/integrity.py b/deker/integrity.py index 306adc5..70ec8ad 100644 --- a/deker/integrity.py +++ b/deker/integrity.py @@ -26,7 +26,8 @@ from deker.errors import ( DekerBaseApplicationError, DekerCollectionNotExistsError, - DekerIntegrityError, DekerMetaDataError, + DekerIntegrityError, + DekerMetaDataError, ) from deker.managers import ArrayManager, VArrayManager from deker.tools import get_main_path, get_symlink_path @@ -64,7 +65,7 @@ class PathsChecker(BaseChecker): CHECKER_LEVEL = 3 def _validate_path_array_without_key_attributes( - self, main_path: Path, symlink_path: Path, files: list + self, main_path: Path, symlink_path: Path, files: list ) -> None: """Validate symlinks in array without key attributes. @@ -170,29 +171,31 @@ def check_arrays_locks(self, collection: Collection) -> None: if self.stop_on_error and self.errors: raise DekerIntegrityError(self._parse_errors()) - def _check_v_arrays(self, collection: Collection, v_arrays: Union[ArrayManager, Optional[VArrayManager]]) -> None: - """Check if Arrays or VArray in Collection are initializing. + def _check_v_arrays( + self, collection: Collection, data_manager: Union[ArrayManager, Optional[VArrayManager]] + ) -> None: + """Check if Arrays or VArrays in Collection are initializing. :param collection: Collection to be checked - :param v_arrays: DataManager to get arrays or varrays from collection + :param data_manager: DataManager to get arrays or varrays from collection """ try: - for array in v_arrays: + for array in data_manager: try: if self.next_checker: self.next_checker.check(array, collection) except DekerBaseApplicationError as e: - if not self.stop_on_error: - self.errors[f"Collection {collection.name} arrays integrity errors:"].append( - str(e) - ) - else: + if self.stop_on_error: raise DekerIntegrityError(str(e)) + self.errors[ + f"Collection {collection.name} arrays integrity errors:" + ].append(str(e)) except DekerMetaDataError as e: - if not self.stop_on_error: - self.errors[f"Collection {collection.name} (V)Arrays initialization errors:"].append(str(e)) - else: + if self.stop_on_error: raise e + self.errors[ + f"Collection {collection.name} (V)Arrays initialization errors:" + ].append(str(e)) def check(self, collection: Collection) -> None: """Check if Arrays or VArrays and their locks in Collection are valid. @@ -221,7 +224,7 @@ def check_collections(self) -> list: for directory in Path(self.root_path).iterdir(): try: if directory.is_file() and directory.name.endswith( - LocksExtensions.collection_lock.value + LocksExtensions.collection_lock.value ): locks.append(directory.name[: -len(LocksExtensions.collection_lock.value)]) collection = self.client.get_collection(directory.name) diff --git a/tests/test_cases/test_integrity/test_integrity_checker.py b/tests/test_cases/test_integrity/test_integrity_checker.py index 8e0dbd6..59c3fbc 100644 --- a/tests/test_cases/test_integrity/test_integrity_checker.py +++ b/tests/test_cases/test_integrity/test_integrity_checker.py @@ -77,7 +77,12 @@ def test_check_collection_does_not_exist( @pytest.mark.parametrize("check_params", [None, "test_integrity_locks"]) def test_check_locks( - self, client: Client, root_path: Path, array_schema: ArraySchema, ctx: CTX, check_params: str + self, + client: Client, + root_path: Path, + array_schema: ArraySchema, + ctx: CTX, + check_params: str, ): """Tests if function returns error if lock is not found.""" @@ -137,7 +142,7 @@ def test_check_return( ctx: CTX, uri: Uri, storage_adapter: Type[BaseStorageAdapter], - check_params: str + check_params: str, ): """Tests if function returns errors.""" integrity_checker = IntegrityChecker( @@ -189,7 +194,7 @@ def test_check_array_raises_on_init( ctx: CTX, uri: Uri, storage_adapter: Type[BaseStorageAdapter], - check_params: str + check_params: str, ): """Tests if function raises exception if array file is incorrect.""" collection = client.create_collection( From 4e551bb10f4ca2fd34ba293ec93e6da866486daa Mon Sep 17 00:00:00 2001 From: imatiushin Date: Tue, 5 Mar 2024 16:09:41 +0300 Subject: [PATCH 10/10] review --- deker/integrity.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/deker/integrity.py b/deker/integrity.py index 70ec8ad..17d70fa 100644 --- a/deker/integrity.py +++ b/deker/integrity.py @@ -171,7 +171,7 @@ def check_arrays_locks(self, collection: Collection) -> None: if self.stop_on_error and self.errors: raise DekerIntegrityError(self._parse_errors()) - def _check_v_arrays( + def _check_varrays_or_arrays( self, collection: Collection, data_manager: Union[ArrayManager, Optional[VArrayManager]] ) -> None: """Check if Arrays or VArrays in Collection are initializing. @@ -206,9 +206,9 @@ def check(self, collection: Collection) -> None: return self.check_arrays_locks(collection) - self._check_v_arrays(collection, collection.arrays) + self._check_varrays_or_arrays(collection, collection.arrays) if collection.varray_schema: - self._check_v_arrays(collection, collection.varrays) + self._check_varrays_or_arrays(collection, collection.varrays) return