Skip to content

Commit

Permalink
adapter.aasx: allow deleting files from SupplementaryFileContainer
Browse files Browse the repository at this point in the history
`AbstractSupplementaryFileContainer` and
`DictSupplementaryFileContainer` are extended by a `delete_file()`
method that allows deleting files from them. Since different files may
have the same content, references to the file contents in
`DictSupplementaryFileContainer._store` are tracked via
`_store_refcount`. A file's contents are only deleted from `_store` if
all filenames referring to these contents are deleted, i.e. if the
refcount reaches 0.
  • Loading branch information
jkhsjdhjs authored and s-heppner committed Jun 13, 2024
1 parent ecf4c13 commit 67432b5
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 0 deletions.
22 changes: 22 additions & 0 deletions basyx/aas/adapter/aasx.py
Original file line number Diff line number Diff line change
Expand Up @@ -778,6 +778,13 @@ def write_file(self, name: str, file: IO[bytes]) -> None:
"""
pass # pragma: no cover

@abc.abstractmethod
def delete_file(self, name: str) -> None:
    """
    Remove the file stored under the given name from this SupplementaryFileContainer.

    :param name: Name of the file that shall be deleted
    """
    pass  # pragma: no cover

@abc.abstractmethod
def __contains__(self, item: str) -> bool:
"""
Expand All @@ -802,18 +809,23 @@ def __init__(self):
self._store: Dict[bytes, bytes] = {}
# Maps file names to (sha256, content_type)
self._name_map: Dict[str, Tuple[bytes, str]] = {}
# Tracks the number of references to _store keys,
# i.e. the number of different filenames referring to the same file
self._store_refcount: Dict[bytes, int] = {}

def add_file(self, name: str, file: IO[bytes], content_type: str) -> str:
data = file.read()
hash = hashlib.sha256(data).digest()
if hash not in self._store:
self._store[hash] = data
self._store_refcount[hash] = 0
name_map_data = (hash, content_type)
new_name = name
i = 1
while True:
if new_name not in self._name_map:
self._name_map[new_name] = name_map_data
self._store_refcount[hash] += 1
return new_name
elif self._name_map[new_name] == name_map_data:
return new_name
Expand All @@ -839,6 +851,16 @@ def get_sha256(self, name: str) -> bytes:
def write_file(self, name: str, file: IO[bytes]) -> None:
    """
    Write the contents of the file stored under the given name into a file-like object.

    :param name: Name of the stored file
    :param file: Binary file-like object the contents are written to
    :raises KeyError: If no file with the given name is stored
    """
    # _name_map yields (sha256, content_type); the sha256 digest is the key into _store
    digest = self._name_map[name][0]
    content = self._store[digest]
    file.write(content)

def delete_file(self, name: str) -> None:
    """
    Delete the file stored under the given name from this container.

    Several names may refer to the same contents, so ``_store_refcount`` tracks how many names
    point at each entry of ``_store``. The contents are dropped only once the last such name is deleted.

    :param name: Name of the file to delete
    :raises KeyError: If no file with the given name is stored
    """
    digest = self._name_map[name][0]
    remaining = self._store_refcount[digest] - 1
    self._store_refcount[digest] = remaining
    if not remaining:
        # No other name refers to these contents anymore, so they can be released
        del self._store[digest]
        del self._store_refcount[digest]
    del self._name_map[name]

def __contains__(self, item: object) -> bool:
    """
    Check whether a file is stored in this container under the given name.

    :param item: Name to look up
    :return: True if ``item`` is a key of the name map, False otherwise
    """
    known_names = self._name_map
    return item in known_names

Expand Down
17 changes: 17 additions & 0 deletions test/adapter/aasx/test_aasx.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,23 @@ def test_supplementary_file_container(self) -> None:
container.write_file("/TestFile.pdf", file_content)
self.assertEqual(hashlib.sha1(file_content.getvalue()).hexdigest(), "78450a66f59d74c073bf6858db340090ea72a8b1")

# Add same file again with different content_type to test reference counting
with open(__file__, 'rb') as f:
duplicate_file = container.add_file("/TestFile.pdf", f, "image/jpeg")
self.assertIn(duplicate_file, container)

# Delete files
container.delete_file(new_name)
self.assertNotIn(new_name, container)
# File should still be accessible
container.write_file(duplicate_file, file_content)

container.delete_file(duplicate_file)
self.assertNotIn(duplicate_file, container)
# File should now not be accessible anymore
with self.assertRaises(KeyError):
container.write_file(duplicate_file, file_content)


class AASXWriterTest(unittest.TestCase):
def test_writing_reading_example_aas(self) -> None:
Expand Down

0 comments on commit 67432b5

Please sign in to comment.