Skip to content

Commit

Permalink
Handle chunks that are referenced multiple times by the same file (1.3) (#38)
Browse files Browse the repository at this point in the history

Handle chunks that are referenced multiple times by the same file
  • Loading branch information
vaultah authored Nov 13, 2022
1 parent 1313ca0 commit 737aabb
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 16 deletions.
15 changes: 10 additions & 5 deletions replicat/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -1307,14 +1307,19 @@ def _download_chunk(digest, refs):

logger.info('Chunk %s referenced %d time(s)', location, len(refs))
decrypted_view = memoryview(decrypted_contents)
writer_futures = (
writer.submit(_write_chunk_ref, ref, decrypted_view) for ref in refs
)
writer_futures = []
referenced_paths = set()

for ref in refs:
writer_futures.append(
writer.submit(_write_chunk_ref, ref, decrypted_view)
)
referenced_paths.add(ref[0])

for future in concurrent.futures.as_completed(writer_futures):
future.result()

for ref in refs:
file_path = ref[0]
for file_path in referenced_paths:
with glock:
digests = files_digests[file_path]
digests.remove(digest)
Expand Down
67 changes: 56 additions & 11 deletions replicat/tests/test_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import re
import threading
import time
from unittest.mock import ANY, patch
from unittest.mock import ANY, call, patch

import pytest

Expand Down Expand Up @@ -673,12 +673,36 @@ async def test_encrypted_data(self, local_repo, tmp_path):
second_file.parent.mkdir(exist_ok=True, parents=True)
second_file.write_bytes(second_data)

snapshot = await local_repo.snapshot(paths=[first_file, second_file])
result = await local_repo.restore(snapshot_regex=snapshot.name, path=tmp_path)
# GH-36
third_data = rnd.randbytes(256) * 16
third_file = tmp_path / 'directory/third_file'
third_file.parent.mkdir(exist_ok=True, parents=True)
third_file.write_bytes(third_data)

assert set(result.files) == {str(first_file), str(second_file)}
assert tmp_path.joinpath(*first_file.parts[1:]).read_bytes() == first_data
assert tmp_path.joinpath(*second_file.parts[1:]).read_bytes() == second_data
snapshot = await local_repo.snapshot(
paths=[first_file, second_file, third_file]
)
with patch.object(local_repo, 'restore_metadata') as restore_metadata_mock:
result = await local_repo.restore(
snapshot_regex=snapshot.name, path=tmp_path
)

assert set(result.files) == {str(first_file), str(second_file), str(third_file)}
first_restore_path = tmp_path.joinpath(*first_file.parts[1:])
assert first_restore_path.read_bytes() == first_data
second_restore_path = tmp_path.joinpath(*second_file.parts[1:])
assert second_restore_path.read_bytes() == second_data
third_restore_path = tmp_path.joinpath(*third_file.parts[1:])
assert third_restore_path.read_bytes() == third_data

restore_metadata_mock.assert_has_calls(
[
call(first_restore_path, ANY),
call(second_restore_path, ANY),
call(third_restore_path, ANY),
],
any_order=True,
)

@pytest.mark.asyncio
async def test_unencrypted_data(self, local_repo, tmp_path):
Expand All @@ -702,11 +726,32 @@ async def test_unencrypted_data(self, local_repo, tmp_path):
second_file.parent.mkdir(exist_ok=True, parents=True)
second_file.write_bytes(second_data)

await local_repo.snapshot(paths=[first_file, second_file])
result = await local_repo.restore(path=tmp_path)
assert set(result.files) == {str(first_file), str(second_file)}
assert tmp_path.joinpath(*first_file.parts[1:]).read_bytes() == first_data
assert tmp_path.joinpath(*second_file.parts[1:]).read_bytes() == second_data
# GH-36
third_data = rnd.randbytes(256) * 16
third_file = tmp_path / 'directory/third_file'
third_file.parent.mkdir(exist_ok=True, parents=True)
third_file.write_bytes(third_data)

await local_repo.snapshot(paths=[first_file, second_file, third_file])
with patch.object(local_repo, 'restore_metadata') as restore_metadata_mock:
result = await local_repo.restore(path=tmp_path)

assert set(result.files) == {str(first_file), str(second_file), str(third_file)}
first_restore_path = tmp_path.joinpath(*first_file.parts[1:])
assert first_restore_path.read_bytes() == first_data
second_restore_path = tmp_path.joinpath(*second_file.parts[1:])
assert second_restore_path.read_bytes() == second_data
third_restore_path = tmp_path.joinpath(*third_file.parts[1:])
assert third_restore_path.read_bytes() == third_data

restore_metadata_mock.assert_has_calls(
[
call(first_restore_path, ANY),
call(second_restore_path, ANY),
call(third_restore_path, ANY),
],
any_order=True,
)

@pytest.mark.asyncio
async def test_defaults_to_latest_file_version(self, local_repo, tmp_path):
Expand Down

0 comments on commit 737aabb

Please sign in to comment.