Skip to content

Commit

Permalink
[feat] add checksum validation for downloaded files
Browse files Browse the repository at this point in the history
  • Loading branch information
vanto committed Dec 2, 2024
1 parent 089670b commit cc7ce5e
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 1 deletion.
10 changes: 10 additions & 0 deletions dkb_robo/postbox.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
""" Module for handling the DKB postbox. """
import datetime
import hashlib
import logging
from dataclasses import dataclass
from pathlib import Path
Expand Down Expand Up @@ -85,6 +86,15 @@ def download(self, client: requests.Session, target_file: Path, overwrite: bool

with target_file.open('wb') as file:
file.write(resp.content)

# compare checksums of file with checksum from document metadata
if self.document.checksum:
with target_file.open('rb') as file:
checksum = hashlib.md5(file.read()).hexdigest()
if checksum != self.document.checksum:
logger.warning("Checksum mismatch for %s: %s != %s. Renaming file.", target_file, checksum, self.document.checksum)
# rename file to indicate checksum mismatch
target_file.rename(target_file.with_name(target_file.name + '.checksum_mismatch'))
return True
return False

Expand Down
18 changes: 17 additions & 1 deletion test/test_postbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from dkb_robo.api import DKBRoboError
from dkb_robo.postbox import PostboxItem, PostBox, Document, Message


class TestPostboxItem(unittest.TestCase):
""" Tests for the PostboxItem class. """

Expand All @@ -17,7 +18,7 @@ def setUp(self):
expirationDate="2023-12-31",
retentionPeriod="9999-12-31",
contentType="application/pdf",
checksum="abc123",
checksum="9473fdd0d880a43c21b7778d34872157",
fileName="test_document",
metadata={"statementDate": "2023-01-01"},
owner="owner",
Expand Down Expand Up @@ -82,6 +83,20 @@ def test_download_existing_file(self, mock_session):
self.assertFalse(result)
target_file.unlink()

@patch("requests.Session")
def test_download_checksum_mismatch(self, mock_session):
    """ Test that download renames the downloaded file if its checksum does not match.

    The mocked client returns a body that is expected not to hash to the
    checksum configured on the document in setUp, so download should move
    the written file to '<name>.checksum_mismatch' and still return True.
    """
    mock_client = mock_session.return_value
    mock_client.get.return_value.status_code = 200
    mock_client.get.return_value.content = b"wrong test content"
    target_file = Path(tempfile.gettempdir()) / "test_document.pdf"
    mismatched_file = target_file.with_name(target_file.name + ".checksum_mismatch")

    def _remove_artifacts():
        # Runs even when an assertion below fails, so no stale file is left
        # in the shared temp directory to influence other tests or reruns.
        for leftover in (target_file, mismatched_file):
            if leftover.exists():
                leftover.unlink()

    # Register cleanup before the download so every exit path is covered.
    self.addCleanup(_remove_artifacts)

    result = self.postbox_item.download(mock_client, target_file, overwrite=True)
    self.assertTrue(result)
    self.assertFalse(target_file.exists())
    self.assertTrue(mismatched_file.exists())

def test_filename(self):
""" Test that the filename method returns the correct filename for the postbox item."""
filename = self.postbox_item.filename()
Expand Down Expand Up @@ -185,6 +200,7 @@ def test_date_invalid(self):
with self.assertRaises(AttributeError):
self.postbox_item.date()


class TestPostBox(unittest.TestCase):
""" Tests for the PostBox class. """

Expand Down

0 comments on commit cc7ce5e

Please sign in to comment.