diff --git a/CHANGELOG.md b/CHANGELOG.md index fb0cab7..df9feb7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +## 0.43.1 - TBD + +#### Bug fixes +- Fixed an issue where validating the checksum of a batch file loaded the entire file into memory + ## 0.43.0 - 2024-10-09 This release drops support for Python 3.8 which has reached end-of-life. diff --git a/databento/historical/api/batch.py b/databento/historical/api/batch.py index 47f4d57..223520c 100644 --- a/databento/historical/api/batch.py +++ b/databento/historical/api/batch.py @@ -431,7 +431,11 @@ def _download_batch_file( hash_algo, _, hash_hex = batch_download_file.hash_str.partition(":") if hash_algo == "sha256": - output_hash = hashlib.sha256(output_path.read_bytes()) + output_hash = hashlib.new(hash_algo) + with open(output_path, "rb") as fd: + while chunk := fd.read(32_000_000): + output_hash.update(chunk) + if output_hash.hexdigest() != hash_hex: warn_msg = f"Downloaded file failed checksum validation: {output_path.name}" logger.warning(warn_msg)