From 6e1ea1453d050bc06864b59de93a62ebfb58e102 Mon Sep 17 00:00:00 2001 From: Sean Morgan Date: Thu, 28 Mar 2024 11:06:51 -0700 Subject: [PATCH 1/4] Read model stream from start --- modelscan/model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modelscan/model.py b/modelscan/model.py index 43dd611..11e6463 100644 --- a/modelscan/model.py +++ b/modelscan/model.py @@ -51,4 +51,4 @@ def get_stream(self) -> IO[bytes]: if not self._stream: raise ModelDataEmpty("Model data is empty.") - return self._stream + return self._stream.seek(0) From 148c5883d6588d0d2d73d14b9a9f19da70bab4b0 Mon Sep 17 00:00:00 2001 From: Sean Morgan Date: Thu, 28 Mar 2024 11:18:23 -0700 Subject: [PATCH 2/4] Reset stream --- modelscan/model.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modelscan/model.py b/modelscan/model.py index 11e6463..ff58035 100644 --- a/modelscan/model.py +++ b/modelscan/model.py @@ -51,4 +51,5 @@ def get_stream(self) -> IO[bytes]: if not self._stream: raise ModelDataEmpty("Model data is empty.") - return self._stream.seek(0) + self._stream.seek(0) + return self._stream From c8dfa47340bdacb7f95840410e568dd0db54ac43 Mon Sep 17 00:00:00 2001 From: Sean Morgan Date: Thu, 28 Mar 2024 15:53:14 -0700 Subject: [PATCH 3/4] Correctly handle numpy stream --- modelscan/tools/picklescanner.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modelscan/tools/picklescanner.py b/modelscan/tools/picklescanner.py index b6e6078..87c4fa1 100644 --- a/modelscan/tools/picklescanner.py +++ b/modelscan/tools/picklescanner.py @@ -190,7 +190,7 @@ def scan_numpy(model: Model, settings: Dict[str, Any]) -> ScanResults: magic = model.get_stream().read(N) # If the file size is less than N, we need to make sure not # to seek past the beginning of the file - model.get_stream().seek(-min(N, len(magic)), 1) # back-up + model._stream.seek(-min(N, len(magic)), 1) # back-up if magic.startswith(_ZIP_PREFIX) or magic.startswith(_ZIP_SUFFIX): # .npz file return ScanResults( @@ -208,9 +208,9 @@ def scan_numpy(model: Model, settings: Dict[str, Any]) -> ScanResults: elif magic == np.lib.format.MAGIC_PREFIX: # .npy file - version = np.lib.format.read_magic(model.get_stream()) # type: ignore[no-untyped-call] + version = np.lib.format.read_magic(model._stream) # type: ignore[no-untyped-call] np.lib.format._check_version(version) # type: ignore[attr-defined] - _, _, dtype = np.lib.format._read_array_header(model.get_stream(), version) # type: ignore[attr-defined] + _, _, dtype = np.lib.format._read_array_header(model._stream, version) # type: ignore[attr-defined] if dtype.hasobject: return scan_pickle_bytes(model, settings, scan_name) From 99514a39b2e0f40a776e75696e47bb4d616ea1bc Mon Sep 17 00:00:00 2001 From: Sean Morgan Date: Thu, 28 Mar 2024 16:11:06 -0700 Subject: [PATCH 4/4] Correctly handle numpy stream --- modelscan/tools/picklescanner.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/modelscan/tools/picklescanner.py b/modelscan/tools/picklescanner.py index 87c4fa1..80d7409 100644 --- a/modelscan/tools/picklescanner.py +++ b/modelscan/tools/picklescanner.py @@ -187,10 +187,11 @@ def scan_numpy(model: Model, settings: Dict[str, Any]) -> ScanResults: _ZIP_PREFIX = b"PK\x03\x04" _ZIP_SUFFIX = b"PK\x05\x06" # empty zip files start with this N = len(np.lib.format.MAGIC_PREFIX) - magic = model.get_stream().read(N) + stream = model.get_stream() + magic = stream.read(N) # If the file size is less than N, we need to make sure not # to seek past the beginning of the file - model._stream.seek(-min(N, len(magic)), 1) # back-up + stream.seek(-min(N, len(magic)), 1) # back-up if magic.startswith(_ZIP_PREFIX) or magic.startswith(_ZIP_SUFFIX): # .npz file return ScanResults( @@ -208,9 +209,9 @@ def scan_numpy(model: Model, settings: Dict[str, Any]) -> ScanResults: elif magic == np.lib.format.MAGIC_PREFIX: # .npy file - version = np.lib.format.read_magic(model._stream) # type: ignore[no-untyped-call] + version = np.lib.format.read_magic(stream) # type: ignore[no-untyped-call] np.lib.format._check_version(version) # type: ignore[attr-defined] - _, _, dtype = np.lib.format._read_array_header(model._stream, version) # type: ignore[attr-defined] + _, _, dtype = np.lib.format._read_array_header(stream, version) # type: ignore[attr-defined] if dtype.hasobject: return scan_pickle_bytes(model, settings, scan_name)