Skip to content

Commit

Permalink
rewrite & simplify SigMFArchive; fix all broken tests
Browse files Browse the repository at this point in the history
  • Loading branch information
Teque5 committed Jan 13, 2025
1 parent e291b98 commit a4b6c04
Show file tree
Hide file tree
Showing 7 changed files with 138 additions and 134 deletions.
217 changes: 108 additions & 109 deletions sigmf/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,135 +21,134 @@


class SigMFArchive:
"""Archive a SigMFFile.
"""
Archive a SigMFFile
A `.sigmf` file must include both valid metadata and data.
If `self.data_file` is not set or the requested output file
is not writable, raise `SigMFFileError`.
Parameters:
sigmffile -- A SigMFFile object with valid metadata and data_file
name -- path to archive file to create. If file exists, overwrite.
If `name` doesn't end in .sigmf, it will be appended.
For example: if `name` == "/tmp/archive1", then the
following archive will be created:
/tmp/archive1.sigmf
- archive1/
- archive1.sigmf-meta
- archive1.sigmf-data
fileobj -- If `fileobj` is specified, it is used as an alternative to
a file object opened in binary mode for `name`. It is
supposed to be at position 0. `name` is not required, but
if specified will be used to determine the directory and
file names within the archive. `fileobj` won't be closed.
For example: if `name` == "archive1" and fileobj is given,
a tar archive will be written to fileobj with the
following structure:
- archive1/
- archive1.sigmf-meta
- archive1.sigmf-data
is not writable, raises `SigMFFileError`.
Parameters
----------
sigmffile : SigMFFile
A SigMFFile object with valid metadata and data_file.
name : PathLike | str | bytes
Path to archive file to create. If file exists, overwrite.
If `name` doesn't end in .sigmf, it will be appended.
For example: if `name` == "/tmp/archive1", then the
following archive will be created:
/tmp/archive1.sigmf
- archive1/
- archive1.sigmf-meta
- archive1.sigmf-data
fileobj : BufferedWriter
If `fileobj` is specified, it is used as an alternative to
a file object opened in binary mode for `name`. It is
supposed to be at position 0. `name` is not required, but
if specified will be used to determine the directory and
file names within the archive. `fileobj` won't be closed.
For example: if `name` == "archive1" and fileobj is given,
a tar archive will be written to fileobj with the
following structure:
- archive1/
- archive1.sigmf-meta
- archive1.sigmf-data
"""

def __init__(self, sigmffile, name=None, fileobj=None):
is_buffer = fileobj is not None
self.sigmffile = sigmffile
self.name = Path(name)
self.fileobj = Path(fileobj)
self.path, arcname, fileobj = self._resolve(name, fileobj)

self._check_input()
self._ensure_data_file_set()
self._validate()

archive_name = self._get_archive_name()
sigmf_fileobj = self._get_output_fileobj()
sigmf_archive = tarfile.TarFile(mode="w", fileobj=sigmf_fileobj, format=tarfile.PAX_FORMAT)
tar = tarfile.TarFile(mode="w", fileobj=fileobj, format=tarfile.PAX_FORMAT)
tmpdir = Path(tempfile.mkdtemp())
sigmf_md_filename = archive_name + SIGMF_METADATA_EXT
sigmf_md_path = tmpdir / sigmf_md_filename
sigmf_data_filename = Path.joinpath(archive_name, SIGMF_DATASET_EXT)
sigmf_data_path = tmpdir / sigmf_data_filename

with open(sigmf_md_path, "w") as mdfile:
self.sigmffile.dump(mdfile, pretty=True)
meta_path = tmpdir / (arcname + SIGMF_METADATA_EXT)
data_path = tmpdir / (arcname + SIGMF_DATASET_EXT)

# write files
with open(meta_path, "w") as handle:
self.sigmffile.dump(handle)
if isinstance(self.sigmffile.data_buffer, io.BytesIO):
self.sigmffile.data_file = sigmf_data_path
with open(sigmf_data_path, "wb") as f:
f.write(self.sigmffile.data_buffer.getbuffer())
# write data buffer to archive
self.sigmffile.data_file = data_path
with open(data_path, "wb") as handle:
handle.write(self.sigmffile.data_buffer.getbuffer())
else:
shutil.copy(self.sigmffile.data_file, sigmf_data_path)

def chmod(tarinfo):
if tarinfo.isdir():
tarinfo.mode = 0o755 # drwxr-xr-x
else:
tarinfo.mode = 0o644 # -rw-r--r--
return tarinfo

sigmf_archive.add(tmpdir, arcname=archive_name, filter=chmod)
sigmf_archive.close()
if not fileobj:
sigmf_fileobj.close()

# copy data to archive
shutil.copy(self.sigmffile.data_file, data_path)
tar.add(tmpdir, arcname=arcname, filter=self.chmod)
# close files & remove tmpdir
tar.close()
if not is_buffer:
# only close fileobj if we aren't working w/a buffer
fileobj.close()
shutil.rmtree(tmpdir)

self.path = sigmf_archive.name

def _check_input(self):
self._ensure_name_has_correct_extension()
self._ensure_data_file_set()
self._validate_sigmffile_metadata()

def _ensure_name_has_correct_extension(self):
name = Path(self.name)
if name is None:
return

has_extension = "." in name
has_correct_extension = name.endswith(SIGMF_ARCHIVE_EXT)
if has_extension and not has_correct_extension:
apparent_ext = name.suffix
err = "extension {} != {}".format(apparent_ext, SIGMF_ARCHIVE_EXT)
raise SigMFFileError(err)

self.name = name if has_correct_extension else name + SIGMF_ARCHIVE_EXT
@staticmethod
def chmod(tarinfo: tarfile.TarInfo):
"""permission filter for writing tar files"""
if tarinfo.isdir():
tarinfo.mode = 0o755 # drwxr-xr-x
else:
tarinfo.mode = 0o644 # -rw-r--r--
return tarinfo

def _ensure_data_file_set(self):
if not self.sigmffile.data_file and not isinstance(self.sigmffile.data_buffer, io.BytesIO):
err = "no data file - use `set_data_file`"
raise SigMFFileError(err)
raise SigMFFileError("No data file in SigMFFile; use `set_data_file` before archiving.")

def _validate_sigmffile_metadata(self):
def _validate(self):
self.sigmffile.validate()

def _get_archive_name(self):
if self.fileobj and not self.name:
pathname = self.fileobj.name
else:
pathname = self.name

filename = pathname.name
archive_name, archive_ext = filename.stem, filename.suffix
return archive_name

def _get_output_fileobj(self):
try:
fileobj = self._get_open_fileobj()
except:
if self.fileobj:
err = "fileobj {!r} is not byte-writable".format(self.fileobj)
else:
err = "can't open {!r} for writing".format(self.name)

raise SigMFFileError(err)

return fileobj

def _get_open_fileobj(self):
if self.fileobj:
fileobj = self.fileobj
fileobj.write(bytes()) # force exception if not byte-writable
def _resolve(self, name, fileobj):
"""
Resolve both (name, fileobj) into (path, arcname, fileobj) given either or both.
Returns
-------
path : PathLike
Path of the archive file.
arcname : str
Name of the sigmf object within the archive.
fileobj : BufferedWriter
Open file handle object.
"""
if fileobj:
try:
# exception if not byte-writable
fileobj.write(bytes())
# exception if no name property of handle
path = Path(fileobj.name)
if not name:
arcname = path.stem
else:
arcname = name
except io.UnsupportedOperation:
raise SigMFFileError(f"fileobj {fileobj} is not byte-writable.")
except AttributeError:
raise SigMFFileError(f"fileobj {fileobj} is invalid.")
elif name:
path = Path(name)
# ensure name has correct suffix if it exists
if path.suffix == "":
# add extension if none was given
path = path.with_suffix(SIGMF_ARCHIVE_EXT)
elif path.suffix != SIGMF_ARCHIVE_EXT:
# ensure suffix is correct
raise SigMFFileError(f"Invalid extension ({path.suffix} != {SIGMF_ARCHIVE_EXT}).")
arcname = path.stem

try:
fileobj = open(path, "wb")
except (OSError, IOError):
raise SigMFFileError(f"Can't open {name} for writing.")
else:
fileobj = open(self.name, "wb")
raise SigMFFileError("Either `name` or `fileobj` needs to be defined.")

return fileobj
return path, arcname, fileobj
34 changes: 23 additions & 11 deletions sigmf/archivereader.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,28 +26,40 @@


class SigMFArchiveReader:
"""Access data within SigMF archive `tar` in-place without extracting.
Parameters:
name -- path to archive file to access. If file does not exist,
or if `name` doesn't end in .sigmf, SigMFFileError is raised.
"""
Access data within SigMF archive `tar` in-place without extracting.
Parameters
----------
name : str | bytes | PathLike, optional
Optional path to archive file to access.
skip_checksum : bool, optional
Skip dataset checksum calculation.
map_readonly : bool, optional
Indicate whether assignments on the numpy.memmap are allowed.
archive_buffer : buffer, optional
Raises
------
SigMFError
Archive file does not exist or is improperly formatted.
"""

def __init__(self, name=None, skip_checksum=False, map_readonly=True, archive_buffer=None):
self.name = name
if self.name is not None:
if not name.endswith(SIGMF_ARCHIVE_EXT):
if name is not None:
path = Path(name)
if path.suffix != SIGMF_ARCHIVE_EXT:
err = "archive extension != {}".format(SIGMF_ARCHIVE_EXT)
raise SigMFFileError(err)

tar_obj = tarfile.open(self.name)
tar_obj = tarfile.open(path)

elif archive_buffer is not None:
tar_obj = tarfile.open(fileobj=archive_buffer, mode="r:")

else:
raise ValueError("In sigmf.archivereader.__init__(), either `name` or `archive_buffer` must be not None")
raise ValueError("Either `name` or `archive_buffer` must be not None.")

json_contents = None
data_offset = None
Expand Down
3 changes: 1 addition & 2 deletions sigmf/sigmffile.py
Original file line number Diff line number Diff line change
Expand Up @@ -554,7 +554,7 @@ def set_data_file(
if self.get_global_field(self.DATATYPE_KEY) is None:
raise SigMFFileError("Error setting data file, the DATATYPE_KEY must be set in the global metadata first.")

self.data_file = Path(data_file)
self.data_file = Path(data_file) if data_file else None
self.data_buffer = data_buffer
self.data_offset = offset
self.data_size_bytes = size_bytes
Expand Down Expand Up @@ -1042,7 +1042,6 @@ def fromarchive(archive_path, dir=None, skip_checksum=False):
access SigMF archives without extracting them.
"""
from .archivereader import SigMFArchiveReader

return SigMFArchiveReader(archive_path, skip_checksum=skip_checksum).sigmffile


Expand Down
3 changes: 1 addition & 2 deletions sigmf/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,13 @@

import re
import sys
from pathlib import Path
from copy import deepcopy
from datetime import datetime, timezone
from pathlib import Path

import numpy as np

from . import error
from . import __version__

SIGMF_DATETIME_ISO8601_FMT = "%Y-%m-%dT%H:%M:%S.%fZ"

Expand Down
1 change: 0 additions & 1 deletion tests/test_archivereader.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,6 @@ def test_archiveread_data_file_unchanged(test_sigmffile):
with tempfile.NamedTemporaryFile(suffix=".sigmf") as temp:
input_samples = test_sigmffile.read_samples()
test_sigmffile.archive(temp.name)

arc = sigmf.sigmffile.fromfile(temp.name)
output_samples = arc.read_samples()

Expand Down
2 changes: 1 addition & 1 deletion tests/test_ncd.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def test_load_ncd(self, subdir: str) -> None:
"""test loading non-conforming dataset"""
data_path = self.temp_dir / subdir / "dat.bin"
meta_path = self.temp_dir / subdir / "dat.sigmf-meta"
Path.mkdir(data_path.parent, exist_ok=True)
Path.mkdir(data_path.parent, parents=True, exist_ok=True)

# create data file
TEST_FLOAT32_DATA.tofile(data_path)
Expand Down
12 changes: 4 additions & 8 deletions tests/test_sigmffile.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def test_set_data_file_without_annotations(self):
smf = SigMFFile(copy.deepcopy(TEST_METADATA))
smf._metadata[SigMFFile.ANNOTATION_KEY].clear()
with tempfile.TemporaryDirectory() as tmpdir:
temp_path_data = Path.joinpath(tmpdir, "datafile")
temp_path_data = Path(tmpdir) / "datafile"
TEST_FLOAT32_DATA.tofile(temp_path_data)
smf.set_data_file(temp_path_data)
samples = smf.read_samples()
Expand All @@ -113,7 +113,7 @@ def test_set_data_file_with_annotations(self):
smf = SigMFFile(copy.deepcopy(TEST_METADATA))
smf.add_annotation(start_index=0, length=32)
with tempfile.TemporaryDirectory() as tmpdir:
temp_path_data = Path.joinpath(tmpdir, "datafile")
temp_path_data = Path(tmpdir) / "datafile"
TEST_FLOAT32_DATA.tofile(temp_path_data)
with self.assertWarns(Warning):
# Issues warning since file ends before the final annotation
Expand Down Expand Up @@ -159,14 +159,10 @@ def test_add_annotation():


def test_fromarchive(test_sigmffile):
print("test_sigmffile is:\n", test_sigmffile)
tf = tempfile.mkstemp()[1]
td = tempfile.mkdtemp()
_, tf = tempfile.mkstemp()
archive_path = test_sigmffile.archive(name=tf)
result = sigmffile.fromarchive(archive_path=archive_path, dir=td)
result = sigmffile.fromarchive(archive_path=archive_path)
assert result._metadata == test_sigmffile._metadata == TEST_METADATA
Path.unlink(tf)
shutil.rmtree(td)


def test_add_multiple_captures_and_annotations():
Expand Down

0 comments on commit a4b6c04

Please sign in to comment.