Skip to content

Commit dc7f7c5

Browse files
committed
fix read_samples from SigMF archive
* When reading from a SigMF (.sigmf) archive, slicing and reading the entire file worked, but reading a specific sample count did not
* Add test for archive read_samples and refactor related tests
1 parent 63ccc70 commit dc7f7c5

File tree

3 files changed

+160
-123
lines changed

3 files changed

+160
-123
lines changed

sigmf/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
# SPDX-License-Identifier: LGPL-3.0-or-later
66

77
# version of this python module
8-
__version__ = "1.2.13"
8+
__version__ = "1.2.14"
99
# matching version of the SigMF specification
1010
__specification__ = "1.2.5"
1111

sigmf/sigmffile.py

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,7 @@ def __init__(self, metadata=None, data_file=None, global_info=None, skip_checksu
177177
"""
178178
super().__init__()
179179
self.data_file = None
180+
self.data_buffer = None
180181
self.sample_count = 0
181182
self._memmap = None
182183
self.is_complex_data = False # numpy.iscomplexobj(self._memmap) is not adequate for fixed-point complex case
@@ -490,23 +491,28 @@ def _count_samples(self):
490491
use 0.
491492
For complex data, a 'sample' includes both the real and imaginary part.
492493
"""
493-
if self.data_file is None:
494+
if self.data_file is None and self.data_buffer is None:
494495
sample_count = self._get_sample_count_from_annotations()
495496
else:
496497
header_bytes = sum([c.get(self.HEADER_BYTES_KEY, 0) for c in self.get_captures()])
497-
file_size = self.data_file.stat().st_size if self.data_size_bytes is None else self.data_size_bytes
498-
file_data_size = file_size - self.get_global_field(self.TRAILING_BYTES_KEY, 0) - header_bytes # bytes
498+
if self.data_file is not None:
499+
file_bytes = self.data_file.stat().st_size if self.data_size_bytes is None else self.data_size_bytes
500+
elif self.data_buffer is not None:
501+
file_bytes = len(self.data_buffer.getbuffer()) if self.data_size_bytes is None else self.data_size_bytes
502+
else:
503+
file_bytes = 0
504+
sample_bytes = file_bytes - self.get_global_field(self.TRAILING_BYTES_KEY, 0) - header_bytes
499505
sample_size = self.get_sample_size() # size of a sample in bytes
500506
num_channels = self.get_num_channels()
501-
sample_count = file_data_size // sample_size // num_channels
502-
if file_data_size % (sample_size * num_channels) != 0:
507+
sample_count = sample_bytes // sample_size // num_channels
508+
if sample_bytes % (sample_size * num_channels) != 0:
503509
warnings.warn(
504-
f"File `{self.data_file}` does not contain an integer number of samples across channels. "
510+
f"Data source does not contain an integer number of samples across channels. "
505511
"It may be invalid data."
506512
)
507513
if self._get_sample_count_from_annotations() > sample_count:
508514
warnings.warn(
509-
f"File `{self.data_file}` ends before the final annotation in the corresponding SigMF metadata."
515+
f"Data source ends before the final annotation in the corresponding SigMF metadata."
510516
)
511517
self.sample_count = sample_count
512518
return sample_count
@@ -735,7 +741,9 @@ def _read_datafile(self, first_byte, nitems, autoscale, raw_components):
735741
fp.seek(first_byte, 0)
736742
data = np.fromfile(fp, dtype=data_type_in, count=nitems)
737743
elif self.data_buffer is not None:
738-
data = np.frombuffer(self.data_buffer.getbuffer(), dtype=data_type_in, count=nitems)
744+
# handle offset for data_buffer like we do for data_file
745+
buffer_data = self.data_buffer.getbuffer()[first_byte:]
746+
data = np.frombuffer(buffer_data, dtype=data_type_in, count=nitems)
739747
else:
740748
data = self._memmap
741749

@@ -1065,10 +1073,13 @@ def fromarchive(archive_path, dir=None, skip_checksum=False):
10651073

10661074
def fromfile(filename, skip_checksum=False):
10671075
"""
1068-
Creates and returns a SigMFFile or SigMFCollection instance with metadata
1069-
loaded from the specified file. The filename may be that of either a
1070-
sigmf-meta file, a sigmf-data file, a sigmf-collection file, or a sigmf
1071-
archive.
1076+
Creates and returns a SigMFFile or SigMFCollection instance with metadata loaded from the specified file.
1077+
1078+
The file can be one of:
1079+
* A SigMF Metadata file (.sigmf-meta)
1080+
* A SigMF Dataset file (.sigmf-data)
1081+
* A SigMF Collection file (.sigmf-collection)
1082+
* A SigMF Archive file (.sigmf)
10721083
10731084
Parameters
10741085
----------

tests/test_archive.py

Lines changed: 136 additions & 110 deletions
Original file line numberDiff line numberDiff line change
@@ -7,127 +7,132 @@
77
"""Tests for SigMFArchive"""
88

99
import codecs
10+
import copy
1011
import json
12+
import shutil
1113
import tarfile
1214
import tempfile
15+
import unittest
1316
from pathlib import Path
1417

1518
import jsonschema
1619
import numpy as np
17-
import pytest
1820

19-
from sigmf import error
21+
from sigmf import SigMFFile, __specification__, error, fromfile
2022
from sigmf.archive import SIGMF_DATASET_EXT, SIGMF_METADATA_EXT
2123

2224
from .testdata import TEST_FLOAT32_DATA, TEST_METADATA
2325

2426

25-
def create_test_archive(test_sigmffile, tmpfile):
26-
sigmf_archive = test_sigmffile.archive(fileobj=tmpfile)
27-
sigmf_tarfile = tarfile.open(sigmf_archive, mode="r", format=tarfile.PAX_FORMAT)
28-
return sigmf_tarfile
29-
30-
31-
def test_without_data_file_throws_fileerror(test_sigmffile):
32-
test_sigmffile.data_file = None
33-
with tempfile.NamedTemporaryFile() as temp:
34-
with pytest.raises(error.SigMFFileError):
35-
test_sigmffile.archive(name=temp.name)
36-
37-
38-
def test_invalid_md_throws_validationerror(test_sigmffile):
39-
del test_sigmffile._metadata["global"]["core:datatype"] # required field
40-
with tempfile.NamedTemporaryFile() as temp:
41-
with pytest.raises(jsonschema.exceptions.ValidationError):
42-
test_sigmffile.archive(name=temp.name)
43-
44-
45-
def test_name_wrong_extension_throws_fileerror(test_sigmffile):
46-
with tempfile.NamedTemporaryFile() as temp:
47-
with pytest.raises(error.SigMFFileError):
48-
test_sigmffile.archive(name=temp.name + ".zip")
49-
50-
51-
def test_fileobj_extension_ignored(test_sigmffile):
52-
with tempfile.NamedTemporaryFile(suffix=".tar") as temp:
53-
test_sigmffile.archive(fileobj=temp)
54-
55-
56-
def test_name_used_in_fileobj(test_sigmffile):
57-
with tempfile.NamedTemporaryFile() as temp:
58-
sigmf_archive = test_sigmffile.archive(name="testarchive", fileobj=temp)
59-
sigmf_tarfile = tarfile.open(sigmf_archive, mode="r")
60-
basedir, file1, file2 = sigmf_tarfile.getmembers()
61-
assert basedir.name == "testarchive"
62-
63-
def filename(tarinfo):
64-
return Path(tarinfo.name).stem
65-
66-
assert filename(file1) == "testarchive"
67-
assert filename(file2) == "testarchive"
68-
69-
70-
def test_fileobj_not_closed(test_sigmffile):
71-
with tempfile.NamedTemporaryFile() as temp:
72-
test_sigmffile.archive(fileobj=temp)
73-
assert not temp.file.closed
74-
75-
76-
def test_unwritable_fileobj_throws_fileerror(test_sigmffile):
77-
with tempfile.NamedTemporaryFile(mode="rb") as temp:
78-
with pytest.raises(error.SigMFFileError):
79-
test_sigmffile.archive(fileobj=temp)
80-
81-
82-
def test_unwritable_name_throws_fileerror(test_sigmffile):
83-
# Cannot assume /root/ is unwritable (e.g. Docker environment)
84-
# so use invalid filename
85-
unwritable_file = "/bad_name/"
86-
with pytest.raises(error.SigMFFileError):
87-
test_sigmffile.archive(name=unwritable_file)
88-
89-
90-
def test_tarfile_layout(test_sigmffile):
91-
with tempfile.NamedTemporaryFile() as temp:
92-
sigmf_tarfile = create_test_archive(test_sigmffile, temp)
93-
basedir, file1, file2 = sigmf_tarfile.getmembers()
94-
assert tarfile.TarInfo.isdir(basedir)
95-
assert tarfile.TarInfo.isfile(file1)
96-
assert tarfile.TarInfo.isfile(file2)
97-
98-
99-
def test_tarfile_names_and_extensions(test_sigmffile):
100-
with tempfile.NamedTemporaryFile() as temp:
101-
sigmf_tarfile = create_test_archive(test_sigmffile, temp)
102-
basedir, file1, file2 = sigmf_tarfile.getmembers()
27+
class TestSigMFArchive(unittest.TestCase):
28+
"""Tests for SigMF Archive functionality"""
29+
30+
def setUp(self):
31+
"""Create temporary directory and test SigMFFile"""
32+
self.temp_dir = Path(tempfile.mkdtemp())
33+
self.temp_path_data = self.temp_dir / "trash.sigmf-data"
34+
self.temp_path_meta = self.temp_dir / "trash.sigmf-meta"
35+
self.temp_path_archive = self.temp_dir / "test.sigmf"
36+
TEST_FLOAT32_DATA.tofile(self.temp_path_data)
37+
self.sigmf_object = SigMFFile(copy.deepcopy(TEST_METADATA), data_file=self.temp_path_data)
38+
self.sigmf_object.tofile(self.temp_path_meta)
39+
self.sigmf_object.tofile(self.temp_path_archive, toarchive=True)
40+
self.sigmf_tarfile = tarfile.open(self.temp_path_archive, mode="r", format=tarfile.PAX_FORMAT)
41+
42+
def tearDown(self):
43+
"""Clean up temporary directory"""
44+
shutil.rmtree(self.temp_dir)
45+
46+
def test_archive_creation_requires_data_file(self):
47+
"""Test that archiving without data file raises error"""
48+
self.sigmf_object.data_file = None
49+
with self.assertRaises(error.SigMFFileError):
50+
self.sigmf_object.archive(name=self.temp_path_archive)
51+
52+
def test_archive_creation_validates_metadata(self):
53+
"""Test that invalid metadata raises error"""
54+
del self.sigmf_object._metadata["global"]["core:datatype"] # required field
55+
with self.assertRaises(jsonschema.exceptions.ValidationError):
56+
self.sigmf_object.archive(name=self.temp_path_archive)
57+
58+
def test_archive_creation_validates_extension(self):
59+
"""Test that wrong extension raises error"""
60+
wrong_name = self.temp_dir / "temp_archive.zip"
61+
with self.assertRaises(error.SigMFFileError):
62+
self.sigmf_object.archive(name=wrong_name)
63+
64+
def test_fileobj_ignores_extension(self):
65+
"""Test that file object extension is ignored"""
66+
temp_archive_tar = self.temp_dir / "test.sigmf.tar"
67+
with open(temp_archive_tar, "wb") as temp:
68+
self.sigmf_object.archive(fileobj=temp)
69+
70+
def test_custom_name_overrides_fileobj_name(self):
71+
"""Test that an explicit name is used for the archive members when writing to a file object"""
72+
with open(self.temp_path_archive, "wb") as temp:
73+
sigmf_archive = self.sigmf_object.archive(name="testarchive", fileobj=temp)
74+
sigmf_tarfile = tarfile.open(sigmf_archive, mode="r")
75+
basedir, file1, file2 = sigmf_tarfile.getmembers()
76+
self.assertEqual(basedir.name, "testarchive")
77+
self.assertEqual(Path(file1.name).stem, "testarchive")
78+
self.assertEqual(Path(file2.name).stem, "testarchive")
79+
80+
def test_fileobj_remains_open_after_archive(self):
81+
"""Test that file object is not closed after archiving"""
82+
with open(self.temp_path_archive, "wb") as temp:
83+
self.sigmf_object.archive(fileobj=temp)
84+
self.assertFalse(temp.closed)
85+
86+
def test_readonly_fileobj_raises_error(self):
87+
"""Test that unwritable file object raises error"""
88+
temp_path = self.temp_dir / "temp_archive.sigmf"
89+
temp_path.touch()
90+
with open(temp_path, "rb") as temp:
91+
with self.assertRaises(error.SigMFFileError):
92+
self.sigmf_object.archive(fileobj=temp)
93+
94+
def test_invalid_path_raises_error(self):
95+
"""Test that unwritable name raises error"""
96+
# Cannot assume /root/ is unwritable (e.g. Docker environment)
97+
# so use invalid filename
98+
unwritable_file = "/bad_name/"
99+
with self.assertRaises(error.SigMFFileError):
100+
self.sigmf_object.archive(name=unwritable_file)
101+
102+
def test_archive_contains_directory_and_files(self):
103+
"""Test archive layout structure"""
104+
basedir, file1, file2 = self.sigmf_tarfile.getmembers()
105+
self.assertTrue(tarfile.TarInfo.isdir(basedir))
106+
self.assertTrue(tarfile.TarInfo.isfile(file1))
107+
self.assertTrue(tarfile.TarInfo.isfile(file2))
108+
109+
def test_archive_files_have_correct_names_and_extensions(self):
110+
"""Test tarfile names and extensions"""
111+
basedir, file1, file2 = self.sigmf_tarfile.getmembers()
103112
archive_name = basedir.name
104-
assert archive_name == Path(temp.name).name
113+
self.assertEqual(archive_name, Path(self.temp_path_archive).stem)
105114
file_extensions = {SIGMF_DATASET_EXT, SIGMF_METADATA_EXT}
106115

107116
file1_name, file1_ext = Path(file1.name).stem, Path(file1.name).suffix
108-
assert file1_name == archive_name
109-
assert file1_ext in file_extensions
117+
self.assertEqual(file1_name, archive_name)
118+
self.assertIn(file1_ext, file_extensions)
110119

111120
file_extensions.remove(file1_ext)
112121

113122
file2_name, file2_ext = Path(file2.name).stem, Path(file2.name).suffix
114-
assert file2_name == archive_name
115-
assert file2_ext in file_extensions
116-
117-
118-
def test_tarfile_persmissions(test_sigmffile):
119-
with tempfile.NamedTemporaryFile() as temp:
120-
sigmf_tarfile = create_test_archive(test_sigmffile, temp)
121-
basedir, file1, file2 = sigmf_tarfile.getmembers()
122-
assert basedir.mode == 0o755
123-
assert file1.mode == 0o644
124-
assert file2.mode == 0o644
125-
126-
127-
def test_contents(test_sigmffile):
128-
with tempfile.NamedTemporaryFile() as temp:
129-
sigmf_tarfile = create_test_archive(test_sigmffile, temp)
130-
basedir, file1, file2 = sigmf_tarfile.getmembers()
123+
self.assertEqual(file2_name, archive_name)
124+
self.assertIn(file2_ext, file_extensions)
125+
126+
def test_archive_files_have_correct_permissions(self):
127+
"""Test tarfile permissions"""
128+
basedir, file1, file2 = self.sigmf_tarfile.getmembers()
129+
self.assertEqual(basedir.mode, 0o755)
130+
self.assertEqual(file1.mode, 0o644)
131+
self.assertEqual(file2.mode, 0o644)
132+
133+
def test_archive_contents_match_original_data(self):
134+
"""Test archive contents"""
135+
_, file1, file2 = self.sigmf_tarfile.getmembers()
131136
if file1.name.endswith(SIGMF_METADATA_EXT):
132137
mdfile = file1
133138
datfile = file2
@@ -136,18 +141,39 @@ def test_contents(test_sigmffile):
136141
datfile = file1
137142

138143
bytestream_reader = codecs.getreader("utf-8") # bytes -> str
139-
mdfile_reader = bytestream_reader(sigmf_tarfile.extractfile(mdfile))
140-
assert json.load(mdfile_reader) == TEST_METADATA
144+
mdfile_reader = bytestream_reader(self.sigmf_tarfile.extractfile(mdfile))
145+
self.assertEqual(json.load(mdfile_reader), TEST_METADATA)
141146

142-
datfile_reader = sigmf_tarfile.extractfile(datfile)
147+
datfile_reader = self.sigmf_tarfile.extractfile(datfile)
143148
# calling `fileno` on `tarfile.ExFileObject` throws error (?), but
144149
# np.fromfile requires it, so we need this extra step
145150
data = np.frombuffer(datfile_reader.read(), dtype=np.float32)
146151

147-
assert np.array_equal(data, TEST_FLOAT32_DATA)
148-
149-
150-
def test_tarfile_type(test_sigmffile):
151-
with tempfile.NamedTemporaryFile() as temp:
152-
sigmf_tarfile = create_test_archive(test_sigmffile, temp)
153-
assert sigmf_tarfile.format == tarfile.PAX_FORMAT
152+
np.testing.assert_array_equal(data, TEST_FLOAT32_DATA)
153+
154+
def test_tarfile_format(self):
155+
"""Tar file format is PAX"""
156+
self.assertEqual(self.sigmf_tarfile.format, tarfile.PAX_FORMAT)
157+
158+
def test_archive_read_samples(self):
159+
"""test that read_samples works correctly with archived data"""
160+
# load from archive
161+
archive_mdfile = fromfile(self.temp_path_archive)
162+
163+
# verify sample count matches
164+
expected_sample_count = len(self.sigmf_object)
165+
self.assertEqual(archive_mdfile.sample_count, expected_sample_count)
166+
167+
# verify read_samples returns same as slice
168+
samples_orig = TEST_FLOAT32_DATA[3:13]
169+
samples_read = archive_mdfile.read_samples(start_index=3, count=10)
170+
samples_sliced = archive_mdfile[3:13]
171+
np.testing.assert_array_equal(samples_orig, samples_sliced)
172+
np.testing.assert_array_equal(samples_orig, samples_read)
173+
174+
def test_archive_read_samples_beyond_end(self):
175+
"""test that read_samples beyond end of data raises error"""
176+
meta = fromfile(self.temp_path_archive)
177+
# FIXME: Should this raise a SigMFFileError instead?
178+
with self.assertRaises(OSError):
179+
meta.read_samples(start_index=meta.sample_count + 10, count=5)

0 commit comments

Comments
 (0)