From 668918e082ac97fbd253216735c425d32970c7ff Mon Sep 17 00:00:00 2001
From: Jared Lumpe
Date: Sat, 27 Jan 2024 23:08:30 -0800
Subject: [PATCH] Better error messages when opening invalid signatures file

---
 gambit/sigs/base.py          | 16 ++++++++++++++++
 gambit/sigs/hdf5.py          | 36 ++++++++++++++++++++++++++++--------
 tests/sigs/test_sigs_hdf5.py | 30 ++++++++++++++++++++++++++++++
 3 files changed, 74 insertions(+), 8 deletions(-)

diff --git a/gambit/sigs/base.py b/gambit/sigs/base.py
index bf73203..5ca383c 100644
--- a/gambit/sigs/base.py
+++ b/gambit/sigs/base.py
@@ -397,6 +397,22 @@ def __getitem__(self, index):
 		return self.signatures[index]
 
 
+class SignaturesFileError(Exception):
+	"""Error opening a signatures file; ``filename`` and ``format`` are ``None`` when not known."""
+
+	message: str
+	filename: Optional[str]
+	format: Optional[str]
+
+	def __init__(self, message: str, filename: Optional[FilePath], format: Optional[str]):
+		self.message = message
+		self.filename = None if filename is None else str(filename)  # str(None) would give 'None'
+		self.format = format
+
+	def __str__(self):
+		return self.message
+
+
 def load_signatures(path: FilePath, **kw) -> AbstractSignatureArray:
 	"""Load signatures from file.
 
diff --git a/gambit/sigs/hdf5.py b/gambit/sigs/hdf5.py
index 7e2b930..c095f40 100644
--- a/gambit/sigs/hdf5.py
+++ b/gambit/sigs/hdf5.py
@@ -7,7 +7,7 @@ import h5py as h5
 
 
 from .base import SignatureArray, ConcatenatedSignatureArray, AbstractSignatureArray, SignaturesMeta,\
-	ReferenceSignatures
+	ReferenceSignatures, SignaturesFileError
 from gambit.kmers import KmerSpec
 from gambit._cython.metric import BOUNDS_DTYPE
 from gambit.util.io import FilePath
@@ -93,11 +93,11 @@ def __init__(self, group: h5.Group):
 		self.group = group
 
 		if FMT_VERSION_ATTR not in group.attrs:
-			raise RuntimeError('HDF5 group does not contain a signature set')
+			raise SignaturesFileError('HDF5 group does not contain a signature set', None, 'hdf5')
 
 		self.format_version = group.attrs[FMT_VERSION_ATTR]
 		if self.format_version != CURRENT_FMT_VERSION:
-			raise ValueError(f'Unrecognized format version: {self.format_version}')
+			raise SignaturesFileError(f'Unrecognized format version: {self.format_version}', None, 'hdf5')
 
 		self.kmerspec = KmerSpec(group.attrs['kmerspec_k'], group.attrs['kmerspec_prefix'])
 		self.meta = read_metadata(group)
@@ -229,13 +229,33 @@ def load_signatures_hdf5(path: FilePath, **kw) -> HDF5Signatures:
 	\\**kw
 		Additional keyword arguments to :func:`h5py.File`.
 	"""
-	return HDF5Signatures(h5.File(path, **kw))
+	exc = SignaturesFileError(f'{path} does not appear to be a GAMBIT signatures file.', path, 'hdf5')
 
+	# Check for HDF5 magic number
+	# The errors raised by the h5py library are a bit cryptic, so make one with a better message if
+	# not a valid HDF5 file.
+	# This also raises the standard errors if file cannot be read.
+	with open(path, 'rb') as f:
+		header = f.read(8)
+		if header != b'\x89HDF\r\n\x1a\n':
+			raise exc
 
-def dump_signatures_hdf5(path: FilePath,
-                         signatures: AbstractSignatureArray,
-                         **kw,
-                         ):
+	h5file = h5.File(path, **kw)
+	if FMT_VERSION_ATTR not in h5file.attrs:
+		h5file.close()  # Don't leak the open file handle when rejecting the file
+		raise exc
+
+	try:
+		return HDF5Signatures(h5file)
+	except SignaturesFileError as err:
+		# Make sure errors in opening are annotated with the correct file name
+		h5file.close()
+		err.message = f'Error opening signatures file {path}: {err.message}'
+		err.filename = str(path)
+		raise
+
+
+def dump_signatures_hdf5(path: FilePath, signatures: AbstractSignatureArray, **kw):
 	"""Write k-mer signatures and associated metadata to an HDF5 file.
 
 	Parameters
diff --git a/tests/sigs/test_sigs_hdf5.py b/tests/sigs/test_sigs_hdf5.py
index 819d6ce..941039a 100644
--- a/tests/sigs/test_sigs_hdf5.py
+++ b/tests/sigs/test_sigs_hdf5.py
@@ -6,6 +6,7 @@
 
 from gambit.sigs.hdf5 import read_metadata, write_metadata, load_signatures_hdf5, dump_signatures_hdf5
 from gambit.sigs import SignaturesMeta, SignatureList, AnnotatedSignatures
+from gambit.sigs.base import SignaturesFileError
 from gambit.sigs.test import AbstractSignatureArrayTests
 from gambit.kmers import KmerSpec
 from gambit.test import make_signatures
@@ -51,6 +52,35 @@ def dump_load(sigs, path, **kw):
 	return load_signatures_hdf5(f)
 
 
+def test_open_not_hdf5(tmp_path):
+	"""Test opening an invalid file."""
+
+	# Not an HDF5 file
+	file = tmp_path / 'not-hdf5.gs'
+	with open(file, 'w') as f:
+		f.write('foo')
+
+	with pytest.raises(SignaturesFileError) as einfo:
+		load_signatures_hdf5(file)
+
+	assert einfo.value.filename == str(file)
+	assert einfo.value.format == 'hdf5'
+
+
+def test_open_invalid(tmp_path):
+	"""Test opening an invalid HDF5 file."""
+
+	file = tmp_path / 'invalid.gs'
+	with h5.File(file, 'w') as f:
+		pass  # Empty
+
+	with pytest.raises(SignaturesFileError) as einfo:
+		load_signatures_hdf5(file)
+
+	assert einfo.value.filename == str(file)
+	assert einfo.value.format == 'hdf5'
+
+
 class TestHDF5Signatures:
 
 	@pytest.fixture(scope='class')