Skip to content

Commit

Permalink
Better error messages when opening invalid signatures file
Browse files Browse the repository at this point in the history
  • Loading branch information
jlumpe committed Jan 28, 2024
1 parent 0187f2e commit 668918e
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 8 deletions.
16 changes: 16 additions & 0 deletions gambit/sigs/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,6 +397,22 @@ def __getitem__(self, index):
return self.signatures[index]


class SignaturesFileError(Exception):
    """Indicates an error attempting to open a signatures file.

    Attributes
    ----------
    message
        Description of the problem. This is the text returned by ``str(exc)``.
    filename
        Path of the offending file converted to a string, or ``None`` if no file
        name was supplied when the error was created.
    format
        Name of the file format that was being read (e.g. ``'hdf5'``), or ``None``.
    """

    message: str
    filename: Optional[str]
    format: Optional[str]

    def __init__(self, message: str, filename: Optional[FilePath], format: Optional[str]):
        # Forward everything so that ``args`` mirrors the constructor signature.
        super().__init__(message, filename, format)
        self.message = message
        # ``str(None)`` would produce the misleading literal 'None'; keep a missing
        # file name as None so callers can tell it has not been set.
        self.filename = None if filename is None else str(filename)
        self.format = format

    def __str__(self):
        return self.message


def load_signatures(path: FilePath, **kw) -> AbstractSignatureArray:
"""Load signatures from file.
Expand Down
36 changes: 28 additions & 8 deletions gambit/sigs/hdf5.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import h5py as h5

from .base import SignatureArray, ConcatenatedSignatureArray, AbstractSignatureArray, SignaturesMeta,\
ReferenceSignatures
ReferenceSignatures, SignaturesFileError
from gambit.kmers import KmerSpec
from gambit._cython.metric import BOUNDS_DTYPE
from gambit.util.io import FilePath
Expand Down Expand Up @@ -93,11 +93,11 @@ def __init__(self, group: h5.Group):
self.group = group

if FMT_VERSION_ATTR not in group.attrs:
raise RuntimeError('HDF5 group does not contain a signature set')
raise SignaturesFileError('HDF5 group does not contain a signature set', None, 'hdf5')

self.format_version = group.attrs[FMT_VERSION_ATTR]
if self.format_version != CURRENT_FMT_VERSION:
raise ValueError(f'Unrecognized format version: {self.format_version}')
raise ValueError(f'Unrecognized format version: {self.format_version}', None, 'hdf5')

self.kmerspec = KmerSpec(group.attrs['kmerspec_k'], group.attrs['kmerspec_prefix'])
self.meta = read_metadata(group)
Expand Down Expand Up @@ -229,13 +229,33 @@ def load_signatures_hdf5(path: FilePath, **kw) -> HDF5Signatures:
\\**kw
Additional keyword arguments to :func:`h5py.File`.
"""
return HDF5Signatures(h5.File(path, **kw))
exc = SignaturesFileError(f'{path} does not appear to be a GAMBIT signtures file.', path, 'hdf5')

# Check for HDF5 magic number
# The errors raised by the h5py library are a bit cryptic, so make one with a better message if
# not a valid HDF5 file.
# This also raises the standard errors if file cannot be read.
with open(path, 'rb') as f:
header = f.read(8)
if header != b'\x89HDF\r\n\x1a\n':
raise exc

def dump_signatures_hdf5(path: FilePath,
signatures: AbstractSignatureArray,
**kw,
):
h5file = h5.File(path, **kw)

if FMT_VERSION_ATTR not in h5file.attrs:
raise exc

try:
return HDF5Signatures(h5file)

except SignaturesFileError as exc:
# Make sure errors in opening are annotated with the correct file name
exc.message = f'Error opening signatures file {path}: {exc.message}'
exc.filename = str(path)
raise


def dump_signatures_hdf5(path: FilePath, signatures: AbstractSignatureArray, **kw):
"""Write k-mer signatures and associated metadata to an HDF5 file.
Parameters
Expand Down
30 changes: 30 additions & 0 deletions tests/sigs/test_sigs_hdf5.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

from gambit.sigs.hdf5 import read_metadata, write_metadata, load_signatures_hdf5, dump_signatures_hdf5
from gambit.sigs import SignaturesMeta, SignatureList, AnnotatedSignatures
from gambit.sigs.base import SignaturesFileError
from gambit.sigs.test import AbstractSignatureArrayTests
from gambit.kmers import KmerSpec
from gambit.test import make_signatures
Expand Down Expand Up @@ -51,6 +52,35 @@ def dump_load(sigs, path, **kw):
return load_signatures_hdf5(f)


def test_open_not_hdf5(tmp_path):
    """Loading a file that is not HDF5 raises an annotated ``SignaturesFileError``."""

    # Plain text content, so this is not an HDF5 file
    bogus_path = tmp_path / 'not-hdf5.gs'
    bogus_path.write_text('foo')

    with pytest.raises(SignaturesFileError) as excinfo:
        load_signatures_hdf5(bogus_path)

    # The error should carry the offending file's name and the expected format
    err = excinfo.value
    assert err.filename == str(bogus_path)
    assert err.format == 'hdf5'


def test_open_invalid(tmp_path):
    """Loading an HDF5 file with no signature data raises ``SignaturesFileError``."""

    # Create an empty HDF5 file: open for writing, then close it straight away
    empty_path = tmp_path / 'invalid.gs'
    h5.File(empty_path, 'w').close()

    with pytest.raises(SignaturesFileError) as excinfo:
        load_signatures_hdf5(empty_path)

    # The error should carry the offending file's name and the expected format
    err = excinfo.value
    assert err.filename == str(empty_path)
    assert err.format == 'hdf5'


class TestHDF5Signatures:

@pytest.fixture(scope='class')
Expand Down

0 comments on commit 668918e

Please sign in to comment.