Skip to content

Commit

Permalink
Fix up results export test funcs
Browse files Browse the repository at this point in the history
  • Loading branch information
jlumpe committed Aug 4, 2024
1 parent 45a93f3 commit 007866b
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 54 deletions.
121 changes: 68 additions & 53 deletions tests/results.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@
from gambit.db.models import AnnotatedGenome, Taxon


def compare_genome_matches(match1: Optional[GenomeMatch], match2: Optional[GenomeMatch]) -> bool:
"""Compare two ``GenomeMatch`` instances for equality.
def compare_genome_matches(match1: Optional[GenomeMatch], match2: Optional[GenomeMatch]):
"""Assert two ``GenomeMatch`` instances are equal.
The values for the ``distance`` attribute are only checked for approximate equality, to support
instances where one was loaded from a results archive (saving and loading a float in JSON is
Expand All @@ -24,77 +24,88 @@ def compare_genome_matches(match1: Optional[GenomeMatch], match2: Optional[Genom
Also allows one or both values to be None.
"""
if match1 is None or match2 is None:
return match1 is None and match2 is None
assert match1 is None and match2 is None
return

return match1.genome == match2.genome and \
match1.matched_taxon == match2.matched_taxon and \
np.isclose(match1.distance, match2.distance)
assert match1.genome == match2.genome
assert match1.matched_taxon == match2.matched_taxon
assert np.isclose(match1.distance, match2.distance)


def compare_classifier_results(result1: ClassifierResult, result2: ClassifierResult) -> None:
    """Assert two ``ClassifierResult`` instances are equal.

    The ``primary_match`` and ``closest_match`` attributes are checked with
    ``compare_genome_matches()``. Warnings are compared as sets, so their order and
    multiplicity are ignored.

    Raises
    ------
    AssertionError
        On the first attribute found to differ.
    """
    assert result1.success == result2.success
    assert result1.predicted_taxon == result2.predicted_taxon
    compare_genome_matches(result1.primary_match, result2.primary_match)
    compare_genome_matches(result1.closest_match, result2.closest_match)
    assert result1.next_taxon == result2.next_taxon
    assert set(result1.warnings) == set(result2.warnings)
    assert result1.error == result2.error


def compare_result_items(item1: QueryResultItem, item2: QueryResultItem) -> None:
    """Assert two ``QueryResultItem`` instances are equal.

    Does not compare the value of the ``input`` attributes.

    Raises
    ------
    AssertionError
        On the first difference found.
    """
    assert item1.report_taxon == item2.report_taxon
    compare_classifier_results(item1.classifier_result, item2.classifier_result)

    # Lengths are checked up front because zip() silently stops at the shorter sequence.
    assert len(item1.closest_genomes) == len(item2.closest_genomes)
    for m1, m2 in zip(item1.closest_genomes, item2.closest_genomes):
        compare_genome_matches(m1, m2)


def cmp_json_attrs(data: dict[str, Any], obj, attrnames: Iterable[str]):
    """Assert JSON data values equal object attribute values for the given keys/names.

    Parameters
    ----------
    data
        Mapping loaded from JSON; each name in ``attrnames`` is looked up as a key.
    obj
        Object whose attributes hold the expected values.
    attrnames
        Names used both as keys of ``data`` and as attribute names of ``obj``.

    Raises
    ------
    AssertionError
        If a value differs. The message names the offending attribute.
    KeyError
        If ``data`` lacks one of the names.
    AttributeError
        If ``obj`` lacks one of the names.
    """
    for attr in attrnames:
        actual = data[attr]
        expected = getattr(obj, attr)
        # Explicit message: a bare assert in a helper module would not say which attribute failed.
        assert actual == expected, \
            f'{attr!r}: JSON value {actual!r} != attribute value {expected!r}'

def cmp_taxon_json(data: Optional[dict[str, Any]], taxon: Optional[Taxon]):
    """Assert Taxon instance matches data in JSON export.

    Parameters
    ----------
    data
        Exported JSON value for the taxon. Must be ``None`` exactly when ``taxon`` is.
    taxon
        Expected taxon, or ``None``.

    Raises
    ------
    AssertionError
        If the JSON data does not match the taxon.
    """
    if taxon is None:
        assert data is None
        return

    assert data is not None
    cmp_json_attrs(data, taxon, ['id', 'key', 'name', 'ncbi_id', 'rank'])

    # The threshold is compared with np.isclose() rather than ==, presumably to tolerate
    # float round-off from JSON serialization.
    if taxon.distance_threshold is None:
        assert data['distance_threshold'] is None
    else:
        assert data['distance_threshold'] is not None
        assert np.isclose(data['distance_threshold'], taxon.distance_threshold)


def cmp_annnotatedgenome_json(data: dict[str, Any], genome: AnnotatedGenome):
    """Assert AnnotatedGenome instance matches data in JSON export.

    Checks the ID, the attributes exported under their own names, and each entry of the
    exported ``taxonomy`` list against ``genome.taxon.ancestors(True)``.
    """
    # The JSON "id" key holds the value of the instance's ``genome_id`` attribute.
    assert data['id'] == genome.genome_id

    same_name_attrs = [
        'key', 'description', 'organism', 'ncbi_db', 'ncbi_id', 'genbank_acc', 'refseq_acc',
    ]
    cmp_json_attrs(data, genome, same_name_attrs)

    exported_taxa = data['taxonomy']
    expected_taxa = genome.taxon.ancestors(True)
    for taxon_data, taxon in zip_strict(exported_taxa, expected_taxa):
        cmp_taxon_json(taxon_data, taxon)

def cmp_genomematch_json(data: dict[str, Any], match: GenomeMatch):
    """Assert GenomeMatch instance matches data in JSON export.

    Parameters
    ----------
    data
        Exported JSON object for the match, with ``distance``, ``genome`` and
        ``matched_taxon`` keys.
    match
        Expected match.

    Raises
    ------
    AssertionError
        If the JSON data does not match.
    """
    # Distance is a float, so it is compared approximately rather than with ==.
    assert np.isclose(data['distance'], match.distance)
    cmp_annnotatedgenome_json(data['genome'], match.genome)

    # cmp_taxon_json() accepts None for the taxon, so no separate None check is needed here.
    cmp_taxon_json(data['matched_taxon'], match.matched_taxon)


def check_json_results(file: TextIO,
results: QueryResults,
strict: bool = False,
):
"""Check exported JSON data matches the given results object.
"""Assert exported JSON data matches the given results object.
Parameters
----------
Expand All @@ -115,16 +126,17 @@ def check_json_results(file: TextIO,
data = json.load(file)

assert len(data['items']) == len(results.items)
# assert data['params'] == to_json(results.params)
cmp_json_attrs(data['genomeset'], results.genomeset, ['id', 'key', 'version', 'name', 'description'])
assert data['signaturesmeta'] == to_json(results.signaturesmeta)
# assert data['gambit_version'] == results.gambit_version
assert data['extra'] == results.extra

if strict:
assert data['timestamp'] == to_json(results.timestamp)
assert data['gambit_version'] == results.gambit_version
assert data['extra'] == results.extra

for item, item_data in zip(results.items, data['items']):

# Compare data['query'] <-> item.input
query = item_data['query']
assert query['name'] == item.input.label

Expand All @@ -135,41 +147,44 @@ def check_json_results(file: TextIO,
else:
assert query['format'] == item.input.file.format

# Check path matches exactly if strict mode, otherwise just file name
if strict:
assert query['path'] == str(item.input.file.path)
else:
assert Path(query['path']).name == item.input.file.path.name

# Predicted taxon
predicted_data = item_data['predicted_taxon']
cmp_taxon_json(predicted_data, item.report_taxon)
if item.report_taxon is not None:
assert np.isclose(predicted_data['distance_threshold'], item.report_taxon.distance_threshold)

# Next taxon
# Predicted/next taxon
cmp_taxon_json(item_data['predicted_taxon'], item.report_taxon)
cmp_taxon_json(item_data['next_taxon'], item.classifier_result.next_taxon)

# Closest genomes
assert len(item_data['closest_genomes']) == len(item.closest_genomes)
for match, match_data in zip_strict(item.closest_genomes, item_data['closest_genomes']):
cmp_genomematch_json(match_data, match)


def cmp_csv_taxon(row: dict[str, str], taxon: Optional[Taxon], prefix: str):
    """Assert the taxon columns of an exported CSV row match a Taxon instance.

    Parameters
    ----------
    row
        CSV row as a mapping from column name to cell text.
    taxon
        Expected taxon, or ``None`` if all of the taxon columns should be empty.
    prefix
        Column name prefix. The columns checked are ``{prefix}.name``, ``{prefix}.rank``,
        ``{prefix}.ncbi_id`` and ``{prefix}.threshold``.

    Raises
    ------
    AssertionError
        If a column does not match.
    """
    if taxon is None:
        for column in ('name', 'rank', 'ncbi_id', 'threshold'):
            assert row[f'{prefix}.{column}'] == ''
        return

    assert row[prefix + '.name'] == taxon.name
    assert row[prefix + '.rank'] == taxon.rank
    # A falsy NCBI ID (e.g. None) is expected to be exported as an empty cell.
    assert row[prefix + '.ncbi_id'] == str(taxon.ncbi_id or '')

    dt = row[prefix + '.threshold']
    if taxon.distance_threshold is None:
        assert dt == ''
    else:
        # An unexpectedly empty cell should fail as an assertion, not as a ValueError from float('').
        assert dt != ''
        assert np.isclose(float(dt), taxon.distance_threshold)


def check_csv_results(file: TextIO, results: QueryResults, strict: bool = False):
"""Check exported CSV data matches the given results object.
"""Assert exported CSV data matches the given results object.
Parameters
----------
Expand Down
2 changes: 1 addition & 1 deletion tests/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,4 @@ def test_query_python(testdb: TestDB, strict: bool):

for file, item, ref_item in zip_strict(query_files, results.items, ref_results.items):
assert item.input.file == file
assert compare_result_items(item, ref_item)
compare_result_items(item, ref_item)

0 comments on commit 007866b

Please sign in to comment.