From 007866bd34d2bd961f1a04df0434c131f43f56fb Mon Sep 17 00:00:00 2001 From: Jared Lumpe Date: Sun, 4 Aug 2024 04:42:52 -0700 Subject: [PATCH] Fix up results export test funcs --- tests/results.py | 121 +++++++++++++++++++++++++------------------- tests/test_query.py | 2 +- 2 files changed, 69 insertions(+), 54 deletions(-) diff --git a/tests/results.py b/tests/results.py index 2a3326e..6541147 100644 --- a/tests/results.py +++ b/tests/results.py @@ -14,8 +14,8 @@ from gambit.db.models import AnnotatedGenome, Taxon -def compare_genome_matches(match1: Optional[GenomeMatch], match2: Optional[GenomeMatch]) -> bool: - """Compare two ``GenomeMatch`` instances for equality. +def compare_genome_matches(match1: Optional[GenomeMatch], match2: Optional[GenomeMatch]): + """Assert two ``GenomeMatch`` instances are equal. The values for the ``distance`` attribute are only checked for approximate equality, to support instances where one was loaded from a results archive (saving and loading a float in JSON is @@ -24,77 +24,88 @@ def compare_genome_matches(match1: Optional[GenomeMatch], match2: Optional[Genom Also allows one or both values to be None. """ if match1 is None or match2 is None: - return match1 is None and match2 is None + assert match1 is None and match2 is None + return - return match1.genome == match2.genome and \ - match1.matched_taxon == match2.matched_taxon and \ - np.isclose(match1.distance, match2.distance) + assert match1.genome == match2.genome + assert match1.matched_taxon == match2.matched_taxon + assert np.isclose(match1.distance, match2.distance) def compare_classifier_results(result1: ClassifierResult, result2: ClassifierResult) -> bool: - """Compare two ``ClassifierResult`` instances for equality.""" - return result1.success == result2.success and \ - result1.predicted_taxon == result2.predicted_taxon and \ - compare_genome_matches(result1.primary_match, result2.primary_match) and \ - compare_genome_matches(result1.closest_match, result2.closest_match) and \ - result1.next_taxon == result2.next_taxon and \ - set(result1.warnings) == set(result2.warnings) and \ - result1.error == result2.error + """Assert two ``ClassifierResult`` instances are equal.""" + assert result1.success == result2.success + assert result1.predicted_taxon == result2.predicted_taxon + compare_genome_matches(result1.primary_match, result2.primary_match) + compare_genome_matches(result1.closest_match, result2.closest_match) + assert result1.next_taxon == result2.next_taxon + assert set(result1.warnings) == set(result2.warnings) + assert result1.error == result2.error def compare_result_items(item1: QueryResultItem, item2: QueryResultItem) -> bool: - """Compare two ``QueryResultItem`` instances for equality. + """Assert two ``QueryResultItem`` instances are equal. Does not compare the value of the ``input`` attributes. """ - if item1.report_taxon != item2.report_taxon: - return False - if not compare_classifier_results(item1.classifier_result, item2.classifier_result): - return False - if len(item1.closest_genomes) != len(item2.closest_genomes): - return False + assert item1.report_taxon == item2.report_taxon + compare_classifier_results(item1.classifier_result, item2.classifier_result) + assert len(item1.closest_genomes) == len(item2.closest_genomes) for m1, m2 in zip(item1.closest_genomes, item2.closest_genomes): - if not compare_genome_matches(m1, m2): - return False - - return True + compare_genome_matches(m1, m2) def cmp_json_attrs(data: dict[str, Any], obj, attrnames: Iterable[str]): + """Assert JSON data values equals object attribute values for the given keys/names.""" + for attr in attrnames: assert data[attr] == getattr(obj, attr) -def cmp_taxon_json(taxon_data: dict[str, Any], taxon: Optional[Taxon]): + +def cmp_taxon_json(data: dict[str, Any], taxon: Optional[Taxon]): + """Assert Taxon instance matches data in JSON export.""" + if taxon is None: - assert taxon_data is None + assert data is None + else: - assert taxon_data is not None - cmp_json_attrs(taxon_data, taxon, ['id', 'key', 'name', 'ncbi_id', 'rank', 'distance_threshold']) + assert data is not None + cmp_json_attrs(data, taxon, ['id', 'key', 'name', 'ncbi_id', 'rank']) + if taxon.distance_threshold is None: + assert data['distance_threshold'] is None + else: + assert data['distance_threshold'] is not None + assert np.isclose(data['distance_threshold'], taxon.distance_threshold) + + +def cmp_annnotatedgenome_json(data: dict[str, Any], genome: AnnotatedGenome): + """Assert AnnotatedGenome instance matches data in JSON export.""" -def cmp_annnotatedgenome_json(genome_data: dict[str, Any], genome: AnnotatedGenome): - assert genome_data['id'] == genome.genome_id + assert data['id'] == genome.genome_id cmp_json_attrs( - genome_data, + data, genome, ['key', 'description', 'organism', 'ncbi_db', 'ncbi_id', 'genbank_acc', 'refseq_acc'], ) - for taxon_data, taxon in zip_strict(genome_data['taxonomy'], genome.taxon.ancestors(True)): + for taxon_data, taxon in zip_strict(data['taxonomy'], genome.taxon.ancestors(True)): cmp_taxon_json(taxon_data, taxon) -def cmp_genomematch_json(match_data, match: GenomeMatch): - assert np.isclose(match_data['distance'], match.distance) - cmp_annnotatedgenome_json(match_data['genome'], match.genome) - assert (match_data['matched_taxon'] is None) == (match.matched_taxon is None) - if match.matched_taxon is not None: - cmp_taxon_json(match_data['matched_taxon'], match.matched_taxon) +def cmp_genomematch_json(data, match: GenomeMatch): + """Assert GenomeMatch instance matches data in JSON export.""" + + assert np.isclose(data['distance'], match.distance) + cmp_annnotatedgenome_json(data['genome'], match.genome) + + cmp_taxon_json(data['matched_taxon'], match.matched_taxon) + def check_json_results(file: TextIO, results: QueryResults, strict: bool = False, ): - """Check exported JSON data matches the given results object. + """Assert exported JSON data matches the given results object. Parameters ---------- @@ -115,16 +126,17 @@ def check_json_results(file: TextIO, data = json.load(file) assert len(data['items']) == len(results.items) - # assert data['params'] == to_json(results.params) cmp_json_attrs(data['genomeset'], results.genomeset, ['id', 'key', 'version', 'name', 'description']) assert data['signaturesmeta'] == to_json(results.signaturesmeta) - # assert data['gambit_version'] == results.gambit_version - assert data['extra'] == results.extra if strict: assert data['timestamp'] == to_json(results.timestamp) + assert data['gambit_version'] == results.gambit_version + assert data['extra'] == results.extra for item, item_data in zip(results.items, data['items']): + + # Compare data['query'] <-> item.input query = item_data['query'] assert query['name'] == item.input.label @@ -135,41 +147,44 @@ def check_json_results(file: TextIO, else: assert query['format'] == item.input.file.format + # Check path matches exactly if strict mode, otherwise just file name if strict: assert query['path'] == str(item.input.file.path) else: assert Path(query['path']).name == item.input.file.path.name - # Predicted taxon - predicted_data = item_data['predicted_taxon'] - cmp_taxon_json(predicted_data, item.report_taxon) - if item.report_taxon is not None: - assert np.isclose(predicted_data['distance_threshold'], item.report_taxon.distance_threshold) - - # Next taxon + # Predicted/next taxon + cmp_taxon_json(item_data['predicted_taxon'], item.report_taxon) cmp_taxon_json(item_data['next_taxon'], item.classifier_result.next_taxon) # Closest genomes + assert len(item_data['closest_genomes']) == len(item.closest_genomes) for match, match_data in zip_strict(item.closest_genomes, item_data['closest_genomes']): cmp_genomematch_json(match_data, match) -def cmp_csv_taxon(row, taxon: Optional[Taxon], prefix: str): +def cmp_csv_taxon(row: dict[str, str], taxon: Optional[Taxon], prefix: str): if taxon is None: assert row[prefix + '.name'] == '' assert row[prefix + '.rank'] == '' assert row[prefix + '.ncbi_id'] == '' assert row[prefix + '.threshold'] == '' + else: assert row[prefix + '.name'] == taxon.name assert row[prefix + '.rank'] == taxon.rank assert row[prefix + '.ncbi_id'] == str(taxon.ncbi_id or '') - assert np.isclose(float(row[prefix + '.threshold']), taxon.distance_threshold) + + dt = row[prefix + '.threshold'] + if taxon.distance_threshold is None: + assert dt == '' + else: + assert np.isclose(float(dt), taxon.distance_threshold) def check_csv_results(file: TextIO, results: QueryResults, strict: bool = False): - """Check exported CSV data matches the given results object. + """Assert exported CSV data matches the given results object. Parameters ---------- diff --git a/tests/test_query.py b/tests/test_query.py index 38d579f..6c84589 100644 --- a/tests/test_query.py +++ b/tests/test_query.py @@ -42,4 +42,4 @@ def test_query_python(testdb: TestDB, strict: bool): for file, item, ref_item in zip_strict(query_files, results.items, ref_results.items): assert item.input.file == file - assert compare_result_items(item, ref_item) + compare_result_items(item, ref_item)