Skip to content

Commit

Permalink
feat: add create_brand_taxonomy_mapping function
Browse files Browse the repository at this point in the history
  • Loading branch information
raphael0202 committed Dec 12, 2024
1 parent 8e672eb commit 4440051
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 0 deletions.
34 changes: 34 additions & 0 deletions openfoodfacts/taxonomy.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,6 +440,40 @@ def create_taxonomy_mapping(taxonomy: Taxonomy) -> Dict[str, str]:
return mapping


def is_prefixed_value(value: str) -> bool:
    """Tell whether `value` carries a two-letter language prefix.

    A value counts as prefixed when it is at least four characters long and
    its third character is a colon (ex: `en:nestle`, `fr:danone`). The two
    leading characters themselves are not inspected.

    :param value: the tag value to check
    :return: True if the value has a language prefix, False otherwise
    """
    # A bare prefix such as "en:" (nothing after the colon) is not considered
    # a prefixed value.
    if len(value) <= 3:
        return False
    return value[2] == ":"


def create_brand_taxonomy_mapping(taxonomy: Taxonomy) -> Dict[str, str]:
    """Build a lookup table from unprefixed brand tags to brand names.

    Unlike the mapping produced by `create_taxonomy_mapping`, the keys here
    are node IDs stripped of their language prefix (ex: `nestle`), and the
    values are the English brand names, keeping capitalization and accents
    (ex: `Nestlé`).

    The returned mapping has the following format:

        {
            "alva": "Alva",
            "benecop": "Bénécop",
            ...
        }

    Node IDs without a language prefix are used as keys unchanged. If two
    nodes yield the same unprefixed key, the node iterated last wins.

    :param taxonomy: the taxonomy to use (brand taxonomy)
    :return: a dict mapping tags (*without* language prefix) to brand values
        (capitalized)
    """
    return {
        # Drop the 3-character language prefix ("en:", "fr:", ...) when
        # the node ID has one.
        (node.id[3:] if is_prefixed_value(node.id) else node.id): node.names["en"]
        for node in taxonomy.iter_nodes()
    }


def map_to_canonical_id(
taxonomy_mapping: Dict[str, str], values: List[str]
) -> Dict[str, str]:
Expand Down
18 changes: 18 additions & 0 deletions tests/unit/test_taxonomy.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
from openfoodfacts.taxonomy import (
Taxonomy,
TaxonomyNode,
create_brand_taxonomy_mapping,
create_taxonomy_mapping,
map_to_canonical_id,
)
from openfoodfacts.types import TaxonomyType


def test_map_to_canonical_id():
Expand Down Expand Up @@ -123,3 +125,19 @@ def test_multiple_languages_with_different_synonyms(self):
}

assert create_taxonomy_mapping(taxonomy) == expected_mapping

def test_create_brand_taxonomy_mapping(self):
    """Check that brand node IDs lose their language prefix and map to the
    English brand name with its original capitalization and accents."""
    raw_brands = {
        "en:5th-season": {"name": {"en": "5th Season"}},
        "en:arev": {"name": {"en": "Arèv"}},
        "en:arrighi": {"name": {"en": "Arrighi"}},
        "en:voiles-au-vent": {"name": {"en": "Voiles au Vent"}},
    }
    expected_mapping = {
        "5th-season": "5th Season",
        "arev": "Arèv",
        "arrighi": "Arrighi",
        "voiles-au-vent": "Voiles au Vent",
    }

    brand_taxonomy = Taxonomy.from_dict(raw_brands)

    assert create_brand_taxonomy_mapping(brand_taxonomy) == expected_mapping

0 comments on commit 4440051

Please sign in to comment.