Skip to content

Commit

Permalink
Upgrade to CLDR 45 (#1077)
Browse files Browse the repository at this point in the history
* Upgrade to CLDR 45

* Handle 'localeRules="nonlikelyScript"' for parent locales

Locales of the form 'lang_Script' where 'Script' is not the
likely script for 'lang' should have 'root' as their parent
locale. For example, the parent of 'az_Arab' should not be
computed as 'az' by truncating from the end, but should be
'root' instead as 'Arab' is not the likely script for 'az'.

The list of such locales was previously specified using
an explicit 'locales' attribute. It is now handled dynamically
using the new 'localeRules' attribute.
  • Loading branch information
tomasr8 authored Jul 11, 2024
1 parent 3edf772 commit 75486c9
Show file tree
Hide file tree
Showing 5 changed files with 51 additions and 7 deletions.
28 changes: 26 additions & 2 deletions babel/localedata.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,27 @@ def locale_identifiers() -> list[str]:
]


def _is_non_likely_script(name: str) -> bool:
    """Return whether the locale is of the form ``lang_Script``,
    and the script is not the likely script for the language.

    This implements the behavior of the ``nonlikelyScript`` value of the
    ``localeRules`` attribute for parent locales added in CLDR 45.

    :param name: the locale identifier to check, e.g. ``'az_Arab'``
    :return: ``True`` only if `name` consists of exactly a language and a
             script, and that script differs from the script found in the
             language's ``likely_subtags`` entry; ``False`` in every other
             case, including when `name` cannot be parsed or the language
             has no ``likely_subtags`` entry
    """
    # Function-scope import, kept as in the original
    # (presumably avoids a circular import with babel.core -- TODO confirm).
    from babel.core import get_global, parse_locale

    try:
        # Anything after the fourth component is collected in ``rest``.
        lang, territory, script, variant, *rest = parse_locale(name)
    except ValueError:
        return False

    # Only plain ``lang_Script`` identifiers are subject to the rule.
    if not (lang and script) or territory or variant or rest:
        return False

    likely_subtag = get_global('likely_subtags').get(lang)
    if not likely_subtag:
        # No likely-subtags entry for this language: the likely script is
        # unknown, so do not force ``root`` as the parent. Passing ``None``
        # on to ``parse_locale`` would fail instead of answering the question.
        return False

    _, _, likely_script, *_ = parse_locale(likely_subtag)
    return script != likely_script


def load(name: os.PathLike[str] | str, merge_inherited: bool = True) -> dict[str, Any]:
"""Load the locale data for the given locale.
Expand Down Expand Up @@ -132,8 +153,11 @@ def load(name: os.PathLike[str] | str, merge_inherited: bool = True) -> dict[str
from babel.core import get_global
parent = get_global('parent_exceptions').get(name)
if not parent:
parts = name.split('_')
parent = "root" if len(parts) == 1 else "_".join(parts[:-1])
if _is_non_likely_script(name):
parent = 'root'
else:
parts = name.split('_')
parent = "root" if len(parts) == 1 else "_".join(parts[:-1])
data = load(parent).copy()
filename = resolve_locale_filename(name)
with open(filename, 'rb') as fileobj:
Expand Down
8 changes: 4 additions & 4 deletions scripts/download_import_cldr.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
import zipfile
from urllib.request import urlretrieve

URL = 'https://unicode.org/Public/cldr/44/cldr-common-44.0.zip'
FILENAME = 'cldr-common-44.0.zip'
# Via https://unicode.org/Public/cldr/44/hashes/SHASUM512
FILESUM = 'f2cd8733948caf308d6e39eae21724da7f29f528f8969d456514e1e84ecd5f1e6936d0460414a968888bb1b597bc1ee723950ea47df5cba21a02bb14f96d18b6'
URL = 'https://unicode.org/Public/cldr/45/cldr-common-45.0.zip'
FILENAME = 'cldr-common-45.0.zip'
# Via https://unicode.org/Public/cldr/45/hashes/SHASUM512.txt
FILESUM = '638123882bd29911fc9492ec152926572fec48eb6c1f5dd706aee3e59cad8be4963a334bb7a09a645dbedc3356f60ef7ac2ef7ab4ccf2c8926b547782175603c'
BLKSIZE = 131072


Expand Down
5 changes: 5 additions & 0 deletions scripts/import_cldr.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,11 @@ def parse_global(srcdir, sup):

for paternity in parentBlock.findall('./parentLocale'):
parent = paternity.attrib['parent']
if parent == 'root':
# Since CLDR-45, the 'root' parent locale uses 'localeRules="nonlikelyScript"' instead of
# 'locales'. This special case is handled in babel when loading locale data
# (https://cldr.unicode.org/index/downloads/cldr-45#h.5rbkhkncdqi9)
continue
for child in paternity.attrib['locales'].split():
parent_exceptions[child] = parent

Expand Down
15 changes: 15 additions & 0 deletions tests/test_localedata.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,21 @@ def test_load():
assert localedata.load('en_US') is localedata.load('en_US')


def test_load_inheritance(monkeypatch):
    """Check which locales end up in the cache after loading a locale."""
    from babel.localedata import _cache

    expected_chains = (
        # 'hi_Latn' has the 'lang_Script' shape targeted by 'nonlikelyScript',
        # but its explicit parent locale 'en_IN' takes precedence, so the
        # result must not be just ['root', 'hi_Latn'].
        ('hi_Latn', ['root', 'en', 'en_001', 'en_IN', 'hi_Latn']),
        # 'Arab' is not the likely script for 'az', so 'az' must not be loaded.
        ('az_Arab', ['root', 'az_Arab']),
    )

    for locale_name, loaded_names in expected_chains:
        # Start from an empty cache so only this load's chain is recorded.
        _cache.clear()
        localedata.load(locale_name)
        assert list(_cache.keys()) == loaded_names


def test_merge():
d = {1: 'foo', 3: 'baz'}
localedata.merge(d, {1: 'Foo', 2: 'Bar'})
Expand Down
2 changes: 1 addition & 1 deletion tests/test_numbers.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,7 +250,7 @@ def test_list_currencies():

assert list_currencies(locale='pa_Arab') == {'PKR', 'INR', 'EUR'}

assert len(list_currencies()) == 305
assert len(list_currencies()) == 306


def test_validate_currency():
Expand Down

0 comments on commit 75486c9

Please sign in to comment.