-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
21 changed files
with
18,716 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
import difflib | ||
import os | ||
import typing | ||
import urllib.request | ||
|
||
import simplejson as json | ||
|
||
|
||
class FuzzyDict(dict): | ||
"""Provides a dictionary that performs fuzzy lookup""" | ||
|
||
def __init__(self, cutoff: float = 0.6): | ||
"""Construct a new FuzzyDict instance | ||
items is an dictionary to copy items from (optional) | ||
cutoff is the match ratio below which matches should not be considered | ||
cutoff needs to be a float between 0 and 1 (where zero is no match | ||
and 1 is a perfect match)""" | ||
super(FuzzyDict, self).__init__() | ||
self.cutoff = cutoff | ||
|
||
# short wrapper around some super (dict) methods | ||
self._dict_contains = lambda key: super(FuzzyDict, self).__contains__(key) | ||
self._dict_getitem = lambda key: super(FuzzyDict, self).__getitem__(key) | ||
|
||
def _search(self, lookfor: typing.Any, stop_on_first: bool = False): | ||
"""Returns the value whose key best matches lookfor | ||
if stop_on_first is True then the method returns as soon | ||
as it finds the first item | ||
""" | ||
|
||
# if the item is in the dictionary then just return it | ||
if self._dict_contains(lookfor): | ||
return True, lookfor, self._dict_getitem(lookfor), 1 | ||
|
||
# set up the fuzzy matching tool | ||
ratio_calc = difflib.SequenceMatcher() | ||
ratio_calc.set_seq1(lookfor) | ||
|
||
# test each key in the dictionary | ||
best_ratio = 0 | ||
best_match = None | ||
best_key = None | ||
for key in self: | ||
# if the current key is not a string | ||
# then we just skip it | ||
try: | ||
# set up the SequenceMatcher with other text | ||
ratio_calc.set_seq2(key) | ||
except TypeError: | ||
continue | ||
|
||
# we get an error here if the item to look for is not a | ||
# string - if it cannot be fuzzy matched and we are here | ||
# this it is definitely not in the dictionary | ||
try: | ||
# calculate the match value | ||
ratio = ratio_calc.ratio() | ||
except TypeError: | ||
break | ||
|
||
# if this is the best ratio so far - save it and the value | ||
if ratio > best_ratio: | ||
best_ratio = ratio | ||
best_key = key | ||
best_match = self._dict_getitem(key) | ||
|
||
if stop_on_first and ratio >= self.cutoff: | ||
break | ||
|
||
return best_ratio >= self.cutoff, best_key, best_match, best_ratio | ||
|
||
def __contains__(self, item: typing.Any): | ||
if self._search(item, True)[0]: | ||
return True | ||
else: | ||
return False | ||
|
||
def __getitem__(self, lookfor: typing.Any): | ||
matched, key, item, ratio = self._search(lookfor) | ||
|
||
if not matched: | ||
raise KeyError( | ||
"'%s'. closest match: '%s' with ratio %.3f" | ||
% (str(lookfor), str(key), ratio) | ||
) | ||
|
||
return item | ||
|
||
|
||
__HERE = os.path.abspath(os.path.dirname(__file__)) | ||
with open(os.path.join(__HERE, "map_filename.json"), "r", encoding="utf8") as f: | ||
FILENAMES: FuzzyDict = FuzzyDict() | ||
for k, v in json.load(f).items(): | ||
FILENAMES[k] = v | ||
|
||
with open(os.path.join(__HERE, "city_coordinates.json"), "r", encoding="utf8") as f: | ||
COORDINATES: FuzzyDict = FuzzyDict() | ||
for k, v in json.load(f).items(): | ||
COORDINATES[k] = v | ||
|
||
EXTRA = {} | ||
|
||
|
||
def register_url(asset_url: str): | ||
if asset_url: | ||
registry = asset_url + "/registry.json" | ||
try: | ||
contents = urllib.request.urlopen(registry).read() | ||
contents = json.loads(contents) | ||
except Exception as e: | ||
raise e | ||
files = {} | ||
pinyin_names = set() | ||
for name, pinyin in contents["PINYIN_MAP"].items(): | ||
file_name = contents["FILE_MAP"][pinyin] | ||
files[name] = [file_name, "js"] | ||
pinyin_names.add(pinyin) | ||
|
||
for key, file_name in contents["FILE_MAP"].items(): | ||
if key not in pinyin_names: | ||
# English names | ||
files[key] = [file_name, "js"] | ||
|
||
js_folder_name = contents["JS_FOLDER"] | ||
if js_folder_name == "/": | ||
js_file_prefix = f"{asset_url}/" | ||
else: | ||
js_file_prefix = f"{asset_url}/{js_folder_name}/" | ||
EXTRA[js_file_prefix] = files | ||
|
||
|
||
def register_files(asset_files: dict): | ||
if asset_files: | ||
FILENAMES.update(asset_files) | ||
|
||
|
||
def register_coords(coords: dict): | ||
if coords: | ||
COORDINATES.update(coords) |
Oops, something went wrong.