Skip to content

Commit

Permalink
fix(address): fix failing doctest
Browse files Browse the repository at this point in the history
  • Loading branch information
NickCrews committed Oct 29, 2024
1 parent 4dc55b0 commit 66cd422
Showing 1 changed file with 12 additions and 10 deletions.
22 changes: 12 additions & 10 deletions mismo/lib/geo/_address.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ class AddressFeatures:
Examples
--------
>>> address = ibis.literal({
... "street1": "132 Main St ",
... "street1": "132 w elmore St ",
... "street2": "Apt 3-b",
... "city": "Springfield",
... "city": "Anchorage",
... "state": "",
... "postal_code": "12345",
... "latitude": 123.456,
Expand All @@ -30,7 +30,7 @@ class AddressFeatures:
Whitespace is normed, case is converted to uppercase:
>>> features.street1.execute()
'132 MAIN ST'
'132 W ELMORE ST'
Punctuation is removed:
Expand All @@ -49,7 +49,9 @@ class AddressFeatures:
>>> features.street_number_sorted.execute()
'123'
>>> features.street_name.execute()
'MAIN'
'ELMORE'
>>> features.street_ngrams.execute()
['123', 'MORE', 'ELMORE', 'LMOR', 'ELMO']
>>> features.all_null.execute()
np.False_
Expand All @@ -61,7 +63,7 @@ class AddressFeatures:
For use in in preparing data for blocking, you can get all the features as a struct:
>>> features.as_struct().execute()
{'street1': '132 MAIN ST', 'street2': 'APT 3B', 'city': 'SPRINGFIELD', 'state': None, 'postal_code': '12345', 'street_number': '132', 'street_name': 'MAIN', 'street_ngrams': ['132 ', 'MAIN', '32 M', 'AIN ', '2 MA', 'IN S', ' MAI', 'N ST', 'APT ', 'PT 3', 'T 3B', '132 MAIN ST', 'APT 3B'], 'latitude': 123.456}
{'street1': '132 W ELMORE ST', 'street2': 'APT 3B', 'city': 'ANCHORAGE', 'state': None, 'postal_code': '12345', 'taggings': [{'token': '132', 'label': 'AddressNumber'}, {'token': 'W', 'label': 'StreetNamePreDirectional'}, {'token': 'ELMORE', 'label': 'StreetName'}, {'token': 'ST', 'label': 'StreetNamePostType'}], 'street_number': '132', 'street_number_sorted': '123', 'street_name': 'ELMORE', 'street_ngrams': ['123', 'MORE', 'ELMORE', 'LMOR', 'ELMO'], 'latitude': 123.456}
""" # noqa: E501

def __init__(self, raw: ir.StructValue | ir.Table, *, street_ngrams_n: int = 4):
Expand Down Expand Up @@ -112,7 +114,7 @@ def postal_code(self) -> ir.StringValue:
return self._norm(self.raw.postal_code)

@property
def _tagged(self) -> TaggedAddress:
def tagged(self) -> TaggedAddress:
# We only use the street name and address number, so only pass in the street1.
# This is both more performant, and it removes the possibility of
# "1-b" in "apt 1-b" being tagged as a street number (which I just saw happen).
Expand All @@ -123,12 +125,12 @@ def street_name(self) -> ir.StringValue:
"""
The normalized street name from street1, eg "OAK TREE" from "123 oak tree St".
"""
return self._norm(self._tagged.StreetName)
return self._norm(self.tagged.StreetName)

@property
def street_number(self) -> ir.StringValue:
"""The normalized street number from street1, eg "132" from "132 Main St"."""
return self._norm(self._tagged.AddressNumber)
return self._norm(self.tagged.AddressNumber)

@property
def street_number_sorted(self) -> ir.StringValue:
Expand All @@ -146,7 +148,7 @@ def street_ngrams(self) -> ir.ArrayValue:
text.ngrams(self.street_number_sorted.fill_null(""), n=self.street_ngrams_n)
+ text.ngrams(self.street_name.fill_null(""), n=self.street_ngrams_n)
+ ibis.array([self.street_number_sorted, self.street_name])
)
).unique()

@property
def all_null(self) -> ir.BooleanValue:
Expand All @@ -173,7 +175,7 @@ def as_struct(self) -> ir.StructValue:
"city": self.city,
"state": self.state,
"postal_code": self.postal_code,
"taggings": self._tagged.taggings,
"taggings": self.tagged.taggings,
"street_number": self.street_number,
"street_number_sorted": self.street_number_sorted,
"street_name": self.street_name,
Expand Down

0 comments on commit 66cd422

Please sign in to comment.