Skip to content

Commit

Permalink
Add error ignoring for individual results
Browse files Browse the repository at this point in the history
  • Loading branch information
dax-dot-gay committed Dec 4, 2023
1 parent 0e12b9b commit 87bad3c
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 38 deletions.
70 changes: 55 additions & 15 deletions open_groceries/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,17 @@
"costco": Costco,
}

__all__ = ["OpenGrocery", "Wegmans", "Costco", "Location", "Address", "LatLong", "ApiException", "GroceryItem"]
__all__ = [
"OpenGrocery",
"Wegmans",
"Costco",
"Location",
"Address",
"LatLong",
"ApiException",
"GroceryItem",
]


class OpenGrocery:
def __init__(
Expand All @@ -38,17 +48,29 @@ def __init__(
if k in features
}

def _execute_mass(
    self,
    func: str,
    include: list[ADAPTER_TYPES],
    *args,
    threads: int = 6,
    flatten: bool = False,
    **kwargs,
) -> list:
    """Call one method on every included adapter concurrently

    Args:
        func (str): Name of the adapter method to call
        include (list[ADAPTER_TYPES]): Adapter keys to call. Keys that have no entry in self.adapters are skipped.
        *args: Positional arguments forwarded to every call
        threads (int, optional): Maximum number of worker threads. Defaults to 6.
        flatten (bool, optional): If True, each return value is merged into the result list with extend(); otherwise it is appended as one element. Defaults to False.
        **kwargs: Keyword arguments forwarded to every call

    Returns:
        list: Collected results, ordered by the iteration order of self.adapters
    """
    results = []
    with ThreadPoolExecutor(max_workers=threads) as executor:
        tasks = [
            executor.submit(getattr(adapter, func), *args, **kwargs)
            for name, adapter in self.adapters.items()
            if name in include
        ]
        # Read the futures in submission order instead of completion order:
        # every task has to finish before returning anyway, and this keeps the
        # output order independent of which adapter happened to respond first.
        for task in tasks:
            # result() re-raises any exception raised inside the adapter call.
            outcome = task.result()
            if flatten:
                results.extend(outcome)
            else:
                results.append(outcome)
    return results

def _get_position(self, near: str) -> LatLong:
map_result = requests.get(
f"https://api.mapbox.com/geocoding/v5/mapbox.places/{near.lower()}.json",
Expand All @@ -69,9 +91,10 @@ def _get_position(self, near: str) -> LatLong:
map_data["features"][0]["center"], longitude_first=True
)
return position


def locations(self, near: str, include: list[ADAPTER_TYPES] = ADAPTERS) -> list[Location]:
def locations(
self, near: str, include: list[ADAPTER_TYPES] = ADAPTERS
) -> list[Location]:
"""Get locations near an address, sorted by distance
Args:
Expand All @@ -84,7 +107,7 @@ def locations(self, near: str, include: list[ADAPTER_TYPES] = ADAPTERS) -> list[
current_position = self._get_position(near)
results = self._execute_mass("get_locations", include, near, flatten=True)
return sorted(results, key=lambda x: current_position.distance_to(x.location))

def set_nearest_stores(self, near: str, include: list[ADAPTER_TYPES] = ADAPTERS):
"""Set each adapter to the nearest store to an address
Expand All @@ -97,7 +120,7 @@ def set_nearest_stores(self, near: str, include: list[ADAPTER_TYPES] = ADAPTERS)
for loc in all_near:
if not locations.get(loc.type) and loc.type in include:
locations[loc.type] = loc

for adapter, location in locations.items():
if self.adapters.get(adapter):
self.adapters[adapter].set_location(location)
Expand All @@ -112,7 +135,12 @@ def set_locations(self, locations: dict[ADAPTER_TYPES, Location]):
if self.adapters.get(adapter):
self.adapters[adapter].set_location(location)

def search(self, query: str, include: list[ADAPTER_TYPES] = ADAPTERS) -> list[GroceryItem]:
def search(
self,
query: str,
include: list[ADAPTER_TYPES] = ADAPTERS,
ignore_errors: bool = False,
) -> list[GroceryItem]:
"""Search all adapters for a query
Args:
Expand All @@ -122,10 +150,18 @@ def search(self, query: str, include: list[ADAPTER_TYPES] = ADAPTERS) -> list[Gr
Returns:
list[GroceryItem]: List of results, sorted by name similarity to query
"""
results: list[GroceryItem] = self._execute_mass("search_groceries", include, query, flatten=True)
match_order = get_close_matches(query.lower(), [i.name.lower() for i in results], n=len(results), cutoff=0)
results: list[GroceryItem] = self._execute_mass(
"search_groceries",
include,
query,
flatten=True,
ignore_errors=ignore_errors,
)
match_order = get_close_matches(
query.lower(), [i.name.lower() for i in results], n=len(results), cutoff=0
)
return sorted(results, key=lambda x: match_order.index(x.name.lower()))

def adapter(self, adapter: ADAPTER_TYPES) -> Union[GroceryAdapter, None]:
"""Utility function to get a specific adapter
Expand All @@ -136,7 +172,7 @@ def adapter(self, adapter: ADAPTER_TYPES) -> Union[GroceryAdapter, None]:
Union[GroceryAdapter, None]: Adapter, or None if not initialized
"""
return self.adapters.get(adapter)

def suggest(self, term: str, include: list[ADAPTER_TYPES] = ADAPTERS) -> list[str]:
"""Get autocompletion results for a search term
Expand All @@ -147,6 +183,10 @@ def suggest(self, term: str, include: list[ADAPTER_TYPES] = ADAPTERS) -> list[st
Returns:
list[str]: List of suggestions in order of similarity
"""
results: list[GroceryItem] = list(set(self._execute_mass("suggest", include, term, flatten=True)))
match_order = get_close_matches(term.lower(), [i.lower() for i in results], n=len(results), cutoff=0)
results: list[GroceryItem] = list(
set(self._execute_mass("suggest", include, term, flatten=True))
)
match_order = get_close_matches(
term.lower(), [i.lower() for i in results], n=len(results), cutoff=0
)
return sorted(results, key=lambda x: match_order.index(x.lower()))
38 changes: 22 additions & 16 deletions open_groceries/adapters/costco.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ def __init__(
}
self.base_url = "https://www.costco.com/"

def search_groceries(self, search: str) -> list[GroceryItem]:
def search_groceries(
self, search: str, ignore_errors: bool = False
) -> list[GroceryItem]:
req = self.session.get(
self.base_url + "CatalogSearch", params={"dept": "All", "keyword": search}
)
Expand All @@ -57,7 +59,8 @@ def search_groceries(self, search: str) -> list[GroceryItem]:
pass

metas = {
i.attrs["itemprop"]: i.attrs["content"] for i in product.select("meta")
i.attrs["itemprop"]: i.attrs["content"]
for i in product.select("meta")
}
results.append(
GroceryItem(
Expand All @@ -82,18 +85,23 @@ def search_groceries(self, search: str) -> list[GroceryItem]:
metadata={},
)
)
except:
pass
except Exception as e:
if not ignore_errors:
raise e

return results

def get_grocery_item(self, id: str) -> GroceryItem:
req = self.session.get(self.base_url + f"{id}.product.{id}.html")
soup = BeautifulSoup(req.text, features="html.parser")

script_results = [i.text for i in soup.select("script") if "pageCrumbs" in i.text]

script_results = [
i.text for i in soup.select("script") if "pageCrumbs" in i.text
]
if len(script_results) > 0:
crumbLine = [i.strip() for i in script_results[0].split("\n") if "pageCrumbs" in i]
crumbLine = [
i.strip() for i in script_results[0].split("\n") if "pageCrumbs" in i
]
categories = json.loads(crumbLine[0].split(":")[1].strip(" ,"))
else:
categories = []
Expand All @@ -116,7 +124,7 @@ def get_grocery_item(self, id: str) -> GroceryItem:
price=price,
ratings=None,
metadata={},
categories=categories
categories=categories,
)

def get_locations(self, near: str) -> list[Location]:
Expand Down Expand Up @@ -213,17 +221,15 @@ def suggest(self, search: str) -> list[str]:
"Host": "search.costco.com",
"Origin": "https://www.costco.com",
"Referer": "https://www.costco.com/",
"User-Agent": self.user_agent
"User-Agent": self.user_agent,
},
params={
"q": search,
"loc": AUTOCOMPLETE_LOC,
"rowsPerGroup": 10
}
params={"q": search, "loc": AUTOCOMPLETE_LOC, "rowsPerGroup": 10},
)

if result.status_code >= 300:
raise ApiException(result)

data = result.json()
return [i["term"] for i in data["response"]["docs"] if i["type"] == "PopularSearch"]
return [
i["term"] for i in data["response"]["docs"] if i["type"] == "PopularSearch"
]
22 changes: 18 additions & 4 deletions open_groceries/adapters/wegmans.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,15 +56,26 @@ def _get_session_context(
def url(self, path: str):
return self.base.rstrip("/") + "/" + path.lstrip("/")

def search_groceries(
    self, search: str, ignore_errors: bool = False
) -> list[GroceryItem]:
    """Search the Wegmans store products endpoint for a term

    Args:
        search (str): Search term, sent as the search_term query parameter
        ignore_errors (bool, optional): If True, items that fail to convert are skipped instead of aborting the whole search. Defaults to False.

    Raises:
        ApiException: If the response status code is 300 or higher

    Returns:
        list[GroceryItem]: Converted items, in the order the API returned them
    """
    result = self.session.get(
        self.url("/api/v2/store_products"), params={"search_term": search}
    )
    if result.status_code >= 300:
        raise ApiException(result)

    data = result.json()
    results = []
    for item in data["items"]:
        try:
            results.append(self.build_wegmans_grocery_item(item))
        except Exception:
            # Catch Exception only, so KeyboardInterrupt / SystemExit always
            # propagate even when per-item errors are being ignored.
            if not ignore_errors:
                raise

    return results

def build_wegmans_grocery_item(self, data: dict) -> GroceryItem:
return GroceryItem(
Expand Down Expand Up @@ -171,4 +182,7 @@ def suggest(self, search: str) -> list[str]:
raise ApiException(result)

data = result.json()
return [s.replace("<strong>", "").replace("</strong>", "") for s in data["product_autocompletes"]]
return [
s.replace("<strong>", "").replace("</strong>", "")
for s in data["product_autocompletes"]
]
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "open-groceries"
version = "0.2.2"
version = "0.3.0"
description = "Unified data acquisition across multiple grocery store sites"
authors = ["Dax Harris <[email protected]>"]
license = "MIT"
Expand Down
4 changes: 2 additions & 2 deletions test.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from open_groceries import *

groc = OpenGrocery()
groc = OpenGrocery(features=["wegmans"])
groc.set_nearest_stores("Rochester Institute of Technology")
print(groc.search("pots"))
print(groc.search("fear", ignore_errors=True))

0 comments on commit 87bad3c

Please sign in to comment.