diff --git a/overturemaestro/__init__.py b/overturemaestro/__init__.py
index 6faec96..997f045 100644
--- a/overturemaestro/__init__.py
+++ b/overturemaestro/__init__.py
@@ -16,7 +16,7 @@
     convert_geometry_to_parquet_for_multiple_types,
 )
 from overturemaestro.release_index import (
-    get_all_available_release_versions,
+    get_available_release_versions,
     get_available_theme_type_pairs,
     get_newest_release_version,
 )
@@ -33,7 +33,7 @@
     "convert_geometry_to_geodataframe_for_multiple_types",
     "convert_geometry_to_parquet",
     "convert_geometry_to_parquet_for_multiple_types",
-    "get_all_available_release_versions",
+    "get_available_release_versions",
     "get_available_theme_type_pairs",
     "get_newest_release_version",
 ]
diff --git a/overturemaestro/__main__.py b/overturemaestro/__main__.py
new file mode 100644
index 0000000..2ec8587
--- /dev/null
+++ b/overturemaestro/__main__.py
@@ -0,0 +1,20 @@
+"""Main CLI module."""
+
+
+def main() -> None:
+    """Run provided CLI."""
+    try:
+        from overturemaestro import __app_name__, cli
+    except ImportError as exc:
+        # Re-raise with an installation hint; the original error stays chained as the cause.
+        error_msg = (
+            "Missing optional dependencies required for the CLI."
+            " Please install required packages using `pip install overturemaestro[cli]`."
+        )
+        raise ImportError(error_msg) from exc
+
+    cli.app(prog_name=__app_name__)  # pragma: no cover
+
+
+if __name__ == "__main__":  # pragma: no cover
+    main()
diff --git a/overturemaestro/_exceptions.py b/overturemaestro/_exceptions.py
new file mode 100644
index 0000000..c001a0e
--- /dev/null
+++ b/overturemaestro/_exceptions.py
@@ -0,0 +1 @@
+class QueryNotGeocodedError(ValueError): ...
diff --git a/overturemaestro/_geometry_clustering.py b/overturemaestro/_geometry_clustering.py
index 52f9473..8f75696 100644
--- a/overturemaestro/_geometry_clustering.py
+++ b/overturemaestro/_geometry_clustering.py
@@ -2,9 +2,6 @@
 from collections.abc import Generator, Iterable
 from typing import Any
 
-import polars as pl
-from sklearn.cluster import Birch
-
 EARTH_RADIUS_KM = 6371
 CLUSTERING_THRESHOLD = 20 / EARTH_RADIUS_KM  # calculate 20 kilometers threshold
 
@@ -12,6 +9,9 @@
 def calculate_row_group_bounding_box(
     parquet_filename: str, parquet_row_group: int, pyarrow_table: Any
 ) -> Any:
+    import polars as pl
+    from sklearn.cluster import Birch
+
     df = (
         pl.from_arrow(pyarrow_table)
         .with_row_index(name="row_index")
diff --git a/overturemaestro/_geopandas_api_version.py b/overturemaestro/_geopandas_api_version.py
new file mode 100644
index 0000000..6ed3faf
--- /dev/null
+++ b/overturemaestro/_geopandas_api_version.py
@@ -0,0 +1,4 @@
+import geopandas as gpd
+from packaging import version
+
+GEOPANDAS_NEW_API = version.parse(gpd.__version__) >= version.parse("1.0.0")
diff --git a/overturemaestro/cli.py b/overturemaestro/cli.py
new file mode 100644
index 0000000..94764df
--- /dev/null
+++ b/overturemaestro/cli.py
@@ -0,0 +1,626 @@
+"""CLI module for the OvertureMaestro functions."""
+
+import logging
+from pathlib import Path
+from typing import Annotated, Optional, cast
+
+import click
+import typer
+from shapely import box
+
+from overturemaestro._geopandas_api_version import GEOPANDAS_NEW_API
+
+app = typer.Typer(context_settings={"help_option_names": ["-h", "--help"]}, rich_markup_mode="rich")
+
+
+def _version_callback(value: bool) -> None:
+    if value:
+        from overturemaestro import __app_name__, __version__
+
+        typer.echo(f"{__app_name__} {__version__}")
+        raise typer.Exit()
+
+
+# def _display_osm_extracts_callback(ctx: typer.Context, value: bool) -> None:
+#     if value:
+#         from quackosm.osm_extracts import display_available_extracts
+
+#         param_values = {p.name: p.default for p in ctx.command.params}
+#         param_values.update(ctx.params)
+#         osm_source = cast(str, param_values.get("osm_extract_source"))
+#         display_available_extracts(source=osm_source, use_full_names=True, use_pager=True)
+#         raise typer.Exit()
+
+
+def _display_release_versions_callback(ctx: typer.Context, value: bool) -> None:
+    if value:
+        from rich import print as rprint
+        from rich.table import Table
+
+        from overturemaestro import get_available_release_versions
+
+        release_versions = get_available_release_versions()
+
+        table = Table()
+        table.add_column("Release Version", no_wrap=True)
+        for release_version in release_versions:
+            table.add_row(release_version)
+
+        rprint(table)
+
+        raise typer.Exit()
+
+
+def _display_theme_type_pairs_callback(ctx: typer.Context, value: bool) -> None:
+    if value:
+        from rich import print as rprint
+        from rich.table import Table
+
+        from overturemaestro import get_available_theme_type_pairs, get_newest_release_version
+
+        param_values = {p.name: p.default for p in ctx.command.params}
+        param_values.update(ctx.params)
+        # The key always exists (None when not passed), so a .get() default is never used.
+        release_version = cast(
+            str, param_values.get("release_version") or get_newest_release_version()
+        )
+        theme_type_pairs = get_available_theme_type_pairs(release=release_version)
+
+        table = Table(title=f"{release_version} release")
+        table.add_column("Theme", no_wrap=True)
+        table.add_column("Type", no_wrap=True)
+        for theme_value, type_value in theme_type_pairs:
+            table.add_row(theme_value, type_value)
+
+        rprint(table)
+        raise typer.Exit()
+
+
+def _path_callback(ctx: typer.Context, value: Path) -> Path:
+    if not Path(value).exists():
+        raise typer.BadParameter(f"File not found error: {value}")
+    return value
+
+
+def _empty_path_callback(ctx: typer.Context, value: Path) -> Optional[Path]:
+    if not value:
+        return None
+    return _path_callback(ctx, value)
+
+
+class BboxGeometryParser(click.ParamType):  # type: ignore
+    """Parser for geometry in bounding box form."""
+
+    name = "BBOX"
+
+    def convert(self, value, param=None, ctx=None):  # type: ignore
+        """Convert parameter value."""
+        try:
+            bbox_values = [float(x.strip()) for x in value.split(",")]
+            if len(bbox_values) != 4:
+                raise ValueError(f"Expected 4 values, got {len(bbox_values)}")
+            return box(*bbox_values)
+        except ValueError:  # raised for non-numbers passed to float() or a wrong values count
+            raise typer.BadParameter(
+                "Cannot parse provided bounding box."
+                " Valid value must contain 4 floating point numbers"
+                " separated by commas."
+            ) from None
+
+
+class WktGeometryParser(click.ParamType):  # type: ignore
+    """Parser for geometry in WKT form."""
+
+    name = "TEXT (WKT)"
+
+    def convert(self, value, param=None, ctx=None):  # type: ignore
+        """Convert parameter value."""
+        if not value:
+            return None
+        try:
+            from shapely import from_wkt
+
+            return from_wkt(value)
+        except Exception:
+            raise typer.BadParameter("Cannot parse provided WKT") from None
+
+
+class GeoJsonGeometryParser(click.ParamType):  # type: ignore
+    """Parser for geometry in GeoJSON form."""
+
+    name = "TEXT (GeoJSON)"
+
+    def convert(self, value, param=None, ctx=None):  # type: ignore
+        """Convert parameter value."""
+        if not value:
+            return None
+        try:
+            from shapely import from_geojson
+
+            return from_geojson(value)
+        except Exception:
+            raise typer.BadParameter("Cannot parse provided GeoJSON") from None
+
+
+class GeoFileGeometryParser(click.ParamType):  # type: ignore
+    """Parser for geometry in geo file form."""
+
+    name = "PATH"
+
+    def convert(self, value, param=None, ctx=None):  # type: ignore
+        """Convert parameter value."""
+        if not value:
+            return None
+
+        value = _path_callback(ctx=ctx, value=value)
+
+        try:
+            import geopandas as gpd
+
+            gdf = gpd.read_file(value)
+            if GEOPANDAS_NEW_API:
+                return gdf.union_all()
+            else:
+                return gdf.unary_union
+        except Exception:
+            raise typer.BadParameter("Cannot parse provided geo file") from None
+
+
+class GeocodeGeometryParser(click.ParamType):  # type: ignore
+    """Parser for geometry in string Nominatim query form."""
+
+    name = "TEXT"
+
+    def convert(self, value, param=None, ctx=None):  # type: ignore
+        """Convert parameter value."""
+        if not value:
+            return None
+
+        try:
+            from overturemaestro.geocode import geocode_to_geometry
+
+            return geocode_to_geometry(value)
+        except Exception:
+            raise typer.BadParameter("Cannot geocode provided Nominatim query") from None
+
+
+class GeohashGeometryParser(click.ParamType):  # type: ignore
+    """Parser for geometry in Geohash index form."""
+
+    name = "TEXT (Geohash)"
+
+    def convert(self, value, param=None, ctx=None):  # type: ignore
+        """Convert parameter value."""
+        if not value:
+            return None
+
+        try:
+            import geopandas as gpd
+            from geohash import bbox as geohash_bbox
+            from shapely.geometry import box
+
+            geometries = []
+            for geohash in value.split(","):
+                bounds = geohash_bbox(geohash.strip())
+                geometries.append(
+                    box(minx=bounds["w"], miny=bounds["s"], maxx=bounds["e"], maxy=bounds["n"])
+                )
+            if GEOPANDAS_NEW_API:
+                return gpd.GeoSeries(geometries).union_all()
+            else:
+                return gpd.GeoSeries(geometries).unary_union
+        except Exception as ex:
+            # Report the whole input; the loop variable is unbound if an import failed.
+            raise typer.BadParameter(f"Cannot parse provided Geohash values: {value}") from ex
+
+
+class H3GeometryParser(click.ParamType):  # type: ignore
+    """Parser for geometry in H3 index form."""
+
+    name = "TEXT (H3)"
+
+    def convert(self, value, param=None, ctx=None):  # type: ignore
+        """Convert parameter value."""
+        if not value:
+            return None
+
+        try:
+            import geopandas as gpd
+            import h3
+            from shapely.geometry import Polygon
+
+            geometries = []  # noqa: FURB138
+            for h3_cell in value.split(","):
+                geometries.append(
+                    Polygon([coords[::-1] for coords in h3.cell_to_boundary(h3_cell.strip())])
+                )
+            if GEOPANDAS_NEW_API:
+                return gpd.GeoSeries(geometries).union_all()
+            else:
+                return gpd.GeoSeries(geometries).unary_union
+        except Exception as ex:
+            raise typer.BadParameter(f"Cannot parse provided H3 values: {value}") from ex
+
+
+class S2GeometryParser(click.ParamType):  # type: ignore
+    """Parser for geometry in S2 index form."""
+
+    name = "TEXT (S2)"
+
+    def convert(self, value, param=None, ctx=None):  # type: ignore
+        """Convert parameter value."""
+        if not value:
+            return None
+
+        try:
+            import geopandas as gpd
+            from s2 import s2
+            from shapely.geometry import Polygon
+
+            geometries = []  # noqa: FURB138
+            for s2_index in value.split(","):
+                geometries.append(
+                    Polygon(s2.s2_to_geo_boundary(s2_index.strip(), geo_json_conformant=True))
+                )
+            if GEOPANDAS_NEW_API:
+                return gpd.GeoSeries(geometries).union_all()
+            else:
+                return gpd.GeoSeries(geometries).unary_union
+        except Exception as ex:
+            raise typer.BadParameter(f"Cannot parse provided S2 values: {value}") from ex
+
+
+@app.command()  # type: ignore
+def main(
+    theme_value: Annotated[
+        Optional[str],
+        typer.Argument(
+            help="Data [bold yellow]theme[/bold yellow] value",
+            metavar="theme",
+            show_default=False,
+        ),
+    ] = None,
+    type_value: Annotated[
+        Optional[str],
+        typer.Argument(
+            help="Feature [bold yellow]type[/bold yellow] within [yellow]theme[/yellow]",
+            metavar="type",
+            show_default=False,
+        ),
+    ] = None,
+    release_version: Annotated[
+        Optional[str],
+        typer.Option(
+            "--release-version",
+            "--release",
+            help=(
+                "OvertureMaps dataset release version."
+                " If not provided, will automatically select the newest"
+                " available version."
+            ),
+            is_eager=True,
+            show_default=False,
+        ),
+    ] = None,
+    geom_filter_bbox: Annotated[
+        Optional[str],
+        typer.Option(
+            help=(
+                "Geometry to use as a filter in the"
+                " [bold dark_orange]bounding box[/bold dark_orange] format - 4 floating point"
+                " numbers separated by commas."
+                " Cannot be used together with"
+                " [bold bright_cyan]geom-filter-file[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-geocode[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-geojson[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-index-geohash[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-index-h3[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-index-s2[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-wkt[/bold bright_cyan]."
+            ),
+            click_type=BboxGeometryParser(),
+            show_default=False,
+        ),
+    ] = None,
+    geom_filter_file: Annotated[
+        Optional[str],
+        typer.Option(
+            help=(
+                "Geometry to use as a filter in the"
+                " [bold dark_orange]file[/bold dark_orange] format - any that can be opened by"
+                " GeoPandas. Will return the unary union of the geometries in the file."
+                " Cannot be used together with"
+                " [bold bright_cyan]geom-filter-bbox[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-geocode[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-geojson[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-index-geohash[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-index-h3[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-index-s2[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-wkt[/bold bright_cyan]."
+            ),
+            click_type=GeoFileGeometryParser(),
+            show_default=False,
+        ),
+    ] = None,
+    geom_filter_geocode: Annotated[
+        Optional[str],
+        typer.Option(
+            help=(
+                "Geometry to use as a filter in the"
+                " [bold dark_orange]string to geocode[/bold dark_orange] format - it will be"
+                " geocoded to the geometry using Nominatim API (GeoPy library)."
+                " Cannot be used together with"
+                " [bold bright_cyan]geom-filter-bbox[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-file[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-geojson[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-index-geohash[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-index-h3[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-index-s2[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-wkt[/bold bright_cyan]."
+            ),
+            click_type=GeocodeGeometryParser(),
+            show_default=False,
+        ),
+    ] = None,
+    geom_filter_geojson: Annotated[
+        Optional[str],
+        typer.Option(
+            help=(
+                "Geometry to use as a filter in the [bold dark_orange]GeoJSON[/bold dark_orange]"
+                " format."
+                " Cannot be used together with"
+                " [bold bright_cyan]geom-filter-bbox[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-file[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-geocode[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-index-geohash[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-index-h3[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-index-s2[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-wkt[/bold bright_cyan]."
+            ),
+            click_type=GeoJsonGeometryParser(),
+            show_default=False,
+        ),
+    ] = None,
+    geom_filter_index_geohash: Annotated[
+        Optional[str],
+        typer.Option(
+            help=(
+                "Geometry to use as a filter in the"
+                " [bold dark_orange]Geohash index[/bold dark_orange]"
+                " format. Separate multiple values with a comma."
+                " Cannot be used together with"
+                " [bold bright_cyan]geom-filter-bbox[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-file[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-geocode[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-geojson[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-index-h3[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-index-s2[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-wkt[/bold bright_cyan]."
+            ),
+            click_type=GeohashGeometryParser(),
+            show_default=False,
+        ),
+    ] = None,
+    geom_filter_index_h3: Annotated[
+        Optional[str],
+        typer.Option(
+            help=(
+                "Geometry to use as a filter in the [bold dark_orange]H3 index[/bold dark_orange]"
+                " format. Separate multiple values with a comma."
+                " Cannot be used together with"
+                " [bold bright_cyan]geom-filter-bbox[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-file[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-geocode[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-geojson[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-index-geohash[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-index-s2[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-wkt[/bold bright_cyan]."
+            ),
+            click_type=H3GeometryParser(),
+            show_default=False,
+        ),
+    ] = None,
+    geom_filter_index_s2: Annotated[
+        Optional[str],
+        typer.Option(
+            help=(
+                "Geometry to use as a filter in the [bold dark_orange]S2 index[/bold dark_orange]"
+                " format. Separate multiple values with a comma."
+                " Cannot be used together with"
+                " [bold bright_cyan]geom-filter-bbox[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-file[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-geocode[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-geojson[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-index-geohash[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-index-h3[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-wkt[/bold bright_cyan]."
+            ),
+            click_type=S2GeometryParser(),
+            show_default=False,
+        ),
+    ] = None,
+    geom_filter_wkt: Annotated[
+        Optional[str],
+        typer.Option(
+            help=(
+                "Geometry to use as a filter in the [bold dark_orange]WKT[/bold dark_orange]"
+                " format."
+                " Cannot be used together with"
+                " [bold bright_cyan]geom-filter-bbox[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-file[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-geocode[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-geojson[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-index-geohash[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-index-h3[/bold bright_cyan] or"
+                " [bold bright_cyan]geom-filter-index-s2[/bold bright_cyan]."
+            ),
+            click_type=WktGeometryParser(),
+            show_default=False,
+        ),
+    ] = None,
+    result_file_path: Annotated[
+        Optional[Path],
+        typer.Option(
+            "--output",
+            "-o",
+            help=(
+                "Path where to save final geoparquet file. If not provided, it will be generated"
+                " automatically."
+            ),
+            show_default=False,
+        ),
+    ] = None,
+    ignore_cache: Annotated[
+        bool,
+        typer.Option(
+            "--ignore-cache/",
+            "--no-cache/",
+            help="Whether to ignore previously precalculated geoparquet files or not.",
+            show_default=False,
+        ),
+    ] = False,
+    working_directory: Annotated[
+        Path,
+        typer.Option(
+            "--working-directory",
+            "--work-dir",
+            help=(
+                "Directory where to save the parsed parquet and geoparquet files."
+                " Will be created if it doesn't exist."
+            ),
+        ),
+    ] = "files",  # type: ignore
+    # silent_mode: Annotated[
+    #     bool,
+    #     typer.Option(
+    #         "--silent/",
+    #         help="Whether to disable progress reporting.",
+    #         show_default=False,
+    #     ),
+    # ] = False,
+    # transient_mode: Annotated[
+    #     bool,
+    #     typer.Option(
+    #         "--transient/",
+    #         help="Whether to make more transient (concise) progress reporting.",
+    #         show_default=False,
+    #     ),
+    # ] = False,
+    # allow_uncovered_geometry: Annotated[
+    #     bool,
+    #     typer.Option(
+    #         "--allow-uncovered-geometry/",
+    #         help=(
+    #             "Suppresses an error if some geometry parts aren't covered by any OSM extract."
+    #             " Works only when PbfFileReader is asked to download OSM extracts automatically."
+    #         ),
+    #         show_default=False,
+    #     ),
+    # ] = False,
+    show_release_versions: Annotated[
+        Optional[bool],
+        typer.Option(
+            "--show-release-versions",
+            help="Show available OvertureMaps release versions and exit.",
+            callback=_display_release_versions_callback,
+            is_eager=False,
+        ),
+    ] = None,
+    show_theme_type_pairs: Annotated[
+        Optional[bool],
+        typer.Option(
+            "--show-theme-type-pairs",
+            help="Show available OvertureMaps theme type pairs for the release and exit.",
+            callback=_display_theme_type_pairs_callback,
+            is_eager=False,
+        ),
+    ] = None,
+    version: Annotated[
+        Optional[bool],
+        typer.Option(
+            "--version",
+            "-v",
+            help="Show the application's version and exit.",
+            callback=_version_callback,
+            is_eager=True,
+        ),
+    ] = None,
+) -> None:
+    """
+    OvertureMaestro CLI.
+
+    Wraps public functions and prints final path to the saved geoparquet file at the end.
+    """
+    number_of_geometries_provided = sum(
+        geom is not None
+        for geom in (
+            geom_filter_bbox,
+            geom_filter_file,
+            geom_filter_geocode,
+            geom_filter_geojson,
+            geom_filter_index_geohash,
+            geom_filter_index_h3,
+            geom_filter_index_s2,
+            geom_filter_wkt,
+        )
+    )
+    if theme_value is None or type_value is None or number_of_geometries_provided == 0:
+        from click.exceptions import UsageError
+
+        raise UsageError(
+            message=(
+                "OvertureMaestro requires theme, type and a geometry filter"
+                " (one of --geom-filter-bbox, --geom-filter-file, --geom-filter-geocode,"
+                " --geom-filter-geojson, --geom-filter-index-geohash,"
+                " --geom-filter-index-h3, --geom-filter-index-s2, --geom-filter-wkt)"
+                " to download the data."
+            ),
+        )
+
+    if number_of_geometries_provided > 1:
+        raise typer.BadParameter("Provided more than one geometry for filtering")
+
+    geometry_filter_value = (
+        geom_filter_bbox
+        or geom_filter_file
+        or geom_filter_geocode
+        or geom_filter_geojson
+        or geom_filter_index_geohash
+        or geom_filter_index_h3
+        or geom_filter_index_s2
+        or geom_filter_wkt
+    )
+
+    logging.disable(logging.CRITICAL)
+
+    # if transient_mode and silent_mode:
+    #     raise typer.BadParameter("Cannot pass both silent and transient mode at once.")
+
+    # verbosity_mode: Literal["silent", "transient", "verbose"] = "verbose"
+
+    # if transient_mode:
+    #     verbosity_mode = "transient"
+    # elif silent_mode:
+    #     verbosity_mode = "silent"
+
+    from overturemaestro import convert_geometry_to_parquet, get_available_theme_type_pairs
+
+    if (theme_value, type_value) not in get_available_theme_type_pairs(
+        release=release_version  # type: ignore[arg-type]
+    ):
+        raise typer.BadParameter(
+            f"Dataset of theme = {theme_value} and type = {type_value} doesn't exist."
+        )
+
+    geoparquet_path = convert_geometry_to_parquet(
+        theme=theme_value,
+        type=type_value,
+        geometry_filter=geometry_filter_value,
+        release=release_version,
+        ignore_cache=ignore_cache,
+        working_directory=working_directory,
+        result_file_path=result_file_path,
+        # verbosity_mode=verbosity_mode,
+    )
+
+    typer.secho(geoparquet_path, fg="green")
diff --git a/overturemaestro/data_downloader.py b/overturemaestro/data_downloader.py
index e67d45b..0a2a334 100644
--- a/overturemaestro/data_downloader.py
+++ b/overturemaestro/data_downloader.py
@@ -388,11 +388,11 @@ def _filter_data_properly(
 ) -> Any:
     import geopandas as gpd
     import numpy as np
-    from geoarrow.rust.core import WKBArray
+    from geoarrow.rust.core import GeometryArray
     from shapely import Point, STRtree, get_coordinates
 
     geoseries = gpd.GeoSeries.from_arrow(
-        WKBArray.from_arrow(pyarrow_table["geometry"].combine_chunks())
+        GeometryArray.from_arrow(pyarrow_table["geometry"].combine_chunks())
     )
 
     # First pass - find all simple examples - any point inside geometry filter
diff --git a/overturemaestro/geocode.py b/overturemaestro/geocode.py
new file mode 100644
index 0000000..cd00b9b
--- /dev/null
+++ b/overturemaestro/geocode.py
@@ -0,0 +1,84 @@
+"""Geocoding module for getting a geometry from query using Nominatim."""
+
+import hashlib
+import json
+from pathlib import Path
+from typing import Any, Optional, Union, cast, overload
+
+from geopy.geocoders.nominatim import Nominatim
+from geopy.location import Location
+from shapely.geometry import shape
+from shapely.geometry.base import BaseGeometry
+from shapely.ops import unary_union
+
+from overturemaestro._exceptions import QueryNotGeocodedError
+
+
+@overload
+def geocode_to_geometry(query: str) -> BaseGeometry: ...
+
+
+@overload
+def geocode_to_geometry(query: list[str]) -> BaseGeometry: ...
+
+
+def geocode_to_geometry(query: Union[str, list[str]]) -> BaseGeometry:
+    """
+    Geocode a query to a (Multi)Polygon geometry using Nominatim.
+
+    Results are cached as JSON files in the "cache" directory (resolved against the current
+    working directory), keyed by the SHA-256 hash of the query.
+
+    Args:
+        query (Union[str, list[str]]): Query or list of queries to geocode. Geometries
+            of multiple queries are merged into a single one.
+
+    Returns:
+        BaseGeometry: Geocoded geometry.
+
+    Raises:
+        QueryNotGeocodedError: If Nominatim returns no results or no (Multi)Polygon result.
+    """
+    if not isinstance(query, str):
+        return unary_union([geocode_to_geometry(sub_query) for sub_query in query])
+
+    h = hashlib.new("sha256")
+    h.update(query.encode())
+    query_hash = h.hexdigest()
+    query_file_path = Path("cache").resolve() / f"{query_hash}.json"
+
+    if not query_file_path.exists():
+        query_results = Nominatim(
+            user_agent="OvertureMaestro Python package (https://github.com/kraina-ai/overturemaestro)"
+        ).geocode(query, geometry="geojson", exactly_one=False)
+
+        if not query_results:
+            raise QueryNotGeocodedError(f"Zero results from Nominatim for query '{query}'.")
+
+        polygon_result = _get_first_polygon(query_results)
+
+        if not polygon_result:
+            raise QueryNotGeocodedError(f"No polygon found for query '{query}'.")
+
+        query_file_path.parent.mkdir(parents=True, exist_ok=True)
+        query_file_path.write_text(json.dumps(polygon_result))
+    else:
+        polygon_result = json.loads(query_file_path.read_text())
+
+    return unary_union(shape(polygon_result))
+
+
+def _get_first_polygon(results: list[Location]) -> Optional[dict[str, Any]]:
+    """
+    Choose first result of geometry type (Multi)Polygon from list of results.
+
+    Inspired by OSMnx implementation.
+    """
+    polygon_types = {"Polygon", "MultiPolygon"}
+
+    for result in results:
+        geojson_dict = cast(dict[str, Any], result.raw["geojson"])
+        if geojson_dict["type"] in polygon_types:
+            return geojson_dict
+
+    return None
diff --git a/overturemaestro/release_index.py b/overturemaestro/release_index.py
index 0527a30..065b8de 100644
--- a/overturemaestro/release_index.py
+++ b/overturemaestro/release_index.py
@@ -73,9 +73,10 @@ def get_newest_release_version() -> str:
     )
     return newest_release_version
 
-def get_all_available_release_versions() -> list[str]:
+
+def get_available_release_versions() -> list[str]:
     """
-    Get all available OvertureMaps release versions.
+    Get available OvertureMaps release versions.
 
     Checks available precalculated release indexes in the GitHub repository and returns them.
 
@@ -188,6 +189,7 @@ def load_release_index(
     *,
     geometry_filter: Optional[BaseGeometry] = None,
     remote_index: bool = False,
+    skip_index_download: bool = False,
 ) -> gpd.GeoDataFrame:
     """
     Load release index as a GeoDataFrame.
@@ -201,6 +203,8 @@ def load_release_index(
             Defaults to None.
         remote_index (bool, optional): Avoid downloading the index and stream it from remote
             source. Defaults to False.
+        skip_index_download (bool, optional): Skip downloading the index if it doesn't exist
+            locally and generate it instead. Defaults to False.
 
     Returns:
         gpd.GeoDataFrame: Index with bounding boxes for each row group for each parquet file.
@@ -219,8 +223,11 @@ def load_release_index(
     if remote_index:
         filesystem = HTTPFileSystem()
         index_file_path = LFS_DIRECTORY_URL + str(index_file_path)
+    elif skip_index_download:
+        # Generate the index and skip download
+        generate_release_index(release)
     else:
-        # Download or generate the index if cannot be downloaded
+        # Try to download the index or generate it if cannot be downloaded
         download_existing_release_index(release) or generate_release_index(release)
 
     if geometry_filter is None:
@@ -261,17 +268,30 @@ def get_available_theme_type_pairs(release: Optional[str] = None) -> list[tuple[
 
     cache_directory = _get_release_cache_directory(release)
     release_index_path = cache_directory / "release_index_content.json"
-    if not release_index_path.exists():
-        raise FileNotFoundError(
-            f"Index for release {release} isn't cached locally. "
-            "Please download or generate the index first using "
-            "download_existing_release_index or generate_release_index function."
+
+    if release_index_path.exists():
+        index_content = pd.read_json(release_index_path)
+    else:
+        index_content_file_name = "release_index_content.json"
+        index_content_file_url = (
+            LFS_DIRECTORY_URL + (cache_directory / index_content_file_name).as_posix()
         )
-    theme_type_tuples = json.loads(release_index_path.read_text())
-    return sorted(
-        (theme_type_tuple["theme"], theme_type_tuple["type"])
-        for theme_type_tuple in theme_type_tuples
-    )
+        index_content = pd.read_json(index_content_file_url)
+
+    # if not release_index_path.exists():
+    #     raise FileNotFoundError(
+    #         f"Index for release {release} isn't cached locally. "
+    #         "Please download or generate the index first using "
+    #         "download_existing_release_index or generate_release_index function."
+    #     )
+
+    return sorted(index_content[["theme", "type"]].itertuples(index=False, name=None))
+
+    # theme_type_tuples = json.loads(release_index_path.read_text())
+    # return sorted(
+    #     (theme_type_tuple["theme"], theme_type_tuple["type"])
+    #     for theme_type_tuple in theme_type_tuples
+    # )
 
 
 @overload
@@ -335,7 +355,6 @@ def _download_existing_release_index(
             progressbar=False,
             known_hash=None,
         )
-        rprint("Downloaded index metadata file")
 
         theme_type_tuples = json.loads((cache_directory / index_content_file_name).read_text())
 
@@ -360,7 +379,6 @@ def _download_existing_release_index(
                 progressbar=False,
                 known_hash=sha_value,
             )
-            rprint(f"Downloaded index file {release}/{file_name}")
 
     except urllib.error.HTTPError as ex:
         if ex.code == 404:
@@ -497,11 +515,13 @@ def _get_release_cache_directory(release: str) -> Path:
 def _get_index_file_name(theme_value: str, type_value: str) -> str:
     return f"{theme_value}_{type_value}.parquet"
 
+
 def _load_all_available_release_versions_from_github() -> list[str]:
     gh_fs = GithubFileSystem(org="kraina-ai", repo="overturemaps-releases-indexes")
     release_versions = [file_path.split("/")[1] for file_path in gh_fs.ls("release_indexes")]
     return release_versions
 
+
 def _load_newest_release_version_from_github() -> str:
     release_versions = _load_all_available_release_versions_from_github()
     return sorted(release_versions)[-1]