diff --git a/pyproject.toml b/pyproject.toml index 1e783288..d47b324c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,6 +18,7 @@ dynamic = ["version"] requires-python = ">=3.9" dependencies = [ + "aiohttp", # http filesystem support "astropy", "fsspec>=2023.10.0", # Used for abstract filesystems "healpy", diff --git a/src/hipscat/io/paths.py b/src/hipscat/io/paths.py index d4bb5ec5..17ade760 100644 --- a/src/hipscat/io/paths.py +++ b/src/hipscat/io/paths.py @@ -4,6 +4,9 @@ import re from typing import Dict, List +from urllib.parse import urlencode + +from fsspec.implementations.http import HTTPFileSystem from hipscat.io.file_io.file_pointer import FilePointer, append_paths_to_pointer, get_fs from hipscat.pixel_math.healpix_pixel import INVALID_PIXEL, HealpixPixel @@ -87,7 +90,10 @@ def get_healpix_from_path(path: str) -> HealpixPixel: def pixel_catalog_files( - catalog_base_dir: FilePointer, pixels: List[HealpixPixel], storage_options: Dict | None = None + catalog_base_dir: FilePointer, + pixels: List[HealpixPixel], + query_params: dict = None, + storage_options: Dict | None = None, ) -> List[FilePointer]: """Create a list of path *pointers* for pixel catalog files. This will not create the directory or files. @@ -103,25 +109,60 @@ def pixel_catalog_files( Args: catalog_base_dir (FilePointer): base directory of the catalog (includes catalog name) pixels (List[HealpixPixel]): the healpix pixels to create pointers to + query_params (dict): Params to append to URL. Ex: {'cols': ['ra', 'dec'], 'fltrs': ['r>=10', 'g<18']} storage_options (dict): the storage options for the file system to target when generating the paths + Returns (List[FilePointer]): A list of paths to the pixels, in the same order as the input pixel list. """ fs, _ = get_fs(catalog_base_dir, storage_options) base_path_stripped = catalog_base_dir.removesuffix(fs.sep) + + url_params = "" + if isinstance(fs, HTTPFileSystem) and query_params: + url_params = dict_to_query_urlparams(query_params) + return [ fs.sep.join( [ base_path_stripped, f"{ORDER_DIRECTORY_PREFIX}={pixel.order}", f"{DIR_DIRECTORY_PREFIX}={pixel.dir}", - f"{PIXEL_DIRECTORY_PREFIX}={pixel.pixel}.parquet", + f"{PIXEL_DIRECTORY_PREFIX}={pixel.pixel}.parquet" + url_params, ] ) for pixel in pixels ] +def dict_to_query_urlparams(query_params: dict) -> str: + """Converts a dictionary to a url query parameter string + + Args: + query_params (dict): dictionary of query parameters. + Returns: + query parameter string to append to a url + """ + + if not query_params: + return "" + + query = {} + for key, value in query_params.items(): + if not all([key, value]): + continue + if isinstance(value, list): + value = ",".join(value).replace(" ", "") + query[key] = value + + if not query: + return "" + + # Build the query string and add the "?" prefix + url_params = "?" + urlencode(query, doseq=True) + return url_params + + def pixel_catalog_file(catalog_base_dir: FilePointer, pixel_order: int, pixel_number: int) -> FilePointer: """Create path *pointer* for a pixel catalog file. This will not create the directory or file. diff --git a/tests/hipscat/io/test_paths.py b/tests/hipscat/io/test_paths.py index 40c54e05..2c0bdca3 100644 --- a/tests/hipscat/io/test_paths.py +++ b/tests/hipscat/io/test_paths.py @@ -54,6 +54,42 @@ def test_pixel_catalog_files(): assert expected == result +def test_pixel_catalog_files_w_query_params(): + expected = [ + "https://foo/Norder=0/Dir=0/Npix=5.parquet?columns=ID%2CRA%2CDEC%2Cr_auto&filters=r_auto%3C13" + ] + query_params = {"columns": ["ID", "RA", "DEC", "r_auto"], "filters": ["r_auto<13"]} + result = paths.pixel_catalog_files("https://foo", [HealpixPixel(0, 5)], query_params=query_params) + assert expected == result + + +def test_dict_to_query_urlparams(): + expected = "?columns=ID%2CRA%2CDEC%2Cr_auto&filters=r_auto%3C13" + query_params = {"columns": ["ID", "RA", "DEC", "r_auto"], "filters": ["r_auto<13"]} + result = paths.dict_to_query_urlparams(query_params) + assert result == expected + + expected = "?columns=ID%2CRA%2CDEC%2Cr_auto&filters=r_auto%3C13" + query_params = {"columns": [" ID", "RA ", "DEC ", "r_auto"], "filters": ["r_auto < 13"]} + result = paths.dict_to_query_urlparams(query_params) + assert result == expected + + result = paths.dict_to_query_urlparams({}) + assert result == "" + + result = paths.dict_to_query_urlparams(None) + assert result == "" + + result = paths.dict_to_query_urlparams({"": ""}) + assert result == "" + + result = paths.dict_to_query_urlparams({None: ""}) + assert result == "" + + result = paths.dict_to_query_urlparams({"": "nonempty"}) + assert result == "" + + def test_get_healpix_from_path(): expected = HealpixPixel(5, 34)