Skip to content

Commit

Permalink
Merge pull request #291 from astronomy-commons/lsdb-server
Browse files Browse the repository at this point in the history
added support to url params on pixel_catalog_files
  • Loading branch information
Schwarzam authored Jun 12, 2024
2 parents 7b37616 + d261c8e commit 20eee15
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 2 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ dynamic = ["version"]

requires-python = ">=3.9"
dependencies = [
"aiohttp", # http filesystem support
"astropy",
"fsspec>=2023.10.0", # Used for abstract filesystems
"healpy",
Expand Down
45 changes: 43 additions & 2 deletions src/hipscat/io/paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

import re
from typing import Dict, List
from urllib.parse import urlencode

from fsspec.implementations.http import HTTPFileSystem

from hipscat.io.file_io.file_pointer import FilePointer, append_paths_to_pointer, get_fs
from hipscat.pixel_math.healpix_pixel import INVALID_PIXEL, HealpixPixel
Expand Down Expand Up @@ -87,7 +90,10 @@ def get_healpix_from_path(path: str) -> HealpixPixel:


def pixel_catalog_files(
catalog_base_dir: FilePointer, pixels: List[HealpixPixel], storage_options: Dict | None = None
catalog_base_dir: FilePointer,
pixels: List[HealpixPixel],
query_params: dict = None,
storage_options: Dict | None = None,
) -> List[FilePointer]:
"""Create a list of path *pointers* for pixel catalog files. This will not create the directory
or files.
Expand All @@ -103,25 +109,60 @@ def pixel_catalog_files(
Args:
catalog_base_dir (FilePointer): base directory of the catalog (includes catalog name)
pixels (List[HealpixPixel]): the healpix pixels to create pointers to
query_params (dict): Params to append to URL. Ex: {'cols': ['ra', 'dec'], 'fltrs': ['r>=10', 'g<18']}
storage_options (dict): the storage options for the file system to target when generating the paths
Returns (List[FilePointer]):
A list of paths to the pixels, in the same order as the input pixel list.
"""
fs, _ = get_fs(catalog_base_dir, storage_options)
base_path_stripped = catalog_base_dir.removesuffix(fs.sep)

url_params = ""
if isinstance(fs, HTTPFileSystem) and query_params:
url_params = dict_to_query_urlparams(query_params)

return [
fs.sep.join(
[
base_path_stripped,
f"{ORDER_DIRECTORY_PREFIX}={pixel.order}",
f"{DIR_DIRECTORY_PREFIX}={pixel.dir}",
f"{PIXEL_DIRECTORY_PREFIX}={pixel.pixel}.parquet",
f"{PIXEL_DIRECTORY_PREFIX}={pixel.pixel}.parquet" + url_params,
]
)
for pixel in pixels
]


def dict_to_query_urlparams(query_params: dict) -> str:
    """Converts a dictionary to a url query parameter string

    Entries with a falsy key or a falsy value (``None``, ``""``, empty list, ...)
    are skipped. List values are collapsed into a single comma-separated value
    with all spaces removed; list items are converted with ``str`` first, so
    non-string items such as integers are accepted.

    Args:
        query_params (dict): dictionary of query parameters.
    Returns:
        query parameter string to append to a url, including the leading "?",
        or an empty string when there is nothing to encode.
    """
    if not query_params:
        return ""

    query = {}
    for key, value in query_params.items():
        if not key or not value:
            continue
        if isinstance(value, list):
            # str() each item: str.join raises TypeError on non-string elements.
            value = ",".join(str(item) for item in value).replace(" ", "")
        query[key] = value

    if not query:
        return ""

    # Build the query string and add the "?" prefix
    return "?" + urlencode(query, doseq=True)


def pixel_catalog_file(catalog_base_dir: FilePointer, pixel_order: int, pixel_number: int) -> FilePointer:
"""Create path *pointer* for a pixel catalog file. This will not create the directory
or file.
Expand Down
36 changes: 36 additions & 0 deletions tests/hipscat/io/test_paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,42 @@ def test_pixel_catalog_files():
assert expected == result


def test_pixel_catalog_files_w_query_params():
    """Query params are URL-encoded and appended to the pixel file path for an https base."""
    params = {"columns": ["ID", "RA", "DEC", "r_auto"], "filters": ["r_auto<13"]}
    pixel_paths = paths.pixel_catalog_files("https://foo", [HealpixPixel(0, 5)], query_params=params)
    assert [
        "https://foo/Norder=0/Dir=0/Npix=5.parquet?columns=ID%2CRA%2CDEC%2Cr_auto&filters=r_auto%3C13"
    ] == pixel_paths


def test_dict_to_query_urlparams():
    """Check query-string generation for populated, space-padded, and empty inputs."""
    encoded = "?columns=ID%2CRA%2CDEC%2Cr_auto&filters=r_auto%3C13"

    clean_params = {"columns": ["ID", "RA", "DEC", "r_auto"], "filters": ["r_auto<13"]}
    assert paths.dict_to_query_urlparams(clean_params) == encoded

    # Spaces inside list entries are removed, so this encodes identically
    padded_params = {"columns": [" ID", "RA ", "DEC ", "r_auto"], "filters": ["r_auto < 13"]}
    assert paths.dict_to_query_urlparams(padded_params) == encoded

    # Missing input, or entries with an empty key or value, yield no query string
    for empty_input in ({}, None, {"": ""}, {None: ""}, {"": "nonempty"}):
        assert paths.dict_to_query_urlparams(empty_input) == ""


def test_get_healpix_from_path():
expected = HealpixPixel(5, 34)

Expand Down

0 comments on commit 20eee15

Please sign in to comment.