Skip to content

Commit

Permalink
enh: list only those features in the basin that are available exclusively upstream
Browse files Browse the repository at this point in the history
  • Loading branch information
paulmueller committed Jan 1, 2024
1 parent 11acbd9 commit 1f0b499
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 4 deletions.
15 changes: 11 additions & 4 deletions ckanext/dc_serve/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from dcor_shared import (
DC_MIME_TYPES, s3, sha256sum, get_ckan_config_option, get_resource_path,
wait_for_resource)

import h5py

from .res_file_lock import CKANResourceFileLock

Expand Down Expand Up @@ -38,6 +38,13 @@ def generate_condensed_resource_job(resource, override=False):
path_in=path,
ancillaries=True,
check_suffix=False)
# Determine the features that exist in the original resource but
# not in the condensed dataset; only these are listed as basin
# features below.
with h5py.File(path) as hsrc, h5py.File(cond) as hdst:
feats_src = set(hsrc["events"].keys())
feats_dst = set(hdst["events"].keys())
feats_upstream = sorted(feats_src - feats_dst)

# Write DCOR basins
with RTDCWriter(cond) as hw:
# DCOR
Expand All @@ -50,7 +57,7 @@ def generate_condensed_resource_job(resource, override=False):
basin_format="dcor",
basin_locs=[dcor_url],
basin_descr="Original access via DCOR API",
basin_feats=None,
basin_feats=feats_upstream,
verify=False)
# S3
s3_endpoint = get_ckan_config_option(
Expand All @@ -69,7 +76,7 @@ def generate_condensed_resource_job(resource, override=False):
basin_format="s3",
basin_locs=[s3_url],
basin_descr="Direct access via S3",
basin_feats=None,
basin_feats=feats_upstream,
verify=False)
# HTTP (only works for public resources)
hw.store_basin(
Expand All @@ -78,7 +85,7 @@ def generate_condensed_resource_job(resource, override=False):
basin_format="http",
basin_locs=[s3_url],
basin_descr="Public resource access via HTTP",
basin_feats=None,
basin_feats=feats_upstream,
verify=False)
return True
return False
Expand Down
4 changes: 4 additions & 0 deletions ckanext/dc_serve/tests/test_jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,3 +177,7 @@ def test_upload_condensed_dataset_to_s3_job_and_verify_basin(
assert np.allclose(np.mean(ds["image"][0]),
47.15595,
rtol=0, atol=1e-4)
# The basin features should only list those that are not in
# the condensed dataset.
assert ds.basins[0].features == [
"contour", "image", "mask", "trace"]

0 comments on commit 1f0b499

Please sign in to comment.