From 7bd7afd48c51121a8b31757de52de0824febd737 Mon Sep 17 00:00:00 2001 From: Ryan May Date: Thu, 25 Apr 2024 14:19:25 -0600 Subject: [PATCH] BUG: Fix dataset parsing for Hyrax catalogs (Fixes #759) Due to the way parsing is done, we look backwards (only when a dataset tag is encountered) to handle datasets with embedded access tags. Unfortunately, this meant the last such dataset encountered was never handled if it contained such tags, as is the case on NASA's Hyrax server. --- src/siphon/catalog.py | 16 +- tests/fixtures/nasa_hyrax_dataset | 1023 +++++++++++++++++++++++++++++ tests/test_catalog.py | 11 + 3 files changed, 1044 insertions(+), 6 deletions(-) create mode 100644 tests/fixtures/nasa_hyrax_dataset diff --git a/src/siphon/catalog.py b/src/siphon/catalog.py index 46bcaca3c..820c2547e 100644 --- a/src/siphon/catalog.py +++ b/src/siphon/catalog.py @@ -314,12 +314,11 @@ def __init__(self, catalog_url): current_dataset = child.attrib['name'] self._process_dataset(child) - if previous_dataset: - # see if the previously processed dataset has access elements as children - # if so, these datasets need to be processed specially when making - # access_urls - if self.datasets[previous_dataset].access_element_info: - self.ds_with_access_elements_to_process.append(previous_dataset) + # see if the previously processed dataset has access elements as children + # if so, these datasets need to be processed specially when making + # access_urls + if previous_dataset and self.datasets[previous_dataset].access_element_info: + self.ds_with_access_elements_to_process.append(previous_dataset) previous_dataset = current_dataset @@ -346,6 +345,11 @@ def __init__(self, catalog_url): service_skip = self.services[-1].number_of_subservices service_skip_count = 0 + # Needed if the last dataset had such info, since it's only processed looking backwards + # when a new dataset is encountered. 
+ if previous_dataset and self.datasets[previous_dataset].access_element_info: + self.ds_with_access_elements_to_process.append(previous_dataset) + self._process_datasets() def __str__(self): diff --git a/tests/fixtures/nasa_hyrax_dataset b/tests/fixtures/nasa_hyrax_dataset new file mode 100644 index 000000000..f20fd52c6 --- /dev/null +++ b/tests/fixtures/nasa_hyrax_dataset @@ -0,0 +1,1023 @@ +interactions: +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate, br + Connection: + - keep-alive + User-Agent: + - Siphon (0.9.post343+gacede3d2.d20240425) + method: GET + uri: https://opendap.larc.nasa.gov/opendap/DSCOVR/EPIC/L1B/2024/04/catalog.xml + response: + body: + string: "\n \n + \ \n + \ \n + \ \n\n + \ \n + \ \n + \ 340531844\n + \ 2024-04-02T12:43:04Z\n + \ \n + \ \n + \ \n \n \n + \ 340284362\n + \ 2024-04-02T12:42:35Z\n + \ \n + \ \n + \ \n \n \n + \ 340206122\n + \ 2024-04-02T12:44:09Z\n + \ \n + \ \n + \ \n \n \n + \ 339952139\n + \ 2024-04-02T12:43:14Z\n + \ \n + \ \n + \ \n \n \n + \ 339743260\n + \ 2024-04-02T12:42:34Z\n + \ \n + \ \n + \ \n \n \n + \ 339572568\n + \ 2024-04-02T12:42:36Z\n + \ \n + \ \n + \ \n \n \n + \ 339400496\n + \ 2024-04-02T12:43:09Z\n + \ \n + \ \n + \ \n \n \n + \ 305854418\n + \ 2024-04-09T05:07:43Z\n + \ \n + \ \n + \ \n \n \n + \ 338598244\n + \ 2024-04-09T05:18:55Z\n + \ \n + \ \n + \ \n \n \n + \ 305071091\n + \ 2024-04-09T05:19:31Z\n + \ \n + \ \n + \ \n \n \n + \ 338310926\n + \ 2024-04-09T05:19:09Z\n + \ \n + \ \n + \ \n \n \n + \ 338173570\n + \ 2024-04-09T05:19:15Z\n + \ \n + \ \n + \ \n \n \n + \ 337889565\n + \ 2024-04-09T05:18:57Z\n + \ \n + \ \n + \ \n \n \n + \ 337678243\n + \ 2024-04-09T05:19:17Z\n + \ \n + \ \n + \ \n \n \n + \ 337513576\n + \ 2024-04-09T05:18:55Z\n + \ \n + \ \n + \ \n \n \n + \ 337486553\n + \ 2024-04-09T05:19:15Z\n + \ \n + \ \n + \ \n \n \n + \ 337487127\n + \ 2024-04-09T05:19:02Z\n + \ \n + \ \n + \ \n \n \n + \ 304024368\n + \ 2024-04-09T05:19:01Z\n + 
\ \n + \ \n + \ \n \n \n + \ 337163954\n + \ 2024-04-09T05:19:05Z\n + \ \n + \ \n + \ \n \n \n + \ 337011592\n + \ 2024-04-09T05:19:09Z\n + \ \n + \ \n + \ \n \n \n + \ 336955482\n + \ 2024-04-09T05:18:53Z\n + \ \n + \ \n + \ \n \n \n + \ 336857592\n + \ 2024-04-08T15:28:25Z\n + \ \n + \ \n + \ \n \n \n + \ 336693356\n + \ 2024-04-08T15:28:35Z\n + \ \n + \ \n + \ \n \n \n + \ 336533817\n + \ 2024-04-08T15:28:20Z\n + \ \n + \ \n + \ \n \n \n + \ 336308561\n + \ 2024-04-08T15:36:28Z\n + \ \n + \ \n + \ \n \n \n + \ 336130640\n + \ 2024-04-08T15:27:10Z\n + \ \n + \ \n + \ \n \n \n + \ 335885220\n + \ 2024-04-08T15:29:18Z\n + \ \n + \ \n + \ \n \n \n + \ 335832004\n + \ 2024-04-08T15:29:21Z\n + \ \n + \ \n + \ \n \n \n + \ 335828331\n + \ 2024-04-08T15:28:58Z\n + \ \n + \ \n + \ \n \n \n + \ 335728048\n + \ 2024-04-08T15:29:16Z\n + \ \n + \ \n + \ \n \n \n + \ 335522224\n + \ 2024-04-08T15:29:06Z\n + \ \n + \ \n + \ \n \n \n + \ 335404006\n + \ 2024-04-08T15:27:07Z\n + \ \n + \ \n + \ \n \n \n + \ 335220276\n + \ 2024-04-08T15:29:14Z\n + \ \n + \ \n + \ \n \n \n + \ 335189101\n + \ 2024-04-08T15:27:13Z\n + \ \n + \ \n + \ \n \n \n + \ 335162684\n + \ 2024-04-10T11:43:22Z\n + \ \n + \ \n + \ \n \n \n + \ 335041685\n + \ 2024-04-10T11:42:53Z\n + \ \n + \ \n + \ \n \n \n + \ 334858224\n + \ 2024-04-10T11:43:41Z\n + \ \n + \ \n + \ \n \n \n + \ 334748590\n + \ 2024-04-10T11:42:41Z\n + \ \n + \ \n + \ \n \n \n + \ 334554499\n + \ 2024-04-10T11:43:23Z\n + \ \n + \ \n + \ \n \n \n + \ 334355285\n + \ 2024-04-10T11:42:41Z\n + \ \n + \ \n + \ \n \n \n + \ 334163690\n + \ 2024-04-10T11:43:13Z\n + \ \n + \ \n + \ \n \n \n + \ 334065286\n + \ 2024-04-10T11:43:26Z\n + \ \n + \ \n + \ \n \n \n + \ 334041031\n + \ 2024-04-10T11:42:46Z\n + \ \n + \ \n + \ \n \n \n + \ 333886783\n + \ 2024-04-10T11:42:52Z\n + \ \n + \ \n + \ \n \n \n + \ 333800093\n + \ 2024-04-10T11:42:42Z\n + \ \n + \ \n + \ \n \n \n + \ 333645248\n + \ 2024-04-10T11:42:59Z\n + \ \n + \ \n + \ \n \n \n + \ 
333615564\n + \ 2024-04-10T11:42:46Z\n + \ \n + \ \n + \ \n \n \n + \ 38026616\n 2024-04-08T16:08:17Z\n \n + \ \n + \ \n \n \n + \ 333365082\n + \ 2024-04-11T15:13:45Z\n + \ \n + \ \n + \ \n \n \n + \ 333247186\n + \ 2024-04-11T15:13:53Z\n + \ \n + \ \n + \ \n \n \n + \ 333156490\n + \ 2024-04-11T15:13:58Z\n + \ \n + \ \n + \ \n \n \n + \ 332954093\n + \ 2024-04-11T15:13:39Z\n + \ \n + \ \n + \ \n \n \n + \ 332773511\n + \ 2024-04-11T15:13:38Z\n + \ \n + \ \n + \ \n \n \n + \ 332627451\n + \ 2024-04-11T15:14:15Z\n + \ \n + \ \n + \ \n \n \n + \ 332529341\n + \ 2024-04-11T15:13:58Z\n + \ \n + \ \n + \ \n \n \n + \ 332529512\n + \ 2024-04-11T15:14:11Z\n + \ \n + \ \n + \ \n \n \n + \ 332381579\n + \ 2024-04-11T15:14:49Z\n + \ \n + \ \n + \ \n \n \n + \ 332179287\n + \ 2024-04-11T15:13:27Z\n + \ \n + \ \n + \ \n \n \n + \ 332166496\n + \ 2024-04-11T15:14:07Z\n + \ \n + \ \n + \ \n \n \n + \ 332146055\n + \ 2024-04-11T15:13:29Z\n + \ \n + \ \n + \ \n \n \n + \ 332149706\n + \ 2024-04-08T17:00:34Z\n + \ \n + \ \n + \ \n \n \n + \ 331988988\n + \ 2024-04-08T16:57:29Z\n + \ \n + \ \n + \ \n \n \n + \ 331888459\n + \ 2024-04-08T16:58:05Z\n + \ \n + \ \n + \ \n \n \n + \ 331750865\n + \ 2024-04-08T16:59:38Z\n + \ \n + \ \n + \ \n \n \n + \ 331549118\n + \ 2024-04-08T16:58:56Z\n + \ \n + \ \n + \ \n \n \n + \ 331339606\n + \ 2024-04-08T17:01:13Z\n + \ \n + \ \n + \ \n \n \n + \ 331268423\n + \ 2024-04-08T17:00:08Z\n + \ \n + \ \n + \ \n \n \n + \ 331267676\n + \ 2024-04-08T16:57:36Z\n + \ \n + \ \n + \ \n \n \n + \ 331149011\n + \ 2024-04-08T16:57:30Z\n + \ \n + \ \n + \ \n \n \n + \ 331022311\n + \ 2024-04-08T16:59:19Z\n + \ \n + \ \n + \ \n \n \n + \ 330936189\n + \ 2024-04-08T16:57:09Z\n + \ \n + \ \n + \ \n \n \n + \ 330871114\n + \ 2024-04-08T16:58:19Z\n + \ \n + \ \n + \ \n \n \n + \ 330900669\n + \ 2024-04-08T16:59:00Z\n + \ \n + \ \n + \ \n \n \n + \ 330809405\n + \ 2024-04-08T17:25:15Z\n + \ \n + \ \n + \ \n \n \n + \ 330635337\n + \ 2024-04-08T17:22:28Z\n + \ \n + 
\ \n + \ \n \n \n + \ 330582912\n + \ 2024-04-08T17:24:41Z\n + \ \n + \ \n + \ \n \n \n + \ 330494833\n + \ 2024-04-08T17:24:34Z\n + \ \n + \ \n + \ \n \n \n + \ 330275919\n + \ 2024-04-08T17:26:33Z\n + \ \n + \ \n + \ \n \n \n + \ 330102423\n + \ 2024-04-08T17:24:57Z\n + \ \n + \ \n + \ \n \n \n + \ 330103312\n + \ 2024-04-08T17:24:22Z\n + \ \n + \ \n + \ \n \n \n + \ 330078123\n + \ 2024-04-08T17:24:23Z\n + \ \n + \ \n + \ \n \n \n + \ 329956921\n + \ 2024-04-08T17:24:27Z\n + \ \n + \ \n + \ \n \n \n + \ 329930898\n + \ 2024-04-08T17:24:36Z\n + \ \n + \ \n + \ \n \n \n + \ 297477620\n + \ 2024-04-08T17:24:33Z\n + \ \n + \ \n + \ \n \n \n + \ 329732885\n + \ 2024-04-08T17:24:31Z\n + \ \n + \ \n + \ \n \n \n + \ 329711569\n + \ 2024-04-08T17:24:29Z\n + \ \n + \ \n + \ \n \n \n + \ 296703424\n + \ 2024-04-09T09:04:30Z\n + \ \n + \ \n + \ \n \n \n + \ 329078044\n + \ 2024-04-09T09:03:46Z\n + \ \n + \ \n + \ \n \n \n + \ 329244349\n + \ 2024-04-09T09:02:27Z\n + \ \n + \ \n + \ \n \n \n + \ 329198412\n + \ 2024-04-09T09:04:07Z\n + \ \n + \ \n + \ \n \n \n + \ 329037104\n + \ 2024-04-09T09:02:48Z\n + \ \n + \ \n + \ \n \n \n + \ 329022311\n + \ 2024-04-09T09:04:19Z\n + \ \n + \ \n + \ \n \n \n + \ 329075995\n + \ 2024-04-09T09:02:47Z\n + \ \n + \ \n + \ \n \n \n + \ 328981657\n + \ 2024-04-09T09:02:58Z\n + \ \n + \ \n + \ \n \n \n + \ 328889200\n + \ 2024-04-09T09:03:13Z\n + \ \n + \ \n + \ \n \n \n + \ 328835550\n + \ 2024-04-09T09:05:53Z\n + \ \n + \ \n + \ \n \n \n + \ 328612336\n + \ 2024-04-11T22:19:17Z\n + \ \n + \ \n + \ \n \n \n + \ 296294643\n + \ 2024-04-11T22:20:13Z\n + \ \n + \ \n + \ \n \n \n + \ 328359297\n + \ 2024-04-11T22:20:35Z\n + \ \n + \ \n + \ \n \n \n + \ 328272845\n + \ 2024-04-11T22:20:04Z\n + \ \n + \ \n + \ \n \n \n + \ 328107050\n + \ 2024-04-11T22:19:41Z\n + \ \n + \ \n + \ \n \n \n + \ 327909320\n + \ 2024-04-11T22:19:27Z\n + \ \n + \ \n + \ \n \n \n + \ 327856360\n + \ 2024-04-11T22:19:38Z\n + \ \n + \ \n + \ \n \n \n + \ 327813046\n + \ 
2024-04-11T22:19:54Z\n + \ \n + \ \n + \ \n \n \n + \ 327770786\n + \ 2024-04-11T22:19:59Z\n + \ \n + \ \n + \ \n \n \n + \ 327654734\n + \ 2024-04-11T22:19:45Z\n + \ \n + \ \n + \ \n \n \n + \ 327520519\n + \ 2024-04-11T22:19:24Z\n + \ \n + \ \n + \ \n \n \n + \ 327308864\n + \ 2024-04-11T22:19:51Z\n + \ \n + \ \n + \ \n \n \n + \ 327323177\n + \ 2024-04-11T22:19:40Z\n + \ \n + \ \n + \ \n \n \n + \ 327390560\n + \ 2024-04-12T22:11:54Z\n + \ \n + \ \n + \ \n \n \n + \ 327428634\n + \ 2024-04-12T22:12:33Z\n + \ \n + \ \n + \ \n \n \n + \ 327476194\n + \ 2024-04-12T22:13:10Z\n + \ \n + \ \n + \ \n \n \n + \ 327522922\n + \ 2024-04-12T22:11:47Z\n + \ \n + \ \n + \ \n \n \n + \ 327631747\n + \ 2024-04-12T22:11:51Z\n + \ \n + \ \n + \ \n \n \n + \ 327500618\n + \ 2024-04-12T22:12:14Z\n + \ \n + \ \n + \ \n \n \n + \ 327566571\n + \ 2024-04-12T22:12:12Z\n + \ \n + \ \n + \ \n \n \n + \ 327670917\n + \ 2024-04-12T22:12:06Z\n + \ \n + \ \n + \ \n \n \n + \ 327837974\n + \ 2024-04-12T22:14:06Z\n + \ \n + \ \n + \ \n \n \n + \ 327847006\n + \ 2024-04-12T22:11:48Z\n + \ \n + \ \n + \ \n \n \n + \ 327850524\n + \ 2024-04-12T22:12:28Z\n + \ \n + \ \n + \ \n \n \n + \ 327826356\n + \ 2024-04-12T22:11:53Z\n + \ \n + \ \n + \ \n \n \n + \ 327912848\n + \ 2024-04-12T22:11:53Z\n + \ \n + \ \n + \ \n \n \n + \ 328094815\n + \ 2024-04-17T11:39:45Z\n + \ \n + \ \n + \ \n \n \n + \ 327968861\n + \ 2024-04-17T11:39:33Z\n + \ \n + \ \n + \ \n \n \n + \ 327920842\n + \ 2024-04-17T11:39:15Z\n + \ \n + \ \n + \ \n \n \n + \ 328052843\n + \ 2024-04-17T11:38:34Z\n + \ \n + \ \n + \ \n \n \n + \ 327969779\n + \ 2024-04-17T11:39:07Z\n + \ \n + \ \n + \ \n \n \n + \ 327844474\n + \ 2024-04-17T11:39:33Z\n + \ \n + \ \n + \ \n \n \n + \ 327900723\n + \ 2024-04-17T11:39:41Z\n + \ \n + \ \n + \ \n \n \n + \ 327997850\n + \ 2024-04-17T11:38:40Z\n + \ \n + \ \n + \ \n \n \n + \ 328092643\n + \ 2024-04-17T11:39:27Z\n + \ \n + \ \n + \ \n \n \n + \ 328071884\n + \ 2024-04-17T11:39:17Z\n + \ \n + \ \n + 
\ \n \n \n + \ 328058305\n + \ 2024-04-17T11:39:10Z\n + \ \n + \ \n + \ \n \n \n + \ 328127886\n + \ 2024-04-17T11:38:40Z\n + \ \n + \ \n + \ \n \n \n + \ 328193171\n + \ 2024-04-17T11:39:38Z\n + \ \n + \ \n + \ \n \n \n + \ 296131332\n + \ 2024-04-17T11:49:36Z\n + \ \n + \ \n + \ \n \n \n + \ 328366157\n + \ 2024-04-17T11:45:52Z\n + \ \n + \ \n + \ \n \n \n + \ 328416408\n + \ 2024-04-17T11:47:15Z\n + \ \n + \ \n + \ \n \n \n + \ 328387936\n + \ 2024-04-17T11:47:15Z\n + \ \n + \ \n + \ \n \n \n + \ 328384375\n + \ 2024-04-17T11:49:29Z\n + \ \n + \ \n + \ \n \n \n + \ 328338828\n + \ 2024-04-17T11:40:45Z\n + \ \n + \ \n + \ \n \n \n + \ 295840420\n + \ 2024-04-17T11:48:12Z\n + \ \n + \ \n + \ \n \n \n + \ 328360084\n + \ 2024-04-17T11:41:09Z\n + \ \n + \ \n + \ \n \n \n + \ 328395052\n + \ 2024-04-17T11:49:09Z\n + \ \n + \ \n + \ \n \n \n + \ 328407757\n + \ 2024-04-17T11:48:09Z\n + \ \n + \ \n + \ \n \n \n + \ 328515986\n + \ 2024-04-17T11:49:25Z\n + \ \n + \ \n + \ \n \n \n + \ 328641064\n + \ 2024-04-17T11:49:13Z\n + \ \n + \ \n + \ \n \n \n + \ 328766008\n + \ 2024-04-17T11:47:12Z\n + \ \n + \ \n + \ \n \n \n + \ 328858548\n + \ 2024-04-17T02:03:29Z\n + \ \n + \ \n + \ \n \n \n + \ 328713734\n + \ 2024-04-17T02:03:08Z\n + \ \n + \ \n + \ \n \n \n + \ 328794285\n + \ 2024-04-17T02:03:07Z\n + \ \n + \ \n + \ \n \n \n + \ 328745429\n + \ 2024-04-17T02:03:22Z\n + \ \n + \ \n + \ \n \n \n + \ 328791335\n + \ 2024-04-18T12:02:22Z\n + \ \n + \ \n + \ \n \n \n + \ 328659030\n + \ 2024-04-18T12:02:59Z\n + \ \n + \ \n + \ \n \n \n + \ 328711813\n + \ 2024-04-18T12:01:41Z\n + \ \n + \ \n + \ \n \n \n + \ 328777529\n + \ 2024-04-18T12:03:36Z\n + \ \n + \ \n + \ \n \n \n + \ 328868809\n + \ 2024-04-18T12:03:15Z\n + \ \n + \ \n + \ \n \n \n + \ 199045749\n + \ 2024-04-18T12:01:55Z\n + \ \n + \ \n + \ \n \n \n + \ 329059353\n + \ 2024-04-18T12:02:24Z\n + \ \n + \ \n + \ \n \n \n + \ 329031545\n + \ 2024-04-18T12:02:18Z\n + \ \n + \ \n + \ \n \n \n + \ 329247454\n + \ 
2024-04-18T12:03:35Z\n + \ \n + \ \n + \ \n \n \n \n" + headers: + Connection: + - Keep-Alive + Content-Description: + - thredds_catalog + Content-Type: + - text/xml + Date: + - Thu, 25 Apr 2024 16:14:12 GMT + Keep-Alive: + - timeout=5, max=100 + Last-Modified: + - Thu, 25 Apr 2024 16:14:13 GMT + Server: + - Apache + Strict-Transport-Security: + - max-age=31536000 + Transfer-Encoding: + - chunked + X-DAP: + - '3.2' + X-FRAME-OPTIONS: + - DENY + XDODS-Server: + - dods/3.2 + XOPeNDAP-Server: + - asciival/, bes/, csv_handler/, dapreader_module/, dmrpp_module/, fileout_covjson/, + fileout_gdal/, fileout_json/, fileout_netcdf/, fits_handler/, freeform_handler/, + functions/, gateway_module/, gdal_handler/, hdf4_handler/, hdf5_handler/, + libdap/, ncml_moddule/, netcdf_handler/, ngap_module/, usage/, w10n_handler/, + www-interface/, xml_data_handler/ + status: + code: 200 + message: OK +version: 1 diff --git a/tests/test_catalog.py b/tests/test_catalog.py index 98747070b..53168f835 100644 --- a/tests/test_catalog.py +++ b/tests/test_catalog.py @@ -397,3 +397,14 @@ def test_latest_resolver_fail(): assert latest == '' assert '"latest" not available for this catalog' in str(excinfo.value) + + +@recorder.use_cassette('nasa_hyrax_dataset') +def test_nasa_hyrax_dataset(): + """Test that catalogs from NASA's Hyrax server are properly parsed.""" + cat = TDSCatalog('https://opendap.larc.nasa.gov/opendap/DSCOVR/EPIC/L1B/' + '2024/04/catalog.xml') + + # Checks #gh-759 + assert len(cat.datasets) == 161 + assert 'epic_1b_20240413222222_03.h5' in cat.datasets \ No newline at end of file