From e404a326707872214added2e520c0fc953423e20 Mon Sep 17 00:00:00 2001 From: Ryan May Date: Wed, 11 Dec 2024 16:10:55 -0700 Subject: [PATCH] ENH: Make parsing Wyoming post-data values more robust (Fixes #749) The main goal is to allow for missing station information, which results in '******' for latitude, no longitude, and -9999.0 for elevation. As a result, avoid the fixed index parsing of this information, and instead rely on the text to the left of the ':' to identify appropriate sections. --- src/siphon/simplewebservice/wyoming.py | 27 ++-- tests/fixtures/wyoming_missing_station_info | 132 ++++++++++++++++++++ tests/test_wyoming.py | 12 ++ 3 files changed, 159 insertions(+), 12 deletions(-) create mode 100644 tests/fixtures/wyoming_missing_station_info diff --git a/src/siphon/simplewebservice/wyoming.py b/src/siphon/simplewebservice/wyoming.py index b212655dc..639dc8de7 100644 --- a/src/siphon/simplewebservice/wyoming.py +++ b/src/siphon/simplewebservice/wyoming.py @@ -15,6 +15,11 @@ from ..http_util import HTTPEndPoint +def _safe_float(s): + """Convert to float, handling ****** as a string for missing.""" + return pd.NA if all(c == '*' for c in s) or s == '-9999.0' else float(s) + + class WyomingUpperAir(HTTPEndPoint): """Download and parse data from the University of Wyoming's upper air archive.""" @@ -83,18 +88,16 @@ def _get_data(self, time, site_id): meta_data = soup.find_all('pre')[1].contents[0] lines = meta_data.splitlines() - # If the station doesn't have a name identified we need to insert a - # record showing this for parsing to proceed. - if 'Station number' in lines[1]: - lines.insert(1, 'Station identifier: ') - - station = lines[1].split(':')[1].strip() - station_number = int(lines[2].split(':')[1].strip()) - sounding_time = datetime.strptime(lines[3].split(':')[1].strip(), '%y%m%d/%H%M') - latitude = float(lines[4].split(':')[1].strip()) - longitude = float(lines[5].split(':')[1].strip()) - elevation = float(lines[6].split(':')[1].strip()) - pw = float(lines[-1].split(':')[1].strip()) + # Convert values after table into key, value pairs using the name to the left of the : + post_values = dict(tuple(map(str.strip, line.split(': '))) for line in lines[1:]) + + station = post_values.get('Station identifier', '') + station_number = int(post_values['Station number']) + sounding_time = datetime.strptime(post_values['Observation time'], '%y%m%d/%H%M') + latitude = _safe_float(post_values.get('Station latitude', '******')) + longitude = _safe_float(post_values.get('Station longitude', '******')) + elevation = _safe_float(post_values.get('Station elevation', '******')) + pw = float(post_values['Precipitable water [mm] for entire sounding']) df['station'] = station df['station_number'] = station_number diff --git a/tests/fixtures/wyoming_missing_station_info b/tests/fixtures/wyoming_missing_station_info new file mode 100644 index 000000000..fc9092259 --- /dev/null +++ b/tests/fixtures/wyoming_missing_station_info @@ -0,0 +1,132 @@ +interactions: +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate, br, zstd + Connection: + - keep-alive + User-Agent: + - Siphon (0.9.post521+g63eb3aba.d20241211) + method: GET + uri: http://weather.uwyo.edu/cgi-bin/sounding/?region=naconf&TYPE=TEXT%3ALIST&YEAR=2012&MONTH=01&FROM=0100&TO=0100&STNM=82244 + response: + body: + string: "\nUniversity of Wyoming - Radiosonde Data\n\n

82244 Santarem Observations at 00Z 01 Jan 2012

\n
\n-----------------------------------------------------------------------------\n
+        \  PRES   HGHT   TEMP   DWPT   RELH   MIXR   DRCT   SKNT   THTA   THTE   THTV\n
+        \   hPa     m      C      C      %    g/kg    deg   knot     K      K      K
+        \n-----------------------------------------------------------------------------\n
+        1002.0          29.0   24.1     75  19.29      0      4  302.0  359.2  305.4\n
+        1000.0     74   28.8   24.3     77  19.57      0      4  301.9  360.1  305.5\n
+        \ 986.0    200   29.0   22.0     66  17.21      7      5  303.4  354.7  306.5\n
+        \ 925.0    767   25.0   20.0     74  16.19     40     10  304.9  353.5  307.8\n
+        \ 897.0   1035   22.8   18.8     79  15.51     55     14  305.2  351.9  308.1\n
+        \ 850.0   1505   18.8   16.8     88  14.37     65     17  305.8  349.1  308.5\n
+        \ 828.0   1730   17.2   15.4     89  13.47     71     18  306.4  347.1  308.9\n
+        \ 816.0   1854   16.6   14.0     84  12.44     75     19  307.1  344.9  309.4\n
+        \ 779.0   2248   14.9    9.4     70   9.61     80     17  309.3  338.9  311.1\n
+        \ 751.0   2559   13.5    5.8     60   7.79     65     14  311.0  335.4  312.5\n
+        \ 746.0   2616   13.2    5.2     58   7.49     66     14  311.4  334.8  312.8\n
+        \ 731.0   2787   11.8    8.3     79   9.48     71     13  311.6  341.1  313.4\n
+        \ 710.0   3031   11.0    1.0     50   5.83     77     12  313.4  331.9  314.5\n
+        \ 700.0   3149   10.2    2.2     58   6.45     80     11  313.8  334.3  315.0\n
+        \ 674.0   3463    8.6    3.6     71   7.41     94     10  315.4  338.9  316.8\n
+        \ 649.0   3776    8.0   -1.0     53   5.51    108      9  318.1  336.1  319.2\n
+        \ 531.0   5403   -1.5   -9.5     54   3.52    183      5  325.5  337.6  326.2\n
+        \ 500.0   5880   -3.9  -18.9     30   1.73    205      4  328.2  334.4  328.6\n
+        \ 483.0   6152   -6.3  -20.3     32   1.59    281      2  328.5  334.3  328.8\n
+        \ 474.0   6300   -7.7  -13.7     62   2.82    323      1  328.6  338.4  329.1\n
+        \ 471.0   6349   -7.3  -22.3     29   1.37    337      1  329.6  334.6  329.9\n
+        \ 466.0   6432   -7.8  -17.8     44   2.03      0      0  330.0  337.3  330.4\n
+        \ 463.0   6483   -8.1  -15.1     57   2.57    359      1  330.3  339.4  330.8\n
+        \ 460.0   6533   -7.9  -22.9     29   1.33    358      2  331.1  336.0  331.4\n
+        \ 444.0   6808  -10.1  -13.5     76   3.06    352      8  331.7  342.5  332.4\n
+        \ 437.0   6931   -9.9  -19.9     44   1.81    350     10  333.5  340.1  333.9\n
+        \ 408.0   7459  -12.9  -39.9      8   0.29    354     26  336.2  337.4  336.3\n
+        \ 404.0   7534  -13.4  -38.9     10   0.33    355     28  336.5  337.9  336.6\n
+        \ 400.0   7610  -13.9  -37.9     11   0.37    355     28  336.8  338.3  336.9\n
+        \ 381.0   7968  -16.9  -39.4     12   0.33    355     20  337.6  339.0  337.7\n
+        \ 366.0   8263  -19.3  -40.6     13   0.30    345     16  338.3  339.5  338.3\n
+        \ 353.0   8529  -21.5  -41.7     14   0.28    345     12  338.8  340.0  338.9\n
+        \ 324.0   9160  -26.8  -44.2     18   0.23    115      2  340.0  341.0  340.1\n
+        \ 319.0   9274  -27.7  -44.7     18   0.22    118      3  340.2  341.1  340.2\n
+        \ 300.0   9710  -31.3  -41.3     37   0.34    130      8  341.1  342.6  341.2\n
+        \ 293.0   9877  -32.8  -41.4     42   0.35    125     10  341.4  342.8  341.4\n
+        \ 285.0  10073  -34.5  -41.5     49   0.35    131     12  341.6  343.1  341.7\n
+        \ 250.0  10980  -41.1  -54.1     23   0.10    160     24  344.8  345.3  344.9\n
+        \ 238.0  11313  -43.8  -60.0     15   0.05    165     23  345.6  345.9  345.6\n
+        \ 235.0  11398  -44.5  -61.5     13   0.04    165     24  345.8  346.0  345.8\n
+        \ 217.0  11928  -49.1  -63.1     18   0.04    162     32  346.7  346.9  346.7\n
+        \ 212.0  12081  -50.3  -59.3     34   0.06    162     34  347.1  347.4  347.1\n
+        \ 206.0  12268  -51.5  -56.4     56   0.09    161     37  348.1  348.5  348.1\n
+        \ 200.0  12460  -53.1  -56.9     63   0.09    160     40  348.5  348.9  348.5\n
+        \ 193.0  12688  -54.9  -58.2     67   0.08    156     43  349.2  349.6  349.2\n
+        \ 192.0  12720  -55.1  -58.7     64   0.07    155     43  349.4  349.7  349.4\n
+        \ 170.0  13485  -60.5  -70.5     26   0.02    137     26  352.8  352.9  352.8\n
+        \ 154.0  14091  -65.7  -72.7     37   0.01    123     13  354.0  354.1  354.0\n
+        \ 151.0  14210  -66.6  -73.6     37   0.01    120     10  354.5  354.6  354.5\n
+        \ 150.0  14250  -66.9  -73.9     37   0.01    125     10  354.6  354.7  354.6\n
+        \ 145.0  14448  -68.0  -75.1     36   0.01    145     10  356.1  356.2  356.1\n
+        \ 142.0  14571  -68.8  -75.9     35   0.01    205      6  357.0  357.1  357.0\n
+        \ 138.0  14738  -69.7  -76.9     34   0.01    280     10  358.2  358.3  358.2\n
+        \ 134.0  14910  -70.7  -78.0     33   0.01    310     16  359.5  359.5  359.5\n
+        \ 116.0  15754  -75.6  -83.3     29   0.00    320     33  365.6  365.6  365.6\n
+        \ 111.0  16012  -77.1  -84.9     28   0.00    340     40  367.4  367.4  367.4\n
+        \ 106.0  16281  -78.7  -86.5     27   0.00    340     20  369.3  369.3  369.3\n
+        \ 101.0  16564  -80.3  -88.3     26   0.00    300     15  371.3  371.3  371.3\n
+        \ 100.0  16620  -80.7  -88.7     26   0.00    280     14  371.6  371.6  371.6\n
+        \  92.2  17069  -83.9  -91.9     24   0.00    240     18  373.9  373.9  373.9\n
+        \  87.7  17347  -85.9  -93.9     23   0.00    195     36  375.3  375.3  375.3\n
+        \  82.7  17678  -83.5  -91.5     24   0.00    195     36  386.6  386.6  386.6\n
+        \  74.1  18288  -79.0  -87.0     26   0.00     65     15  408.2  408.2  408.2\n
+        \  70.2  18593  -76.8  -84.8     27   0.00    105     20  419.4  419.4  419.4\n
+        \  70.0  18610  -76.7  -84.7     27   0.00    105     18  420.0  420.0  420.0\n
+        \  66.7  18898  -74.6  -84.5     21   0.00    105     20  430.5  430.5  430.5\n
+        \  63.3  19202  -72.3  -84.2     15   0.01    255     12  441.8  441.9  441.8\n
+        \  60.1  19507  -70.1  -84.0     12   0.01    245     16  453.4  453.5  453.4\n
+        \  50.0  20590  -62.1  -83.1      4   0.01    340      8  496.7  496.8  496.7\n

Station + information and sounding indices

\n                             Station
+        number: 82244\n                           Observation time: 120101/0000\n
+        \                          Station latitude: ******\n                          Station
+        elevation: -9999.0\n                            Showalter index: -0.52\n                                SWEAT
+        index: 239.60\n                                    K index: 31.50\n                         Cross
+        totals index: 20.70\n                      Vertical totals index: 22.70\n
+        \                       Totals totals index: 43.40\n     Mean mixed layer
+        potential temperature: 0.00\n              Mean mixed layer mixing ratio:
+        0.00\n              1000 hPa to 500 hPa thickness: 5806.00\nPrecipitable water
+        [mm] for entire sounding: 52.28\n
\n

Description of the \ndata + columns\nor sounding indices.\n\n

\n

\n\n\n
\n
\nInterested in graduate studies in atmospheric science?\nCheck + out our program at the\nUniversity + of Wyoming\n\n
\nQuestions about the + weather data provided by this site can be\naddressed to \nLarry + Oolman (ldoolman@uwyo.edu)\n
\n\n\n\n" + headers: + Accept-Ranges: + - bytes + Connection: + - Keep-Alive + Content-Length: + - '7447' + Content-Type: + - text/html; charset=UTF-8 + Date: + - Wed, 11 Dec 2024 22:28:52 GMT + ETag: + - W/"1d17-629061f5f3e9d" + Keep-Alive: + - timeout=5, max=100 + Last-Modified: + - Wed, 11 Dec 2024 22:28:53 GMT + Server: + - Apache + status: + code: 200 + message: OK +version: 1 diff --git a/tests/test_wyoming.py b/tests/test_wyoming.py index 3a648cca7..e43f8fbab 100644 --- a/tests/test_wyoming.py +++ b/tests/test_wyoming.py @@ -6,6 +6,7 @@ from datetime import datetime from numpy.testing import assert_almost_equal +import pandas as pd import pytest from siphon.simplewebservice.wyoming import WyomingUpperAir @@ -140,3 +141,14 @@ def test_wyoming_heights(): assert_almost_equal(df['height'][140], 10336.0, 2) assert_almost_equal(df['direction'][1], 145.0, 1) + + +# GH #749 +@recorder.use_cassette('wyoming_missing_station_info') +def test_missing_station(): + """Test that we can still return data for stations missing from the Wyoming archive.""" + df = WyomingUpperAir.request_data(datetime(2012, 1, 1, 0), '82244') + assert df['station'][0] == '' + assert pd.isna(df['latitude'][0]) + assert pd.isna(df['longitude'][1]) + assert pd.isna(df['elevation'][0])