Skip to content

Commit

Permalink
ENH: Make parsing Wyoming post-data values more robust (Fixes #749)
Browse files Browse the repository at this point in the history
The main goal is to allow for missing station information, which results
in '******' for latitude, no longitude, and -9999.0 for elevation. As a
result, avoid the fixed index parsing of this information, and instead
rely on the text to the left of the ':' to identify appropriate
sections.
  • Loading branch information
dopplershift committed Dec 11, 2024
1 parent 7aa5353 commit 0a1573c
Show file tree
Hide file tree
Showing 3 changed files with 159 additions and 12 deletions.
27 changes: 15 additions & 12 deletions src/siphon/simplewebservice/wyoming.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@
from ..http_util import HTTPEndPoint


def _safe_float(s):
    """Convert a text field to a float, mapping missing-value markers to ``pd.NA``.

    Parameters
    ----------
    s : str
        Text of the value to convert; surrounding whitespace is ignored.

    Returns
    -------
    float or pandas.NA
        ``pd.NA`` when `s` is blank, consists solely of ``*`` characters (e.g. ``'******'``),
        or parses to the -9999 sentinel; otherwise the parsed value.

    Raises
    ------
    ValueError
        If `s` is not a missing-value marker and cannot be parsed as a float.

    """
    text = s.strip()

    # Nothing left after removing asterisks means the field was blank or all '*'.
    if not text.strip('*'):
        return pd.NA

    value = float(text)

    # Compare numerically so '-9999', '-9999.0' and '-9999.00' are all treated as missing.
    return pd.NA if value == -9999.0 else value


class WyomingUpperAir(HTTPEndPoint):
"""Download and parse data from the University of Wyoming's upper air archive."""

Expand Down Expand Up @@ -83,18 +88,16 @@ def _get_data(self, time, site_id):
meta_data = soup.find_all('pre')[1].contents[0]
lines = meta_data.splitlines()

# If the station doesn't have a name identified we need to insert a
# record showing this for parsing to proceed.
if 'Station number' in lines[1]:
lines.insert(1, 'Station identifier: ')

station = lines[1].split(':')[1].strip()
station_number = int(lines[2].split(':')[1].strip())
sounding_time = datetime.strptime(lines[3].split(':')[1].strip(), '%y%m%d/%H%M')
latitude = float(lines[4].split(':')[1].strip())
longitude = float(lines[5].split(':')[1].strip())
elevation = float(lines[6].split(':')[1].strip())
pw = float(lines[-1].split(':')[1].strip())
# Convert values after table into key, value pairs using the name to the left of the :
post_values = dict(tuple(map(str.strip, l.split(': '))) for l in lines[1:])

Check failure on line 92 in src/siphon/simplewebservice/wyoming.py

View workflow job for this annotation

GitHub Actions / Run Lint Tools

Ruff (E741)

src/siphon/simplewebservice/wyoming.py:92:69: E741 Ambiguous variable name: `l`

station = post_values.get('Station identifier', '')
station_number = int(post_values['Station number'])
sounding_time = datetime.strptime(post_values['Observation time'], '%y%m%d/%H%M')
latitude = _safe_float(post_values.get('Station latitude', '******'))
longitude = _safe_float(post_values.get('Station longitude', '******'))
elevation = _safe_float(post_values.get('Station elevation', '******'))
pw = float(post_values['Precipitable water [mm] for entire sounding'])

df['station'] = station
df['station_number'] = station_number
Expand Down
132 changes: 132 additions & 0 deletions tests/fixtures/wyoming_missing_station_info

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions tests/test_wyoming.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from datetime import datetime

from numpy.testing import assert_almost_equal
import pandas as pd
import pytest

from siphon.simplewebservice.wyoming import WyomingUpperAir
Expand Down Expand Up @@ -140,3 +141,14 @@ def test_wyoming_heights():

assert_almost_equal(df['height'][140], 10336.0, 2)
assert_almost_equal(df['direction'][1], 145.0, 1)


# GH #749
@recorder.use_cassette('wyoming_missing_station_info')
def test_missing_station():
    """Test that we can still return data for stations missing from the Wyoming archive."""
    df = WyomingUpperAir.request_data(datetime(2012, 1, 1, 0), '82244')

    # With no station information available, the identifier falls back to an empty string.
    assert df['station'][0] == ''

    # Inspect the same (first) row for each of the station location fields.
    for column in ('latitude', 'longitude', 'elevation'):
        assert pd.isna(df[column][0])

0 comments on commit 0a1573c

Please sign in to comment.