Skip to content

Commit

Permalink
Merge pull request #66 from wmo-im/dev
Browse files Browse the repository at this point in the history
Removal of geojson
  • Loading branch information
tomkralidis authored May 30, 2022
2 parents c1af25a + cc106ac commit ba7ce4e
Show file tree
Hide file tree
Showing 8 changed files with 21 additions and 248 deletions.
8 changes: 4 additions & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@

FROM ubuntu:focal

ARG BUILD_PACKAGES="build-essential curl cmake gfortran" \
ECCODES_VER=2.23.0
ARG BUILD_PACKAGES="build-essential cmake gfortran" \
ECCODES_VER=2.26.0

ENV DEBIAN_FRONTEND="noninteractive" \
TZ="Etc/UTC" \
Expand All @@ -33,10 +33,10 @@ WORKDIR /tmp/eccodes

RUN echo "Acquire::Check-Valid-Until \"false\";\nAcquire::Check-Date \"false\";" | cat > /etc/apt/apt.conf.d/10no--check-valid-until \
&& apt-get update -y \
&& apt-get install -y ${BUILD_PACKAGES} python3 python3-pip libffi-dev python3-dev libudunits2-0 \
&& apt-get install -y ${BUILD_PACKAGES} python3 python3-pip libffi-dev python3-dev libudunits2-0 curl \
&& curl https://confluence.ecmwf.int/download/attachments/45757960/eccodes-${ECCODES_VER}-Source.tar.gz --output eccodes-${ECCODES_VER}-Source.tar.gz \
&& tar xzf eccodes-${ECCODES_VER}-Source.tar.gz \
&& mkdir build && cd build && cmake -DCMAKE_INSTALL_PREFIX=${ECCODES_DIR} ../eccodes-${ECCODES_VER}-Source && make && ctest && make install \
&& mkdir build && cd build && cmake -DCMAKE_INSTALL_PREFIX=${ECCODES_DIR} -DENABLE_AEC=OFF ../eccodes-${ECCODES_VER}-Source && make && ctest && make install # \
&& cd / && rm -rf /tmp/eccodes /tmp/csv2bufr \
&& apt-get remove --purge -y ${BUILD_PACKAGES} \
&& apt autoremove -y \
Expand Down
90 changes: 4 additions & 86 deletions csv2bufr/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@

__version__ = "0.1.0"

from copy import deepcopy
import csv
from datetime import timezone, datetime
import hashlib
Expand All @@ -31,11 +30,9 @@
import os.path
from typing import Any, Iterator, Union

from cfunits import Units
from eccodes import (codes_bufr_new_from_samples,
codes_set_array, codes_set, codes_get_native_type,
codes_write, codes_release, codes_get,
CODES_MISSING_LONG, CODES_MISSING_DOUBLE,
codes_bufr_keys_iterator_new,
codes_bufr_keys_iterator_next,
codes_bufr_keys_iterator_delete,
Expand Down Expand Up @@ -215,9 +212,9 @@ def __init__(self, descriptors: list, delayed_replications: list = list(),
:param table_version: version of Master Table 0 to use, default 36
"""
# ===============================
# first create empty bufr message
# ===============================
# ================================
# first create empty bufr messages
# ================================
bufr_msg = codes_bufr_new_from_samples("BUFR4")
# ===============================
# set delayed replication factors
Expand Down Expand Up @@ -461,78 +458,6 @@ def md5(self) -> str:
else:
return None

def as_geojson(self, identifier: str, template: dict ) -> str: # noqa
"""
Returns contents of BUFR message as a geoJSON string according to the
specified template.
:param identifier: unique ID used to identify the message
:param template: dictionary containing mapping from BUFR to geoJSON;
an optional template["_meta"]["units"] dictionary maps source
units to target units, e.g. {"K": "Celsius"}
:returns: string containing geoJSON data (from json.dumps)
"""

# work on a copy: _extract fills the template in place, and the
# caller's template must not be mutated
_template = deepcopy(template)
result = self._extract(_template)
# optional unit conversion driven by the template metadata
# NOTE(review): assumes template always contains "_meta" - a template
# without it raises KeyError here; confirm against callers
if "units" in template["_meta"]:
units = template["_meta"]["units"]
for u in units:
for o in result["properties"]["observations"]:
if result["properties"]["observations"][o]["units"] == u:
value = result["properties"]["observations"][o]["value"] # noqa
# missing values (None) are left unconverted
if value is not None:
value = Units.conform(
result["properties"]["observations"][o]["value"], # noqa
Units(u),
Units(units[u])
)
result["properties"]["observations"][o]["value"] = value # noqa
result["properties"]["observations"][o]["units"] = units[u] # noqa
# strip internal metadata from the returned document
if "_meta" in result:
del result["_meta"]
result["properties"]["identifier"] = result["id"] = identifier
# resultTime records when the geoJSON was generated (UTC, seconds)
result["properties"]["resultTime"] = datetime.now(timezone.utc).isoformat(timespec="seconds") # noqa
return json.dumps(result, indent=4)

def _extract(self, object_: Union[dict, list]) -> Union[dict, list]:
    """
    Internal helper used by as_geojson: walks the geoJSON template
    recursively, filling it in place with values pulled from the BUFR
    message.
    :param object_: the element in the geoJSON template to resolve
    :returns: the element with its value(s) set from the BUFR message
    """

    if isinstance(object_, dict):
        if "format" in object_:
            # formatted-string node: resolve the arguments first, then
            # apply the format; any missing argument voids the result
            assert "args" in object_
            resolved = self._extract(object_["args"])
            if None in resolved:
                return None
            return object_["format"].format(*resolved)
        if "eccodes_key" in object_:
            # leaf node: read the named element from the BUFR message
            extracted = self.get_element(object_["eccodes_key"])
            if isinstance(extracted, int):
                extracted = float(extracted)
            # eccodes sentinel values indicate missing data -> None
            if extracted in (CODES_MISSING_LONG, CODES_MISSING_DOUBLE):
                return None
            return extracted
        # plain mapping: resolve every entry in place
        for key in object_:
            object_[key] = self._extract(object_[key])
        return object_

    if isinstance(object_, list):
        # resolve every list element in place
        for position, entry in enumerate(object_):
            object_[position] = self._extract(entry)
        return object_

    # scalars (str, numbers, None) pass through unchanged
    return object_

def parse(self, data: dict, metadata: dict, mappings: dict) -> None:
"""
Function to parse observation data and station metadata, mapping to the
Expand Down Expand Up @@ -653,8 +578,7 @@ def get_datetime(self) -> datetime:
)


def transform(data: str, metadata: dict, mappings: dict,
template: dict = {}) -> Iterator[dict]:
def transform(data: str, metadata: dict, mappings: dict) -> Iterator[dict]:
"""
This function returns an iterator to process each line in the input CSV
string. On each iteration a dictionary is returned containing the BUFR
Expand All @@ -667,7 +591,6 @@ def transform(data: str, metadata: dict, mappings: dict,
The dictionary returned by the iterator contains the following keys:
- ["bufr4"] = data encoded into BUFR;
- ["geojson"] = data encoded into geojson (only present if template specified); # noqa
- ["_meta"] = metadata on the data.
The ["_meta"] element includes the following:
Expand Down Expand Up @@ -774,11 +697,6 @@ def transform(data: str, metadata: dict, mappings: dict,
isodate = message.get_datetime().strftime('%Y%m%dT%H%M%S')
rmk = f"WIGOS_{wsi}_{isodate}"

# now create GeoJSON if specified
if template:
LOGGER.debug("Adding GeoJSON representation")
result["geojson"] = message.as_geojson(rmk, template) # noqa

# now additional metadata elements
LOGGER.debug("Adding metadata elements")
result["_meta"] = {
Expand Down
23 changes: 2 additions & 21 deletions csv2bufr/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,12 +96,8 @@ def create_mappings(ctx, sequence):
help="Name of output file")
@click.option("--station-metadata", "station_metadata", required=True,
help="WIGOS station identifier JSON file")
@click.option("--geojson-template", "geojson_template", required=False,
default=None,
help="Name of file or template for GeoJSON containing mapping from BUFR to GeoJSON") # noqa
@cli_option_verbosity
def transform(ctx, csv_file, mapping, output_dir, station_metadata,
geojson_template, verbosity):
def transform(ctx, csv_file, mapping, output_dir, station_metadata, verbosity):
result = None
click.echo(f"Transforming {csv_file.name} to BUFR")

Expand All @@ -118,23 +114,12 @@ def transform(ctx, csv_file, mapping, output_dir, station_metadata,
with open(mappings_file) as fh:
mappings = json.load(fh)

# now identify geojson template to use
template = None

if geojson_template is not None:
if not os.path.isfile(geojson_template):
json_template_file = f"{MAPPINGS}{os.sep}{geojson_template}.geojson" # noqa
else:
json_template_file = geojson_template
with open(json_template_file) as fh:
template = json.load(fh)

metadata = None
with open(station_metadata) as fh:
metadata = json.load(fh)

try:
result = transform_csv(csv_file.read(), metadata, mappings, template)
result = transform_csv(csv_file.read(), metadata, mappings)
except Exception as err:
raise click.ClickException(err)

Expand All @@ -144,10 +129,6 @@ def transform(ctx, csv_file, mapping, output_dir, station_metadata,
bufr_filename = f"{output_dir}{os.sep}{key}.bufr4"
with open(bufr_filename, "wb") as fh:
fh.write(item["bufr4"])
if "geojson" in item:
json_filename = f"{output_dir}{os.sep}{key}.geojson"
with open(json_filename, "w") as fh:
fh.write(item["geojson"])

click.echo("Done")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,18 +29,18 @@
{"eccodes_key": "#1#wigosLocalIdentifierCharacter", "jsonpath": "$.wigosIds[0].wid_local"},
{"eccodes_key": "#1#stationOrSiteName", "csv_column": "Station_Name"},
{"eccodes_key": "#1#stationType", "csv_column": "WMO_Station_Type"},
{"eccodes_key": "#1#year", "csv_column": "M_Year"},
{"eccodes_key": "#1#month", "csv_column": "M_Month"},
{"eccodes_key": "#1#day", "csv_column": "M_DayOfMonth"},
{"eccodes_key": "#1#hour", "csv_column": "M_HourOfDay"},
{"eccodes_key": "#1#minute", "csv_column": "M_Minutes"},
{"eccodes_key": "#1#latitude", "csv_column": "Latitude"},
{"eccodes_key": "#1#longitude", "csv_column": "Longitude"},
{"eccodes_key": "#1#year", "csv_column": "M_Year", "valid_min": 2000, "valid_max": 2100},
{"eccodes_key": "#1#month", "csv_column": "M_Month", "valid_min": 1, "valid_max": 12},
{"eccodes_key": "#1#day", "csv_column": "M_DayOfMonth", "valid_min": 1, "valid_max": 31},
{"eccodes_key": "#1#hour", "csv_column": "M_HourOfDay", "valid_min": 0, "valid_max": 23},
{"eccodes_key": "#1#minute", "csv_column": "M_Minutes", "valid_min": 0, "valid_max": 59},
{"eccodes_key": "#1#latitude", "csv_column": "Latitude", "valid_min": -90, "valid_max": 90},
{"eccodes_key": "#1#longitude", "csv_column": "Longitude", "valid_min": -180, "valid_max": 180},
{"eccodes_key": "#1#heightOfStationGroundAboveMeanSeaLevel", "csv_column": "Elevation"},
{"eccodes_key": "#1#heightOfBarometerAboveMeanSeaLevel", "csv_column": "BP_Elevation"},
{"eccodes_key": "#1#nonCoordinatePressure", "csv_column": "BP"},
{"eccodes_key": "#1#pressureReducedToMeanSeaLevel", "csv_column": "QNH"},
{"eccodes_key": "#1#3HourPressureChange", "csv_column": "BP_Change"},
{"eccodes_key": "#1#3HourPressureChange", "csv_column": "BP_Change", "valid_min": -5000, "valid_max": 5230},
{"eccodes_key": "#1#characteristicOfPressureTendency", "csv_column": "BP_Tendency"},
{"eccodes_key": "#1#heightOfSensorAboveLocalGroundOrDeckOfMarinePlatform", "csv_column": "Temp_H"},
{"eccodes_key": "#1#airTemperature", "csv_column": "AirTempK"},
Expand Down
6 changes: 3 additions & 3 deletions process_station.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ station_name=`echo ${file} | cut -d '.' -f 1 | sed s/_SYNOP//g`
WSI=`grep ${station_name} ./data/input/station_list.csv | cut -d ',' -f 2`
csv2bufr data transform \
./data/input/${file} \
--mapping malawi_synop_bufr \
--geojson-template malawi_synop_json \
--bufr-template synop_bufr \
--geojson-template synop_json \
--output-dir ./data/output \
--station-metadata ./metadata/${WSI}.json >& ${WSI}.log
done
done
126 changes: 0 additions & 126 deletions tests/test_csv2bufr.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import csv
from io import StringIO
import logging
import json

from eccodes import (codes_bufr_new_from_samples, codes_release)
import pytest
Expand Down Expand Up @@ -89,111 +88,6 @@ def data_dict():
}


@pytest.fixture
def json_template():
# geoJSON template fixture: nodes with "eccodes_key" are replaced by
# values from the BUFR message; nodes with "format"/"args" are rendered
# as formatted strings; everything else passes through unchanged.
# NOTE: dict key order is significant - test_json compares the output
# against json_result via json.dumps string equality.
return {
"id": None,
"type": "Feature",
"geometry": {
"type": "Point",
"coordinates": [{"eccodes_key": "#1#longitude"},
{"eccodes_key": "#1#latitude"}]
},
"properties": {
"identifier": None,
# ISO 8601 timestamp assembled from the BUFR date/time elements
"phenomenonTime": {
"format": "{:04.0f}-{:02.0f}-{:02.0f}T{:02.0f}:{:02.0f}:00+00:00", # noqa
"args": [
{"eccodes_key": "#1#year"},
{"eccodes_key": "#1#month"},
{"eccodes_key": "#1#day"},
{"eccodes_key": "#1#hour"},
{"eccodes_key": "#1#minute"}
]},
"resultTime": None,
"observations": {
"#1#airTemperature": {
"value": {
"eccodes_key": "#1#airTemperature"
},
"cf_standard_name": "air_temperature",
"units": {
"eccodes_key": "#1#airTemperature->units"
},
"sensor_height_above_local_ground": None,
"sensor_height_above_mean_sea_level": None,
"valid_min": None,
"valid_max": None,
"scale": None,
"offset": None
},
"#1#pressureReducedToMeanSeaLevel": {
"value": {
"eccodes_key": "#1#pressureReducedToMeanSeaLevel"
},
"cf_standard_name": "pressure_at_mean_sea_level",
"units": {
"eccodes_key":
"#1#pressureReducedToMeanSeaLevel->units"
},
"sensor_height_above_local_ground": None,
"sensor_height_above_mean_sea_level": None,
"valid_min": None,
"valid_max": None,
"scale": None,
"offset": None
}
}
},
# "_meta" configures post-processing (unit conversion) and is removed
# from the final geoJSON output
"_meta": {
"units": {
"K": "Celsius"
}
}
}


@pytest.fixture
def json_result():
# Expected geoJSON output for the test data after template extraction
# and K -> Celsius unit conversion.
# NOTE: dict key order is significant - test_json compares against the
# produced document via json.dumps string equality; resultTime is
# filled in at test time because it is generated during encoding.
return {
"id": "WIGOS_0-1-2-ABCD_20211118T180000",
"type": "Feature",
"geometry": {
"type": "Point",
"coordinates": [0.0, 55.154]
},
"properties": {
"identifier": "WIGOS_0-1-2-ABCD_20211118T180000",
"phenomenonTime": "2021-11-18T18:00:00+00:00",
"resultTime": None,
"observations": {
"#1#airTemperature": {
# converted from Kelvin per the template's "_meta" units map
"value": 17.160000000000025,
"cf_standard_name": "air_temperature",
"units": "Celsius",
"sensor_height_above_local_ground": None,
"sensor_height_above_mean_sea_level": None,
"valid_min": None,
"valid_max": None,
"scale": None,
"offset": None
},
"#1#pressureReducedToMeanSeaLevel": {
"value": 100130.0,
"cf_standard_name": "pressure_at_mean_sea_level",
"units": "Pa",
"sensor_height_above_local_ground": None,
"sensor_height_above_mean_sea_level": None,
"valid_min": None,
"valid_max": None,
"scale": None,
"offset": None
}
}
}
}


@pytest.fixture
def data_to_encode():
return {
Expand Down Expand Up @@ -342,23 +236,3 @@ def test_transform(data_dict, station_dict, mapping_dict):
assert sorted(item["_meta"].keys()) == item_meta_keys

assert item["_meta"]["md5"] == "981938dbd97be3e5adc8e7b1c6eb642c"


def test_json(data_dict, station_dict, mapping_dict, json_template,
              json_result):
    """Encode one CSV row to BUFR and verify the geoJSON rendition."""
    # build a one-row CSV document from the data fixture
    buffer = StringIO()
    csv_writer = csv.DictWriter(buffer, quoting=csv.QUOTE_NONNUMERIC,
                                fieldnames=data_dict.keys())
    csv_writer.writeheader()
    csv_writer.writerow(data_dict)
    csv_data = buffer.getvalue()
    # transform CSV to BUFR and check each produced geoJSON document
    for item in transform(csv_data, station_dict, mapping_dict,
                          json_template):
        produced = json.loads(item["geojson"])
        # resultTime is generated at encode time, so copy it into the
        # expected document before comparing
        json_result["properties"]["resultTime"] = \
            produced["properties"]["resultTime"]
        assert json.dumps(produced) == json.dumps(json_result)

0 comments on commit ba7ce4e

Please sign in to comment.