diff --git a/documentation/gis.rst b/documentation/gis.rst index cdde15703..6a4428e84 100644 --- a/documentation/gis.rst +++ b/documentation/gis.rst @@ -119,13 +119,13 @@ For example, the junctions GeoDataFrame contains the following information: :skipif: gpd is None >>> print(wn_gis.junctions.head()) - node_type elevation initial_quality geometry - name - 10 Junction 216.408 5.000e-04 POINT (20.00000 70.00000) - 11 Junction 216.408 5.000e-04 POINT (30.00000 70.00000) - 12 Junction 213.360 5.000e-04 POINT (50.00000 70.00000) - 13 Junction 211.836 5.000e-04 POINT (70.00000 70.00000) - 21 Junction 213.360 5.000e-04 POINT (30.00000 40.00000) + elevation initial_quality geometry + name + 10 216.408 5.000e-04 POINT (20.00000 70.00000) + 11 216.408 5.000e-04 POINT (30.00000 70.00000) + 12 213.360 5.000e-04 POINT (50.00000 70.00000) + 13 211.836 5.000e-04 POINT (70.00000 70.00000) + 21 213.360 5.000e-04 POINT (30.00000 40.00000) Each GeoDataFrame contains attributes and geometry: @@ -341,23 +341,23 @@ and then translates the GeoDataFrames coordinates to EPSG:3857. 
>>> wn_gis = wntr.network.to_gis(wn, crs='EPSG:4326') >>> print(wn_gis.junctions.head()) - node_type elevation initial_quality geometry - name - 10 Junction 216.408 5.000e-04 POINT (20.00000 70.00000) - 11 Junction 216.408 5.000e-04 POINT (30.00000 70.00000) - 12 Junction 213.360 5.000e-04 POINT (50.00000 70.00000) - 13 Junction 211.836 5.000e-04 POINT (70.00000 70.00000) - 21 Junction 213.360 5.000e-04 POINT (30.00000 40.00000) + elevation initial_quality geometry + name + 10 216.408 5.000e-04 POINT (20.00000 70.00000) + 11 216.408 5.000e-04 POINT (30.00000 70.00000) + 12 213.360 5.000e-04 POINT (50.00000 70.00000) + 13 211.836 5.000e-04 POINT (70.00000 70.00000) + 21 213.360 5.000e-04 POINT (30.00000 40.00000) >>> wn_gis.to_crs('EPSG:3857') >>> print(wn_gis.junctions.head()) - node_type elevation initial_quality geometry - name - 10 Junction 216.408 5.000e-04 POINT (2226389.816 11068715.659) - 11 Junction 216.408 5.000e-04 POINT (3339584.724 11068715.659) - 12 Junction 213.360 5.000e-04 POINT (5565974.540 11068715.659) - 13 Junction 211.836 5.000e-04 POINT (7792364.356 11068715.659) - 21 Junction 213.360 5.000e-04 POINT (3339584.724 4865942.280) + elevation initial_quality geometry + name + 10 216.408 5.000e-04 POINT (2226389.816 11068715.659) + 11 216.408 5.000e-04 POINT (3339584.724 11068715.659) + 12 213.360 5.000e-04 POINT (5565974.540 11068715.659) + 13 211.836 5.000e-04 POINT (7792364.356 11068715.659) + 21 213.360 5.000e-04 POINT (3339584.724 4865942.280) Snap point geometries to the nearest point or line ---------------------------------------------------- diff --git a/documentation/model_io.rst b/documentation/model_io.rst index f66f8fd53..bcd5b7636 100644 --- a/documentation/model_io.rst +++ b/documentation/model_io.rst @@ -206,27 +206,29 @@ GeoJSON files GeoJSON files are commonly used to store geographic data structures. More information on GeoJSON files can be found at https://geojson.org. 
-To use GeoJSON files in WNTR, a set of valid base column names are required. -Valid base GeoJSON column names can be obtained using the -:class:`~wntr.network.io.valid_gis_names` function. -The following example returns valid base GeoJSON column names for junctions. +When reading GeoJSON files into WNTR, only a set of valid column names can be used. +Valid GeoJSON column names can be obtained using the +:class:`~wntr.network.io.valid_gis_names` function. By default, the function +returns all column names, both required and optional. +The following example returns valid GeoJSON column names for junctions. .. doctest:: :skipif: gpd is None >>> geojson_column_names = wntr.network.io.valid_gis_names() >>> print(geojson_column_names['junctions']) - ['name', 'elevation', 'coordinates', 'emitter_coefficient', 'initial_quality', 'minimum_pressure', 'required_pressure', 'pressure_exponent', 'tag'] + ['name', 'elevation', 'geometry', 'emitter_coefficient', 'initial_quality', 'minimum_pressure', 'required_pressure', 'pressure_exponent', 'tag'] -A minimal list of valid column names can also be obtained by setting ``complete_list`` to False. -Column names that are optional (i.e., ``initial_quality``) and not included in the GeoJSON file are defined using default values. +A minimal list of required column names can also be obtained by setting ``complete_list`` to False. +Column names that are optional (e.g., ``initial_quality``) and not included in the GeoJSON file are +defined using default values. .. doctest:: :skipif: gpd is None >>> geojson_column_names = wntr.network.io.valid_gis_names(complete_list=False) >>> print(geojson_column_names['junctions']) - ['name', 'elevation', 'coordinates'] + ['name', 'elevation', 'geometry'] Note that GeoJSON files can contain additional custom column names that are assigned to WaterNetworkModel objects. 
@@ -253,7 +255,7 @@ Note that patterns, curves, sources, controls, and options are not stored in the The :class:`~wntr.network.io.read_geojson` function creates a WaterNetworkModel from a dictionary of GeoJSON files. -Valid base column names and additional custom attributes are added to the model. +Valid column names and additional custom attributes are added to the model. The function can also be used to append information from GeoJSON files into an existing WaterNetworkModel. .. doctest:: @@ -300,20 +302,21 @@ To use Esri Shapefiles in WNTR, several formatting requirements are enforced: assumed that the first 10 characters of each attribute are unique. * To create WaterNetworkModel from Shapefiles, a set of valid field names are required. - Valid base Shapefiles field names can be obtained using the - :class:`~wntr.network.io.valid_gis_names` function. - For Shapefiles, the `truncate` input parameter should be set to 10 (characters). - The following example returns valid base Shapefile field names for junctions. - Note that attributes like ``base_demand`` are truncated to ``base_deman``. + Valid Shapefile field names can be obtained using the + :class:`~wntr.network.io.valid_gis_names` function. By default, the function + returns all field names, both required and optional. + For Shapefiles, the ``truncate_names`` input parameter should be set to 10 (characters). + The following example returns valid Shapefile field names for junctions. + Note that attributes like ``minimum_pressure`` are truncated to ``minimum_pr``. .. 
doctest:: :skipif: gpd is None >>> shapefile_field_names = wntr.network.io.valid_gis_names(truncate_names=10) >>> print(shapefile_field_names['junctions']) - ['name', 'elevation', 'coordinate', 'emitter_co', 'initial_qu', 'minimum_pr', 'required_p', 'pressure_e', 'tag'] + ['name', 'elevation', 'geometry', 'emitter_co', 'initial_qu', 'minimum_pr', 'required_p', 'pressure_e', 'tag'] - A minimal list of valid field names can also be obtained by setting ``complete_list`` to False. + A minimal list of required field names can also be obtained by setting ``complete_list`` to False. Field names that are optional (i.e., ``initial_quality``) and not included in the Shapefile are defined using default values. .. doctest:: @@ -322,7 +325,7 @@ To use Esri Shapefiles in WNTR, several formatting requirements are enforced: >>> shapefile_field_names = wntr.network.io.valid_gis_names(complete_list=False, ... truncate_names=10) >>> print(shapefile_field_names['junctions']) - ['name', 'elevation', 'coordinate'] + ['name', 'elevation', 'geometry'] * Shapefiles can contain additional custom field names that are assigned to WaterNetworkModel objects. @@ -349,7 +352,7 @@ Note that patterns, curves, sources, controls, and options are not stored in the The :class:`~wntr.network.io.read_shapefile` function creates a WaterNetworkModel from a dictionary of Shapefile directories. -Valid base field names and additional custom field names are added to the model. +Valid field names and additional custom field names are added to the model. The function can also be used to append information from Shapefiles into an existing WaterNetworkModel. .. 
doctest:: diff --git a/wntr/gis/network.py b/wntr/gis/network.py index d9c4e9d6b..dd1a633e7 100644 --- a/wntr/gis/network.py +++ b/wntr/gis/network.py @@ -99,14 +99,21 @@ def _create_gis(self, wn, crs: str = None, pumps_as_points: bool = False, Represent valves as points (True) or lines (False), by default False """ - def _extract_geodataframe(df, crs=None, links_as_points=False): - # Drop any column with all NaN + def _extract_geodataframe(df, crs=None, valid_base_names=None, + links_as_points=False): + if valid_base_names is None: + valid_base_names = [] + + # Drop any column with all NaN, this removes excess attributes + # Valid base attributes that have all None values are added back + # at the end of this routine df = df.loc[:, ~df.isna().all()] + # Define geom and drop node_type/link_type if df.shape[0] > 0: - # Define geom if 'node_type' in df.columns: geom = [Point((x,y)) for x,y in df['coordinates']] + del df['node_type'] elif 'link_type' in df.columns: geom = [] for link_name in df['name']: @@ -120,16 +127,25 @@ def _extract_geodataframe(df, crs=None, links_as_points=False): ls.append(v) ls.append(link.end_node.coordinates) geom.append(LineString(ls)) + del df['link_type'] - # Drop column if not a str, float, int, or bool + # Drop column if not a str, float, int, or bool (or np.bool_) + # This drops columns like coordinates, vertices # This could be extended to keep additional data type (list, # tuple, network elements like Patterns, Curves) drop_cols = [] for col in df.columns: - if not isinstance(df.iloc[0][col], (str, float, int, bool)): + # Added np.bool_ to the following check + # Returned by df.to_dict('records') for some network models + if not isinstance(df.iloc[0][col], (str, float, int, bool, np.bool_)): drop_cols.append(col) df = df.drop(columns=drop_cols) + # Add back in valid base attributes that had all None values + cols = list(set(valid_base_names) - set(df.columns)) + if len(cols) > 0: + df[cols] = None + # Set index if len(df) > 0: 
df.set_index('name', inplace=True) @@ -137,7 +153,7 @@ def _extract_geodataframe(df, crs=None, links_as_points=False): df = gpd.GeoDataFrame(df, crs=crs, geometry=geom) else: df = gpd.GeoDataFrame() - + return df # Convert the WaterNetworkModel to a dictionary @@ -146,29 +162,31 @@ def _extract_geodataframe(df, crs=None, links_as_points=False): df_nodes = pd.DataFrame(wn_dict['nodes']) df_links = pd.DataFrame(wn_dict['links']) + valid_base_names = self._valid_names(complete_list=False, truncate_names=None) + # Junctions df = df_nodes[df_nodes['node_type'] == 'Junction'] - self.junctions = _extract_geodataframe(df, crs) + self.junctions = _extract_geodataframe(df, crs, valid_base_names['junctions']) # Tanks df = df_nodes[df_nodes['node_type'] == 'Tank'] - self.tanks = _extract_geodataframe(df, crs) + self.tanks = _extract_geodataframe(df, crs, valid_base_names['tanks']) # Reservoirs df = df_nodes[df_nodes['node_type'] == 'Reservoir'] - self.reservoirs = _extract_geodataframe(df, crs) + self.reservoirs = _extract_geodataframe(df, crs, valid_base_names['reservoirs']) # Pipes df = df_links[df_links['link_type'] == 'Pipe'] - self.pipes = _extract_geodataframe(df, crs, False) + self.pipes = _extract_geodataframe(df, crs, valid_base_names['pipes'], False) # Pumps df = df_links[df_links['link_type'] == 'Pump'] - self.pumps = _extract_geodataframe(df, crs, pumps_as_points) + self.pumps = _extract_geodataframe(df, crs, valid_base_names['pumps'], pumps_as_points) # Valves df = df_links[df_links['link_type'] == 'Valve'] - self.valves = _extract_geodataframe(df, crs, valves_as_points) + self.valves = _extract_geodataframe(df, crs, valid_base_names['valves'], valves_as_points) def _create_wn(self, append=None): """ @@ -187,22 +205,32 @@ def _create_wn(self, append=None): wn_dict['nodes'] = [] wn_dict['links'] = [] - for element in [self.junctions, self.tanks, self.reservoirs]: + # Modifications to create a WaterNetworkModel from a dict + # Reset index + # Create 
coordinates/vertices from geometry + # Add node_type/link_type + for node_type, element in [('Junction', self.junctions), + ('Tank', self.tanks), + ('Reservoir', self.reservoirs)]: if element.shape[0] > 0: assert (element['geometry'].geom_type).isin(['Point']).all() df = element.reset_index(names="name") df.rename(columns={'geometry':'coordinates'}, inplace=True) df['coordinates'] = [[x,y] for x,y in zip(df['coordinates'].x, df['coordinates'].y)] + df['node_type'] = node_type wn_dict['nodes'].extend(df.to_dict('records')) - for element in [self.pipes, self.pumps, self.valves]: + for link_type, element in [('Pipe', self.pipes), + ('Pump', self.pumps), + ('Valve', self.valves)]: if element.shape[0] > 0: assert 'start_node_name' in element.columns assert 'end_node_name' in element.columns df = element.reset_index(names="name") df['vertices'] = df.apply(lambda row: list(row.geometry.coords)[1:-1], axis=1) df.drop(columns=['geometry'], inplace=True) + df['link_type'] = link_type wn_dict['links'].extend(df.to_dict('records')) # Create WaterNetworkModel from dictionary @@ -470,6 +498,17 @@ def _valid_names(self, complete_list=True, truncate_names=None): if truncate_names is not None and truncate_names > 0: for element, attributes in valid_names.items(): valid_names[element] = [attribute[:truncate_names] for attribute in attributes] + + for key, vals in valid_names.items(): + # Remove coordinates and vertices (not used to create GeoDataFrame geometry) + if 'coordinates' in valid_names[key]: + valid_names[key].remove('coordinates') + if 'vertices' in valid_names[key]: + valid_names[key].remove('vertices') + + # Add geometry + if 'geometry' not in valid_names[key]: + valid_names[key].append('geometry') return valid_names diff --git a/wntr/network/elements.py b/wntr/network/elements.py index 28e3e7fe8..24453ccd9 100644 --- a/wntr/network/elements.py +++ b/wntr/network/elements.py @@ -394,7 +394,7 @@ class Tank(Node): "min_level", "max_level", "diameter", - "min_vol" + 
"min_vol", "vol_curve_name", "overflow", "coordinates"] @@ -1041,7 +1041,7 @@ class Pump(Link): "end_node_name", "pump_type", "pump_curve_name", - "power" + "power", "base_speed", "speed_pattern_name", "initial_status"] diff --git a/wntr/network/io.py b/wntr/network/io.py index e4ceac8de..a299e459a 100644 --- a/wntr/network/io.py +++ b/wntr/network/io.py @@ -644,12 +644,13 @@ def valid_gis_names(complete_list=True, truncate_names=None): Valid column/field names for GeoJSON or Shapefiles Note that Shapefile field names are truncated to 10 characters - (set truncate=10) + (set truncate_names=10) Parameters ---------- complete_list : bool - Include a complete list of column/field names (beyond basic attributes) + When true, returns both optional and required column/field names. + When false, only returns required column/field names. truncate_names : None or int Truncate column/field names to specified number of characters, set truncate=10 for Shapefiles. None indicates no truncation. diff --git a/wntr/tests/test_gis.py b/wntr/tests/test_gis.py index 0872e179f..3018c80ae 100644 --- a/wntr/tests/test_gis.py +++ b/wntr/tests/test_gis.py @@ -132,12 +132,12 @@ def test_wn_to_gis(self): #assert self.gis_data.valves.shape[0] == self.wn.num_valves # Check minimal set of attributes - assert set(['node_type', 'elevation', 'geometry']).issubset(self.gis_data.junctions.columns) - assert set(['node_type', 'elevation', 'geometry']).issubset(self.gis_data.tanks.columns) - assert set(['node_type', 'geometry']).issubset(self.gis_data.reservoirs.columns) - assert set(['link_type', 'start_node_name', 'end_node_name', 'geometry']).issubset(self.gis_data.pipes.columns) - assert set(['link_type', 'start_node_name', 'end_node_name', 'geometry']).issubset(self.gis_data.pumps.columns) - #assert set(['link_type', 'start_node_name', 'end_node_name', 'geometry']).issubset(self.gis_data.valves.columns) # Net1 has no valves + assert set(['elevation', 
'geometry']).issubset(self.gis_data.junctions.columns) + assert set(['elevation', 'geometry']).issubset(self.gis_data.tanks.columns) + assert set(['geometry']).issubset(self.gis_data.reservoirs.columns) + assert set(['start_node_name', 'end_node_name', 'geometry']).issubset(self.gis_data.pipes.columns) + assert set(['start_node_name', 'end_node_name', 'geometry']).issubset(self.gis_data.pumps.columns) + #assert set(['start_node_name', 'end_node_name', 'geometry']).issubset(self.gis_data.valves.columns) # Net1 has no valves def test_gis_to_wn(self): @@ -265,15 +265,17 @@ def test_set_crs_to_crs(self): def test_add_attributes_and_write(self): - self.gis_data.add_node_attributes(self.results.node['pressure'].loc[3600,:], 'Pressure_1hr') - self.gis_data.add_link_attributes(self.results.link['flowrate'].loc[3600,:], 'Flowrate_1hr') + gis_data = self.wn.to_gis() + + gis_data.add_node_attributes(self.results.node['pressure'].loc[3600,:], 'Pressure_1hr') + gis_data.add_link_attributes(self.results.link['flowrate'].loc[3600,:], 'Flowrate_1hr') - assert 'Pressure_1hr' in self.gis_data.junctions.columns - assert 'Pressure_1hr' in self.gis_data.tanks.columns - assert 'Pressure_1hr' in self.gis_data.reservoirs.columns - assert 'Flowrate_1hr' in self.gis_data.pipes.columns - assert 'Flowrate_1hr' in self.gis_data.pumps.columns - assert 'Flowrate_1hr' not in self.gis_data.valves.columns # Net1 has no valves + assert 'Pressure_1hr' in gis_data.junctions.columns + assert 'Pressure_1hr' in gis_data.tanks.columns + assert 'Pressure_1hr' in gis_data.reservoirs.columns + assert 'Flowrate_1hr' in gis_data.pipes.columns + assert 'Flowrate_1hr' in gis_data.pumps.columns + assert 'Flowrate_1hr' not in gis_data.valves.columns # Net1 has no valves def test_write_geojson(self): prefix = 'temp_Net1' diff --git a/wntr/tests/test_network.py b/wntr/tests/test_network.py index 5e30e1204..f6428ecf2 100644 --- a/wntr/tests/test_network.py +++ b/wntr/tests/test_network.py @@ -1087,6 +1087,30 @@ def 
test_shapefile_roundtrip(self): files['valves'] = 'temp_valves' B = self.wntr.network.read_shapefile(files) assert(wn._compare(B, level=0)) - + + def test_valid_gis_names(self): + + required_names = wntr.network.io.valid_gis_names(complete_list=False, truncate_names=None) + valid_names = wntr.network.io.valid_gis_names(complete_list=True, truncate_names=None) + + wn = self.wntr.network.WaterNetworkModel(join(ex_datadir, "Net6.inp")) + gis_data = wn.to_gis() + + for component in required_names.keys(): + required_columns = required_names[component] + valid_columns = valid_names[component] + + data = getattr(gis_data, component) + data_columns = list(data.columns) + data_columns.append(data.index.name) + + # Check that all data columns are valid + assert len(set(data_columns)-set(valid_columns)) == 0 + # Check that all required columns are in the data + assert len(set(required_columns)-set(data_columns)) == 0 + # Assert that node_type and link_type are not in data columns + assert 'node_type' not in data_columns + assert 'link_type' not in data_columns + if __name__ == "__main__": unittest.main()