-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPNET_Step_10.py
484 lines (357 loc) · 18.8 KB
/
PNET_Step_10.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
import arcpy
import os
import shutil
from PNET_Functions import get_watershed_folders, delete_old, create_csv, \
get_fields, csv_to_list, parse_multistring, make_folder
import scipy.stats as stat
import numpy as np
import matplotlib.pyplot as plt
import math
# -------------------------------------------------------------------------------
# Name: PNET Step 10
# Purpose: Creates Comparison Graphs for categorical data
#
# Author: Tyler Hatch
#
# Created: 3/1/2020
# Latest Update: 3/1/2020
# -------------------------------------------------------------------------------
# The folder containing all watershed folders
root_folder = arcpy.GetParameterAsText(0)
# CSV to set field data from instead (expects headers)
input_field_csv = arcpy.GetParameterAsText(1)
def main():
# Initialize variables and file locations
arcpy.env.overwriteOutput = True
watershed_folders = get_watershed_folders(root_folder)
# Setup projectwide data
projectwide_output = make_folder(os.path.join(root_folder, "00_ProjectWide", "Outputs", "Comparisons"), "Categorical")
projectwide_database = os.path.join(root_folder, "00_ProjectWide", "Inputs", "Database", "Field_Database.csv")
delete_old(projectwide_output)
keep_fields = ["FID", "Shape", "POINT_X", "POINT_Y", "SnapDist", "FldRchLen",
"EcoRgn_L4", "EcoRgn_L3", "HUC8", "NAME", "StreamName",
"PRECIP", "DRAREA", "iGeo_ElMax", "iGeo_ElMin"]
# set the field lists to the values from the file
# meta_group_field, meta_group_field_name, group_field, group_field_name, field_db_fields = read_field_csv(input_field_csv)
graphs = read_field_csv_new(input_field_csv)
for graph in graphs:
to_merge = []
meta_group_field = graph[0]
meta_group_field_name = graph[1]
group_field = graph[2]
group_field_name = graph[3]
field_db_fields = graph[4]
arcpy.AddMessage("Graphing {}...".format(group_field_name))
if meta_group_field and group_field_name:
meta_exists = True
else:
meta_exists = False
for watershed in watershed_folders:
arcpy.AddMessage("\tWorking on {}...".format(watershed))
# Setup watershed data
watershed_output = make_folder(os.path.join(watershed, "Outputs", "Comparisons"), "Categorical")
delete_old(watershed_output)
# Get the CSV with Field data
watershed_db = projectwide_database
# Get data from the field database
field_data_list = csv_to_list(watershed_db)
# Find certain field indexes in the field database
id_field_db = field_data_list[0].index("""RchID""")
field_indexes_db = []
for field_db_field in field_db_fields:
field_indexes_db.append(field_data_list[0].index(field_db_field))
# remove headers
field_data_list.pop(0)
# Create a list with only necessary data
field_compare_list = []
for row in field_data_list:
to_add = []
# Add id column
to_add.append(row[id_field_db])
# Add any other columns
for index in field_indexes_db:
to_add.append(row[index])
field_compare_list.append(to_add)
# Get the CSV with extracted PNET data
watershed_pnet = os.path.join(watershed, "Outputs", "Extracted_Data", "All_Data.csv")
# Get data from the PNET output
pnet_data_list = csv_to_list(watershed_pnet)
# Find certain PNET indexes in the PNET output
id_pnet = pnet_data_list[0].index("""RchID""")
if group_field not in pnet_data_list[0]:
arcpy.AddMessage("Could not complete plots for {}, could not find {} field".format(watershed, group_field))
elif meta_exists and meta_group_field not in pnet_data_list[0]:
arcpy.AddMessage("Could not complete plots for {}, could not find {} field".format(watershed, meta_group_field))
else:
group_pnet = pnet_data_list[0].index(group_field)
if meta_exists:
meta_group_pnet = pnet_data_list[0].index(meta_group_field)
# remove headers
pnet_data_list.pop(0)
# Create a list with only necessary data
pnet_compare_list = []
for row in pnet_data_list:
to_add = []
# Add id column
to_add.append(row[id_pnet])
# Add grouping columns
if meta_exists:
to_add.append(row[meta_group_pnet])
to_add.append(row[group_pnet])
# Add this row to the overall list
pnet_compare_list.append(to_add)
# Make list of new fields
if meta_exists:
new_fields = ["""RchID""", meta_group_field_name, group_field_name]
else:
new_fields = ["""RchID""", group_field_name]
for new_field in field_db_fields:
# This is where field data will go
new_fields.append("Y_" + new_field[:8])
# Perform data comparisons
both_compare_list = [new_fields]
for pnet_row in pnet_compare_list:
new_row = []
# Get the ID of the current row
current_site = pnet_row[0]
# Find the corresponding row in the field data list
for db_row_num, db_row in enumerate(field_compare_list):
# If the two site are the same
if db_row[0] == current_site:
field_row = db_row
break
# Add the reach ID to our new row
new_row.append(pnet_row[0])
# Add the group/metagroup field to our new row
new_row.append(pnet_row[1])
if meta_exists:
# Add the metagroup to our new row
new_row.append(pnet_row[2])
# Prepare to iterate through each column of data, skipping rchID
field_iter = iter(field_row)
next(field_iter)
for field_data in field_iter:
# Make sure that the data is not missing
if field_data != "":
# Add data into the new row
field_num = float(field_data)
new_row.append(field_num)
else:
new_row += 0
both_compare_list.append(new_row)
# Add in data for each of the other PNET fields (That were created in previous steps)
pnet_data_list = csv_to_list(watershed_pnet)
for row_num, row in enumerate(both_compare_list):
# Add data from each PNET field
data_to_add = []
for add_field in keep_fields:
if add_field in pnet_data_list[0]:
this_index = pnet_data_list[0].index(add_field)
data_to_add.append(pnet_data_list[row_num][this_index])
both_compare_list[row_num] = data_to_add + row
# Create a new shapefile to hold data
template = os.path.join(watershed, "Outputs", "Extracted_Data", "Extraction_Merge_Points.shp")
comparison_points = arcpy.CreateFeatureclass_management(watershed_output, "Categorical_Comparison_Points.shp",
"POINT", spatial_reference=template)
to_merge.append(comparison_points)
# Add in new fields to the shapefile
for field, example in zip(both_compare_list[0], both_compare_list[1]):
# Make sure we are not adding in any already existing default fields
shapefile_fields = get_fields(comparison_points)
if field not in shapefile_fields:
# Decide to add a text or float field
if isinstance(example, str):
arcpy.AddField_management(comparison_points, field[:10], "TEXT")
else:
arcpy.AddField_management(comparison_points, field[:10], "FLOAT")
# Skip headers
iter_list = iter(both_compare_list)
next(iter_list)
# remove useless field
arcpy.DeleteField_management(comparison_points, "Id")
# Add in data to the shapefile
with arcpy.da.InsertCursor(comparison_points, '*') as inserter:
with arcpy.da.SearchCursor(template, '*') as searcher:
for row, search_row in zip(iter_list, searcher):
# Steal Shape and FID data from template
row[0] = search_row[0]
row[1] = search_row[1]
# Add in row
inserter.insertRow(row)
# Save as CSV
create_csv(os.path.join(watershed_output, "Categorical_Comparison_Data.csv"), comparison_points)
# Get a list of all the different metagroup types
if meta_exists:
metagroup_types = unique_values(comparison_points, meta_group_field_name[:10])
# Make a folder, shapefile, and plots for every metagroup
if " " in metagroup_types:
metagroup_types.remove(" ")
for metagroup in metagroup_types:
# Create a new folder for only data in this meta group
plot_folder = make_folder(watershed_output, "{}_{}".format(meta_group_field_name.title(), metagroup.title()))
delete_old(plot_folder)
# Create a shapefile with only data we want to look at
layer_name = 'temp'
new_shapefile = os.path.join(plot_folder, '{}_{}_Comparison.shp'.format(meta_group_field_name.title(), metagroup.title()))
arcpy.MakeFeatureLayer_management(comparison_points, layer_name)
query = '{} = \'{}\''.format(meta_group_field_name[:10], metagroup)
arcpy.SelectLayerByAttribute_management(layer_name, 'NEW_SELECTION', query)
arcpy.CopyFeatures_management(layer_name, new_shapefile)
# Create plots for this data
create_plots(new_shapefile, group_field_name, field_db_fields, plot_folder, metagroup, meta_group_field_name)
else:
plot_folder = make_folder(watershed_output, "{}".format(group_field_name.title()))
delete_old(plot_folder)
# Create a shapefile with only data we want to look at
layer_name = 'temp'
new_shapefile = os.path.join(plot_folder, '{}_Comparison.shp'.format(group_field_name.title()))
arcpy.MakeFeatureLayer_management(comparison_points, layer_name)
arcpy.CopyFeatures_management(layer_name, new_shapefile)
# Create plots for this data
create_plots(new_shapefile, group_field_name, field_db_fields, plot_folder)
arcpy.Delete_management(new_shapefile)
# Do projectwide
arcpy.AddMessage('\tSaving ProjectWide...')
save_loc = os.path.join(projectwide_output, "Categorical_Comparison_Points_{}.shp".format(group_field_name))
merged = arcpy.Merge_management(to_merge, save_loc)
create_csv(os.path.join(projectwide_output, "Categorical_Comparison_Data.csv"), merged)
if meta_exists:
# Get a list of all the different metagroup types
metagroup_types = unique_values(merged, meta_group_field_name[:10])
# Make a folder, shapefile, and plots for every metagroup
if " " in metagroup_types:
metagroup_types.remove(" ")
for metagroup in metagroup_types:
# Create a new folder for only data in this meta group
plot_folder = make_folder(projectwide_output, "{}_{}".format(meta_group_field_name.title(), metagroup.title()))
delete_old(plot_folder)
# Create a shapefile with only data we want to look at
layer_name = 'temp'
new_shapefile = os.path.join(plot_folder,
'{}_{}_Comparison.shp'.format(meta_group_field_name.title(), metagroup.title()))
arcpy.MakeFeatureLayer_management(merged, layer_name)
query = '{} = \'{}\''.format(meta_group_field_name[:10], metagroup)
arcpy.SelectLayerByAttribute_management(layer_name, 'NEW_SELECTION', query)
arcpy.CopyFeatures_management(layer_name, new_shapefile)
# Create plots for this data
create_plots(new_shapefile, group_field_name, field_db_fields, plot_folder, metagroup, meta_group_field_name)
else:
plot_folder = make_folder(projectwide_output, "{}".format(group_field_name.title()))
delete_old(plot_folder)
# Create a shapefile with only data we want to look at
layer_name = 'temp'
new_shapefile = os.path.join(plot_folder, '{}_Comparison.shp'.format(group_field_name.title()))
arcpy.MakeFeatureLayer_management(merged, layer_name)
arcpy.CopyFeatures_management(layer_name, new_shapefile)
# Create plots for this data
create_plots(new_shapefile, group_field_name, field_db_fields, plot_folder)
arcpy.Delete_management(new_shapefile)
arcpy.Delete_management(merged)
def create_plots(data_shapefile, group_field, y_axis_fields, out_folder, metagroup = "", metagroup_name = ""):
for y_axis_field in y_axis_fields:
try:
labels, all_data = get_data(data_shapefile, group_field, y_axis_field)
new_labels = []
for entry, label in zip(all_data, labels):
n = len(entry)
new_label = "[{}] ".format(n) + label
new_labels.append(new_label)
# set up plot
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(9, 9))
bplot = ax.boxplot(all_data,
vert=True, # vertical box alignment
labels=new_labels) # will be used to label x-ticks
if metagroup != "":
ax.set_title('{} values for all {} groups\n [When {} = {}]'
.format(y_axis_field.title(), group_field.title(), metagroup_name.title(), metagroup.title()))
else:
ax.set_title('{} values for all {} groups'
.format(y_axis_field.title(), group_field.title()))
ax.yaxis.grid(True)
ax.set_xlabel('{} Category'.format(group_field.title()))
ax.set_ylabel('{} Value'.format(y_axis_field.title()))
plt.xticks(rotation=90)
# save plot
plot_name = os.path.join(out_folder, "{}_Plot.png".format(y_axis_field.title()))
plt.savefig(plot_name, bbox_inches='tight')
plt.close()
except:
arcpy.AddMessage("\t\tCould not plot {}".format(y_axis_field))
def get_data(data_shapefile, group_field, y_axis_field):
# Pull comparison values from the comparison points shapefile
actual_group_field = group_field[:10]
actual_y_axis_field = "Y_" + y_axis_field[:8]
groups_list = unique_values(data_shapefile, actual_group_field)
if " " in groups_list:
groups_list.remove(" ")
if "" in groups_list:
groups_list.remove("")
data = []
for group in groups_list:
query = '{} = \'{}\''.format(actual_group_field, group)
group_data = arcpy.da.FeatureClassToNumPyArray(data_shapefile, actual_y_axis_field, query).astype(np.float)
# Remove negatives and zeros
group_data = group_data[np.where(group_data > 0.0)]
data.append(group_data)
return groups_list, data
def plot_points(x, y, axis):
axis.scatter(x, y, color="darkred", label="Field Sites", alpha=.4)
def plot_regression(x, y, axis, new_max):
# calculate regression
regression = stat.linregress(x, y)
model_x = np.arange(0.0, new_max, new_max/10000)
model_y = regression.slope * model_x + regression.intercept
# plot regression line
axis.plot(model_x, model_y, color='black', linewidth=2.0, linestyle='-', label='Regression line')
# calculate prediction intervals and plot as shaded areas
n = len(x)
return regression.rvalue**2, regression.slope, regression.intercept
def save_db(database, main_folder):
save_loc = os.path.join(main_folder, "Inputs", "Database", "Field_Database.csv")
shutil.copyfile(database, save_loc)
return save_loc
def read_field_csv(file):
input_field_list = csv_to_list(file)
# remove headers
input_field_list.pop(0)
meta = input_field_list[0][0]
meta_name = input_field_list[0][1]
group = input_field_list[0][2]
group_name = input_field_list[0][3]
fields = [input_field_list[0][4]]
# remove top row
input_field_list.pop(0)
for y_axis_field in input_field_list:
fields.append(y_axis_field[4])
return meta, meta_name, group, group_name, fields
def read_field_csv_new(file):
input_field_list = csv_to_list(file)
# remove headers
input_field_list.pop(0)
end_graph = False
fields = []
graphs = []
for row in input_field_list:
if len(row[2]) > 1:
if end_graph:
graphs.append([meta, meta_name, group, group_name, fields])
# Start a new graph
meta = row[0]
meta_name = row[1]
group = row[2]
group_name = row[3]
fields = [row[4]]
end_graph = True
else:
fields.append(row[4])
# Add final row
graphs.append([meta, meta_name, group, group_name, fields])
return graphs
def is_csv(file):
return file.endswith('.csv')
def unique_values(table, field):
# Adapted from https://gis.stackexchange.com/questions/208430/trying-to-extract-a-list-of-unique-values-from-a-field-using-python
with arcpy.da.SearchCursor(table, [field]) as cursor:
return sorted({row[0] for row in cursor})
if __name__ == "__main__":
main()