-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathanalyze_dwi.py
135 lines (112 loc) · 7.86 KB
/
analyze_dwi.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
"""Performs spatial analysis on crash data. Uses manually prepped county data which includes population counts
2010-2017."""
import sys
import arcpy
import pandas as pd
# Using NAD 83 as coordinate system
spatial_ref = arcpy.SpatialReference(4269)
directory = sys.path[0]
workspace = '{0}\\DWI.gdb'.format(directory)
if arcpy.Exists(workspace):
pass
else:
arcpy.CreateFileGDB_management(directory, "DWI")
arcpy.env.workspace = workspace
arcpy.env.overwriteOutput = True
counties = '{0}\\counties'.format(workspace)
# data_years = range(2010, 2018)
data_years = range(2010, 2017)
county_list = []
# Build county list
with arcpy.da.SearchCursor(counties, ["CNTY_NM"]) as cursor1:
for row in cursor1:
county_list.append(row[0])
def county_deaths():
"""Joins prepped alcohol related crash data points with county polygons to find number of deaths per capita in each
county for year on year trend analysis and to find enforcement patterns on a regional scale"""
arcpy.MakeFeatureLayer_management("Master_DWIpoints", "Master_lyr")
# Dictionary of dictionaries for building county deaths data frame
data_dict_deaths = {name: {} for name in county_list}
for key in data_dict_deaths:
data_dict_deaths[key] = {year: 0 for year in data_years}
for year in data_years:
# Calculate county death counts for each year and save to new fc
arcpy.SelectLayerByAttribute_management(in_layer_or_view="Master_lyr", selection_type="NEW_SELECTION",
where_clause="Crash_Year = {0}".format(year))
arcpy.SpatialJoin_analysis(target_features=counties, join_features="Master_lyr",
out_feature_class='countydeaths{0}'.format(year),
join_operation="JOIN_ONE_TO_ONE", join_type="KEEP_ALL",
field_mapping="CNTY_NM 'CNTY_NM' true true false 13 Text 0 0 ,First,#," + counties + ",CNTY_NM,-1,-1;CNTY_NBR 'CNTY_NBR' true true false 2 Short 0 0 ,First,#," + counties + ",CNTY_NBR,-1,-1;FIPS 'FIPS' true true false 2 Short 0 0 ,First,#," + counties + ",FIPS,-1,-1;pop{0} 'pop{1}' true true false 4 Long 0 0 ,First,#,".format(year, year) + counties + ",pop{0},-1,-1;Crash_Death_Count 'Crash_Death_Count{1}' true true false 4 Long 0 0 ,Sum,#,Master_DWIpoints,Crash_Death_Count,-1,-1".format(year, year),
match_option='CONTAINS')
# Add per capita (1000) field with 5 decimal places
arcpy.AddField_management('countydeaths{0}'.format(year), "DeathsPer1000{0}".format(year), "DOUBLE",
field_scale=5)
# Calculate alcohol related crash deaths per 1000 people in field
arcpy.CalculateField_management(in_table='countydeaths{0}'.format(year), field="DeathsPer1000{0}".format(year),
expression="calcdeathrate( !Crash_Death_Count!, !pop{0}!)".format(year),
expression_type="PYTHON_9.3",
code_block="def calcdeathrate(death, pop):\n if death == 0 or death is None:\n return 0\n else:\n rate = float(death)/(float( pop)/1000)\n return rate")
with arcpy.da.SearchCursor('countydeaths{0}'.format(year), ["CNTY_NM", "DeathsPer1000{0}".format(year)]) as cursor1:
for row in cursor1:
data_dict_deaths[row[0]][year] = row[1]
# Convert to data frame and save as csv
county_df = pd.DataFrame.from_dict(data_dict_deaths)
county_df.to_csv('{0}\CSVResults\CountyDeaths.csv'.format(directory))
def county_injuries():
"""Joins prepped alcohol related crash data points with county polygons to find number of injuries per capita in each
county for year on year trend analysis and to find enforcement patterns on a regional scale"""
arcpy.MakeFeatureLayer_management("Master_DWIpoints", "Master_lyr")
# Dictionary of dictionaries for building county injuries data frame
data_dict_injuries = {name: {} for name in county_list}
for key in data_dict_injuries:
data_dict_injuries[key] = {year: 0 for year in data_years}
for year in data_years:
# Calculate county injury counts for each year and save to new fc
arcpy.SelectLayerByAttribute_management(in_layer_or_view="Master_lyr", selection_type="NEW_SELECTION",
where_clause="Crash_Year = {0} AND Crash_Severity <> 'Not Injured' AND Crash_Severity <> 'Unknown'".format(year))
arcpy.SpatialJoin_analysis(target_features=counties, join_features="Master_lyr",
out_feature_class='countyinjuries{0}'.format(year),
join_operation="JOIN_ONE_TO_ONE", join_type="KEEP_ALL",
field_mapping="CNTY_NM 'CNTY_NM' true true false 13 Text 0 0 ,First,#," + counties + ",CNTY_NM,-1,-1;CNTY_NBR 'CNTY_NBR' true true false 2 Short 0 0 ,First,#," + counties + ",CNTY_NBR,-1,-1;FIPS 'FIPS' true true false 2 Short 0 0 ,First,#," + counties + ",FIPS,-1,-1;pop{0} 'pop{1}' true true false 4 Long 0 0 ,First,#,".format(year, year) + counties + ",pop{0},-1,-1;Crash_Severity 'Crash_Severity{1}' true true false 8000 Text 0 0 ,Count,#,Master_DWIpoints,Crash_Severity,-1,-1".format(year, year),
match_option='CONTAINS')
arcpy.AddField_management('countyinjuries{0}'.format(year), "InjuriesPer1000{0}".format(year), "DOUBLE",
field_scale=5)
arcpy.CalculateField_management(in_table='countyinjuries{0}'.format(year), field="InjuriesPer1000{0}".format(year),
expression="calcinjuryrate( !Crash_Severity!, !pop{0}!)".format(year),
expression_type="PYTHON_9.3",
code_block="def calcinjuryrate(injuries, pop):\n if injuries == 0 or injuries is None:\n return 0\n else:\n rate = float(injuries)/(float( pop)/1000)\n return rate")
with arcpy.da.SearchCursor('countyinjuries{0}'.format(year), ["CNTY_NM", "InjuriesPer1000{0}".format(year)]) as cursor2:
for row in cursor2:
data_dict_injuries[row[0]][year] = row[1]
# Convert to data frame and save as csv
county_df = pd.DataFrame.from_dict(data_dict_injuries)
county_df.to_csv('{0}\CSVResults\CountyInjuries.csv'.format(directory))
def calculate_averages():
"""Calculates crash and injury rate averages and compares to statewide rates. Adds field with difference to county
feature class."""
# Read and format data frames.
death_df = (pd.read_csv('{0}\CSVResults\CountyDeaths.csv'.format(directory))).drop('Unnamed: 0', axis=1)
injury_df = (pd.read_csv('{0}\CSVResults\CountyInjuries.csv'.format(directory))).drop('Unnamed: 0', axis=1)
data_dict_rates = {name: [] for name in county_list}
# Calculate average statewide rates
death_avg = ((death_df.mean(numeric_only=True)).sum())/254
injury_avg = ((injury_df.mean(numeric_only=True)).sum())/254
# Compare averages and place in dict in order: death average, injury average
for c in county_list:
county_injury_diff = injury_avg - ((injury_df[c].values.sum())/len(data_years))
county_death_dff = death_avg - ((death_df[c].values.sum())/len(data_years))
data_dict_rates[c] = [county_death_dff, county_injury_diff]
with arcpy.da.UpdateCursor(counties, ["CNTY_NM", "DeathRateDiff", "InjuryRateDiff"]) as cursor3:
for row in cursor3:
row[1] = data_dict_rates[row[0]][0]
row[2] = data_dict_rates[row[0]][1]
cursor3.updateRow(row)
# Clean up
fcs = arcpy.ListFeatureClasses()
print fcs
for fc in fcs:
if "deaths" in fc or "injuries" in fc:
arcpy.Delete_management(fc)
# county_deaths()
# county_injuries()
# calculate_averages()