-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlabrpt.py
347 lines (234 loc) · 8.92 KB
/
labrpt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
#!/usr/bin/env python
# coding: utf-8
# In[1]:
# %load labrpt.py
#!/usr/bin/env python3
# Generate reports from lab results.
# Run like:
# python3 labrpt.py [--db <database>.yml] [--corpus <corpus>.yml] <data>.csv
# --> Output written to <data>.csv.txt
# There are default values for <database> and <corpus>.
# E.g.
# python3 labrpt.py --db badstuffdb.yml --corpus copycorpus.yml cake0.csv
# --> Make the report in cake0.csv.txt with specific db and corpus
# or
# python3 labrpt.py cake1.csv
# --> Make the report in cake1.csv.txt with default db and corpus.
# or
# python3 labrpt.py cakes/*.csv
# --> Make the reports in cakes/cake0.csv.txt, cakes/cake1.csv.txt,
# cakes/cake2.csv.txt, etc with default db and corpus.
# Use the -v flag to see progress messages.
# Standard library.
import argparse
import csv
import os
import re
import sys
# Non-standard modules.
import yaml # PyYAML: pip3 install --user pyyaml
verbose = False # Global usually only read by verb().


def _fmt_tmdiff(seconds): # {{{
    '''Format a duration in seconds as compact ASCII, e.g. "1h2m3.500s".

    Weeks, days, hours and minutes are shown only when non-zero; seconds are
    always shown with millisecond precision.
    Negative durations are clamped to zero.
    '''
    seconds = max(0.0, float(seconds))
    whole = int(seconds)
    minutes, secs = divmod(whole, 60)
    hours, minutes = divmod(minutes, 60)
    days, hours = divmod(hours, 24)
    weeks, days = divmod(days, 7)

    units = ((weeks, 'w'), (days, 'd'), (hours, 'h'), (minutes, 'm'))
    parts = ["%d%s" % (n, unit) for n, unit in units if n]
    parts.append("%.3fs" % (secs + (seconds - whole)))
    return ''.join(parts) # }}}


def verb(msg='', end='\n', sv_tm=False, rpt_tm=False): # {{{
    '''Print a message to STDOUT when verbose flag is set.

    Each line is prefixed with its number which is kept track with a static
    variable, so this should not be called from parallel code.

    Optionally save and/or report time since last call, which is useful for
    coarse profiling.

    msg    -- Object to print; converted with str().
    end    -- Line terminator handed to print(). Anything other than '\\n'
              leaves the line open, so the next message continues it without
              a new line number.
    sv_tm  -- Save the current time as the reference for a later rpt_tm.
              The time is saved even when the verbose flag is off.
    rpt_tm -- Append the time elapsed since the last saved time, in square
              brackets. Nothing is appended if no time has been saved yet.
    '''
    import time
    tm_now = time.time()

    # Initialize static variables on first execution.
    static_vars = ("linenum", "newline", "tm_saved")
    if not all(hasattr(verb, x) for x in static_vars):
        verb.linenum = 1 # Strictly incrementing counter of printed lines.
        verb.newline = True # FSM used for counting lines and printing numbers.
        verb.tm_saved = None # Storage for time state.

    if verbose:
        if verb.newline:
            outstr = "%d %s" % (verb.linenum, str(msg))
        else:
            outstr = str(msg)

        # There is nothing to measure against until a time has been saved.
        if rpt_tm and verb.tm_saved is not None:
            outstr += " [%s]" % _fmt_tmdiff(tm_now - verb.tm_saved)

        fd = sys.stdout
        print(outstr, end=end, file=fd)
        fd.flush()

        if end == '\n':
            verb.linenum += 1
            verb.newline = True
        else:
            verb.newline = False

    if sv_tm:
        verb.tm_saved = time.time()
# }}}
def notCommentLine(line: str) -> bool:
    '''Return True unless the first non-whitespace character of line is '#'.'''
    firstChar = line.lstrip()[:1]
    return firstChar != '#'
def deduplicateSpaces(line: str) -> str:
    '''Collapse every run of space characters in line into a single space.

    Only the space character is affected; tabs and newlines are left alone.
    '''
    spaceRun = re.compile(" +")
    return spaceRun.sub(' ', line)
def processRow(state, rowNum, row): # {{{
    '''Fold one row of lab results into the running state.

    Placeholder: rowNum and row are currently ignored and the very same
    state object is handed back unchanged.
    '''
    # TODO
    return state # }}}
def stateToReport(state): # {{{
    '''Render the accumulated state as report text.

    Placeholder: state is currently ignored and the report is always empty.
    '''
    # TODO
    return "" # }}}
def getArgs(): # {{{
    '''Parse the command line and return the resulting namespace.

    As a side effect the module-level verbose flag is set from -v/--verbose.
    '''
    global verbose

    cli = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        description="labrpt Report Maker",
    )

    # One (flags, options) entry per argument, in the order shown by --help.
    argSpecs = (
        (("-v", "--verbose"), {
            "default": False,
            "action": 'store_true',
            "help": "Display progress messages.",
        }),
        (("-d", "--delimiter"), {
            "type": str,
            "default": ',',
            "help": "String separating columns.",
        }),
        # NOTE: CSV format is possible but parsing is harder.
        (("--db",), {
            "type": str,
            "default": "badstuffdb.yml", # TODO: Change to something proper.
            "help": "Database of chemical info. YAML format.",
        }),
        (("--corpus",), {
            "type": str,
            "default": "copycorpus.yml", # TODO: Change to something proper.
            "help": "Copytext for each chemical. YAML format.",
        }),
        (("fnames",), {
            "nargs": '+',
            "type": str,
            "help": "List of CSV files to generate reports for.",
        }),
    )
    for flags, options in argSpecs:
        cli.add_argument(*flags, **options)

    parsed = cli.parse_args()
    verbose = parsed.verbose

    return parsed # }}}
def main(args): # {{{
    '''Generate a report for every lab-results CSV listed in args.fnames.

    args is the namespace returned by getArgs(); its db, corpus, delimiter
    and fnames attributes are read here.
    Each report is written next to its input as "<input>.txt".

    Returns 0 on completion. File, YAML and CSV errors are not caught here
    and propagate to the caller.
    '''

    # Read in entire chemical database.
    # NOTE: This is easier in YAML format instead of CSV.
    verb("Reading database... ", end='')
    with open(args.db, 'r') as fd:
        db = yaml.safe_load(fd)
    verb("Imported Database")

    # Map of lowercased pesticide name -> ADI.
    # Numeric ADIs are stored as floats; some entries are not numbers and are
    # kept exactly as the database holds them.
    # NOTE: Not consumed yet; processRow()/stateToReport() are still TODO.
    TDIsPulledfromDB = {}
    for item in db:
        adi = item['ADI']
        try:
            adi = float(adi)
        except (TypeError, ValueError):
            pass
        TDIsPulledfromDB[item["Pesticide Name"].lower()] = adi
    verb("Done")

    # Read in entire copy corpus from the file given by --corpus.
    verb("Reading corpus... ", end='')
    with open(args.corpus, 'r') as fd:
        corpus = yaml.safe_load(fd)

    for item in corpus:
        # For some reason the pesticide names arrive under the key '' when
        # the YAML is imported. Move them to 'Pesticide Name', lowercased.
        # Items without that key are left as they are.
        if '' in item:
            item['Pesticide Name'] = item.pop('').lower()
    verb("Corpus read")

    verb("There are %d files to process" % len(args.fnames))
    for fnamei in args.fnames:
        fnameo = fnamei + ".txt"

        # Read in lab results CSV, dropping comment lines and collapsing
        # repeated spaces. The cleaned lines are kept in a list because they
        # are parsed twice below.
        verb("Reading CSV %s... " % fnamei, end='')
        with open(fnamei, 'r', newline='') as fd:
            lines = [deduplicateSpaces(ln) for ln in fd if notCommentLine(ln)]

        # Just the results: chemical name (lowercased) -> amount detected.
        # NOTE: Not consumed yet; see the TODO in processRow().
        labresults = {
            row['Parameter'].lower(): float(row['Result'])
            for row in csv.DictReader(lines, delimiter=args.delimiter)
        }
        verb("Done")

        # Analyse and decide what pieces of copy text to use.
        # processRow() gets the raw rows, header row included.
        state = {}
        rawRows = csv.reader(lines, delimiter=args.delimiter)
        for rowNum, row in enumerate(rawRows):
            state = processRow(state, rowNum, row)

        report = stateToReport(state)

        verb("Writing report... ", end='')
        with open(fnameo, 'w') as fd:
            fd.write(report)
        verb("Done")

    return 0 # }}}
if __name__ == "__main__":
    # Script entry point: parse the command line, generate the reports, and
    # use the value returned by main() as the process exit status.
    args = getArgs()
    exitStatus = main(args)
    sys.exit(exitStatus)
# In[141]:
# Load the lab results: chemical name -> amount detected.
# Names are lowercased so that they match the lowercased pesticide names used
# as keys of TDIsPulledfromDB.
with open('labdata0.csv', newline='') as csvfile:
    reader = csv.DictReader(csvfile)
    AmountsDetectedfromlab = {
        rows['Parameter'].lower(): float(rows['Result']) for rows in reader
    }
# AmountsDetectedfromlab={
# "chemical1":0.085,
# "chemical2":0.1,
# "chemical3":0.02,
# "chemical4":0.1,
# } #pretend data. Really we want to pull these from the csv that the lab sent us
AmountsDetectedfromlab
# In[171]:
# In[145]:
#### I only want to look in the database for items that were actually detected in the lab.
### So iterate through the keys of AmountsDetectedfromlab, which give the chemical names,
### and return a dictionary of chemical_name:TDI.
### Only the first word of each lab name is used for the lookup.
# NOTE: TDIsPulledfromDB is assigned inside main() in this file, so it has to
# be made available at module scope before this cell can run.
TDIsNeeded = {
    key.split()[0]: TDIsPulledfromDB[key.split()[0]]
    for key in AmountsDetectedfromlab
}
TDIsNeeded
# In[153]:
### TII (mg per individual) = Tolerable Daily Intake (mg/kg b.w.) * Body weight (kg b.w.) ###
# Body weight used for the calculations that follow.
bw = 75


def calcTII(TDI, bw):
    '''Return the tolerable intake per person for one chemical.

    This is the TDI multiplied by the body weight bw.
    '''
    return TDI * bw
# Tolerable intake per person for every chemical the lab detected.
# The first word of each lab name is the key looked up in TDIsPulledfromDB.
TIIS = dict(
    (key, calcTII(TDIsPulledfromDB[key.split()[0]], bw))
    for key in AmountsDetectedfromlab
)
TIIS
# In[162]:
### TCL (mg/kg(food)) = TII / Amount of food consumed (kg) = TII / AE ###
def calcTCL(TII, AE):
    '''Return TII divided by AE, the amount of food eaten.'''
    return TII / AE
AE = 0.25 #this is the amount eaten in kg. Want this input on command line
# Apply calcTCL to every TII value, i.e. divide each one by AE.
TCLresultsDict = dict(
    (key, calcTCL(value, AE)) for key, value in TIIS.items()
)
TCLresultsDict
# In[155]:
## Define a function to calculate the Concern Index for any chemical
def calcCIChem(AD, TCL):
    '''Return the amount detected, AD, divided by TCL.

    The result is not scaled by 100 here; that is left for a later step.
    '''
    return AD / TCL
# Concern index for each chemical: amount detected divided by its TCL.
## Build the CI results dict, keyed like TCLresultsDict.
ChemicalCIresultDic = dict(
    (key, calcCIChem(AmountsDetectedfromlab[key], value))
    for key, value in TCLresultsDict.items()
)
ChemicalCIresultDic
# In[156]:
# Name of the chemical whose concern index is the largest.
highestCI = max(ChemicalCIresultDic, key=lambda chem: ChemicalCIresultDic[chem])
highestCI
# In[157]:
# Every other chemical, in the order they appear in ChemicalCIresultDic.
keylst = [chem for chem in ChemicalCIresultDic if chem != highestCI]
keylst
# In[21]:
# ##Geomean approach
# ###copied this method from here: https://bytes.com/topic/python/answers/727876-geometrical-mean
# def geomean(numbers):
# product = 1
# for n in numbers:
# product *= n
# return product ** (1.0/len(numbers))
# FCIoverall=geomean(FCIresultDict.values())
# FCIoverall
# In[181]:
### Term of the overall FCI for every chemical except the one with the highest index.
### Presumably: FCI = (AD1/TCL1)*100 + (AD2/TCL2)^2*100 + ... + (ADn/TCLn)^2*100,
### with chemical 1 being the one with the highest index.
def calcCI_low(CI):
    '''Return the square of the concern index CI, multiplied by 100.'''
    squared = CI ** 2
    return squared * 100
### Sum the calcCI_low term of every chemical except the highest.
totalFCI = 0
for chem in keylst:
    lowTerm = calcCI_low(ChemicalCIresultDic[chem])
    totalFCI = totalFCI + lowTerm
def calcCI_highest(CI):
    '''Return the concern index CI multiplied by 100.'''
    scaled = CI * 100
    return scaled
# Add the term of the highest-index chemical to the total accumulated for the
# other chemicals, rather than replacing that total, then truncate to an int.
totalFCI = int(totalFCI + calcCI_highest(ChemicalCIresultDic[highestCI]))
print("Overall Food Concern Index =",totalFCI)
# In[ ]:
#