-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgel_report.py
524 lines (429 loc) · 20.5 KB
/
gel_report.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
'''
gel_report.py
This script takes a GEL Participant ID and queries the CIP-API to return a clinical report.
This report is then modified to make it clear that this report has not been issued by GEL, including extracting some information from the local LIMS system.
Hopefully this solves a problem faced by many labs and prevents too much duplication of work!
Created 02/06/2017 by Aled Jones
'''
import requests
from bs4 import BeautifulSoup
import pdfkit
import pyodbc
from datetime import datetime
from jinja2 import Environment, FileSystemLoader
import sys
import getopt
# Import local settings
from authentication import APIAuthentication # import the function from the authentication script which generates the access token
import gel_report_config as config # config file
from database_connection_config import * # database connection details
import GEL_logo as gel_logo
class connect():
def __init__(self):
# call function to retrieve the api token
self.token=APIAuthentication().get_token()
# The link to the first page of the CIP API results
self.interpretationlist = "https://cipapi.genomicsengland.nhs.uk/api/2/interpretation-request?cip={cip}&status=sent_to_gmcs%2Creport_generated%2Creport_sent&members={proband}&format=json"
# The probandID to return the report for
self.proband_id = ""
# Empty variables for report paths to be generated
self.html_report = ""
self.pdf_report = ""
# Usage example
self.usage = "python gel_report.py -g <GELParticipantID> -h True/False"
# Header line to remove
self.old_header = "Genomics England, Queen Mary University of London,"
# Old banner colour
self.existing_banner_css = "#007C83; /*#27b7cc;*/"
# new banner colour - turn header transparent so logo can be seen
self.new_banner_css = "transparent;\n height: 100px;"
# Where to add in new participant information table
self.where_to_put_patient_info_table = "<h3>Participant Information</h3>"
# Where to add in new clinician info table
self.where_to_put_clinician_info = ""
#line from header which states date report generated
self.date_generated = ""
# variable used to move date report generated from the header into a lower table
self.lastrow = ""
#variables which identify columns in the table which need changing
self.replace_with_proband_id = "Link to clinical summary"
self.proband_id_string = "GeL Proband ID"
# a flag to determine if the header is required to be changed to look like a report from the lab (as opposed to keeping the Gel geader to make it clear it's a GEL report).
self.remove_headers = ""
# create a variable to hold the various cip versions
self.max_cip_ver = 0
def take_inputs(self, argv):
'''Capture the gel participant ID from the command line'''
# define expected inputs
try:
opts, args = getopt.getopt(argv, "g:h:", ['gelid', 'removeheader'])
# raise errors with usage eg
except getopt.GetoptError:
print "ERROR - correct usage is", self.usage
sys.exit(2)
# loop through the arguments
for opt, arg in opts:
if opt in ("-g", "--gelid"):
# capture the proband ID
self.proband_id = str(arg)
if opt in ("-h", "--removeheader"):
# If this flag is given don't touch the report header.
self.remove_headers = str(arg)
if self.proband_id:
#build paths to reports
self.htmlfilename = self.proband_id + ".html"
self.html_report = config.html_reports + self.htmlfilename
self.pdf_report = config.pdf_dir + self.proband_id + ".pdf"
# Call the function to read the API
json = self.read_API_page()
# if test passed parse the json to pull out report
self.parse_json(json)
def read_API_page(self):
'''
This function uses the authentication token to read the interpretation request list end point
This endpoint is filtered based on the CIP, status and the proband ID
The resulting json is returned
'''
# use requests module to return all the cases available to you
# insert CIP and proband into url
self.interpretationlist = self.interpretationlist.format(cip = config.CIP, proband = self.proband_id)
# if proxy is set in the config file
if config.proxy:
response = requests.get(self.interpretationlist, headers = {"Authorization": "JWT " + self.token}, proxies = config.proxy) # note space is required after JWT
else:
response = requests.get(self.interpretationlist, headers = {"Authorization": "JWT " + self.token}) # note space is required after JWT
# pass this in the json format to the parse_json function
return response.json()
def parse_json(self,json):
'''
This function takes the json file containing all cases whcih match the CIP, status and proband filter.
A few checks are performed to ensure there is only one record before the record is parsed and the report is downloaded and modified
'''
# loop through the results
if json['count'] == 0:
print "no record for proband %s with the status sent_to_gmcs,report_generated or report_sent" % (self.proband_id)
elif json['count'] > 1:
print "STOP - multiple unblocked interpretation requests for desired CIP (%s) found for proband %s\nPlease inform GEL" % (config.CIP, self.proband_id)
else:
for sample in json['results']:
# ensure have found the desired proband id and make sure correct CIP
assert sample["proband"] == self.proband_id and sample["cip"] == config.CIP
# variable to record the highest version of the report
highest_report_version = 0
# variable to record URL for the html report
highest_report_url = ""
# loop through each report in the clinical_reports section to get the highest report version
for report_number in range(len(sample["clinical_reports"])):
# capture the verison number from the report url - it's the last number eg the last value (3) in the link https://cipapi.genomicsengland.nhs.uk/api/ClinicalReport/123/1/2/3
report_version_number = sample["clinical_reports"][report_number]["url"].split("/")[-1]
# if this is the highest report number
if report_version_number > highest_report_version:
# capture the new highest report version and url
highest_report_version = report_version_number
highest_report_url = sample["clinical_reports"][report_number]["url"]
# loop through each interpreted genome to find the highest cip_version
for interpreted_genome in range(len(sample["interpreted_genomes"])):
if int(sample["interpreted_genomes"][interpreted_genome]["cip_version"]) > self.max_cip_ver:
self.max_cip_ver = sample["interpreted_genomes"][interpreted_genome]["cip_version"]
#read the interpretation_request to pull out any variants
self.read_interpretation_request(sample)
# if proxy is set in the config file
if config.proxy:
report = requests.get(highest_report_url, headers = {"Authorization": "JWT " + self.token}, proxies = config.proxy)# note space is required after JWT
else:
report = requests.get(highest_report_url, headers = {"Authorization": "JWT " + self.token})# note space is required after JWT
# create an beautiful soup object for the html clinical report
soup = BeautifulSoup(report.content, "html.parser")
#check for errors first
soup = self.check_for_errors(soup)
# if headers are to be removed (not a negative negative)
if self.remove_headers == "True":
#pass the object to the replace_gel_address function and update the object
soup = self.replace_gel_address(soup)
#read and remove the over header (grey bar with proband id and date generated)
soup = self.remove_over_header(soup)
#pass to function to remove the banner text
soup = self.remove_banner_text(soup)
#pass to function to put things from the over header into a different table
soup = self.move_date_report_generated(soup)
#pass to function to replace the GeL logo with that of the lab (and or UKAS)
soup = self.replace_GeL_logo(soup)
#stop the annex tables being split over pages
soup = self.stop_annex_tables_splitting_over_page(soup)
# pass to function to expand coverage
soup = self.expand_coverage(soup)
#write html to file so can be read in edit_CSS function
with open(self.html_report, "w") as file:
file.write(str(soup))
## Call function to pull out patient demographics from LIMS. capture dict
patient_info_dict = self.read_lims(sample["sites"])
#Can't change CSS or insert tables using beautiful soup so need to read and replace html file
self.edit_CSS(patient_info_dict)
print "creating clinical report"
#pass modified file to create a pdf.
self.create_pdf(self.htmlfilename, self.pdf_report, patient_info_dict)
def edit_CSS(self,patient_info_dict):
'''Can't change CSS or insert tables using beautiful soup so need to read and replace html file.
This function reads that file, loops through it and
1 - Adds in a table containing patient information extracted from LIMS
for reports that are being modified to look like they are not from gel:
2 - edits the CSS which defines the banner colour so it is transparent
3 - Adds the date report generated to the table (was previously in the grey header)
4 - Adds in table with clinician referral information
'''
# read file into object (a list)
with open(self.html_report, "r") as file:
data = file.readlines()
#loop through the file object
for i, line in enumerate(data):
## Add in the new patient info table
# if the line is where we want to add in this table (defined in __init__)
if self.where_to_put_patient_info_table in line:
# open the html template
with open(config.new_patientinfo_table,"r") as template:
#write template to a list
template_to_write = template.readlines()
# Add in this template at this position NB this will over write the line so this line is also in the template
data[i] = "".join(template_to_write)
# if it's a report which is to be modified
if self.remove_headers == "True":
## Replace the banner CSS
# if line contains the existing banner css (from __init__)
if self.existing_banner_css in line:
# replace that line in the file object so it's now transparent background
data[i]=line.replace(self.existing_banner_css, self.new_banner_css)
## Add extra row to the table with the date report generated
# if the last row of the table (as stated in the function move_date_report_generated)
if self.lastrow in line:
# create empty list
template_to_write = []
# write the table code to the list
template_to_write.append("<tr><td>Date Report Generated:</td><td><em>" + self.date_generated + "</em></td></tr>")
# append the last row as below we are overwriting the existing line
template_to_write.append(self.lastrow)
# Add this list to the file object
data[i] = "".join(template_to_write)
## Add in the clinician and address
# look for desired location
if self.where_to_put_clinician_info in line:
#empty list
template_to_write = []
#add new div
#template_to_write.append("<div>")
# add line which is going to be replaced
template_to_write.append(self.where_to_put_clinician_info)
# open html template containing the clinician info structure (and a new header)
with open(config.new_clinician_table,"r") as template:
# add this file to the list
for line in template.readlines():
#print patient_info_dict['clinician1']
if line.startswith("<p>cc.{{copies}}</p>"):
if patient_info_dict['copies'] == "":
pass
else:
template_to_write.append(line)
else:
template_to_write.append(line)
# add the new html code back to the list
data[i] = "".join(template_to_write)
# otherwise add in clinician info for less heavily modified reports
else:
## Add in the clinician and address
# look for desired location
if gel_logo.gel_logo_code in line:
#empty list
template_to_write = []
# add line which is going to be replaced
template_to_write.append(gel_logo.gel_logo_code)
# open html template containing the clinician info structure (and a new header)
with open(config.new_clinician_table,"r") as template:
# add this file to the list
for line in template.readlines():
#print patient_info_dict['clinician1']
if line.startswith("<p>cc.{{copies}}</p>"):
if patient_info_dict['copies'] == "":
pass
else:
template_to_write.append(line)
else:
template_to_write.append(line)
# add the new html code back to the list
data[i] = "".join(template_to_write)
#write the modified list back to a file
with open(self.html_report, "w") as file:
file.writelines(data)
def read_lims(self, sites):
'''This function must create a dictionary which is used to populate the html variables
eg patient_info_dict={"NHS":NHS,"InternalPatientID":InternalPatientID,"dob":DOB,"firstname":FName,"lastname":LName,"Sex":Sex,"clinician1":clinician1,"clinician1_add":clinician1_address,"copies":copies,"report_title":report_title}
NB report_title is from the config file'''
# TO BE COMPLETED BY EACH GMC
if find_patient:
# complete the dictionary
patient_info_dict={"NHS":NHS,"InternalPatientID":InternalPatientID,"dob":DOB,"firstname":FName,"lastname":LName,"gender":Gender,"clinician":clinician,"clinician_add":clinic_address,"report_title":report_title}
#return the dictionary
return patient_info_dict
else:
# report error and quit if can't find all the required info (or if it doesn't meet required formats)
print "No Patient information for that proband in database\ncannot create report"
quit()
def check_for_errors(self, html):
'''issue warning (or abort) if any errors are found when reporting eg can't find coverage report etc. Uses a warning message from config file'''
# look for presence of an warning message
for div in html.find_all('div', {'class':'content-div error-panel'}):
# if there is an error
if div:
# capture and print the error message
for message in div.find_all('p'):
print (config.warning_message % self.proband_id) + message.get_text()
#if required stop the report being generated
#quit()
# return html
return html
def replace_gel_address(self,html):
'''This function looks for and removes the GeL address '''
# notes is a list of all the p tags where the class == note
for note in html.find_all('p', {'class':'note'}):
# if text == gel address (as defined in the init function)
if self.old_header in note.get_text():
# remove the gel address tag
note.extract()
# return modified html
return html
def move_date_report_generated(self,html):
'''This function looks for the table underneath the GEl address and above participant info and puts in the information from the over_header'''
#loop through the tables - the table we are after is the only one with class=form-table and cellpadding=0
for table in html.find_all('table', {'class':'form-table', 'cellpadding':'0'}):
# find all rows in the table
rows=table.find_all('tr')
#loop through rows
for row in rows:
#find all columns
cols=row.find_all('td')
# loop through columns
for col in cols:
# if the column contains "Link to clinical summary"
if self.replace_with_proband_id in col.get_text():
# replace the string with GEL Proband ID (defined in init)
col.string=col.get_text().replace(self.replace_with_proband_id,self.proband_id_string)
# remove the <a> tag around the GeL ID and remove the hyperlink (link won't work without logging in first)
for a in col.find_all('a'):
a.replaceWithChildren()
#delete hyperlink
del a['href']
#capture the last row of the table to use to insert another row to the table elsewhere in the script
self.lastrow=str(row)
#return new html
return html
def remove_over_header(self,html):
'''This function reads and then removes the grey bar at the top of the report containing the proband id and the date report was generated'''
# find the span tag where class=right (should only be one - the date generated)
for span in html.find_all('span', {'class':'right'}):
# capture the text, and remove the Generated on string
self.date_generated=span.get_text().replace('Generated on:','')
#delete the div containing the over header
for div in html.find_all('div', {'class':'over-header content-div'}):
div.decompose()
#return new html
return html
def remove_banner_text(self,html):
'''This function removes the title from the big green banner (Whole Genome Analysis Rare disease Primary Findings)'''
# remove the header text
for div in html.find_all('div', {'class':'banner-text'}):
div.decompose()
#return new html
return html
def stop_annex_tables_splitting_over_page(self,html):
'''This script takes the referenced databases and software version tables and stops these being broken over pages'''
# find all tables
for table in html.find_all('table'):
#find the table head
for head in table.find_all('thead'):
# find each column in the header
for col in head.find_all('th'):
# if there is a column called name
if col.get_text() == "Name":
# prevent page breaks
table['style'] = " page-break-inside: avoid !important"
#return new html
return html
def replace_GeL_logo(self,html):
'''This function replaces gel logo with a new logo'''
if self.remove_headers == "True":
# find the img tag where class == logo (should only be one)
for img in html.find_all('img', {'class':"logo"}):
# change the src to link to new image
img['src'] = config.new_logo
#change the style so is on right hand side and has a small margin
img['style'] = "float:right; margin: 2%;"
# capture this tag so we can use it to place the clinicians name and address
self.where_to_put_clinician_info = str(img)
else:
# find the img tag where class == logo (should only be one)
for img in html.find_all('img', {'class':"logo"}):
# Need to ensure the image doesn't shrink
img['style'] = "height:100px;"
# return the modified html
return html
def expand_coverage(self,html):
'''Expand the coverage section'''
# find the coverage div and delete so coverage seciton no longer needs to be clicked to be visible
for section in html.find_all('div', id = "coverage"):
del(section['hidden'])
# find the section header and remove text/hyperlink properties
for section in html.find_all('a'):
# find the coverage section
if "Coverage Metrics" in section.get_text():
# remove the extra styles no longer needed
del(section['onclick'])
del(section['style'])
# create new tag and section title
new_header = "Coverage Report"
# replace p with h3
section.name = "h3"
# change the string
section.string = new_header
return html
def create_pdf(self, htmlfilename, pdfreport_path, patient_info):
# add the path to wkhtmltopdf to the pdfkit config settings
pdfkitconfig = pdfkit.configuration(wkhtmltopdf=config.wkhtmltopdf_path)
# create options to use in the footer
options = {'footer-right':'Page [page] of [toPage]','footer-left':'Date Created [isodate]','quiet':""}
# use Jinja to populate the variables within the html template
# first tell the system where to find the html template (this is written by beautiful soup above)
env = Environment(loader=FileSystemLoader(config.html_reports))
template = env.get_template(htmlfilename)
# create the pdf using template.render to populate variables from dictionary created in read_geneworks
pdfkit.from_string(template.render(patient_info), pdfreport_path, options=options, configuration=pdfkitconfig)
# print
print "Report can be found at "+pdfreport_path
def fetchone(self, query):
# Connection
cnxn = pyodbc.connect(dbconnectstring)
# Opening a cursor
cursor = cnxn.cursor()
#perform query
cursor.execute(query)
#capture result
result = cursor.fetchone()
#yield result
if result:
return result
else:
print "no result found"
def fetchall(self, query):
# Connection
cnxn = pyodbc.connect(dbconnectstring)
# Opening a cursor
cursor = cnxn.cursor()
#perform query
cursor.execute(query)
#capture result
result = cursor.fetchall()
#yield result
if result:
return(result)
else:
print "no result found"
if __name__=="__main__":
c=connect()
c.take_inputs(sys.argv[1:])