Skip to content

Commit

Permalink
GCGI-1318, pWGS report improvements (#374)
Browse files Browse the repository at this point in the history
* first draft of changes

* add detection cutoff constant to results_dict

* Add introductory sentence

* introductory sentence tweaks

* introductory sentence tweaks

* import case overview plugins

* no message

* no message

* Add extraction step to plugin.py

* include data from different plugin to the results dictionary

* Pull parameters from full.config.ini

* get path to the workspace directory

* pull pc.results from json output file

* write data to json

* Different pattern for extracted fields

* no message

* using built in method to write to json

* Use wrapper to access configuration parameters

* Added file not found exception

* file existence check and test adjustments

* checksum update and testing for scenario when file exists

* update summary/plugin_test.py checksum

---------

Co-authored-by: Oumaima Hamza <[email protected]>
  • Loading branch information
OumaimaHamza and Oumaima Hamza authored Apr 19, 2024
1 parent 187a66c commit 8157f38
Show file tree
Hide file tree
Showing 12 changed files with 105 additions and 78 deletions.
7 changes: 4 additions & 3 deletions src/lib/djerba/plugins/pwgs/analysis/analysis_template.html
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@

<img id='Detection' style='width: 100%; object-fit: contain' src="${results.get(constants.PWGS_PLOT)}"/>

<p> This test counted the number of sites in the patient's plasma
<p> This patient has been diagnosed with ${results.get(constants.PRIMARY_CANCER)} and has been referred for the OICR Genomics ${results.get(constants.ASSAY)} assay through the ${results.get(constants.STUDY)} study.
This test counted the number of sites in the patient's plasma
at which variants were detected that match variants found in the patient's primary tumour
(candidate sites passing QC: <strong>${html_builder.k_comma_format(results.get(constants.SITES_CHECKED))}</strong>).
To test for the possibility that candidate SNVs match SNVs arising from sequencing artefacts,
Expand All @@ -17,12 +18,12 @@
The number of SNVs of tumour origin
detected in this plasma sample (detected sites: <strong>${html_builder.k_comma_format(results.get(constants.SITES_DETECTED))}</strong>) was
<strong>${results.get(constants.SIGNIFICANCE)} </strong>
than in the control cohort (N=${results.get(constants.COHORT_N)}).</p>
than in the control cohort (N=${results.get(constants.COHORT_N)}). Dataset detection cutoff is <strong>${html_builder.k_comma_format(results.get(constants.DATASET_DETECTION_CUTOFF))}</strong>. </p>
<table class="variants" style="width:100%">
<thead>
<th style=" width:16%">Reads Checked</th>
<th style=" width:16%">Reads Detected</th>
<th style=" width:20%">Detected:Checked Reads Ratio (%)</th>
<th style=" width:20%">Detected:Checked Reads (%)</th>
</thead>
<tbody>
<tr>
Expand Down
90 changes: 53 additions & 37 deletions src/lib/djerba/plugins/pwgs/analysis/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import os
import csv
from decimal import Decimal
import math
import json
import re
import logging

Expand All @@ -15,11 +17,11 @@
import djerba.plugins.pwgs.pwgs_tools as pwgs_tools
import djerba.plugins.pwgs.constants as pc

class main(plugin_base):

class main(plugin_base):
PRIORITY = 200
PLUGIN_VERSION = '1.1'

def configure(self, config):
config = self.apply_defaults(config)
wrapper = self.get_config_wrapper(config)
Expand Down Expand Up @@ -51,31 +53,46 @@ def configure(self, config):

def extract(self, config):
wrapper = self.get_config_wrapper(config)
mrdetect_results = pwgs_tools.preprocess_results(self, config[self.identifier][pc.RESULTS_FILE])
hbc_results = self.preprocess_hbc(config[self.identifier][pc.HBC_FILE])
reads_detected = self.preprocess_vaf(config[self.identifier][pc.VAF_FILE])
pwgs_base64 = self.write_pwgs_plot(config[self.identifier][pc.HBC_FILE],
config[self.identifier][pc.VAF_FILE],
output_dir = self.workspace.print_location())
mrdetect_results = pwgs_tools.preprocess_results(self, wrapper.get_my_string(pc.RESULTS_FILE))
hbc_results = self.preprocess_hbc(wrapper.get_my_string(pc.HBC_FILE))
reads_detected = self.preprocess_vaf(wrapper.get_my_string(pc.VAF_FILE))
pwgs_base64 = self.write_pwgs_plot(wrapper.get_my_string(pc.HBC_FILE), wrapper.get_my_string(pc.VAF_FILE), output_dir=self.workspace.print_location())
self.logger.info("PWGS ANALYSIS: Finished preprocessing files")
data = self.get_starting_plugin_data(wrapper, self.PLUGIN_VERSION)
results = {
pc.CTDNA_OUTCOME: mrdetect_results[pc.CTDNA_OUTCOME],
pc.SIGNIFICANCE: mrdetect_results[pc.SIGNIFICANCE],
pc.TUMOUR_FRACTION_READS: float('%.1E' % Decimal( reads_detected*100 / hbc_results[pc.READS_CHECKED] )),
pc.SITES_CHECKED: hbc_results[pc.SITES_CHECKED],
pc.READS_CHECKED: hbc_results[pc.READS_CHECKED],
pc.SITES_DETECTED: hbc_results[pc.SITES_DETECTED],
pc.READS_DETECTED: reads_detected,
pc.PVALUE: mrdetect_results[pc.PVALUE],
pc.COHORT_N: hbc_results[pc.COHORT_N],
'pwgs_base64': pwgs_base64,
'files': {
'results_file': config[self.identifier][pc.RESULTS_FILE],
'hbc_results': config[self.identifier][pc.HBC_FILE],
'vaf_results': config[self.identifier][pc.VAF_FILE]
}
data = self.get_starting_plugin_data(wrapper, self.PLUGIN_VERSION)
workspace_dir = self.workspace.get_work_dir()
# Read from the case_overview JSON file and populate results
json_file_path = os.path.join(workspace_dir, "pWGS_case_overview_output.json")
if os.path.exists(json_file_path):
with open(json_file_path, 'r') as json_file:
json_data = json.load(json_file)
assay = json_data.get("results", {}).get("assay", "Assay name not found")
primary_cancer = json_data.get("results", {}).get("primary_cancer", "Primary cancer not found")
study_title = json_data.get("results", {}).get("study_title", "Study title not found")
else:
assay = "Assay name not found"
primary_cancer = "Primary cancer not found"
study_title = "Study title not found"

results = {
pc.ASSAY: assay,
pc.STUDY: study_title,
pc.PRIMARY_CANCER: primary_cancer,
pc.CTDNA_OUTCOME: mrdetect_results[pc.CTDNA_OUTCOME],
pc.SIGNIFICANCE: mrdetect_results[pc.SIGNIFICANCE],
pc.TUMOUR_FRACTION_READS: float('%.1E' % Decimal(reads_detected * 100 / hbc_results[pc.READS_CHECKED])),
pc.SITES_CHECKED: hbc_results[pc.SITES_CHECKED],
pc.READS_CHECKED: hbc_results[pc.READS_CHECKED],
pc.SITES_DETECTED: hbc_results[pc.SITES_DETECTED],
pc.READS_DETECTED: reads_detected,
pc.PVALUE: mrdetect_results[pc.PVALUE],
pc.DATASET_DETECTION_CUTOFF: math.ceil(mrdetect_results[pc.DATASET_DETECTION_CUTOFF]),
pc.COHORT_N: hbc_results[pc.COHORT_N],
'pwgs_base64': pwgs_base64,
'files': {
'hbc_results': wrapper.get_my_string(pc.HBC_FILE),
'vaf_results': wrapper.get_my_string(pc.VAF_FILE)
}
}
data[pc.RESULTS] = results
self.workspace.write_json('hbc_results.json', hbc_results)
self.workspace.write_json('mrdetect_results.json', mrdetect_results)
Expand All @@ -97,16 +114,16 @@ def preprocess_hbc(self, hbc_path):
reads_checked.append(row[3])
sites_detected.append(row[4])
except IndexError as err:
msg = "Incorrect number of columns in HBC row: '{0}'".format(row)+\
"read from '{0}'".format(hbc_path)
msg = "Incorrect number of columns in HBC row: '{0}'".format(row) + \
"read from '{0}'".format(hbc_path)
raise RuntimeError(msg) from err
hbc_n = len(sites_detected) - 1
hbc_dict = {pc.SITES_CHECKED: int(sites_checked[0]),
pc.READS_CHECKED: int(reads_checked[0]),
pc.SITES_DETECTED: int(sites_detected[0]),
pc.COHORT_N: hbc_n}
return hbc_dict

def preprocess_vaf(self, vaf_path):
"""
summarize Variant Allele Frequency (VAF) file
Expand All @@ -116,19 +133,19 @@ def preprocess_vaf(self, vaf_path):
reader_file = csv.reader(hbc_file, delimiter="\t")
next(reader_file, None)
for row in reader_file:
try:
try:
reads_tmp = row[1]
reads_detected = reads_detected + int(reads_tmp)
except IndexError as err:
msg = "Incorrect number of columns in vaf row: '{0}' ".format(row)+\
msg = "Incorrect number of columns in vaf row: '{0}' ".format(row) + \
"read from '{0}'".format(vaf_path)
raise RuntimeError(msg) from err
raise RuntimeError(msg) from err
return reads_detected

def render(self, data):
    """
    Render the given plugin data with the analysis template.

    A mako_renderer is built from this plugin's module directory and asked
    to render the template named by pc.ANALYSIS_TEMPLATE_NAME; whatever
    render_name() produces is returned unchanged.
    """
    module_dir = self.get_module_dir()
    return mako_renderer(module_dir).render_name(pc.ANALYSIS_TEMPLATE_NAME, data)

def specify_params(self):
discovered = [
pc.RESULTS_FILE,
Expand All @@ -140,19 +157,18 @@ def specify_params(self):
self.set_ini_default(core_constants.ATTRIBUTES, 'clinical')
self.set_priority_defaults(self.PRIORITY)

def write_pwgs_plot(self, hbc_path, vaf_file, output_dir):
    '''
    use R to plot the detection rate
    compared to healthy blood control,
    return in base64

    Runs detection.plot.R (located next to this module) through
    subprocess_runner, passing the HBC results path, the VAF results path,
    the output directory and pc.DETECTION_ALPHA (as --pval).

    The returned value is the text that follows the first double quote in
    the script's stdout, up to the next double quote if there is one.

    Raises RuntimeError if the script's stdout contains no double quote,
    matching the error style of the other file-parsing methods here.
    '''
    script_path = os.path.join(os.path.dirname(__file__), 'detection.plot.R')
    args = [
        script_path,
        '--hbc_results', hbc_path,
        '--vaf_results', vaf_file,
        '--output_directory', output_dir,
        '--pval', str(pc.DETECTION_ALPHA)
    ]
    pwgs_results = subprocess_runner().run(args)
    try:
        # split on '"' and take the piece after the first quote
        return pwgs_results.stdout.split('"')[1]
    except IndexError as err:
        msg = "No double-quoted plot data found in output of '{0}'".format(script_path)
        raise RuntimeError(msg) from err
18 changes: 15 additions & 3 deletions src/lib/djerba/plugins/pwgs/analysis/test/plugin_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import unittest
import tempfile
import string
import shutil

from djerba.util.validator import path_validator
from djerba.plugins.plugin_tester import PluginTester
Expand Down Expand Up @@ -45,7 +46,16 @@ def testPreprocessResults(self):
self.assertEqual(results[constants.CTDNA_OUTCOME], 'DETECTED')
self.assertEqual(results[constants.SIGNIFICANCE], 'significantly larger')

def testPwgsAnalysis(self):
def test_pwgs_analysis_exists(self):
    # test the scenario where pWGS_case_overview_output.json exists:
    # copy the case overview JSON fixture into the directory returned by
    # get_tmp_dir() under that name before running the scenario
    case_json = os.path.join(self.sup_dir, "plugins/pwgs/report_json/pwgs.case.json")
    overview_copy = os.path.join(self.get_tmp_dir(), "pWGS_case_overview_output.json")
    shutil.copyfile(case_json, overview_copy)
    self.run_test_with_scenario(
        "pwgs.analysis.file.exists.scenario.json",
        "4006439d93e4734c66b5c393ea62a307"
    )

def test_pwgs_analysis_not_exists(self):
    # scenario without pWGS_case_overview_output.json: unlike the
    # "exists" test, no case overview file is copied in beforehand
    scenario_json = "pwgs.analysis.file.doesnt.exist.scenario.json"
    expected_md5 = "90c11f5e410fe24fb8cc2f5249d67cd7"
    self.run_test_with_scenario(scenario_json, expected_md5)

def run_test_with_scenario(self, json_filename, md5_checksum):
test_source_dir = os.path.realpath(os.path.dirname(__file__))
with open(os.path.join(test_source_dir, self.INI_NAME)) as in_file:
template_str = in_file.read()
Expand All @@ -55,11 +65,13 @@ def testPwgsAnalysis(self):
os.mkdir(input_dir)
with open(os.path.join(input_dir, self.INI_NAME), 'w') as ini_file:
ini_file.write(ini_str)
json_location = os.path.join(self.sup_dir ,"plugins/pwgs/report_json/pwgs.analysis.json")

json_location = os.path.join(self.sup_dir, f"plugins/pwgs/report_json/{json_filename}")

params = {
self.INI: self.INI_NAME,
self.JSON: json_location,
self.MD5: 'fce53e32cc5ea44828413e84a4d624b4'
self.MD5: md5_checksum
}
self.run_basic_test(input_dir, params)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
<td colspan="3">${results.get(constants.PRIMARY_CANCER)}</td>
</tr>
<td colspan="1">Site of biopsy:</td>
<td colspan="3">Liquid Biopsy</td>
<td colspan="3">Blood cell-free DNA</td>
</tr>
<tr>
<td>Study:</td><td>${results.get(constants.STUDY)}</td>
Expand Down
32 changes: 17 additions & 15 deletions src/lib/djerba/plugins/pwgs/case_overview/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,16 @@
import djerba.plugins.pwgs.pwgs_tools as pwgs_tools
import djerba.plugins.pwgs.constants as pc

class main(plugin_base):

class main(plugin_base):
PRIORITY = 100
PLUGIN_VERSION = '1.0'

def configure(self, config):
config = self.apply_defaults(config)
wrapper = self.get_config_wrapper(config)
work_dir = self.workspace.get_work_dir()
if os.path.exists(os.path.join(work_dir,core_constants.DEFAULT_SAMPLE_INFO)):
if os.path.exists(os.path.join(work_dir, core_constants.DEFAULT_SAMPLE_INFO)):
sample_info = self.workspace.read_json(core_constants.DEFAULT_SAMPLE_INFO)
if wrapper.my_param_is_null(pc.DONOR):
wrapper.set_my_param(pc.DONOR, sample_info[pc.DONOR])
Expand All @@ -40,24 +40,25 @@ def configure(self, config):
def extract(self, config):
    """
    Build the case overview data structure and save a copy to the workspace.

    The results are taken from this plugin's own config section, plus the
    report ID from the 'core' section and a fixed assay description. The
    complete data structure is written to 'pWGS_case_overview_output.json'
    in the workspace and then returned.
    """
    wrapper = self.get_config_wrapper(config)
    data = self.get_starting_plugin_data(wrapper, self.PLUGIN_VERSION)
    report_id = config['core']['report_id']
    my_section = config[self.identifier]
    data[pc.RESULTS] = {
        pc.ASSAY: "plasma Whole Genome Sequencing (pWGS) - 30X (v1.0)",
        pc.PWGS_REPORT: report_id,
        pc.PRIMARY_CANCER: my_section[pc.PRIMARY_CANCER],
        pc.REQ_APPROVED: my_section[pc.REQ_APPROVED],
        pc.DONOR: my_section[pc.DONOR],
        pc.GROUP_ID: my_section[pc.GROUP_ID],
        # note: the PATIENT_ID result is read from the PATIENT_ID_LOWER parameter
        pc.PATIENT_ID: my_section[pc.PATIENT_ID_LOWER],
        pc.STUDY: my_section[pc.STUDY],
        pc.WGS_REPORT: my_section[pc.WGS_REPORT]
    }
    # keep a JSON copy of the full data structure in the workspace
    self.workspace.write_json('pWGS_case_overview_output.json', data)
    return data

def render(self, data):
    """
    Render the given plugin data with the case overview template.

    A mako_renderer is built from this plugin's module directory and asked
    to render the template named by pc.CASE_OVERVIEW_TEMPLATE_NAME; whatever
    render_name() produces is returned unchanged.
    """
    module_dir = self.get_module_dir()
    return mako_renderer(module_dir).render_name(pc.CASE_OVERVIEW_TEMPLATE_NAME, data)

def specify_params(self):
required = [
pc.REQ_APPROVED,
Expand All @@ -77,3 +78,4 @@ def specify_params(self):
self.set_ini_default(core_constants.ATTRIBUTES, 'clinical')
self.set_priority_defaults(self.PRIORITY)


Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def testPwgsCase(self):
params = {
self.INI: self.INI_NAME,
self.JSON: json_location,
self.MD5: '13655e31f88039cae030d374134c9438'
self.MD5: 'dcf8ea1396831a90c135f7a0936183d3'
}
self.run_basic_test(input_dir, params)

Expand Down
1 change: 1 addition & 0 deletions src/lib/djerba/plugins/pwgs/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
COVERAGE = 'coverage'
CTDNA_DETECTION = 'ctdna_detection'
CTDNA_OUTCOME = 'outcome'
DATASET_DETECTION_CUTOFF = 'dataset_detection_cutoff'
DETECTION_PLOT = 'detection_plot'
INSERT_SIZE = 'median_insert_size'
PVALUE = 'p-value'
Expand Down
3 changes: 2 additions & 1 deletion src/lib/djerba/plugins/pwgs/pwgs_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ def preprocess_results(self, results_path):
try:
results_dict = {
constants.TUMOUR_FRACTION_ZVIRAN: float('%.1E' % Decimal(row[7]))*100,
constants.PVALUE: float('%.3E' % Decimal(row[10]))
constants.PVALUE: float('%.3E' % Decimal(row[10])),
constants.DATASET_DETECTION_CUTOFF: float(row[11])
}
except IndexError as err:
msg = "Incorrect number of columns in vaf row: '{0}' ".format(row)+\
Expand Down
4 changes: 2 additions & 2 deletions src/lib/djerba/plugins/pwgs/summary/summary_template.html
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
<div class="oneoftwocellmain" >
<h2 >Summary</h2>
</div>
<div class="twooftwocellmain" style="height: 80px; padding: 10px; background-color:rgba(101,188,69,.5); border: 3px solid #65bc45; border-radius: 12px" >
<div class="twooftwocellmain" style="height: 80px; padding: 10px; background-color:rgba(194,194,194,.5); border: 3px solid #65bc45; border-radius: 12px" >
<hr class="big-line" style="margin-top: -30px;">

<h3 style="margin-top: 45px;">Minimal Residual Disease: ${results.get(constants.CTDNA_DETECTION)}</h3>
<h3 style="margin-top: 45px;">Cell-free DNA tumour burden: ${results.get(constants.CTDNA_DETECTION)}</h3>
<table class="info" >
<tr>
<td>Mutational sampling analysis:</td>
Expand Down
2 changes: 1 addition & 1 deletion src/lib/djerba/plugins/pwgs/summary/test/plugin_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ def testPwgsSummary(self):
params = {
self.INI: self.INI_NAME,
self.JSON: json_location,
self.MD5: '6416078989efc8197eb5179ebf379614'
self.MD5: 'ee5d5bc62807914086abcd66c6038e85'
}
self.run_basic_test(input_dir, params)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,7 @@
</table>

<br/>
<br/>
<br/>
<br/>
<br/>
<br/>
<br/>
<br/>


${html_builder().section_cells_end()}

Expand Down
Loading

0 comments on commit 8157f38

Please sign in to comment.