GCGI-1318, pWGS report improvements (#374)

* first draft of changes
* add detection cutoff constant to results_dict
* Add introductory sentence
* introductory sentence tweaks
* introductory sentence tweaks
* import case overview plugins
* no message
* no message
* Add extraction step to plugin.py
* include data from different plugin to the results dictionary
* Pull parameters from full.config.ini
* get path to the workspace directory
* pull pc.results from json output file
* write data to json
* Different pattern for extracted fields
* no message
* using built in method to write to json
* Use wrapper to access configuration parameters
* Added file not found exception
* file exist check and adjusting test
* checksum update and testing for scenario when file exists
* update summary/plugin_test.py checksum

---------

Co-authored-by: Oumaima Hamza <[email protected]>
oicr-gsi · Apr 19, 2024 · 8157f38 · 8157f38
1 parent 187a66c
commit 8157f38
Show file tree

Hide file tree

Showing 12 changed files with 105 additions and 78 deletions.
diff --git a/src/lib/djerba/plugins/pwgs/analysis/analysis_template.html b/src/lib/djerba/plugins/pwgs/analysis/analysis_template.html
@@ -8,7 +8,8 @@
 
       <img id='Detection' style='width: 100%; object-fit: contain' src="${results.get(constants.PWGS_PLOT)}"/>
 
-      <p> This test counted the number of sites in the patient's plasma 
+      <p> This patient has been diagnosed with ${results.get(constants.PRIMARY_CANCER)} and has been referred for the OICR Genomics ${results.get(constants.ASSAY)} assay through the ${results.get(constants.STUDY)} study.
+            This test counted the number of sites in the patient's plasma
             at which variants were detected that match variants found in the patient's primary tumour 
             (candidate sites passing QC: <strong>${html_builder.k_comma_format(results.get(constants.SITES_CHECKED))}</strong>).
             To test for the possibility that candidate SNVs match SNVs arising from sequencing artefacts, 
@@ -17,12 +18,12 @@
             The number of SNVs of tumour origin 
             detected in this plasma sample (detected sites: <strong>${html_builder.k_comma_format(results.get(constants.SITES_DETECTED))}</strong>) was
              <strong>${results.get(constants.SIGNIFICANCE)} </strong>  
-             than in the control cohort (N=${results.get(constants.COHORT_N)}).</p>
+             than in the control cohort (N=${results.get(constants.COHORT_N)}). Dataset detection cutoff is <strong>${html_builder.k_comma_format(results.get(constants.DATASET_DETECTION_CUTOFF))}</strong>. </p>
             <table class="variants" style="width:100%">
                   <thead>
                         <th style=" width:16%">Reads Checked</th>
                         <th style=" width:16%">Reads Detected</th>
-                        <th style=" width:20%">Detected:Checked Reads Ratio (%)</th>
+                        <th style=" width:20%">Detected:Checked Reads (%)</th>
                   </thead>
                   <tbody>
                         <tr>

diff --git a/src/lib/djerba/plugins/pwgs/analysis/plugin.py b/src/lib/djerba/plugins/pwgs/analysis/plugin.py
@@ -2,6 +2,8 @@
 import os
 import csv
 from decimal import Decimal
+import math
+import json
 import re
 import logging
 
@@ -15,11 +17,11 @@
 import djerba.plugins.pwgs.pwgs_tools as pwgs_tools
 import djerba.plugins.pwgs.constants as pc
 
-class main(plugin_base):
 
+class main(plugin_base):
     PRIORITY = 200
     PLUGIN_VERSION = '1.1'
-    
+
     def configure(self, config):
         config = self.apply_defaults(config)
         wrapper = self.get_config_wrapper(config)
@@ -51,31 +53,46 @@ def configure(self, config):
 
     def extract(self, config):
         wrapper = self.get_config_wrapper(config)
-        mrdetect_results = pwgs_tools.preprocess_results(self, config[self.identifier][pc.RESULTS_FILE])
-        hbc_results = self.preprocess_hbc(config[self.identifier][pc.HBC_FILE])
-        reads_detected = self.preprocess_vaf(config[self.identifier][pc.VAF_FILE])
-        pwgs_base64 = self.write_pwgs_plot(config[self.identifier][pc.HBC_FILE], 
-                                           config[self.identifier][pc.VAF_FILE],
-                                           output_dir = self.workspace.print_location())
+        mrdetect_results = pwgs_tools.preprocess_results(self, wrapper.get_my_string(pc.RESULTS_FILE))
+        hbc_results = self.preprocess_hbc(wrapper.get_my_string(pc.HBC_FILE))
+        reads_detected = self.preprocess_vaf(wrapper.get_my_string(pc.VAF_FILE))
+        pwgs_base64 = self.write_pwgs_plot(wrapper.get_my_string(pc.HBC_FILE), wrapper.get_my_string(pc.VAF_FILE), output_dir=self.workspace.print_location())
         self.logger.info("PWGS ANALYSIS: Finished preprocessing files")
-        data = self.get_starting_plugin_data(wrapper, self.PLUGIN_VERSION)       
-        results =  {
-                pc.CTDNA_OUTCOME: mrdetect_results[pc.CTDNA_OUTCOME],
-                pc.SIGNIFICANCE: mrdetect_results[pc.SIGNIFICANCE],
-                pc.TUMOUR_FRACTION_READS: float('%.1E' % Decimal( reads_detected*100 / hbc_results[pc.READS_CHECKED] )),
-                pc.SITES_CHECKED: hbc_results[pc.SITES_CHECKED],
-                pc.READS_CHECKED: hbc_results[pc.READS_CHECKED],
-                pc.SITES_DETECTED: hbc_results[pc.SITES_DETECTED],
-                pc.READS_DETECTED: reads_detected,
-                pc.PVALUE: mrdetect_results[pc.PVALUE],
-                pc.COHORT_N: hbc_results[pc.COHORT_N],
-                'pwgs_base64': pwgs_base64,
-                'files': {
-                    'results_file': config[self.identifier][pc.RESULTS_FILE],
-                    'hbc_results': config[self.identifier][pc.HBC_FILE],
-                    'vaf_results': config[self.identifier][pc.VAF_FILE]
-                }
+        data = self.get_starting_plugin_data(wrapper, self.PLUGIN_VERSION)
+        workspace_dir = self.workspace.get_work_dir()
+        # Read from the case_overview JSON file and populate results
+        json_file_path = os.path.join(workspace_dir, "pWGS_case_overview_output.json")
+        if os.path.exists(json_file_path):
+            with open(json_file_path, 'r') as json_file:
+                json_data = json.load(json_file)
+                assay = json_data.get("results", {}).get("assay", "Assay name not found")
+                primary_cancer = json_data.get("results", {}).get("primary_cancer", "Primary cancer not found")
+                study_title = json_data.get("results", {}).get("study_title", "Study title not found")
+        else:
+            assay = "Assay name not found"
+            primary_cancer = "Primary cancer not found"
+            study_title = "Study title not found"
+
+        results = {
+            pc.ASSAY: assay,
+            pc.STUDY: study_title,
+            pc.PRIMARY_CANCER: primary_cancer,
+            pc.CTDNA_OUTCOME: mrdetect_results[pc.CTDNA_OUTCOME],
+            pc.SIGNIFICANCE: mrdetect_results[pc.SIGNIFICANCE],
+            pc.TUMOUR_FRACTION_READS: float('%.1E' % Decimal(reads_detected * 100 / hbc_results[pc.READS_CHECKED])),
+            pc.SITES_CHECKED: hbc_results[pc.SITES_CHECKED],
+            pc.READS_CHECKED: hbc_results[pc.READS_CHECKED],
+            pc.SITES_DETECTED: hbc_results[pc.SITES_DETECTED],
+            pc.READS_DETECTED: reads_detected,
+            pc.PVALUE: mrdetect_results[pc.PVALUE],
+            pc.DATASET_DETECTION_CUTOFF: math.ceil(mrdetect_results[pc.DATASET_DETECTION_CUTOFF]),
+            pc.COHORT_N: hbc_results[pc.COHORT_N],
+            'pwgs_base64': pwgs_base64,
+            'files': {
+                'hbc_results': wrapper.get_my_string(pc.HBC_FILE),
+                'vaf_results': wrapper.get_my_string(pc.VAF_FILE)
             }
+        }
         data[pc.RESULTS] = results
         self.workspace.write_json('hbc_results.json', hbc_results)
         self.workspace.write_json('mrdetect_results.json', mrdetect_results)
@@ -97,16 +114,16 @@ def preprocess_hbc(self, hbc_path):
                     reads_checked.append(row[3])
                     sites_detected.append(row[4])
                 except IndexError as err:
-                    msg = "Incorrect number of columns in HBC row: '{0}'".format(row)+\
-                        "read from '{0}'".format(hbc_path)
+                    msg = "Incorrect number of columns in HBC row: '{0}'".format(row) + \
+                          "read from '{0}'".format(hbc_path)
                     raise RuntimeError(msg) from err
         hbc_n = len(sites_detected) - 1
         hbc_dict = {pc.SITES_CHECKED: int(sites_checked[0]),
                     pc.READS_CHECKED: int(reads_checked[0]),
                     pc.SITES_DETECTED: int(sites_detected[0]),
                     pc.COHORT_N: hbc_n}
         return hbc_dict
-    
+
     def preprocess_vaf(self, vaf_path):
         """
         summarize Variant Allele Frequency (VAF) file
@@ -116,19 +133,19 @@ def preprocess_vaf(self, vaf_path):
             reader_file = csv.reader(hbc_file, delimiter="\t")
             next(reader_file, None)
             for row in reader_file:
-                try: 
+                try:
                     reads_tmp = row[1]
                     reads_detected = reads_detected + int(reads_tmp)
                 except IndexError as err:
-                    msg = "Incorrect number of columns in vaf row: '{0}' ".format(row)+\
+                    msg = "Incorrect number of columns in vaf row: '{0}' ".format(row) + \
                           "read from '{0}'".format(vaf_path)
-                    raise RuntimeError(msg) from err      
+                    raise RuntimeError(msg) from err
         return reads_detected
-    
+
     def render(self, data):
         renderer = mako_renderer(self.get_module_dir())
         return renderer.render_name(pc.ANALYSIS_TEMPLATE_NAME, data)
-    
+
     def specify_params(self):
         discovered = [
             pc.RESULTS_FILE,
@@ -140,19 +157,18 @@ def specify_params(self):
         self.set_ini_default(core_constants.ATTRIBUTES, 'clinical')
         self.set_priority_defaults(self.PRIORITY)
 
-    def write_pwgs_plot(self, hbc_path, vaf_file, output_dir ):
+    def write_pwgs_plot(self, hbc_path, vaf_file, output_dir):
         '''
         use R to plot the detection rate 
         compared to healthy blood control, 
         return in base64
         '''
         args = [
-            os.path.join(os.path.dirname(__file__),'detection.plot.R'),
+            os.path.join(os.path.dirname(__file__), 'detection.plot.R'),
             '--hbc_results', hbc_path,
             '--vaf_results', vaf_file,
             '--output_directory', output_dir,
             '--pval', str(pc.DETECTION_ALPHA)
         ]
         pwgs_results = subprocess_runner().run(args)
-        return(pwgs_results.stdout.split('"')[1])
-
+        return (pwgs_results.stdout.split('"')[1])
diff --git a/src/lib/djerba/plugins/pwgs/analysis/test/plugin_test.py b/src/lib/djerba/plugins/pwgs/analysis/test/plugin_test.py
@@ -6,6 +6,7 @@
 import unittest
 import tempfile
 import string
+import shutil
 
 from djerba.util.validator import path_validator
 from djerba.plugins.plugin_tester import PluginTester
@@ -45,7 +46,16 @@ def testPreprocessResults(self):
         self.assertEqual(results[constants.CTDNA_OUTCOME], 'DETECTED')
         self.assertEqual(results[constants.SIGNIFICANCE], 'significantly larger')
 
-    def testPwgsAnalysis(self):
+    def test_pwgs_analysis_exists(self):
+        # test the scenario where pWGS_case_overview_output.json exists
+        shutil.copyfile(os.path.join(self.sup_dir, f"plugins/pwgs/report_json/pwgs.case.json"), os.path.join(self.get_tmp_dir(), "pWGS_case_overview_output.json"))
+        self.run_test_with_scenario("pwgs.analysis.file.exists.scenario.json", "4006439d93e4734c66b5c393ea62a307")
+
+    def test_pwgs_analysis_not_exists(self):
+        # test the scenario where pWGS_case_overview_output.json doesn't exist
+        self.run_test_with_scenario("pwgs.analysis.file.doesnt.exist.scenario.json", "90c11f5e410fe24fb8cc2f5249d67cd7")
+
+    def run_test_with_scenario(self, json_filename, md5_checksum):
         test_source_dir = os.path.realpath(os.path.dirname(__file__))
         with open(os.path.join(test_source_dir, self.INI_NAME)) as in_file:
             template_str = in_file.read()
@@ -55,11 +65,13 @@ def testPwgsAnalysis(self):
         os.mkdir(input_dir)
         with open(os.path.join(input_dir, self.INI_NAME), 'w') as ini_file:
             ini_file.write(ini_str)
-        json_location = os.path.join(self.sup_dir ,"plugins/pwgs/report_json/pwgs.analysis.json")
+
+        json_location = os.path.join(self.sup_dir, f"plugins/pwgs/report_json/{json_filename}")
+
         params = {
             self.INI: self.INI_NAME,
             self.JSON: json_location,
-            self.MD5: 'fce53e32cc5ea44828413e84a4d624b4'
+            self.MD5: md5_checksum
         }
         self.run_basic_test(input_dir, params)
 

diff --git a/src/lib/djerba/plugins/pwgs/case_overview/case_template.html b/src/lib/djerba/plugins/pwgs/case_overview/case_template.html
@@ -22,7 +22,7 @@
       <td colspan="3">${results.get(constants.PRIMARY_CANCER)}</td>
     </tr>
       <td colspan="1">Site of biopsy:</td>  
-      <td colspan="3">Liquid Biopsy</td>
+      <td colspan="3">Blood cell-free DNA</td>
     </tr>
     <tr>
       <td>Study:</td><td>${results.get(constants.STUDY)}</td>

diff --git a/src/lib/djerba/plugins/pwgs/case_overview/plugin.py b/src/lib/djerba/plugins/pwgs/case_overview/plugin.py
@@ -11,16 +11,16 @@
 import djerba.plugins.pwgs.pwgs_tools as pwgs_tools
 import djerba.plugins.pwgs.constants as pc
 
-class main(plugin_base):
 
+class main(plugin_base):
     PRIORITY = 100
     PLUGIN_VERSION = '1.0'
 
     def configure(self, config):
         config = self.apply_defaults(config)
         wrapper = self.get_config_wrapper(config)
         work_dir = self.workspace.get_work_dir()
-        if os.path.exists(os.path.join(work_dir,core_constants.DEFAULT_SAMPLE_INFO)):
+        if os.path.exists(os.path.join(work_dir, core_constants.DEFAULT_SAMPLE_INFO)):
             sample_info = self.workspace.read_json(core_constants.DEFAULT_SAMPLE_INFO)
             if wrapper.my_param_is_null(pc.DONOR):
                 wrapper.set_my_param(pc.DONOR, sample_info[pc.DONOR])
@@ -40,24 +40,25 @@ def configure(self, config):
     def extract(self, config):
         wrapper = self.get_config_wrapper(config)
         data = self.get_starting_plugin_data(wrapper, self.PLUGIN_VERSION)
-        results =  {
-                pc.ASSAY: "plasma Whole Genome Sequencing (pWGS) - 30X (v1.0)",
-                pc.PWGS_REPORT: config['core']['report_id'],
-                pc.PRIMARY_CANCER: config[self.identifier][pc.PRIMARY_CANCER],
-                pc.REQ_APPROVED: config[self.identifier][pc.REQ_APPROVED],
-                pc.DONOR: config[self.identifier][pc.DONOR],
-                pc.GROUP_ID: config[self.identifier][pc.GROUP_ID],
-                pc.PATIENT_ID: config[self.identifier][pc.PATIENT_ID_LOWER],
-                pc.STUDY:  config[self.identifier][pc.STUDY],
-                pc.WGS_REPORT: config[self.identifier][pc.WGS_REPORT]
-            }
+        results = {
+            pc.ASSAY: "plasma Whole Genome Sequencing (pWGS) - 30X (v1.0)",
+            pc.PWGS_REPORT: config['core']['report_id'],
+            pc.PRIMARY_CANCER: config[self.identifier][pc.PRIMARY_CANCER],
+            pc.REQ_APPROVED: config[self.identifier][pc.REQ_APPROVED],
+            pc.DONOR: config[self.identifier][pc.DONOR],
+            pc.GROUP_ID: config[self.identifier][pc.GROUP_ID],
+            pc.PATIENT_ID: config[self.identifier][pc.PATIENT_ID_LOWER],
+            pc.STUDY: config[self.identifier][pc.STUDY],
+            pc.WGS_REPORT: config[self.identifier][pc.WGS_REPORT]
+        }
         data[pc.RESULTS] = results
+        self.workspace.write_json('pWGS_case_overview_output.json', data)
         return data
-    
+
     def render(self, data):
         renderer = mako_renderer(self.get_module_dir())
         return renderer.render_name(pc.CASE_OVERVIEW_TEMPLATE_NAME, data)
-    
+
     def specify_params(self):
         required = [
             pc.REQ_APPROVED,
@@ -77,3 +78,4 @@ def specify_params(self):
         self.set_ini_default(core_constants.ATTRIBUTES, 'clinical')
         self.set_priority_defaults(self.PRIORITY)
 
+
diff --git a/src/lib/djerba/plugins/pwgs/case_overview/test/plugin_test.py b/src/lib/djerba/plugins/pwgs/case_overview/test/plugin_test.py
@@ -40,7 +40,7 @@ def testPwgsCase(self):
         params = {
             self.INI: self.INI_NAME,
             self.JSON: json_location,
-            self.MD5: '13655e31f88039cae030d374134c9438'
+            self.MD5: 'dcf8ea1396831a90c135f7a0936183d3'
         }
         self.run_basic_test(input_dir, params)
 

diff --git a/src/lib/djerba/plugins/pwgs/constants.py b/src/lib/djerba/plugins/pwgs/constants.py
@@ -6,6 +6,7 @@
 COVERAGE = 'coverage'
 CTDNA_DETECTION = 'ctdna_detection'
 CTDNA_OUTCOME = 'outcome'
+DATASET_DETECTION_CUTOFF = 'dataset_detection_cutoff'
 DETECTION_PLOT = 'detection_plot'
 INSERT_SIZE = 'median_insert_size'
 PVALUE = 'p-value'  

diff --git a/src/lib/djerba/plugins/pwgs/pwgs_tools.py b/src/lib/djerba/plugins/pwgs/pwgs_tools.py
@@ -17,7 +17,8 @@ def preprocess_results(self, results_path):
             try:
                 results_dict = {
                                 constants.TUMOUR_FRACTION_ZVIRAN: float('%.1E' % Decimal(row[7]))*100,
-                                constants.PVALUE:  float('%.3E' % Decimal(row[10]))
+                                constants.PVALUE:  float('%.3E' % Decimal(row[10])),
+                                constants.DATASET_DETECTION_CUTOFF: float(row[11])
                                 }
             except IndexError as err:
                 msg = "Incorrect number of columns in vaf row: '{0}' ".format(row)+\

diff --git a/src/lib/djerba/plugins/pwgs/summary/summary_template.html b/src/lib/djerba/plugins/pwgs/summary/summary_template.html
@@ -8,10 +8,10 @@
       <div class="oneoftwocellmain"  >
           <h2 >Summary</h2>
       </div>
-      <div class="twooftwocellmain"  style="height: 80px; padding: 10px; background-color:rgba(101,188,69,.5);  border: 3px solid #65bc45; border-radius: 12px" >
+      <div class="twooftwocellmain"  style="height: 80px; padding: 10px; background-color:rgba(194,194,194,.5);  border: 3px solid #65bc45; border-radius: 12px" >
           <hr class="big-line" style="margin-top: -30px;">
 
-  <h3 style="margin-top: 45px;">Minimal Residual Disease: ${results.get(constants.CTDNA_DETECTION)}</h3>
+  <h3 style="margin-top: 45px;">Cell-free DNA tumour burden: ${results.get(constants.CTDNA_DETECTION)}</h3>
   <table class="info" >
   <tr>
     <td>Mutational sampling analysis:</td>

diff --git a/src/lib/djerba/plugins/pwgs/summary/test/plugin_test.py b/src/lib/djerba/plugins/pwgs/summary/test/plugin_test.py
@@ -41,7 +41,7 @@ def testPwgsSummary(self):
         params = {
             self.INI: self.INI_NAME,
             self.JSON: json_location,
-            self.MD5: '6416078989efc8197eb5179ebf379614'
+            self.MD5: 'ee5d5bc62807914086abcd66c6038e85'
         }
         self.run_basic_test(input_dir, params)
 

diff --git a/src/lib/djerba/plugins/supplement/body/supplementary_materials_template.html b/src/lib/djerba/plugins/supplement/body/supplementary_materials_template.html
@@ -73,13 +73,7 @@
   </table>
 
   <br/>
-  <br/>
-  <br/>
-  <br/>
-  <br/>
-  <br/>
-  <br/>
-  <br/>
+
 
   ${html_builder().section_cells_end()}
-Original file line number
+Diff line change
@@ Expand Up / @@ -73,13 +73,7 @@ @@
       </table>
       <br/>
-      <br/>
-      <br/>
-      <br/>
-      <br/>
-      <br/>
-      <br/>
-      <br/>
       ${html_builder().section_cells_end()}
@@ Expand Down @@