From 8157f38fa2ac3881079746f88056b53b5ff4c67b Mon Sep 17 00:00:00 2001 From: Oumaima Hamza <73792407+OumaimaHamza@users.noreply.github.com> Date: Fri, 19 Apr 2024 16:54:16 -0400 Subject: [PATCH] GCGI-1318, pWGS report improvements (#374) * first draft of changes * add detection cutoff constant to results_dict * Add introductory sentence * introductory sentence tweaks * introductory sentence tweaks * import case overview plugins * no message * no message * Add extraction step to plugin.py * include data from different plugin to the results dictionary * Pull parameters from full.config.ini * get path to the workspace directory * pull pc.results from json output file * write data to json * Different pattern for extracted fields * no message * using built in method to write to json * Use wrapper to access configuration parameters * Added file not found exception * file exist check and adjusting test * checksum update and testing for scenario when file exists * update summary/plugin_test.py checksum --------- Co-authored-by: Oumaima Hamza --- .../pwgs/analysis/analysis_template.html | 7 +- .../djerba/plugins/pwgs/analysis/plugin.py | 90 +++++++++++-------- .../plugins/pwgs/analysis/test/plugin_test.py | 18 +++- .../pwgs/case_overview/case_template.html | 2 +- .../plugins/pwgs/case_overview/plugin.py | 32 +++---- .../pwgs/case_overview/test/plugin_test.py | 2 +- src/lib/djerba/plugins/pwgs/constants.py | 1 + src/lib/djerba/plugins/pwgs/pwgs_tools.py | 3 +- .../pwgs/summary/summary_template.html | 4 +- .../plugins/pwgs/summary/test/plugin_test.py | 2 +- .../supplementary_materials_template.html | 8 +- .../supplement/body/test/plugin_test.py | 14 +-- 12 files changed, 105 insertions(+), 78 deletions(-) diff --git a/src/lib/djerba/plugins/pwgs/analysis/analysis_template.html b/src/lib/djerba/plugins/pwgs/analysis/analysis_template.html index b64b4db0c..4dfda1183 100644 --- a/src/lib/djerba/plugins/pwgs/analysis/analysis_template.html +++ b/src/lib/djerba/plugins/pwgs/analysis/analysis_template.html @@ -8,7 +8,8 @@ -

This test counted the number of sites in the patient's plasma +

This patient has been diagnosed with ${results.get(constants.PRIMARY_CANCER)} and has been referred for the OICR Genomics ${results.get(constants.ASSAY)} assay through the ${results.get(constants.STUDY)} study. + This test counted the number of sites in the patient's plasma at which variants were detected that match variants found in the patient's primary tumour (candidate sites passing QC: ${html_builder.k_comma_format(results.get(constants.SITES_CHECKED))}). To test for the possibility that candidate SNVs match SNVs arising from sequencing artefacts, @@ -17,12 +18,12 @@ The number of SNVs of tumour origin detected in this plasma sample (detected sites: ${html_builder.k_comma_format(results.get(constants.SITES_DETECTED))}) was ${results.get(constants.SIGNIFICANCE)} - than in the control cohort (N=${results.get(constants.COHORT_N)}).

+ than in the control cohort (N=${results.get(constants.COHORT_N)}). Dataset detection cutoff is ${html_builder.k_comma_format(results.get(constants.DATASET_DETECTION_CUTOFF))}.

- + diff --git a/src/lib/djerba/plugins/pwgs/analysis/plugin.py b/src/lib/djerba/plugins/pwgs/analysis/plugin.py index 190f5930e..6443bc134 100644 --- a/src/lib/djerba/plugins/pwgs/analysis/plugin.py +++ b/src/lib/djerba/plugins/pwgs/analysis/plugin.py @@ -2,6 +2,8 @@ import os import csv from decimal import Decimal +import math +import json import re import logging @@ -15,11 +17,11 @@ import djerba.plugins.pwgs.pwgs_tools as pwgs_tools import djerba.plugins.pwgs.constants as pc -class main(plugin_base): +class main(plugin_base): PRIORITY = 200 PLUGIN_VERSION = '1.1' - + def configure(self, config): config = self.apply_defaults(config) wrapper = self.get_config_wrapper(config) @@ -51,31 +53,46 @@ def configure(self, config): def extract(self, config): wrapper = self.get_config_wrapper(config) - mrdetect_results = pwgs_tools.preprocess_results(self, config[self.identifier][pc.RESULTS_FILE]) - hbc_results = self.preprocess_hbc(config[self.identifier][pc.HBC_FILE]) - reads_detected = self.preprocess_vaf(config[self.identifier][pc.VAF_FILE]) - pwgs_base64 = self.write_pwgs_plot(config[self.identifier][pc.HBC_FILE], - config[self.identifier][pc.VAF_FILE], - output_dir = self.workspace.print_location()) + mrdetect_results = pwgs_tools.preprocess_results(self, wrapper.get_my_string(pc.RESULTS_FILE)) + hbc_results = self.preprocess_hbc(wrapper.get_my_string(pc.HBC_FILE)) + reads_detected = self.preprocess_vaf(wrapper.get_my_string(pc.VAF_FILE)) + pwgs_base64 = self.write_pwgs_plot(wrapper.get_my_string(pc.HBC_FILE), wrapper.get_my_string(pc.VAF_FILE), output_dir=self.workspace.print_location()) self.logger.info("PWGS ANALYSIS: Finished preprocessing files") - data = self.get_starting_plugin_data(wrapper, self.PLUGIN_VERSION) - results = { - pc.CTDNA_OUTCOME: mrdetect_results[pc.CTDNA_OUTCOME], - pc.SIGNIFICANCE: mrdetect_results[pc.SIGNIFICANCE], - pc.TUMOUR_FRACTION_READS: float('%.1E' % Decimal( reads_detected*100 / hbc_results[pc.READS_CHECKED] )), - pc.SITES_CHECKED: hbc_results[pc.SITES_CHECKED], - pc.READS_CHECKED: hbc_results[pc.READS_CHECKED], - pc.SITES_DETECTED: hbc_results[pc.SITES_DETECTED], - pc.READS_DETECTED: reads_detected, - pc.PVALUE: mrdetect_results[pc.PVALUE], - pc.COHORT_N: hbc_results[pc.COHORT_N], - 'pwgs_base64': pwgs_base64, - 'files': { - 'results_file': config[self.identifier][pc.RESULTS_FILE], - 'hbc_results': config[self.identifier][pc.HBC_FILE], - 'vaf_results': config[self.identifier][pc.VAF_FILE] - } + data = self.get_starting_plugin_data(wrapper, self.PLUGIN_VERSION) + workspace_dir = self.workspace.get_work_dir() + # Read from the case_overview JSON file and populate results + json_file_path = os.path.join(workspace_dir, "pWGS_case_overview_output.json") + if os.path.exists(json_file_path): + with open(json_file_path, 'r') as json_file: + json_data = json.load(json_file) + assay = json_data.get("results", {}).get("assay", "Assay name not found") + primary_cancer = json_data.get("results", {}).get("primary_cancer", "Primary cancer not found") + study_title = json_data.get("results", {}).get("study_title", "Study title not found") + else: + assay = "Assay name not found" + primary_cancer = "Primary cancer not found" + study_title = "Study title not found" + + results = { + pc.ASSAY: assay, + pc.STUDY: study_title, + pc.PRIMARY_CANCER: primary_cancer, + pc.CTDNA_OUTCOME: mrdetect_results[pc.CTDNA_OUTCOME], + pc.SIGNIFICANCE: mrdetect_results[pc.SIGNIFICANCE], + pc.TUMOUR_FRACTION_READS: float('%.1E' % Decimal(reads_detected * 100 / hbc_results[pc.READS_CHECKED])), + pc.SITES_CHECKED: hbc_results[pc.SITES_CHECKED], + pc.READS_CHECKED: hbc_results[pc.READS_CHECKED], + pc.SITES_DETECTED: hbc_results[pc.SITES_DETECTED], + pc.READS_DETECTED: reads_detected, + pc.PVALUE: mrdetect_results[pc.PVALUE], + pc.DATASET_DETECTION_CUTOFF: math.ceil(mrdetect_results[pc.DATASET_DETECTION_CUTOFF]), + pc.COHORT_N: hbc_results[pc.COHORT_N], + 'pwgs_base64': pwgs_base64, + 'files': { + 'hbc_results': wrapper.get_my_string(pc.HBC_FILE), + 'vaf_results': wrapper.get_my_string(pc.VAF_FILE) } + } data[pc.RESULTS] = results self.workspace.write_json('hbc_results.json', hbc_results) self.workspace.write_json('mrdetect_results.json', mrdetect_results) @@ -97,8 +114,8 @@ def preprocess_hbc(self, hbc_path): reads_checked.append(row[3]) sites_detected.append(row[4]) except IndexError as err: - msg = "Incorrect number of columns in HBC row: '{0}'".format(row)+\ - "read from '{0}'".format(hbc_path) + msg = "Incorrect number of columns in HBC row: '{0}'".format(row) + \ + "read from '{0}'".format(hbc_path) raise RuntimeError(msg) from err hbc_n = len(sites_detected) - 1 hbc_dict = {pc.SITES_CHECKED: int(sites_checked[0]), @@ -106,7 +123,7 @@ def preprocess_hbc(self, hbc_path): pc.SITES_DETECTED: int(sites_detected[0]), pc.COHORT_N: hbc_n} return hbc_dict - + def preprocess_vaf(self, vaf_path): """ summarize Variant Allele Frequency (VAF) file @@ -116,19 +133,19 @@ def preprocess_vaf(self, vaf_path): reader_file = csv.reader(hbc_file, delimiter="\t") next(reader_file, None) for row in reader_file: - try: + try: reads_tmp = row[1] reads_detected = reads_detected + int(reads_tmp) except IndexError as err: - msg = "Incorrect number of columns in vaf row: '{0}' ".format(row)+\ + msg = "Incorrect number of columns in vaf row: '{0}' ".format(row) + \ "read from '{0}'".format(vaf_path) - raise RuntimeError(msg) from err + raise RuntimeError(msg) from err return reads_detected - + def render(self, data): renderer = mako_renderer(self.get_module_dir()) return renderer.render_name(pc.ANALYSIS_TEMPLATE_NAME, data) - + def specify_params(self): discovered = [ pc.RESULTS_FILE, @@ -140,19 +157,18 @@ def specify_params(self): self.set_ini_default(core_constants.ATTRIBUTES, 'clinical') self.set_priority_defaults(self.PRIORITY) - def write_pwgs_plot(self, hbc_path, vaf_file, output_dir ): + def write_pwgs_plot(self, hbc_path, vaf_file, output_dir): ''' use R to plot the detection rate compared to healthy blood control, return in base64 ''' args = [ - os.path.join(os.path.dirname(__file__),'detection.plot.R'), + os.path.join(os.path.dirname(__file__), 'detection.plot.R'), '--hbc_results', hbc_path, '--vaf_results', vaf_file, '--output_directory', output_dir, '--pval', str(pc.DETECTION_ALPHA) ] pwgs_results = subprocess_runner().run(args) - return(pwgs_results.stdout.split('"')[1]) - + return (pwgs_results.stdout.split('"')[1]) diff --git a/src/lib/djerba/plugins/pwgs/analysis/test/plugin_test.py b/src/lib/djerba/plugins/pwgs/analysis/test/plugin_test.py index 48adf5327..2e9074814 100755 --- a/src/lib/djerba/plugins/pwgs/analysis/test/plugin_test.py +++ b/src/lib/djerba/plugins/pwgs/analysis/test/plugin_test.py @@ -6,6 +6,7 @@ import unittest import tempfile import string +import shutil from djerba.util.validator import path_validator from djerba.plugins.plugin_tester import PluginTester @@ -45,7 +46,16 @@ def testPreprocessResults(self): self.assertEqual(results[constants.CTDNA_OUTCOME], 'DETECTED') self.assertEqual(results[constants.SIGNIFICANCE], 'significantly larger') - def testPwgsAnalysis(self): + def test_pwgs_analysis_exists(self): + # test the scenario where pWGS_case_overview_output.json exist + shutil.copyfile(os.path.join(self.sup_dir, f"plugins/pwgs/report_json/pwgs.case.json"), os.path.join(self.get_tmp_dir(), "pWGS_case_overview_output.json")) + self.run_test_with_scenario("pwgs.analysis.file.exists.scenario.json", "4006439d93e4734c66b5c393ea62a307") + + def test_pwgs_analysis_not_exists(self): + # test the scenario where pWGS_case_overview_output.json doesn't exist + self.run_test_with_scenario("pwgs.analysis.file.doesnt.exist.scenario.json", "90c11f5e410fe24fb8cc2f5249d67cd7") + + def run_test_with_scenario(self, json_filename, md5_checksum): test_source_dir = os.path.realpath(os.path.dirname(__file__)) with open(os.path.join(test_source_dir, self.INI_NAME)) as in_file: template_str = in_file.read() @@ -55,11 +65,13 @@ def testPwgsAnalysis(self): os.mkdir(input_dir) with open(os.path.join(input_dir, self.INI_NAME), 'w') as ini_file: ini_file.write(ini_str) - json_location = os.path.join(self.sup_dir ,"plugins/pwgs/report_json/pwgs.analysis.json") + + json_location = os.path.join(self.sup_dir, f"plugins/pwgs/report_json/{json_filename}") + params = { self.INI: self.INI_NAME, self.JSON: json_location, - self.MD5: 'fce53e32cc5ea44828413e84a4d624b4' + self.MD5: md5_checksum } self.run_basic_test(input_dir, params) diff --git a/src/lib/djerba/plugins/pwgs/case_overview/case_template.html b/src/lib/djerba/plugins/pwgs/case_overview/case_template.html index 95695012b..f6356d063 100644 --- a/src/lib/djerba/plugins/pwgs/case_overview/case_template.html +++ b/src/lib/djerba/plugins/pwgs/case_overview/case_template.html @@ -22,7 +22,7 @@ - + diff --git a/src/lib/djerba/plugins/pwgs/case_overview/plugin.py b/src/lib/djerba/plugins/pwgs/case_overview/plugin.py index ff35d0cc7..c0a82a986 100644 --- a/src/lib/djerba/plugins/pwgs/case_overview/plugin.py +++ b/src/lib/djerba/plugins/pwgs/case_overview/plugin.py @@ -11,8 +11,8 @@ import djerba.plugins.pwgs.pwgs_tools as pwgs_tools import djerba.plugins.pwgs.constants as pc -class main(plugin_base): +class main(plugin_base): PRIORITY = 100 PLUGIN_VERSION = '1.0' @@ -20,7 +20,7 @@ def configure(self, config): config = self.apply_defaults(config) wrapper = self.get_config_wrapper(config) work_dir = self.workspace.get_work_dir() - if os.path.exists(os.path.join(work_dir,core_constants.DEFAULT_SAMPLE_INFO)): + if os.path.exists(os.path.join(work_dir, core_constants.DEFAULT_SAMPLE_INFO)): sample_info = self.workspace.read_json(core_constants.DEFAULT_SAMPLE_INFO) if wrapper.my_param_is_null(pc.DONOR): wrapper.set_my_param(pc.DONOR, sample_info[pc.DONOR]) @@ -40,24 +40,25 @@ def configure(self, config): def extract(self, config): wrapper = self.get_config_wrapper(config) data = self.get_starting_plugin_data(wrapper, self.PLUGIN_VERSION) - results = { - pc.ASSAY: "plasma Whole Genome Sequencing (pWGS) - 30X (v1.0)", - pc.PWGS_REPORT: config['core']['report_id'], - pc.PRIMARY_CANCER: config[self.identifier][pc.PRIMARY_CANCER], - pc.REQ_APPROVED: config[self.identifier][pc.REQ_APPROVED], - pc.DONOR: config[self.identifier][pc.DONOR], - pc.GROUP_ID: config[self.identifier][pc.GROUP_ID], - pc.PATIENT_ID: config[self.identifier][pc.PATIENT_ID_LOWER], - pc.STUDY: config[self.identifier][pc.STUDY], - pc.WGS_REPORT: config[self.identifier][pc.WGS_REPORT] - } + results = { + pc.ASSAY: "plasma Whole Genome Sequencing (pWGS) - 30X (v1.0)", + pc.PWGS_REPORT: config['core']['report_id'], + pc.PRIMARY_CANCER: config[self.identifier][pc.PRIMARY_CANCER], + pc.REQ_APPROVED: config[self.identifier][pc.REQ_APPROVED], + pc.DONOR: config[self.identifier][pc.DONOR], + pc.GROUP_ID: config[self.identifier][pc.GROUP_ID], + pc.PATIENT_ID: config[self.identifier][pc.PATIENT_ID_LOWER], + pc.STUDY: config[self.identifier][pc.STUDY], + pc.WGS_REPORT: config[self.identifier][pc.WGS_REPORT] + } data[pc.RESULTS] = results + self.workspace.write_json('pWGS_case_overview_output.json', data) return data - + def render(self, data): renderer = mako_renderer(self.get_module_dir()) return renderer.render_name(pc.CASE_OVERVIEW_TEMPLATE_NAME, data) - + def specify_params(self): required = [ pc.REQ_APPROVED, @@ -77,3 +78,4 @@ def specify_params(self): self.set_ini_default(core_constants.ATTRIBUTES, 'clinical') self.set_priority_defaults(self.PRIORITY) + diff --git a/src/lib/djerba/plugins/pwgs/case_overview/test/plugin_test.py b/src/lib/djerba/plugins/pwgs/case_overview/test/plugin_test.py index 6b2a2f8a7..92370a299 100755 --- a/src/lib/djerba/plugins/pwgs/case_overview/test/plugin_test.py +++ b/src/lib/djerba/plugins/pwgs/case_overview/test/plugin_test.py @@ -40,7 +40,7 @@ def testPwgsCase(self): params = { self.INI: self.INI_NAME, self.JSON: json_location, - self.MD5: '13655e31f88039cae030d374134c9438' + self.MD5: 'dcf8ea1396831a90c135f7a0936183d3' } self.run_basic_test(input_dir, params) diff --git a/src/lib/djerba/plugins/pwgs/constants.py b/src/lib/djerba/plugins/pwgs/constants.py index 7214d18b5..59c562371 100644 --- a/src/lib/djerba/plugins/pwgs/constants.py +++ b/src/lib/djerba/plugins/pwgs/constants.py @@ -6,6 +6,7 @@ COVERAGE = 'coverage' CTDNA_DETECTION = 'ctdna_detection' CTDNA_OUTCOME = 'outcome' +DATASET_DETECTION_CUTOFF = 'dataset_detection_cutoff' DETECTION_PLOT = 'detection_plot' INSERT_SIZE = 'median_insert_size' PVALUE = 'p-value' diff --git a/src/lib/djerba/plugins/pwgs/pwgs_tools.py b/src/lib/djerba/plugins/pwgs/pwgs_tools.py index 051098e0b..8ba17ad46 100644 --- a/src/lib/djerba/plugins/pwgs/pwgs_tools.py +++ b/src/lib/djerba/plugins/pwgs/pwgs_tools.py @@ -17,7 +17,8 @@ def preprocess_results(self, results_path): try: results_dict = { constants.TUMOUR_FRACTION_ZVIRAN: float('%.1E' % Decimal(row[7]))*100, - constants.PVALUE: float('%.3E' % Decimal(row[10])) + constants.PVALUE: float('%.3E' % Decimal(row[10])), + constants.DATASET_DETECTION_CUTOFF: float(row[11]) } except IndexError as err: msg = "Incorrect number of columns in vaf row: '{0}' ".format(row)+\ diff --git a/src/lib/djerba/plugins/pwgs/summary/summary_template.html b/src/lib/djerba/plugins/pwgs/summary/summary_template.html index e09588a88..6e5441576 100644 --- a/src/lib/djerba/plugins/pwgs/summary/summary_template.html +++ b/src/lib/djerba/plugins/pwgs/summary/summary_template.html @@ -8,10 +8,10 @@

Summary

-
+

-

Minimal Residual Disease: ${results.get(constants.CTDNA_DETECTION)}

+

Cell-free DNA tumour burden: ${results.get(constants.CTDNA_DETECTION)}

Reads Checked Reads DetectedDetected:Checked Reads Ratio (%)Detected:Checked Reads (%)
${results.get(constants.PRIMARY_CANCER)}
Site of biopsy:Liquid BiopsyBlood cell-free DNA
Study:${results.get(constants.STUDY)}
diff --git a/src/lib/djerba/plugins/pwgs/summary/test/plugin_test.py b/src/lib/djerba/plugins/pwgs/summary/test/plugin_test.py index a7b2ff80d..ec6dc3afa 100755 --- a/src/lib/djerba/plugins/pwgs/summary/test/plugin_test.py +++ b/src/lib/djerba/plugins/pwgs/summary/test/plugin_test.py @@ -41,7 +41,7 @@ def testPwgsSummary(self): params = { self.INI: self.INI_NAME, self.JSON: json_location, - self.MD5: '6416078989efc8197eb5179ebf379614' + self.MD5: 'ee5d5bc62807914086abcd66c6038e85' } self.run_basic_test(input_dir, params) diff --git a/src/lib/djerba/plugins/supplement/body/supplementary_materials_template.html b/src/lib/djerba/plugins/supplement/body/supplementary_materials_template.html index 826272387..b1ef53e6b 100644 --- a/src/lib/djerba/plugins/supplement/body/supplementary_materials_template.html +++ b/src/lib/djerba/plugins/supplement/body/supplementary_materials_template.html @@ -73,13 +73,7 @@
Mutational sampling analysis:

-
-
-
-
-
-
-
+ ${html_builder().section_cells_end()} diff --git a/src/lib/djerba/plugins/supplement/body/test/plugin_test.py b/src/lib/djerba/plugins/supplement/body/test/plugin_test.py index 5feca789f..6b184bc83 100755 --- a/src/lib/djerba/plugins/supplement/body/test/plugin_test.py +++ b/src/lib/djerba/plugins/supplement/body/test/plugin_test.py @@ -24,7 +24,7 @@ def testPwgsSupplementary(self): params = { self.INI: 'PWGS.supp.ini', self.JSON: json_location, - self.MD5: '2cb233975ad625231914a78768691a39' + self.MD5: '878c2defac8b85e2ca9000fa13479d03' } self.run_basic_test(test_source_dir, params) @@ -34,7 +34,7 @@ def testTarSupplementary(self): params = { self.INI: 'TAR.supp.ini', self.JSON: json_location, - self.MD5: '00b7c6f670fbe7503eb9133f109b74d9' + self.MD5: '4ff388fc36cba071715101583eb467af' } self.run_basic_test(test_source_dir, params) @@ -44,7 +44,7 @@ def testTarFailSupplementary(self): params = { self.INI: 'TAR.FAIL.supp.ini', self.JSON: json_location, - self.MD5: '56820f2e76a36d08089788239875e967' + self.MD5: 'b0b0c44835ff2055316f7df6408b8fa4' } self.run_basic_test(test_source_dir, params) @@ -54,7 +54,7 @@ def testWgtsSupplementary(self): params = { self.INI: 'WGTS.supp.ini', self.JSON: json_location, - self.MD5: 'dd9613479492b0e749a24f69d13fe802' + self.MD5: 'bb7e1ce8b6af9fcd3003be6d8ad5f976' } self.run_basic_test(test_source_dir, params) @@ -64,7 +64,7 @@ def testWgtsFailSupplementary(self): params = { self.INI: 'WGTS.FAIL.supp.ini', self.JSON: json_location, - self.MD5: '239fa6575240eb62e7779509f027b49d' + self.MD5: '56f3eb5c6c662cf67e8a4fb8ea219f09' } self.run_basic_test(test_source_dir, params) @@ -74,7 +74,7 @@ def testWgts40XSupplementary(self): params = { self.INI: 'WGTS40X.supp.ini', self.JSON: json_location, - self.MD5: 'bbed79bd0c16e46b620e2c4c9ff69bbe' + self.MD5: '021ef28363c75a55b69ff9abd6167320' } self.run_basic_test(test_source_dir, params) @@ -84,7 +84,7 @@ def testWgts40XFailSupplementary(self): params = { self.INI: 'WGTS40X.FAIL.supp.ini', self.JSON: json_location, - self.MD5: 'b56fe1b49400f8da5d97530c446c0d93' + self.MD5: '42e54fe6c150806d8d884d2c762957a1' } self.run_basic_test(test_source_dir, params)