Merge pull request #430 from oicr-gsi/release-1.6.6

Release 1.6.6
oicr-gsi · Jul 19, 2024 · 79c787f · 79c787f
2 parents 63664d0 + 9a9fdec
commit 79c787f
Show file tree

Hide file tree

Showing 7 changed files with 33 additions and 15 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,5 +1,8 @@
 # CHANGELOG
 
+## v1.6.6: 2024-07-19
+- GCGI-1391: Fixed incorrect column names in data_CNA_oncoKBgenes_nonDiploid.txt, which had impacted oncoKB therapy annotation
+
 ## v1.6.5: 2024-07-11
 - GCGI-1336: Fixed "cannot resolve assay" issue in case overview plugin
 - GCGI-1887: Changed "no effect" fusion reading frame to "unknown"

diff --git a/src/lib/djerba/plugins/wgts/cnv_purple/plugin.py b/src/lib/djerba/plugins/wgts/cnv_purple/plugin.py
@@ -53,6 +53,7 @@ def extract(self, config):
         purity_ploidy = self.workspace.read_json(pc.PURITY_PLOIDY)
         self.logger.debug("Read purity/ploidy from workspace: {0}".format(purity_ploidy))
         ploidy = purity_ploidy[pc.PLOIDY]
+        tumour_id = wrapper.get_my_string(core_constants.TUMOUR_ID)
 
         # process purple files
         self.logger.debug("Starting purple data processing")
@@ -62,18 +63,18 @@ def extract(self, config):
         self.logger.debug("Evaluating purity fit")
         processor.consider_purity_fit(purple_files[pc.PURPLE_PURITY_RANGE])
         self.logger.debug("Converting data format")
-        processor.convert_purple_to_gistic(purple_files[pc.PURPLE_GENE], ploidy)
+        processor.convert_purple_to_gistic(purple_files[pc.PURPLE_GENE], tumour_id, ploidy)
         self.logger.debug("Analyzing genome segments")
         whizbam_link = processor.construct_whizbam_link(
             wrapper.get_my_string(pc.WHIZBAM_PROJECT),
-            wrapper.get_my_string(core_constants.TUMOUR_ID)
+            tumour_id,
         )
         cnv_plot_base64 = processor.analyze_segments(purple_files[pc.PURPLE_CNV],
                                                      purple_files[pc.PURPLE_SEG],
                                                      whizbam_link,
                                                      purity_ploidy[pc.PURITY],
                                                      ploidy)
-        processor.write_copy_states()
+        processor.write_copy_states(tumour_id)
 
         # write alternate solutions launcher JSON
         if os.path.exists(os.path.join(work_dir, core_constants.DEFAULT_PATH_INFO)):

diff --git a/src/lib/djerba/plugins/wgts/cnv_purple/purple_tools.py b/src/lib/djerba/plugins/wgts/cnv_purple/purple_tools.py
@@ -72,14 +72,15 @@ def construct_whizbam_link(studyid, tumourid):
         ))
         return whizbam
 
-    def convert_purple_to_gistic(self, purple_gene_file, ploidy):
+    def convert_purple_to_gistic(self, purple_gene_file, tumour_id, ploidy):
         dir_location = os.path.dirname(__file__)
         oncolistpath = os.path.join(self.data_dir, pc.ONCOLIST)
         cmd = [
             'Rscript', os.path.join(self.r_script_dir, "process_CNA_data.r"),
             '--genefile', purple_gene_file,
             '--outdir', self.work_dir,
             '--oncolist', oncolistpath,
+            '--tumourid', tumour_id,
             '--ploidy', str(ploidy)
         ]
         runner = subprocess_runner()
@@ -144,7 +145,7 @@ def unzip_purple(self, purple_zip):
                 purple_files[pc.PURPLE_GENE] = zf.extract(name, self.work_dir)
         return purple_files
 
-    def write_copy_states(self):
+    def write_copy_states(self, tumour_id):
         """
         Write the copy states to JSON for later reference, eg. by snv/indel plugin
         """
@@ -161,7 +162,7 @@ def write_copy_states(self):
             for row in reader:
                 gene = row['Hugo_Symbol']
                 try:
-                    cna = int(row['minCopyNumber'])
+                    cna = int(row[tumour_id])
                     states[gene] = conversion[cna]
                 except (TypeError, KeyError) as err:
                     msg = "Cannot convert unknown CNA code: {0}".format(row[1])

diff --git a/src/lib/djerba/plugins/wgts/cnv_purple/r/CNA_supporting_functions.r b/src/lib/djerba/plugins/wgts/cnv_purple/r/CNA_supporting_functions.r
@@ -88,7 +88,7 @@ construct_whizbam_links <- function(segs, whizbam_url) {
   return(segs)
 }
 
-preProcCNA <- function(genefile, oncolist, ploidy=2, ploidy_multiplier=2.4){
+preProcCNA <- function(genefile, oncolist, tumour_id, ploidy=2, ploidy_multiplier=2.4){
   #' take segment-level CNV calls and translate to genes
 
   amp = ploidy_multiplier * ploidy
@@ -97,7 +97,11 @@ preProcCNA <- function(genefile, oncolist, ploidy=2, ploidy_multiplier=2.4){
   oncogenes <- oncolist$Hugo.Symbol[oncolist$OncoKB.Annotated == "Yes"]
 
   df_cna_thresh <-  genefile[,c("gene","minCopyNumber")]
-
+
+  # The copy-number column must be named after the tumour ID rather than
+  # "minCopyNumber", so rename the second column to the tumour ID.
+  names(df_cna_thresh)[2] <- tumour_id
+
   # threshold data
   for (i in 2:ncol(df_cna_thresh))
   {
@@ -117,8 +121,8 @@ preProcCNA <- function(genefile, oncolist, ploidy=2, ploidy_multiplier=2.4){
  df_cna_thresh_onco_nondiploid <- df_cna_thresh_onco[(df_cna_thresh_onco[,2] != 0), ]
 
  df_cna_thresh$Hugo_Symbol <- NULL
- df_cna_thresh_onco_nondiploid$Hugo_Symbol <- NULL
-
+ df_cna_thresh_onco_nondiploid$gene <- NULL
+ 
  # return the list of dfs
  CNAs=list()
  CNAs[[1]] <- df_cna_thresh

diff --git a/src/lib/djerba/plugins/wgts/cnv_purple/r/process_CNA_data.r b/src/lib/djerba/plugins/wgts/cnv_purple/r/process_CNA_data.r
@@ -8,6 +8,7 @@ option_list = list(
   make_option(c("-d", "--outdir"), type="character", default=NULL, help="output directory", metavar="character"),
   make_option(c("-g", "--genefile"), type="character", default=NULL, help="seg file", metavar="character"),
   make_option(c("-o", "--oncolist"), type="character", default=NULL, help="oncoKB cancer genes", metavar="character"),
+  make_option(c("-t", "--tumourid"), type="character", default=NULL, help="sample tumour id", metavar="character"),
   make_option(c("-p", "--ploidy"), type="character", default=NULL, help="sample ploidy for CN cutoffs", metavar="character")
 )
 
@@ -19,6 +20,7 @@ opt <- parse_args(opt_parser)
 outdir    <- opt$outdir
 genefile  <- opt$genefile
 oncolist  <- opt$oncolist
+tumour_id <- opt$tumourid
 ploidy    <- as.numeric(opt$ploidy)
 
 # source functions
@@ -34,7 +36,7 @@ if (is.null(genefile)) {
   oncogenes <- read.delim(oncolist, header=TRUE)
   raw_gene_data <- read.delim(genefile, header=TRUE) 
 
-  CNAs <- preProcCNA(raw_gene_data, oncogenes, ploidy)
+  CNAs <- preProcCNA(raw_gene_data, oncogenes, tumour_id, ploidy)
 
   # necessary file to find copy number profile of genes with small mutations
   write.table(data.frame("Hugo_Symbol"=rownames(CNAs[[1]]), CNAs[[1]], check.names=FALSE),

diff --git a/src/lib/djerba/plugins/wgts/cnv_purple/tests/testthat/test_CNA_supporting_functions.R b/src/lib/djerba/plugins/wgts/cnv_purple/tests/testthat/test_CNA_supporting_functions.R
@@ -35,12 +35,19 @@ test_that("preProcCNA returns correct gene-level alterations with PURPLE input",
 
     gene_file_path = paste0(testdatadir, "/plugins/cnv-purple/purple.cnv.gene.tsv")
     genefile <- read.delim(gene_file_path, header=TRUE) 
+    tumour_id = "LBR-0242_LCM"
     ploidy=3
 
-    CNAs = preProcCNA(genefile=genefile, oncolist=oncolist, ploidy=ploidy)
+    CNAs = preProcCNA(genefile=genefile, oncolist=oncolist, tumour_id=tumour_id, ploidy=ploidy)
     df_cna_thresh_onco_nondiploid = as.data.frame(CNAs[2])
-
-    expect_equal(df_cna_thresh_onco_nondiploid$minCopyNumber[df_cna_thresh_onco_nondiploid$gene == "ARID1A"], 2)
+
+    # Convert row names to a proper column
+    df_cna_thresh_onco_nondiploid <- tibble::rownames_to_column(df_cna_thresh_onco_nondiploid, var = "gene")
+
+    # Rename the second column (the copy-number values; column 1 is now "gene")
+    colnames(df_cna_thresh_onco_nondiploid)[2] <- "LBR.0242_LCM" 
+
+    expect_equal(df_cna_thresh_onco_nondiploid$LBR.0242_LCM[df_cna_thresh_onco_nondiploid$gene == "ARID1A"], 2)
 
   }
 )

diff --git a/src/lib/djerba/version.py b/src/lib/djerba/version.py
@@ -3,7 +3,7 @@
 # 2) we can import it in setup.py for the same reason
 # 3) it only needs to be stored in one place
 # See https://stackoverflow.com/a/16084844
-__version__ = '1.6.5'
+__version__ = '1.6.6'
 
 def get_djerba_version():
     return __version__