From b9b369b91ca411d9bded3c7f51f31ef0974810e6 Mon Sep 17 00:00:00 2001
From: pruzanov <pruzanov@oicr.on.ca>
Date: Tue, 5 Jan 2021 18:13:44 -0500
Subject: [PATCH] Updated SequenzaProcessing script, added reference assembly
 and genome size parameters

---
 SequenzaProcess_v2.2.R | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/SequenzaProcess_v2.2.R b/SequenzaProcess_v2.2.R
index b7ff63b..019c34a 100755
--- a/SequenzaProcess_v2.2.R
+++ b/SequenzaProcess_v2.2.R
@@ -5,8 +5,10 @@ library(optparse)
 option_list = list(
   make_option(c("-s", "--seqz_file"), type="character", default=NULL,
            help="varscan snp file name", metavar="character"),
-  make_option(c("-r", "--ref_file"), type="character", default="hg19",
+  make_option(c("-r", "--ref_assembly"), type="character", default="hg19",
            help="reference assembly id", metavar="character"),
+  make_option(c("-z", "--genome_size"), type="integer", default=23,
+              help="genome siZe, as number of chromosomes to analyze [default= %default]", metavar="integer"),
   make_option(c("-b", "--breaks_method"), type="character", default="het",
            help="breaks method for segmentation [default= %default]", metavar="character"),
   make_option(c("-i", "--fit_method"), type="character", default="baf",
@@ -39,7 +41,7 @@ opt_parser = OptionParser(option_list=option_list, add_help_option=FALSE);
 opt = parse_args(opt_parser);
 
 FILE   <- opt$seqz_file
-REF    <- opt$ref_file
+REF    <- opt$ref_assembly
 SAMPLE <- opt$prefix
 PLOIDY <- opt$ploidy_file
 GAMMA  <- opt$gamma
@@ -50,7 +52,8 @@ BREAKS <- opt$breaks_method
 FIT    <- opt$fit_method
 MAXVAR <- opt$maxvar
 WIDTH  <- opt$width
-HEIGHT <- opt$height 
+HEIGHT <- opt$height
+SIZE   <- opt$genome_size
 min_read_normal <- opt$min.reads.normal
 min_read_baf <- opt$min.reads.baf
 
@@ -91,6 +94,7 @@ sequenza.extract <- function(file, gz = TRUE, window = 1e6, overlap = 1, gamma =
     gc.stats <- gc.sample.stats(file, gz = gz)
   }
   chr.vect <- as.character(gc.stats$file.metrics$chr)
+  print(chr.vect)
   if (normalization.method != "mean") {
     gc.vect  <- setNames(gc.stats$raw.median, gc.stats$gc.values)
   } else {
@@ -111,12 +115,13 @@ sequenza.extract <- function(file, gz = TRUE, window = 1e6, overlap = 1, gamma =
   } else {
     chromosome.list <- chromosome.list[chromosome.list %in% chr.vect]
   }
+  print(paste0("Cromosome List: ",chromosome.list))
   for (chr in chromosome.list){
     if (verbose){
       message("Processing ", chr, ": ", appendLF = FALSE) 
     }
     file.lines <- gc.stats$file.metrics[which(chr.vect == chr), ]
-    seqz.data   <- read.seqz(file, gz = gz, n.lines = c(file.lines$start, file.lines$end))
+    seqz.data   <- read.seqz(file , gz = gz, n.lines = c(file.lines$start, file.lines$end))
     seqz.data$adjusted.ratio <- round(seqz.data$depth.ratio / gc.vect[as.character(seqz.data$GC.percent)], 3)
     seqz.hom <- seqz.data$zygosity.normal == 'hom'
     seqz.het <- seqz.data[!seqz.hom, ]
@@ -246,7 +251,7 @@ EXTR  <- sequenza.extract(FILE, gz = FALSE,
                           breaks.method = BREAKS,
                           window = WINDOW,       # expose
                           gamma  = GAMMA,        # expose
-                          assembly = REF,
+                          assembly = REF,        # expose
                           min.reads.normal = min_read_normal, 
                           min.reads.baf = min_read_baf)     
 
@@ -254,9 +259,11 @@ print("Extract Ok")
 ratio_priority = FALSE
 
 # ======================= FITTING ===========================================
-load(file=PLOIDY) # goes to module
-priors <- subset(ploidy_table,cancer_type==TYPE)[c("CN","value")]
-
+priors = data.frame(CN = 2, value = 2)
+if (!is.null(PLOIDY)) {
+ load(file=PLOIDY) # goes to module
+ priors <- subset(ploidy_table,cancer_type==TYPE)[c("CN","value")]
+}
 # run sequenza.fit in a tryCatch block
 CP <- tryCatch({
  value<-sequenza.fit(EXTR,
@@ -264,7 +271,7 @@ CP <- tryCatch({
                      ratio.priority = ratio_priority,
                      method = FIT,
                      XY = c(X = "chrX", Y = "chrY"),
-                     chromosome.list = 1:23)
+                     chromosome.list = 1:SIZE)
  
  value
 }, error = function(err) {
@@ -276,7 +283,7 @@ CP <- tryCatch({
                         ratio.priority = ratio_priority,
                         method = "mufreq",
                         XY = c(X = "chrX", Y = "chrY"),
-                        chromosome.list = 1:23)
+                        chromosome.list = 1:SIZE)
          value
    }, error = function(err) {
       message("Fitting with mufreq failed")
@@ -294,7 +301,7 @@ sequenza.results(EXTR,
                  CNt.max = MAXVAR,
                  ratio.priority = ratio_priority,
                  XY = c(X = "chrX", Y = "chrY"),
-                 chromosome.list = 1:23)
+                 chromosome.list = 1:SIZE)
 
 print("Results Ok")
 print ("Getting alternative purity solutions");
@@ -318,7 +325,7 @@ if(length(purities)>1){ ## bug fix to not run alt solutions if there are no alt
                     SAMPLE,
                     out.dir = output_alt,
                     XY = c(X = "chrX", Y = "chrY"),
-                    chromosome.list = 1:23,
+                    chromosome.list = 1:SIZE,
                     cellularity = purities[i],
                     ploidy = ploidies[i])
  }