-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
211 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
### Generate sequenza ploidy priors table based on PANCAN Ascat results | ||
### Script from Jeffrey Bruce, modified by Peter Ruzanov | ||
|
||
library(openxlsx) | ||
cmd_args=commandArgs(trailingOnly = TRUE) | ||
FILE = cmd_args[1] | ||
purity_ploidy<-read.xlsx( | ||
FILE, | ||
sheet = 1, | ||
startRow = 1, | ||
colNames = TRUE, | ||
rowNames = FALSE, | ||
detectDates = FALSE, | ||
skipEmptyRows = TRUE, | ||
skipEmptyCols = TRUE, | ||
rows = NULL, | ||
cols = NULL, | ||
check.names = FALSE, | ||
namedRegion = NULL, | ||
na.strings = "NA", | ||
fillMergedCells = FALSE | ||
) | ||
|
||
cancer_types <- unique(purity_ploidy$tumor_type) | ||
|
||
# LAML Acute Myeloid Leukemia | ||
# ACC Adrenocortical carcinoma | ||
# BLCA Bladder Urothelial Carcinoma | ||
# LGG Brain Lower Grade Glioma | ||
# BRCA Breast invasive carcinoma | ||
# CESC Cervical squamous cell carcinoma and endocervical adenocarcinoma | ||
# CHOL Cholangiocarcinoma | ||
# LCML Chronic Myelogenous Leukemia | ||
# COAD Colon adenocarcinoma | ||
# CNTL Controls | ||
# ESCA Esophageal carcinoma | ||
# FPPP FFPE Pilot Phase II | ||
# GBM Glioblastoma multiforme | ||
# HNSC Head and Neck squamous cell carcinoma | ||
# KICH Kidney Chromophobe | ||
# KIRC Kidney renal clear cell carcinoma | ||
# KIRP Kidney renal papillary cell carcinoma | ||
# LIHC Liver hepatocellular carcinoma | ||
# LUAD Lung adenocarcinoma | ||
# LUSC Lung squamous cell carcinoma | ||
# DLBC Lymphoid Neoplasm Diffuse Large B-cell Lymphoma | ||
# MESO Mesothelioma | ||
# MISC Miscellaneous | ||
# OV Ovarian serous cystadenocarcinoma | ||
# PAAD Pancreatic adenocarcinoma | ||
# PCPG Pheochromocytoma and Paraganglioma | ||
# PRAD Prostate adenocarcinoma | ||
# READ Rectum adenocarcinoma | ||
# SARC Sarcoma | ||
# SKCM Skin Cutaneous Melanoma | ||
# STAD Stomach adenocarcinoma | ||
# TGCT Testicular Germ Cell Tumors | ||
# THYM Thymoma | ||
# THCA Thyroid carcinoma | ||
# UCS Uterine Carcinosarcoma | ||
# UCEC Uterine Corpus Endometrial Carcinoma | ||
# UVM Uveal Melanoma | ||
|
||
ploidy_table<- data.frame(CN = "", | ||
value = "", | ||
cancer_type="")[0,] | ||
|
||
for (cancer in cancer_types[which(cancer_types != "NA")]) { | ||
tab <- purity_ploidy[purity_ploidy$tumor_type==cancer,] | ||
num <- nrow(tab) | ||
|
||
tmp_frame <- data.frame(CN = 0:7, | ||
value = rep(0,8), | ||
cancer_type=rep(cancer,8)) | ||
|
||
for(i in 0:7){ | ||
tmp_frame[i+1,"value"] <- sum(tab$ploidy>(i-0.5) & | ||
tab$ploidy <= (i+0.5),na.rm=TRUE)/num | ||
} | ||
|
||
ploidy_table <- rbind(ploidy_table,tmp_frame) | ||
} | ||
|
||
## add the percentages for all cancers | ||
tmp_frame <- data.frame(CN = 0:7, | ||
value = rep(0,8), | ||
cancer_type=rep("all",8)) | ||
|
||
num <- nrow(purity_ploidy) | ||
for(i in 0:7){ | ||
tmp_frame[i+1,"value"] <- sum(purity_ploidy$ploidy>(i-0.5) & | ||
purity_ploidy$ploidy <= (i+0.5),na.rm=TRUE)/num | ||
} | ||
|
||
ploidy_table <- rbind(ploidy_table,tmp_frame) | ||
|
||
|
||
save(ploidy_table, | ||
file="PANCAN_ASCAT_ploidy_prob.Rdata") | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
library(sequenza) | ||
library(optparse) | ||
|
||
## For now, test if commands are in original, trailing format, or new opt-parse format | ||
option_list = list( | ||
make_option(c("-s", "--snp_file"), type="character", default=NULL, | ||
help="varscan snp file name", metavar="character"), | ||
make_option(c("-c", "--cnv_file"), type="character", default=NULL, | ||
help="varscan copy number file name [default= %default]", metavar="character"), | ||
make_option(c("-y", "--remove_Y_vars"), type="logical", default=TRUE, | ||
help="Remove Y chromosome variants? [default= %default]", metavar="logical"), | ||
make_option(c("-p", "--prefix"), type="character", default=NULL, | ||
help="Output prefix for processed data file [default= %default]", metavar="character") | ||
); | ||
|
||
opt_parser = OptionParser(option_list=option_list); | ||
opt = parse_args(opt_parser); | ||
|
||
snp.file <- opt$snp_file | ||
cnv.file <- opt$cnv_file | ||
remove_Y <- opt$remove_Y_vars | ||
prefix <- opt$prefix | ||
|
||
print("Getting SeqZ file for Sequenza:") | ||
|
||
######## | ||
# MAIN # | ||
######## | ||
|
||
## read the snp and cnv files | ||
snp <- read.table(snp.file,sep="\t",header = TRUE) | ||
cnv <- read.table(cnv.file,sep="\t",header = TRUE) | ||
|
||
## remove chromosome M because it seems to cause problems | ||
snp <-subset(snp,chrom!="chrM") | ||
cnv <-subset(cnv,chrom!="chrM") | ||
|
||
## remove chromosome Y because it seems to cause problems | ||
if(remove_Y){ | ||
snp<-subset(snp,chrom!="chrY") | ||
cnv<-subset(cnv,chrom!="chrY") | ||
} | ||
|
||
##### prepare sequenza data file | ||
seqz.data <- VarScan2seqz(varscan.somatic = snp, | ||
varscan.copynumber = cnv) | ||
|
||
file <- paste0(prefix, ".seqz") | ||
write.table(seqz.data, file, col.names = TRUE, row.names = FALSE, sep = "\t") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
library(sequenza) | ||
library(optparse) | ||
|
||
## For now, test if commands are in original, trailing format, or new opt-parse format | ||
option_list = list( | ||
make_option(c("-s", "--seqz_file"), type="character", default=NULL, | ||
help="varscan snp file name", metavar="character"), | ||
make_option(c("-l", "--ploidy_file"), type="character", default=NULL, | ||
help="ploidy file name", metavar="character"), | ||
make_option(c("-p", "--prefix"), type="character", default=NULL, | ||
help="Output prefix for result files [default= %default]", metavar="character"), | ||
make_option(c("-g", "--gamma"), type="integer", default=80, | ||
help="Gamma parameter for data extraction [default= %default]", metavar="integer"), | ||
make_option(c("-f", "--female"), type="logical", default=TRUE, | ||
help="Female Sex flag [default= %default]", metavar="logical"), | ||
make_option(c("-w", "--window"), type="integer", default=50, | ||
help="Window for detection of CNVs [default= %default]", metavar="integer"), | ||
make_option(c("-t", "--type"), type="character", default="PCGP", | ||
help="Window for detection of CNVs [default= %default]", metavar="character") | ||
); | ||
|
||
opt_parser = OptionParser(option_list=option_list); | ||
opt = parse_args(opt_parser); | ||
|
||
FILE <- opt$seqz_file | ||
SAMPLE <- opt$prefix | ||
PLOIDY <- opt$ploidy_file | ||
GAMMA <- opt$gamma | ||
FEMALE <- opt$female | ||
WINDOW <- opt$window | ||
TYPE <- opt$type | ||
|
||
# ======================= PREPROCESSING =================================== | ||
EXTR <- sequenza.extract(FILE, gz = FALSE, | ||
breaks.method = "het", | ||
window = WINDOW, # expose | ||
gamma = GAMMA, # expose | ||
min.reads.normal = 10, | ||
min.reads.baf = 1) | ||
|
||
print("Extract Ok") | ||
ratio_priority = FALSE | ||
|
||
load(file=PLOIDY) # goes to module | ||
priors <- subset(ploidy_table,cancer_type==toupper(TYPE))[c("CN","value")] | ||
CP.example <- sequenza.fit(EXTR, | ||
priors.table = priors, | ||
method = "mufreq", | ||
ratio.priority = ratio_priority, | ||
chromosome.list = 1:22) | ||
|
||
print("Fit Ok") | ||
sequenza.results(EXTR, | ||
cp.table = CP.example, | ||
SAMPLE, | ||
out.dir = getwd(), | ||
female = FEMALE, # expose | ||
CNt.max = 20, | ||
ratio.priority = ratio_priority, | ||
XY = c(X = "chrX", Y = "chrY"), | ||
chromosome.list = 1:22) | ||
print("Results Ok") |