Skip to content

Commit

Permalink
added metabrain analysis and pleiotropy qc
Browse files Browse the repository at this point in the history
  • Loading branch information
Cath committed May 24, 2021
1 parent 9173b3a commit 22337e4
Show file tree
Hide file tree
Showing 25 changed files with 801 additions and 27 deletions.
17 changes: 11 additions & 6 deletions R/combine_results_liberal_r2_0.2.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ full_results <- data.frame()

for (i in 1:length(file_list_res)) {
temp_data <- read_tsv(file_list_res[i])
temp_data <- temp_data[,!(names(temp_data)=="fdr_qval")]
full_results <- rbind(full_results, temp_data)
}

Expand Down Expand Up @@ -54,15 +55,19 @@ write.table(full_results_significant, str_c("full_results_liberal_r2_0.2_", EXPO
file_list_qc <- str_subset(file_list, "egger")
file_list_qc <- str_subset(file_list_qc, "^(?!.*full)")

# read in each file and populate a data frame
full_results_qc <- data.frame()

for (i in 1:length(file_list_qc)) {
temp_data <- read_tsv(file_list_qc[i])
full_results_qc <- rbind(full_results_qc, temp_data)
if (length(file_list_qc) > 0) {
full_results_qc <- data.frame()

for (i in 1:length(file_list_qc)) {
temp_data <- read_tsv(file_list_qc[i])
full_results_qc <- rbind(full_results_qc, temp_data)
}

write.table(full_results_qc, str_c("full_results_liberal_r2_0.2_egger_cochransq_", EXPOSURE_DATA, "_", OUTCOME, ".txt"), sep = "\t", row.names = F)
}
# read in each file and populate a data frame

write.table(full_results_qc, str_c("full_results_liberal_r2_0.2_egger_cochransq_", EXPOSURE_DATA, "_", OUTCOME, ".txt"), sep = "\t", row.names = F)



Expand Down
14 changes: 7 additions & 7 deletions R/data_prep_eqtlgen.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ genes_id01[,3:5] <- sapply(genes_id01[,3:5], as.numeric)

# renames columns

names(genes_id01)<-c("exposure","gene.exposure","chromosome_name","start_position","end_position")
names(genes_id01)<-c("exposure","gene.exposure","chromosome_name","start_position","end_position")



Expand All @@ -75,11 +75,11 @@ genes_data <- data.frame()
# loop to keep snps within 5kb of gene start/end positions # from 1382626 eQTLs for 2918 genes to 280216 eQTLs for 2791 genes

for (i in 1:length(unique(genes_id$gene.exposure))) {

dat1 <- dat[which(dat$new_gene_id==genes_id$exposure[i] & dat$SNPChr==genes_id$chromosome_name[i] & dat$SNPPos >= (genes_id$start_position[i]-5000) & dat$SNPPos <= (genes_id$end_position[i]+5000)),]

genes_data <- rbind(genes_data,dat1)

}


Expand All @@ -104,12 +104,12 @@ full$eaf[mismatch] <- 1 - as.numeric(full$eaf[mismatch])

# calculate beta and standard error

full$beta <- as.numeric(full$Zscore) / sqrt(2 * as.numeric(full$eaf) *
(1- as.numeric(full$eaf)) *
full$beta <- as.numeric(full$Zscore) / sqrt(2 * as.numeric(full$eaf) *
(1- as.numeric(full$eaf)) *
(as.numeric(full$NrSamples) + as.numeric(full$Zscore)^2))

full$se = 1 / sqrt(2 * as.numeric(full$eaf) *
(1- as.numeric(full$eaf)) *
full$se = 1 / sqrt(2 * as.numeric(full$eaf) *
(1- as.numeric(full$eaf)) *
(as.numeric(full$NrSamples) + as.numeric(full$Zscore)^2))


Expand Down
79 changes: 79 additions & 0 deletions R/data_prep_metabrain_bg.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@


library("dplyr")
library("readr")
library("biomaRt")

# load druggable genome

druggable <- read_tsv("druggable_genome_new.txt", col_types=cols(.default="c"))


# load basal ganglia data

bg <- read_csv("eqtl_data_metabrain/metabrain_basalganglia_eur.csv", col_types=cols(.default="c"))
bg$new_gene_id <- gsub("\\..*","", bg$Gene)

bg <- subset(bg, bg$new_gene_id %in% druggable$gene_stable_id)


bg$rsid <- gsub(".*:rs", "rs",bg$`SNP Name`)
bg$rsid <- gsub("\\:.*", "",bg$rsid)

bg$beta <- gsub("\\ .*", "",bg$`Meta-analysis Beta (s.e.)`)
bg$se <- gsub(".*\\(", "",bg$`Meta-analysis Beta (s.e.)`)
bg$se <- gsub(")", "",bg$se)

names(bg) <- gsub(" ", "_", names(bg))


# find gene start and end positions #grch38
biolist <- as.data.frame(listMarts())
ensembl=useMart("ensembl")
esemblist <- as.data.frame(listDatasets(ensembl))
ensembl = useDataset("hsapiens_gene_ensembl",mart=ensembl)
filters = listFilters(ensembl)
attributes = listAttributes(ensembl)

t2g<-getBM(attributes=c('ensembl_gene_id',"ensembl_gene_id_version",'chromosome_name','start_position','end_position'), mart = ensembl)

my_ids <- data.frame(ensembl_gene_id_version=bg$Gene)
my_ids$ensembl_gene_id <- gsub("\\..*","", my_ids$ensembl_gene_id_version)

my_ids.version <- merge(my_ids, t2g, by= 'ensembl_gene_id')

bg_with_gene_pos <- left_join(bg, my_ids.version[,3:6], by = c("Gene"="ensembl_gene_id_version.y"))

bg_with_gene_pos <- bg_with_gene_pos[!is.na(bg_with_gene_pos$start_position),]



# keep 5kb window around the gene

if (all(bg_with_gene_pos$SNP_Chromosome == bg_with_gene_pos$Gene_Chromosome)) {
bg_with_gene_pos_cis <- bg_with_gene_pos
} else {
bg_with_gene_pos_cis <- bg_with_gene_pos[which(bg_with_gene_pos$SNP_Chromosome == bg_with_gene_pos$Gene_Chromosome),]
}

bg_with_gene_pos_cis$SNP_Chromosome_Position <- as.numeric(bg_with_gene_pos_cis$SNP_Chromosome_Position)

bg_5kb_window <- bg_with_gene_pos_cis[which(
bg_with_gene_pos_cis$SNP_Chromosome_Position >= (bg_with_gene_pos_cis$start_position-5000) &
bg_with_gene_pos_cis$SNP_Chromosome_Position <= (bg_with_gene_pos_cis$end_position+5000)
),]


bg_5kb_window <- tidyr::separate(bg_5kb_window,
col="SNP_Alleles",
"into"=c("allele1","allele2"),
sep="/"
)
bg_5kb_window$other_allele <- NA
bg_5kb_window$other_allele[which(bg_5kb_window$allele1==bg_5kb_window$Effect_Allele)] <- bg_5kb_window$allele2[which(bg_5kb_window$allele1==bg_5kb_window$Effect_Allele)]
bg_5kb_window$other_allele[which(bg_5kb_window$allele2==bg_5kb_window$Effect_Allele)] <- bg_5kb_window$allele1[which(bg_5kb_window$allele2==bg_5kb_window$Effect_Allele)]

write.table(bg_5kb_window, "eqtl_data_metabrain/metabrain_basalganglia_eur_exposure_dat_snps_5kb_window.txt", sep = "\t", row.names = F, quote = F)


print("mission complete")
78 changes: 78 additions & 0 deletions R/data_prep_metabrain_cortex.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@


library("dplyr")
library("readr")
library("biomaRt")


# load druggable genome

druggable <- read_tsv("druggable_genome_new.txt", col_types=cols(.default="c"))


# load cortex data

cortex <- read_csv("eqtl_data_metabrain/metabrain_cortex_eur.csv", col_types=cols(.default="c"))
cortex$new_gene_id <- gsub("\\..*","", cortex$Gene)

cortex <- subset(cortex, cortex$new_gene_id %in% druggable$gene_stable_id)


cortex$rsid <- gsub(".*:rs", "rs",cortex$`SNP Name`)
cortex$rsid <- gsub("\\:.*", "",cortex$rsid)

cortex$beta <- gsub("\\ .*", "",cortex$`Meta-analysis Beta (s.e.)`)
cortex$se <- gsub(".*\\(", "",cortex$`Meta-analysis Beta (s.e.)`)
cortex$se <- gsub(")", "",cortex$se)

names(cortex) <- gsub(" ", "_", names(cortex))


# find gene start and end positions #grch38
biolist <- as.data.frame(listMarts())
ensembl=useMart("ensembl")
esemblist <- as.data.frame(listDatasets(ensembl))
ensembl = useDataset("hsapiens_gene_ensembl",mart=ensembl)
filters = listFilters(ensembl)
attributes = listAttributes(ensembl)

t2g<-getBM(attributes=c('ensembl_gene_id',"ensembl_gene_id_version",'chromosome_name','start_position','end_position'), mart = ensembl)

my_ids <- data.frame(ensembl_gene_id_version=cortex$Gene)
my_ids$ensembl_gene_id <- gsub("\\..*","", my_ids$ensembl_gene_id_version)

my_ids.version <- merge(my_ids, t2g, by= 'ensembl_gene_id')

cortex_with_gene_pos <- left_join(cortex, my_ids.version[,3:6], by = c("Gene"="ensembl_gene_id_version.y"))

cortex_with_gene_pos <- cortex_with_gene_pos[!is.na(cortex_with_gene_pos$start_position),]


# keep 5kb window around the gene

if (all(cortex_with_gene_pos$SNP_Chromosome == cortex_with_gene_pos$Gene_Chromosome)) {
cortex_with_gene_pos_cis <- cortex_with_gene_pos
} else {
cortex_with_gene_pos_cis <- cortex_with_gene_pos[which(cortex_with_gene_pos$SNP_Chromosome == cortex_with_gene_pos$Gene_Chromosome),]
}

cortex_with_gene_pos_cis$SNP_Chromosome_Position <- as.numeric(cortex_with_gene_pos_cis$SNP_Chromosome_Position)

cortex_5kb_window <- cortex_with_gene_pos_cis[which(
cortex_with_gene_pos_cis$SNP_Chromosome_Position >= (cortex_with_gene_pos_cis$start_position-5000) &
cortex_with_gene_pos_cis$SNP_Chromosome_Position <= (cortex_with_gene_pos_cis$end_position+5000)
),]

cortex_5kb_window <- tidyr::separate(cortex_5kb_window,
col="SNP_Alleles",
"into"=c("allele1","allele2"),
sep="/"
)
cortex_5kb_window$other_allele <- NA
cortex_5kb_window$other_allele[which(cortex_5kb_window$allele1==cortex_5kb_window$Effect_Allele)] <- cortex_5kb_window$allele2[which(cortex_5kb_window$allele1==cortex_5kb_window$Effect_Allele)]
cortex_5kb_window$other_allele[which(cortex_5kb_window$allele2==cortex_5kb_window$Effect_Allele)] <- cortex_5kb_window$allele1[which(cortex_5kb_window$allele2==cortex_5kb_window$Effect_Allele)]

write.table(cortex_5kb_window, "eqtl_data_metabrain/metabrain_cortex_eur_exposure_dat_snps_5kb_window.txt", sep = "\t", row.names = F, quote = F)


print("mission complete")
19 changes: 18 additions & 1 deletion R/data_prep_nalls2014.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,21 @@ psychencode$chr_pos <- str_c(psychencode$chr, ":",psychencode$position)



metabrain_bg <- read_tsv("eqtl_data_metabrain/metabrain_basalganglia_eur_exposure_dat_snps_5kb_window_hg19.txt", col_types=cols(.default="c"))

metabrain_bg$chr_pos <- str_c("chr", metabrain_bg$hg19_snp_chr,":",metabrain_bg$hg19_snp_pos,sep="")

names(metabrain_bg)[names(metabrain_bg)=="rsid"] <- "rsid_metabrain_bg"



metabrain_cortex <- read_tsv("eqtl_data_metabrain/metabrain_cortex_eur_exposure_dat_snps_5kb_window_hg19.txt", col_types=cols(.default="c"))

metabrain_cortex$chr_pos <- str_c("chr", metabrain_cortex$hg19_snp_chr,":",metabrain_cortex$hg19_snp_pos,sep="")

names(metabrain_cortex)[names(metabrain_cortex)=="rsid"] <- "rsid_metabrain_cortex"




# pqtl
Expand All @@ -40,8 +55,10 @@ names(pqtl)[1] <- "rsid_replication_pqtl"
pd_risk_discovery_1 <- left_join(pd_risk_discovery, eqtlgen[,c("chr_pos", "rsid_eqtlgen")], by = "chr_pos")
pd_risk_discovery_2 <- left_join(pd_risk_discovery_1, psychencode[,c("chr_pos", "rsid_psychencode")], by = "chr_pos")
pd_risk_discovery_3 <- left_join(pd_risk_discovery_2, pqtl[,c("chr_pos", "rsid_replication_pqtl")], by = "chr_pos")
pd_risk_discovery_4 <- left_join(pd_risk_discovery_3, metabrain_bg[,c("chr_pos", "rsid_metabrain_bg")], by = "chr_pos")
pd_risk_discovery_5 <- left_join(pd_risk_discovery_4, metabrain_cortex[,c("chr_pos", "rsid_metabrain_cortex")], by = "chr_pos")


write.table(pd_risk_discovery_3,"outcome_data/pd_risk_discovery_discovery_risk.txt", sep = "\t", row.names = F, quote = F)
write.table(pd_risk_discovery_5,"outcome_data/pd_discovery_risk.txt", sep = "\t", row.names = F, quote = F)

print("mission complete")
7 changes: 3 additions & 4 deletions R/data_prep_psychencode.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ genes_id01[,3:5] <- sapply(genes_id01[,3:5], as.numeric)

# renames columns

names(genes_id01)<-c("exposure","gene.exposure","chromosome_name","start_position","end_position")
names(genes_id01)<-c("exposure","gene.exposure","chromosome_name","start_position","end_position")



Expand Down Expand Up @@ -79,11 +79,11 @@ genes_data <- data.frame()
# loop to keep snps within 5kb of gene start/end positions # from 343084 eQTLs for 3645 genes to 80069 eQTLs for 2449 genes

for (i in 1:length(unique(genes_id$gene.exposure))) {

dat1 <- dat[which(dat$new_gene_id==genes_id$exposure[i] & dat$SNP_chr==genes_id$chromosome_name[i] & dat$SNP_start >= (genes_id$start_position[i]-5000) & dat$SNP_start <= (genes_id$end_position[i]+5000)),]

genes_data <- rbind(genes_data,dat1)

}


Expand Down Expand Up @@ -114,4 +114,3 @@ write.table(full_with_names, "eqtl_data_psychencode/psychencode_exposure_dat_snp


print("mission complete")

Loading

0 comments on commit 22337e4

Please sign in to comment.