diff --git a/DESCRIPTION b/DESCRIPTION index 1e16baf..d2762b0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -27,7 +27,7 @@ LazyData: true Depends: R (>= 4.1) SystemRequirements: Python (>= 3.7.0) biocViews: -Imports: +Imports: echodata, echotabix, dplyr, @@ -50,7 +50,8 @@ Imports: S4Vectors, GenomeInfoDb, biomaRt, - IRanges + IRanges, + XGR Suggests: markdown, rmarkdown, diff --git a/NAMESPACE b/NAMESPACE index 3dd9787..f1a8600 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,24 +3,39 @@ export(CORCES_2020.get_HiChIP_FitHiChIP_overlap) export(CS_bin_plot) export(CS_counts_plot) +export(ROADMAP.construct_reference) +export(ROADMAP.query) +export(ROADMAP.query_and_plot) +export(XGR.download_and_standardize) +export(XGR.enrichment) +export(merge_celltype_specific_epigenomics) export(merge_finemapping_results) export(peak_overlap_plot) export(plot_dataset_overlap) export(super_summary_plot) -import(ggplot2) +import(ggplot2, except = c(geom_rect, ggsave)) +import(ggplot2, except = geom_rect) +import(ggplot2, except = ggsave) importFrom(DescTools,Divisors) +importFrom(DescTools,RoundTo) importFrom(GenomeInfoDb,seqlevelsStyle) importFrom(GenomicRanges,GRanges) +importFrom(GenomicRanges,GRangesList) importFrom(GenomicRanges,elementMetadata) importFrom(GenomicRanges,end) importFrom(GenomicRanges,findOverlaps) importFrom(GenomicRanges,makeGRangesFromDataFrame) importFrom(GenomicRanges,mcols) +importFrom(GenomicRanges,seqnames) importFrom(GenomicRanges,start) importFrom(IRanges,IRanges) +importFrom(IRanges,overlapsAny) +importFrom(R.utils,gzip) importFrom(RColorBrewer,brewer.pal) importFrom(S4Vectors,queryHits) importFrom(S4Vectors,subjectHits) +importFrom(XGR,xGRviaGenomicAnno) +importFrom(XGR,xRDataLoader) importFrom(biomaRt,getBM) importFrom(biomaRt,useMart) importFrom(data.table,as.data.table) @@ -30,6 +45,7 @@ importFrom(data.table,fwrite) importFrom(data.table,melt.data.table) importFrom(data.table,merge.data.table) importFrom(data.table,rbindlist) 
+importFrom(data.table,transpose) importFrom(dplyr,"%>%") importFrom(dplyr,arrange) importFrom(dplyr,case_when) @@ -39,12 +55,15 @@ importFrom(dplyr,group_by) importFrom(dplyr,mutate) importFrom(dplyr,n_distinct) importFrom(dplyr,rename) +importFrom(dplyr,sample_n) importFrom(dplyr,select) importFrom(dplyr,slice) importFrom(dplyr,summarise) importFrom(dplyr,summarise_at) +importFrom(dplyr,tally) importFrom(dplyr,top_n) importFrom(dplyr,vars) +importFrom(echotabix,query_tabular) importFrom(ggbio,autoplot) importFrom(ggbio,geom_arch) importFrom(ggbio,geom_rect) @@ -53,9 +72,18 @@ importFrom(ggbio,ggsave) importFrom(ggbio,plotGrandLinear) importFrom(ggbio,scale_x_sequnit) importFrom(ggbio,theme_genome) +importFrom(ggplot2,aes) +importFrom(ggplot2,element_text) +importFrom(ggplot2,ggplot_build) +importFrom(ggplot2,guide_legend) +importFrom(ggplot2,guides) +importFrom(ggplot2,scale_y_continuous) +importFrom(ggplot2,theme) +importFrom(ggplot2,theme_classic) importFrom(grDevices,dev.off) importFrom(grDevices,png) importFrom(haploR,queryRegulome) +importFrom(methods,is) importFrom(parallel,mclapply) importFrom(patchwork,plot_layout) importFrom(patchwork,plot_spacer) @@ -65,6 +93,7 @@ importFrom(scales,alpha) importFrom(stats,as.formula) importFrom(stats,formula) importFrom(stats,median) +importFrom(stats,p.adjust) importFrom(stats,setNames) importFrom(tidyr,separate) importFrom(utils,head) diff --git a/R/CS_bin_plot.R b/R/CS_bin_plot.R index 21f5fe6..b34bdcc 100644 --- a/R/CS_bin_plot.R +++ b/R/CS_bin_plot.R @@ -4,8 +4,8 @@ #' @examples #' bin_plot <- CS_bin_plot(merged_DT = echodata::Nalls2019_merged) #' @export -#' @import ggplot2 #' @importFrom RColorBrewer brewer.pal +#' @importFrom stats setNames CS_bin_plot <- function(merged_DT, show_plot = TRUE) { Method <- bin <- ..count.. 
<- NULL @@ -17,40 +17,40 @@ CS_bin_plot <- function(merged_DT, custom_colors <- RColorBrewer::brewer.pal( n = length(levels(bin_counts$bin)), "GnBu" ) - custom_colors_dict <- setNames( + custom_colors_dict <- stats::setNames( custom_colors[seq(1, length(used_bins))], rev(used_bins) ) custom_colors_dict[names(custom_colors_dict) == "0"] <- "lightgray" - bin_plot <- ggplot( + bin_plot <- ggplot2::ggplot( subset(bin_counts, Method != "mean"), - aes(x = Method, fill = bin) + ggplot2::aes(x = Method, fill = bin) ) + - geom_bar( + ggplot2::geom_bar( stat = "count", show.legend = TRUE, - position = position_stack(reverse = FALSE), color = "white" + position = ggplot2::position_stack(reverse = FALSE), color = "white" ) + # scale_fill_brewer(palette = "Spectral", direction = -1) + - scale_fill_manual(values = custom_colors_dict) + + ggplot2::scale_fill_manual(values = custom_colors_dict) + # geom_text(aes(label = paste(bin,"SNPs")), # position = position_stack(vjust = .5), vjust=-1, stat = "count") + - geom_text(aes(label = ..count..), - position = position_stack(vjust = .5), + ggplot2::geom_text(ggplot2::aes(label = ..count..), + position = ggplot2::position_stack(vjust = .5), vjust = .5, stat = "count" ) + - theme_bw() + - labs(x = NULL, y = "Loci", fill = "CS size") + - coord_flip() + - theme( - panel.grid.major = element_blank(), - panel.grid.minor = element_blank(), - rect = element_blank(), - axis.text.x = element_blank(), - axis.ticks = element_blank(), + ggplot2::theme_bw() + + ggplot2::labs(x = NULL, y = "Loci", fill = "CS size") + + ggplot2::coord_flip() + + ggplot2::theme( + panel.grid.major = ggplot2::element_blank(), + panel.grid.minor = ggplot2::element_blank(), + rect = ggplot2::element_blank(), + axis.text.x = ggplot2::element_blank(), + axis.ticks = ggplot2::element_blank(), legend.position = "top" ) + - guides(fill = guide_legend(nrow = 1, reverse = TRUE)) + ggplot2::guides(fill = ggplot2::guide_legend(nrow = 1, reverse = TRUE)) if (show_plot) 
print(bin_plot) return(list( plot = bin_plot, diff --git a/R/CS_counts_plot.R b/R/CS_counts_plot.R index 760427c..b5ea64d 100644 --- a/R/CS_counts_plot.R +++ b/R/CS_counts_plot.R @@ -5,7 +5,7 @@ #' @examples #' gg_CS <- CS_counts_plot(merged_DT = echodata::Nalls2019_merged) #' @export -#' @import ggplot2 +#' @rawNamespace import(ggplot2, except = c(geom_rect, ggsave)) #' @importFrom dplyr %>% mutate arrange CS_counts_plot <- function(merged_DT, show_numbers = TRUE, diff --git a/R/GR.name_filter_convert.R b/R/GR.name_filter_convert.R new file mode 100644 index 0000000..08d7ea7 --- /dev/null +++ b/R/GR.name_filter_convert.R @@ -0,0 +1,17 @@ +#' GR.name_filter_convert +#' +#' @family plot +#' @keywords internal +GR.name_filter_convert <- function(GR.final, + GR.names, + min_hits = 1) { + names(GR.final) <- GR.names + grl <- GR.final[!as.logical(lapply(GR.final, is.null))] + # Filter to those that had at least N hits + grl <- grl[as.logical(lapply(grl, function(g, min_hits. = min_hits) { + length(GenomicRanges::seqnames(g)) >= min_hits. 
+ }))] + # Convert to GRangesList (important) + grl <- GenomicRanges::GRangesList(grl) + return(grl) +} diff --git a/R/NOTT_2019.epigenomic_histograms.R b/R/NOTT_2019.epigenomic_histograms.R index 2f8627f..62d767c 100644 --- a/R/NOTT_2019.epigenomic_histograms.R +++ b/R/NOTT_2019.epigenomic_histograms.R @@ -14,10 +14,11 @@ #' return_assay_track = TRUE, #' save_annot = FALSE #' ) -#' @import ggplot2 +#' @rawNamespace import(ggplot2, except = c(geom_rect, ggsave)) #' @importFrom ggbio autoplot geom_rect scale_x_sequnit plotGrandLinear #' @importFrom ggbio theme_genome ggsave #' @importFrom stats formula +#' @importFrom GenomeInfoDb seqlevelsStyle NOTT_2019.epigenomic_histograms <- function(finemap_dat, locus_dir, show_plot = TRUE, @@ -45,15 +46,20 @@ NOTT_2019.epigenomic_histograms <- function(finemap_dat, # library(BiocGenerics) # library(GenomicRanges) # library(ggbio) - # show_plot=T;save_plot=T;full_data=T;return_assay_track=F;binwidth=2500; geom="histogram"; plot_formula="Cell_type ~."; show_regulatory_rects=T; bigwig_dir=NULL; verbose=T; nThread=1; - # finemap_dat=echoannot::LRRK2; plot.zoom=500000; fill_var="Assay"; density_adjust=.2; strip.text.y.angle=0; + # show_plot=T;save_plot=T;full_data=T;return_assay_track=F; + # binwidth=2500; geom="histogram"; plot_formula="Cell_type ~."; + # show_regulatory_rects=T; bigwig_dir=NULL; verbose=T; nThread=1; + # finemap_dat=echoannot::LRRK2; plot.zoom=500000; fill_var="Assay"; + # density_adjust=.2; strip.text.y.angle=0; # Import BigWig annotation files bigWigFiles <- echoannot::NOTT_2019.bigwig_metadata - # Some bigWig files were initially loaded to UCSC GB, but then later taken down by the authors.... + # Some bigWig files were initially loaded to UCSC GB, + # but then later taken down by the authors.... # However I saved these files on Minerva beforehand. 
bigWigFiles <- subset(bigWigFiles, UCSC_available == "T") - bigWigFiles <- dplyr::mutate(bigWigFiles, cell_type = gsub(" ", ".", cell_type)) + bigWigFiles <- dplyr::mutate(bigWigFiles, + cell_type = gsub(" ", ".", cell_type)) # Convert finemap data to granges dat <- finemap_dat dat$seqnames <- dat$CHR diff --git a/R/NOTT_2019.plac_seq_plot.R b/R/NOTT_2019.plac_seq_plot.R index 4365612..bd2d6f5 100644 --- a/R/NOTT_2019.plac_seq_plot.R +++ b/R/NOTT_2019.plac_seq_plot.R @@ -13,7 +13,7 @@ #' # Zoom in #' trks_plus_lines <- NOTT_2019.plac_seq_plot(finemap_dat = BST1, locus_dir = file.path("~/Desktop", locus_dir), zoom_window = 500000, highlight_plac = TRUE) #' } -#' @import ggplot2 +#' @rawNamespace import(ggplot2, except = c(geom_rect, ggsave)) #' @importFrom ggbio ggbio geom_arch geom_rect scale_x_sequnit ggsave #' @importFrom IRanges IRanges NOTT_2019.plac_seq_plot <- function(finemap_dat = NULL, diff --git a/R/PLOT.get_max_histogram_height.R b/R/PLOT.get_max_histogram_height.R index d214fab..dca404b 100644 --- a/R/PLOT.get_max_histogram_height.R +++ b/R/PLOT.get_max_histogram_height.R @@ -1,10 +1,16 @@ +#' PLOT.get_max_histogram_height +#' +#' @keywords internal +#' @importFrom methods is +#' @importFrom ggplot2 ggplot_build +#' @importFrom DescTools RoundTo PLOT.get_max_histogram_height <- function(gg, round_to = NULL, verbose = TRUE) { - if (tolower(class(gg)[1]) == "ggbio") gg <- gg@ggplot - printer("+ PLOT:: Calculating max histogram height", v = verbose) + if (methods::is(gg,"ggbio")) {gg <- gg@ggplot} + messager("+ PLOT:: Calculating max histogram height", v = verbose) dat <- ggplot2::ggplot_build(gg)$data[[1]] - max_height <- max(dat$ymax) + max_height <- max(dat$ymax, na.rm = TRUE) if (!is.null(round_to)) { max_height <- DescTools::RoundTo(max_height, round_to) } diff --git a/R/PLOT.get_window_limits.R b/R/PLOT.get_window_limits.R index f9b73e7..032e81a 100644 --- a/R/PLOT.get_window_limits.R +++ 
b/R/PLOT.get_window_limits.R @@ -26,7 +26,7 @@ PLOT.get_window_limits <- function(finemap_dat, .index_as_center = index_as_center, .genomic_units = genomic_units, .verbose = verbose) { - printer("+ Inferring genomic limits for window:", pz, v = .verbose) + messager("+ Inferring genomic limits for window:", pz, v = .verbose) # Zoom #x as input if (.index_as_center) { middle_pos <- subset(.finemap_dat, leadSNP)$POS[1] diff --git a/R/ROADMAP.construct_reference.R b/R/ROADMAP.construct_reference.R new file mode 100644 index 0000000..640dfa1 --- /dev/null +++ b/R/ROADMAP.construct_reference.R @@ -0,0 +1,40 @@ +#' Gather Roadmap annotation metadata +#' +#' @param ref_path Where the ROADMAP metadata is stored. +#' @param keyword_query Search all columns in the Roadmap annotations metadata +#' and only query annotations that contain your keywords. +#' Can provide multiple keywords in list form: +#' \code{c("placenta","liver","monocytes")} +#' +#' @examples +#' ref <- ROADMAP.construct_reference(keyword_query = c( +#' "placenta", +#' "liver", +#' "monocytes" +#' )) +#' @family ROADMAP +#' @export +#' @importFrom data.table transpose fread +ROADMAP.construct_reference <- function(ref_path = + system.file( + "extdata/ROADMAP", + "ROADMAP_Epigenomic.js", + package = "echoannot" + ), + keyword_query = NULL) { + # %like% is from data.table + ref <- suppressWarnings(data.table::fread(ref_path)) + colnames(ref)[1] <- "EID" + if (!is.null(keyword_query)) { + rows <- grep(paste(keyword_query, collapse = "|"), + data.table::transpose(ref), + ignore.case = TRUE + ) + ref <- ref[rows, ] + messager( + "+ ROADMAP::", nrow(ref), + "annotation(s) identified that match `keyword_query`." 
+ ) + } + return(ref) +} diff --git a/R/ROADMAP.merge_and_process_grl.R b/R/ROADMAP.merge_and_process_grl.R new file mode 100644 index 0000000..014f25b --- /dev/null +++ b/R/ROADMAP.merge_and_process_grl.R @@ -0,0 +1,44 @@ +#' Standardize Roadmap query +#' +#' @param grl.roadmap Roadmap query results +#' @param n_top_tissues The number of top tissues to include, +#' sorted by greatest number of rows +#' (i.e. the number of genomic ranges within the window). +#' @family ROADMAP +#' @importFrom IRanges overlapsAny +#' @importFrom dplyr %>% group_by tally n_distinct +ROADMAP.merge_and_process_grl <- function(grl.roadmap, + gr.snp, + n_top_tissues = 5, + sep = " ") { + grl.roadmap.merged <- unlist(grl.roadmap) + grl.roadmap.merged$Source <- names(grl.roadmap.merged) + grl.roadmap.merged$Source <- gsub("_", sep, grl.roadmap.merged$Source) + grl.roadmap.merged$ChromState <- + lapply( + grl.roadmap.merged$State, + function(ROW) { + base::strsplit(ROW, "_")[[1]][2] + } + ) %>% unlist() + grl.roadmap.filt <- grl.roadmap.merged[unlist(lapply( + grl.roadmap, function(e) { + IRanges::overlapsAny(e, gr.snp, minoverlap = 1) + } + ))] + if (!is.null(n_top_tissues)) { + top_tissues <- data.frame(grl.roadmap.filt) %>% + dplyr::group_by(Source) %>% + dplyr::tally(sort = TRUE) + grl.roadmap.filt <- subset( + grl.roadmap.filt, + Source %in% unique(top_tissues$Source[ + seq(1, min( + n_top_tissues, + dplyr::n_distinct(top_tissues$Source) + )) + ]) + ) + } + return(grl.roadmap.filt) +} diff --git a/R/ROADMAP.query.R b/R/ROADMAP.query.R new file mode 100644 index 0000000..7016efd --- /dev/null +++ b/R/ROADMAP.query.R @@ -0,0 +1,70 @@ +#' Query Roadmap by genomic coordinates +#' +#' @param gr.snp \code{\link[GenomicRanges]{GRanges}} object of +#' SNPs to query Roadmap with. +#' @param limit_files Limit the number of annotation files queried +#' (for faster testing). +#' @param nThread Number of threads to parallelise queries over. 
+#' @inheritParams ROADMAP.tabix +#' +#' @family ROADMAP +#' @examples +#' \dontrun{ +#' grl.roadmap <- ROADMAP.query( +#' gr.snp = echodata::BST1, +#' keyword_query = "placenta") +#' } +#' @export +#' @importFrom GenomicRanges seqnames GRanges start end +ROADMAP.query <- function(results_path = file.path(tempdir(), "Roadmap"), + gr.snp, + keyword_query = NULL, + limit_files = NULL, + nThread = 1, + verbose = TRUE) { + rm_start <- Sys.time() + gr.snp <- dt_to_granges(subset_DT = gr.snp, + verbose = verbose) + roadmap_ref <- ROADMAP.construct_reference(keyword_query = keyword_query) + if (!is.null(limit_files)) { + roadmap_ref <- roadmap_ref[seq(1,limit_files), ] + } + # Download via tabix (fast) + counter <- 1 + gr.roadmap <- parallel::mclapply(unique(roadmap_ref$EID), + function(eid, + gr.snp. = gr.snp, + results_path. = results_path) { + message_parallel("+ ROADMAP:: Querying subset from Roadmap API: ", + eid, " - ", counter, "/", length(unique(roadmap_ref$EID)) ) + counter <<- counter + 1 + dat <- GenomicRanges::GRanges() + try({ + dat <- ROADMAP.tabix( + results_path = results_path., + chrom = as.character(GenomicRanges::seqnames(gr.snp.)[1]), + min_pos = min(GenomicRanges::start(gr.snp.), na.rm = TRUE), + max_pos = max(GenomicRanges::end(gr.snp.), na.rm = TRUE), + eid = eid, + convert_to_granges = TRUE) + }) + if (length(GenomicRanges::seqnames(dat)) > 0) { + return(dat) + } else { + return(NULL) + } + }, + mc.cores = nThread + ) + remove(counter) + grl.roadmap <- GR.name_filter_convert( + GR.final = gr.roadmap, + GR.names = + roadmap_ref$`Epigenome name (from EDACC Release 9 directory)`, + min_hits = 1 + ) + rm_end <- Sys.time() + messager("ROADMAP:: All downloads complete") + print(round(rm_end - rm_start, 1)) + return(grl.roadmap) +} diff --git a/R/ROADMAP.query_and_plot.R b/R/ROADMAP.query_and_plot.R new file mode 100644 index 0000000..f1981ee --- /dev/null +++ b/R/ROADMAP.query_and_plot.R @@ -0,0 +1,102 @@ +#' Query and plot Roadmap epigenomic annotations 
+#' +#' @param subset_DT Data.frame with at least the following columns: +#' \describe{ +#' \item{SNP}{SNP RSID} +#' \item{CHR}{chromosome} +#' \item{POS}{position} +#' } +#' @param force_new_query Force a new Roadmap query even if a saved annotation file exists. +#' @inheritParams ROADMAP.construct_reference +#' @inheritParams ROADMAP.tabix +#' @inheritParams ROADMAP.merge_and_process_grl +#' +#' @return A named list containing: +#' \itemize{ +#' \item{\code{Roadmap_plot}: track plot built with \code{ggbio}} +#' \item{\code{Roadmap_query}: \code{GRanges} object within the queried coordinates} +#' } +#' +#' @family ROADMAP +#' @examples +#' \dontrun{ +#' roadmap_plot_query <- ROADMAP.query_and_plot( +#' subset_DT = echodata::BST1, +#' keyword_query = "monocytes") +#' } +#' @export +#' @importFrom GenomeInfoDb seqlevelsStyle +ROADMAP.query_and_plot <- function(subset_DT, + results_path = + file.path(tempdir(), "Roadmap"), + n_top_tissues = NULL, + keyword_query = NULL, + adjust = .2, + force_new_query = FALSE, + remove_tmps = TRUE, + verbose=TRUE) { + # Convert subset to GRanges + if (all(!is_granges(subset_DT))) { + messager("ROADMAP:: Converting data to GRanges...", v = verbose) + gr.snp <- GenomicRanges::makeGRangesFromDataFrame( + dplyr::mutate( subset_DT, + SEQnames = paste0("chr", CHR) + ), + seqnames.field = "SEQnames", + start.field = "POS", + end.field = "POS" + ) + GenomeInfoDb::seqlevelsStyle(gr.snp) <- "NCBI" + } else { + gr.snp <- subset_DT + } + # Roadmap query + lib <- "Roadmap_ChromatinMarks_CellTypes" + anno_path <- file.path( + results_path, "annotations", + paste0("GRanges_", lib, ".rds") + ) + if (file.exists(anno_path) && !force_new_query) { + messager("+ Saved annotation file detected. 
Loading...") + grl.roadmap <- readRDS(anno_path) + } else { + dir.create(dirname(anno_path), + showWarnings = FALSE, recursive = TRUE + ) + grl.roadmap <- ROADMAP.query( + results_path = results_path, + gr.snp = gr.snp, + keyword_query = keyword_query, + limit_files = NULL, + verbose = verbose + ) + save_annotations( + gr = grl.roadmap, + anno_path = anno_path, + libName = lib, + verbose = verbose + ) + } + grl.roadmap.filt <- ROADMAP.merge_and_process_grl( + grl.roadmap = grl.roadmap, + gr.snp = gr.snp, + n_top_tissues = n_top_tissues + ) + # Plot + track.roadmap <- ROADMAP.track_plot( + grl.roadmap.filt = grl.roadmap.filt, + gr.snp = gr.snp, + adjust = adjust + ) + if (remove_tmps) { + tbi <- list.files( + path = results_path, + pattern = ".tbi$", full.names = TRUE + ) + dummy <- suppressWarnings(file.remove(tbi)) + } + return(list( + Roadmap_plot = track.roadmap, + Roadmap_query = grl.roadmap.filt + )) +} diff --git a/R/ROADMAP.tabix.R b/R/ROADMAP.tabix.R new file mode 100644 index 0000000..7e74d5e --- /dev/null +++ b/R/ROADMAP.tabix.R @@ -0,0 +1,69 @@ +#' Query Roadmap API +#' +#' Query Roadmap epigenomic annotations (chromatin marks) +#' using a range of genomic coordinates. +#' +#' \href{https://egg2.wustl.edu/roadmap/data/byFileType/chromhmmSegmentations/ChmmModels/coreMarks/jointModel/final/}{ +#' ROADMAP file locations.} +#' +#' @param results_path Where to store query results. +#' @param chrom Chromosome to query +#' @param min_pos Minimum genomic position +#' @param max_pos Maximum genomic position +#' @param eid Roadmap annotation ID +#' @param convert_to_granges Whether to return query +#' as a \code{data.frame} or \code{\link[GenomicRanges]{GRanges}}. 
+#' +#' @examples +#' \dontrun{ +#' BST1 <- echodata::BST1 +#' dat <- ROADMAP.tabix(chrom=BST1$CHR[1], +#' min_pos = min(BST1$POS), +#' max_pos = max(BST1$POS), +#' eid = "E099") +#' } +#' +#' @family ROADMAP +#' @keywords internal +#' @importFrom GenomicRanges makeGRangesFromDataFrame +#' @importFrom GenomeInfoDb seqlevelsStyle +#' @importFrom data.table fread +#' @importFrom echotabix query_tabular +ROADMAP.tabix <- function(results_path=tempfile( + fileext = "ROADMAP_query.csv.gz"), + chrom, + min_pos, + max_pos, + eid, + convert_to_granges=TRUE, + verbose=TRUE){ + dir.create(results_path, showWarnings = FALSE, recursive = TRUE) + chrom <- paste0("chr",gsub("^chr","",chrom,ignore.case=TRUE)) + tbx_start <- Sys.time() + messager("++ Downloading Roadmap Chromatin Marks:",eid, v=verbose) + fname <- paste0(eid,"_15_coreMarks_dense.bed.bgz") + URL <- file.path( + "https://egg2.wustl.edu/roadmap/data/byFileType", + "chromhmmSegmentations/ChmmModels/coreMarks/jointModel/final", + fname) # _15_coreMarks_stateno.bed.gz + #### Query remote tabix file #### + dat <- echotabix::query_tabular(fullSS_tabix = URL, + chrom = chrom, + start_pos = min_pos, + end_pos = max_pos, + local = FALSE) + dat <- dat[,paste0("V",seq(1,4))] + colnames(dat) <- c("Chrom","Start","End","State") + dat$EID <- eid + dat$File <- fname + if(convert_to_granges){ + dat <- dt_to_granges(subset_DT = dat, + chrom_col = "Chrom", + start_col = "Start", + end_col = "End", + style = "NCBI") + } + tbx_end <- Sys.time() + messager("BED subset downloaded in",round(difftime(tbx_end,tbx_start,units="secs"),3),"seconds", v=verbose) + return(dat) +} diff --git a/R/ROADMAP.track_plot.R b/R/ROADMAP.track_plot.R new file mode 100644 index 0000000..1ea7673 --- /dev/null +++ b/R/ROADMAP.track_plot.R @@ -0,0 +1,64 @@ +#' Plot Roadmap query +#' +#' @param grl.roadmap.filt Roadmap query results. +#' @param gr.snp Optionally, can include an extra \code{\link[GenomicRanges]{GRanges}} object +#' to ensure the plot does not extend beyond certain coordinates. 
+#' @param geom The type of plot to create. +#' Options include "density" and "histogram". +#' @param adjust The granularity of the peaks. +#' @param show_plot Whether to print the plot. +#' @examples +#' \dontrun{ +#' gr.snp <- dt_to_granges(echodata::BST1) +#' grl.roadmap <- ROADMAP.query( +#' gr.snp = gr.snp, +#' keyword_query = "monocyte" +#' ) +#' grl.roadmap.filt <- ROADMAP.merge_and_process_grl( +#' grl.roadmap = grl.roadmap, +#' gr.snp = gr.snp +#' ) +#' track.roadmap <- ROADMAP.track_plot(grl.roadmap.filt, +#' gr.snp = gr.snp +#' ) +#' } +#' @keywords internal +#' @importFrom ggbio autoplot +#' @importFrom ggplot2 aes theme_classic theme element_text +#' @importFrom ggplot2 guides guide_legend scale_y_continuous +ROADMAP.track_plot <- function(grl.roadmap.filt, + gr.snp = NULL, + geom = "density", + adjust = .2, + show_plot = TRUE, + as.ggplot = TRUE) { + track.roadmap <- ggbio::autoplot(grl.roadmap.filt, + which = gr.snp, + ggplot2::aes(fill = ChromState), + color = "white", + size = .1, + geom = geom, + adjust = adjust, + # bins=10, + position = "stack", # stack, fill, dodge + facets = Source ~ ., + alpha = 1 + ) + + ggplot2::theme_classic() + + ggplot2::theme( + strip.text.y = ggplot2::element_text(angle = 0), + strip.text = ggplot2::element_text(size = 9) + ) + + ggplot2::guides(fill = ggplot2::guide_legend(ncol = 2, + keyheight = .5, + keywidth = .5)) + + ggplot2::scale_y_continuous(n.breaks = 3) + if (show_plot) { + print(track.roadmap) + } + if (as.ggplot) { + return(track.roadmap@ggplot) + } else { + return(track.roadmap) + } +} diff --git a/R/XGR.download_and_standardize.R b/R/XGR.download_and_standardize.R new file mode 100644 index 0000000..e206ae8 --- /dev/null +++ b/R/XGR.download_and_standardize.R @@ -0,0 +1,87 @@ +#' Download, standardize, and merge XGR annotations +#' +#' Merges a list of XGR annotations into a single GRanges object +#' +#' @param lib.selections Which XGR annotations to check overlap with. 
+#' For full list of libraries see +#' \href{http://xgr.r-forge.r-project.org/#annotations-at-the-genomic-region-level}{ +#' here.} +#' @param as_grangesList Return as a \code{GRangesList}, +#' instead of a single merged \code{GRanges} object. +#' @param finemap_dat Fine-mapping results. +#' @param nThread Number of cores to parallelise across. +#' @return GRangesList +#' @family XGR +#' @examples +#' \dontrun{ +#' gr.lib <- XGR.download_and_standardize( +#' lib.selections = c("ENCODE_DNaseI_ClusteredV3_CellTypes"), +#' finemap_dat = echodata::BST1) +#' } +#' @export +#' @importFrom parallel mclapply +#' @importFrom XGR xRDataLoader +#' @importFrom dplyr %>% +#' @importFrom GenomicRanges GRangesList +#' @importFrom methods is +XGR.download_and_standardize <- function(lib.selections = c( + "ENCODE_TFBS_ClusteredV3_CellTypes", + "TFBS_Conserved", + "Uniform_TFBS" + ), + as_grangesList = FALSE, + finemap_dat, + nThread = 1) { + # Iterate over XGR libraries + gr.lib <- lapply(lib.selections, function(lib.name) { + GR.annotations <- XGR::xRDataLoader(RData.customised = lib.name) + # Iterate over lists within each library + all_GRL <- parallel::mclapply(names(GR.annotations), function(n1) { + grl <- GR.annotations[[n1]] + + # Handle both nested and unnested entries + if (methods::is(grl,"list")) { + # grl$name <- names(unlist(grl)) + GRL <- lapply(names(grl), function(n2) { + gr <- grl[[n2]] + gr$source <- n1 + gr$assay <- n2 + return(gr) + }) %>% unlist() + } else { + grl$name <- names(grl) + return(grl) + } + }, mc.cores = nThread) %>% unlist() # return all_GRL + # Rename GRanges after they've been unnested + names(all_GRL) <- names(unlist(GR.annotations)) + + # Merge lists together + if (!is.null(all_GRL)) { + ALL_GRL <- unlist(GenomicRanges::GRangesList(all_GRL)) + ALL_GRL <- granges_overlap( + dat1 = finemap_dat, + chrom_col.1 = "CHR", + start_col.1 = "POS", + end_col.1 = "POS", + dat2 = ALL_GRL + ) + # Parse metadata + ALL_GRL$library <- lib.name + ALL_GRL$fullname 
<- names(ALL_GRL) + ALL_GRL <- XGR.parse_metadata( + gr.lib = ALL_GRL, + lib.name = lib.name + ) + return(ALL_GRL) + } else { + return(NULL) + } + }) # return OVERLAP + # Merge + gr.lib <- GenomicRanges::GRangesList(unlist(gr.lib)) + if (as_grangesList == FALSE) { + gr.lib <- unlist(gr.lib) + } + return(gr.lib) +} diff --git a/R/XGR.enrichment.R b/R/XGR.enrichment.R new file mode 100644 index 0000000..2a4774b --- /dev/null +++ b/R/XGR.enrichment.R @@ -0,0 +1,123 @@ +#' XGR enrichment +#' +#' Run SNP-level enrichment test with \link[XGR]{xGRviaGenomicAnno}. +#' +#' @param gr Annotations to test for enrichment with. +#' @param merged_dat SNP-level fine-mapping results to test for enrichment with. +#' @param foreground_filter Filter to apply to foreground (target SNPs). +#' @param background_filter Filter to apply to background (non-target SNPs). +#' @param grouping_vars Columns in \code{merged_dat} to group by when conducting +#' enrichment tests. +#' @param fg_sample_size Foreground sample size. +#' @param bg_sample_size Background sample size. +#' @param background.annotatable.only For background SNPs, +#' only use SNPs that overlap with some annotation in \code{gr}. +#' This means that missing annotations (\code{NA}) will not be considered. +#' @param verbose Print messages. 
+#' +#' @family XGR +#' @examples +#' \dontrun{ +#' gr.merged <- echoannot::merge_celltype_specific_epigenomics() +#' enrich.lead <- XGR.enrichment( +#' gr = gr.merged, +#' merged_dat = echodata::Nalls2019_merged, +#' foreground_filter = "leadSNP==TRUE", +#' grouping_vars = c("Study", "Cell_type", "Assay") +#' ) +#' } +#' @export +#' @importFrom data.table rbindlist +#' @importFrom XGR xGRviaGenomicAnno +XGR.enrichment <- function(gr, + merged_dat, + foreground_filter = "Consensus_SNP==TRUE", + background_filter = NULL, + grouping_vars = c( + "Study", + "Assay", + "Cell_type" + ), + fg_sample_size = NULL, + bg_sample_size = NULL, + background.annotatable.only = FALSE, + verbose = TRUE) { + fg_bg <- XGR.prepare_foreground_background( + subset_DT = merged_dat, + foreground_filter = foreground_filter, + background_filter = background_filter, + fg_sample_size = fg_sample_size, + bg_sample_size = bg_sample_size + ) + # Create all combinations + # if(!is.null(grouping_vars)){ + # combos <- expand.grid(sapply( subset(data.frame(gr), + # select=grouping_vars), unique)) %>% + # `colnames<-`(grouping_vars) + # if(length(grouping_vars)<2) {combos$dummy1 <- 1; gr$dummy1 <- 1; } + # }else { + # combos <- data.frame(dummy1=1, dummy2=2); + # gr$dummy1 <- 1; gr$dummy2 <- 2; + # } + + combos <- unique(data.frame(gr)[, grouping_vars]) + combos[is.na(combos)] <- "NA" + + messager("+ XGR:: Conducting enrichment tests for", + nrow(combos), "combinations of `grouping_vars`.", + v = verbose + ) + RES <- lapply( + seq(1, nrow(combos)), + function(i, + .background.annotatable.only = + background.annotatable.only) { + ROW <- combos[i, ] + # messager("+ XGR::",ROW) + gr.sub <- gr + for (column in colnames(combos)) { + gr.sub <- subset( + gr.sub, + eval(parse(text = column)) == ROW[[column]] + ) + } + + res <- suppressMessages( + XGR::xGRviaGenomicAnno( + data.file = fg_bg$foreground, + background.file = fg_bg$background, + format.file = "data.frame", + GR.annotation = gr.sub, + 
background.annotatable.only = + .background.annotatable.only, + verbose = FALSE + ) + ) + for (column in colnames(combos)) { + res[[column]] <- ROW[[column]] + } + return(res) + } + ) %>% data.table::rbindlist() + RES$fg_filter <- if (is.null(foreground_filter)) { + NA + } else { + foreground_filter + } + RES$bg_filter <- if (is.null(background_filter)) { + NA + } else { + background_filter + } + RES$fg_sample_size <- if (is.null(fg_sample_size)) { + nrow(fg_bg$foreground) + } else { + fg_sample_size + } + RES$bg_sample_size <- if (is.null(bg_sample_size)) { + nrow(merged_dat) + } else { + bg_sample_size + } + return(RES) +} diff --git a/R/XGR.enrichment_bootstrap.R b/R/XGR.enrichment_bootstrap.R new file mode 100644 index 0000000..9f665ff --- /dev/null +++ b/R/XGR.enrichment_bootstrap.R @@ -0,0 +1,111 @@ +#' XGR enrichment (bootstrapped) +#' +#' Perform annotation enrichment tests using iterative bootstrapping procedure. +#' +#' @param snp_groups Which SNP groups to repeat enrichment tests for separately. +#' @param iterations Number of bootstrapping iterations. +#' @param bootstrap Whether to use bootstrapping. +#' @param save_path File path to save results to. +#' @param nThread Number of threads to parallelise bootstrapping over. 
+#' @inheritParams XGR.enrichment +#' +#' @family XGR +#' @examples +#' \dontrun{ +#' gr.merged <- echoannot::merge_celltype_specific_epigenomics() +#' enrich_res <- XGR.enrichment_bootstrap( +#' gr = gr.merged, +#' merged_dat = echodata::Nalls2019_merged +#' ) +#' } +#' @importFrom data.table rbindlist fwrite +#' @importFrom parallel mclapply +#' @importFrom dplyr %>% +#' @importFrom stats p.adjust +XGR.enrichment_bootstrap <- function(gr, + merged_dat, + snp_groups = c( + "Random", + "GWAS lead", + "UCS (-PolyFun)", + "UCS", "Consensus (-PolyFun)", + "Consensus" + ), + background_filter = NULL, + grouping_vars = c( + "Study", + "Assay", + "Cell_type" + ), + iterations = 1000, + fg_sample_size = 20, + bg_sample_size = NULL, + bootstrap = TRUE, + save_path = tempfile( + fileext = + "XGR_enrich_boot_res.csv.gz" + ), + nThread = 1, + verbose = TRUE) { + if (bootstrap) { + messager("XGR:: Initiating bootstrap enrichment procedure", v = verbose) + } else { + iterations <- 1 + bg_sample_size <- NULL + fg_sample_size <- NULL + } + sampling_df <- merged_dat + + RES_GROUPS <- lapply( + snp_groups, + function(snp_group, + .merged_dat = merged_dat, + .grouping_vars = grouping_vars, + .background_filter = background_filter, + .fg_sample_size = fg_sample_size, + .bg_sample_size = bg_sample_size) { + snp_filters <- snp_group_filters(random_sample_size = .fg_sample_size) + .foreground_filter <- snp_filters[snp_group] + message(snp_group, " :: ", .foreground_filter) + RES <- parallel::mclapply(seq(1, iterations), + function(i, + merged_dat = .merged_dat, + grouping_vars = .grouping_vars, + foreground_filter = .foreground_filter, + background_filter = .background_filter, + fg_sample_size = .fg_sample_size, + bg_sample_size = .bg_sample_size) { + try({ + XGR.enrichment( + gr = gr, + merged_dat = merged_dat, + foreground_filter = foreground_filter, + background_filter = background_filter, + grouping_vars = grouping_vars, + fg_sample_size = fg_sample_size, + bg_sample_size = 
bg_sample_size + ) + }) + }, + mc.cores = nThread + ) %>% data.table::rbindlist(fill = TRUE) + RES$SNP_group <- snp_group + return(RES) + } + ) %>% data.table::rbindlist(fill = T) + + # Post-process + RES_GROUPS <- RES_GROUPS %>% + dplyr::mutate( + SNP_group = factor(SNP_group, + levels = unique(SNP_group), + ordered = TRUE + ), + FDR = stats::p.adjust(p = pvalue, method = "fdr") + ) + if (save_path != FALSE) { + messager("XGR:: Saving enrichment results ==>", save_path, v = verbose) + data.table::fwrite(RES_GROUPS, save_path) + } + return(RES_GROUPS) +} diff --git a/R/XGR.enrichment_plot.R b/R/XGR.enrichment_plot.R new file mode 100644 index 0000000..795a10f --- /dev/null +++ b/R/XGR.enrichment_plot.R @@ -0,0 +1,167 @@ +#' Plot enrichment results +#' +#' @family XGR +#' @examples +#' \dontrun{ +#' root <- file.path( +#' "/sc/arion/projects/pd-omics/brian", +#' "Fine_Mapping/Data/GWAS/Nalls23andMe_2019/_genome_wide" +#' ) +#' ### merged enrichment results +#' enrich_res <- data.table::fread( +#' file.path( +#' root, +#' "XGR/celltypespecific_epigenomics.SNP_groups.csv.gz" +#' ) +#' ) +#' enrich_res <- data.table::fread( +#' file.path( +#' root, +#' "XGR/celltypespecific_epigenomics.snp_groups.csv.gz" +#' ) +#' ) +#' enrich_boot <- data.table::fread( +#' file.path( +#' root, +#' "XGR/celltypespecific_epigenomics.snp_groups.permute.csv.gz" +#' ) +#' ) +#' enrich_assay <- data.table::fread( +#' file.path( +#' root, +#' "XGR/celltypespecific_epigenomics.snp_groups.assay.csv.gz" +#' ) +#' ) +#' +#' # Merged volcano plot +#' enrich_res <- subset(enrich_res, SNP_Group != "Consensus (-PolyFun)") %>% +#' dplyr::rename(SNP_group = SNP_Group) +#' gp <- XGR.enrichment_plot( +#' enrich_res = subset(enrich_res, !Assay %in% c("HiChIP_FitHiChIP", "PLAC")), +#' title = "Enrichment: Cell-type-specific epigenomics", +#' plot_type = "point", +#' save_plot = file.path( +#' root, "XGR/celltypespecific_epigenomics.enrich_volcano.png" +#' ), +#' height = 6, width = 8, shape_var = "Assay" 
+#' ) +#' ## Merged bar plot +#' gp <- XGR.enrichment_plot( +#' enrich_res = enrich_res, +#' plot_type = "bar", +#' facet_formula = ".~Assay", +#' FDR_thresh = .05 +#' ) +#' # Merged volcano plot (permuted) +#' gp <- XGR.enrichment_plot( +#' enrich_res = enrich.scATAC.permute, +#' title = "Permuted enrichment: Cell-type-specific peaks and elements", +#' plot_type = "point" +#' ) +#' } +XGR.enrichment_plot <- function(enrich_res, + title = NULL, + subtitle = NULL, + facet_formula = NULL, + line_formula = "y ~ x", + line_method = "lm", + line_span = 1, + FDR_thresh = 1, + plot_type = "bar", + shape_var = "Cell_type", + facet_scales = "free", + show_plot = TRUE, + save_plot = FALSE, + height = 5, + width = 5) { + enrich_res <- dplyr::mutate( + enrich_res, + SNP_group = factor(SNP_group, + levels = unique(SNP_group), + ordered = TRUE + ), + ## Make Random size smaller (otherwise will + # make everything else relatively tiny) + nOverlap = ifelse(SNP_group == "Random", 10, nOverlap) + ) + sum(enrich_res$fc == -Inf) + colorDict <- snp_group_colorDict() + if (plot_type == "bar") { + gp <- ggplot( + data = subset(enrich_res, FDR <= FDR_thresh), + aes(x = SNP_group, y = fc, fill = SNP_group) + ) + + # geom_col(stat="identity", alpha=.5, show.legend = F) + + geom_boxplot() + + geom_jitter(height = 0, width = 0, alpha = .1, show.legend = F) + + scale_fill_manual(values = colorDict) + + # ggpubr::stat_compare_means(method = method, + # comparisons = comparisons, + # label = "p.signif", size=3, vjust = 1.5) + + facet_grid( + facets = if (is.null(facet_formula)) { + facet_formula + } else { + as.formula(facet_formula) + }, + scales = "free_y" + ) + + labs(x = "SNP Group", title = title, subtitle = subtitle) + + theme_bw() + + theme( + strip.background = element_rect(fill = "grey20"), + strip.text = element_text(color = "white"), + axis.text.x = element_text(angle = 45, hjust = 1) + ) + } + + if (plot_type == "point") { + gp <- ggplot( + data = subset(enrich_res, FDR <= 
FDR_thresh), + aes( + x = log1p(fc), y = -log10(pvalue), + size = nOverlap, color = SNP_group, group = SNP_group, + fill = SNP_group, + shape = eval(parse(text = shape_var)) + ) + ) + + geom_smooth( + alpha = 0.1, size = 0, span = line_span, + method = line_method, formula = line_formula + ) + + stat_smooth( + geom = "line", alpha = 0.3, size = 1, span = line_span, + method = line_method, formula = line_formula + ) + + geom_point(alpha = .5) + + scale_color_manual(values = colorDict) + + scale_fill_manual(values = colorDict) + + scale_shape_manual( + values = seq(12, (12 + dplyr::n_distinct( + enrich_res[[shape_var]] + ))) + ) + + geom_hline(yintercept = -log10(0.05), linetype = 2, alpha = .5) + + facet_grid( + facets = if (is.null(facet_formula)) { + facet_formula + } else { + as.formula(facet_formula) + }, + scales = facet_scales + ) + + labs(title = title, subtitle = subtitle, shape = shape_var) + + theme_bw() + + theme( + strip.background = element_rect(fill = "grey20"), + strip.text = element_text(color = "white") + ) + } + + if (show_plot) print(gp) + + if (save_plot != FALSE) { + ggsave(save_plot, gp, dpi = 400, height = height, width = width) + } + return(gp) +} diff --git a/R/XGR.filter_assays.R b/R/XGR.filter_assays.R new file mode 100644 index 0000000..e983627 --- /dev/null +++ b/R/XGR.filter_assays.R @@ -0,0 +1,26 @@ +#' Filter assays +#' +#' Identify the assays with the most annotations in the locus. 
+#' Then only keep these assays +#' @keywords internal +#' @family XGR +#' @importFrom dplyr %>% n_distinct group_by tally +XGR.filter_assays <- function(gr.lib, + n_top_assays = 5) { + top_assays <- data.frame(gr.lib) %>% + dplyr::group_by(library, Assay) %>% + dplyr::tally(sort = T) + if (!is.null(n_top_assays)) { + gr.filt <- subset( + gr.lib, + Assay %in% + unique(top_assays$Assay[ + seq(1, min( + n_top_assays, + dplyr::n_distinct(top_assays$Assay) + )) + ]) + ) + } + return(gr.filt) +} diff --git a/R/XGR.filter_sources.R b/R/XGR.filter_sources.R new file mode 100644 index 0000000..55faf1f --- /dev/null +++ b/R/XGR.filter_sources.R @@ -0,0 +1,26 @@ +#' Filter sources +#' +#' Identify the sources with the most annotations in the locus. +#' Then only keep these sources. +#' @keywords internal +#' @family XGR +#' @importFrom dplyr n_distinct +XGR.filter_sources <- function(gr.lib, + n_top_sources = 5) { + top_sources <- data.frame(gr.lib) %>% + dplyr::group_by(library, Source) %>% + dplyr::tally(sort = TRUE) + if (!is.null(n_top_sources)) { + gr.filt <- subset( + gr.lib, + Source %in% + unique(top_sources$Source[ + seq(1, min( + n_top_sources, + dplyr::n_distinct(top_sources$Source) + )) + ]) + ) + } + return(gr.filt) +} diff --git a/R/XGR.import_annotations.R b/R/XGR.import_annotations.R new file mode 100644 index 0000000..b6f2abc --- /dev/null +++ b/R/XGR.import_annotations.R @@ -0,0 +1,55 @@ +#' Download XGR annotations +#' +#' @family XGR +#' @keywords internal +#' @importFrom XGR xRDataLoader +XGR.import_annotations <- function(gr.snp, + anno_data_path = + file.path( + "annotations", + paste0( + "XGR_", + lib.name, ".rds" + ) + ), + lib.name, + save_xgr = TRUE, + annot_overlap_threshold = 5) { + if (file.exists(anno_data_path)) { + messager("") + messager("+ Saved annotation file detected. 
Loading...") + GR.annotations <- readRDS(anno_data_path) + } else { + messager("") + messager("+ XGR: Downloading...", lib.name) + GR.annotations <- XGR::xRDataLoader(RData.customised = lib.name) + if (save_xgr & !is.null(GR.annotations)) { + dir.create(dirname(anno_data_path), + showWarnings = FALSE, recursive = TRUE + ) + saveRDS(GR.annotations, file = anno_data_path) + } + } + GR.orig <- unlist(GR.annotations) + + gr.xgr <- lapply(names(GR.orig), function(g, gr.snp. = gr.snp) { + # messager("Finding overlap for:", g) + subsetByOverlaps <- get( + "subsetByOverlaps", + asNamespace("GenomicRanges") + ) + GR.overlap <- subsetByOverlaps(GR.orig[[g]], gr.snp.) + len <- length(seqnames(GR.overlap)) + # messager(" - Overlapping annotations = ",len) + if (len > 0) { + return(GR.overlap) + } else { + return(NULL) + } + }) + grl.xgr <- GR.name_filter_convert(gr.xgr, + names(GR.orig), + min_hits = annot_overlap_threshold + ) + return(grl.xgr) +} diff --git a/R/XGR.iterate_enrichment.R b/R/XGR.iterate_enrichment.R new file mode 100644 index 0000000..5aaccba --- /dev/null +++ b/R/XGR.iterate_enrichment.R @@ -0,0 +1,126 @@ +#' Conduct enrichment tests for each annotation +#' +#' XGR uses a binomial enrichment tests for each annotation. 
+#' +#' +#' \href{https://www.rdocumentation.org/packages/XGR/versions/1.1.5/topics/xDefineGenomicAnno}{ +#' Description of all datasets} +#' @inheritParams XGR.prepare_foreground_background +#' @examples +#' \dontrun{ +#' enrich_res <- XGR.iterate_enrichment( +#' subset_DT = echodata::Nalls2019_merged, +#' foreground_filter = "Consensus_SNP", +#' background_filter = "leadSNP", +#' lib.selections = c("ENCODE_TFBS_ClusteredV3_CellTypes") +#' ) +#' } +#' @family XGR +#' @keywords internal +#' @importFrom data.table rbindlist data.table fwrite +#' @importFrom parallel mclapply +#' @importFrom XGR xGRviaGenomicAnno xRDataLoader +#' @importFrom stats p.adjust +#' @importFrom dplyr %>% mutate arrange +XGR.iterate_enrichment <- function(subset_DT, + foreground_filter = "Consensus_SNP", + background_filter = "leadSNP", + lib.selections = + c( + "ENCODE_TFBS_ClusteredV3_CellTypes", + "ENCODE_DNaseI_ClusteredV3_CellTypes", + "Broad_Histone", + "FANTOM5_Enhancer", + "Segment_Combined_Gm12878", + "TFBS_Conserved", + "ReMap_PublicAndEncode_TFBS", + "Blueprint_VenousBlood_Histone", + "Blueprint_DNaseI", + # "Blueprint_Methylation_hyper", + # "Blueprint_Methylation_hypo", + # "Genic_anno", + "FANTOM5_CAT_Cell", + "FANTOM5_CAT_MESH", + "GWAScatalog_alltraits" + ), + save_path = FALSE, + nThread = 1) { + fg_bg <- XGR.prepare_foreground_background( + subset_DT, + foreground_filter = foreground_filter, + background_filter = background_filter + ) + + # lib.selections = c("ENCODE_TFBS_ClusteredV3_CellTypes", + # "ENCODE_DNaseI_ClusteredV3_CellTypes", + # "Broad_Histone", + # "UW_Histone", + # "SYDH_Histone", + # "FANTOM5_Enhancer", + # "TFBS_Conserved", + # "Uniform_TFBS", + # "Uniform_DNaseI_HS") + # roadmap_grl <- lapply(unique(subset_DT$Locus), function(locus){ + # locus_DT <- subset(subset_DT, Locus==locus) + # dat <- ROADMAP.tabix(results_path=results_path, + # chrom = locus_DT$CHR[1], + # min_pos = min(locus_DT$POS), + # max_pos = max(locus_DT$POS), + # eid=eid, + # 
convert_to_GRanges=T) + # return(dat) + # }) + database_results <- parallel::mclapply(lib.selections, function(lib.name) { + messager("XGR:: Testing enrichment: ", lib.name) + eTerm <- NULL + try({ + GR.annotations <- XGR::xRDataLoader(RData.customised = lib.name) + eTerm <- lapply(GR.annotations, function(grl) { + et <- XGR::xGRviaGenomicAnno( + data.file = fg_bg$foreground, + background.file = fg_bg$background, + format.file = "data.frame", + GR.annotation = grl + ) + return(et) + }) %>% data.table::rbindlist() + eTerm$lib <- lib.name + eTerm$fullname <- names(unlist(GR.annotations)) + eTerm$source <- lapply( + eTerm$fullname, + function(e) { + strsplit(e, "[.]")[[1]][1] + } + ) %>% + as.character() + eTerm$assay <- lapply( + eTerm$fullname, + function(e) { + strsplit(e, "[.]")[[1]][2] + } + ) %>% + as.character() + }) + return(eTerm) + }, mc.cores = nThread) + + # Re-calculate corrected p-val to account for multiple dbs tested + enrich_res <- data.table::rbindlist(database_results) %>% + dplyr::mutate( + FDR = stats::p.adjust( + p = pvalue, + method = "fdr" + ), + Bonf = stats::p.adjust( + p = pvalue, + method = "bonferroni" + ) + ) %>% + dplyr::arrange(FDR, -nOverlap, -fc) %>% + subset(adjp < 0.05) %>% + data.table::data.table() + if (save_path != FALSE) { + data.table::fwrite(enrich_res, save_path, quote = FALSE) + } + return(enrich_res) +} diff --git a/R/XGR.iterate_overlap.R b/R/XGR.iterate_overlap.R new file mode 100644 index 0000000..564dbac --- /dev/null +++ b/R/XGR.iterate_overlap.R @@ -0,0 +1,61 @@ +#' Check overlap with XGR annotations +#' +#' +#' Automatically handles different file formats provided by XGR +#' (e.g. varying kinds of nested/unnested \code{GRanges}). +#' Then returns a \code{Granges} object with only the XGR annotation ranges +#' that overlap with the SNPs in \code{subset_DT}. +#' The \code{GRanges} merges hits from \code{subset_DT}. +#' +#' @param nThread Multi-thread across libraries. 
+#' @param save_path Save the results as a \code{data.frame}. +#' @inheritParams XGR.prepare_foreground_background +#' @inheritParams XGR.download_and_standardize +#' @family XGR +#' @keywords internal +#' @examples +#' \dontrun{ +#' gr.hits <- XGR.iterate_overlap( +#' lib.selections = c("ENCODE_TFBS_ClusteredV3_CellTypes"), +#' subset_DT = echodata::BST1 +#' ) +#' } +#' @importFrom data.table fwrite +XGR.iterate_overlap <- function(lib.selections = + c( + "ENCODE_TFBS_ClusteredV3_CellTypes", + "TFBS_Conserved", + "ReMap_PublicAndEncode_TFBS", + "Uniform_TFBS" + ), + subset_DT, + save_path = FALSE, + nThread = 1) { + gr.lib <- XGR.download_and_standardize( + lib.selections = lib.selections, + finemap_dat = subset_DT, + nThread = nThread + ) + gr.hits <- granges_overlap( + dat1 = subset_DT, + chrom_col.1 = "CHR", + start_col.1 = "POS", + end_col.1 = "POS", + dat2 = gr.lib + ) + + ucs.hits <- subset(gr.hits, Consensus_SNP) + length(unique(subset(subset_DT, Consensus_SNP)$SNP)) + length(unique(subset(subset_DT, Consensus_SNP)$Locus)) + + length(unique(ucs.hits$SNP)) + length(unique(ucs.hits$Locus)) + + if (save_path != FALSE) { + dir.create(dirname(save_path), + showWarnings = FALSE, recursive = TRUE + ) + data.table::fwrite(data.frame(ucs.hits), save_path) + } + return(gr.hits) +} diff --git a/R/XGR.merge_and_process.R b/R/XGR.merge_and_process.R new file mode 100644 index 0000000..14f7b7d --- /dev/null +++ b/R/XGR.merge_and_process.R @@ -0,0 +1,56 @@ +#' Standardize XGR annotations +#' +#' Parses the metadata and adds it as columns, +#' and then merges the results into a single +#' \code{\link[GenomicRanges]{GenomicRangesList}} +#' +#' @param grl.xgr \link[GenomicRanges]{GenomicRangesList} of XGR queries. 
+#' @family XGR +#' @keywords internal +#' @importFrom GenomicRanges start end +#' @importFrom dplyr %>% group_by tally +XGR.merge_and_process <- function(grl.xgr, + lib, + n_top_sources = 10) { + # grl.xgr <- check_saved_XGR(results_path, lib) + ## Make track + ## Add and modify columns + grl.xgr.merged <- unlist(grl.xgr) + names(grl.xgr.merged) <- gsub("Broad_Histone_", "", names(grl.xgr.merged)) + sep <- XGR.sep_handler(lib.name = lib) + grl.xgr.merged$Source <- lapply( + names(grl.xgr.merged), + function(e) { + strsplit(e, sep)[[1]][1] + } + ) %>% + unlist() + # grl.xgr.merged$Source <- gsub("_","\n", grl.xgr.merged$Source) + grl.xgr.merged$Assay <- lapply( + names(grl.xgr.merged), + function(e) { + strsplit(e, sep)[[1]][2] + } + ) %>% + unlist() + grl.xgr.merged$Start <- GenomicRanges::start(grl.xgr.merged) + grl.xgr.merged$End <- GenomicRanges::end(grl.xgr.merged) + # Filter + top_sources <- grl.xgr.merged %>% + data.frame() %>% + dplyr::group_by(Source) %>% + dplyr::tally(sort = T) + grl.xgr.merged.filt <- subset( + grl.xgr.merged, + Source %in% unique(top_sources$Source[seq(1, n_top_sources)]) + ) + # Count + # snp.pos <- subset(gr.snp, SNP %in% c("rs7294619"))$POS + # snp.sub <- subset(grl.xgr.merged, + # Start<=snp.pos & End>=snp.pos) %>% data.frame() + grl.xgr.merged.filt$Source_Assay <- paste0( + grl.xgr.merged.filt$Source, + "_", grl.xgr.merged.filt$Assay + ) + return(grl.xgr.merged.filt) +} diff --git a/R/XGR.parse_metadata.R b/R/XGR.parse_metadata.R new file mode 100644 index 0000000..b88eb62 --- /dev/null +++ b/R/XGR.parse_metadata.R @@ -0,0 +1,17 @@ +#' XGR.parse_metadata +#' +#' @keywords internal +#' @family XGR +XGR.parse_metadata <- function(gr.lib, + lib.name = NA) { + # https://stackoverflow.com/questions/50518137/separate-a-column-into-2-columns-at-the-last-underscore-in-r + sep <- XGR.sep_handler(lib.name = lib.name) + GenomicRanges::mcols(gr.lib) <- + tidyr::separate(data.frame(GenomicRanges::mcols(gr.lib)), + sep = sep, + col = 
"fullname", + into = c("Source", "Assay"), + extra = "merge" + ) + return(gr.lib) +} diff --git a/R/XGR.plot_enrichment.R b/R/XGR.plot_enrichment.R new file mode 100644 index 0000000..5434fb1 --- /dev/null +++ b/R/XGR.plot_enrichment.R @@ -0,0 +1,65 @@ +#' Plot XGR enrichment +#' +#' @family XGR +#' @keywords internal +#' @examples +#' \dontrun{ +#' enrich_res <- XGR.iterate_enrichment( +#' subset_DT = echodata::Nalls2019_merged, +#' foreground_filter = "Consensus_SNP", +#' background_filter = "leadSNP", +#' lib.selections = c("ENCODE_TFBS_ClusteredV3_CellTypes") +#' ) +#' XGR.plot_enrichment(enrich_res) +#' } +XGR.plot_enrichment <- function(enrich_res, + adjp_thresh = 0.05, + top_annotations = NULL, + show_plot = TRUE) { + enrich_res <- dplyr::arrange(enrich_res, desc(fc)) + enrich_res$source <- factor(enrich_res$source, + unique(enrich_res$source), + ordered = TRUE + ) + enrich_res$assay <- factor(enrich_res$assay, + unique(enrich_res$assay), + ordered = TRUE + ) + if (is.null(top_annotations)) { + top_annotations <- nrow(enrich_res) + } + + gp <- ggplot( + data = subset(enrich_res, adjp < adjp_thresh)[ + seq(1, top_annotations), + ], + aes(y = fc, x = assay, fill = fc) + ) + + geom_col() + + labs( + title = paste0( + "Epigenomic annotation enrichment (FDR < ", + FDR_thresh, ")" + ), + subtitle = + "Foreground = Consensus SNPs\nBackground = Lead GWAS SNPs", + y = "Fold-change" + ) + + facet_grid( + facets = Cell_type ~ Assay, + scales = "free", + space = "free" + ) + + theme_bw() + + theme( + strip.placement = "outside", + strip.text.y.left = element_text(angle = 0), + strip.background = element_rect(color = "black", fill = "white"), + plot.title = element_text(hjust = .5), + plot.subtitle = element_text(hjust = .5) + ) + if (show_plot) { + print(gp) + } + return(gp) +} diff --git a/R/XGR.plot_peaks.R b/R/XGR.plot_peaks.R new file mode 100644 index 0000000..61258a3 --- /dev/null +++ b/R/XGR.plot_peaks.R @@ -0,0 +1,77 @@ +#' Plot XGR peaks +#' +#' Plots the 
distribution of annotations across a genomic region (x-axis). +#' +#' @family XGR +#' @keywords internal +#' @param gr.lib \code{GRanges} object of annotations. +#' @param geom Plot type ("density", or "histogram"). +#' @param locus Locus name (\emph{optional}). +#' @param adjust The granularity of the peaks. +#' @param show_plot Print the plot. +#' @return \code{ggbio} track plot. +#' @inheritParams XGR.prepare_foreground_background +#' @examples +#' \dontrun{ +#' gr.lib <- XGR.download_and_standardize( +#' c("ENCODE_DNaseI_ClusteredV3_CellTypes"), +#' finemap_dat = echodata::BST1 +#' ) +#' gr.filt <- XGR.filter_sources(gr.lib = gr.lib, n_top_sources = 5) +#' gr.filt <- XGR.filter_assays(gr.lib = gr.filt, n_top_assays = 5) +#' xgr.track <- XGR.plot_peaks( +#' gr.lib = gr.filt, +#' subset_DT = echodata::BST1, +#' fill_var = "Assay", +#' facet_var = "Source" +#' ) +#' } +#' @importFrom GenomicRanges mcols +XGR.plot_peaks <- function(gr.lib, + subset_DT, + fill_var = "Assay", + facet_var = "Source", + geom = "density", + locus = NULL, + adjust = .2, + show_plot = TRUE, + show.legend = TRUE, + as.ggplot = TRUE, + trim_xlims = FALSE) { + # data("BST1"); subset_DT <- BST1; show.legend=T; + # fill_var="Assay"; facet_var="Source"; geom="density"; adjust=.2; + gr.lib$facet_label <- gsub( + "_", "\n", + GenomicRanges::mcols(gr.lib)[, facet_var] + ) + xgr.track <- ggbio::autoplot(gr.lib, + # which = gr.snp, + ggplot2::aes(fill = eval(parse(text = fill_var))), + # formula(paste0(facet_var," ~ .")), + facets = formula("facet_label ~ ."), + # fill = "magenta", + color = "white", # NA + geom = geom, + adjust = adjust, + position = "stack", + # bins=50, + size = .1, + alpha = .7, + show.legend = show.legend + ) + + ggplot2::theme_bw() + + ggplot2::labs(fill = fill_var) + if (trim_xlims) { + xgr.track <- suppressMessages( + xgr.track + + xlim(min(subset_DT$POS), max(subset_DT$POS)) + ) + } + # ggbio::tracks(list("XGR"=xgr.track)) + if (show_plot) print(xgr.track) + if (as.ggplot) 
{ + return(xgr.track@ggplot) + } else { + return(xgr.track) + } +} diff --git a/R/XGR.prepare_foreground_background.R b/R/XGR.prepare_foreground_background.R new file mode 100644 index 0000000..2ab3d1b --- /dev/null +++ b/R/XGR.prepare_foreground_background.R @@ -0,0 +1,86 @@ +#' Prepare SNP sets for enrichment +#' +#' Prepare custom foreground and background SNPs sets for enrichment +#' tests with XGR annotations. +#' +#' @param subset_DT Data.frame with at least the following columns: +#' \describe{ +#' \item{SNP}{SNP RSID} +#' \item{CHR}{chromosome} +#' \item{POS}{position} +#' } +#' @param foreground_filter Specify foreground by filtering SNPs +#' in \code{subset_DT}. +#' Write filter as a string (or \code{NULL} to include all SNPs). +#' @param background_filter Specify background by filtering SNPs +#' in \code{subset_DT}. +#' Write filter as a string (or \code{NULL} to include all SNPs). +#' @family XGR +#' @keywords internal +#' @examples +#' \dontrun{ +#' fg_bg <- XGR.prepare_foreground_background( +#' subset_DT = echodata::Nalls2019_merged, +#' foreground_filter = "Consensus_SNP==TRUE", +#' background_filter = "leadSNP==TRUE" +#' ) +#' } +#' @importFrom dplyr %>% mutate select sample_n +XGR.prepare_foreground_background <- function(subset_DT, + foreground_filter = "Support>0", + background_filter = NULL, + fg_sample_size = NULL, + bg_sample_size = NULL, + verbose = TRUE) { + if (!exists("sampling_df")) sampling_df <- subset_DT + messager("XGR:: Preparing foreground/background for enrichment test", + v = verbose + ) + #### Foreground #### + fg <- subset(subset_DT, eval(parse(text = foreground_filter))) %>% + dplyr::mutate( + chrom = paste0(gsub("chr", "", CHR)), + chromStart = POS, + chromEnd = POS, + name = SNP + ) %>% + dplyr::select(chrom, chromStart, chromEnd, name) + + #### Background #### + if (any(is.na(background_filter))) { + ## Optionally, can supply no background at all to XGR + bg <- NULL + } else { + if (!is.null(background_filter)) { + bg_DT <- 
subset(subset_DT, eval(parse(text = background_filter))) + } else { + bg_DT <- subset_DT + } + bg <- bg_DT %>% + dplyr::mutate( + chrom = paste0(gsub("chr", "", CHR)), + chromStart = POS, + chromEnd = POS, + name = SNP + ) %>% + dplyr::select(chrom, chromStart, chromEnd, name) + } + + + #### Sample fg/bg (for bootstrapping) #### + if (!is.null(fg_sample_size)) { + messager("XGR:: Sampling", fg_sample_size, "foreground SNPs", v = verbose) + fg <- fg %>% dplyr::sample_n(size = fg_sample_size) + } + if (!is.null(bg_sample_size)) { + messager("XGR:: Sampling", bg_sample_size, "background SNPs", v = verbose) + bg <- bg %>% dplyr::sample_n(size = bg_sample_size) + } + + messager("XGR::", nrow(fg), "SNPs in foreground.") + messager("XGR::", nrow(bg), "SNPs in background") + return(list( + "foreground" = fg, + "background" = bg + )) +} diff --git a/R/XGR.sep_handler.R b/R/XGR.sep_handler.R new file mode 100644 index 0000000..de29894 --- /dev/null +++ b/R/XGR.sep_handler.R @@ -0,0 +1,20 @@ +#' XGR.sep_handler +#' +#' @keywords internal +#' @family XGR +XGR.sep_handler <- function(lib.name) { + # "_(?=[^_]+$)" : Split by the last "_" + sepDict <- list( + "ENCODE_TFBS_ClusteredV3_CellTypes" = "[.]", + "ENCODE_DNaseI_ClusteredV3_CellTypes" = "_(?=[^_]+$)", + "Broad_Histone" = "_(?=[^_]+$)", + "FANTOM5_Enhancer" = "_(?=[^_]+$)", + "TFBS_Conserved" = "[$]" + ) + if (lib.name %in% names(sepDict)) { + sep <- sepDict[[lib.name]] + } else { + sep <- "_(?=[^_]+$)" + } + return(sep) +} diff --git a/R/cell_type_specificity.R b/R/cell_type_specificity.R index d26eb8b..a110a26 100644 --- a/R/cell_type_specificity.R +++ b/R/cell_type_specificity.R @@ -4,7 +4,7 @@ #' and then identify the number of SNPs overlapping by each cell type #' #' @keywords internal -#' @import ggplot2 +#' @rawNamespace import(ggplot2, except = c(geom_rect, ggsave)) #' @importFrom dplyr %>% top_n cell_type_specificity <- function(plot_dat, merged_DT, diff --git a/R/coloc_nominated_eGenes.R 
b/R/coloc_nominated_eGenes.R index 7f98a3c..8b9c31b 100644 --- a/R/coloc_nominated_eGenes.R +++ b/R/coloc_nominated_eGenes.R @@ -37,7 +37,7 @@ #' ) #' } #' @keywords internal -#' @import ggplot2 +#' @rawNamespace import(ggplot2, except = c(geom_rect, ggsave)) #' @importFrom dplyr %>% group_by top_n slice mutate desc arrange #' @importFrom data.table fread data.table coloc_nominated_eGenes <- function(coloc_results, diff --git a/R/dt_to_granges.R b/R/dt_to_granges.R new file mode 100644 index 0000000..12db2ba --- /dev/null +++ b/R/dt_to_granges.R @@ -0,0 +1,29 @@ +#' Convert data.table to GRanges object +#' +#' @family utils +#' @keywords internal +#' @importFrom GenomicRanges makeGRangesFromDataFrame +#' @importFrom GenomeInfoDb seqlevelsStyle +dt_to_granges <- function(subset_DT, + chrom_col="CHR", + start_col="POS", + end_col=start_col, + style="NCBI", + verbose=TRUE){ + if (is_granges(subset_DT)) { + messager("subset_DT is already a GRanges object.",v=verbose) + gr.snp <- subset_DT + } else { + messager("Converting subset_DT to GRanges object.",v=verbose) + subset_DT[["SEQnames"]] <- subset_DT[[chrom_col]] + gr.snp <- GenomicRanges::makeGRangesFromDataFrame( + subset_DT, + seqnames.field = "SEQnames", + start.field = start_col, + end.field = end_col, + keep.extra.columns = TRUE) + } + suppressMessages(suppressWarnings( + GenomeInfoDb::seqlevelsStyle(gr.snp) <- style)) + return(gr.snp) +} diff --git a/R/granges_overlap.R b/R/granges_overlap.R index 8300f2f..a1baa86 100644 --- a/R/granges_overlap.R +++ b/R/granges_overlap.R @@ -17,7 +17,7 @@ granges_overlap <- function(dat1, chr_format = "NCBI", verbose = FALSE) { # dat1 - if (class(dat1)[1] == "GRanges") { + if (is_granges(dat1)) { messager("+ dat1 already in GRanges format", v = verbose) gr.dat1 <- dat1 } else { @@ -31,7 +31,7 @@ granges_overlap <- function(dat1, ) } # dat2 - if (class(dat2)[1] == "GRanges") { + if (is_granges(dat2)) { messager("+ dat2 already in GRanges format", v = verbose) gr.dat2 <- dat2 } 
else { @@ -64,7 +64,7 @@ granges_overlap <- function(dat1, # gr.hits <- cbind(mcols(gr.regions[ S4Vectors::subjectHits(hits), ] ), # mcols(gr.consensus[S4Vectors::queryHits(hits),]) ) message( - "", nrow(GenomicRanges::mcols(gr.hits)), + "", formatC(nrow(GenomicRanges::mcols(gr.hits)), big.mark = ","), " query SNP(s) detected with reference overlap." ) # print(data.frame(mcols(gr.hits[,c("Name","SNP")])) ) diff --git a/R/granges_to_bed.R b/R/granges_to_bed.R new file mode 100644 index 0000000..d167797 --- /dev/null +++ b/R/granges_to_bed.R @@ -0,0 +1,43 @@ +#' Convert GRanges object to BED format and save +#' +#' @family XGR +#' @keywords internal +#' @importFrom data.table as.data.table fwrite +#' @importFrom parallel mclapply +#' @importFrom dplyr %>% select +#' @importFrom R.utils gzip +granges_to_bed <- function(GR.annotations, + output_path, + sep = "\t", + nThread = 1, + gzip = FALSE) { + BED_paths <- parallel::mclapply(names(GR.annotations), + function(name, + .gzip = gzip) { + GR <- GR.annotations[[name]] + BED <- data.table::as.data.table(GR) %>% + dplyr::select( + chrom = seqnames, + chromStart = start, + chromEnd = end, + strand + ) + BED_path <- file.path(output_path, paste0( + gsub(":", "-", name), + ".bed.txt" + )) + dir.create(dirname(BED_path), recursive = TRUE, showWarnings = FALSE) + data.table::fwrite(BED, BED_path, + sep = sep, + col.names = FALSE, quote = FALSE + ) + if (.gzip) { + R.utils::gzip(BED_path, overwrite = TRUE) + BED_path <- paste0(BED_path, ".gz") + } + return(BED_path) + }, + mc.cores = nThread + ) %>% unlist() + return(BED_paths) +} diff --git a/R/is_granges.R b/R/is_granges.R new file mode 100644 index 0000000..8a035d6 --- /dev/null +++ b/R/is_granges.R @@ -0,0 +1,7 @@ +#' is_granges +#' +#' @keywords internal +#' @importFrom methods is +is_granges <- function(obj){ + methods::is(obj,"GRanges") +} \ No newline at end of file diff --git a/R/merge_celltype_specific_epigenomics.R b/R/merge_celltype_specific_epigenomics.R index 
fa6e16c..ae12f76 100644 --- a/R/merge_celltype_specific_epigenomics.R +++ b/R/merge_celltype_specific_epigenomics.R @@ -2,9 +2,12 @@ #' #' Merges multiple cell-type-specific epigenomic datasets #' (Nott 2019, Corces 2020) into a single \link[GenomicRanges]{GRanges} object. +#' @param keep_extra_cols Keep extra columns +#' that are not shared across all annotations. #' #' @examples -#' gr.merged <- merge_celltype_specific_epigenomics() +#' gr.merged <- echoannot::merge_celltype_specific_epigenomics() +#' @export #' @importFrom tidyr separate #' @importFrom dplyr %>% mutate select #' @importFrom data.table rbindlist data.table diff --git a/R/peak_overlap_plot.R b/R/peak_overlap_plot.R index 15ace29..299e05a 100644 --- a/R/peak_overlap_plot.R +++ b/R/peak_overlap_plot.R @@ -29,7 +29,7 @@ #' fill_title = "UCS SNPs in epigenomic peaks" #' ) #' @export -#' @import ggplot2 +#' @rawNamespace import(ggplot2, except = c(geom_rect, ggsave)) #' @importFrom patchwork plot_layout #' @importFrom scales alpha #' @importFrom stats formula @@ -41,7 +41,8 @@ peak_overlap_plot <- function(merged_DT, include.CORCES_2020_scATACpeaks = TRUE, include.CORCES_2020_Cicero_coaccess = TRUE, include.CORCES_2020_bulkATACpeaks = TRUE, - include.CORCES_2020_HiChIP_FitHiChIP_coaccess = TRUE, + include.CORCES_2020_HiChIP_FitHiChIP_coaccess = + TRUE, include.CORCES_2020_gene_annotations = TRUE, plot_celltype_specificity = TRUE, plot_celltype_specificity_genes = FALSE, diff --git a/R/save_annotations.R b/R/save_annotations.R new file mode 100644 index 0000000..fa74989 --- /dev/null +++ b/R/save_annotations.R @@ -0,0 +1,8 @@ +save_annotations <- function(gr, + anno_path=tempfile(fileext = ".rds"), + libName, + verbose=TRUE){ + messager("Saving annotations ==>", anno_path, v=verbose) + dir.create(dirname(anno_path), showWarnings = FALSE, recursive = TRUE) + saveRDS(gr, file.path(anno_path)) +} \ No newline at end of file diff --git a/README.Rmd b/README.Rmd index 3994289..962cc5e 100644 --- a/README.Rmd 
+++ b/README.Rmd @@ -29,7 +29,8 @@ knitr::opts_chunk$set(echo = TRUE) ## Fine-mapping results data -Provides both built-in fine-mapping results and API access to the [**echolocatoR Fine-mapping Portal**](https://rajlab.shinyapps.io/Fine_Mapping_Shiny/). +Provides both built-in, and API accessible genomic annotations +and epigenomic data. ```{r, eval=FALSE} if(!"remotes" %in% rownames(installed.packages())){install.packages("remotes")} diff --git a/docs/404.html b/docs/404.html new file mode 100644 index 0000000..62ad943 --- /dev/null +++ b/docs/404.html @@ -0,0 +1,148 @@ + + + +
+ + + + +vignettes/echoannot.Rmd
+ echoannot.Rmd
## Warning: replacing previous import 'ggplot2::geom_rect' by 'ggbio::geom_rect'
+## when loading 'echoannot'
+## Warning: replacing previous import 'ggplot2::ggsave' by 'ggbio::ggsave' when
+## loading 'echoannot'
+
+Import pre-calculated fine-mapping results from the echolocatoR Fine-mapping Portal.
+
+local_files <- echodata::portal_query(phenotypes = "parkinson",
+ LD_panels = "UKB",
+ loci = c("BST1","LRRK2","MEX3C"),
+ file_types = "multi_finemap")
Merge (and annotate) SNP-wise fine-mapping results.
+
+dataset <- dirname(dirname(dirname(local_files)))
+merged_DT <- echoannot::merge_finemapping_results(dataset = dataset[1],
+ minimum_support = 1,
+ haploreg_annotation = TRUE)
+knitr::kable(merged_DT)
SNP | +Dataset | +Locus | +CHR | +POS | +P | +Effect | +StdErr | +A1 | +A2 | +Freq | +MAF | +N_cases | +N_controls | +proportion_cases | +N | +t_stat | +leadSNP | +ABF.CS | +ABF.PP | +SUSIE.CS | +SUSIE.PP | +POLYFUN_SUSIE.CS | +POLYFUN_SUSIE.PP | +FINEMAP.CS | +FINEMAP.PP | +Support | +Consensus_SNP | +mean.PP | +mean.CS | +Mb | +chr | +pos_hg38 | +r2 | +D’ | +is_query_snp | +ref | +alt | +AFR | +AMR | +ASN | +EUR | +GERP_cons | +SiPhy_cons | +Chromatin_States | +Chromatin_States_Imputed | +Chromatin_Marks | +DNAse | +Proteins | +eQTL | +gwas | +grasp | +Motifs | +GENCODE_id | +GENCODE_name | +GENCODE_direction | +GENCODE_distance | +RefSeq_id | +RefSeq_name | +RefSeq_direction | +RefSeq_distance | +dbSNP_functional_annotation | +query_snp_rsid | +Promoter_histone_marks | +Enhancer_histone_marks | +
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
rs10853605 | +Nalls23andMe_2019 | +MEX3C | +18 | +48783342 | +0.0000039 | +0.0466 | +0.0101 | +T | +C | +0.5569 | +0.4431 | +49053 | +1411006 | +0.0336 | +189620 | +4.6138614 | +FALSE | +NA | +NA | +0 | +0 | +0 | +0.0000000 | +1 | +1.000000 | +1 | +FALSE | +0.2500000 | +0 | +48.78334 | +18 | +51256972 | +0 | +0 | +0 | +T | +C | +0.34 | +0.49 | +0.60 | +0.45 | +1 | +1 | ++ | + | E011,H3K4me1_Enh;E013,H3K4me1_Enh;E061,H3K4me1_Enh;E080,H3K4me1_Enh;E083,H3K4me1_Enh;E092,H3K4me1_Enh;E095,H3K4me1_Enh;E097,H3K4me1_Enh;E083,H3K9ac_Pro;E113,H3K4me3_Pro | ++ | . | +GTEx2015_v6,Cells_Transformed_fibroblasts,MEX3C,4.62396778876924e-08 | +. | +22233810,Gene expression of MEX3C (ENSG00000176624) in dendritic cells treated with Mycobacterium tuberculosis,0.001893471 | +Hand1_1;Mef2_disc1;Mef2_known3;Mef2_known5 | +ENSG00000207154.1 | +U1 | +3 | +26760 | +NM_016626 | +MEX3C | +5 | +59290 | +. | +rs10853605 | ++ | + |
rs11175620 | +Nalls23andMe_2019 | +LRRK2 | +12 | +40610864 | +0.0000000 | +0.1307 | +0.0142 | +C | +G | +0.1200 | +0.1200 | +56306 | +1417791 | +0.0382 | +216621 | +9.2042254 | +FALSE | +NA | +NA | +3 | +1 | +3 | +1.0000000 | +NA | +NA | +2 | +TRUE | +0.5000000 | +0 | +40.61086 | +12 | +40217062 | +0 | +0 | +0 | +G | +C | +0.11 | +0.06 | +0.00 | +0.11 | +0 | +0 | +E023,7_Enh;E024,7_Enh;E028,7_Enh;E029,7_Enh;E030,2_TssAFlnk;E031,2_TssAFlnk;E032,7_Enh;E035,1_TssA;E052,7_Enh;E080,7_Enh;E114,7_Enh;E117,7_Enh;E119,7_Enh;E124,2_TssAFlnk | +E023,16_EnhW1;E025,19_DNase;E028,19_DNase;E029,14_EnhA2;E030,13_EnhA1;E031,14_EnhA2;E032,22_PromP;E033,16_EnhW1;E035,16_EnhW1;E036,14_EnhA2;E037,22_PromP;E038,22_PromP;E039,22_PromP;E040,22_PromP;E041,22_PromP;E042,22_PromP;E043,22_PromP;E044,22_PromP;E045,22_PromP;E046,22_PromP;E048,22_PromP;E050,22_PromP;E051,16_EnhW1;E057,22_PromP;E062,16_EnhW1;E071,22_PromP;E080,22_PromP;E086,19_DNase;E088,22_PromP;E112,17_EnhW2;E113,16_EnhW1;E114,19_DNase;E117,19_DNase;E124,13_EnhA1 | +E011,H3K4me1_Enh;E018,H3K4me1_Enh;E023,H3K4me1_Enh;E024,H3K4me1_Enh;E025,H3K4me1_Enh;E028,H3K4me1_Enh;E029,H3K4me1_Enh;E030,H3K4me1_Enh;E031,H3K4me1_Enh;E032,H3K4me1_Enh;E036,H3K4me1_Enh;E049,H3K4me1_Enh;E050,H3K4me1_Enh;E052,H3K4me1_Enh;E056,H3K4me1_Enh;E057,H3K4me1_Enh;E080,H3K4me1_Enh;E086,H3K4me1_Enh;E088,H3K4me1_Enh;E113,H3K4me1_Enh;E114,H3K4me1_Enh;E117,H3K4me1_Enh;E119,H3K4me1_Enh;E124,H3K4me1_Enh;E023,H3K4me3_Pro;E030,H3K4me3_Pro;E031,H3K4me3_Pro;E035,H3K4me3_Pro;E109,H3K4me3_Pro;E124,H3K4me3_Pro;E025,H3K9ac_Pro;E029,H3K27ac_Enh;E032,H3K27ac_Enh;E080,H3K27ac_Enh;E113,H3K27ac_Enh;E124,H3K27ac_Enh | +E006;E021;E029;E046;E080;E086;E088;E117;E120;E124 | +. 
| +GTEx2015_v6,Adipose_Subcutaneous,LRRK2,3.29211855956645e-07;GTEx2015_v6,Artery_Tibial,LRRK2,1.08781866153343e-11;GTEx2015_v6,Cells_Transformed_fibroblasts,LRRK2,1.72128347769633e-05;GTEx2015_v6,Nerve_Tibial,LRRK2,5.31856078341246e-10;GTEx2015_v6,Skin_Not_Sun_Exposed_Suprapubic,LRRK2,1.00796507632068e-05;GTEx2015_v6,Skin_Sun_Exposed_Lower_leg,LRRK2,1.06318553434241e-05;Westra2013,Whole_Blood,-,1.3808392059010189E-31;Westra2013,Whole_Blood,LRRK2,2.116806823753094E-5 | +. | +. | +BCL_disc5;HDAC2_disc3 | +ENSG00000225342.1 | +AC079630.4 | +0 | +0 | +NM_198578 | +LRRK2 | +5 | +7947 | +. | +rs11175620 | +BLD | +FAT, ESC, BRST, BLD, MUS, ADRL, LNG, CRVX | +
rs113505952 | +Nalls23andMe_2019 | +MEX3C | +18 | +48687884 | +0.6645000 | +-0.0081 | +0.0186 | +T | +C | +0.0793 | +0.0793 | +49053 | +1411006 | +0.0336 | +189620 | +-0.4354839 | +FALSE | +NA | +NA | +0 | +0 | +0 | +0.0000000 | +1 | +1.000000 | +1 | +FALSE | +0.2500000 | +0 | +48.68788 | +18 | +51161514 | +0 | +0 | +0 | +C | +T | +0.25 | +0.09 | +0.09 | +0.09 | +0 | +0 | +E046,7_Enh | +E034,17_EnhW2;E046,18_EnhAc | +E011,H3K4me1_Enh;E031,H3K4me1_Enh;E044,H3K4me1_Enh;E046,H3K4me1_Enh;E047,H3K4me1_Enh;E113,H3K4me1_Enh;E014,H3K9ac_Pro;E020,H3K27ac_Enh | ++ | . | +. | +. | +. | +Foxa_disc3;Pax-5_disc1;TCF12_disc6 | +ENSG00000176624.8 | +MEX3C | +3 | +13035 | +NM_016626 | +MEX3C | +3 | +13034 | +. | +rs113505952 | ++ | BLD | +
rs11564209 | +Nalls23andMe_2019 | +LRRK2 | +12 | +40650875 | +0.0000000 | +-0.1868 | +0.0292 | +A | +T | +0.9742 | +0.0258 | +56306 | +1417791 | +0.0382 | +216621 | +-6.3972603 | +FALSE | +NA | +NA | +0 | +0 | +NA | +NA | +1 | +1.000000 | +1 | +FALSE | +0.2500000 | +0 | +40.65087 | +12 | +40257073 | +0 | +0 | +0 | +A | +T | +0.01 | +0.17 | +0.03 | +0.02 | +0 | +0 | ++ | + | E124,H3K27ac_Enh | ++ | . | +GTEx2015_v6,Nerve_Tibial,LRRK2,2.57325753962331e-07 | +. | +. | +BATF_disc3;BCL_disc4;Mrg_2;Myc_known9;Nanog_known1;Pou2f2_known11;p300_disc5 | +ENSG00000188906.9 | +LRRK2 | +0 | +0 | +NM_198578 | +LRRK2 | +0 | +0 | +INT | +rs11564209 | ++ | + |
rs117929583 | +Nalls23andMe_2019 | +LRRK2 | +12 | +40791407 | +0.0000000 | +0.2198 | +0.0285 | +A | +G | +0.0263 | +0.0263 | +56306 | +1417791 | +0.0382 | +216621 | +7.7122807 | +FALSE | +NA | +NA | +0 | +0 | +NA | +NA | +1 | +1.000000 | +1 | +FALSE | +0.2500000 | +0 | +40.79141 | +12 | +40397605 | +0 | +0 | +0 | +G | +A | +0.01 | +0.15 | +0.03 | +0.02 | +0 | +0 | ++ | + | + | + | . | +GTEx2015_v6,Nerve_Tibial,LRRK2,1.98632825918171e-07 | +. | +. | +CEBPB_known6;Cphx;HNF1_7;Mef2_disc2;Myc_disc5;Pax-3_2;Pou1f1_2;Pou3f2_2;Pou3f4;p300_disc2 | +ENSG00000258167.1 | +RP11-115F18.1 | +0 | +0 | +NM_198578 | +LRRK2 | +3 | +28320 | +INT | +rs117929583 | ++ | + |
rs1893379 | +Nalls23andMe_2019 | +MEX3C | +18 | +48718536 | +0.0000000 | +0.0558 | +0.0100 | +A | +G | +0.4542 | +0.4542 | +49053 | +1411006 | +0.0336 | +189620 | +5.5800000 | +FALSE | +NA | +NA | +1 | +1 | +1 | +1.0000000 | +NA | +NA | +2 | +TRUE | +0.5000000 | +0 | +48.71854 | +18 | +51192166 | +0 | +0 | +0 | +G | +A | +0.15 | +0.38 | +0.23 | +0.44 | +0 | +0 | +E002,7_Enh;E011,3_TxFlnk;E013,6_EnhG;E014,6_EnhG;E015,6_EnhG;E016,6_EnhG;E018,7_Enh;E019,6_EnhG;E020,3_TxFlnk;E023,7_Enh;E025,7_Enh;E026,7_Enh;E027,1_TssA;E028,7_Enh;E031,7_Enh;E032,7_Enh;E033,7_Enh;E035,6_EnhG;E037,7_Enh;E038,7_Enh;E039,7_Enh;E040,7_Enh;E041,2_TssAFlnk;E042,7_Enh;E043,7_Enh;E044,2_TssAFlnk;E045,7_Enh;E047,7_Enh;E048,7_Enh;E049,7_Enh;E050,7_Enh;E051,7_Enh;E052,7_Enh;E053,7_Enh;E054,7_Enh;E055,7_Enh;E057,7_Enh;E058,7_Enh;E061,7_Enh;E062,7_Enh;E068,7_Enh;E076,7_Enh;E078,7_Enh;E081,7_Enh;E082,7_Enh;E083,7_Enh;E086,7_Enh;E087,7_Enh;E088,7_Enh;E099,7_Enh;E103,7_Enh;E107,7_Enh;E115,6_EnhG;E116,1_TssA;E117,7_Enh;E119,2_TssAFlnk;E122,7_Enh;E123,7_Enh;E126,7_Enh;E127,7_Enh | 
+E001,12_TxEnhW;E002,12_TxEnhW;E003,12_TxEnhW;E004,12_TxEnhW;E008,12_TxEnhW;E009,12_TxEnhW;E010,12_TxEnhW;E011,10_TxEnh5;E012,12_TxEnhW;E013,12_TxEnhW;E014,12_TxEnhW;E015,12_TxEnhW;E016,12_TxEnhW;E018,12_TxEnhW;E019,10_TxEnh5;E020,12_TxEnhW;E023,12_TxEnhW;E024,12_TxEnhW;E025,12_TxEnhW;E026,12_TxEnhW;E027,10_TxEnh5;E028,10_TxEnh5;E030,12_TxEnhW;E031,10_TxEnh5;E032,10_TxEnh5;E033,10_TxEnh5;E034,10_TxEnh5;E035,12_TxEnhW;E036,12_TxEnhW;E037,9_TxReg;E038,10_TxEnh5;E039,10_TxEnh5;E040,9_TxReg;E041,9_TxReg;E042,9_TxReg;E043,4_PromD2;E044,9_TxReg;E045,4_PromD2;E046,12_TxEnhW;E047,10_TxEnh5;E048,9_TxReg;E049,12_TxEnhW;E050,10_TxEnh5;E051,10_TxEnh5;E052,12_TxEnhW;E053,12_TxEnhW;E054,12_TxEnhW;E055,12_TxEnhW;E057,12_TxEnhW;E058,10_TxEnh5;E061,12_TxEnhW;E062,12_TxEnhW;E063,12_TxEnhW;E066,12_TxEnhW;E067,12_TxEnhW;E070,12_TxEnhW;E072,12_TxEnhW;E073,12_TxEnhW;E078,12_TxEnhW;E081,12_TxEnhW;E082,12_TxEnhW;E083,10_TxEnh5;E086,12_TxEnhW;E087,12_TxEnhW;E088,12_TxEnhW;E093,12_TxEnhW;E099,12_TxEnhW;E103,12_TxEnhW;E104,12_TxEnhW;E108,12_TxEnhW;E112,12_TxEnhW;E114,12_TxEnhW;E115,10_TxEnh5;E116,9_TxReg;E117,12_TxEnhW;E119,9_TxReg;E120,12_TxEnhW;E121,12_TxEnhW;E122,12_TxEnhW;E123,12_TxEnhW;E124,12_TxEnhW;E125,12_TxEnhW;E126,12_TxEnhW;E127,10_TxEnh5;E129,12_TxEnhW | 
+E001,H3K4me1_Enh;E003,H3K4me1_Enh;E004,H3K4me1_Enh;E008,H3K4me1_Enh;E009,H3K4me1_Enh;E010,H3K4me1_Enh;E011,H3K4me1_Enh;E012,H3K4me1_Enh;E013,H3K4me1_Enh;E014,H3K4me1_Enh;E015,H3K4me1_Enh;E016,H3K4me1_Enh;E018,H3K4me1_Enh;E019,H3K4me1_Enh;E020,H3K4me1_Enh;E023,H3K4me1_Enh;E024,H3K4me1_Enh;E025,H3K4me1_Enh;E026,H3K4me1_Enh;E027,H3K4me1_Enh;E028,H3K4me1_Enh;E029,H3K4me1_Enh;E031,H3K4me1_Enh;E032,H3K4me1_Enh;E033,H3K4me1_Enh;E034,H3K4me1_Enh;E035,H3K4me1_Enh;E036,H3K4me1_Enh;E037,H3K4me1_Enh;E038,H3K4me1_Enh;E039,H3K4me1_Enh;E040,H3K4me1_Enh;E041,H3K4me1_Enh;E042,H3K4me1_Enh;E043,H3K4me1_Enh;E044,H3K4me1_Enh;E045,H3K4me1_Enh;E046,H3K4me1_Enh;E047,H3K4me1_Enh;E048,H3K4me1_Enh;E049,H3K4me1_Enh;E050,H3K4me1_Enh;E051,H3K4me1_Enh;E052,H3K4me1_Enh;E053,H3K4me1_Enh;E054,H3K4me1_Enh;E055,H3K4me1_Enh;E056,H3K4me1_Enh;E057,H3K4me1_Enh;E058,H3K4me1_Enh;E061,H3K4me1_Enh;E062,H3K4me1_Enh;E063,H3K4me1_Enh;E066,H3K4me1_Enh;E067,H3K4me1_Enh;E068,H3K4me1_Enh;E069,H3K4me1_Enh;E070,H3K4me1_Enh;E071,H3K4me1_Enh;E072,H3K4me1_Enh;E073,H3K4me1_Enh;E074,H3K4me1_Enh;E075,H3K4me1_Enh;E076,H3K4me1_Enh;E077,H3K4me1_Enh;E078,H3K4me1_Enh;E080,H3K4me1_Enh;E081,H3K4me1_Enh;E082,H3K4me1_Enh;E083,H3K4me1_Enh;E084,H3K4me1_Enh;E085,H3K4me1_Enh;E086,H3K4me1_Enh;E087,H3K4me1_Enh;E088,H3K4me1_Enh;E092,H3K4me1_Enh;E095,H3K4me1_Enh;E099,H3K4me1_Enh;E100,H3K4me1_Enh;E101,H3K4me1_Enh;E102,H3K4me1_Enh;E103,H3K4me1_Enh;E105,H3K4me1_Enh;E106,H3K4me1_Enh;E107,H3K4me1_Enh;E108,H3K4me1_Enh;E109,H3K4me1_Enh;E110,H3K4me1_Enh;E114,H3K4me1_Enh;E115,H3K4me1_Enh;E116,H3K4me1_Enh;E117,H3K4me1_Enh;E118,H3K4me1_Enh;E119,H3K4me1_Enh;E120,H3K4me1_Enh;E121,H3K4me1_Enh;E122,H3K4me1_Enh;E123,H3K4me1_Enh;E124,H3K4me1_Enh;E125,H3K4me1_Enh;E126,H3K4me1_Enh;E127,H3K4me1_Enh;E128,H3K4me1_Enh;E129,H3K4me1_Enh;E003,H3K4me3_Pro;E010,H3K4me3_Pro;E011,H3K4me3_Pro;E012,H3K4me3_Pro;E013,H3K4me3_Pro;E015,H3K4me3_Pro;E016,H3K4me3_Pro;E018,H3K4me3_Pro;E019,H3K4me3_Pro;E020,H3K4me3_Pro;E025,H3K4me3_Pro;E026,H3K4me3_Pro;E027,H3K4me3_Pro;E028,H3K4m
e3_Pro;E033,H3K4me3_Pro;E034,H3K4me3_Pro;E035,H3K4me3_Pro;E037,H3K4me3_Pro;E038,H3K4me3_Pro;E040,H3K4me3_Pro;E041,H3K4me3_Pro;E042,H3K4me3_Pro;E043,H3K4me3_Pro;E044,H3K4me3_Pro;E045,H3K4me3_Pro;E046,H3K4me3_Pro;E050,H3K4me3_Pro;E051,H3K4me3_Pro;E052,H3K4me3_Pro;E053,H3K4me3_Pro;E054,H3K4me3_Pro;E058,H3K4me3_Pro;E062,H3K4me3_Pro;E063,H3K4me3_Pro;E067,H3K4me3_Pro;E072,H3K4me3_Pro;E073,H3K4me3_Pro;E078,H3K4me3_Pro;E082,H3K4me3_Pro;E083,H3K4me3_Pro;E088,H3K4me3_Pro;E101,H3K4me3_Pro;E103,H3K4me3_Pro;E105,H3K4me3_Pro;E111,H3K4me3_Pro;E112,H3K4me3_Pro;E114,H3K4me3_Pro;E115,H3K4me3_Pro;E116,H3K4me3_Pro;E119,H3K4me3_Pro;E121,H3K4me3_Pro;E122,H3K4me3_Pro;E123,H3K4me3_Pro;E125,H3K4me3_Pro;E127,H3K4me3_Pro;E004,H3K9ac_Pro;E008,H3K9ac_Pro;E011,H3K9ac_Pro;E014,H3K9ac_Pro;E016,H3K9ac_Pro;E018,H3K9ac_Pro;E019,H3K9ac_Pro;E020,H3K9ac_Pro;E023,H3K9ac_Pro;E025,H3K9ac_Pro;E026,H3K9ac_Pro;E052,H3K9ac_Pro;E062,H3K9ac_Pro;E063,H3K9ac_Pro;E066,H3K9ac_Pro;E067,H3K9ac_Pro;E072,H3K9ac_Pro;E077,H3K9ac_Pro;E083,H3K9ac_Pro;E086,H3K9ac_Pro;E088,H3K9ac_Pro;E110,H3K9ac_Pro;E114,H3K9ac_Pro;E115,H3K9ac_Pro;E116,H3K9ac_Pro;E119,H3K9ac_Pro;E120,H3K9ac_Pro;E121,H3K9ac_Pro;E122,H3K9ac_Pro;E123,H3K9ac_Pro;E125,H3K9ac_Pro;E008,H3K27ac_Enh;E012,H3K27ac_Enh;E013,H3K27ac_Enh;E014,H3K27ac_Enh;E020,H3K27ac_Enh;E026,H3K27ac_Enh;E034,H3K27ac_Enh;E037,H3K27ac_Enh;E038,H3K27ac_Enh;E039,H3K27ac_Enh;E040,H3K27ac_Enh;E041,H3K27ac_Enh;E042,H3K27ac_Enh;E043,H3K27ac_Enh;E044,H3K27ac_Enh;E045,H3K27ac_Enh;E046,H3K27ac_Enh;E048,H3K27ac_Enh;E049,H3K27ac_Enh;E050,H3K27ac_Enh;E056,H3K27ac_Enh;E058,H3K27ac_Enh;E062,H3K27ac_Enh;E063,H3K27ac_Enh;E066,H3K27ac_Enh;E076,H3K27ac_Enh;E078,H3K27ac_Enh;E080,H3K27ac_Enh;E087,H3K27ac_Enh;E089,H3K27ac_Enh;E090,H3K27ac_Enh;E093,H3K27ac_Enh;E101,H3K27ac_Enh;E103,H3K27ac_Enh;E108,H3K27ac_Enh;E109,H3K27ac_Enh;E112,H3K27ac_Enh;E113,H3K27ac_Enh;E115,H3K27ac_Enh;E116,H3K27ac_Enh;E117,H3K27ac_Enh;E119,H3K27ac_Enh;E121,H3K27ac_Enh;E122,H3K27ac_Enh;E123,H3K27ac_Enh;E124,H3K27ac_Enh;E127,H3K27ac_Enh 
| +E028;E082;E090 | +. | +GTEx2015_v6,Breast_Mammary_Tissue,MEX3C,6.43632475579646e-06;GTEx2015_v6,Cells_Transformed_fibroblasts,MEX3C,5.10420348099408e-12;GTEx2015_v6,Thyroid,MEX3C,1.85337713345071e-06;Lappalainen2013,Lymphoblastoid_EUR_exonlevel,ENSG00000176624.8_48700920_48703946,6.84711512731581e-07;Lappalainen2013,Lymphoblastoid_EUR_genelevel,MEX3C,1.94053610430114e-07;Westra2013,Whole_Blood,ME2,1.268991877789626E-4 | +. | +. | +. | +ENSG00000176624.8 | +MEX3C | +0 | +0 | +NM_016626 | +MEX3C | +0 | +0 | +INT | +rs1893379 | +BRST, BLD | +ESC, ESDR, IPSC, FAT, STRM, BRST, BLD, MUS, BRN, SKIN, GI, HRT, KID, PANC, LNG, PLCNT, CRVX, VAS | +
rs34559912 | +Nalls23andMe_2019 | +BST1 | +4 | +15730146 | +0.0000000 | +0.1030 | +0.0095 | +T | +G | +0.5526 | +0.4474 | +56306 | +1417791 | +0.0382 | +216621 | +10.8421053 | +FALSE | +NA | +NA | +3 | +1 | +3 | +1.0000000 | +NA | +NA | +2 | +TRUE | +0.5000000 | +0 | +15.73015 | +4 | +15728523 | +0 | +0 | +0 | +G | +T | +0.17 | +0.52 | +0.37 | +0.57 | +0 | +0 | +E093,7_Enh | ++ | E014,H3K9ac_Pro;E047,H3K9ac_Pro;E068,H3K9ac_Pro;E072,H3K9ac_Pro;E023,H3K4me1_Enh;E093,H3K4me1_Enh;E124,H3K27ac_Enh | ++ | . | +GTEx2015_v6,Adipose_Visceral_Omentum,RP11-115L11.1,4.90492322683713e-06;GTEx2015_v6,Brain_Caudate_basal_ganglia,CD38,1.25017558750329e-08;GTEx2015_v6,Brain_Cortex,CD38,1.71117192134186e-06;GTEx2015_v6,Brain_Nucleus_accumbens_basal_ganglia,CD38,1.08915972859775e-06;GTEx2015_v6,Cells_Transformed_fibroblasts,RP11-115L11.1,3.20069844767782e-15;GTEx2015_v6,Nerve_Tibial,RP11-115L11.1,6.59512086449678e-08 | +. | +. | +Mrg1::Hoxa9_2 | +ENSG00000109743.6 | +BST1 | +0 | +0 | +NM_004334 | +BST1 | +0 | +0 | +INT | +rs34559912 | ++ | THYM | +
rs35519415 | +Nalls23andMe_2019 | +BST1 | +4 | +15710330 | +0.0000000 | +0.0843 | +0.0095 | +A | +C | +0.5655 | +0.4345 | +56306 | +1417791 | +0.0382 | +216621 | +8.8736842 | +FALSE | +NA | +NA | +0 | +0 | +0 | +0.0000000 | +1 | +0.999959 | +1 | +FALSE | +0.2499898 | +0 | +15.71033 | +4 | +15708707 | +0 | +0 | +0 | +A | +C | +0.59 | +0.44 | +0.56 | +0.43 | +0 | +0 | +E030,6_EnhG | ++ | E007,H3K4me1_Enh;E009,H3K4me1_Enh;E010,H3K4me1_Enh;E012,H3K4me1_Enh;E029,H3K4me1_Enh;E030,H3K4me1_Enh;E055,H3K4me1_Enh;E088,H3K4me1_Enh;E111,H3K4me1_Enh;E117,H3K4me1_Enh;E124,H3K4me1_Enh;E013,H3K27ac_Enh;E067,H3K27ac_Enh;E069,H3K27ac_Enh;E124,H3K27ac_Enh;E027,H3K9ac_Pro;E083,H3K9ac_Pro;E111,H3K9ac_Pro | ++ | . | +GTEx2015_v6,Brain_Nucleus_accumbens_basal_ganglia,FAM200B,1.43240818019526e-08;GTEx2015_v6,Cells_Transformed_fibroblasts,RP11-115L11.1,2.8464859168388e-11 | +. | +. | +Ik-2_1;Irf_known3;PLZF;STAT_disc3 | +ENSG00000109743.6 | +BST1 | +0 | +0 | +NM_004334 | +BST1 | +0 | +0 | +INT | +rs35519415 | ++ | BLD | +
rs3756246 | +Nalls23andMe_2019 | +BST1 | +4 | +15706790 | +0.0000000 | +-0.0661 | +0.0095 | +A | +T | +0.4034 | +0.4034 | +56306 | +1417791 | +0.0382 | +216621 | +-6.9578947 | +FALSE | +NA | +NA | +0 | +0 | +0 | +0.0000000 | +1 | +1.000000 | +1 | +FALSE | +0.2500000 | +0 | +15.70679 | +4 | +15705167 | +0 | +0 | +0 | +T | +A | +0.58 | +0.41 | +0.33 | +0.40 | +0 | +0 | +E029,6_EnhG;E030,3_TxFlnk;E053,7_Enh;E074,7_Enh;E118,7_Enh;E124,3_TxFlnk | +E029,10_TxEnh5;E030,9_TxReg;E031,12_TxEnhW;E035,17_EnhW2;E124,13_EnhA1 | +E002,H3K4me1_Enh;E005,H3K4me1_Enh;E006,H3K4me1_Enh;E007,H3K4me1_Enh;E009,H3K4me1_Enh;E010,H3K4me1_Enh;E012,H3K4me1_Enh;E013,H3K4me1_Enh;E016,H3K4me1_Enh;E025,H3K4me1_Enh;E029,H3K4me1_Enh;E030,H3K4me1_Enh;E031,H3K4me1_Enh;E038,H3K4me1_Enh;E053,H3K4me1_Enh;E063,H3K4me1_Enh;E066,H3K4me1_Enh;E067,H3K4me1_Enh;E069,H3K4me1_Enh;E071,H3K4me1_Enh;E072,H3K4me1_Enh;E074,H3K4me1_Enh;E078,H3K4me1_Enh;E079,H3K4me1_Enh;E082,H3K4me1_Enh;E083,H3K4me1_Enh;E084,H3K4me1_Enh;E085,H3K4me1_Enh;E094,H3K4me1_Enh;E097,H3K4me1_Enh;E099,H3K4me1_Enh;E104,H3K4me1_Enh;E121,H3K4me1_Enh;E124,H3K4me1_Enh;E125,H3K4me1_Enh;E002,H3K4me3_Pro;E030,H3K4me3_Pro;E077,H3K4me3_Pro;E110,H3K4me3_Pro;E111,H3K4me3_Pro;E124,H3K4me3_Pro;E007,H3K9ac_Pro;E063,H3K9ac_Pro;E067,H3K9ac_Pro;E072,H3K9ac_Pro;E073,H3K9ac_Pro;E074,H3K9ac_Pro;E077,H3K9ac_Pro;E083,H3K9ac_Pro;E101,H3K9ac_Pro;E111,H3K9ac_Pro;E124,H3K9ac_Pro;E012,H3K27ac_Enh;E029,H3K27ac_Enh;E059,H3K27ac_Enh;E063,H3K27ac_Enh;E066,H3K27ac_Enh;E067,H3K27ac_Enh;E068,H3K27ac_Enh;E069,H3K27ac_Enh;E071,H3K27ac_Enh;E072,H3K27ac_Enh;E073,H3K27ac_Enh;E074,H3K27ac_Enh;E076,H3K27ac_Enh;E078,H3K27ac_Enh;E084,H3K27ac_Enh;E085,H3K27ac_Enh;E092,H3K27ac_Enh;E096,H3K27ac_Enh;E097,H3K27ac_Enh;E101,H3K27ac_Enh;E106,H3K27ac_Enh;E111,H3K27ac_Enh;E113,H3K27ac_Enh;E115,H3K27ac_Enh;E124,H3K27ac_Enh | ++ | . 
| +GTEx2015_v6,Artery_Tibial,FAM200B,2.99871044171767e-06;GTEx2015_v6,Brain_Nucleus_accumbens_basal_ganglia,FAM200B,9.80660669297648e-08;GTEx2015_v6,Cells_Transformed_fibroblasts,RP11-115L11.1,4.75076314728826e-08;Westra2013,Whole_Blood,-,6.795346249460126E-6;Westra2013,Whole_Blood,BST1,5.187033440697593E-116;Westra2013,Whole_Blood,FBXL5,2.4479224150185407E-14;Westra2013,Whole_Blood,FBXL5,3.0810590020329485E-28 | +. | +. | +Sox_16;Sox_17 | +ENSG00000237765.2 | +FAM200B | +0 | +0 | +NM_004334 | +BST1 | +0 | +0 | +INT | +rs3756246 | ++ | BLD, BRN, LIV | +
rs4389574 | +Nalls23andMe_2019 | +BST1 | +4 | +15730398 | +0.0000000 | +-0.0977 | +0.0116 | +A | +G | +0.4443 | +0.4443 | +42598 | +1322509 | +0.0312 | +165075 | +-8.4224138 | +FALSE | +NA | +NA | +1 | +1 | +1 | +1.0000000 | +NA | +NA | +2 | +TRUE | +0.5000000 | +0 | +15.73040 | +4 | +15728775 | +0 | +0 | +0 | +A | +G | +0.17 | +0.52 | +0.37 | +0.57 | +0 | +0 | +E093,7_Enh | ++ | E014,H3K9ac_Pro;E047,H3K9ac_Pro;E068,H3K9ac_Pro;E072,H3K9ac_Pro;E093,H3K4me1_Enh;E124,H3K27ac_Enh | ++ | . | +GTEx2015_v6,Adipose_Visceral_Omentum,RP11-115L11.1,7.96707696325135e-06;GTEx2015_v6,Brain_Caudate_basal_ganglia,CD38,1.2467177895689e-08;GTEx2015_v6,Brain_Cortex,CD38,1.70977511189002e-06;GTEx2015_v6,Brain_Nucleus_accumbens_basal_ganglia,CD38,1.08358888563218e-06;GTEx2015_v6,Cells_Transformed_fibroblasts,RP11-115L11.1,3.21205053848606e-15;GTEx2015_v6,Nerve_Tibial,RP11-115L11.1,6.74374690500463e-08;Ramasamy2014,Brain_OCTX,FBXL5,8.47E-08 | +. | +. | +Myc_known6;YY1_disc4 | +ENSG00000109743.6 | +BST1 | +0 | +0 | +NM_004334 | +BST1 | +0 | +0 | +INT | +rs4389574 | ++ | THYM | +
rs4541502 | +Nalls23andMe_2019 | +BST1 | +4 | +15712787 | +0.0000000 | +-0.0897 | +0.0093 | +T | +G | +0.4749 | +0.4749 | +56306 | +1417791 | +0.0382 | +216621 | +-9.6451613 | +FALSE | +NA | +NA | +2 | +1 | +2 | +1.0000000 | +1 | +1.000000 | +3 | +TRUE | +0.7500000 | +0 | +15.71279 | +4 | +15711164 | +0 | +0 | +0 | +T | +G | +0.39 | +0.55 | +0.45 | +0.54 | +0 | +0 | ++ | + | E124,H3K27ac_Enh | ++ | . | +GTEx2015_v6,Brain_Caudate_basal_ganglia,CD38,2.1906795246105e-06;GTEx2015_v6,Brain_Nucleus_accumbens_basal_ganglia,FAM200B,8.16827868804762e-10;GTEx2015_v6,Brain_Putamen_basal_ganglia,CD38,1.11682671421168e-06;GTEx2015_v6,Cells_Transformed_fibroblasts,RP11-115L11.1,2.70302919332058e-10;GTEx2015_v6,Nerve_Tibial,RP11-115L11.1,5.06599980966537e-08;Westra2013,Whole_Blood,-,2.0272206900010006E-4;Westra2013,Whole_Blood,BST1,9.813427854297537E-198;Westra2013,Whole_Blood,FBXL5,1.47934426698415E-9;Westra2013,Whole_Blood,FBXL5,7.286931933820059E-15 | +. | +. | +Hoxa10;Hoxa9;Hoxb8;Hoxc9;Hoxd8;Isl2;Mef2_known2;Nkx6-1_1;Nkx6-1_3 | +ENSG00000109743.6 | +BST1 | +0 | +0 | +NM_004334 | +BST1 | +0 | +0 | +INT | +rs4541502 | ++ | + |
rs4698412 | +Nalls23andMe_2019 | +BST1 | +4 | +15737348 | +0.0000000 | +0.1035 | +0.0094 | +A | +G | +0.5529 | +0.4471 | +56306 | +1417791 | +0.0382 | +216621 | +11.0106383 | +TRUE | +NA | +NA | +0 | +0 | +0 | +0.0000000 | +1 | +1.000000 | +1 | +FALSE | +0.2500000 | +0 | +15.73735 | +4 | +15735725 | +0 | +0 | +0 | +G | +A | +0.12 | +0.51 | +0.37 | +0.56 | +0 | +0 | +E010,7_Enh;E093,7_Enh | +E009,17_EnhW2 | +E005,H3K4me3_Pro;E068,H3K4me3_Pro;E096,H3K4me3_Pro;E098,H3K4me3_Pro;E009,H3K4me1_Enh;E010,H3K4me1_Enh;E032,H3K4me1_Enh;E039,H3K4me1_Enh;E042,H3K4me1_Enh;E068,H3K4me1_Enh;E069,H3K4me1_Enh;E071,H3K4me1_Enh;E073,H3K4me1_Enh;E074,H3K4me1_Enh;E093,H3K4me1_Enh;E113,H3K4me1_Enh;E115,H3K4me1_Enh;E011,H3K27ac_Enh;E068,H3K27ac_Enh;E093,H3K27ac_Enh;E115,H3K27ac_Enh;E124,H3K27ac_Enh | +E081;E082 | +. | +GTEx2015_v6,Adipose_Visceral_Omentum,RP11-115L11.1,4.90492322683713e-06;GTEx2015_v6,Brain_Caudate_basal_ganglia,CD38,1.04569060365368e-08;GTEx2015_v6,Brain_Cortex,CD38,7.9258927156503e-07;GTEx2015_v6,Brain_Nucleus_accumbens_basal_ganglia,CD38,1.48737436798319e-06;GTEx2015_v6,Brain_Putamen_basal_ganglia,CD38,1.654496840313e-06;GTEx2015_v6,Cells_Transformed_fibroblasts,RP11-115L11.1,4.41873234452936e-15;GTEx2015_v6,Nerve_Tibial,RP11-115L11.1,1.71815210274496e-07;Westra2013,Whole_Blood,-,2.834212197516388E-4;Westra2013,Whole_Blood,BST1,9.813427854297537E-198;Westra2013,Whole_Blood,FBXL5,2.0449986736598613E-7;Westra2013,Whole_Blood,FBXL5,2.2421033720911498E-8 | +21084426,Parkinson’s disease,2E-6;22451204,Parkinson’s disease,3E-7 | +21886157,Serum ratio of (guanosine)/(inosine),7.10E-04 | +CACD_1;KAP1_disc2;SP2_disc3;ZBTB7A_disc2 | +ENSG00000109743.6 | +BST1 | +0 | +0 | +NM_004334 | +BST1 | +3 | +3551 | +. | +rs4698412 | ++ | ESDR, THYM | +
rs6852450 | +Nalls23andMe_2019 | +BST1 | +4 | +15742986 | +0.0000000 | +-0.0846 | +0.0094 | +T | +C | +0.4359 | +0.4359 | +56306 | +1417791 | +0.0382 | +216621 | +-9.0000000 | +FALSE | +NA | +NA | +0 | +0 | +0 | +0.0000000 | +1 | +1.000000 | +1 | +FALSE | +0.2500000 | +0 | +15.74299 | +4 | +15741363 | +0 | +0 | +0 | +T | +C | +0.23 | +0.58 | +0.35 | +0.59 | +0 | +0 | +E066,7_Enh;E115,2_TssAFlnk;E118,7_Enh | +E066,16_EnhW1;E115,13_EnhA1 | +E030,H3K4me1_Enh;E063,H3K4me1_Enh;E066,H3K4me1_Enh;E095,H3K4me1_Enh;E115,H3K4me1_Enh;E118,H3K4me1_Enh;E066,H3K27ac_Enh;E096,H3K27ac_Enh;E115,H3K27ac_Enh;E115,H3K4me3_Pro;E115,H3K9ac_Pro | ++ | . | +GTEx2015_v6,Brain_Caudate_basal_ganglia,CD38,1.5255834739153e-07;GTEx2015_v6,Cells_Transformed_fibroblasts,RP11-115L11.1,1.93332338517896e-11;Ramasamy2014,Brain_HIPP,CD38,2.74E-08;Ramasamy2014,Brain_HIPP,CD38,2.85E-07;Ramasamy2014,Brain_HIPP,CD38,4.51E-08;Ramasamy2014,Brain_HIPP,CD38,5.25E-07;Westra2013,Whole_Blood,BST1,1.1060585197180966E-187;Westra2013,Whole_Blood,FBXL5,1.9688073249254627E-6 | +. | +. | +GZF1;Nkx2_3 | +ENSG00000109743.6 | +BST1 | +3 | +3049 | +NM_004334 | +BST1 | +3 | +9189 | +. | +rs6852450 | +BLD | +LIV | +
rs7294619 | +Nalls23andMe_2019 | +LRRK2 | +12 | +40617202 | +0.0000000 | +-0.1276 | +0.0140 | +T | +C | +0.8783 | +0.1217 | +56306 | +1417791 | +0.0382 | +216621 | +-9.1142857 | +FALSE | +NA | +NA | +2 | +1 | +2 | +1.0000000 | +1 | +1.000000 | +3 | +TRUE | +0.7500000 | +0 | +40.61720 | +12 | +40223400 | +0 | +0 | +0 | +T | +C | +0.11 | +0.06 | +0.00 | +0.11 | +0 | +0 | +E023,7_Enh;E025,7_Enh;E029,1_TssA;E030,2_TssAFlnk;E031,2_TssAFlnk;E032,1_TssA;E033,1_TssA;E035,1_TssA;E046,1_TssA;E050,7_Enh;E051,1_TssA;E062,1_TssA;E063,7_Enh;E066,7_Enh;E068,7_Enh;E074,7_Enh;E076,7_Enh;E078,1_TssA;E086,1_TssA;E103,7_Enh;E116,10_TssBiv;E124,1_TssA | +E009,23_PromBiv;E023,22_PromP;E025,22_PromP;E029,4_PromD2;E030,3_PromD1;E031,4_PromD2;E032,4_PromD2;E033,4_PromD2;E034,22_PromP;E035,4_PromD2;E036,4_PromD2;E037,22_PromP;E038,4_PromD2;E039,12_TxEnhW;E040,12_TxEnhW;E041,12_TxEnhW;E042,22_PromP;E043,4_PromD2;E044,4_PromD2;E045,12_TxEnhW;E046,4_PromD2;E047,4_PromD2;E048,22_PromP;E050,4_PromD2;E051,4_PromD2;E052,19_DNase;E053,22_PromP;E055,22_PromP;E057,22_PromP;E061,22_PromP;E062,4_PromD2;E063,22_PromP;E066,4_PromD2;E067,22_PromP;E068,22_PromP;E069,22_PromP;E070,22_PromP;E071,12_TxEnhW;E072,22_PromP;E073,22_PromP;E074,19_DNase;E075,22_PromP;E076,4_PromD2;E077,4_PromD2;E078,4_PromD2;E080,22_PromP;E081,22_PromP;E084,22_PromP;E085,22_PromP;E086,22_PromP;E088,22_PromP;E091,22_PromP;E092,22_PromP;E093,22_PromP;E100,22_PromP;E101,4_PromD2;E102,4_PromD2;E103,22_PromP;E106,22_PromP;E108,22_PromP;E109,22_PromP;E110,22_PromP;E111,16_EnhW1;E112,4_PromD2;E113,4_PromD2;E114,22_PromP;E115,23_PromBiv;E116,4_PromD2;E118,22_PromP;E119,22_PromP;E122,22_PromP;E124,3_PromD1 | 
+E023,H3K4me1_Enh;E025,H3K4me1_Enh;E026,H3K4me1_Enh;E027,H3K4me1_Enh;E028,H3K4me1_Enh;E030,H3K4me1_Enh;E031,H3K4me1_Enh;E032,H3K4me1_Enh;E035,H3K4me1_Enh;E036,H3K4me1_Enh;E038,H3K4me1_Enh;E041,H3K4me1_Enh;E043,H3K4me1_Enh;E047,H3K4me1_Enh;E048,H3K4me1_Enh;E049,H3K4me1_Enh;E050,H3K4me1_Enh;E051,H3K4me1_Enh;E052,H3K4me1_Enh;E061,H3K4me1_Enh;E062,H3K4me1_Enh;E063,H3K4me1_Enh;E066,H3K4me1_Enh;E067,H3K4me1_Enh;E068,H3K4me1_Enh;E069,H3K4me1_Enh;E071,H3K4me1_Enh;E072,H3K4me1_Enh;E073,H3K4me1_Enh;E074,H3K4me1_Enh;E076,H3K4me1_Enh;E077,H3K4me1_Enh;E078,H3K4me1_Enh;E080,H3K4me1_Enh;E087,H3K4me1_Enh;E102,H3K4me1_Enh;E103,H3K4me1_Enh;E108,H3K4me1_Enh;E109,H3K4me1_Enh;E111,H3K4me1_Enh;E114,H3K4me1_Enh;E124,H3K4me1_Enh;E129,H3K4me1_Enh;E023,H3K4me3_Pro;E024,H3K4me3_Pro;E025,H3K4me3_Pro;E029,H3K4me3_Pro;E030,H3K4me3_Pro;E031,H3K4me3_Pro;E032,H3K4me3_Pro;E033,H3K4me3_Pro;E035,H3K4me3_Pro;E036,H3K4me3_Pro;E039,H3K4me3_Pro;E041,H3K4me3_Pro;E043,H3K4me3_Pro;E045,H3K4me3_Pro;E046,H3K4me3_Pro;E047,H3K4me3_Pro;E049,H3K4me3_Pro;E051,H3K4me3_Pro;E052,H3K4me3_Pro;E062,H3K4me3_Pro;E063,H3K4me3_Pro;E065,H3K4me3_Pro;E066,H3K4me3_Pro;E067,H3K4me3_Pro;E068,H3K4me3_Pro;E071,H3K4me3_Pro;E072,H3K4me3_Pro;E073,H3K4me3_Pro;E074,H3K4me3_Pro;E076,H3K4me3_Pro;E077,H3K4me3_Pro;E078,H3K4me3_Pro;E084,H3K4me3_Pro;E086,H3K4me3_Pro;E088,H3K4me3_Pro;E091,H3K4me3_Pro;E093,H3K4me3_Pro;E101,H3K4me3_Pro;E102,H3K4me3_Pro;E103,H3K4me3_Pro;E104,H3K4me3_Pro;E105,H3K4me3_Pro;E108,H3K4me3_Pro;E111,H3K4me3_Pro;E112,H3K4me3_Pro;E114,H3K4me3_Pro;E116,H3K4me3_Pro;E124,H3K4me3_Pro;E126,H3K4me3_Pro;E023,H3K9ac_Pro;E025,H3K9ac_Pro;E026,H3K9ac_Pro;E052,H3K9ac_Pro;E062,H3K9ac_Pro;E063,H3K9ac_Pro;E066,H3K9ac_Pro;E068,H3K9ac_Pro;E076,H3K9ac_Pro;E077,H3K9ac_Pro;E086,H3K9ac_Pro;E101,H3K9ac_Pro;E102,H3K9ac_Pro;E116,H3K9ac_Pro;E124,H3K9ac_Pro;E029,H3K27ac_Enh;E032,H3K27ac_Enh;E046,H3K27ac_Enh;E062,H3K27ac_Enh;E063,H3K27ac_Enh;E066,H3K27ac_Enh;E067,H3K27ac_Enh;E068,H3K27ac_Enh;E069,H3K27ac_Enh;E072,H3K27ac_Enh;E073,H3K27ac_Enh;E074,H3K
27ac_Enh;E075,H3K27ac_Enh;E076,H3K27ac_Enh;E078,H3K27ac_Enh;E084,H3K27ac_Enh;E093,H3K27ac_Enh;E101,H3K27ac_Enh;E102,H3K27ac_Enh;E103,H3K27ac_Enh;E116,H3K27ac_Enh;E124,H3K27ac_Enh;E126,H3K27ac_Enh | ++ | . | +GTEx2015_v6,Adipose_Subcutaneous,LRRK2,4.21086496303076e-07;GTEx2015_v6,Artery_Tibial,LRRK2,7.12887180198536e-12;GTEx2015_v6,Cells_Transformed_fibroblasts,LRRK2,1.24729820163706e-05;GTEx2015_v6,Nerve_Tibial,LRRK2,5.97959499940652e-10;GTEx2015_v6,Skin_Not_Sun_Exposed_Suprapubic,LRRK2,1.00796507632068e-05;GTEx2015_v6,Skin_Sun_Exposed_Lower_leg,LRRK2,1.08361107066017e-05;Westra2013,Whole_Blood,-,1.9330946764156724E-31;Westra2013,Whole_Blood,LRRK2,1.807765948958669E-5 | +. | +. | +Barx1;Barx2;CDP_2;En-1_1;Hoxb6;Irf_known7;Ncx_1;Pou5f1_known2;Sox_18;Sox_3;Sox_9 | +ENSG00000225342.1 | +AC079630.4 | +0 | +0 | +NM_198578 | +LRRK2 | +5 | +1609 | +. | +rs7294619 | +BLD, GI, KID | +FAT, BLD, LIV, BRN, GI | +
rs74324737 | +Nalls23andMe_2019 | +LRRK2 | +12 | +40625081 | +0.0000000 | +0.1246 | +0.0141 | +A | +G | +0.1216 | +0.1216 | +56306 | +1417791 | +0.0382 | +216621 | +8.8368794 | +FALSE | +NA | +NA | +4 | +1 | +4 | +1.0000000 | +NA | +NA | +2 | +TRUE | +0.5000000 | +0 | +40.62508 | +12 | +40231279 | +0 | +0 | +0 | +G | +A | +0.09 | +0.06 | +0.00 | +0.11 | +0 | +0 | +E029,6_EnhG;E030,6_EnhG;E031,7_Enh;E032,6_EnhG;E080,7_Enh;E124,6_EnhG | +E029,10_TxEnh5;E030,10_TxEnh5;E031,10_TxEnh5;E032,10_TxEnh5;E124,10_TxEnh5 | +E029,H3K27ac_Enh;E032,H3K27ac_Enh;E046,H3K27ac_Enh;E068,H3K27ac_Enh;E080,H3K27ac_Enh;E124,H3K27ac_Enh;E029,H3K4me1_Enh;E030,H3K4me1_Enh;E031,H3K4me1_Enh;E032,H3K4me1_Enh;E067,H3K4me1_Enh;E073,H3K4me1_Enh;E078,H3K4me1_Enh;E116,H3K4me1_Enh;E124,H3K4me1_Enh;E031,H3K4me3_Pro;E067,H3K4me3_Pro;E116,H3K4me3_Pro;E124,H3K4me3_Pro;E124,H3K9ac_Pro | ++ | . | +GTEx2015_v6,Adipose_Subcutaneous,LRRK2,7.71727072375557e-07;GTEx2015_v6,Artery_Tibial,LRRK2,3.79436480774191e-12;GTEx2015_v6,Cells_Transformed_fibroblasts,LRRK2,6.94075809560474e-06;GTEx2015_v6,Nerve_Tibial,LRRK2,8.74398504351068e-10;GTEx2015_v6,Skin_Not_Sun_Exposed_Suprapubic,LRRK2,9.31129758632871e-06;GTEx2015_v6,Skin_Sun_Exposed_Lower_leg,LRRK2,1.18501232307152e-05 | +. | +. | +E2A_3;E2A_5;Lmo2-complex_1;Mtf1_1;Myf_1;Myf_3;NF-E2_disc3;TCF12_disc1;ZEB1_known3 | +ENSG00000188906.9 | +LRRK2 | +0 | +0 | +NM_198578 | +LRRK2 | +0 | +0 | +INT | +rs74324737 | ++ | BLD, ADRL | +
rs75143074 | +Nalls23andMe_2019 | +LRRK2 | +12 | +40623324 | +0.0000000 | +-0.1928 | +0.0316 | +A | +C | +0.9773 | +0.0227 | +56306 | +1417791 | +0.0382 | +216621 | +-6.1012658 | +FALSE | +NA | +NA | +0 | +0 | +NA | +NA | +1 | +1.000000 | +1 | +FALSE | +0.2500000 | +0 | +40.62332 | +12 | +40229522 | +0 | +0 | +0 | +A | +C | +0.00 | +0.01 | +0.01 | +0.02 | +0 | +0 | +E029,1_TssA;E030,6_EnhG;E031,2_TssAFlnk;E066,7_Enh;E114,7_Enh;E116,1_TssA;E124,2_TssAFlnk | +E029,4_PromD2;E030,9_TxReg;E031,9_TxReg;E032,12_TxEnhW;E034,22_PromP;E035,12_TxEnhW;E040,12_TxEnhW;E047,12_TxEnhW;E077,22_PromP;E124,4_PromD2 | +E029,H3K4me1_Enh;E030,H3K4me1_Enh;E031,H3K4me1_Enh;E032,H3K4me1_Enh;E066,H3K4me1_Enh;E067,H3K4me1_Enh;E069,H3K4me1_Enh;E071,H3K4me1_Enh;E073,H3K4me1_Enh;E074,H3K4me1_Enh;E076,H3K4me1_Enh;E078,H3K4me1_Enh;E086,H3K4me1_Enh;E087,H3K4me1_Enh;E101,H3K4me1_Enh;E114,H3K4me1_Enh;E124,H3K4me1_Enh;E029,H3K4me3_Pro;E030,H3K4me3_Pro;E031,H3K4me3_Pro;E035,H3K4me3_Pro;E062,H3K4me3_Pro;E067,H3K4me3_Pro;E068,H3K4me3_Pro;E069,H3K4me3_Pro;E072,H3K4me3_Pro;E073,H3K4me3_Pro;E078,H3K4me3_Pro;E087,H3K4me3_Pro;E103,H3K4me3_Pro;E104,H3K4me3_Pro;E108,H3K4me3_Pro;E114,H3K4me3_Pro;E116,H3K4me3_Pro;E124,H3K4me3_Pro;E032,H3K27ac_Enh;E046,H3K27ac_Enh;E066,H3K27ac_Enh;E067,H3K27ac_Enh;E068,H3K27ac_Enh;E069,H3K27ac_Enh;E072,H3K27ac_Enh;E080,H3K27ac_Enh;E114,H3K27ac_Enh;E124,H3K27ac_Enh;E066,H3K9ac_Pro;E068,H3K9ac_Pro;E076,H3K9ac_Pro;E110,H3K9ac_Pro;E114,H3K9ac_Pro;E124,H3K9ac_Pro | ++ | . | +. | +. | +. | +Pou2f2_known2;Zfp691 | +ENSG00000188906.9 | +LRRK2 | +0 | +0 | +NM_198578 | +LRRK2 | +0 | +0 | +INT | +rs75143074 | +BLD | +BLD, LIV, LNG | +
rs76904798 | +Nalls23andMe_2019 | +LRRK2 | +12 | +40614434 | +0.0000000 | +0.1439 | +0.0130 | +T | +C | +0.1444 | +0.1444 | +56306 | +1417791 | +0.0382 | +216621 | +11.0692308 | +TRUE | +NA | +NA | +1 | +1 | +1 | +1.0000000 | +1 | +1.000000 | +3 | +TRUE | +0.7500000 | +0 | +40.61443 | +12 | +40220632 | +0 | +0 | +0 | +C | +T | +0.11 | +0.21 | +0.03 | +0.13 | +0 | +0 | +E029,7_Enh;E030,7_Enh;E124,7_Enh | ++ | E029,H3K4me1_Enh;E030,H3K4me1_Enh;E031,H3K4me1_Enh;E124,H3K4me1_Enh;E124,H3K27ac_Enh | ++ | . | +GTEx2015_v6,Adipose_Subcutaneous,LRRK2,2.6136557196757e-09;GTEx2015_v6,Artery_Coronary,LRRK2,8.10828434519411e-08;GTEx2015_v6,Artery_Tibial,LRRK2,1.29589062525083e-14;GTEx2015_v6,Cells_Transformed_fibroblasts,LRRK2,2.01546552400868e-05;GTEx2015_v6,Esophagus_Muscularis,LRRK2,1.17034509288903e-05;GTEx2015_v6,Nerve_Tibial,LRRK2,1.17833931025132e-15;GTEx2015_v6,Skin_Not_Sun_Exposed_Suprapubic,LRRK2,3.02678179339592e-06;GTEx2015_v6,Skin_Sun_Exposed_Lower_leg,LRRK2,1.21658440247309e-07 | +25064009,Parkinson’s disease,5E-14 | +. | +Dbx1;Hmx_2;Hoxb8;Hoxd8;Ncx_2;Nkx6-1_3;Pou2f2_known2;Sox_14;Sox_15;Sox_16;Sox_18;Sox_19;Sox_2;Sox_5;Sox_7;Sox_9 | +ENSG00000225342.1 | +AC079630.4 | +0 | +0 | +NM_198578 | +LRRK2 | +5 | +4377 | +. | +rs76904798 | ++ | BLD | +
rs8087969 | +Nalls23andMe_2019 | +MEX3C | +18 | +48683589 | +0.0000000 | +-0.0578 | +0.0102 | +T | +G | +0.5496 | +0.4504 | +49053 | +1411006 | +0.0336 | +189620 | +-5.6666667 | +TRUE | +NA | +NA | +0 | +0 | +3 | +0.9962226 | +1 | +1.000000 | +2 | +TRUE | +0.4990556 | +0 | +48.68359 | +18 | +51157219 | +0 | +0 | +0 | +T | +G | +0.16 | +0.38 | +0.25 | +0.44 | +0 | +0 | +E046,7_Enh | ++ | E036,H3K4me1_Enh;E046,H3K4me1_Enh;E038,H3K4me3_Pro;E075,H3K27ac_Enh;E098,H3K27ac_Enh;E124,H3K9ac_Pro | ++ | . | +GTEx2015_v6,Cells_Transformed_fibroblasts,MEX3C,2.55824097994295e-12;GTEx2015_v6,Thyroid,MEX3C,1.5051282263434e-06;Lappalainen2013,Lymphoblastoid_EUR_exonlevel,ENSG00000176624.8_48700920_48703946,6.5678636014336e-07;Lappalainen2013,Lymphoblastoid_EUR_genelevel,MEX3C,1.80647222987676e-07 | +. | +. | +CEBPB_known2;CEBPB_known4;RFX5_known1 | +ENSG00000176624.8 | +MEX3C | +3 | +17330 | +NM_016626 | +MEX3C | +3 | +17329 | +. | +rs8087969 | ++ | BLD | +
rs8099086 | +Nalls23andMe_2019 | +MEX3C | +18 | +48717363 | +0.0000002 | +0.0530 | +0.0101 | +T | +C | +0.5350 | +0.4650 | +49053 | +1411006 | +0.0336 | +189620 | +5.2475248 | +FALSE | +NA | +NA | +0 | +0 | +0 | +0.0000000 | +1 | +1.000000 | +1 | +FALSE | +0.2500000 | +0 | +48.71736 | +18 | +51190993 | +0 | +0 | +0 | +C | +T | +0.51 | +0.48 | +0.42 | +0.53 | +0 | +0 | +E011,6_EnhG;E014,6_EnhG;E015,6_EnhG;E016,6_EnhG;E019,6_EnhG;E020,6_EnhG;E023,7_Enh;E025,7_Enh;E026,7_Enh;E027,7_Enh;E028,7_Enh;E032,6_EnhG;E033,7_Enh;E035,6_EnhG;E037,7_Enh;E038,7_Enh;E039,7_Enh;E040,2_TssAFlnk;E041,2_TssAFlnk;E042,7_Enh;E043,7_Enh;E044,7_Enh;E045,7_Enh;E046,6_EnhG;E047,7_Enh;E048,7_Enh;E050,7_Enh;E051,7_Enh;E052,7_Enh;E053,7_Enh;E062,7_Enh;E070,7_Enh;E076,7_Enh;E077,7_Enh;E078,7_Enh;E080,7_Enh;E081,7_Enh;E083,7_Enh;E087,7_Enh;E088,7_Enh;E107,7_Enh;E116,2_TssAFlnk;E117,7_Enh;E118,7_Enh | +E001,12_TxEnhW;E003,12_TxEnhW;E004,12_TxEnhW;E008,12_TxEnhW;E011,12_TxEnhW;E012,12_TxEnhW;E013,12_TxEnhW;E014,12_TxEnhW;E015,12_TxEnhW;E016,12_TxEnhW;E018,12_TxEnhW;E019,12_TxEnhW;E020,12_TxEnhW;E023,12_TxEnhW;E024,12_TxEnhW;E025,12_TxEnhW;E026,12_TxEnhW;E027,12_TxEnhW;E028,12_TxEnhW;E031,12_TxEnhW;E032,10_TxEnh5;E033,10_TxEnh5;E034,10_TxEnh5;E035,12_TxEnhW;E036,12_TxEnhW;E037,9_TxReg;E038,10_TxEnh5;E039,10_TxEnh5;E040,9_TxReg;E041,9_TxReg;E042,10_TxEnh5;E043,10_TxEnh5;E044,10_TxEnh5;E045,10_TxEnh5;E046,10_TxEnh5;E047,10_TxEnh5;E048,10_TxEnh5;E049,12_TxEnhW;E050,10_TxEnh5;E051,10_TxEnh5;E054,12_TxEnhW;E062,10_TxEnh5;E063,12_TxEnhW;E078,12_TxEnhW;E081,12_TxEnhW;E083,12_TxEnhW;E088,12_TxEnhW;E093,10_TxEnh5;E101,12_TxEnhW;E109,12_TxEnhW;E110,12_TxEnhW;E112,12_TxEnhW;E115,12_TxEnhW;E116,10_TxEnh5;E119,12_TxEnhW;E123,12_TxEnhW;E127,12_TxEnhW | 
+E001,H3K4me1_Enh;E003,H3K4me1_Enh;E008,H3K4me1_Enh;E009,H3K4me1_Enh;E010,H3K4me1_Enh;E011,H3K4me1_Enh;E012,H3K4me1_Enh;E014,H3K4me1_Enh;E015,H3K4me1_Enh;E016,H3K4me1_Enh;E017,H3K4me1_Enh;E018,H3K4me1_Enh;E019,H3K4me1_Enh;E020,H3K4me1_Enh;E023,H3K4me1_Enh;E024,H3K4me1_Enh;E025,H3K4me1_Enh;E026,H3K4me1_Enh;E027,H3K4me1_Enh;E028,H3K4me1_Enh;E031,H3K4me1_Enh;E032,H3K4me1_Enh;E033,H3K4me1_Enh;E034,H3K4me1_Enh;E035,H3K4me1_Enh;E036,H3K4me1_Enh;E037,H3K4me1_Enh;E038,H3K4me1_Enh;E039,H3K4me1_Enh;E040,H3K4me1_Enh;E041,H3K4me1_Enh;E042,H3K4me1_Enh;E043,H3K4me1_Enh;E044,H3K4me1_Enh;E045,H3K4me1_Enh;E046,H3K4me1_Enh;E047,H3K4me1_Enh;E048,H3K4me1_Enh;E049,H3K4me1_Enh;E050,H3K4me1_Enh;E051,H3K4me1_Enh;E052,H3K4me1_Enh;E053,H3K4me1_Enh;E054,H3K4me1_Enh;E055,H3K4me1_Enh;E056,H3K4me1_Enh;E058,H3K4me1_Enh;E061,H3K4me1_Enh;E062,H3K4me1_Enh;E063,H3K4me1_Enh;E066,H3K4me1_Enh;E067,H3K4me1_Enh;E068,H3K4me1_Enh;E069,H3K4me1_Enh;E070,H3K4me1_Enh;E071,H3K4me1_Enh;E072,H3K4me1_Enh;E073,H3K4me1_Enh;E074,H3K4me1_Enh;E075,H3K4me1_Enh;E076,H3K4me1_Enh;E077,H3K4me1_Enh;E078,H3K4me1_Enh;E080,H3K4me1_Enh;E081,H3K4me1_Enh;E082,H3K4me1_Enh;E083,H3K4me1_Enh;E084,H3K4me1_Enh;E085,H3K4me1_Enh;E086,H3K4me1_Enh;E087,H3K4me1_Enh;E088,H3K4me1_Enh;E092,H3K4me1_Enh;E093,H3K4me1_Enh;E099,H3K4me1_Enh;E101,H3K4me1_Enh;E103,H3K4me1_Enh;E105,H3K4me1_Enh;E106,H3K4me1_Enh;E107,H3K4me1_Enh;E108,H3K4me1_Enh;E109,H3K4me1_Enh;E110,H3K4me1_Enh;E112,H3K4me1_Enh;E113,H3K4me1_Enh;E114,H3K4me1_Enh;E115,H3K4me1_Enh;E116,H3K4me1_Enh;E117,H3K4me1_Enh;E118,H3K4me1_Enh;E119,H3K4me1_Enh;E120,H3K4me1_Enh;E121,H3K4me1_Enh;E122,H3K4me1_Enh;E123,H3K4me1_Enh;E125,H3K4me1_Enh;E127,H3K4me1_Enh;E129,H3K4me1_Enh;E003,H3K4me3_Pro;E011,H3K4me3_Pro;E015,H3K4me3_Pro;E016,H3K4me3_Pro;E019,H3K4me3_Pro;E020,H3K4me3_Pro;E026,H3K4me3_Pro;E037,H3K4me3_Pro;E038,H3K4me3_Pro;E040,H3K4me3_Pro;E041,H3K4me3_Pro;E042,H3K4me3_Pro;E044,H3K4me3_Pro;E045,H3K4me3_Pro;E046,H3K4me3_Pro;E051,H3K4me3_Pro;E053,H3K4me3_Pro;E054,H3K4me3_Pro;E058,H3K4me3_Pro;E062,H3K4m
e3_Pro;E067,H3K4me3_Pro;E078,H3K4me3_Pro;E083,H3K4me3_Pro;E088,H3K4me3_Pro;E101,H3K4me3_Pro;E103,H3K4me3_Pro;E112,H3K4me3_Pro;E115,H3K4me3_Pro;E116,H3K4me3_Pro;E012,H3K27ac_Enh;E014,H3K27ac_Enh;E020,H3K27ac_Enh;E034,H3K27ac_Enh;E037,H3K27ac_Enh;E038,H3K27ac_Enh;E039,H3K27ac_Enh;E040,H3K27ac_Enh;E041,H3K27ac_Enh;E042,H3K27ac_Enh;E043,H3K27ac_Enh;E044,H3K27ac_Enh;E045,H3K27ac_Enh;E046,H3K27ac_Enh;E048,H3K27ac_Enh;E049,H3K27ac_Enh;E050,H3K27ac_Enh;E058,H3K27ac_Enh;E062,H3K27ac_Enh;E063,H3K27ac_Enh;E076,H3K27ac_Enh;E080,H3K27ac_Enh;E087,H3K27ac_Enh;E093,H3K27ac_Enh;E101,H3K27ac_Enh;E102,H3K27ac_Enh;E103,H3K27ac_Enh;E113,H3K27ac_Enh;E116,H3K27ac_Enh;E117,H3K27ac_Enh;E119,H3K27ac_Enh;E124,H3K27ac_Enh;E127,H3K27ac_Enh;E014,H3K9ac_Pro;E018,H3K9ac_Pro;E019,H3K9ac_Pro;E020,H3K9ac_Pro;E025,H3K9ac_Pro;E052,H3K9ac_Pro;E062,H3K9ac_Pro;E063,H3K9ac_Pro;E067,H3K9ac_Pro;E068,H3K9ac_Pro;E083,H3K9ac_Pro;E086,H3K9ac_Pro;E115,H3K9ac_Pro;E116,H3K9ac_Pro;E119,H3K9ac_Pro | +E034;E046;E127 | +. | +GTEx2015_v6,Cells_Transformed_fibroblasts,MEX3C,4.58872527039341e-08 | +. | +. | +Dlx3;Evi-1_4;Lhx4;Mef2_known6;Pou1f1_1;Pou2f2_known2;Pou2f2_known3;Pou6f1_2;Prrx2_1 | +ENSG00000176624.8 | +MEX3C | +0 | +0 | +NM_016626 | +MEX3C | +0 | +0 | +INT | +rs8099086 | +BLD | +ESDR, ESC, IPSC, FAT, STRM, BRST, BLD, MUS, BRN, GI, ADRL, HRT, PANC, LNG, CRVX, LIV | +
rs9956987 | +Nalls23andMe_2019 | +MEX3C | +18 | +48774088 | +0.0000000 | +-0.0557 | +0.0102 | +T | +G | +0.5472 | +0.4528 | +49053 | +1411006 | +0.0336 | +189620 | +-5.4607843 | +FALSE | +NA | +NA | +0 | +0 | +0 | +0.0000000 | +1 | +1.000000 | +1 | +FALSE | +0.2500000 | +0 | +48.77409 | +18 | +51247718 | +0 | +0 | +0 | +G | +T | +0.66 | +0.66 | +0.71 | +0.56 | +0 | +0 | ++ | + | E021,H3K27ac_Enh | ++ | . | +GTEx2015_v6,Cells_Transformed_fibroblasts,MEX3C,6.69428550606873e-09;GTEx2015_v6,Thyroid,MEX3C,1.96900615796465e-06;Lappalainen2013,Lymphoblastoid_EUR_exonlevel,ENSG00000176624.8_48700920_48703946,1.81243932746836e-05;Lappalainen2013,Lymphoblastoid_EUR_genelevel,MEX3C,4.47533865893179e-06 | +. | +. | +Pax-6_1;THAP1_disc1;YY1_known5 | +ENSG00000207154.1 | +U1 | +3 | +36014 | +NM_016626 | +MEX3C | +5 | +50036 | +. | +rs9956987 | ++ | + |
+gg_cs_bin <- echoannot::CS_bin_plot(merged_DT = echodata::Nalls2019_merged)
## Credible Set counts plot
+
+gg_cs_counts <- echoannot::CS_counts_plot(merged_DT = echodata::Nalls2019_merged)
## Warning: Removed 83 rows containing missing values (position_stack).
+
+## Warning: Removed 83 rows containing missing values (position_stack).
+## Warning: Removed 375 rows containing missing values (position_stack).
+
+
+gg_cs_counts <- echoannot::CS_counts_plot(merged_DT = echodata::Nalls2019_merged)
## Warning: Removed 83 rows containing missing values (position_stack).
+
+## Warning: Removed 83 rows containing missing values (position_stack).
+## Warning: Removed 375 rows containing missing values (position_stack).
+
+
+gg_epi <- echoannot::peak_overlap_plot(merged_DT = echodata::Nalls2019_merged,
+ include.NOTT_2019_enhancers_promoters = TRUE,
+ include.NOTT_2019_PLACseq = TRUE,
+ #### Omit many annot to save time ####
+ include.NOTT_2019_peaks = FALSE,
+ include.CORCES_2020_scATACpeaks = FALSE,
+ include.CORCES_2020_Cicero_coaccess = FALSE,
+ include.CORCES_2020_bulkATACpeaks = FALSE,
+ include.CORCES_2020_HiChIP_FitHiChIP_coaccess = FALSE,
+ include.CORCES_2020_gene_annotations = FALSE)
## Warning: The `.dots` argument of `group_by()` is deprecated as of dplyr 1.0.0.
+## This warning is displayed once every 8 hours.
+## Call `lifecycle::last_warnings()` to see where this warning was generated.
+
+Creates one big merged plot using the subfunctions above.
+
+super_plot <- echoannot::super_summary_plot(merged_DT = echodata::Nalls2019_merged,
+ plot_missense = FALSE)
+utils::sessionInfo()
## R version 4.1.0 (2021-05-18)
+## Platform: x86_64-apple-darwin17.0 (64-bit)
+## Running under: macOS Big Sur 10.16
+##
+## Matrix products: default
+## BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.dylib
+## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
+##
+## locale:
+## [1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8
+##
+## attached base packages:
+## [1] stats graphics grDevices utils datasets methods base
+##
+## other attached packages:
+## [1] echodata_0.99.1 echoannot_0.99.1 BiocStyle_2.20.2
+##
+## loaded via a namespace (and not attached):
+## [1] backports_1.2.1 Hmisc_4.5-0
+## [3] BiocFileCache_2.0.0 systemfonts_1.0.2
+## [5] plyr_1.8.6 lazyeval_0.2.2
+## [7] splines_4.1.0 BiocParallel_1.26.2
+## [9] GenomeInfoDb_1.28.4 ggplot2_3.3.5
+## [11] digest_0.6.27 ensembldb_2.16.4
+## [13] htmltools_0.5.2 fansi_0.5.0
+## [15] magrittr_2.0.1 checkmate_2.0.0
+## [17] memoise_2.0.0 BSgenome_1.60.0
+## [19] cluster_2.1.2 Biostrings_2.60.2
+## [21] matrixStats_0.60.1 R.utils_2.10.1
+## [23] ggbio_1.40.0 pkgdown_1.6.1
+## [25] prettyunits_1.1.1 jpeg_0.1-9
+## [27] colorspace_2.0-2 blob_1.2.2
+## [29] rappdirs_0.3.3 textshaping_0.3.5
+## [31] xfun_0.26 dplyr_1.0.7
+## [33] crayon_1.4.1 RCurl_1.98-1.4
+## [35] jsonlite_1.7.2 graph_1.70.0
+## [37] Exact_3.0 VariantAnnotation_1.38.0
+## [39] survival_3.2-13 glue_1.4.2
+## [41] gtable_0.3.0 zlibbioc_1.38.0
+## [43] XVector_0.32.0 DelayedArray_0.18.0
+## [45] BiocGenerics_0.38.0 scales_1.1.1
+## [47] pheatmap_1.0.12 mvtnorm_1.1-2
+## [49] DBI_1.1.1 GGally_2.1.2
+## [51] Rcpp_1.0.7 viridisLite_0.4.0
+## [53] progress_1.2.2 htmlTable_2.2.1
+## [55] foreign_0.8-81 bit_4.0.4
+## [57] proxy_0.4-26 OrganismDbi_1.34.0
+## [59] Formula_1.2-4 DT_0.19
+## [61] stats4_4.1.0 htmlwidgets_1.5.4
+## [63] httr_1.4.2 RColorBrewer_1.1-2
+## [65] ellipsis_0.3.2 haploR_4.0.4
+## [67] farver_2.1.0 R.methodsS3_1.8.1
+## [69] pkgconfig_2.0.3 reshape_0.8.8
+## [71] XML_3.99-0.7 nnet_7.3-16
+## [73] sass_0.4.0 dbplyr_2.1.1
+## [75] RJSONIO_1.3-1.5 utf8_1.2.2
+## [77] labeling_0.4.2 tidyselect_1.1.1
+## [79] rlang_0.4.11 reshape2_1.4.4
+## [81] AnnotationDbi_1.54.1 munsell_0.5.0
+## [83] tools_4.1.0 cachem_1.0.6
+## [85] generics_0.1.0 RSQLite_2.2.8
+## [87] evaluate_0.14 stringr_1.4.0
+## [89] fastmap_1.1.0 yaml_2.2.1
+## [91] ragg_1.1.3 knitr_1.34
+## [93] bit64_4.0.5 fs_1.5.0
+## [95] purrr_0.3.4 KEGGREST_1.32.0
+## [97] AnnotationFilter_1.16.0 rootSolve_1.8.2.2
+## [99] RBGL_1.68.0 R.oo_1.24.0
+## [101] xml2_1.3.2 biomaRt_2.48.3
+## [103] compiler_4.1.0 rstudioapi_0.13
+## [105] filelock_1.0.2 curl_4.3.2
+## [107] png_0.1-7 e1071_1.7-8
+## [109] tibble_3.1.4 bslib_0.3.0
+## [111] DescTools_0.99.43 stringi_1.7.4
+## [113] highr_0.9 GenomicFeatures_1.44.2
+## [115] desc_1.3.0 lattice_0.20-44
+## [117] ProtGenerics_1.24.0 Matrix_1.3-4
+## [119] vctrs_0.3.8 pillar_1.6.2
+## [121] lifecycle_1.0.0 RUnit_0.4.32
+## [123] BiocManager_1.30.16 jquerylib_0.1.4
+## [125] data.table_1.14.0 bitops_1.0-7
+## [127] lmom_2.8 patchwork_1.1.1
+## [129] rtracklayer_1.52.1 GenomicRanges_1.44.0
+## [131] R6_2.5.1 BiocIO_1.2.0
+## [133] latticeExtra_0.6-29 bookdown_0.24
+## [135] gridExtra_2.3 IRanges_2.26.0
+## [137] gld_2.6.2 dichromat_2.0-0
+## [139] boot_1.3-28 MASS_7.3-54
+## [141] assertthat_0.2.1 SummarizedExperiment_1.22.0
+## [143] rprojroot_2.0.2 rjson_0.2.20
+## [145] GenomicAlignments_1.28.0 Rsamtools_2.8.0
+## [147] S4Vectors_0.30.0 GenomeInfoDbData_1.2.6
+## [149] expm_0.999-6 parallel_4.1.0
+## [151] hms_1.1.0 grid_4.1.0
+## [153] rpart_4.1-15 tidyr_1.1.3
+## [155] class_7.3-19 rmarkdown_2.11
+## [157] MatrixGenerics_1.4.3 biovizBase_1.40.0
+## [159] Biobase_2.52.0 base64enc_0.1-3
+## [161] restfulr_0.0.13
+Provides both built-in fine-mapping results and API access to the echolocatoR Fine-mapping Portal.
+
+if(!"remotes" %in% rownames(installed.packages())){install.packages("remotes")}
+
+remotes::install_github("RajLabMSSM/echoannot")
+library(echoannot)
For more detailed information about each dataset, use ?, for example:
library(echolocatoR); ?NOTT_2019.interactome # example dataset
Data from this publication contains results from cell type-specific (neurons, oligodendrocytes, astrocytes, microglia, & peripheral myeloid cells) epigenomic assays (H3K27ac, ATAC, H3K4me3) from human brain tissue.
For detailed metadata, see:
+
+data("NOTT_2019.bigwig_metadata")
Built-in datasets:
+
+data("NOTT_2019.interactome")
+# Examples of the data nested in "NOTT_2019.interactome" object:
+NOTT_2019.interactome$`Neuronal promoters`
+NOTT_2019.interactome$`Neuronal enhancers`
+NOTT_2019.interactome$`Microglia promoters`
+NOTT_2019.interactome$`Microglia enhancers`
+...
+...
+NOTT_2019.interactome$H3K4me3_around_TSS_annotated_pe
+NOTT_2019.interactome$`Microglia interactome`
+NOTT_2019.interactome$`Neuronal interactome`
+NOTT_2019.interactome$`Oligo interactome`
+...
+...
API access to full bigWig files on UCSC Genome Browser, which includes
+Data from this preprint contains results from bulk and single-cell chromatin accessibility epigenomic assays in 39 human brains.
+ +R/data.R
+ CORCES_2020.HiChIP_FitHiChIP_loop_calls.Rd
FitHiChIP loop calls that overlap SNPs derived from analysis + of H3K27ac HiChIP data. +Each row represents an individual peak identified from the feature +binarization analysis (see methods).
+CORCES_2020.HiChIP_FitHiChIP_loop_calls
+
+
+ An object of class data.table
(inherits from data.frame
) with 11542 rows and 11 columns.
https://doi.org/10.1038/s41588-020-00721-x
+Data originally from +Corces et al. (bioRxiv), as of May 2020. +Specifically: STable10_Coacessibility_Peak_loop_connection, +HiChIP FitHiChIP Loop Calls sheet.
+Column dictionary
The hg38 chromosome of the first loop Anchor.
The hg38 start position of the first loop Anchor.
The hg38 stop position of the first loop Anchor.
The width of the first loop Anchor.
The hg38 chromosome of the second loop Anchor.
The hg38 start position of the second loop Anchor.
The hg38 stop position of the second loop Anchor.
The width of the second loop Anchor.
The -log10(q-value) of the loop call from FitHiChIP.
A boolean variable determining whether the first +anchor overlaps a SNP from our AD/PD GWAS analyses.
A boolean variable determining whether the second +anchor overlaps a SNP from our AD/PD GWAS analyses.
Other CORCES_2020:
+CORCES_2020.bulkATACseq_peaks
,
+CORCES_2020.cicero_coaccessibility
,
+CORCES_2020.get_ATAC_peak_overlap()
,
+CORCES_2020.get_HiChIP_FitHiChIP_overlap()
,
+CORCES_2020.prepare_bulkATAC_peak_overlap()
,
+CORCES_2020.prepare_scATAC_peak_overlap()
,
+CORCES_2020.scATACseq_celltype_peaks
,
+CORCES_2020.scATACseq_peaks
+if (FALSE) { +dat <- readxl::read_excel( + file.path( + "~/Desktop/Fine_Mapping/echolocatoR/annotations", + "Coceres_2020/STable10_Coacessibility_Peak_loop_connection.xlsx" + ), + skip = 19, sheet = 1 +) +CORCES_2020.HiChIP_FitHiChIP_loop_calls <- data.table::data.table(dat) +usethis::use_data(CORCES_2020.HiChIP_FitHiChIP_loop_calls) +} +
Each row represents an individual peak identified in the bulk ATAC-seq data.
+CORCES_2020.bulkATACseq_peaks
+
+
+ An object of class data.table
(inherits from data.frame
) with 186559 rows and 10 columns.
https://doi.org/10.1038/s41588-020-00721-x
+Data originally from +Corces et al. (bioRxiv), as of May 2020. +Specifically: STable2_Features_bulkATAC-seq_Peaks
+Other CORCES_2020:
+CORCES_2020.HiChIP_FitHiChIP_loop_calls
,
+CORCES_2020.cicero_coaccessibility
,
+CORCES_2020.get_ATAC_peak_overlap()
,
+CORCES_2020.get_HiChIP_FitHiChIP_overlap()
,
+CORCES_2020.prepare_bulkATAC_peak_overlap()
,
+CORCES_2020.prepare_scATAC_peak_overlap()
,
+CORCES_2020.scATACseq_celltype_peaks
,
+CORCES_2020.scATACseq_peaks
+if (FALSE) { +dat <- readxl::read_excel( + file.path( + "~/Desktop/Fine_Mapping/echolocatoR", + "annotations/Coceres_2020", + "STable2_Features_bulkATAC-seq_Peaks.xlsx" + ), + skip = 18 +) +CORCES_2020.bulkATACseq_peaks <- data.table::data.table(dat) +usethis::use_data(CORCES_2020.bulkATACseq_peaks, overwrite = TRUE) +} +
R/data.R
+ CORCES_2020.cicero_coaccessibility.Rd
Cicero coaccessibility analysis for peaks that overlap SNPs derived +from analysis of scATAC-seq data. +Each row represents an individual peak identified from the feature +binarization analysis (see methods).
+CORCES_2020.cicero_coaccessibility
+
+
+ An object of class data.table
(inherits from data.frame
) with 9795 rows and 14 columns.
https://doi.org/10.1038/s41588-020-00721-x
+Data originally from +Corces et al. (bioRxiv), as of May 2020. +Specifically: STable10_Coacessibility_Peak_loop_connection, +Cicero Coaccessibility sheet. +Peak_ID_Peak1 - A unique number that identifies the peak across +supplementary tables.
+Column dictionary:
The hg38 chromosome of the first loop Peak.
The hg38 start position of the first loop Peak.
The hg38 stop position of the first loop Peak.
The width of the first loop Peak.
A unique number that identifies the peak +across supplementary tables.
The hg38 chromosome of the second loop Peak.
The hg38 start position of the second loop Peak.
The hg38 stop position of the second loop Peak.
The width of the second loop Peak.
The coaccessibility correlation +for the given peak pair.
A boolean variable determining whether +the first peak overlaps a SNP from our AD/PD GWAS analyses.
A boolean variable determining whether +the second peak overlaps a SNP from our AD/PD GWAS analyses.
Other CORCES_2020:
+CORCES_2020.HiChIP_FitHiChIP_loop_calls
,
+CORCES_2020.bulkATACseq_peaks
,
+CORCES_2020.get_ATAC_peak_overlap()
,
+CORCES_2020.get_HiChIP_FitHiChIP_overlap()
,
+CORCES_2020.prepare_bulkATAC_peak_overlap()
,
+CORCES_2020.prepare_scATAC_peak_overlap()
,
+CORCES_2020.scATACseq_celltype_peaks
,
+CORCES_2020.scATACseq_peaks
+if (FALSE) { +dat <- readxl::read_excel( + file.path( + "~/Desktop/Fine_Mapping/echolocatoR/annotations", + "Coceres_2020/STable10_Coacessibility_Peak_loop_connection.xlsx" + ), + skip = 21, sheet = 2 +) +CORCES_2020.cicero_coaccessibility <- data.table::data.table(dat) +usethis::use_data(CORCES_2020.cicero_coaccessibility) +} +
R/CORCES_2020.get_ATAC_peak_overlap.R
+ CORCES_2020.get_ATAC_peak_overlap.Rd
Can optionally add Cicero
coaccessibility scores,
+which are also derived from scATAC-seq data.
CORCES_2020.get_ATAC_peak_overlap( + finemap_dat, + FDR_filter = NULL, + add_cicero = TRUE, + cell_type_specific = TRUE, + verbose = TRUE +)+ + +
https://doi.org/10.1038/s41588-020-00721-x
+Other CORCES_2020:
+CORCES_2020.HiChIP_FitHiChIP_loop_calls
,
+CORCES_2020.bulkATACseq_peaks
,
+CORCES_2020.cicero_coaccessibility
,
+CORCES_2020.get_HiChIP_FitHiChIP_overlap()
,
+CORCES_2020.prepare_bulkATAC_peak_overlap()
,
+CORCES_2020.prepare_scATAC_peak_overlap()
,
+CORCES_2020.scATACseq_celltype_peaks
,
+CORCES_2020.scATACseq_peaks
R/CORCES_2020.get_HiChIP_FitHiChIP_overlap.R
+ CORCES_2020.get_HiChIP_FitHiChIP_overlap.Rd
Anchors are the genomic regions that have evidence of being +functionally connected to one another (coaccessible), + e.g. enhancer-promoter interactions.
+CORCES_2020.get_HiChIP_FitHiChIP_overlap(finemap_dat, verbose = TRUE)+ +
finemap_dat | +Fine-mapping results. |
+
---|---|
verbose | +Print messages. |
+
https://doi.org/10.1038/s41588-020-00721-x
+Other CORCES_2020:
+CORCES_2020.HiChIP_FitHiChIP_loop_calls
,
+CORCES_2020.bulkATACseq_peaks
,
+CORCES_2020.cicero_coaccessibility
,
+CORCES_2020.get_ATAC_peak_overlap()
,
+CORCES_2020.prepare_bulkATAC_peak_overlap()
,
+CORCES_2020.prepare_scATAC_peak_overlap()
,
+CORCES_2020.scATACseq_celltype_peaks
,
+CORCES_2020.scATACseq_peaks
R/CORCES_2020.prepare_bulkATAC_peak_overlap.R
+ CORCES_2020.prepare_bulkATAC_peak_overlap.Rd
Prepare data to plot the overlap between a data.table of SNPs and +cell-type-specific epigenomic peaks and coaccessibility data.
+CORCES_2020.prepare_bulkATAC_peak_overlap( + merged_DT, + FDR_filter = NULL, + snp_filter = "Consensus_SNP==TRUE", + add_HiChIP_FitHiChIP = TRUE, + annotate_genes = FALSE, + return_counts = TRUE, + verbose = TRUE +)+ + +
https://doi.org/10.1038/s41588-020-00721-x
+Other CORCES_2020:
+CORCES_2020.HiChIP_FitHiChIP_loop_calls
,
+CORCES_2020.bulkATACseq_peaks
,
+CORCES_2020.cicero_coaccessibility
,
+CORCES_2020.get_ATAC_peak_overlap()
,
+CORCES_2020.get_HiChIP_FitHiChIP_overlap()
,
+CORCES_2020.prepare_scATAC_peak_overlap()
,
+CORCES_2020.scATACseq_celltype_peaks
,
+CORCES_2020.scATACseq_peaks
+dat_melt <- CORCES_2020.prepare_bulkATAC_peak_overlap( + merged_DT = echodata::Nalls2019_merged +) +#> Error in CORCES_2020.prepare_bulkATAC_peak_overlap(merged_DT = echodata::Nalls2019_merged): could not find function "CORCES_2020.prepare_bulkATAC_peak_overlap"
R/CORCES_2020.prepare_scATAC_peak_overlap.R
+ CORCES_2020.prepare_scATAC_peak_overlap.Rd
Prepare data to plot the overlap between a data.table of SNPs and +cell-type-specific epigenomic peaks and coaccessibility data.
+CORCES_2020.prepare_scATAC_peak_overlap( + merged_DT, + FDR_filter = NULL, + snp_filter = "Consensus_SNP==TRUE", + add_cicero = TRUE, + annotate_genes = TRUE, + return_counts = TRUE, + verbose = TRUE +)+ + +
https://doi.org/10.1038/s41588-020-00721-x
+Other CORCES_2020:
+CORCES_2020.HiChIP_FitHiChIP_loop_calls
,
+CORCES_2020.bulkATACseq_peaks
,
+CORCES_2020.cicero_coaccessibility
,
+CORCES_2020.get_ATAC_peak_overlap()
,
+CORCES_2020.get_HiChIP_FitHiChIP_overlap()
,
+CORCES_2020.prepare_bulkATAC_peak_overlap()
,
+CORCES_2020.scATACseq_celltype_peaks
,
+CORCES_2020.scATACseq_peaks
+if (FALSE) { +dat_melt <- CORCES_2020.prepare_scATAC_peak_overlap( + merged_DT = echodata::Nalls2019_merged +) +} +
R/data.R
+ CORCES_2020.scATACseq_celltype_peaks.Rd
Each row represents an individual peak identified from the feature + binarization analysis (see methods).
+CORCES_2020.scATACseq_celltype_peaks
+
+
+ An object of class data.table
(inherits from data.frame
) with 221062 rows and 13 columns.
https://doi.org/10.1038/s41588-020-00721-x
+Data originally from +Corces et al. (bioRxiv), as of May 2020. +Specifically: STable6_Features_scATAC-seq_celltype_Peaks
+Other CORCES_2020:
+CORCES_2020.HiChIP_FitHiChIP_loop_calls
,
+CORCES_2020.bulkATACseq_peaks
,
+CORCES_2020.cicero_coaccessibility
,
+CORCES_2020.get_ATAC_peak_overlap()
,
+CORCES_2020.get_HiChIP_FitHiChIP_overlap()
,
+CORCES_2020.prepare_bulkATAC_peak_overlap()
,
+CORCES_2020.prepare_scATAC_peak_overlap()
,
+CORCES_2020.scATACseq_peaks
+if (FALSE) { +dat <- readxl::read_excel( + file.path( + "~/Desktop/Fine_Mapping/echolocatoR/annotations", + "Coceres_2020/STable6_Features_scATAC-seq_celltype_Peaks.xlsx" + ), + skip = 15 +) +CORCES_2020.scATACseq_celltype_peaks <- data.table::data.table(dat) +usethis::use_data(CORCES_2020.scATACseq_celltype_peaks, overwrite = TRUE) +} +
Each row represents an individual peak identified in +the single-cell ATAC-seq data.
+CORCES_2020.scATACseq_peaks
+
+
+ An object of class data.table
(inherits from data.frame
) with 359022 rows and 10 columns.
https://doi.org/10.1038/s41588-020-00721-x
+Data originally from +Corces et al. (bioRxiv), as of May 2020. +Specifically: STable5_Features_scATAC-seq_Peaks_all
+Other CORCES_2020:
+CORCES_2020.HiChIP_FitHiChIP_loop_calls
,
+CORCES_2020.bulkATACseq_peaks
,
+CORCES_2020.cicero_coaccessibility
,
+CORCES_2020.get_ATAC_peak_overlap()
,
+CORCES_2020.get_HiChIP_FitHiChIP_overlap()
,
+CORCES_2020.prepare_bulkATAC_peak_overlap()
,
+CORCES_2020.prepare_scATAC_peak_overlap()
,
+CORCES_2020.scATACseq_celltype_peaks
+if (FALSE) { +dat <- readxl::read_excel( + file.path( + "~/Desktop/Fine_Mapping/echolocatoR/annotations", + "Coceres_2020/STable5_Features_scATAC-seq_Peaks_all.xlsx" + ), + skip = 18 +) +CORCES_2020.scATACseq_peaks <- data.table::data.table(dat) +usethis::use_data(CORCES_2020.scATACseq_peaks, overwrite = TRUE) +} +
Plot CS bin counts
+CS_bin_plot(merged_DT, show_plot = TRUE)+ + +
Other summarise:
+CS_counts_plot()
,
+get_CS_bins()
,
+get_CS_counts()
,
+get_SNPgroup_counts()
,
+peak_overlap_plot()
,
+plot_dataset_overlap()
,
+results_report()
,
+super_summary_plot()
+#> Error in loadNamespace(x): there is no package called 'gggplot2'
Loci ordered by Union Credible Set (UCS) size (smallest to largest).
+CS_counts_plot( + merged_DT, + show_numbers = TRUE, + ylabel = "Locus", + legend_nrow = 3, + label_yaxis = TRUE, + top_CS_only = FALSE, + show_plot = TRUE +)+ + +
Other summarise:
+CS_bin_plot()
,
+get_CS_bins()
,
+get_CS_counts()
,
+get_SNPgroup_counts()
,
+peak_overlap_plot()
,
+plot_dataset_overlap()
,
+results_report()
,
+super_summary_plot()
+#> Warning: Removed 83 rows containing missing values (position_stack).#> Warning: Removed 83 rows containing missing values (position_stack).#> Warning: Removed 375 rows containing missing values (position_stack).#>#>
GR.name_filter_convert
+GR.name_filter_convert(GR.final, GR.names, min_hits = 1)+ + +
Other plot:
+PLOT.get_window_limits()
Metadata for cell type-specific epigenomic bigWig files hosted + on UCSC Genome Browser. +bigWig files contain the genomic ranges from each epigenomic assay, +as well as a Score column which describes the peaks of the aggregate reads.
+NOTT_2019.bigwig_metadata
+
+
+ An object of class data.table
(inherits from data.frame
) with 18 rows and 14 columns.
https://science.sciencemag.org/content/366/6469/1134
+Other NOTT_2019:
+NOTT_2019.epigenomic_histograms()
,
+NOTT_2019.get_epigenomic_peaks()
,
+NOTT_2019.get_interactions()
,
+NOTT_2019.get_interactome()
,
+NOTT_2019.get_promoter_celltypes()
,
+NOTT_2019.get_promoter_interactome_data()
,
+NOTT_2019.get_regulatory_regions()
,
+NOTT_2019.interactome
,
+NOTT_2019.plac_seq_plot()
,
+NOTT_2019.superenhancer_interactome
,
+NOTT_2019.superenhancers()
+if (FALSE) { +NOTT_2019.bigwig_metadata <- data.table::data.table( + readxl::read_excel( + file.path( + "~/Desktop/Fine_Mapping/echolocatoR/annotations", + "Nott_2019/Nott_2019.snEpigenomics.xlsx" + ) + ) +) +usethis::use_data(NOTT_2019.bigwig_metadata, overwrite = TRUE) +} +
R/NOTT_2019.epigenomic_histograms.R
+ NOTT_2019.epigenomic_histograms.Rd
Brain cell-specific epigenomic data from Nott et al. (2019).
+NOTT_2019.epigenomic_histograms( + finemap_dat, + locus_dir, + show_plot = TRUE, + save_plot = TRUE, + full_data = TRUE, + return_assay_track = FALSE, + binwidth = 200, + density_adjust = 0.2, + plot.zoom = "1x", + strip.text.y.angle = 90, + xtext = TRUE, + geom = "density", + plot_formula = "Cell_type ~.", + fill_var = "Assay", + bigwig_dir = NULL, + genomic_units = "Mb", + as_ggplot = TRUE, + nThread = 1, + save_annot = FALSE, + verbose = TRUE +)+ + +
Nott et al. (2019) +https://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&lastVirtModeType=default&lastVirtModeExtraState=&virtModeType=default&virtMode=0&nonVirtPosition=&position=chr2:127770344-127983251&hgsid=778249165_ySowqECRKNxURRn6bafH0yewAiuf
+Other NOTT_2019:
+NOTT_2019.bigwig_metadata
,
+NOTT_2019.get_epigenomic_peaks()
,
+NOTT_2019.get_interactions()
,
+NOTT_2019.get_interactome()
,
+NOTT_2019.get_promoter_celltypes()
,
+NOTT_2019.get_promoter_interactome_data()
,
+NOTT_2019.get_regulatory_regions()
,
+NOTT_2019.interactome
,
+NOTT_2019.plac_seq_plot()
,
+NOTT_2019.superenhancer_interactome
,
+NOTT_2019.superenhancers()
+track.Nott_histo <- NOTT_2019.epigenomic_histograms( + finemap_dat = echodata::BST1, + locus_dir = echodata::locus_dir, + save_plot = FALSE, + return_assay_track = TRUE, + save_annot = FALSE +) +#> Error in NOTT_2019.epigenomic_histograms(finemap_dat = echodata::BST1, locus_dir = echodata::locus_dir, save_plot = FALSE, return_assay_track = TRUE, save_annot = FALSE): could not find function "NOTT_2019.epigenomic_histograms"
R/NOTT_2019.get_epigenomic_peaks.R
+ NOTT_2019.get_epigenomic_peaks.Rd
API access to brain cell type-specific epigenomic peaks (bed format) +from Nott et al. (2019).
+NOTT_2019.get_epigenomic_peaks( + assays = c("ATAC", "H3K27ac", "H3K4me3"), + cell_types = c("neurons", "microglia", "oligo", "astrocytes"), + convert_to_GRanges = TRUE, + nThread = 1, + verbose = TRUE +)+ + +
Nott et al. (2019) +https://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&lastVirtModeType=default&lastVirtModeExtraState=&virtModeType=default&virtMode=0&nonVirtPosition=&position=chr2:127770344-127983251&hgsid=778249165_ySowqECRKNxURRn6bafH0yewAiuf
+Other NOTT_2019:
+NOTT_2019.bigwig_metadata
,
+NOTT_2019.epigenomic_histograms()
,
+NOTT_2019.get_interactions()
,
+NOTT_2019.get_interactome()
,
+NOTT_2019.get_promoter_celltypes()
,
+NOTT_2019.get_promoter_interactome_data()
,
+NOTT_2019.get_regulatory_regions()
,
+NOTT_2019.interactome
,
+NOTT_2019.plac_seq_plot()
,
+NOTT_2019.superenhancer_interactome
,
+NOTT_2019.superenhancers()
+PEAKS <- NOTT_2019.get_epigenomic_peaks(nThread = 1) +#> Error in NOTT_2019.get_epigenomic_peaks(nThread = 1): could not find function "NOTT_2019.get_epigenomic_peaks"
R/NOTT_2019.get_interactions.R
+ NOTT_2019.get_interactions.Rd
Brain cell-specific epigenomic data from Nott et al. (2019).
+NOTT_2019.get_interactions(finemap_dat, as.granges = FALSE)+ + +
Nott et al. (2019) +https://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&lastVirtModeType=default&lastVirtModeExtraState=&virtModeType=default&virtMode=0&nonVirtPosition=&position=chr2:127770344-127983251&hgsid=778249165_ySowqECRKNxURRn6bafH0yewAiuf
+Other NOTT_2019:
+NOTT_2019.bigwig_metadata
,
+NOTT_2019.epigenomic_histograms()
,
+NOTT_2019.get_epigenomic_peaks()
,
+NOTT_2019.get_interactome()
,
+NOTT_2019.get_promoter_celltypes()
,
+NOTT_2019.get_promoter_interactome_data()
,
+NOTT_2019.get_regulatory_regions()
,
+NOTT_2019.interactome
,
+NOTT_2019.plac_seq_plot()
,
+NOTT_2019.superenhancer_interactome
,
+NOTT_2019.superenhancers()
R/NOTT_2019.get_interactome.R
+ NOTT_2019.get_interactome.Rd
Brain cell-specific epigenomic data from Nott et al. (2019).
+NOTT_2019.get_interactome( + annot_sub, + top.consensus.pos, + marker_key, + verbose = TRUE +)+ + +
Nott et al. (2019) +https://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&lastVirtModeType=default&lastVirtModeExtraState=&virtModeType=default&virtMode=0&nonVirtPosition=&position=chr2:127770344-127983251&hgsid=778249165_ySowqECRKNxURRn6bafH0yewAiuf
+Other NOTT_2019:
+NOTT_2019.bigwig_metadata
,
+NOTT_2019.epigenomic_histograms()
,
+NOTT_2019.get_epigenomic_peaks()
,
+NOTT_2019.get_interactions()
,
+NOTT_2019.get_promoter_celltypes()
,
+NOTT_2019.get_promoter_interactome_data()
,
+NOTT_2019.get_regulatory_regions()
,
+NOTT_2019.interactome
,
+NOTT_2019.plac_seq_plot()
,
+NOTT_2019.superenhancer_interactome
,
+NOTT_2019.superenhancers()
R/NOTT_2019.get_promoter_celltypes.R
+ NOTT_2019.get_promoter_celltypes.Rd
Brain cell-specific epigenomic data from Nott et al. (2019).
+NOTT_2019.get_promoter_celltypes(annot_sub, marker_key)+ + +
Other NOTT_2019:
+NOTT_2019.bigwig_metadata
,
+NOTT_2019.epigenomic_histograms()
,
+NOTT_2019.get_epigenomic_peaks()
,
+NOTT_2019.get_interactions()
,
+NOTT_2019.get_interactome()
,
+NOTT_2019.get_promoter_interactome_data()
,
+NOTT_2019.get_regulatory_regions()
,
+NOTT_2019.interactome
,
+NOTT_2019.plac_seq_plot()
,
+NOTT_2019.superenhancer_interactome
,
+NOTT_2019.superenhancers()
R/NOTT_2019.get_promoter_interactome_data.R
+ NOTT_2019.get_promoter_interactome_data.Rd
Brain cell-specific epigenomic data from Nott et al. (2019).
+NOTT_2019.get_promoter_interactome_data(finemap_dat)+ + +
Nott et al. (2019) +https://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&lastVirtModeType=default&lastVirtModeExtraState=&virtModeType=default&virtMode=0&nonVirtPosition=&position=chr2:127770344-127983251&hgsid=778249165_ySowqECRKNxURRn6bafH0yewAiuf
+Other NOTT_2019:
+NOTT_2019.bigwig_metadata
,
+NOTT_2019.epigenomic_histograms()
,
+NOTT_2019.get_epigenomic_peaks()
,
+NOTT_2019.get_interactions()
,
+NOTT_2019.get_interactome()
,
+NOTT_2019.get_promoter_celltypes()
,
+NOTT_2019.get_regulatory_regions()
,
+NOTT_2019.interactome
,
+NOTT_2019.plac_seq_plot()
,
+NOTT_2019.superenhancer_interactome
,
+NOTT_2019.superenhancers()
R/NOTT_2019.get_regulatory_regions.R
+ NOTT_2019.get_regulatory_regions.Rd
Plot brain cell-specific epigenomic data
+NOTT_2019.get_regulatory_regions( + as.granges = FALSE, + nThread = 1, + verbose = TRUE +)+ + +
Nott et al. (2019) +https://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&lastVirtModeType=default&lastVirtModeExtraState=&virtModeType=default&virtMode=0&nonVirtPosition=&position=chr2:127770344-127983251&hgsid=778249165_ySowqECRKNxURRn6bafH0yewAiuf
+Other NOTT_2019:
+NOTT_2019.bigwig_metadata
,
+NOTT_2019.epigenomic_histograms()
,
+NOTT_2019.get_epigenomic_peaks()
,
+NOTT_2019.get_interactions()
,
+NOTT_2019.get_interactome()
,
+NOTT_2019.get_promoter_celltypes()
,
+NOTT_2019.get_promoter_interactome_data()
,
+NOTT_2019.interactome
,
+NOTT_2019.plac_seq_plot()
,
+NOTT_2019.superenhancer_interactome
,
+NOTT_2019.superenhancers()
R/data.R
+ NOTT_2019.interactome.Rd
Originally from +Nott et al. (2019). +Specifically: aay0793-Nott-Table-S5.xlsx.
+NOTT_2019.interactome
+
+
+ An object of class list
of length 12.
https://science.sciencemag.org/content/366/6469/1134
+Other NOTT_2019:
+NOTT_2019.bigwig_metadata
,
+NOTT_2019.epigenomic_histograms()
,
+NOTT_2019.get_epigenomic_peaks()
,
+NOTT_2019.get_interactions()
,
+NOTT_2019.get_interactome()
,
+NOTT_2019.get_promoter_celltypes()
,
+NOTT_2019.get_promoter_interactome_data()
,
+NOTT_2019.get_regulatory_regions()
,
+NOTT_2019.plac_seq_plot()
,
+NOTT_2019.superenhancer_interactome
,
+NOTT_2019.superenhancers()
+if (FALSE) { +file <- file.path( + "~/Desktop/Fine_Mapping/echolocatoR/annotations", + "Nott_2019/aay0793-Nott-Table-S5.xlsx" +) +sheets <- readxl::excel_sheets(file) +enh_prom_sheets <- grep("enhancers|promoters", sheets, value = TRUE) +other_sheets <- grep("enhancers|promoters", sheets, + value = TRUE, + invert = TRUE +) +NOTT_2019.interactome <- lapply(other_sheets, function(s) { + readxl::read_excel(file, sheet = s, skip = 2) +}) +NOTT_2019.interactome <- append( + NOTT_2019.interactome, + lapply(enh_prom_sheets, function(s) { + readxl::read_excel(file, + sheet = s, skip = 2, + col_names = c("chr", "start", "end") + ) + }) +) +names(NOTT_2019.interactome) <- c(other_sheets, enh_prom_sheets) +usethis::use_data(NOTT_2019.interactome, overwrite = TRUE) +} +
R/NOTT_2019.plac_seq_plot.R
+ NOTT_2019.plac_seq_plot.Rd
Plot brain cell-specific interactome data
+NOTT_2019.plac_seq_plot( + finemap_dat = NULL, + locus_dir = NULL, + title = NULL, + print_plot = TRUE, + save_plot = TRUE, + return_interaction_track = FALSE, + xlims = NULL, + zoom_window = NULL, + index_SNP = NULL, + genomic_units = "Mb", + color_dict = c(enhancers = "springgreen2", promoters = "purple", anchors = "black"), + return_consensus_overlap = TRUE, + show_arches = TRUE, + highlight_plac = FALSE, + show_regulatory_rects = TRUE, + show_anchors = TRUE, + strip.text.y.angle = 0, + xtext = TRUE, + save_annot = FALSE, + point_size = 2, + height = 7, + width = 7, + dpi = 300, + as_ggplot = TRUE, + nThread = 1, + verbose = TRUE +)+ + +
Nott et al. (2019) +https://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&lastVirtModeType=default&lastVirtModeExtraState=&virtModeType=default&virtMode=0&nonVirtPosition=&position=chr2:127770344-127983251&hgsid=778249165_ySowqECRKNxURRn6bafH0yewAiuf
+Other NOTT_2019:
+NOTT_2019.bigwig_metadata
,
+NOTT_2019.epigenomic_histograms()
,
+NOTT_2019.get_epigenomic_peaks()
,
+NOTT_2019.get_interactions()
,
+NOTT_2019.get_interactome()
,
+NOTT_2019.get_promoter_celltypes()
,
+NOTT_2019.get_promoter_interactome_data()
,
+NOTT_2019.get_regulatory_regions()
,
+NOTT_2019.interactome
,
+NOTT_2019.superenhancer_interactome
,
+NOTT_2019.superenhancers()
+if (FALSE) { +data("BST1") +data("locus_dir") +trks_plus_lines <- NOTT_2019.plac_seq_plot(finemap_dat = BST1, locus_dir = file.path("~/Desktop", locus_dir), highlight_plac = TRUE) +# Zoom in +trks_plus_lines <- NOTT_2019.plac_seq_plot(finemap_dat = BST1, locus_dir = file.path("~/Desktop", locus_dir), zoom_window = 500000, highlight_plac = TRUE) +} +
R/data.R
+ NOTT_2019.superenhancer_interactome.Rd
Originally from +Nott et al. (2019). +Specifically: aay0793-Nott-Table-S6.xlsx.
+NOTT_2019.superenhancer_interactome
+
+
+ An object of class data.table
(inherits from data.frame
) with 2954 rows and 29 columns.
https://science.sciencemag.org/content/366/6469/1134
+Other NOTT_2019:
+NOTT_2019.bigwig_metadata
,
+NOTT_2019.epigenomic_histograms()
,
+NOTT_2019.get_epigenomic_peaks()
,
+NOTT_2019.get_interactions()
,
+NOTT_2019.get_interactome()
,
+NOTT_2019.get_promoter_celltypes()
,
+NOTT_2019.get_promoter_interactome_data()
,
+NOTT_2019.get_regulatory_regions()
,
+NOTT_2019.interactome
,
+NOTT_2019.plac_seq_plot()
,
+NOTT_2019.superenhancers()
+if (FALSE) { +NOTT_2019.superenhancer_interactome <- data.table::data.table( + readxl::read_excel( + file.path( + "~/Desktop/Fine_Mapping/echolocatoR", + "annotations/Nott_2019/aay0793-Nott-Table-S6.xlsx" + ), + skip = 2 + ) +) +usethis::use_data(NOTT_2019.superenhancer_interactome) +} +
R/NOTT_2019.superenhancers.R
+ NOTT_2019.superenhancers.Rd
Brain cell-specific epigenomic data from Nott et al. (2019).
+NOTT_2019.superenhancers(finemap_dat)+ + +
Nott et al. (2019) +https://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&lastVirtModeType=default&lastVirtModeExtraState=&virtModeType=default&virtMode=0&nonVirtPosition=&position=chr2:127770344-127983251&hgsid=778249165_ySowqECRKNxURRn6bafH0yewAiuf
+Other NOTT_2019:
+NOTT_2019.bigwig_metadata
,
+NOTT_2019.epigenomic_histograms()
,
+NOTT_2019.get_epigenomic_peaks()
,
+NOTT_2019.get_interactions()
,
+NOTT_2019.get_interactome()
,
+NOTT_2019.get_promoter_celltypes()
,
+NOTT_2019.get_promoter_interactome_data()
,
+NOTT_2019.get_regulatory_regions()
,
+NOTT_2019.interactome
,
+NOTT_2019.plac_seq_plot()
,
+NOTT_2019.superenhancer_interactome
R/PLOT.get_max_histogram_height.R
+ PLOT.get_max_histogram_height.Rd
PLOT.get_max_histogram_height
+PLOT.get_max_histogram_height(gg, round_to = NULL, verbose = TRUE)+ + + +
R/PLOT.get_window_limits.R
+ PLOT.get_window_limits.Rd
Get window size limits for plot
+PLOT.get_window_limits( + finemap_dat, + index_as_center = TRUE, + plot.zoom = NULL, + genomic_units = "Mb", + verbose = TRUE +)+ + +
Other plot:
+GR.name_filter_convert()
+#> Warning: data set 'BST1' not foundxlims <- PLOT.get_window_limits(finemap_dat = BST1, plot.zoom = 50000) +#> Error in PLOT.get_window_limits(finemap_dat = BST1, plot.zoom = 50000): could not find function "PLOT.get_window_limits"xlims <- PLOT.get_window_limits(finemap_dat = BST1, plot.zoom = "all") +#> Error in PLOT.get_window_limits(finemap_dat = BST1, plot.zoom = "all"): could not find function "PLOT.get_window_limits"xlims <- PLOT.get_window_limits(finemap_dat = BST1, plot.zoom = "5x") +#> Error in PLOT.get_window_limits(finemap_dat = BST1, plot.zoom = "5x"): could not find function "PLOT.get_window_limits"
R/ROADMAP.construct_reference.R
+ ROADMAP.construct_reference.Rd
Gather Roadmap annotation metadata
+ROADMAP.construct_reference( + ref_path = system.file("extdata/ROADMAP", "ROADMAP_Epigenomic.js", package = + "echoannot"), + keyword_query = NULL +)+ +
ref_path | +Where the ROADMAP metadata is stored. |
+
---|---|
keyword_query | +Search all columns in the Roadmap annotations metadata
+and only query annotations that contain your keywords.
+Can provide multiple keywords in list form:
+ |
+
Other ROADMAP:
+ROADMAP.merge_and_process_grl()
,
+ROADMAP.query_and_plot()
,
+ROADMAP.query()
,
+ROADMAP.tabix()
+#>
R/ROADMAP.merge_and_process_grl.R
+ ROADMAP.merge_and_process_grl.Rd
Standardize Roadmap query
+ROADMAP.merge_and_process_grl( + grl.roadmap, + gr.snp, + n_top_tissues = 5, + sep = " " +)+ +
grl.roadmap | +Roadmap query results |
+
---|---|
n_top_tissues | +The number of top tissues to include, +sorted by greatest number of rows +(i.e. the number of genomic ranges within the window). |
+
Other ROADMAP:
+ROADMAP.construct_reference()
,
+ROADMAP.query_and_plot()
,
+ROADMAP.query()
,
+ROADMAP.tabix()
Query Roadmap by genomic coordinates
+ROADMAP.query( + results_path = file.path(tempdir(), "Roadmap"), + gr.snp, + keyword_query = NULL, + limit_files = NULL, + nThread = 1, + verbose = TRUE +)+ +
results_path | +Where to store query results. |
+
---|---|
gr.snp | +
|
+
limit_files | +Limit the number of annotation files queried +(for faster testing). |
+
nThread | +Number of threads to parallelise queries over. |
+
Other ROADMAP:
+ROADMAP.construct_reference()
,
+ROADMAP.merge_and_process_grl()
,
+ROADMAP.query_and_plot()
,
+ROADMAP.tabix()
+if (FALSE) { +grl.roadmap <- ROADMAP.query( + gr.snp = echodata::BST1, + keyword_query = "placenta") +} +
R/ROADMAP.query_and_plot.R
+ ROADMAP.query_and_plot.Rd
Query and plot Roadmap epigenomic annotations
+ROADMAP.query_and_plot( + subset_DT, + results_path = file.path(tempdir(), "Roadmap"), + n_top_tissues = NULL, + keyword_query = NULL, + adjust = 0.2, + force_new_query = FALSE, + remove_tmps = TRUE, + verbose = TRUE +)+ +
subset_DT | +Data.frame with at least the following columns:
|
+
---|---|
results_path | +Where to store query results. |
+
n_top_tissues | +The number of top tissues to include, +sorted by greatest number of rows +(i.e. the number of genomic ranges within the window). |
+
keyword_query | +Search all columns in the Roadmap annotations metadata
+and only query annotations that contain your keywords.
+Can provide multiple keywords in list form:
+ |
+
force_new_query | +Force a new query from the XGR database. |
+
A named list containing:
ggbio
plot
GRanges
object within the queried coordinates
Other ROADMAP:
+ROADMAP.construct_reference()
,
+ROADMAP.merge_and_process_grl()
,
+ROADMAP.query()
,
+ROADMAP.tabix()
+if (FALSE) { +roadmap_plot_query <- ROADMAP.query_and_plot( + subset_DT = echodata::BST1, + keyword_query = "monocytes") +} +
Query Roadmap epigenomic annotations (chromatin marks) +using a range of genomic coordinates.
+ROADMAP.tabix( + results_path = tempfile(fileext = "ROADMAP_query.csv.gz"), + chrom, + min_pos, + max_pos, + eid, + convert_to_granges = TRUE, + verbose = TRUE +)+ +
results_path | +Where to store query results. |
+
---|---|
chrom | +Chromosome to query |
+
min_pos | +Minimum genomic position |
+
max_pos | +Maximum genomic position |
+
eid | +Roadmap annotation ID |
+
convert_to_granges | +Whether to return query
+as a |
+
Other ROADMAP:
+ROADMAP.construct_reference()
,
+ROADMAP.merge_and_process_grl()
,
+ROADMAP.query_and_plot()
,
+ROADMAP.query()
+
Plot Roadmap query
+ROADMAP.track_plot( + grl.roadmap.filt, + gr.snp = NULL, + geom = "density", + adjust = 0.2, + show_plot = TRUE, + as.ggplot = TRUE +)+ +
grl.roadmap.filt | +Roadmap query results. |
+
---|---|
gr.snp | +Optionally, can include an extra |
+
geom | +The type of plot to create. +Options include "density" and "histogram". |
+
adjust | +The granularity of the peaks. |
+
show_plot | +Whether to print the plot. |
+
+if (FALSE) { +gr.snp <- dt_to_granges(echodata::BST1) +grl.roadmap <- ROADMAP.query( + gr.snp = gr.snp, + keyword_query = "monocyte" +) +grl.roadmap.filt <- ROADMAP.merge_and_process_grl( + grl.roadmap = grl.roadmap, + gr.snp = gr.snp +) +track.roadmap <- ROADMAP.track_plot(grl.roadmap.filt, + gr.snp = gr.snp +) +} +
R/XGR.download_and_standardize.R
+ XGR.download_and_standardize.Rd
Merges a list of XGR annotations into a single GRanges object
+XGR.download_and_standardize( + lib.selections = c("ENCODE_TFBS_ClusteredV3_CellTypes", "TFBS_Conserved", + "Uniform_TFBS"), + as_grangesList = FALSE, + finemap_dat, + nThread = 1 +)+ +
lib.selections | +Which XGR annotations to check overlap with. +For full list of libraries see + + here. |
+
---|---|
as_grangesList | +Return as a |
+
finemap_dat | +Fine-mapping results. |
+
nThread | +Number of cores to parallelise across. |
+
GRangesList
+Other XGR:
+XGR.enrichment_bootstrap()
,
+XGR.enrichment_plot()
,
+XGR.enrichment()
,
+XGR.filter_assays()
,
+XGR.filter_sources()
,
+XGR.import_annotations()
,
+XGR.iterate_enrichment()
,
+XGR.iterate_overlap()
,
+XGR.merge_and_process()
,
+XGR.parse_metadata()
,
+XGR.plot_enrichment()
,
+XGR.plot_peaks()
,
+XGR.prepare_foreground_background()
,
+XGR.sep_handler()
,
+granges_to_bed()
+
Run SNP-level enrichment test with xGRviaGenomicAnno.
+XGR.enrichment( + gr, + merged_dat, + foreground_filter = "Consensus_SNP==TRUE", + background_filter = NULL, + grouping_vars = c("Study", "Assay", "Cell_type"), + fg_sample_size = NULL, + bg_sample_size = NULL, + background.annotatable.only = FALSE, + verbose = TRUE +)+ +
gr | +Annotations to test for enrichment with. |
+
---|---|
merged_dat | +SNP-level fine-mapping results to test for enrichment with. |
+
foreground_filter | +Filter to apply to foreground (target SNPs). |
+
background_filter | +Filter to apply to background (non-target SNPs). |
+
grouping_vars | +Columns in |
+
fg_sample_size | +Foreground sample size. |
+
bg_sample_size | +Background sample size. |
+
background.annotatable.only | +For background SNPs,
+only use SNPs that overlap with some annotation in |
+
verbose | +Print messages. |
+
Other XGR:
+XGR.download_and_standardize()
,
+XGR.enrichment_bootstrap()
,
+XGR.enrichment_plot()
,
+XGR.filter_assays()
,
+XGR.filter_sources()
,
+XGR.import_annotations()
,
+XGR.iterate_enrichment()
,
+XGR.iterate_overlap()
,
+XGR.merge_and_process()
,
+XGR.parse_metadata()
,
+XGR.plot_enrichment()
,
+XGR.plot_peaks()
,
+XGR.prepare_foreground_background()
,
+XGR.sep_handler()
,
+granges_to_bed()
+if (FALSE) { +gr.merged <- echoannot::merge_celltype_specific_epigenomics() +enrich.lead <- XGR.enrichment( + gr = gr.merged, + merged_dat = echodata::Nalls2019_merged, + foreground_filter = "leadSNP==TRUE", + grouping_vars = c("Study", "Cell_type", "Assay") +) +} +
R/XGR.enrichment_bootstrap.R
+ XGR.enrichment_bootstrap.Rd
Perform annotation enrichment tests using an iterative bootstrapping procedure.
+XGR.enrichment_bootstrap( + gr, + merged_dat, + snp_groups = c("Random", "GWAS lead", "UCS (-PolyFun)", "UCS", + "Consensus (-PolyFun)", "Consensus"), + background_filter = NULL, + grouping_vars = c("Study", "Assay", "Cell_type"), + iterations = 1000, + fg_sample_size = 20, + bg_sample_size = NULL, + bootstrap = TRUE, + save_path = tempfile(fileext = "XGR_enrich_boot_res.csv.gz"), + nThread = 1, + verbose = TRUE +)+ +
gr | +Annotations to test for enrichment with. |
+
---|---|
merged_dat | +SNP-level fine-mapping results to test for enrichment with. |
+
snp_groups | +Which SNP groups to repeat enrichment tests for separately. |
+
background_filter | +Filter to apply to background (non-target SNPs). |
+
grouping_vars | +Columns in |
+
iterations | +Number of bootstrapping iterations. |
+
fg_sample_size | +Foreground sample size. |
+
bg_sample_size | +Background sample size. |
+
bootstrap | +Whether to use bootstrapping. |
+
save_path | +File path to save results to. |
+
nThread | +Number of threads to parallelise bootstrapping over. |
+
verbose | +Print messages. |
+
Other XGR:
+XGR.download_and_standardize()
,
+XGR.enrichment_plot()
,
+XGR.enrichment()
,
+XGR.filter_assays()
,
+XGR.filter_sources()
,
+XGR.import_annotations()
,
+XGR.iterate_enrichment()
,
+XGR.iterate_overlap()
,
+XGR.merge_and_process()
,
+XGR.parse_metadata()
,
+XGR.plot_enrichment()
,
+XGR.plot_peaks()
,
+XGR.prepare_foreground_background()
,
+XGR.sep_handler()
,
+granges_to_bed()
+if (FALSE) { +gr.merged <- echoannot::merge_celltype_specific_epigenomics() +enrich_res <- XGR.enrichment_bootstrap( + gr = gr.merged, + merged_dat = echodata::Nalls2019_merged +) +} +
Plot enrichment results
+XGR.enrichment_plot( + enrich_res, + title = NULL, + subtitle = NULL, + facet_formula = NULL, + line_formula = "y ~ x", + line_method = "lm", + line_span = 1, + FDR_thresh = 1, + plot_type = "bar", + shape_var = "Cell_type", + facet_scales = "free", + show_plot = TRUE, + save_plot = FALSE, + height = 5, + width = 5 +)+ + +
Other XGR:
+XGR.download_and_standardize()
,
+XGR.enrichment_bootstrap()
,
+XGR.enrichment()
,
+XGR.filter_assays()
,
+XGR.filter_sources()
,
+XGR.import_annotations()
,
+XGR.iterate_enrichment()
,
+XGR.iterate_overlap()
,
+XGR.merge_and_process()
,
+XGR.parse_metadata()
,
+XGR.plot_enrichment()
,
+XGR.plot_peaks()
,
+XGR.prepare_foreground_background()
,
+XGR.sep_handler()
,
+granges_to_bed()
+if (FALSE) { +root <- file.path( + "/sc/arion/projects/pd-omics/brian", + "Fine_Mapping/Data/GWAS/Nalls23andMe_2019/_genome_wide" +) +### merged enrichment results +enrich_res <- data.table::fread( + file.path( + root, + "XGR/celltypespecific_epigenomics.SNP_groups.csv.gz" + ) +) +enrich_res <- data.table::fread( + file.path( + root, + "XGR/celltypespecific_epigenomics.snp_groups.csv.gz" + ) +) +enrich_boot <- data.table::fread( + file.path( + root, + "XGR/celltypespecific_epigenomics.snp_groups.permute.csv.gz" + ) +) +enrich_assay <- data.table::fread( + file.path( + root, + "XGR/celltypespecific_epigenomics.snp_groups.assay.csv.gz" + ) +) + +# Merged volcano plot +enrich_res <- subset(enrich_res, SNP_Group != "Consensus (-PolyFun)") %>% + dplyr::rename(SNP_group = SNP_Group) +gp <- XGR.enrichment_plot( + enrich_res = subset(enrich_res, !Assay %in% c("HiChIP_FitHiChIP", "PLAC")), + title = "Enrichment: Cell-type-specific epigenomics", + plot_type = "point", + save_plot = file.path( + root, "XGR/celltypespecific_epigenomics.enrich_volcano.png" + ), + height = 6, width = 8, shape_var = "Assay" +) +## Merged bar plot +gp <- XGR.enrichment_plot( + enrich_res = enrich_res, + plot_type = "bar", + facet_formula = ".~Assay", + FDR_thresh = .05 +) +# Merged volcano plot (permuted) +gp <- XGR.enrichment_plot( + enrich_res = enrich.scATAC.permute, + title = "Permuted enrichment: Cell-type-specific peaks and elements", + plot_type = "point" +) +} +
Identify the assays with the most annotations in the locus. +Then only keep these assays.
+XGR.filter_assays(gr.lib, n_top_assays = 5)+ + +
Other XGR:
+XGR.download_and_standardize()
,
+XGR.enrichment_bootstrap()
,
+XGR.enrichment_plot()
,
+XGR.enrichment()
,
+XGR.filter_sources()
,
+XGR.import_annotations()
,
+XGR.iterate_enrichment()
,
+XGR.iterate_overlap()
,
+XGR.merge_and_process()
,
+XGR.parse_metadata()
,
+XGR.plot_enrichment()
,
+XGR.plot_peaks()
,
+XGR.prepare_foreground_background()
,
+XGR.sep_handler()
,
+granges_to_bed()
Identify the sources with the most annotations in the locus. +Then only keep these sources.
+XGR.filter_sources(gr.lib, n_top_sources = 5)+ + +
Other XGR:
+XGR.download_and_standardize()
,
+XGR.enrichment_bootstrap()
,
+XGR.enrichment_plot()
,
+XGR.enrichment()
,
+XGR.filter_assays()
,
+XGR.import_annotations()
,
+XGR.iterate_enrichment()
,
+XGR.iterate_overlap()
,
+XGR.merge_and_process()
,
+XGR.parse_metadata()
,
+XGR.plot_enrichment()
,
+XGR.plot_peaks()
,
+XGR.prepare_foreground_background()
,
+XGR.sep_handler()
,
+granges_to_bed()
Download XGR annotations
+XGR.import_annotations( + gr.snp, + anno_data_path = file.path("annotations", paste0("XGR_", lib.name, ".rds")), + lib.name, + save_xgr = TRUE, + annot_overlap_threshold = 5 +)+ + +
Other XGR:
+XGR.download_and_standardize()
,
+XGR.enrichment_bootstrap()
,
+XGR.enrichment_plot()
,
+XGR.enrichment()
,
+XGR.filter_assays()
,
+XGR.filter_sources()
,
+XGR.iterate_enrichment()
,
+XGR.iterate_overlap()
,
+XGR.merge_and_process()
,
+XGR.parse_metadata()
,
+XGR.plot_enrichment()
,
+XGR.plot_peaks()
,
+XGR.prepare_foreground_background()
,
+XGR.sep_handler()
,
+granges_to_bed()
R/XGR.iterate_enrichment.R
+ XGR.iterate_enrichment.Rd
XGR uses a binomial enrichment test for each annotation.
+XGR.iterate_enrichment( + subset_DT, + foreground_filter = "Consensus_SNP", + background_filter = "leadSNP", + lib.selections = c("ENCODE_TFBS_ClusteredV3_CellTypes", + "ENCODE_DNaseI_ClusteredV3_CellTypes", "Broad_Histone", "FANTOM5_Enhancer", + "Segment_Combined_Gm12878", "TFBS_Conserved", "ReMap_PublicAndEncode_TFBS", + "Blueprint_VenousBlood_Histone", "Blueprint_DNaseI", "FANTOM5_CAT_Cell", + "FANTOM5_CAT_MESH", "GWAScatalog_alltraits"), + save_path = FALSE, + nThread = 1 +)+ +
subset_DT | +Data.frame with at least the following columns:
|
+
---|---|
foreground_filter | +Specify foreground by filtering SNPs
+in |
+
background_filter | +Specify background by filtering SNPs
+in |
+
Other XGR:
+XGR.download_and_standardize()
,
+XGR.enrichment_bootstrap()
,
+XGR.enrichment_plot()
,
+XGR.enrichment()
,
+XGR.filter_assays()
,
+XGR.filter_sources()
,
+XGR.import_annotations()
,
+XGR.iterate_overlap()
,
+XGR.merge_and_process()
,
+XGR.parse_metadata()
,
+XGR.plot_enrichment()
,
+XGR.plot_peaks()
,
+XGR.prepare_foreground_background()
,
+XGR.sep_handler()
,
+granges_to_bed()
+if (FALSE) { +enrich_res <- XGR.iterate_enrichment( + subset_DT = echodata::Nalls2019_merged, + foreground_filter = "Consensus_SNP", + background_filter = "leadSNP", + lib.selections = c("ENCODE_TFBS_ClusteredV3_CellTypes") +) +} +
Automatically handles different file formats provided by XGR
+ (e.g. varying kinds of nested/unnested GRanges
).
+Then returns a GRanges
object with only the XGR annotation ranges
+that overlap with the SNPs in subset_DT
.
+The GRanges
merges hits from subset_DT
.
XGR.iterate_overlap( + lib.selections = c("ENCODE_TFBS_ClusteredV3_CellTypes", "TFBS_Conserved", + "ReMap_PublicAndEncode_TFBS", "Uniform_TFBS"), + subset_DT, + save_path = FALSE, + nThread = 1 +)+ +
lib.selections | +Which XGR annotations to check overlap with. +For full list of libraries see + + here. |
+
---|---|
subset_DT | +Data.frame with at least the following columns:
|
+
save_path | +Save the results as a |
+
nThread | +Multi-thread across libraries. |
+
Other XGR:
+XGR.download_and_standardize()
,
+XGR.enrichment_bootstrap()
,
+XGR.enrichment_plot()
,
+XGR.enrichment()
,
+XGR.filter_assays()
,
+XGR.filter_sources()
,
+XGR.import_annotations()
,
+XGR.iterate_enrichment()
,
+XGR.merge_and_process()
,
+XGR.parse_metadata()
,
+XGR.plot_enrichment()
,
+XGR.plot_peaks()
,
+XGR.prepare_foreground_background()
,
+XGR.sep_handler()
,
+granges_to_bed()
+
Parses the metadata and adds it as columns,
+and then merges the results into a single
+GenomicRangesList
XGR.merge_and_process(grl.xgr, lib, n_top_sources = 10)+ +
grl.xgr | +GenomicRangesList of XGR queries. |
+
---|
Other XGR:
+XGR.download_and_standardize()
,
+XGR.enrichment_bootstrap()
,
+XGR.enrichment_plot()
,
+XGR.enrichment()
,
+XGR.filter_assays()
,
+XGR.filter_sources()
,
+XGR.import_annotations()
,
+XGR.iterate_enrichment()
,
+XGR.iterate_overlap()
,
+XGR.parse_metadata()
,
+XGR.plot_enrichment()
,
+XGR.plot_peaks()
,
+XGR.prepare_foreground_background()
,
+XGR.sep_handler()
,
+granges_to_bed()
XGR.parse_metadata
+XGR.parse_metadata(gr.lib, lib.name = NA)+ + +
Other XGR:
+XGR.download_and_standardize()
,
+XGR.enrichment_bootstrap()
,
+XGR.enrichment_plot()
,
+XGR.enrichment()
,
+XGR.filter_assays()
,
+XGR.filter_sources()
,
+XGR.import_annotations()
,
+XGR.iterate_enrichment()
,
+XGR.iterate_overlap()
,
+XGR.merge_and_process()
,
+XGR.plot_enrichment()
,
+XGR.plot_peaks()
,
+XGR.prepare_foreground_background()
,
+XGR.sep_handler()
,
+granges_to_bed()
Plot XGR enrichment
+XGR.plot_enrichment( + enrich_res, + adjp_thresh = 0.05, + top_annotations = NULL, + show_plot = TRUE +)+ + +
Other XGR:
+XGR.download_and_standardize()
,
+XGR.enrichment_bootstrap()
,
+XGR.enrichment_plot()
,
+XGR.enrichment()
,
+XGR.filter_assays()
,
+XGR.filter_sources()
,
+XGR.import_annotations()
,
+XGR.iterate_enrichment()
,
+XGR.iterate_overlap()
,
+XGR.merge_and_process()
,
+XGR.parse_metadata()
,
+XGR.plot_peaks()
,
+XGR.prepare_foreground_background()
,
+XGR.sep_handler()
,
+granges_to_bed()
+if (FALSE) { +enrich_res <- XGR.iterate_enrichment( + subset_DT = echodata::Nalls2019_merged, + foreground_filter = "Consensus_SNP", + background_filter = "leadSNP", + lib.selections = c("ENCODE_TFBS_ClusteredV3_CellTypes") +) +XGR.plot_enrichment(enrich_res) +} +
Plots the distribution of annotations across a genomic region (x-axis).
+XGR.plot_peaks( + gr.lib, + subset_DT, + fill_var = "Assay", + facet_var = "Source", + geom = "density", + locus = NULL, + adjust = 0.2, + show_plot = TRUE, + show.legend = TRUE, + as.ggplot = TRUE, + trim_xlims = FALSE +)+ +
gr.lib | +
|
+
---|---|
subset_DT | +Data.frame with at least the following columns:
|
+
geom | +Plot type ("density", or "histogram"). |
+
locus | +Locus name (optional). |
+
adjust | +The granularity of the peaks. |
+
show_plot | +Print the plot. |
+
ggbio
track plot.
Other XGR:
+XGR.download_and_standardize()
,
+XGR.enrichment_bootstrap()
,
+XGR.enrichment_plot()
,
+XGR.enrichment()
,
+XGR.filter_assays()
,
+XGR.filter_sources()
,
+XGR.import_annotations()
,
+XGR.iterate_enrichment()
,
+XGR.iterate_overlap()
,
+XGR.merge_and_process()
,
+XGR.parse_metadata()
,
+XGR.plot_enrichment()
,
+XGR.prepare_foreground_background()
,
+XGR.sep_handler()
,
+granges_to_bed()
+if (FALSE) { +gr.lib <- XGR.download_and_standardize( + c("ENCODE_DNaseI_ClusteredV3_CellTypes"), + finemap_dat = echodata::BST1 +) +gr.filt <- XGR.filter_sources(gr.lib = gr.lib, n_top_sources = 5) +gr.filt <- XGR.filter_assays(gr.lib = gr.filt, n_top_assays = 5) +xgr.track <- XGR.plot_peaks( + gr.lib = gr.filt, + subset_DT = echodata::BST1, + fill_var = "Assay", + facet_var = "Source" +) +} +
R/XGR.prepare_foreground_background.R
+ XGR.prepare_foreground_background.Rd
Prepare custom foreground and background SNP sets for enrichment +tests with XGR annotations.
+XGR.prepare_foreground_background( + subset_DT, + foreground_filter = "Support>0", + background_filter = NULL, + fg_sample_size = NULL, + bg_sample_size = NULL, + verbose = TRUE +)+ +
subset_DT | +Data.frame with at least the following columns:
|
+
---|---|
foreground_filter | +Specify foreground by filtering SNPs
+in |
+
background_filter | +Specify background by filtering SNPs
+in |
+
Other XGR:
+XGR.download_and_standardize()
,
+XGR.enrichment_bootstrap()
,
+XGR.enrichment_plot()
,
+XGR.enrichment()
,
+XGR.filter_assays()
,
+XGR.filter_sources()
,
+XGR.import_annotations()
,
+XGR.iterate_enrichment()
,
+XGR.iterate_overlap()
,
+XGR.merge_and_process()
,
+XGR.parse_metadata()
,
+XGR.plot_enrichment()
,
+XGR.plot_peaks()
,
+XGR.sep_handler()
,
+granges_to_bed()
+if (FALSE) { +fg_bg <- XGR.prepare_foreground_background( + subset_DT = echodata::Nalls2019_merged, + foreground_filter = "Consensus_SNP==TRUE", + background_filter = "leadSNP==TRUE" +) +} +
XGR.sep_handler
+XGR.sep_handler(lib.name)+ + +
Other XGR:
+XGR.download_and_standardize()
,
+XGR.enrichment_bootstrap()
,
+XGR.enrichment_plot()
,
+XGR.enrichment()
,
+XGR.filter_assays()
,
+XGR.filter_sources()
,
+XGR.import_annotations()
,
+XGR.iterate_enrichment()
,
+XGR.iterate_overlap()
,
+XGR.merge_and_process()
,
+XGR.parse_metadata()
,
+XGR.plot_enrichment()
,
+XGR.plot_peaks()
,
+XGR.prepare_foreground_background()
,
+granges_to_bed()
Annotate any missense variants
+annotate_missense(merged_DT, snp_filter = "Support>0")+ + +
Other annotate:
+biomart_geneInfo()
,
+biomart_snp_info()
,
+biomart_snps_to_geneInfo()
,
+haplor_epigenetics_enrichment()
,
+haplor_epigenetics_summary()
,
+haplor_haploreg()
,
+haplor_regulomedb()
,
+plot_missense()
,
+snps_by_mutation_type()
+if (FALSE) { +annotated_DT <- annotate_missense( + merged_DT = echodata::Nalls2019_merged, + snp_filter = "Support>0" +) +} +
Name annotation file
+annotation_file_name(locus_dir, lib_name)+ + + +
If none of the SNPs in the data.frame have leadSNP==TRUE
,
+then sort by lowest p-value (and then highest Effect size)
+and assign the top SNP as the lead SNP.
assign_lead_SNP(new_DT, verbose = TRUE)+ +
data.frame | +Fine-mapping results data.frame. |
+
---|
Fine-mapping results data.frame with +new boolean leadSNP column, + indicating whether each SNP is the lead GWAS SNP in that locus or not.
+ +Get gene info using Biomart
+biomart_geneInfo(geneList, reference_genome = "grch37")+ + +
Other annotate:
+annotate_missense()
,
+biomart_snp_info()
,
+biomart_snps_to_geneInfo()
,
+haplor_epigenetics_enrichment()
,
+haplor_epigenetics_summary()
,
+haplor_haploreg()
,
+haplor_regulomedb()
,
+plot_missense()
,
+snps_by_mutation_type()
+
Download SNP-wise annotations from Biomart
+biomart_snp_info( + snp_list, + reference_genome = "grch37", + attributes = c("refsnp_id", "allele", "chr_name", "chrom_start", "chrom_end", + "chrom_strand", "ensembl_gene_stable_id", "consequence_type_tv", + "polyphen_prediction", "polyphen_score", "sift_prediction", "sift_score", + "reg_consequence_types", "validated"), + verbose = TRUE +)+ + +
Other annotate:
+annotate_missense()
,
+biomart_geneInfo()
,
+biomart_snps_to_geneInfo()
,
+haplor_epigenetics_enrichment()
,
+haplor_epigenetics_summary()
,
+haplor_haploreg()
,
+haplor_regulomedb()
,
+plot_missense()
,
+snps_by_mutation_type()
R/biomart_snps_to_geneInfo.R
+ biomart_snps_to_geneInfo.Rd
Identify which genes SNPs belong to using Biomart
+biomart_snps_to_geneInfo(snp_list, reference_genome = "grch37")+ + +
Other annotate:
+annotate_missense()
,
+biomart_geneInfo()
,
+biomart_snp_info()
,
+haplor_epigenetics_enrichment()
,
+haplor_epigenetics_summary()
,
+haplor_haploreg()
,
+haplor_regulomedb()
,
+plot_missense()
,
+snps_by_mutation_type()
+if (FALSE) { +# biomart_snps_to_geneInfo(c("rs114360492")) +} +
R/cell_type_specificity.R
+ cell_type_specificity.Rd
Aggregate SNP overlap across various epigenomic datasets +and then identify the number of SNPs overlapping by each cell type
+cell_type_specificity( + plot_dat, + merged_DT, + min_count = NULL, + top_celltype_only = FALSE, + label_yaxis = TRUE, + y_lab = NULL, + show_genes = FALSE, + x_strip_angle = 40, + show_plot = TRUE +)+ + + +
Clean GRanges object
+clean_granges(gr)+ + + +
R/coloc_nominated_eGenes.R
+ coloc_nominated_eGenes.Rd
Across all GWAS-QTL colocalization tests across all studies, +take the eGene with the highest colocalization probability (PP.H4) +and assign it as the most likely causal gene in that locus.
+coloc_nominated_eGenes( + coloc_results, + merged_DT, + label_yaxis = TRUE, + y_lab = "Locus", + x_lab = NULL, + fill_var = "PP.H4", + text_size = 2, + PP_threshold = NULL, + nThread = 1, + show_plot = TRUE, + verbose = TRUE +)+ + +
eQTL queries and colocalization test done with catalogueR.
+ ++if (FALSE) { +merged_DT <- echodata::Nalls2019_merged +base_url <- "~/Desktop/Fine_Mapping/Data/GWAS/Nalls23andMe_2019" +coloc_results_path <- file.path( + base_url, "_genome_wide/COLOC/coloc.eQTL_Catalogue_ALL.csv.gz" +) +gg_egene <- coloc_nominated_eGenes(coloc_results, + merged_DT = merged_DT, + fill_var = NULL +) + +# QTL +base_url <- "/sc/hydra/projects/ad-omics/microglia_omics/Fine_Mapping" +coloc_results_path <- file.path( + base_url, + "Kunkle_Microglia_all_regions/QTL_merged_coloc_results.snp.tsv.gz" +) +merged_DT <- data.table::fread( + file.path( + "/pd-omics/brian/Fine_Mapping/Data/QTL", + "Microglia_all_regions", + "multiGWAS.microgliaQTL_finemapping.csv.gz" + ) +) +gg_egene <- coloc_nominated_eGenes(coloc_results, + merged_DT = merged_DT, + fill_var = NULL +) +} +
Convert data.table to GRanges object
+dt_to_granges( + subset_DT, + chrom_col = "CHR", + start_col = "POS", + end_col = start_col, + style = "NCBI", + verbose = TRUE +)+ + + +
Adds several columns that summarise the results across all fine-mapping +tools that were run:
The number of tools in which the SNP was proposed +in a credible set.
The mean per-SNP PP across all fine-mapping tools used.
Whether or not the SNP was in the credible set of >
+ consensus_thresh
SNPs (default=2).
find_consensus_SNPs( + finemap_dat, + credset_thresh = 0.95, + consensus_thresh = 2, + sort_by_support = TRUE, + exclude_methods = NULL, + top_CS_only = FALSE, + replace_PP_NAs = TRUE, + verbose = FALSE +)+ +
consensus_thresh | +Threshold for determining +Consensus_SNP status. |
+
---|
+#> Error in find_consensus_SNPs(finemap_dat = echodata::Nalls2019_merged): could not find function "find_consensus_SNPs"
Identify the top_N
Consensus SNP(s) per Locus,
+defined as the Consensus SNPs with the highest mean PP
+across all fine-mapping tools used.
find_topConsensus(dat, top_N = 1, grouping_vars = c("Locus"))+ + + +
Count bins of tool-specific and union CS sizes
+get_CS_bins(merged_DT)+ + +
Other summarise:
+CS_bin_plot()
,
+CS_counts_plot()
,
+get_CS_counts()
,
+get_SNPgroup_counts()
,
+peak_overlap_plot()
,
+plot_dataset_overlap()
,
+results_report()
,
+super_summary_plot()
+
Tally tool-specific and union CS sizes
+get_CS_counts(merged_DT, top_CS_only = FALSE)+ + +
Other summarise:
+CS_bin_plot()
,
+CS_counts_plot()
,
+get_CS_bins()
,
+get_SNPgroup_counts()
,
+peak_overlap_plot()
,
+plot_dataset_overlap()
,
+results_report()
,
+super_summary_plot()
+
Tally locus-specific SNP group sizes
+get_SNPgroup_counts(merged_DT, grouping_vars = "Locus")+ + +
Other summarise:
+CS_bin_plot()
,
+CS_counts_plot()
,
+get_CS_bins()
,
+get_CS_counts()
,
+peak_overlap_plot()
,
+plot_dataset_overlap()
,
+results_report()
,
+super_summary_plot()
+#> Warning: data set 'merged_DT' not found#> Error in get_SNPgroup_counts(merged_DT = echodata::Nalls2019_merged): could not find function "get_SNPgroup_counts"
R/granges_overlap.R
+ granges_overlap.Rd
Find overlap between genomic coordinates/ranges
+granges_overlap( + dat1, + dat2, + chrom_col.1 = "chrom", + start_col.1 = "start", + end_col.1 = "end", + chrom_col.2 = chrom_col.1, + start_col.2 = start_col.1, + end_col.2 = end_col.1, + return_merged = TRUE, + chr_format = "NCBI", + verbose = FALSE +)+ + + +
Convert GRanges object to BED format and save
+granges_to_bed( + GR.annotations, + output_path, + sep = "\t", + nThread = 1, + gzip = FALSE +)+ + +
Other XGR:
+XGR.download_and_standardize()
,
+XGR.enrichment_bootstrap()
,
+XGR.enrichment_plot()
,
+XGR.enrichment()
,
+XGR.filter_assays()
,
+XGR.filter_sources()
,
+XGR.import_annotations()
,
+XGR.iterate_enrichment()
,
+XGR.iterate_overlap()
,
+XGR.merge_and_process()
,
+XGR.parse_metadata()
,
+XGR.plot_enrichment()
,
+XGR.plot_peaks()
,
+XGR.prepare_foreground_background()
,
+XGR.sep_handler()
HaploR
annotations R/haplor_epigenetic_enrichment.R
+ haplor_epigenetics_enrichment.Rd
Test for enrichment of HaploR
annotations
haplor_epigenetics_enrichment( + snp_list1, + snp_list2, + chisq = TRUE, + fisher = TRUE, + epigenetic_variables = c("Promoter_histone_marks", "Enhancer_histone_marks"), + tissue_list = c("BRN", "BLD") +)+ + +
Other annotate:
+annotate_missense()
,
+biomart_geneInfo()
,
+biomart_snp_info()
,
+biomart_snps_to_geneInfo()
,
+haplor_epigenetics_summary()
,
+haplor_haploreg()
,
+haplor_regulomedb()
,
+plot_missense()
,
+snps_by_mutation_type()
HaploR
annotations R/haplor_epigenetics_summary.R
+ haplor_epigenetics_summary.Rd
Summarise HaploR
annotations
haplor_epigenetics_summary( + merged_results, + tissue_list = c("BRN", "BLD"), + epigenetic_variables = c("Promoter_histone_marks", "Enhancer_histone_marks") +)+ + +
Other annotate:
+annotate_missense()
,
+biomart_geneInfo()
,
+biomart_snp_info()
,
+biomart_snps_to_geneInfo()
,
+haplor_epigenetics_enrichment()
,
+haplor_haploreg()
,
+haplor_regulomedb()
,
+plot_missense()
,
+snps_by_mutation_type()
Download SNP-wise annotations from HaploReg
+haplor_haploreg(snp_list, verbose = T, chunk_size = NA)+ + +
Other annotate:
+annotate_missense()
,
+biomart_geneInfo()
,
+biomart_snp_info()
,
+biomart_snps_to_geneInfo()
,
+haplor_epigenetics_enrichment()
,
+haplor_epigenetics_summary()
,
+haplor_regulomedb()
,
+plot_missense()
,
+snps_by_mutation_type()
R/haplor_regulomedb.R
+ haplor_regulomedb.Rd
Download SNP-wise annotations from RegulomeDB
+haplor_regulomedb(snp_list, verbose = TRUE, chunk_size = NA)+ + +
Other annotate:
+annotate_missense()
,
+biomart_geneInfo()
,
+biomart_snp_info()
,
+biomart_snps_to_geneInfo()
,
+haplor_epigenetics_enrichment()
,
+haplor_epigenetics_summary()
,
+haplor_haploreg()
,
+plot_missense()
,
+snps_by_mutation_type()
Import a subset of a bigwig file +based on the coordinates in a GRanges object (gr.dat).
+import.bw.filt(bw.file, gr.dat, full_data = TRUE)+ +
bw.file | +Path to a bigwig file. |
+
---|---|
gr.dat | +GenomicRanges object to query the bigwig file with. |
+
full_data | +Whether to return the actual read ranges ( |
+
is_granges
+is_granges(obj)+ + + +
R/merge_celltype_specific_epigenomics.R
+ merge_celltype_specific_epigenomics.Rd
Merges multiple cell-type-specific epigenomic datasets +(Nott 2019, Corces 2020) into a single GRanges object.
+merge_celltype_specific_epigenomics(keep_extra_cols = FALSE)+ +
keep_extra_cols | +Keep extra columns +that are not shared across all annotations. |
+
---|
+gr.merged <- echoannot::merge_celltype_specific_epigenomics() +#>#>#>#>#>#>#>#>#>#>#>#>#>#>#>#> Error in -c(start, end): invalid argument to unary operator
R/merge_finemapping_results.R
+ merge_finemapping_results.Rd
Gather fine-mapping results from echolocatoR across all loci +and merge into a single data.frame.
+merge_finemapping_results( + dataset = "./Data/GWAS", + minimum_support = 1, + include_leadSNPs = TRUE, + LD_reference = NULL, + save_path = tempfile(fileext = "merged_results.csv.gz"), + from_storage = TRUE, + haploreg_annotation = FALSE, + regulomeDB_annotation = FALSE, + biomart_annotation = FALSE, + PP_threshold = 0.95, + consensus_threshold = 2, + exclude_methods = NULL, + top_CS_only = FALSE, + verbose = TRUE, + nThread = 1 +)+ +
dataset | +Path to the folder you want to recursively search for results files within + (e.g. "Data/GWAS/Nalls23andMe_2019"). +Set this to a path that includes multiple subfolders if you want to gather results +from multiple studies at once +(e.g. "Data/GWAS"). |
+
---|---|
minimum_support | +Filter SNPs by the minimum number +of fine-mapping tools that contained the SNP in their Credible Set. |
+
include_leadSNPs | +Include lead GWAS/QTL SNPs per locus +(regardless of other filtering criteria). |
+
from_storage | +Search for stored results files. |
+
haploreg_annotation | +Annotate SNPs with HaploReg (using |
+
regulomeDB_annotation | +Annotate SNPs with RegulomeDB (using |
+
biomart_annotation | +Annotate SNPs with |
+
PP_threshold | +Mean posterior probability threshold to include SNPs in mean PP Credible Set +(averaged across all fine-mapping tools). |
+
exclude_methods | +Exclude certain fine-mapping methods when estimating +mean.CS and Consensus_SNP. |
+
verbose | +Print messages. |
+
xlsx_path | +Save merged data.frame as Excel file. |
+
consensus_thresh | +The minimum number of tools that have the SNP in their Credible Set +to classify it as a Consensus_SNP. |
+
R/merge_finemapping_results_each.R
+ merge_finemapping_results_each.Rd
Create full cross-locus merged files for each dataset, +then return a subset of those files as one super-merged table.
+merge_finemapping_results_each( + study_dirs, + LD_reference = "1KGphase3", + minimum_support = 1, + include_leadSNPs = TRUE, + return_filter = "!is.na(SNP)", + merged_path = "merged_DT.csv.gz", + force_new_merge = FALSE, + nThread = 1, + verbose = TRUE +)+ + + +
R/message_parallel.R
+ message_parallel.Rd
Send messages to console even from within parallel processes
+message_parallel(...)+ + +
A message
+ +Order loci by UCS size, or alphabetically
+order_loci( + dat, + merged_DT, + by_UCS_size = FALSE, + descending = TRUE, + verbose = FALSE +)+ + + +
+#> Warning: data set 'merged_DT' not foundif (FALSE) { +merged_DT <- echodata::Nalls2019_merged +merged_DT <- order_loci( + dat = merged_DT, + merged_DT = merged_DT, + descending = FALSE +) +} +
Get overlap between SNPs and epigenomic peaks
+peak_overlap( + merged_DT, + snp_filter = "!is.na(SNP)", + include.NOTT_2019_peaks = TRUE, + include.NOTT_2019_enhancers_promoters = TRUE, + include.NOTT_2019_PLACseq = TRUE, + include.CORCES_2020_scATACpeaks = TRUE, + include.CORCES_2020_Cicero_coaccess = TRUE, + include.CORCES_2020_bulkATACpeaks = TRUE, + include.CORCES_2020_HiChIP_FitHiChIP_coaccess = TRUE, + include.CORCES_2020_gene_annotations = TRUE, + verbose = T +)+ + + +
R/peak_overlap_plot.R
+ peak_overlap_plot.Rd
Plot overlap between some SNP group and various epigenomic data
+peak_overlap_plot( + merged_DT, + snp_filter = "Consensus_SNP==TRUE", + include.NOTT_2019_peaks = TRUE, + include.NOTT_2019_enhancers_promoters = TRUE, + include.NOTT_2019_PLACseq = TRUE, + include.CORCES_2020_scATACpeaks = TRUE, + include.CORCES_2020_Cicero_coaccess = TRUE, + include.CORCES_2020_bulkATACpeaks = TRUE, + include.CORCES_2020_HiChIP_FitHiChIP_coaccess = TRUE, + include.CORCES_2020_gene_annotations = TRUE, + plot_celltype_specificity = TRUE, + plot_celltype_specificity_genes = FALSE, + facets_formula = ". ~ Cell_type", + show_plot = TRUE, + label_yaxis = TRUE, + x_strip_angle = 90, + x_tick_angle = 40, + drop_empty_cols = F, + fill_title = paste(snp_filter, "\nin epigenomic peaks"), + save_path = F, + height = 11, + width = 12, + subplot_widths = c(1, 0.5), + verbose = TRUE +)+ +
include.NOTT_2019_peaks | +Plot SNP subset overlap with +peaks from cell-type-specific bulk ATAC, H3K27ac, and H3K4me3 assays. |
+
---|---|
include.NOTT_2019_enhancers_promoters | +Plot SNP subset overlap with +cell enhancers and promoters. |
+
include.CORCES_2020_scATACpeaks | +Plot SNP subset overlap with +cell-type-specific scATAC-seq peaks. |
+
include.CORCES_2020_Cicero_coaccess | +Plot SNP subset overlap with +Cicero coaccessibility peaks (derived from scATACseq). |
+
+Nott et al. (2019) + +Corces et al. (2020/bioRxiv)
+Other summarise:
+CS_bin_plot()
,
+CS_counts_plot()
,
+get_CS_bins()
,
+get_CS_counts()
,
+get_SNPgroup_counts()
,
+plot_dataset_overlap()
,
+results_report()
,
+super_summary_plot()
+# Consensus SNPs +gg_peaks <- peak_overlap_plot( + merged_DT = echodata::Nalls2019_merged, + snp_filter = "Consensus_SNP==T", + fill_title = "Consensus SNPs in epigenomic peaks" +) +#>#>#>#>#>#>#>#>#>#>#>#>#>#>#>#>#> Warning: The `.dots` argument of `group_by()` is deprecated as of dplyr 1.0.0.#>#>#>#>#>#>#>#>#>#>#>#>#>#>#>#>#>#>#>#>#>#>#>#>#>#> Error in if (!is.null(convert_ref_genome) && toupper(convert_ref_genome) != : +#> missing value where TRUE/FALSE needed# UCS SNPs +gg_peaks <- peak_overlap_plot( + merged_DT = echodata::Nalls2019_merged, + snp_filter = "Support>0", + fill_title = "UCS SNPs in epigenomic peaks" +) +#>#>#>#>#>#>#>#>#>#>#>#>#>#>#>#>#>#>#>#>#>#>#>#>#>#>#>#>#>#>#>#>#>#>#>#>#>#>#>#>#> Error in if (!is.null(convert_ref_genome) && toupper(convert_ref_genome) != : +#> missing value where TRUE/FALSE needed
Cross-tabulate SNP overlap (after applying filter) +between each pair of studies.
+plot_dataset_overlap( + merged_DT, + snp_filter = "!is.na(SNP)", + filename = NA, + formula_str = "~ SNP + Dataset", + triangle = FALSE, + proxies = NULL +)+ + +
Other summarise:
+CS_bin_plot()
,
+CS_counts_plot()
,
+get_CS_bins()
,
+get_CS_counts()
,
+get_SNPgroup_counts()
,
+peak_overlap_plot()
,
+results_report()
,
+super_summary_plot()
Plot any missense variants
+plot_missense( + merged_DT, + snp_filter = "Support>0", + label_yaxis = FALSE, + x_label = "UCS missense\nmutations", + show.legend = TRUE, + show_numbers = FALSE, + show_plot = TRUE +)+ + +
Other annotate:
+annotate_missense()
,
+biomart_geneInfo()
,
+biomart_snp_info()
,
+biomart_snps_to_geneInfo()
,
+haplor_epigenetics_enrichment()
,
+haplor_epigenetics_summary()
,
+haplor_haploreg()
,
+haplor_regulomedb()
,
+snps_by_mutation_type()
+if (FALSE) { +merged_DT <- echodata::Nalls2019_merged +gg_missense <- plot_missense( + merged_DT = merged_DT, + snp_filter = "Support>0" +) +gg_missense <- plot_missense( + merged_DT = merged_DT, + snp_filter = "Consensus_SNP==TRUE" +) +} +
Bind GRanges with different mcols
+rbind_granges(gr1, gr2)+ + + +
R/results_report.R
+ results_report.Rd
Give a quick summary report of the fine-mapping results
+results_report(merged_dat)+ + +
NULL
+Other summarise:
+CS_bin_plot()
,
+CS_counts_plot()
,
+get_CS_bins()
,
+get_CS_counts()
,
+get_SNPgroup_counts()
,
+peak_overlap_plot()
,
+plot_dataset_overlap()
,
+super_summary_plot()
+
Return only the missense SNPs
+snps_by_mutation_type(merged_results, mutation_type = "missense_variant")+ + +
Other annotate:
+annotate_missense()
,
+biomart_geneInfo()
,
+biomart_snp_info()
,
+biomart_snps_to_geneInfo()
,
+haplor_epigenetics_enrichment()
,
+haplor_epigenetics_summary()
,
+haplor_haploreg()
,
+haplor_regulomedb()
,
+plot_missense()
Source all files in a directory at once. +Also loads selected libraries.
+source_all(path = "R/", pattern = "*.R$", packages = "dplyr")+ +
path | +a character vector of full path names; the default
+ corresponds to the working directory, getwd(). |
+
---|---|
pattern | +an optional regular expression. Only file names + which match the regular expression will be returned. |
+
R/super_summary_plot.R
+ super_summary_plot.Rd
Merge all summary plots into one super plot
+super_summary_plot( + merged_DT, + snp_filter = "Consensus_SNP==TRUE", + coloc_results = NULL, + plot_missense = TRUE, + show_plot = TRUE, + save_plot = FALSE, + height = 15, + width = 13, + dpi = 500 +)+ + +
Other summarise:
+CS_bin_plot()
,
+CS_counts_plot()
,
+get_CS_bins()
,
+get_CS_counts()
,
+get_SNPgroup_counts()
,
+peak_overlap_plot()
,
+plot_dataset_overlap()
,
+results_report()