Add d XGR, RR ROADMAP

RajLabMSSM · Sep 18, 2021 · 73b6b4a · 73b6b4a
1 parent 1f87557
commit 73b6b4a
Show file tree

Hide file tree

Showing 191 changed files with 22,614 additions and 49 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -27,7 +27,7 @@ LazyData: true
 Depends: R (>= 4.1)
 SystemRequirements: Python (>= 3.7.0)
 biocViews:
-Imports:  
+Imports: 
     echodata,
     echotabix,
     dplyr,
@@ -50,7 +50,8 @@ Imports:
     S4Vectors,
     GenomeInfoDb,
     biomaRt,
-    IRanges
+    IRanges,
+    XGR
 Suggests:
     markdown,
     rmarkdown,

diff --git a/NAMESPACE b/NAMESPACE
@@ -3,24 +3,37 @@
 export(CORCES_2020.get_HiChIP_FitHiChIP_overlap)
 export(CS_bin_plot)
 export(CS_counts_plot)
+export(ROADMAP.construct_reference)
+export(ROADMAP.query)
+export(ROADMAP.query_and_plot)
+export(XGR.download_and_standardize)
+export(XGR.enrichment)
+export(merge_celltype_specific_epigenomics)
 export(merge_finemapping_results)
 export(peak_overlap_plot)
 export(plot_dataset_overlap)
 export(super_summary_plot)
-import(ggplot2)
+import(ggplot2, except = c(geom_rect, ggsave))
 importFrom(DescTools,Divisors)
+importFrom(DescTools,RoundTo)
 importFrom(GenomeInfoDb,seqlevelsStyle)
 importFrom(GenomicRanges,GRanges)
+importFrom(GenomicRanges,GRangesList)
 importFrom(GenomicRanges,elementMetadata)
 importFrom(GenomicRanges,end)
 importFrom(GenomicRanges,findOverlaps)
 importFrom(GenomicRanges,makeGRangesFromDataFrame)
 importFrom(GenomicRanges,mcols)
+importFrom(GenomicRanges,seqnames)
 importFrom(GenomicRanges,start)
 importFrom(IRanges,IRanges)
+importFrom(IRanges,overlapsAny)
+importFrom(R.utils,gzip)
 importFrom(RColorBrewer,brewer.pal)
 importFrom(S4Vectors,queryHits)
 importFrom(S4Vectors,subjectHits)
+importFrom(XGR,xGRviaGenomicAnno)
+importFrom(XGR,xRDataLoader)
 importFrom(biomaRt,getBM)
 importFrom(biomaRt,useMart)
 importFrom(data.table,as.data.table)
@@ -30,6 +45,7 @@ importFrom(data.table,fwrite)
 importFrom(data.table,melt.data.table)
 importFrom(data.table,merge.data.table)
 importFrom(data.table,rbindlist)
+importFrom(data.table,transpose)
 importFrom(dplyr,"%>%")
 importFrom(dplyr,arrange)
 importFrom(dplyr,case_when)
@@ -39,12 +55,15 @@ importFrom(dplyr,group_by)
 importFrom(dplyr,mutate)
 importFrom(dplyr,n_distinct)
 importFrom(dplyr,rename)
+importFrom(dplyr,sample_n)
 importFrom(dplyr,select)
 importFrom(dplyr,slice)
 importFrom(dplyr,summarise)
 importFrom(dplyr,summarise_at)
+importFrom(dplyr,tally)
 importFrom(dplyr,top_n)
 importFrom(dplyr,vars)
+importFrom(echotabix,query_tabular)
 importFrom(ggbio,autoplot)
 importFrom(ggbio,geom_arch)
 importFrom(ggbio,geom_rect)
@@ -53,9 +72,18 @@ importFrom(ggbio,ggsave)
 importFrom(ggbio,plotGrandLinear)
 importFrom(ggbio,scale_x_sequnit)
 importFrom(ggbio,theme_genome)
+importFrom(ggplot2,aes)
+importFrom(ggplot2,element_text)
+importFrom(ggplot2,ggplot_build)
+importFrom(ggplot2,guide_legend)
+importFrom(ggplot2,guides)
+importFrom(ggplot2,scale_y_continuous)
+importFrom(ggplot2,theme)
+importFrom(ggplot2,theme_classic)
 importFrom(grDevices,dev.off)
 importFrom(grDevices,png)
 importFrom(haploR,queryRegulome)
+importFrom(methods,is)
 importFrom(parallel,mclapply)
 importFrom(patchwork,plot_layout)
 importFrom(patchwork,plot_spacer)
@@ -65,6 +93,7 @@ importFrom(scales,alpha)
 importFrom(stats,as.formula)
 importFrom(stats,formula)
 importFrom(stats,median)
+importFrom(stats,p.adjust)
 importFrom(stats,setNames)
 importFrom(tidyr,separate)
 importFrom(utils,head)
diff --git a/R/CS_bin_plot.R b/R/CS_bin_plot.R
@@ -4,8 +4,8 @@
 #' @examples
 #' bin_plot <- CS_bin_plot(merged_DT = echodata::Nalls2019_merged)
 #' @export
-#' @import ggplot2
 #' @importFrom RColorBrewer brewer.pal
+#' @importFrom stats setNames
 CS_bin_plot <- function(merged_DT,
                         show_plot = TRUE) {
     Method <- bin <- ..count.. <- NULL
@@ -17,40 +17,40 @@ CS_bin_plot <- function(merged_DT,
     custom_colors <- RColorBrewer::brewer.pal(
         n = length(levels(bin_counts$bin)), "GnBu"
     )
-    custom_colors_dict <- setNames(
+    custom_colors_dict <- stats::setNames(
         custom_colors[seq(1, length(used_bins))],
         rev(used_bins)
     )
     custom_colors_dict[names(custom_colors_dict) == "0"] <- "lightgray"
 
-    bin_plot <- ggplot(
+    bin_plot <- ggplot2::ggplot(
         subset(bin_counts, Method != "mean"),
-        aes(x = Method, fill = bin)
+        ggplot2::aes(x = Method, fill = bin)
     ) +
-        geom_bar(
+        ggplot2::geom_bar(
             stat = "count", show.legend = TRUE,
-            position = position_stack(reverse = FALSE), color = "white"
+            position = ggplot2::position_stack(reverse = FALSE), color = "white"
         ) +
         # scale_fill_brewer(palette = "Spectral", direction = -1) +
-        scale_fill_manual(values = custom_colors_dict) +
+        ggplot2::scale_fill_manual(values = custom_colors_dict) +
         # geom_text(aes(label = paste(bin,"SNPs")),
         # position =  position_stack(vjust = .5), vjust=-1, stat = "count") +
-        geom_text(aes(label = ..count..),
-            position = position_stack(vjust = .5),
+        ggplot2::geom_text(ggplot2::aes(label = ..count..),
+            position = ggplot2::position_stack(vjust = .5),
             vjust = .5, stat = "count"
         ) +
-        theme_bw() +
-        labs(x = NULL, y = "Loci", fill = "CS size") +
-        coord_flip() +
-        theme(
-            panel.grid.major = element_blank(),
-            panel.grid.minor = element_blank(),
-            rect = element_blank(),
-            axis.text.x = element_blank(),
-            axis.ticks = element_blank(),
+        ggplot2::theme_bw() +
+        ggplot2::labs(x = NULL, y = "Loci", fill = "CS size") +
+        ggplot2::coord_flip() +
+        ggplot2::theme(
+            panel.grid.major = ggplot2::element_blank(),
+            panel.grid.minor = ggplot2::element_blank(),
+            rect = ggplot2::element_blank(),
+            axis.text.x = ggplot2::element_blank(),
+            axis.ticks = ggplot2::element_blank(),
             legend.position = "top"
         ) +
-        guides(fill = guide_legend(nrow = 1, reverse = TRUE))
+        ggplot2::guides(fill = ggplot2::guide_legend(nrow = 1, reverse = TRUE))
     if (show_plot) print(bin_plot)
     return(list(
         plot = bin_plot,

diff --git a/R/CS_counts_plot.R b/R/CS_counts_plot.R
@@ -5,7 +5,7 @@
 #' @examples
 #' gg_CS <- CS_counts_plot(merged_DT = echodata::Nalls2019_merged)
 #' @export
-#' @import ggplot2
+#' @rawNamespace import(ggplot2, except = c(geom_rect, ggsave))
 #' @importFrom dplyr %>% mutate arrange
 CS_counts_plot <- function(merged_DT,
                            show_numbers = TRUE,

diff --git a/R/GR.name_filter_convert.R b/R/GR.name_filter_convert.R
@@ -0,0 +1,17 @@
+#' Name, filter, and convert a list of genomic ranges
+#'
+#' Labels \code{GR.final} with \code{GR.names}, drops \code{NULL} entries and
+#' entries whose \code{seqnames} length is below \code{min_hits}, and returns
+#' the remainder as a \code{GRangesList}.
+#' @family plot
+#' @keywords internal
+GR.name_filter_convert <- function(GR.final, GR.names, min_hits = 1) {
+    names(GR.final) <- GR.names
+    is_missing <- as.logical(lapply(GR.final, is.null))
+    present <- GR.final[!is_missing]
+    # Keep only the entries holding at least `min_hits` ranges
+    has_enough <- as.logical(lapply(present, function(gr) {
+        length(GenomicRanges::seqnames(gr)) >= min_hits
+    }))
+    GenomicRanges::GRangesList(present[has_enough])
+}
diff --git a/R/NOTT_2019.epigenomic_histograms.R b/R/NOTT_2019.epigenomic_histograms.R
@@ -14,10 +14,11 @@
 #'     return_assay_track = TRUE,
 #'     save_annot = FALSE
 #' )
-#' @import ggplot2
+#' @rawNamespace import(ggplot2, except = c(geom_rect, ggsave))
 #' @importFrom ggbio autoplot geom_rect scale_x_sequnit plotGrandLinear
 #' @importFrom ggbio theme_genome ggsave
 #' @importFrom stats formula
+#' @importFrom GenomeInfoDb seqlevelsStyle
 NOTT_2019.epigenomic_histograms <- function(finemap_dat,
                                             locus_dir,
                                             show_plot = TRUE,
@@ -45,15 +46,20 @@ NOTT_2019.epigenomic_histograms <- function(finemap_dat,
     # library(BiocGenerics)
     # library(GenomicRanges)
     # library(ggbio)
-    # show_plot=T;save_plot=T;full_data=T;return_assay_track=F;binwidth=2500; geom="histogram"; plot_formula="Cell_type ~."; show_regulatory_rects=T;  bigwig_dir=NULL; verbose=T; nThread=1;
-    # finemap_dat=echoannot::LRRK2; plot.zoom=500000; fill_var="Assay"; density_adjust=.2; strip.text.y.angle=0;
+    # show_plot=T;save_plot=T;full_data=T;return_assay_track=F;
+    # binwidth=2500; geom="histogram"; plot_formula="Cell_type ~."; 
+    # show_regulatory_rects=T;  bigwig_dir=NULL; verbose=T; nThread=1;
+    # finemap_dat=echoannot::LRRK2; plot.zoom=500000; fill_var="Assay"; 
+    # density_adjust=.2; strip.text.y.angle=0;
 
     # Import BigWig annotation files
     bigWigFiles <- echoannot::NOTT_2019.bigwig_metadata
-    # Some bigWig files were initially loaded to UCSC GB, but then later taken down by the authors....
+    # Some bigWig files were initially loaded to UCSC GB, 
+    # but then later taken down by the authors....
     # However I saved these files on Minerva beforehand.
     bigWigFiles <- subset(bigWigFiles, UCSC_available == "T")
-    bigWigFiles <- dplyr::mutate(bigWigFiles, cell_type = gsub(" ", ".", cell_type))
+    bigWigFiles <- dplyr::mutate(bigWigFiles, 
+                                 cell_type = gsub(" ", ".", cell_type))
     # Convert finemap data to granges
     dat <- finemap_dat
     dat$seqnames <- dat$CHR

diff --git a/R/NOTT_2019.plac_seq_plot.R b/R/NOTT_2019.plac_seq_plot.R
@@ -13,7 +13,7 @@
 #' # Zoom in
 #' trks_plus_lines <- NOTT_2019.plac_seq_plot(finemap_dat = BST1, locus_dir = file.path("~/Desktop", locus_dir), zoom_window = 500000, highlight_plac = TRUE)
 #' }
-#' @import ggplot2
+#' @rawNamespace import(ggplot2, except = c(geom_rect, ggsave))
 #' @importFrom ggbio ggbio geom_arch geom_rect scale_x_sequnit ggsave
 #' @importFrom IRanges IRanges
 NOTT_2019.plac_seq_plot <- function(finemap_dat = NULL,

diff --git a/R/PLOT.get_max_histogram_height.R b/R/PLOT.get_max_histogram_height.R
@@ -1,10 +1,16 @@
+#' PLOT.get_max_histogram_height
+#' 
+#' @keywords internal
+#' @importFrom methods is
+#' @importFrom ggplot2 ggplot_build
+#' @importFrom DescTools RoundTo
 PLOT.get_max_histogram_height <- function(gg,
                                           round_to = NULL,
                                           verbose = TRUE) {
-    if (tolower(class(gg)[1]) == "ggbio") gg <- gg@ggplot
-    printer("+ PLOT:: Calculating max histogram height", v = verbose)
+    if (methods::is(gg,"ggbio")) {gg <- gg@ggplot}
+    messager("+ PLOT:: Calculating max histogram height", v = verbose)
     dat <- ggplot2::ggplot_build(gg)$data[[1]]
-    max_height <- max(dat$ymax)
+    max_height <- max(dat$ymax, na.rm = TRUE)
     if (!is.null(round_to)) {
         max_height <- DescTools::RoundTo(max_height, round_to)
     }

diff --git a/R/PLOT.get_window_limits.R b/R/PLOT.get_window_limits.R
@@ -26,7 +26,7 @@ PLOT.get_window_limits <- function(finemap_dat,
                                              .index_as_center = index_as_center,
                                              .genomic_units = genomic_units,
                                              .verbose = verbose) {
-        printer("+ Inferring genomic limits for window:", pz, v = .verbose)
+        messager("+ Inferring genomic limits for window:", pz, v = .verbose)
         # Zoom #x as  input
         if (.index_as_center) {
             middle_pos <- subset(.finemap_dat, leadSNP)$POS[1]

diff --git a/R/ROADMAP.construct_reference.R b/R/ROADMAP.construct_reference.R
@@ -0,0 +1,40 @@
+#' Load the Roadmap annotation metadata table
+#'
+#' @param ref_path Path to the stored ROADMAP metadata file.
+#' @param keyword_query Keywords searched (case-insensitively) across all
+#' columns of the metadata; only the matching rows are kept.
+#' Several keywords can be given at once:
+#' \code{c("placenta","liver","monocytes")}
+#' @return The metadata table, with its first column named \code{EID}.
+#' @examples
+#' ref <- ROADMAP.construct_reference(
+#'     keyword_query = c("placenta", "liver", "monocytes")
+#' )
+#' @family ROADMAP
+#' @export
+#' @importFrom data.table transpose fread
+ROADMAP.construct_reference <- function(ref_path =
+                                            system.file(
+                                                "extdata/ROADMAP",
+                                                "ROADMAP_Epigenomic.js",
+                                                package = "echoannot"
+                                            ),
+                                        keyword_query = NULL) {
+    metadata <- suppressWarnings(data.table::fread(ref_path))
+    colnames(metadata)[1] <- "EID"
+    # Nothing to filter on: hand back the full table
+    if (is.null(keyword_query)) {
+        return(metadata)
+    }
+    # Transposing turns every metadata row into one list element for grep
+    pattern <- paste(keyword_query, collapse = "|")
+    hit_rows <- grep(pattern, data.table::transpose(metadata),
+        ignore.case = TRUE
+    )
+    metadata <- metadata[hit_rows, ]
+    messager(
+        "+ ROADMAP::", nrow(metadata),
+        "annotation(s) identified that match `keyword_query`."
+    )
+    return(metadata)
+}
diff --git a/R/ROADMAP.merge_and_process_grl.R b/R/ROADMAP.merge_and_process_grl.R
@@ -0,0 +1,44 @@
+#' Standardize Roadmap query
+#'
+#' Flattens the query results into one set of ranges, labels each range
+#' with its source and chromatin state, and keeps those overlapping
+#' \code{gr.snp}.
+#' @param grl.roadmap Roadmap query results
+#' @param gr.snp Ranges that a Roadmap range must overlap to be retained.
+#' @param n_top_tissues The number of top tissues to include,
+#' sorted by greatest number of rows
+#' (i.e. the number of genomic ranges within the window).
+#' Set to \code{NULL} to keep all tissues.
+#' @param sep Replacement for underscores in the \code{Source} labels.
+#' @family ROADMAP
+#' @importFrom IRanges overlapsAny
+#' @importFrom dplyr %>% group_by tally n_distinct
+ROADMAP.merge_and_process_grl <- function(grl.roadmap,
+                                          gr.snp,
+                                          n_top_tissues = 5,
+                                          sep = " ") {
+    # Flatten the list; the resulting names become the `Source` labels
+    merged <- unlist(grl.roadmap)
+    merged$Source <- gsub("_", sep, names(merged))
+    # The chromatin state is the second "_"-delimited field of `State`
+    merged$ChromState <- unlist(lapply(merged$State, function(state) {
+        base::strsplit(state, "_")[[1]][2]
+    }))
+    # Flag, element by element, the ranges that overlap `gr.snp`
+    hits_snp <- unlist(lapply(grl.roadmap, function(gr) {
+        IRanges::overlapsAny(gr, gr.snp, minoverlap = 1)
+    }))
+    filtered <- merged[hits_snp]
+    if (is.null(n_top_tissues)) {
+        return(filtered)
+    }
+    # Rank sources by how many retained ranges each contributes
+    tissue_counts <- dplyr::tally(
+        dplyr::group_by(data.frame(filtered), Source),
+        sort = TRUE
+    )
+    n_keep <- min(n_top_tissues, dplyr::n_distinct(tissue_counts$Source))
+    keep_sources <- unique(tissue_counts$Source[seq(1, n_keep)])
+    filtered <- subset(filtered, Source %in% keep_sources)
+    return(filtered)
+}