-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
saral98
committed
Nov 8, 2024
1 parent
25ed97e
commit 8289df4
Showing
13 changed files
with
212 additions
and
111 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,17 +2,20 @@ Package: CENTRE | |
Type: Package | ||
Title: an R package to predict tissue specific enhancer target interactions | ||
Version: 0.99.0 | ||
Authors@R: c(person("Sara", "Lopez Ruiz de Vargas", role = c("cre"),email = "[email protected]"), | ||
Authors@R: c(person("Sara", "Lopez Ruiz de Vargas", role = c("cre"), | ||
email = "[email protected]", | ||
comment=c(ORCID="0000-0002-1290-4774")), | ||
person("Trisevgeni", "Rapakoulia", role = c("aut"), email = ""), | ||
person("Martin", "Vingron", role = c("aut"), email = "[email protected]")) | ||
person("Martin", "Vingron", role = c("aut"), | ||
email = "[email protected]")) | ||
Description: An R package to predict Enhancer Promoter Interactions in specific | ||
tissues. The package has two main use cases : the user can give a | ||
file with genes of interest or enhancer target pairs of interest. | ||
In the first case CENTRE will generate the enhancer target pairs | ||
and predict how likely they are to interact in the tissue and in | ||
the second case it will predict how likely the given pairs are to | ||
interact. | ||
License: MIT + file LICENSE | ||
License: Artistic-2.0 | ||
Encoding: UTF-8 | ||
LazyData: false | ||
Imports: | ||
|
@@ -26,9 +29,9 @@ Imports: | |
utils, | ||
stats | ||
Roxygen: list(markdown = TRUE) | ||
RoxygenNote: 7.2.3 | ||
RoxygenNote: 7.3.1 | ||
Depends: | ||
R (>= 4.2.0) | ||
R (>= 4.4.0) | ||
Suggests: | ||
knitr, | ||
testthat (>= 3.0.0), | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,81 +1,48 @@ | ||
#' Create Pairs | ||
#' | ||
#' Creates all of the possible gene enhancer pairs at 500kb distance | ||
#' from the given genes | ||
#' transcription start sites. | ||
#' from the given genes transcription start sites. The pairs can be also computed | ||
#' from enhancers, in that mode we collect all gene enhancer pairs at 500kb distance | ||
#' of the enhancer middle point. | ||
#' | ||
#' @param gene One column dataframe with gene ENSEMBL id's | ||
#' @param ids One column dataframe with gene ENSEMBL id's or enhancer cCREs ids | ||
#' @param enhancerCentered Boolean value. If true the pairs are computed from | ||
#' enhancers. In the default setting (false) pairs are computed from genes. | ||
#' @return dataframe with two columns the ENSEMBL id's without their version and | ||
#' the enhancer id's | ||
#' the enhancer id's (ENCODE cCREs) | ||
#' | ||
#' | ||
#' @examples | ||
#' #Create gene enhancer pairs | ||
#' genes <- as.data.frame(c("ENSG00000130203.10", | ||
#' ids <- as.data.frame(c("ENSG00000130203.10", | ||
#' "ENSG00000171119.3")) | ||
#' colnames(genes) <- c("gene_id") #It is important to name the column gene_id | ||
#' pairs <- CENTRE::createPairs(genes) | ||
#' colnames(ids) <- c("gene_id") #It is important to name the column gene_id or | ||
#' # enhancer_id | ||
#' pairs <- CENTRE::createPairs(ids) | ||
#' @export | ||
#' @import utils | ||
#' @importFrom GenomicRanges GRanges findOverlaps | ||
#' @importFrom IRanges IRanges | ||
#' @importFrom RSQLite dbConnect dbGetQuery dbDisconnect | ||
#' @importFrom regioneR extendRegions | ||
createPairs <- function(gene) { | ||
createPairs <- function(ids, enhancerCentered = FALSE) { | ||
startTime <- Sys.time() | ||
|
||
#check that the user named the column correctly | ||
stopifnot("The column needs to be named gene_id" = | ||
any(names(gene) == "gene_id")) | ||
|
||
## remove the "." version id of ENSEMBL ids | ||
gene$gene_id1 <- gsub("\\..*", "", gene$gene_id) | ||
|
||
|
||
## connect to our GENCODE v40 database to get tts of the genes | ||
conn <- RSQLite::dbConnect(RSQLite::SQLite(), | ||
system.file("extdata", | ||
"Annotation.db", | ||
package = "CENTRE")) | ||
#get chromosome and tts of our genes | ||
|
||
query <- paste("SELECT gene_id1, chr, transcription_start FROM gencode WHERE gene_id1 in (", | ||
paste0(sprintf("'%s'", gene$gene_id1), collapse = ", "), ")", sep = "") | ||
gene <- RSQLite::dbGetQuery(conn, query) | ||
|
||
#Select all of the annotation for ccres v3 | ||
ccresEnhancer <- RSQLite::dbGetQuery(conn, "SELECT * FROM ccres_enhancer") | ||
RSQLite::dbDisconnect(conn) | ||
|
||
genesRange <- with(gene, | ||
GenomicRanges::GRanges(chr, | ||
IRanges::IRanges(start = transcription_start, | ||
end = transcription_start))) | ||
|
||
#extend the gene region 500Kb to the left of TTS and to the right | ||
genesRange <- regioneR::extendRegions(genesRange, | ||
extend.start = 500000, | ||
extend.end = 500000) | ||
|
||
enhancerRange <- with(ccresEnhancer, | ||
GenomicRanges::GRanges(V1, | ||
IRanges::IRanges(start = new_start, | ||
end = new_end))) | ||
|
||
|
||
# find the enhancers that overlap the extended gene region | ||
overlaps <- GenomicRanges::findOverlaps(genesRange, enhancerRange, | ||
ignore.strand = TRUE) | ||
|
||
ccresOverlapping <- data.frame(gene = overlaps@from, enhancer = overlaps@to) | ||
ccresOverlapping$gene_id1 <- gene$gene_id1[ccresOverlapping$gene] | ||
ccresOverlapping$enhancer_id <- ccresEnhancer$V5[ccresOverlapping$enhancer] | ||
|
||
#select the gene and enhancer ids | ||
ccresOverlapping <- ccresOverlapping[, c("gene_id1", "enhancer_id")] | ||
|
||
### add a function to exclude any pairs that are not in the same chromosome | ||
|
||
|
||
if (enhancerCentered == TRUE){ | ||
stopifnot("The column needs to be named enhancer_id" = | ||
any(names(ids) == "enhancer_id")) | ||
cat("Pairs are computed from the enhancer IDs\n") | ||
ccresOverlapping <- enhancerCenteredPairs(ids) | ||
|
||
} else { | ||
#check that the user named the column correctly | ||
stopifnot("The column needs to be named gene_id" = | ||
any(names(ids) == "gene_id")) | ||
cat("Pairs are computed from the gene IDs\n") | ||
ccresOverlapping <- geneCenteredPairs(ids) | ||
|
||
} | ||
|
||
cat(paste0("time: ", format(Sys.time() - startTime), "\n")) | ||
return(ccresOverlapping) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Oops, something went wrong.