Skip to content

Commit

Permalink
added createPairs from enhancer
Browse files Browse the repository at this point in the history
  • Loading branch information
saral98 committed Nov 8, 2024
1 parent 25ed97e commit 8289df4
Show file tree
Hide file tree
Showing 13 changed files with 212 additions and 111 deletions.
13 changes: 8 additions & 5 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,20 @@ Package: CENTRE
Type: Package
Title: an R package to predict tissue specific enhancer target interactions
Version: 0.99.0
Authors@R: c(person("Sara", "Lopez Ruiz de Vargas", role = c("cre"),email = "[email protected]"),
Authors@R: c(person("Sara", "Lopez Ruiz de Vargas", role = c("cre"),
email = "[email protected]",
comment=c(ORCID="0000-0002-1290-4774")),
person("Trisevgeni", "Rapakoulia", role = c("aut"), email = ""),
person("Martin", "Vingron", role = c("aut"), email = "[email protected]"))
person("Martin", "Vingron", role = c("aut"),
email = "[email protected]"))
Description: An R package to predict Enhancer Promoter Interactions in specific
tissues. The package has two main use cases : the user can give a
file with genes of interest or enhancer target pairs of interest.
In the first case CENTRE will generate the enhancer target pairs
and predict how likely they are to interact in the tissue and in
the second case it will predict how likely the given pairs are to
interact.
License: MIT + file LICENSE
License: Artistic-2.0
Encoding: UTF-8
LazyData: false
Imports:
Expand All @@ -26,9 +29,9 @@ Imports:
utils,
stats
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.2.3
RoxygenNote: 7.3.1
Depends:
R (>= 4.2.0)
R (>= 4.4.0)
Suggests:
knitr,
testthat (>= 3.0.0),
Expand Down
7 changes: 3 additions & 4 deletions R/computeGenericFeatures.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#' @examples
#' #Create gene enhancer pairs
#' genes <- as.data.frame(c("ENSG00000130203.10",
#' "ENSG00000171119.3"))
#' "ENSG00000280087.1"))
#' colnames(genes) <- c("gene_id") #It is important to name the column gene_id
#' pairs <- CENTRE::createPairs(genes)
#'
Expand All @@ -25,7 +25,6 @@
#' @import utils
#' @importFrom metapod combineParallelPValues
#' @importFrom RSQLite dbConnect dbGetQuery dbDisconnect
#'
computeGenericFeatures <- function(pairs) {
startTime <- Sys.time()
## Pre-eliminary checks and computations
Expand All @@ -46,12 +45,12 @@ computeGenericFeatures <- function(pairs) {
featuresDistances$pair <- paste(featuresDistances$enhancer_id,
featuresDistances$gene_id2,
sep = "_")

#connect to the precomputed values database
conn <- RSQLite::dbConnect(RSQLite::SQLite(),
system.file("extdata",
"PrecomputedDataLight.db",
package = "CENTRE"))

combinedTestDf <- getPrecomputedValues("combinedTestData",
"combined_tests",
featuresDistances,
Expand Down
89 changes: 28 additions & 61 deletions R/createPairs.R
Original file line number Diff line number Diff line change
@@ -1,81 +1,48 @@
#' Create Pairs
#'
#' Creates all of the possible gene enhancer pairs at 500kb distance
#' from the given genes
#' transcription start sites.
#' from the given genes transcription start sites. The pairs can be also computed
#' from enhancers, in that mode we collect all gene enhancer pairs at 500kb distance
#' of the enhancer middle point.
#'
#' @param gene One column dataframe with gene ENSEMBL id's
#' @param ids One column dataframe with gene ENSEMBL id's or enhancer cCREs ids
#' @param enhancerCentered Boolean value. If true the pairs are computed from
#' enhancers. In the default setting (false) pairs are computed from genes.
#' @return dataframe with two columns the ENSEMBL id's without their version and
#' the enhancer id's
#' the enhancer id's (ENCODE cCREs)
#'
#'
#' @examples
#' #Create gene enhancer pairs
#' genes <- as.data.frame(c("ENSG00000130203.10",
#' ids <- as.data.frame(c("ENSG00000130203.10",
#' "ENSG00000171119.3"))
#' colnames(genes) <- c("gene_id") #It is important to name the column gene_id
#' pairs <- CENTRE::createPairs(genes)
#' colnames(ids) <- c("gene_id") #It is important to name the column gene_id or
#' # enhancer_id
#' pairs <- CENTRE::createPairs(ids)
#' @export
#' @import utils
#' @importFrom GenomicRanges GRanges findOverlaps
#' @importFrom IRanges IRanges
#' @importFrom RSQLite dbConnect dbGetQuery dbDisconnect
#' @importFrom regioneR extendRegions
createPairs <- function(gene) {
createPairs <- function(ids, enhancerCentered = FALSE) {
startTime <- Sys.time()

#check that the user named the column correctly
stopifnot("The column needs to be named gene_id" =
any(names(gene) == "gene_id"))

## remove the "." version id of ENSEMBL ids
gene$gene_id1 <- gsub("\\..*", "", gene$gene_id)


## connect to our GENCODE v40 database to get tts of the genes
conn <- RSQLite::dbConnect(RSQLite::SQLite(),
system.file("extdata",
"Annotation.db",
package = "CENTRE"))
#get chromosome and tts of our genes

query <- paste("SELECT gene_id1, chr, transcription_start FROM gencode WHERE gene_id1 in (",
paste0(sprintf("'%s'", gene$gene_id1), collapse = ", "), ")", sep = "")
gene <- RSQLite::dbGetQuery(conn, query)

#Select all of the annotation for ccres v3
ccresEnhancer <- RSQLite::dbGetQuery(conn, "SELECT * FROM ccres_enhancer")
RSQLite::dbDisconnect(conn)

genesRange <- with(gene,
GenomicRanges::GRanges(chr,
IRanges::IRanges(start = transcription_start,
end = transcription_start)))

#extend the gene region 500Kb to the left of TTS and to the right
genesRange <- regioneR::extendRegions(genesRange,
extend.start = 500000,
extend.end = 500000)

enhancerRange <- with(ccresEnhancer,
GenomicRanges::GRanges(V1,
IRanges::IRanges(start = new_start,
end = new_end)))


# find the enhancers that overlap the extended gene region
overlaps <- GenomicRanges::findOverlaps(genesRange, enhancerRange,
ignore.strand = TRUE)

ccresOverlapping <- data.frame(gene = overlaps@from, enhancer = overlaps@to)
ccresOverlapping$gene_id1 <- gene$gene_id1[ccresOverlapping$gene]
ccresOverlapping$enhancer_id <- ccresEnhancer$V5[ccresOverlapping$enhancer]

#select the gene and enhancer ids
ccresOverlapping <- ccresOverlapping[, c("gene_id1", "enhancer_id")]

### add a function to exclude any pairs that are not in the same chromosome


if (enhancerCentered == TRUE){
stopifnot("The column needs to be named enhancer_id" =
any(names(ids) == "enhancer_id"))
cat("Pairs are computed from the enhancer IDs\n")
ccresOverlapping <- enhancerCenteredPairs(ids)

} else {
#check that the user named the column correctly
stopifnot("The column needs to be named gene_id" =
any(names(ids) == "gene_id"))
cat("Pairs are computed from the gene IDs\n")
ccresOverlapping <- geneCenteredPairs(ids)

}

cat(paste0("time: ", format(Sys.time() - startTime), "\n"))
return(ccresOverlapping)
}
103 changes: 103 additions & 0 deletions R/functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -377,3 +377,106 @@ downloader <- function(file, method) {
}
}

################################################################################
# function : gene centered pairs
################################################################################

geneCenteredPairs <- function(gene){
## remove the "." version id of ENSEMBL ids
gene$gene_id1 <- gsub("\\..*", "", gene$gene_id)


## connect to our GENCODE v40 database to get tts of the genes
conn <- RSQLite::dbConnect(RSQLite::SQLite(),
system.file("extdata",
"Annotation.db",
package = "CENTRE"))
#get chromosome and tts of our genes

query <- paste("SELECT gene_id1, chr, transcription_start FROM gencode WHERE gene_id1 in (",
paste0(sprintf("'%s'", gene$gene_id1), collapse = ", "), ")", sep = "")
gene <- RSQLite::dbGetQuery(conn, query)

#Select all of the annotation for ccres v3
ccresEnhancer <- RSQLite::dbGetQuery(conn, "SELECT * FROM ccres_enhancer")
RSQLite::dbDisconnect(conn)


genesRange <- with(gene,
GenomicRanges::GRanges(chr,
IRanges::IRanges(start = transcription_start,
end = transcription_start),
gene_id1 = gene_id1))

#extend the gene region 500Kb to the left of TTS and to the right
genesRange <- regioneR::extendRegions(genesRange,
extend.start = 500000,
extend.end = 500000)

enhancerRange <- with(ccresEnhancer,
GenomicRanges::GRanges(V1,
IRanges::IRanges(start = new_start,
end = new_end),
enhancer_id = V5))


# find the enhancers that overlap the extended gene region
overlaps <- GenomicRanges::findOverlaps(genesRange, enhancerRange,
ignore.strand = TRUE)

ccresOverlapping <- data.frame(gene_id1 = GenomicRanges::elementMetadata(genesRange)$gene_id1[overlaps@from],
enhancer_id = GenomicRanges::elementMetadata(enhancerRange)$enhancer_id[overlaps@to])

return(ccresOverlapping)

}



################################################################################
# function : enhancer centered pairs
################################################################################

enhancerCenteredPairs <- function(enhancer){

conn <- RSQLite::dbConnect(RSQLite::SQLite(),
system.file("extdata",
"Annotation.db",
package = "CENTRE"))
#get chromosome and tts of our genes

query <- paste("SELECT V5, V1, middle_point FROM ccres_enhancer WHERE V5 in (",
paste0(sprintf("'%s'", enhancer$enhancer_id), collapse = ", "), ")", sep = "")
enhancer <- RSQLite::dbGetQuery(conn, query)

#Select all of the annotation for ccres v3
gene <- RSQLite::dbGetQuery(conn, "SELECT * FROM gencode")
RSQLite::dbDisconnect(conn)
gene$gene_id1 <- gsub("\\..*", "", gene$gene_id)
enhancerRange <- with(enhancer,
GenomicRanges::GRanges(V1,
IRanges::IRanges(start = middle_point,
end = middle_point),
enhancer_id = V5))

#extend the gene region 500Kb to the left of TTS and to the right
enhancerRange <- regioneR::extendRegions(enhancerRange,
extend.start = 500000,
extend.end = 500000)

genesRange <- with(gene,
GenomicRanges::GRanges(chr,
IRanges::IRanges(start = new_start,
end = new_end),
gene_id1 = gene_id1))


# find the enhancers that overlap the extended gene region
overlaps <- GenomicRanges::findOverlaps(enhancerRange,
genesRange,
ignore.strand = TRUE)

ccresOverlapping <- data.frame(gene_id1 = GenomicRanges::elementMetadata(genesRange)$gene_id1[overlaps@to],
enhancer_id = GenomicRanges::elementMetadata(enhancerRange)$enhancer_id[overlaps@from])
return(ccresOverlapping)
}
Binary file added inst/extdata/output_enh_pairs.rds
Binary file not shown.
18 changes: 11 additions & 7 deletions man/centrePrediction.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 13 additions & 7 deletions man/computeCellTypeFeatures.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/computeGenericFeatures.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 8289df4

Please sign in to comment.