-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
5 changed files
with
167 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,6 +2,7 @@ | |
^fasta_files$ | ||
^genbank_files$ | ||
^gff_files$ | ||
^bed_files$ | ||
^renv$ | ||
^renv\.lock$ | ||
^.*\.Rproj$ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,6 +6,7 @@ | |
.DS_store | ||
renv/ | ||
fasta_files/ | ||
bed_files/ | ||
genbank_files/ | ||
gff_files/ | ||
renv.lock | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,130 @@ | ||
#' Read BED Files
#'
#' This function reads BED files from a specified directory or file path and
#' combines them into a single data frame. BED files use 0-based coordinate
#' starts, while this function transforms the coordinates to 1-based during
#' import.
#'
#' @param path A single character string specifying the directory containing
#'   BED files or the file path to a single BED file.
#'
#' @return A data frame combining data from the BED files.
#'
#' @details When `path` is a directory, every file whose name ends in `.bed`
#' (optionally followed by digits, e.g. `.bed12`) is read, a `filename` column
#' holding the file's base name without that extension is added, and the
#' per-file data frames are row-bound into one. When `path` is a single file,
#' the parsed data is returned as is, without a `filename` column.
#'
#' An error is raised when `path` does not exist or when a directory contains
#' no matching files.
#'
#' @examples
#' \dontrun{
#' # Read BED files from a directory
#' bed_data <- read_bed("path/to/directory")
#'
#' # Read a single BED file
#' bed_data <- read_bed("path/to/file.bed")
#' }
#' @importFrom dplyr bind_rows
#' @export
read_bed <- function(path) {

  if (!is.character(path) || length(path) != 1L || is.na(path)) {
    stop("`path` must be a single character string.")
  }

  if (dir.exists(path)) {
    # It's a directory
    files <- list.files(path, pattern = "\\.bed[0-9]*$", full.names = TRUE)

    # Check if there are any .bed files in the directory
    if (length(files) == 0) {
      stop("No .bed files found in the specified directory.")
    }

    # One data frame per file. The list is positional rather than keyed by
    # the stripped file name, so "a.bed" and "a.bed12" cannot overwrite each
    # other.
    data_list <- lapply(files, function(file) {
      data <- process_bed(file)
      filename <- sub("\\.bed[0-9]*$", "", basename(file))
      # rep() keeps the assignment valid for files that yield zero rows.
      data$filename <- rep(filename, nrow(data))
      data
    })

    # Files without any feature contribute no rows; drop them so their
    # untyped (all-logical) columns cannot clash with real data when binding.
    has_rows <- vapply(data_list, function(d) nrow(d) > 0, logical(1))
    if (any(has_rows)) {
      data_list <- data_list[has_rows]
    }

    # Combine data frames from all files into one
    return(dplyr::bind_rows(data_list))
  }

  if (file.exists(path)) {
    return(process_bed(path))
  }

  stop("The specified path does not exist.")
}
|
||
#' Convert comma-separated block values to numbers, shifted by one
#'
#' Splits `x` on commas, flattens the pieces into a single vector, converts
#' them to numeric and adds 1 to every value.
#'
#' @param x Character vector of comma-separated numbers (e.g. `"0,10,20"`).
#'
#' @return A numeric vector with each parsed value incremented by 1.
#'
#' @noRd
block_to_numeric <- function(x) {
  pieces <- unlist(strsplit(x, ","))
  shifted <- as.numeric(pieces) + 1
  shifted
}
|
||
#' Parse a single BED file into a data frame
#'
#' Reads one tab-separated BED file, drops header lines (`browser ...`,
#' `track ...`), `#` comment lines and blank lines, names the columns after
#' the standard BED fields and converts the coordinates to 1-based.
#'
#' @param path Path to a single BED file.
#'
#' @return A data frame with one row per feature. Columns are named after the
#'   first `n` standard BED fields, where `n` is the widest row in the file;
#'   columns beyond the 12 standard fields are named `V13`, `V14`, ...
#'   `chromStart` and `thickStart` are shifted by +1. `chromEnd` and
#'   `thickEnd` are left as read, because a BED end is exclusive and therefore
#'   already equals the last base in 1-based, inclusive coordinates.
#'   `blockSizes` and `blockStarts` become list columns of numeric vectors:
#'   sizes are kept unchanged (they are lengths), starts are shifted by +1.
#'   A file without feature lines yields a zero-row data frame with all 12
#'   standard columns.
#'
#' @noRd
process_bed <- function(path) {

  field_names <- c("chrom", "chromStart", "chromEnd", "name", "score",
                   "strand", "thickStart", "thickEnd", "itemRgb", "blockCount",
                   "blockSizes", "blockStarts")

  if (!file.exists(path)) {
    stop("The specified path does not exist.")
  }

  lines <- readLines(path, warn = FALSE)

  # Drop header/comment/blank lines wherever they occur. Anchoring the pattern
  # at the start of the line keeps features whose name merely contains
  # "track", and copes with files holding several track sections.
  is_header <- grepl("^\\s*(browser|track)(\\s|$)|^\\s*#", lines)
  is_blank <- !nzchar(trimws(lines))
  lines <- lines[!(is_header | is_blank)]

  # Name as many columns as the widest data line has fields
  if (length(lines) > 0) {
    num_cols <- max(lengths(strsplit(lines, "\t", fixed = TRUE)))
    used_fields <- field_names[seq_len(min(num_cols, length(field_names)))]
    if (num_cols > length(field_names)) {
      # Non-standard extra columns (BED12+) get generic names instead of NA
      extra_fields <- paste0("V", seq(length(field_names) + 1, num_cols))
      used_fields <- c(used_fields, extra_fields)
    }
  } else {
    used_fields <- field_names
  }

  data <- read.table(
    text = lines,
    header = FALSE,
    quote = "",
    sep = "\t",
    fill = TRUE,
    # Comment lines were removed above; a '#' inside a field is real data.
    comment.char = "",
    stringsAsFactors = FALSE,
    col.names = used_fields
  )

  # BED intervals are 0-based and half-open: only the start moves when
  # converting to 1-based inclusive coordinates, the end stays as it is.
  for (start_col in c("chromStart", "thickStart")) {
    if (start_col %in% colnames(data)) {
      data[[start_col]] <- data[[start_col]] + 1
    }
  }

  # Split comma-separated block fields into one numeric vector per row.
  # lapply() always returns a list with one element per row, so the result is
  # a valid list column regardless of how many blocks each row has.
  # as.character() guards against read.table() having parsed a column of
  # single-block values as numbers.
  parse_blocks <- function(x, offset) {
    lapply(
      strsplit(as.character(x), ",", fixed = TRUE),
      function(values) as.numeric(values) + offset
    )
  }

  if ("blockSizes" %in% colnames(data)) {
    # Sizes are lengths, not positions, so they are not shifted
    data$blockSizes <- parse_blocks(data$blockSizes, offset = 0)
  }
  if ("blockStarts" %in% colnames(data)) {
    # Offsets relative to chromStart, reported 1-based (first base == 1)
    data$blockStarts <- parse_blocks(data$blockStarts, offset = 1)
  }

  data
}
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.