diff --git a/NAMESPACE b/NAMESPACE index eabac62..37fd978 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -64,5 +64,4 @@ importFrom(stats,setNames) importFrom(tidyr,pivot_wider) importFrom(tools,file_path_sans_ext) importFrom(utils,modifyList) -importFrom(utils,packageVersion) importFrom(utils,read.table) diff --git a/R/cluster_blast.R b/R/cluster_blast.R index 7bfcfa8..7b64ffa 100644 --- a/R/cluster_blast.R +++ b/R/cluster_blast.R @@ -219,38 +219,23 @@ synteny_score <- function(order1, order2, identity, i = 0.5) { #' cur_data arrange desc #' @importFrom stats setNames ave #' @importFrom rlang .data -#' @importFrom utils packageVersion #' @importFrom parallel detectCores makeCluster clusterExport clusterEvalQ #' parLapply stopCluster -#' @note This function selects the appropriate alignment package based on the -#' Bioconductor version. For versions earlier than 3.19, it relies on the -#' Biostrings package for sequence alignment. For versions 3.19 and higher, it -#' uses the pwalign package. Ensure these packages are installed using -#' BiocManager and loaded into your R session. The dplyr package is also used -#' for data manipulation. +#' @note This function relies on the Biostrings and pwalign package for sequence +#' alignment and the dplyr package for data manipulation. Ensure these packages +#' are installed and loaded into your R session. #' #' @export protein_blast <- function(data, query, id = "protein_id", start = "start", end = "end", cluster = "cluster", genes = NULL, identity = 30, parallel = TRUE) { - # Check Bioconductor version and use either Biostrings or pwalign - if (requireNamespace("BiocVersion", quietly = TRUE)) { - if (utils::packageVersion("BiocVersion") < "3.19") { - if (!requireNamespace("Biostrings", quietly = TRUE)) { - stop('Biostrings package is not installed. 
Please install it using BiocManager::install("Biostrings").') - } - pairwise_alignment_fun <- Biostrings::pairwiseAlignment - get_pattern <- Biostrings::pattern - get_subject <- Biostrings::subject - } else { - if (!requireNamespace("pwalign", quietly = TRUE)) { - stop('pwalign package is not installed. Please install it using BiocManager::install("pwalign").') - } - pairwise_alignment_fun <- pwalign::pairwiseAlignment - get_pattern <- pwalign::pattern - get_subject <- pwalign::subject - } - } else { - stop("BiocManager is not installed. Please install it using install.packages('BiocManager').") + # Check if Biostrings package is installed + if (!requireNamespace("Biostrings", quietly = TRUE) || !"Biostrings" %in% loadedNamespaces()) { + stop('Biostrings package is not installed or not loaded. Install with BiocManager::install("Biostrings").') + } + + # Check if pwalign package is installed and loaded + if (!requireNamespace("pwalign", quietly = TRUE) || !"pwalign" %in% loadedNamespaces()) { + stop('pwalign package is not installed or not loaded. 
Install with BiocManager::install("pwalign").') } # Load from .gbk files @@ -337,9 +322,9 @@ protein_blast <- function(data, query, id = "protein_id", start = "start", end = protein_combinations_alignment <- protein_combinations_all[protein_combinations_all$cluster1 != protein_combinations_all$cluster2, ] protein_combinations_query <- protein_combinations_all[protein_combinations_all$rowID.x == protein_combinations_all$rowID.y, ] - alignments <- pairwise_alignment_fun(pattern = protein_combinations_alignment$translation1, - subject = protein_combinations_alignment$translation2, - scoreOnly = FALSE) + alignments <- pwalign::pairwiseAlignment(pattern = protein_combinations_alignment$translation1, + subject = protein_combinations_alignment$translation2, + scoreOnly = FALSE) # Decide on parallel or sequential processing based on the number of rows if (parallel && nrow(protein_combinations_alignment) > 1000) { @@ -351,15 +336,15 @@ protein_blast <- function(data, query, id = "protein_id", start = "start", end = no_cores <- parallel::detectCores() - 1 cl <- parallel::makeCluster(no_cores) - # Export the alignments list and the Biostrings library to each cluster node + # Export the alignments list and the pwalign library to each cluster node parallel::clusterExport(cl, varlist = c("alignments"), envir = environment()) - # parallel::clusterEvalQ(cl, library(Biostrings)) + parallel::clusterEvalQ(cl, library(pwalign)) # Use parLapply for parallel execution alignment_list <- parallel::parLapply(cl, alignments, function(alignment) { list( - pattern = as.character(get_pattern(alignment)), - subject = as.character(get_subject(alignment)) + pattern = as.character(pwalign::pattern(alignment)), + subject = as.character(pwalign::subject(alignment)) ) }) @@ -369,8 +354,8 @@ protein_blast <- function(data, query, id = "protein_id", start = "start", end = # Sequential processing alignment_list <- lapply(alignments, function(alignment) { list( - pattern = 
as.character(get_pattern(alignment)), - subject = as.character(get_subject(alignment)) + pattern = as.character(pwalign::pattern(alignment)), + subject = as.character(pwalign::subject(alignment)) ) }) } diff --git a/R/geneviewer.R b/R/geneviewer.R index 7e4e689..c77eea4 100644 --- a/R/geneviewer.R +++ b/R/geneviewer.R @@ -1882,7 +1882,7 @@ GC_trackMouse <- function( #' #' @param GC_chart The gene chart object to be modified. #' @param formatter A character string defining the HTML content of the tooltip. It can -#' include placeholders like `{start}` and `{end}` which will be replaced by actual +#' include placeholders like \code{\{start\}} and \code{\{end\}} which will be replaced by actual #' data values. The default value shows start and end data. #' @param show Logical, whether to display the tooltip or not. #' @param cluster Optional; used to specify which clusters in the chart should have tooltips. diff --git a/docs/articles/BLASTP.html b/docs/articles/BLASTP.html index 022a34c..fad4065 100644 --- a/docs/articles/BLASTP.html +++ b/docs/articles/BLASTP.html @@ -116,22 +116,20 @@

Materialsgeneviewer-tutorials repository. For visualization, the geneviewer package is -required. Sequence alignment is performed using either the -pwalign or Biostrings package, depending on -the Bioconductor version in use. Optionally, the parallel +required. Sequence alignment is performed using the +Biostrings and pwalign packages that can be +downloaded from Bioconductor. Optionally, the parallel package can be utilized to increase processing times.

-install.packages("devtools")
-install.packages("parallel") # Optional but recommended for speeding up processing
-
-devtools::install_github("nvelden/geneviewer")
-
-# For BiocManager versions > 3.19 
+devtools::install_github("nvelden/geneviewer")
+BiocManager::install("Biostrings")
 BiocManager::install("pwalign")
-# For BiocManager versions < 3.19 
-BiocManager::install("Biostrings") 
+# Optional but recommended for speeding up processing
+install.packages("parallel") 
 
 library(geneviewer)
+library(Biostrings)
+library(pwalign)
 library(parallel) 
@@ -231,8 +229,10 @@

Run BlastP
 BlastP_results <- geneviewer::protein_blast(
   folder_path,
diff --git a/man/GC_tooltip.Rd b/man/GC_tooltip.Rd
index 7993bb3..437fbe8 100644
--- a/man/GC_tooltip.Rd
+++ b/man/GC_tooltip.Rd
@@ -16,7 +16,7 @@ GC_tooltip(
 \item{GC_chart}{The gene chart object to be modified.}
 
 \item{formatter}{A character string defining the HTML content of the tooltip. It can
-include placeholders like `{start}` and `{end}` which will be replaced by actual
+include placeholders like \code{\{start\}} and \code{\{end\}} which will be replaced by actual
 data values. The default value shows start and end data.}
 
 \item{show}{Logical, whether to display the tooltip or not.}
diff --git a/man/protein_blast.Rd b/man/protein_blast.Rd
index 8267c0d..ad2bb63 100644
--- a/man/protein_blast.Rd
+++ b/man/protein_blast.Rd
@@ -58,12 +58,9 @@ sequence identity and similarity, and filters results based on a minimum
 identity threshold.
 }
 \note{
-This function selects the appropriate alignment package based on the
-  Bioconductor version. For versions earlier than 3.19, it relies on the
-  Biostrings package for sequence alignment. For versions 3.19 and higher, it
-  uses the pwalign package. Ensure these packages are installed using
-  BiocManager and loaded into your R session. The dplyr package is also used
-  for data manipulation.
+This function relies on the Biostrings and pwalign packages for sequence
+alignment and the dplyr package for data manipulation. Ensure these packages
+are installed and loaded into your R session.
 }
 \examples{
 \dontrun{
diff --git a/vignettes/BLASTP.Rmd b/vignettes/BLASTP.Rmd
index b50bf23..936c716 100644
--- a/vignettes/BLASTP.Rmd
+++ b/vignettes/BLASTP.Rmd
@@ -12,6 +12,8 @@ library(knitr)
 library(geneviewer)
 library(parallel)
 library(dplyr)
+library(Biostrings)
+library(pwalign)
 
 knitr::opts_chunk$set(
   collapse = TRUE,
@@ -25,20 +27,18 @@ This tutorial describes how we can use `geneviewer` to identify and visualize ho
 
 ### Materials
 
-The .gbk files and the additional gene info can be downloaded from the [geneviewer-tutorials](https://github.com/nvelden/geneviewer-tutorials/tree/main/Cluster%20comparison%20using%20BlastP) repository. For visualization, the `geneviewer` package is required. Sequence alignment is performed using either the `pwalign` or `Biostrings` package, depending on the Bioconductor version in use. Optionally, the `parallel` package can be utilized to increase processing times.
+The .gbk files and the additional gene info can be downloaded from the [geneviewer-tutorials](https://github.com/nvelden/geneviewer-tutorials/tree/main/Cluster%20comparison%20using%20BlastP) repository. For visualization, the `geneviewer` package is required. Sequence alignment is performed using the `Biostrings` and `pwalign` packages, which can be installed from Bioconductor. Optionally, the `parallel` package can be used to reduce processing time.
 
 ```{r eval=FALSE, results='hide'}
-install.packages("devtools")
-install.packages("parallel") # Optional but recommended for speeding up processing
-
 devtools::install_github("nvelden/geneviewer")
-
-# For BiocManager versions > 3.19 
+BiocManager::install("Biostrings")
 BiocManager::install("pwalign")
-# For BiocManager versions < 3.19 
-BiocManager::install("Biostrings") 
+# Optional but recommended for speeding up processing
+install.packages("parallel") 
 
 library(geneviewer)
+library(Biostrings)
+library(pwalign)
 library(parallel) 
 ```
 
@@ -67,7 +67,7 @@ kable(head(genbank_df))
 
 ### Run BlastP
 
-In this tutorial, we will directly input the folder path into `the protein_blast()` function to load our data. We'll select BGC0000055 as our query cluster and conduct a BlastP analysis to find the homologous in the other clusters. We use 30 as the minimum identity threshold. Performing the BlastP analysis can take several minutes so we set parallel processing to TRUE.
+In this tutorial, we will directly input the folder path into the `protein_blast()` function to load our data. We'll select BGC0000055 as our query cluster and conduct a BlastP analysis to find the homologous genes in the other clusters. We use 30 as the minimum identity threshold. Performing the BlastP analysis with this dataset can take several minutes, so we set parallel processing to `TRUE`. For smaller datasets or if the `parallel` package is not installed, set parallel processing to `FALSE`.
 
 ```{r echo=TRUE, results='hide'}
 BlastP_results <- geneviewer::protein_blast(