diff --git a/DESCRIPTION b/DESCRIPTION index 5e9d4a6..32099bb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: CNVMetrics Type: Package -Version: 0.1.1 -Date: 2021-01-29 +Version: 0.1.2 +Date: 2021-02-10 Title: Copy Number Variant Metrics Description: Calculate similarity metrics to facilite copy number variant comparison among samples and/or methods. diff --git a/NAMESPACE b/NAMESPACE index a34436f..c5c4850 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -2,7 +2,7 @@ S3method(is,CNVMetric) S3method(print,CNVMetric) -export(calculateOverlapRegionsMetric) +export(calculateOverlapMetric) export(calculateWeightedEuclideanDistance) export(plotOverlapMetric) export(prepareInformation) diff --git a/NEWS.md b/NEWS.md index 5ff92b0..247af18 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,9 +1,21 @@ +# CNVMetrics 0.1.2 + +NEW FEATURES + +* Added a `NEWS.md` file to track changes to the package. +* `plotOneOverlapMetric()` method enables plotting result of overlapping metric calculation. + +SIGNIFICANT USER-VISIBLE CHANGES + +* `calculateOverlapRegionsMetric()` method changed to `calculateOverlapMetric()`. + + # CNVMetrics 0.1.1 NEW FEATURES * Added a `NEWS.md` file to track changes to the package. -* `calculateOverlapRegionsMetric()` enables calculation of similarity metrics using overlapping amplified/deleted regions. +* `calculateOverlapRegionsMetric()` method enables calculation of similarity metrics using overlapping amplified/deleted regions. 
SIGNIFICANT USER-VISIBLE CHANGES diff --git a/R/CNVMetrics.R b/R/CNVMetrics.R index fd14bf7..0f53395 100644 --- a/R/CNVMetrics.R +++ b/R/CNVMetrics.R @@ -15,5 +15,6 @@ #' Maintainer: #' Astrid Deschênes #' +#' @encoding UTF-8 #' @keywords package NULL diff --git a/R/CNVMetricsOverlapInternalMethods.R b/R/CNVMetricsOverlapInternalMethods.R index 231af61..eaca640 100644 --- a/R/CNVMetricsOverlapInternalMethods.R +++ b/R/CNVMetricsOverlapInternalMethods.R @@ -68,27 +68,27 @@ #' state = c("AMPLIFICATION", "DELETION", "DELETION")) #' #' ## Calculate Sorensen metric for the amplified regions -#' CNVMetrics:::calculateOverlapMetric(sample01, sample02, method="sorensen", -#' type="AMPLIFICATION") +#' CNVMetrics:::calculateOneOverlapMetric(sample01, sample02, +#' method="sorensen", type="AMPLIFICATION") #' #' ## Calculate Szymkiewicz-Simpson metric for the amplified regions #' ## Amplified regions of sample02 are a subset of the amplified #' ## regions in sample01 -#' CNVMetrics:::calculateOverlapMetric(sample01, sample02, method="szymkiewicz", -#' type="AMPLIFICATION") +#' CNVMetrics:::calculateOneOverlapMetric(sample01, sample02, +#' method="szymkiewicz", type="AMPLIFICATION") #' #' ## Calculate Sorensen metric for the deleted regions -#' CNVMetrics:::calculateOverlapMetric(sample01, sample02, method="sorensen", -#' type="DELETION") +#' CNVMetrics:::calculateOneOverlapMetric(sample01, sample02, +#' method="sorensen", type="DELETION") #' #' ## Calculate Szymkiewicz-Simpson metric for the deleted regions -#' CNVMetrics:::calculateOverlapMetric(sample01, sample02, method="szymkiewicz", -#' type="DELETION") +#' CNVMetrics:::calculateOneOverlapMetric(sample01, sample02, +#' method="szymkiewicz", type="DELETION") #' #' @author Astrid Deschênes #' @encoding UTF-8 #' @keywords internal -calculateOverlapMetric <- function(sample01, sample02, method, type) { +calculateOneOverlapMetric <- function(sample01, sample02, method, type) { sample01 <- sample01[sample01$state == type,] 
sample02 <- sample02[sample02$state == type,] @@ -246,7 +246,7 @@ calculateSzymkiewicz <- function(sample01, sample02) { #' amplified/deleted regions. #' #' @param metric a \code{CNVMetric} object containing the metrics calculated -#' by \code{calculateOverlapRegionsMetric}. +#' by \code{calculateOverlapMetric}. #' #' @param type a \code{character} string indicating which graph to generate. #' This should be (an unambiguous abbreviation of) one of @@ -291,7 +291,7 @@ calculateSzymkiewicz <- function(sample01, sample02) { #' state = c("AMPLIFICATION", "DELETION")) #' #' ## Calculating Sorensen metric -#' metric <- calculateOverlapRegionsMetric(demo, method="sorensen") +#' metric <- calculateOverlapMetric(demo, method="sorensen") #' #' ## Plot both amplification metrics using darkorange color #' CNVMetrics:::plotOneOverlapMetric(metric, type="AMPLIFICATION", @@ -314,8 +314,8 @@ plotOneOverlapMetric <- function(metric, type, colorRange, show_colnames, ...) ## Prepare main title (might not be used if main argument given by user) metricInfo <- switch(attributes(metric)$metric, - "szymkiewicz"="Szymkiewicz-Simpson", - "sorensen"="Sorensen") + "szymkiewicz"="Szymkiewicz-Simpson", + "sorensen"="Sorensen") metricInfo <- paste0(type, " - ", metricInfo, " metric") ## Create heatmap @@ -328,10 +328,10 @@ plotOneOverlapMetric <- function(metric, type, colorRange, show_colnames, ...) 
if (!hasArg("main")) { pheatmap(metricMat, main=metricInfo, show_colnames=show_colnames, - color=colors, breaks=breaks, ...)[[4]] + color=colors, breaks=breaks, ...)[[4]] } else { pheatmap(metricMat, show_colnames=show_colnames, - color=colors, breaks=breaks, ...)[[4]] + color=colors, breaks=breaks, ...)[[4]] } } else { if (!hasArg("main")) { diff --git a/R/CNVMetricsOverlapMethods.R b/R/CNVMetricsOverlapMethods.R index 2fb26c2..ff68049 100644 --- a/R/CNVMetricsOverlapMethods.R +++ b/R/CNVMetricsOverlapMethods.R @@ -102,16 +102,16 @@ #' state = c("AMPLIFICATION", "DELETION")) #' #' ## Calculating Sorensen metric -#' calculateOverlapRegionsMetric(demo, method="sorensen") +#' calculateOverlapMetric(demo, method="sorensen") #' #' ## Calculating Szymkiewicz-Simpson metric -#' calculateOverlapRegionsMetric(demo, method="szymkiewicz") +#' calculateOverlapMetric(demo, method="szymkiewicz") #' #' @author Astrid Deschênes, Pascal Belleau #' @import GenomicRanges #' @encoding UTF-8 #' @export -calculateOverlapRegionsMetric <- function(segmentData, +calculateOverlapMetric <- function(segmentData, method=c("sorensen", "szymkiewicz")) { method <- match.arg(method) @@ -143,7 +143,7 @@ calculateOverlapRegionsMetric <- function(segmentData, for(i in seq_len(nb)[-1]) { for(j in seq_len(i-1)) { - dataTMP[i, j] <- calculateOverlapMetric( + dataTMP[i, j] <- calculateOneOverlapMetric( sample01=segmentData[[names[i]]], sample02=segmentData[[names[j]]], method=method, type=type) @@ -169,7 +169,7 @@ calculateOverlapRegionsMetric <- function(segmentData, #' heatmap related to amplified, deleted regions or both. #' #' @param metric a \code{CNVMetric} object containing the metrics calculated -#' by \code{calculateOverlapRegionsMetric}. +#' by \code{calculateOverlapMetric}. #' #' @param type a \code{character} string indicating which graph to generate. 
#' This should be (an unambiguous abbreviation of) one of "\code{BOTH}", @@ -215,7 +215,7 @@ calculateOverlapRegionsMetric <- function(segmentData, #' state = c("AMPLIFICATION", "DELETION")) #' #' ## Calculating Sorensen metric -#' metric <- calculateOverlapRegionsMetric(demo, method="sorensen") +#' metric <- calculateOverlapMetric(demo, method="sorensen") #' #' ## Plot both amplification and deletion metrics #' plotOverlapMetric(metric, type="BOTH") diff --git a/man/CNVMetrics-package.Rd b/man/CNVMetrics-package.Rd index b416b75..fa45bbd 100644 --- a/man/CNVMetrics-package.Rd +++ b/man/CNVMetrics-package.Rd @@ -1,6 +1,7 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/CNVMetrics.R \docType{package} +\encoding{UTF-8} \name{CNVMetrics-package} \alias{CNVMetrics-package} \alias{CNVMetrics} diff --git a/man/calculateOneOverlapMetric.Rd b/man/calculateOneOverlapMetric.Rd new file mode 100644 index 0000000..58f824b --- /dev/null +++ b/man/calculateOneOverlapMetric.Rd @@ -0,0 +1,101 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/CNVMetricsOverlapInternalMethods.R +\encoding{UTF-8} +\name{calculateOneOverlapMetric} +\alias{calculateOneOverlapMetric} +\title{Calculate metric using overlapping amplified/deleted regions between +two samples.} +\usage{ +calculateOneOverlapMetric(sample01, sample02, method, type) +} +\arguments{ +\item{sample01}{a \code{GRanges} which contains a collection of +genomic ranges representing copy number events for the first sample. 
+The \code{GRanges} must have a metadata column called '\code{state}' with +amplified regions identified as '\code{AMPLIFICATION}' and +deleted regions identified as '\code{DELETION}'; regions with different +identifications will not be used in the +calculation of the metric.} + +\item{sample02}{a \code{GRanges} which contains a collection of +genomic ranges representing copy number events for the second sample.} + +\item{method}{a \code{character} string representing the metric to be +used ('\code{sorensen}' or '\code{szymkiewicz}').} + +\item{type}{a \code{character} string representing the type of +copy number events to be used ('\code{AMPLIFICATION}' or '\code{DELETION}').} +} +\value{ +a \code{numeric}, the value of the specified metric. If +the metric cannot be calculated, \code{NA} is returned. +} +\description{ +Calculate a specific metric using overlapping +amplified/deleted regions between two samples. +} +\details{ +The method calculates a specified metric using overlapping +regions between the samples. Only regions corresponding to the type +specified by user are used in the calculation of the metric. The strand of +the regions is not taken into account while +calculating the metric. + +The Sorensen metric is calculated by dividing twice the size of +the intersection by the sum of the size of the two sets. If the sum of +the size of the two sets is zero, the value \code{NA} is +returned instead. + +The Szymkiewicz-Simpson metric is calculated by dividing the size of +the intersection by the smaller of the size of the two sets. If one sample +has a size of zero, the metric is not calculated; the value \code{NA} is +returned instead.
+} +\examples{ + +## Load required package to generate the two samples +require(GenomicRanges) + +## Generate two samples with identical sequence levels +sample01 <- GRanges(seqnames = "chr1", + ranges = IRanges(start = c(100, 201, 400), + end = c(200, 350, 500)), strand = "*", + state = c("AMPLIFICATION", "AMPLIFICATION", "DELETION")) +sample02 <- GRanges(seqnames = "chr1", + ranges = IRanges(start = c(150, 200, 450), + end = c(250, 350, 500)), strand = "*", + state = c("AMPLIFICATION", "DELETION", "DELETION")) + +## Calculate Sorensen metric for the amplified regions +CNVMetrics:::calculateOneOverlapMetric(sample01, sample02, + method="sorensen", type="AMPLIFICATION") + +## Calculate Szymkiewicz-Simpson metric for the amplified regions +## Amplified regions of sample02 are a subset of the amplified +## regions in sample01 +CNVMetrics:::calculateOneOverlapMetric(sample01, sample02, + method="szymkiewicz", type="AMPLIFICATION") + +## Calculate Sorensen metric for the deleted regions +CNVMetrics:::calculateOneOverlapMetric(sample01, sample02, + method="sorensen", type="DELETION") + +## Calculate Szymkiewicz-Simpson metric for the deleted regions +CNVMetrics:::calculateOneOverlapMetric(sample01, sample02, + method="szymkiewicz", type="DELETION") + +} +\references{ +Sørensen, Thorvald. n.d. “A Method of Establishing Groups of Equal +Amplitude in Plant Sociology Based on Similarity of Species and Its +Application to Analyses of the Vegetation on Danish Commons.” +Biologiske Skrifter, no. 5: 1–34. + +Vijaymeena, M. K, and Kavitha K. 2016. “A Survey on Similarity Measures in +Text Mining.” Machine Learning and Applications: An International +Journal 3 (1): 19–28. 
doi: \url{https://doi.org/10.5121/mlaij.2016.3103} +} +\author{ +Astrid Deschênes +} +\keyword{internal} diff --git a/man/calculateOverlapMetric.Rd b/man/calculateOverlapMetric.Rd index 49b561d..a41548d 100644 --- a/man/calculateOverlapMetric.Rd +++ b/man/calculateOverlapMetric.Rd @@ -1,88 +1,106 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/CNVMetricsOverlapInternalMethods.R +% Please edit documentation in R/CNVMetricsOverlapMethods.R \encoding{UTF-8} \name{calculateOverlapMetric} \alias{calculateOverlapMetric} -\title{Calculate metric using overlapping amplified/deleted regions between -two samples.} +\title{Calculate metric using overlapping amplified/deleted regions} \usage{ -calculateOverlapMetric(sample01, sample02, method, type) +calculateOverlapMetric(segmentData, method = c("sorensen", "szymkiewicz")) } \arguments{ -\item{sample01}{a \code{GRanges} which contains a collection of -genomic ranges representing copy number events for the first sample. -The \code{GRanges} must have a metadata column called '\code{state}' with -amplified regions identified as '\code{AMPLIFICATION}' and -deleted regions identified as '\code{DELETION}'; regions with different -identifications will not be used in the +\item{segmentData}{a \code{GRangesList} that contains a collection of +genomic ranges representing copy number events, including amplified/deleted +status, from at least 2 samples. 
All samples must have a metadata column +called '\code{state}' with amplified regions identified as +'\code{AMPLIFICATION}' and deleted regions identified as '\code{DELETION}'; +regions with different identifications will not be used in the calculation of the metric.} -\item{sample02}{a \code{GRanges} which contains a collection of -genomic ranges representing copy number events for the second sample.} - -\item{method}{a \code{character} string representing the metric to be -used ('\code{sorensen}' or '\code{szymkiewicz}'.} - -\item{type}{a \code{character} string representing the type of -copy number events to be used ('\code{AMPLIFICATION}' or '\code{DELETION}').} +\item{method}{a \code{character} string representing the metric to be used. +This should be (an unambiguous abbreviation of) one of "sorensen" or +"szymkiewicz". Default: "sorensen".} } \value{ -a \code{numeric}, the value of the specified metric. If -the metric cannot be calculated, \code{NA} is returned. +a \code{list} of class "\code{CNVMetric}". This list has +the following components: +\itemize{ +\item{\code{AMPLIFICATION}}{ a lower-triangular \code{matrix} with the + results of the selected metric on the amplified regions for each paired + samples. The value \code{NA} is present when the metric cannot be + calculated. The value \code{NA} is also present in the top-triangular + section of the matrix. + } + \item{\code{DELETION}}{ a lower-triangular \code{matrix} with the + results of the selected metric on the deleted regions for each paired + samples. The value \code{NA} is present when the metric cannot be + calculated. The value \code{NA} is also present in the top-triangular + section of the matrix. +}} + +The object has the following attributes (besides "class" equal +to "CNVMetric"): +\itemize{ +\item{\code{metric}}{ the metric used for the calculation. + } +\item{\code{names}}{ the names of the two matrix containing the metrics for +the amplified and deleted regions. 
+}} } \description{ Calculate a specific metric using overlapping -amplified/deleted regions between two samples. +amplified/deleted regions between two samples. The metric is calculated for +the amplified and deleted regions separately. When more than 2 samples are +present, the metric is calculated for each sample pair. } \details{ -The method calculates a specified metric using overlapping -regions between the samples. Only regions corresponding to the type -specified by user are used in the calculation of the metric. The strand of -the regions is not taken into account while -calculating the metric. +The two methods each estimate the overlap between paired samples. They use +different metrics, all in the range [0, 1] with 0 indicating no overlap. + +The available metrics are (written for two GRanges): -The Sorensen metric is calculated by dividing twice the size of -the intersection by the sum of the size of the two sets. If the sum of -the size of the two sets is zero; the value \code{NA} is -returned instead. +\code{sorensen}: -The Szymkiewicz-Simpson metric is calculated by dividing the size of -the intersection by the smaller of the size of the two sets. If one sample -has a size of zero, the metric is not calculated; the value \code{NA} is -returned instead. +This metric is calculated by dividing twice the size of the intersection +by the sum of the size of the two sets. +With this metric, an overlap metric value of 1 is only obtained when the +two samples are identical. + +\code{szymkiewicz}: + +This metric is calculated by dividing the size of the intersection +by the size of the smallest set. With this metric, if one set is a +subset of the other set, the overlap metric value is 1.
+} \examples{ -## Load required package to generate the two samples +## Load required package to generate the samples require(GenomicRanges) -## Generate two samples with identical sequence levels -sample01 <- GRanges(seqnames = "chr1", - ranges = IRanges(start = c(100, 201, 400), - end = c(200, 350, 500)), strand = "*", +## Create a GRangesList object with 3 samples +## The strand of the regions doesn't affect the calculation of the metric +demo <- GRangesList() +demo[["sample01"]] <- GRanges(seqnames = "chr1", + ranges = IRanges(start = c(1905048, 4554832, 31686841), + end = c(2004603, 4577608, 31695808)), strand = "*", + state = c("AMPLIFICATION", "AMPLIFICATION", "DELETION")) + +demo[["sample02"]] <- GRanges(seqnames = "chr1", + ranges = IRanges(start = c(1995066, 31611222, 31690000), + end = c(2204505, 31689898, 31895666)), strand = c("-", "+", "+"), state = c("AMPLIFICATION", "AMPLIFICATION", "DELETION")) -sample02 <- GRanges(seqnames = "chr1", - ranges = IRanges(start = c(150, 200, 450), - end = c(250, 350, 500)), strand = "*", - state = c("AMPLIFICATION", "DELETION", "DELETION")) -## Calculate Sorensen metric for the amplified regions -CNVMetrics:::calculateOverlapMetric(sample01, sample02, method="sorensen", - type="AMPLIFICATION") +## The amplified region in sample03 is a subset of the amplified regions +## in sample01 +demo[["sample03"]] <- GRanges(seqnames = "chr1", + ranges = IRanges(start = c(1906069, 4558838), + end = c(1909505, 4570601)), strand = "*", + state = c("AMPLIFICATION", "DELETION")) -## Calculate Szymkiewicz-Simpson metric for the amplified regions -## Amplified regions of sample02 are a subset of the amplified -## regions in sample01 -CNVMetrics:::calculateOverlapMetric(sample01, sample02, method="szymkiewicz", - type="AMPLIFICATION") +## Calculating Sorensen metric +calculateOverlapMetric(demo, method="sorensen") -## Calculate Sorensen metric for the deleted regions -CNVMetrics:::calculateOverlapMetric(sample01, sample02,
method="sorensen", - type="DELETION") - -## Calculate Szymkiewicz-Simpson metric for the deleted regions -CNVMetrics:::calculateOverlapMetric(sample01, sample02, method="szymkiewicz", - type="DELETION") +## Calculating Szymkiewicz-Simpson metric +calculateOverlapMetric(demo, method="szymkiewicz") } \references{ @@ -96,6 +114,5 @@ Text Mining.” Machine Learning and Applications: An International Journal 3 (1): 19–28. doi: \url{https://doi.org/10.5121/mlaij.2016.3103} } \author{ -Astrid Deschênes +Astrid Deschênes, Pascal Belleau } -\keyword{internal} diff --git a/man/calculateOverlapRegionsMetric.Rd b/man/calculateOverlapRegionsMetric.Rd deleted file mode 100644 index 019bed4..0000000 --- a/man/calculateOverlapRegionsMetric.Rd +++ /dev/null @@ -1,121 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/CNVMetricsOverlapMethods.R -\encoding{UTF-8} -\name{calculateOverlapRegionsMetric} -\alias{calculateOverlapRegionsMetric} -\title{Calculate metric using overlapping amplified/deleted regions} -\usage{ -calculateOverlapRegionsMetric( - segmentData, - method = c("sorensen", "szymkiewicz") -) -} -\arguments{ -\item{segmentData}{a \code{GRangesList} that contains a collection of -genomic ranges representing copy number events, including amplified/deleted -status, from at least 2 samples. All samples must have a metadata column -called '\code{state}' with amplified regions identified as -'\code{AMPLIFICATION}' and deleted regions identified as '\code{DELETION}'; -regions with different identifications will not be used in the -calculation of the metric.} - -\item{method}{a \code{character} string representing the metric to be used. -This should be (an unambiguous abbreviation of) one of "sorensen" or -"szymkiewicz". Default: "sorensen".} -} -\value{ -a \code{list} of class "\code{CNVMetric}". 
This list has -the following components: -\itemize{ -\item{\code{AMPLIFICATION}}{ a lower-triangular \code{matrix} with the - results of the selected metric on the amplified regions for each paired - samples. The value \code{NA} is present when the metric cannot be - calculated. The value \code{NA} is also present in the top-triangular - section of the matrix. - } - \item{\code{DELETION}}{ a lower-triangular \code{matrix} with the - results of the selected metric on the deleted regions for each paired - samples. The value \code{NA} is present when the metric cannot be - calculated. The value \code{NA} is also present in the top-triangular - section of the matrix. -}} - -The object has the following attributes (besides "class" equal -to "CNVMetric"): -\itemize{ -\item{\code{metric}}{ the metric used for the calculation. - } -\item{\code{names}}{ the names of the two matrix containing the metrics for -the amplified and deleted regions. -}} -} -\description{ -Calculate a specific metric using overlapping -amplified/deleted regions between to samples. The metric is calculated for -the amplified and deleted regions separately. When more than 2 samples are -present, the metric is calculated for each sample pair. -} -\details{ -The two methods each estimate the overlap between paired samples. They use -different metrics, all in the range [0, 1] with 0 indicating no overlap. - -The available metrics are (written for two GRanges): - -\code{sorensen}: - -This metric is calculated by dividing twice the size of the intersection -by the sum of the size of the two sets. -With this metric, an overlap metric value of 1 is only obtained when the -two samples are identical. - -\code{szymkiewicz}: - -This metric is calculated by dividing the size of the intersection -by the size of the smallest set. With this metric, if one set is a -subset of the other set, the overlap metric value is 1. 
-} -\examples{ - -## Load required package to generate the samples -require(GenomicRanges) - -## Create a GRangesList object with 3 samples -## The stand of the regions doesn't affect the calculation of the metric -demo <- GRangesList() -demo[["sample01"]] <- GRanges(seqnames = "chr1", - ranges = IRanges(start = c(1905048, 4554832, 31686841), - end = c(2004603, 4577608, 31695808)), strand = "*", - state = c("AMPLIFICATION", "AMPLIFICATION", "DELETION")) - -demo[["sample02"]] <- GRanges(seqnames = "chr1", - ranges = IRanges(start = c(1995066, 31611222, 31690000), - end = c(2204505, 31689898, 31895666)), strand = c("-", "+", "+"), - state = c("AMPLIFICATION", "AMPLIFICATION", "DELETION")) - -## The amplified region in sample03 is a subset of the amplified regions -## in sample01 -demo[["sample03"]] <- GRanges(seqnames = "chr1", - ranges = IRanges(start = c(1906069, 4558838), - end = c(1909505, 4570601)), strand = "*", - state = c("AMPLIFICATION", "DELETION")) - -## Calculating Sorensen metric -calculateOverlapRegionsMetric(demo, method="sorensen") - -## Calculating Szymkiewicz-Simpson metric -calculateOverlapRegionsMetric(demo, method="szymkiewicz") - -} -\references{ -Sørensen, Thorvald. n.d. “A Method of Establishing Groups of Equal -Amplitude in Plant Sociology Based on Similarity of Species and Its -Application to Analyses of the Vegetation on Danish Commons.” -Biologiske Skrifter, no. 5: 1–34. - -Vijaymeena, M. K, and Kavitha K. 2016. “A Survey on Similarity Measures in -Text Mining.” Machine Learning and Applications: An International -Journal 3 (1): 19–28. doi: \url{https://doi.org/10.5121/mlaij.2016.3103} -} -\author{ -Astrid Deschênes, Pascal Belleau -} diff --git a/man/plotOneOverlapMetric.Rd b/man/plotOneOverlapMetric.Rd index 5f5f7ec..403a297 100644 --- a/man/plotOneOverlapMetric.Rd +++ b/man/plotOneOverlapMetric.Rd @@ -10,7 +10,7 @@ plotOneOverlapMetric(metric, type, colorRange, show_colnames, ...) 
} \arguments{ \item{metric}{a \code{CNVMetric} object containing the metrics calculated -by \code{calculateOverlapRegionsMetric}.} +by \code{calculateOverlapMetric}.} \item{type}{a \code{character} string indicating which graph to generate. This should be (an unambiguous abbreviation of) one of @@ -56,7 +56,7 @@ demo[["sample03"]] <- GRanges(seqnames = "chr1", state = c("AMPLIFICATION", "DELETION")) ## Calculating Sorensen metric -metric <- calculateOverlapRegionsMetric(demo, method="sorensen") +metric <- calculateOverlapMetric(demo, method="sorensen") ## Plot both amplification metrics using darkorange color CNVMetrics:::plotOneOverlapMetric(metric, type="AMPLIFICATION", diff --git a/man/plotOverlapMetric.Rd b/man/plotOverlapMetric.Rd index fae7bbb..ec2859f 100644 --- a/man/plotOverlapMetric.Rd +++ b/man/plotOverlapMetric.Rd @@ -15,7 +15,7 @@ plotOverlapMetric( } \arguments{ \item{metric}{a \code{CNVMetric} object containing the metrics calculated -by \code{calculateOverlapRegionsMetric}.} +by \code{calculateOverlapMetric}.} \item{type}{a \code{character} string indicating which graph to generate. 
This should be (an unambiguous abbreviation of) one of "\code{BOTH}", @@ -67,7 +67,7 @@ demo[["sample03"]] <- GRanges(seqnames = "chr1", state = c("AMPLIFICATION", "DELETION")) ## Calculating Sorensen metric -metric <- calculateOverlapRegionsMetric(demo, method="sorensen") +metric <- calculateOverlapMetric(demo, method="sorensen") ## Plot both amplification and deletion metrics plotOverlapMetric(metric, type="BOTH") diff --git a/tests/testthat/test-CNVMetricMethods.R b/tests/testthat/test-CNVMetricMethods.R index 35bd679..1b8cfd8 100644 --- a/tests/testthat/test-CNVMetricMethods.R +++ b/tests/testthat/test-CNVMetricMethods.R @@ -103,57 +103,4 @@ test_that("calculateWeightedEuclideanDistance() must return good results 02", { }) -### Tests calculateOverlapRegionsMetric() results -context("calculateOverlapRegionsMetric() results") - -test_that("calculateOverlapRegionsMetric() must return an error when segmentData has only one sample", { - - error_message <- "at least 2 samples must be present in the segmentData" - - demo <- GRangesList() - demo[["sample01"]] <- GRanges(seqnames = "chr1", - ranges = IRanges(start = c(1905048, 4554832, 31686841), - end = c(2004603, 4577608, 31695808)), strand = "*", - state = c("AMPLIFICATION", "AMPLIFICATION", "DELETION")) - - expect_error(calculateOverlapRegionsMetric(demo), error_message) -}) - -test_that("calculateOverlapRegionsMetric() must return an error when segmentData has metadata status instead of state", { - - error_message <- paste0("at least one sample doesn't have a metadata column ", - "called \'state\'") - - demo <- GRangesList() - demo[["sample01"]] <- GRanges(seqnames = "chr1", - ranges = IRanges(start = c(1905048, 4554832, 31686841), - end = c(2004603, 4577608, 31695808)), strand = "*", - status = c("AMPLIFICATION", "AMPLIFICATION", "DELETION")) - - demo[["sample02"]] <- GRanges(seqnames = "chr1", - ranges = IRanges(start = c(1905048, 4554832, 31686841), - end = c(2004603, 4577608, 31695808)), strand = "*", - status= 
c("AMPLIFICATION", "AMPLIFICATION", "DELETION")) - - expect_error(calculateOverlapRegionsMetric(demo), error_message) -}) - -test_that("calculateOverlapRegionsMetric() must return an error when segmentData doesn't have metadata state", { - - error_message <- paste0("at least one sample doesn't have a metadata column ", - "called \'state\'") - - demo <- GRangesList() - demo[["sample01"]] <- GRanges(seqnames = "chr1", - ranges = IRanges(start = c(1905048, 4554832, 31686841), - end = c(2004603, 4577608, 31695808)), - strand = "*") - - demo[["sample02"]] <- GRanges(seqnames = "chr1", - ranges = IRanges(start = c(1905048, 4554832, 31686841), - end = c(2004603, 4577608, 31695808)), - strand = "*") - - expect_error(calculateOverlapRegionsMetric(demo), error_message) -}) diff --git a/tests/testthat/test-CNVMetricsOverlapMethods.R b/tests/testthat/test-CNVMetricsOverlapMethods.R index c49721b..33256f5 100644 --- a/tests/testthat/test-CNVMetricsOverlapMethods.R +++ b/tests/testthat/test-CNVMetricsOverlapMethods.R @@ -7,11 +7,11 @@ library(IRanges) library(GenomeInfoDb) -### Tests calculateOverlapRegionsMetric() results +### Tests calculateOverlapMetric() results -context("calculateOverlapRegionsMetric() results") +context("calculateOverlapMetric() results") -test_that("calculateOverlapRegionsMetric() must return error when only one sample present", { +test_that("calculateOverlapMetric() must return error when only one sample present", { demo <- GRangesList() demo[["sample01"]] <- GRanges(seqnames = "chr1", @@ -22,12 +22,12 @@ test_that("calculateOverlapRegionsMetric() must return error when only one sampl error_message <- "at least 2 samples must be present in the segmentData" - expect_error(calculateOverlapRegionsMetric(segmentData = demo, + expect_error(calculateOverlapMetric(segmentData = demo, method = "sorensen"), error_message) }) -test_that("calculateOverlapRegionsMetric() must return error when method is available", { +test_that("calculateOverlapMetric() must return 
error when method is available", { demo <- GRangesList() demo[["sample01"]] <- GRanges(seqnames = "chr1", @@ -43,13 +43,13 @@ test_that("calculateOverlapRegionsMetric() must return error when method is avai paste(dQuote(c("sorensen", "szymkiewicz")), collapse = ", ")) - expect_error(calculateOverlapRegionsMetric(segmentData = demo, + expect_error(calculateOverlapMetric(segmentData = demo, method = "typo"), error_message) }) -test_that("calculateOverlapRegionsMetric() must return 1 when two samples identical with sorensen", { +test_that("calculateOverlapMetric() must return 1 when two samples identical with sorensen", { demo <- GRangesList() demo[["sample01"]] <- GRanges(seqnames = "chr1", @@ -61,7 +61,7 @@ test_that("calculateOverlapRegionsMetric() must return 1 when two samples identi end = c(2004603, 4577608, 31695808)), strand = "*", state = c("AMPLIFICATION", "AMPLIFICATION", "DELETION")) - result <- calculateOverlapRegionsMetric(segmentData=demo, + result <- calculateOverlapMetric(segmentData=demo, method="sorensen") expected <- list() @@ -82,7 +82,7 @@ test_that("calculateOverlapRegionsMetric() must return 1 when two samples identi }) -test_that("calculateOverlapRegionsMetric() must return expected results with sorensen", { +test_that("calculateOverlapMetric() must return expected results with sorensen", { demo <- GRangesList() demo[["sample01"]] <- GRanges(seqnames = "chr1", @@ -99,7 +99,7 @@ test_that("calculateOverlapRegionsMetric() must return expected results with sor state = c("AMPLIFICATION", "AMPLIFICATION", "DELETION")) - result <- calculateOverlapRegionsMetric(segmentData=demo, + result <- calculateOverlapMetric(segmentData=demo, method="sorensen") expected <- list() @@ -123,7 +123,7 @@ test_that("calculateOverlapRegionsMetric() must return expected results with sor }) -test_that("calculateOverlapRegionsMetric() must return expected results with szymkiewicz", { +test_that("calculateOverlapMetric() must return expected results with szymkiewicz", { 
demo <- GRangesList() demo[["sample01"]] <- GRanges(seqnames = "chr1", @@ -139,7 +139,7 @@ test_that("calculateOverlapRegionsMetric() must return expected results with szy end = c(250, 700, 2000)), strand = "*", state = c("AMPLIFICATION", "AMPLIFICATION", "DELETION")) - result <- calculateOverlapRegionsMetric(segmentData=demo, + result <- calculateOverlapMetric(segmentData=demo, metho="szymkiewicz") expected <- list() @@ -163,6 +163,58 @@ test_that("calculateOverlapRegionsMetric() must return expected results with szy }) +test_that("calculateOverlapMetric() must return an error when segmentData has only one sample", { + + error_message <- "at least 2 samples must be present in the segmentData" + + demo <- GRangesList() + demo[["sample01"]] <- GRanges(seqnames = "chr1", + ranges = IRanges(start = c(1905048, 4554832, 31686841), + end = c(2004603, 4577608, 31695808)), strand = "*", + state = c("AMPLIFICATION", "AMPLIFICATION", "DELETION")) + + expect_error(calculateOverlapMetric(demo), error_message) +}) + +test_that("calculateOverlapMetric() must return an error when segmentData has metadata status instead of state", { + + error_message <- paste0("at least one sample doesn't have a metadata column ", + "called \'state\'") + + demo <- GRangesList() + demo[["sample01"]] <- GRanges(seqnames = "chr1", + ranges = IRanges(start = c(1905048, 4554832, 31686841), + end = c(2004603, 4577608, 31695808)), strand = "*", + status = c("AMPLIFICATION", "AMPLIFICATION", "DELETION")) + + demo[["sample02"]] <- GRanges(seqnames = "chr1", + ranges = IRanges(start = c(1905048, 4554832, 31686841), + end = c(2004603, 4577608, 31695808)), strand = "*", + status= c("AMPLIFICATION", "AMPLIFICATION", "DELETION")) + + expect_error(calculateOverlapMetric(demo), error_message) +}) + +test_that("calculateOverlapMetric() must return an error when segmentData doesn't have metadata state", { + + error_message <- paste0("at least one sample doesn't have a metadata column ", + "called \'state\'") + + 
demo <- GRangesList() + demo[["sample01"]] <- GRanges(seqnames = "chr1", + ranges = IRanges(start = c(1905048, 4554832, 31686841), + end = c(2004603, 4577608, 31695808)), + strand = "*") + + demo[["sample02"]] <- GRanges(seqnames = "chr1", + ranges = IRanges(start = c(1905048, 4554832, 31686841), + end = c(2004603, 4577608, 31695808)), + strand = "*") + + expect_error(calculateOverlapMetric(demo), error_message) +}) + + ### Tests plotOverlapMetric() results context("plotOverlapMetric() results") @@ -178,7 +230,7 @@ test_that("plotOverlapMetric() must return error when type wrong", { strand="*", state=c("AMPLIFICATION", "AMPLIFICATION", "DELETION")) - metric <- calculateOverlapRegionsMetric(segmentData=demo, + metric <- calculateOverlapMetric(segmentData=demo, method="szymkiewicz") @@ -212,7 +264,7 @@ test_that("plotOverlapMetric() must return error when colorRange is vector of si ranges = IRanges(start = c(150, 600, 1000), end = c(250, 700, 1500)), strand = "*", state = c("AMPLIFICATION", "AMPLIFICATION", "DELETION")) - metric <- calculateOverlapRegionsMetric(segmentData = demo, + metric <- calculateOverlapMetric(segmentData = demo, method = "szymkiewicz") error_message <- "\'colorRange\' must be be a vector of 2 valid color names." @@ -232,7 +284,7 @@ test_that("plotOverlapMetric() must return error when colorRange is vector of on ranges=IRanges(start=c(150, 600, 1000), end=c(250, 700, 1500)), strand="*", state=c("AMPLIFICATION", "AMPLIFICATION", "DELETION")) - metric <- calculateOverlapRegionsMetric(segmentData = demo, + metric <- calculateOverlapMetric(segmentData = demo, method = "szymkiewicz") error_message <- "\'colorRange\' must be a vector of 2 color names." 
diff --git a/tests/testthat/test-genericCNVMetric.R b/tests/testthat/test-genericCNVMetric.R index 5dee05c..ea61d3e 100644 --- a/tests/testthat/test-genericCNVMetric.R +++ b/tests/testthat/test-genericCNVMetric.R @@ -29,7 +29,7 @@ test_that("print() for CNVMetric object must return identical object", { state = c("AMPLIFICATION", "DELETION")) ## Calculating Sorensen metric - expected <- calculateOverlapRegionsMetric(demo, method="sorensen") + expected <- calculateOverlapMetric(demo, method="sorensen") result <- print(expected) @@ -60,7 +60,7 @@ test_that("is() for CNVMetric object must return identical object", { state = c("AMPLIFICATION", "DELETION")) ## Calculating Sorensen metric - metric <- calculateOverlapRegionsMetric(demo, method="sorensen") + metric <- calculateOverlapMetric(demo, method="sorensen") result <- is.CNVMetric(metric)