Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master'
Browse files Browse the repository at this point in the history
  • Loading branch information
knausb committed Apr 17, 2018
2 parents 8be1379 + 47df2e6 commit c7d96d2
Show file tree
Hide file tree
Showing 38 changed files with 917 additions and 261 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ Description: Facilitates easy manipulation of variant call format (VCF) data.
data may be written to a VCF file (*.vcf.gz). It also may be converted into
other popular R objects (e.g., genlight, DNAbin). VcfR provides a link between
VCF data and familiar R software.
Version: 1.7.0
Version: 1.8.0
Authors@R: c(
person(c("Brian", "J."), "Knaus", role = c("cre", "aut"),
email = "[email protected]", comment = c(ORCID = "0000-0003-1665-4343")),
Expand Down Expand Up @@ -41,7 +41,6 @@ Imports:
stats,
stringr,
tibble,
tidyr,
utils,
vegan,
viridisLite
Expand All @@ -53,7 +52,8 @@ Suggests:
reshape2,
rmarkdown,
scales,
testthat
testthat,
tidyr
VignetteBuilder: knitr
License: GPL
RoxygenNote: 6.0.1
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ export(NM2winNM)
export(addID)
export(alleles2consensus)
export(ann2chromR)
export(check_keys)
export(chromR2vcfR)
export(chromo)
export(chromoqc)
Expand Down
13 changes: 12 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,17 @@ At the present, this is simply a to-do list for ideas to include in the next maj

* Move 'FORMAT' column to its own slot. We can then cbind FORMAT and gt when passing to compiled code.
This may have been addressed at 64a308ba50b9119108e8946737460de5997b805b by adding `samples` to vcfR method `[`.
* In issue #92 (vcfR2genlight big data #92), JimWhiting91 has documented that `extract.gt()` could be greatly improved with multithreading. While he used `mclapply()` I do not feel this is the best solution because it does not work on Windows. I think a better solution would be [RCppParallel](https://rcppcore.github.io/RcppParallel/) because this should work on all CRAN platforms.


# vcfR 1.8.0
Released on CRAN 2018-04-17
* Attempted to address CRAN's 'Note: break used in wrong context: no loop is visible' issue.
* `.vcf_stats_gz()` reports number of elements in header as well as the file's last line. This is used by `read.vcfR()` to check for poorly formed files.
* `show` method for vcfR now queries @fix instead of @gt.
* `check_keys()` checks key definitions in the meta section to make sure they are unique.
* `freq_peak_plot()` has parameter `posUnits` to adjust units of scatterplot.
* `vcfR2migrate()` manual discusses Unix and Windows line endings.


# vcfR 1.7.0
Expand All @@ -30,7 +41,7 @@ Released on CRAN 2017-12-08.
* removed `.Call()` statements to standardize style.
* Created `vcfR2migrate()` to output MigrateN format data.
* Addressed clang-UBSAN memory leak in `freq_peak()`.
* Created `pairwise_genetic_diff()` to calculate pairwise differentiation.
* Created `pairwise_genetic_diff()` to calculate pairwise differentiation. Thanks Javier!

# vcfR 1.5.0
Released on CRAN 2017-05-18.
Expand Down
58 changes: 58 additions & 0 deletions R/check_keys.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@


#### check_keys ####
#' @rdname check_keys
#' @aliases check_keys
#'
#' @title Check that INFO and FORMAT keys are unique
#'
#' @param x an oblect of class vcfR
#'
#' @description
#' The INFO and FORMAT columns contain information in key-value pairs.
#' If for some reason a key is not unique it will create issues in retrieving this information.
#' This function checks the keys defined in the meta section to make sure they are unique.
#' Note that it does not actually check the INFO and FORMAT columns, just their definitions in the meta section.
#' This is because each variant can have different information in their INFO and META cells.
#' Checking these on large files will tehrefore come with a performance cost.
#'
#' @seealso queryMETA()
#'
#' @examples
#' data(vcfR_test)
#' check_keys(vcfR_test)
#' queryMETA(vcfR_test)
#' queryMETA(vcfR_test, element = 'DP')
#' # Note that DP occurs as unique in INFO and FORMAT but they may be different.
#'
#'
#' @export
check_keys <- function(x) {
if(class(x) != 'vcfR'){
stop( paste('Expecting a vcfR object, instead received:', class(x)) )
}

# First check INFO.
myKeys <- grep('INFO', x@meta, value = TRUE)
myKeys <- sub('##INFO=<ID=','',myKeys)
myKeys <- unlist(lapply(strsplit(myKeys, ','), function(x){x[1]}))
myKeys <- table(myKeys)
myKeys <- myKeys[myKeys > 1]
if( length(myKeys) > 0){
warning(paste("The following INFO key occurred more than once:", names(myKeys), '\n'))
}

# Check FORMAT.
myKeys <- grep('FORMAT', x@meta, value = TRUE)
myKeys <- sub('##FORMAT=<ID=','',myKeys)
myKeys <- unlist(lapply(strsplit(myKeys, ','), function(x){x[1]}))
myKeys <- table(myKeys)
myKeys <- myKeys[myKeys > 1]
if( length(myKeys) > 0){
warning(paste("The following FORMAT key occurred more than once:", names(myKeys), '\n'))
}

}



35 changes: 30 additions & 5 deletions R/freq_peak_plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#' Converts allele balance data produced by \code{freq_peak()} to a copy number by assinging the allele balance data (frequencies) to its closest expected ratio.
#'
#' @param pos chromosomal position of variants
#' @param posUnits units ('bp', 'Kbp', 'Mbp', 'Gbp') for `pos` to be converted to in the main plot
#' @param ab1 matrix of allele balances for allele 1
#' @param ab2 matrix of allele balances for allele 2
#' @param fp1 feq_peak object for allele 1
Expand All @@ -26,16 +27,15 @@
#' The only required information is a vector of chromosomal positions, however this is probably not going to create an interesting plot.
#'
#'
#'
#'
#'
#'
#' @return An invisible NULL.
#'
#' @seealso
#' freq_peak,
#' peak_to_ploid
#'
#' @examples
#'
#' # An empty plot.
#' freq_peak_plot(pos=1:40)
#'
#' data(vcfR_example)
Expand All @@ -59,6 +59,7 @@
#'
#' @export
freq_peak_plot <- function(pos,
posUnits = 'bp',
ab1 = NULL,
ab2 = NULL,
fp1 = NULL,
Expand All @@ -81,6 +82,30 @@ freq_peak_plot <- function(pos,
stop(msg)
}

# Handle x-axis units.
if( posUnits == 'bp'){
# Don't need to do anything.
} else if(posUnits == 'Kbp'){
pos <- pos/1e3
fp1$wins[,'START_pos'] <- fp1$wins[,'START_pos']/1e3
fp1$wins[,'END_pos'] <- fp1$wins[,'END_pos']/1e3
fp2$wins[,'START_pos'] <- fp2$wins[,'START_pos']/1e3
fp2$wins[,'END_pos'] <- fp2$wins[,'END_pos']/1e3
} else if(posUnits == 'Mbp'){
pos <- pos/1e6
fp1$wins[,'START_pos'] <- fp1$wins[,'START_pos']/1e6
fp1$wins[,'END_pos'] <- fp1$wins[,'END_pos']/1e6
fp2$wins[,'START_pos'] <- fp2$wins[,'START_pos']/1e6
fp2$wins[,'END_pos'] <- fp2$wins[,'END_pos']/1e6
} else if(posUnits == 'Gbp'){
pos <- pos/1e9
fp1$wins[,'START_pos'] <- fp1$wins[,'START_pos']/1e9
fp1$wins[,'END_pos'] <- fp1$wins[,'END_pos']/1e9
fp2$wins[,'START_pos'] <- fp2$wins[,'START_pos']/1e9
fp2$wins[,'END_pos'] <- fp2$wins[,'END_pos']/1e9
}
xlabel <- paste('Position (', posUnits, ')', sep = '')

# Handle color
col1 <- grDevices::col2rgb(col1, alpha = FALSE)
col2 <- grDevices::col2rgb(col2, alpha = FALSE)
Expand All @@ -101,7 +126,7 @@ freq_peak_plot <- function(pos,

# Initialize plot
plot( range(pos, na.rm = TRUE), c(0,1), ylim=c(0,1), type="n", yaxt='n',
main = "", xlab = "Position", ylab = "Allele balance")
main = "", xlab = xlabel, ylab = "Allele balance")
graphics::axis(side=2, at=c(0,0.25,0.333,0.5,0.666,0.75,1),
labels=c(0,'1/4','1/3','1/2','2/3','3/4',1), las=1)
graphics::abline(h=c(0.2,0.25,0.333,0.5,0.666,0.75,0.8), col=8)
Expand Down
2 changes: 2 additions & 0 deletions R/genetic_diff.R
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,8 @@ calc_nei <- function(x1, x2){
#' myPops <- as.factor(rep(c('a','b'), each = 9))
#' myDiff <- genetic_diff(vcf, myPops, method = "nei")
#' colMeans(myDiff[,c(3:8,11)], na.rm = TRUE)
#' hist(myDiff$Gprimest, xlab = expression(italic("G'"["ST"])),
#' col='skyblue', breaks = seq(0, 1, by = 0.01))
#'
#'
genetic_diff <- function(vcf, pops, method = "nei"){
Expand Down
Loading

0 comments on commit c7d96d2

Please sign in to comment.