Skip to content

Commit

Permalink
add is.voiced to collapse voiced/non-voiced segments which are too short
Browse files Browse the repository at this point in the history
  • Loading branch information
jwijffels committed Feb 6, 2024
1 parent 005faed commit 8ce252e
Show file tree
Hide file tree
Showing 7 changed files with 106 additions and 11 deletions.
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ Encoding: UTF-8
Depends:
R (>= 2.10)
Imports:
Rcpp (>= 0.11.5)
Rcpp (>= 0.11.5),
utils
Suggests:
av
LinkingTo:
Expand Down
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# Generated by roxygen2: do not edit by hand

S3method(is.voiced,"webrtc-gmm")
S3method(print,VAD)
export(VAD)
export(is.voiced)
importFrom(Rcpp,evalCpp)
importFrom(utils,head)
useDynLib(audio.vadwebrtc)
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
## CHANGES IN audio.vadwebrtc VERSION 0.1

- Added function VAD to detect voice in audio
- Added is.voiced generic and is.voiced.webrtc-gmm to collapse voiced/non-voiced segments which are too short
- Initial version based on webrtc commit 7f457533a2ee582865f50210e7460af90f78f0b6 (Wed Jan 17 19:03:20 2024)


1 change: 1 addition & 0 deletions R/pkg.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#' @importFrom Rcpp evalCpp
#' @importFrom utils head
#' @useDynLib audio.vadwebrtc
NULL
72 changes: 63 additions & 9 deletions R/vad.R
Original file line number Diff line number Diff line change
Expand Up @@ -110,12 +110,66 @@ print.VAD <- function(x, ...){



# collapse_segments <- function(x, collapse_silence_secs = 1){
# x$has_voice <- ifelse(x$has_voice, x$has_voice, ifelse((x$end - x$start) < collapse_silence_secs, TRUE, x$has_voice))
# grp <- rle(x$has_voice)
# x$vad_segment <- rep(seq_along(grp$lengths), grp$lengths)
# x <- data.table::as.data.table(x)
# x <- x[, list(start = min(start), end = max(end)), by = list(vad_segment, has_voice)]
# x
# }
#voiced <- collapse_segments(vad$vad_segments)
#' @title Extract the voiced segments from a Voice Activity Detection (VAD) object
#' @description Post-processes the result of a Voice Activity Detection by collapsing
#' sequences of voiced/non-voiced segments in 2 steps
#' \enumerate{
#' \item{non-voiced segments which are small in duration (default < 1 second) are first considered voiced}
#' \item{next, voiced segments which last less than a number of seconds (default < 1 second) are considered non-voiced}
#' }
#' @param x an object of class VAD
#' @param units character string with the units to use - either 'seconds' or 'milliseconds'
#' @param ... further arguments passed on to the methods
#' @return A data.frame with columns vad_segment, start, end, duration, has_voice indicating where in the audio voice is detected
#' @export
#' @examples
#' file <- system.file(package = "audio.vadwebrtc", "extdata", "test_wav.wav")
#' vad <- VAD(file, mode = "normal", milliseconds = 30)
#' vad$vad_segments
#' voiced <- is.voiced(vad, silence_min = 0.2)
#' voiced
#' voiced <- is.voiced(vad, silence_min = 200, units = "milliseconds")
#' voiced
is.voiced <- function(x, ...) UseMethod("is.voiced")

#' @rdname is.voiced
#' @param silence_min minimum duration of a non-voiced segment, expressed in \code{units}.
#' Non-voiced segments which are shorter are considered voiced.
#' Defaults to 1 second (1000 if \code{units} is 'milliseconds').
#' @param voiced_min minimum duration of a voiced segment, expressed in \code{units}.
#' After the short silences have been merged, voiced segments which are shorter are considered non-voiced.
#' Defaults to 1 second (1000 if \code{units} is 'milliseconds').
#' @export
"is.voiced.webrtc-gmm" <- function(x, silence_min = ifelse(units == "milliseconds", 1000, 1), voiced_min = ifelse(units == "milliseconds", 1000, 1), units = c("seconds", "milliseconds"), ...){
  ## Resolve units before silence_min / voiced_min are touched: their defaults
  ## are evaluated lazily and need units to be a single string by then
  units <- match.arg(units)
  x <- x$vad_segments
  ## The thresholds are compared against end - start, which is on the seconds scale
  ## (start/end are only multiplied by 1000 on output), so only thresholds
  ## given in milliseconds need converting
  if(units == "milliseconds"){
    silence_min <- silence_min / 1000
    voiced_min <- voiced_min / 1000
  }
  ## Consider silences shorter than silence_min as voiced
  x$has_voice <- x$has_voice | ((x$end - x$start) < silence_min)
  ## Renumber the segments so that consecutive rows with the same has_voice share an id, then merge them
  grp <- rle(x$has_voice)
  x$vad_segment <- rep(seq_along(grp$lengths), grp$lengths)
  x <- segment_collapse(x)
  ## Consider voiced elements shorter than voiced_min as silences
  x$has_voice <- x$has_voice & !((x$end - x$start) < voiced_min)
  grp <- rle(x$has_voice)
  x$vad_segment <- rep(seq_along(grp$lengths), grp$lengths)
  x <- segment_collapse(x)
  ## Report start/end in the units which were requested
  if(units == "milliseconds"){
    x$start <- x$start * 1000
    x$end <- x$end * 1000
  }
  x$duration <- x$end - x$start
  x <- x[, c("vad_segment", "start", "end", "duration", "has_voice"), drop = FALSE]
  x
}


## Merge all rows of x which belong to the same vad_segment / has_voice combination into 1 row.
## x: a data.frame with columns vad_segment, start, end and has_voice
## Returns a data.frame with columns vad_segment, start, end, has_voice, ordered by vad_segment,
## where start is the smallest start and end the largest end of the merged rows
segment_collapse <- function(x){
  if(nrow(x) == 0){
    ## Nothing to merge: rbind over an empty list would give NULL, return an empty data.frame instead
    return(data.frame(vad_segment = integer(0),
                      start = numeric(0),
                      end = numeric(0),
                      has_voice = logical(0),
                      stringsAsFactors = FALSE))
  }
  ## drop = TRUE: only keep the vad_segment / has_voice combinations which are present in the data
  groups <- split(x, list(x$vad_segment, x$has_voice), drop = TRUE)
  collapsed <- lapply(groups, FUN = function(segment){
    data.frame(vad_segment = head(segment$vad_segment, n = 1),
               start = min(segment$start),
               end = max(segment$end),
               has_voice = head(segment$has_voice, n = 1),
               stringsAsFactors = FALSE)
  })
  collapsed <- do.call(rbind, collapsed)
  ## split orders the groups by has_voice first, put them back in order of the segment id
  collapsed <- collapsed[order(collapsed$vad_segment, decreasing = FALSE), ]
  rownames(collapsed) <- NULL
  collapsed
}
2 changes: 1 addition & 1 deletion man/VAD.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

35 changes: 35 additions & 0 deletions man/is.voiced.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 8ce252e

Please sign in to comment.