Skip to content

Commit

Permalink
Filter out CHOP|PROV|JHU only if checking for missingness in NAACCR code columns
Browse files Browse the repository at this point in the history
  • Loading branch information
danlu1 committed Sep 16, 2024
1 parent 65f371d commit 577b299
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 36 deletions.
12 changes: 8 additions & 4 deletions scripts/case_selection/shared_fxns.R
Original file line number Diff line number Diff line change
Expand Up @@ -294,15 +294,19 @@ remap_patient_characteristics <- function(clinical, existing_patients, ethnicity
#' @param data The data frame to check against
#' @param columns The target columns
check_for_missing_values <- function(data, columns) {
# filter out CHOP, PROV, JHU centers with known NAs
data <- data[!grepl("CHOP|PROV|JHU", data$genie_patient_id), ]
# Check for NA values or empty strings
missingness_col <- c()
for (col in columns) {
if (any(is.na(data[[col]]) | data[[col]] == "" )){
if (col %in% c("naaccr_race_code_tertiary", "naaccr_race_code_secondary")) {
# filter out CHOP, PROV, JHU centers with known NAs in NAACCR code columns
relevant_rows <- data[!grepl("CHOP|PROV|JHU", data$genie_patient_id), ]
} else{
relevant_rows <- data
}
if (any(is.na(relevant_rows[[col]]) | relevant_rows[[col]] == "" )){
missingness_col <- c(col, missingness_col)
}
}
}
if (length(missingness_col) > 0) {
warning(paste0("Warning: Missing or empty values found in column(s): ", paste(missingness_col,collapse=", ")))
}
Expand Down
51 changes: 19 additions & 32 deletions scripts/case_selection/tests/test_shared_fxns.R
Original file line number Diff line number Diff line change
Expand Up @@ -104,51 +104,38 @@ test_that("remap_patient_characteristics works as expected", {
expect_equal(result, expected_output)
})

# Every checked column (including both NAACCR race code columns) is fully
# populated, so no warning may be raised -- even though one of the rows
# belongs to a CHOP patient.
test_that("check_for_missing_values - no missing or empty values", {
  data <- data.frame(
    col1 = c(1, 2, 3),
    col2 = c("a", "b", "c"),
    genie_patient_id = c("a", "b", "CHOP123"),
    naaccr_race_code_tertiary = c("a", "b", "c"),
    naaccr_race_code_secondary = c("a", "b", "c")
  )
  expect_no_warning(
    check_for_missing_values(
      data,
      c("col1", "col2", "naaccr_race_code_tertiary", "naaccr_race_code_secondary")
    )
  )
})

# Patient "b" is not from CHOP, PROV or JHU and has an empty tertiary NAACCR
# code, so that column must be reported. col1 is an ordinary column with an NA
# and an empty string, so it is reported as well. The expected message lists
# the tertiary column before col1, i.e. in reverse of the order the columns
# are passed in.
test_that("check_for_missing_values - missingness values are detected in NAACCR code columns in centers other than CHOP, PROV, JHU", {
  data <- data.frame(
    col1 = c(1, NA, ""),
    col2 = c("a", "b", "c"),
    genie_patient_id = c("CHOP123", "b", "PROV234"),
    naaccr_race_code_tertiary = c("a", "", "c"),
    naaccr_race_code_secondary = c("a", "b", "c")
  )
  expect_warning(
    check_for_missing_values(
      data,
      c("col1", "col2", "naaccr_race_code_tertiary", "naaccr_race_code_secondary")
    ),
    "Warning: Missing or empty values found in column\\(s\\): naaccr_race_code_tertiary, col1"
  )
})

# A single empty string in col2, on the row of a patient outside CHOP, PROV and
# JHU, must produce a warning that names col2 only.
test_that("check_for_missing_values - empty string values are detected in centers other than CHOP, PROV, JHU", {
  checked_cols <- c("col1", "col2")
  input <- data.frame(
    col1 = c(1, 2, 3),
    col2 = c("a", "", "c"),
    genie_patient_id = c("CHOP123", "b", "PROV234")
  )
  expected_msg <- "Warning: Missing or empty values found in column\\(s\\): col2"

  expect_warning(check_for_missing_values(input, checked_cols), expected_msg)
})

# The only missing NAACCR values sit on CHOP and PROV rows (empty tertiary code
# for CHOP123, NA secondary code for PROV234), so neither NAACCR column may
# appear in the warning. col1 and col2 are ordinary columns with missing values
# and are still reported. The pattern pins the text right after "column(s): ",
# so an unexpected NAACCR column at the front of the list fails the match.
test_that("check_for_missing_values - missingness values in NAACCR code columns are ignored in CHOP, PROV, JHU centers", {
  data <- data.frame(
    col1 = c(1, NA, ""),
    col2 = c("a", "", "c"),
    genie_patient_id = c("CHOP123", "b", "PROV234"),
    naaccr_race_code_tertiary = c("", "b", "c"),
    naaccr_race_code_secondary = c("a", "b", NA)
  )
  expect_warning(
    check_for_missing_values(
      data,
      c("col1", "col2", "naaccr_race_code_tertiary", "naaccr_race_code_secondary")
    ),
    "Warning: Missing or empty values found in column\\(s\\): col2, col1"
  )
})

# The missing values in col1 and col2 both sit on the CHOP123 row; the test
# asserts that no warning is raised (a regexp of NA means "expect no warning").
# NOTE(review): this expectation corresponds to excluding CHOP/PROV/JHU rows
# for every column -- confirm it still matches the intended behaviour of
# check_for_missing_values for non-NAACCR columns.
test_that("check_for_missing_values - multiple missing and empty values are detected in CHOP, PROV, JHU centers", {
  checked_cols <- c("col1", "col2")
  input <- data.frame(
    col1 = c(1, NA, 2),
    col2 = c("a", "", "c"),
    genie_patient_id = c("a", "CHOP123", "PROV234")
  )

  expect_warning(check_for_missing_values(input, checked_cols), NA)
})

0 comments on commit 577b299

Please sign in to comment.