From 24ab547dd9f0dfd8c2593ae29bdfbea58aea5c97 Mon Sep 17 00:00:00 2001 From: Rachael Stickland <50215726+RayStick@users.noreply.github.com> Date: Fri, 16 Aug 2024 15:45:25 +0100 Subject: [PATCH] unit testings & renaming & docs rename update names this is not user facing rename updated user_prompt and saved output dataframes add tidyverse naming, packages and user prompt functions rename rename functions and variables to simplify documentation changes rename function - correction add section headers and add new func fix bug simplify in order to generalise add new func match latest funcs update docs naming updated fix bug naming update new sub functions add testthat unit test complete move to danger unit test complete doc unit test complete unit test complete unit test complete unit test complete unit test complete unit test complete unit test complete unit test complete unit test complete unit test complete remove inputs to function that are static add doc add doc manuals for new functions --- DESCRIPTION | 8 +- NAMESPACE | 7 +- R/browseMetadata.R | 308 ++++++++++++++ R/browseMetadata_compare_outputs.R | 115 ++++++ ...put.R => browseMetadata_convert_outputs.R} | 6 +- R/compare_sessions.R | 148 ------- R/concensus_on_mismatch.R | 33 ++ R/copy_previous.R | 38 ++ R/data-Output.R | 25 ++ R/data-log_Output.R | 28 ++ R/domain_mapping.R | 376 ------------------ R/end_plot.R | 36 ++ R/join_outputs.R | 19 + R/json_table_to_df.R | 29 ++ R/load_data.R | 50 +++ R/ref_plot.R | 25 ++ R/user_categorisation.R | 3 +- R/user_categorisation_loop.R | 74 ++++ R/user_prompt.R | 66 +++ R/user_prompt_list.R | 34 ++ R/valid_comparison.R | 32 ++ README.md | 16 +- data/Output.rda | Bin 0 -> 183 bytes data/log_Output.rda | Bin 0 -> 252 bytes man/Output.Rd | 31 ++ man/browseMetadata-package.Rd | 1 - man/browseMetadata.Rd | 59 +++ ...s.Rd => browseMetadata_compare_outputs.Rd} | 16 +- ...t.Rd => browseMetadata_convert_outputs.Rd} | 12 +- man/concensus_on_mismatch.Rd | 25 ++ man/copy_previous.Rd | 16 + man/domain_mapping.Rd | 40 -- man/end_plot.Rd | 22 + man/join_outputs.Rd | 20 + man/json_table_to_df.Rd | 21 + man/load_data.Rd | 24 ++ man/log_Output.Rd | 34 ++ man/ref_plot.Rd | 19 + man/user_categorisation.Rd | 2 +- man/user_categorisation_loop.Rd | 44 ++ man/user_prompt.Rd | 34 ++ man/user_prompt_list.Rd | 23 ++ man/valid_comparison.Rd | 31 ++ tests/testthat.R | 12 + tests/testthat/test-concensus_on_mismatch.R | 65 +++ tests/testthat/test-copy_previous.R | 45 +++ tests/testthat/test-end_plot.R | 33 ++ tests/testthat/test-join_outputs.R | 60 +++ tests/testthat/test-json_table_to_df.R | 41 ++ tests/testthat/test-load_data.R | 45 +++ tests/testthat/test-ref_plot.R | 22 + tests/testthat/test-user_categorisation.R | 31 ++ .../testthat/test-user_categorisation_loop.R | 50 +++ tests/testthat/test-user_prompt.R | 61 +++ tests/testthat/test-user_prompt_list.R | 31 ++ tests/testthat/test-valid_comparison.R | 19 + 56 files changed, 1866 insertions(+), 599 deletions(-) create mode 100755 R/browseMetadata.R create mode 100644 R/browseMetadata_compare_outputs.R rename R/{convert_output.R => browseMetadata_convert_outputs.R} (91%) delete mode 100644 R/compare_sessions.R create mode 100644 R/concensus_on_mismatch.R create mode 100644 R/copy_previous.R create mode 100644 R/data-Output.R create mode 100644 R/data-log_Output.R delete mode 100755 R/domain_mapping.R create mode 100644 R/end_plot.R create mode 100644 R/join_outputs.R create mode 100644 R/json_table_to_df.R create mode 100644 R/load_data.R create mode 100644 R/ref_plot.R create mode 100644 R/user_categorisation_loop.R create mode 100644 R/user_prompt.R create mode 100644 R/user_prompt_list.R create mode 100644 R/valid_comparison.R create mode 100644 data/Output.rda create mode 100644 data/log_Output.rda create mode 100644 man/Output.Rd create mode 100644 man/browseMetadata.Rd rename man/{compare_sessions.Rd => browseMetadata_compare_outputs.Rd} (69%) rename man/{convert_output.Rd => browseMetadata_convert_outputs.Rd} (66%) create mode 100644 man/concensus_on_mismatch.Rd create mode 100644 man/copy_previous.Rd delete mode 100644 man/domain_mapping.Rd create mode 100644 man/end_plot.Rd create mode 100644 man/join_outputs.Rd create mode 100644 man/json_table_to_df.Rd create mode 100644 man/load_data.Rd create mode 100644 man/log_Output.Rd create mode 100644 man/ref_plot.Rd create mode 100644 man/user_categorisation_loop.Rd create mode 100644 man/user_prompt.Rd create mode 100644 man/user_prompt_list.Rd create mode 100644 man/valid_comparison.Rd create mode 100644 tests/testthat.R create mode 100644 tests/testthat/test-concensus_on_mismatch.R create mode 100644 tests/testthat/test-copy_previous.R create mode 100644 tests/testthat/test-end_plot.R create mode 100644 tests/testthat/test-join_outputs.R create mode 100644 tests/testthat/test-json_table_to_df.R create mode 100644 tests/testthat/test-load_data.R create mode 100644 tests/testthat/test-ref_plot.R create mode 100644 tests/testthat/test-user_categorisation.R create mode 100644 tests/testthat/test-user_categorisation_loop.R create mode 100644 tests/testthat/test-user_prompt.R create mode 100644 tests/testthat/test-user_prompt_list.R create mode 100644 tests/testthat/test-valid_comparison.R diff --git a/DESCRIPTION b/DESCRIPTION index 4e24e2d0..d620b3fc 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -11,7 +11,8 @@ Encoding: UTF-8 LazyData: true RoxygenNote: 7.3.2 Depends: - R (>= 2.10) + R (>= 2.10), + tidyverse Imports: cli, devtools, @@ -19,9 +20,12 @@ Imports: ggplot2, grid, gridExtra, + mockery, rjson Suggests: knitr, - rmarkdown + rmarkdown, + testthat (>= 3.0.0) VignetteBuilder: knitr URL: https://aim-rsf.github.io/browseMetadata/ +Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index 8a0cae22..33c11259 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,9 +1,8 @@ # Generated by roxygen2: do not edit by hand -export(compare_sessions) -export(convert_output) -export(domain_mapping) -export(user_categorisation) +export(browseMetadata) +export(browseMetadata_compare_outputs) +export(browseMetadata_convert_outputs) import(cli) import(devtools) import(ggplot2) diff --git a/R/browseMetadata.R b/R/browseMetadata.R new file mode 100755 index 00000000..79f185c9 --- /dev/null +++ b/R/browseMetadata.R @@ -0,0 +1,308 @@ +#' browseMetadata +#' +#' This function will read in the metadata file for a chosen dataset, loop +#' through all the data elements, and ask the user to catergorise/label each +#' data element as belonging to one or more domains.\cr \cr +#' The domains will appear in the Plots tab and dataset information will be +#' printed to the R console, for the user's reference in making these +#' categorisations. \cr \cr +#' These categorisations will be saved to a csv file, alongside a log file which +#' summarises the session details.To speed up this process, some +#' auto-categorisations will be made by the function for commonly occurring data +#' elements and categorisations for the same data element can be copied from one +#' table to another. \cr \cr +#' Example inputs are provided within the package data, for the user to run this +#' function in a demo mode. +#' @param json_file The metadata file. This should be downloaded from the +#' metadata catalogue as a json file. See +#' 'data-raw/maternity_indicators_dataset_(mids)_20240105T132210.json' for an +#' example download. +#' @param domain_file The domain list file. This should be a csv file created by +#' the user, with each domain listed on a separate line. See +#' 'data-raw/domain_list_demo.csv' for a template. +#' @param look_up_file The look-up table file, with auto-categorisations. By +#' default, the code uses 'data/look-up.rda'. The user can provide their own +#' look-up table in the same format as 'data-raw/look-up.csv'. +#' @param output_dir The path to the directory where the two csv output files +#' will be saved. By default, the current working directory is used. +#' @param table_copy Turn on copying between tables (TRUE or FALSE, default +#' TRUE). If TRUE, categorisations you made for all other tables in this dataset +#' will be copied over (if 'OUTPUT_' files are found in output_dir). +#' @return The function will return two csv files: 'OUTPUT_' which contains the +#' mappings and 'LOG_' which contains details about the dataset and session. +#' @examples +#' # Run in demo mode by providing no inputs: browseMetadata() +#' # Demo mode will use the /data files provided in this package +#' # For more guidance, refer to the package README.md file. +#' @export +#' @import ggplot2 +#' @importFrom graphics plot.new +#' @importFrom utils read.csv write.csv +#' @importFrom dplyr %>% arrange count group_by distinct +#' @importFrom tidyverse add_row + +browseMetadata <- function( + json_file = NULL, + domain_file = NULL, + look_up_file = NULL, + output_dir = NULL, + table_copy = TRUE) { + + timestamp_now <- format(Sys.time(), "%Y-%m-%d-%H-%M-%S") + + # DEFINE INPUTS AND OUTPUTS ---- + + ## Set output_dir to current wd if user has not provided it + if (is.null(output_dir)) { + output_dir = getwd() + } + + ## Use 'load_data.R' to collect inputs (defaults or user inputs) + data <- load_data(json_file, domain_file, look_up_file) + + ## Extract Dataset from json_file + Dataset <- data$meta_json$dataModel + Dataset_Name <- Dataset$label + + ## Read in prepared output data frames + log_Output <- get("log_Output") + Output <- get("Output") + + ## Use 'ref_plot.R' to plot domains for the user's ref (save df for later use) + df_plots <- ref_plot(data$domains) + + ## Use 'user_prompt.R' to get user initials for the log file + User_Initials <- user_prompt( + prompt_text = "Enter your initials: ", any_keys = TRUE) + + # DISPLAY DATASET ---- + + ## Use 'user_prompt.R' to print info about dataset based on user input + pre_prompt_text <- data.frame(Heading = logical(0), Text = character(0)) + pre_prompt_text <- pre_prompt_text %>% add_row(Heading = TRUE, + Text = 'Dataset Name') + pre_prompt_text <- pre_prompt_text %>% add_row( + Heading = FALSE,Text = paste(Dataset_Name)) + pre_prompt_text <- pre_prompt_text %>% add_row( + Heading = TRUE,Text = 'Dataset File Exported By') + pre_prompt_text <- pre_prompt_text %>% add_row( + Heading = FALSE, + Text = paste(data$meta_json$exportMetadata$exportedBy, + "at",data$meta_json$exportMetadata$exportedOn)) + prompt_text <- "Would you like to read a description of the dataset? (y/n): " + post_yes_text <- data.frame(Heading = logical(0), Text = character(0)) + post_yes_text <- post_yes_text %>% add_row( + Heading = TRUE, Text = 'Dataset Description') + post_yes_text <- post_yes_text %>% add_row( + Heading = FALSE,Text = paste(Dataset$description)) + + user_prompt( + pre_prompt_text = pre_prompt_text, + prompt_text = prompt_text, + any_keys = FALSE, + post_yes_text = post_yes_text + ) + + # WHICH TABLES FROM THE DATASET? ---- + ## Use 'user_prompt_list.R' to ask user which tables to process + nTables <- length(Dataset$childDataClasses) + table_df <- data.frame(Table_Name = character(0), Table_Number = integer(0)) + for (dc in 1:nTables) { + table_df <- table_df %>% add_row( + Table_Number = dc, + Table_Name = Dataset$childDataClasses[[dc]]$label) + } + + print(table_df,row.names = FALSE) + nTables_Process <- user_prompt_list( + prompt_text = + paste('Found',nTables,'table(s) in this Dataset.','Enter table numbers', + 'you want to process (one table number on each line):'), + list_allowed = seq(from = 1, to = nTables, by = 1), + empty_allowed = FALSE + ) + + # PROCESS EACH CHOSEN TABLE ---- + ## Extract each Table + for (dc in unique(nTables_Process)) { + cat("\n") + cli_alert_info("Processing Table {dc} of {nTables}") + cli_h1("Table Name") + Table_name <- Dataset$childDataClasses[[dc]]$label + cat(Table_name,"\n",fill = TRUE) + + ### Use 'copy_previous.R' to copy from previous output(s) if they exist + if (table_copy == TRUE) { + output <- copy_previous(Dataset_Name,output_dir) + df_prev_exist <- output$df_prev_exist + df_prev <- output$df_prev + } else { + df_prev_exist <- FALSE + } + + ### Use 'user_prompt.R' to ask if user wants to read desc of table + prompt_text = "Would you like to read a description of the table? (y/n): " + post_yes_text <- data.frame(Heading = logical(0), Text = character(0)) + post_yes_text <- post_yes_text %>% add_row( + Heading = TRUE, Text = 'Table Description') + post_yes_text <- post_yes_text %>% add_row( + Heading = FALSE, + Text = paste(Dataset$childDataClasses[[dc]]$description) + ) + + user_prompt( + prompt_text = prompt_text, + any_keys = FALSE, + post_yes_text = post_yes_text + ) + + table_note <- readline(paste('Optional free text note about this table', + '(or press enter to continue): ')) + + ### Use 'json_table_to_df.R' to extract table from meta_json into a df + Table_df <- json_table_to_df(Dataset = Dataset,n = dc) + + ### Ask user which data elements to process + + cli_alert_info(paste('There are', as.character(nrow(Table_df)), + 'data elements (variables) in this table.')) + + if (data$demo_mode == TRUE) { + start_v = 1 + end_v = min(20, nrow(Table_df)) + } else { + #### Use 'user_prompt_list.R' to ask user which data elements + start_v <- user_prompt_list( + prompt_text = 'Start variable (write 1 to process all): ', + list_allowed = seq(from = 1, to = nrow(Table_df), by = 1), + empty_allowed) + end_v <- user_prompt_list( + prompt_text = paste('End variable (write', + as.character(nrow(Table_df)), + 'to process all):'), + list_allowed = seq(from = start_v, to = nrow(Table_df), by = 1), + empty_allowed) + } + + ### Use 'user_categorisation_loop.R' to copy or request from user + + Output <- user_categorisation_loop(start_v, + end_v, + Table_df, + df_prev_exist, + df_prev, + lookup = data$lookup, + df_plots, + Output) + + Output$timestamp <- timestamp_now + Output$Table <- Table_name + + ### Review auto categorized data elements + #### Use 'user_prompt_list.R' to ask the user which rows to edit + cat('\n') + Output_auto <- subset(Output, Note == 'AUTO CATEGORISED') + Output_auto <- Output_auto[, c("DataElement", "Domain_code", "Note")] + print(Output_auto) + + auto_row <- user_prompt_list( + prompt_text = paste('These are the auto categorised data elements.', + 'Enter row numbers for those you want to edit: '), + list_allowed = which(Output$Note == 'AUTO CATEGORISED'), + empty_allowed = TRUE + ) + + if (length(auto_row) != 0) { + for (data_v_auto in unique(auto_row)) { + ##### collect user responses with with 'user_categorisation.R' + decision_output <- user_categorisation( + Table_df$Label[data_v_auto], + Table_df$Description[data_v_auto], + Table_df$Type[data_v_auto], + max(df_plots$Code$Code) + ) + ##### input user responses into output + Output$Domain_code[data_v_auto] <- decision_output$decision + Output$Note[data_v_auto] <- decision_output$decision_note + } + } + + ### Review user categorized data elements (optional) + #### Use 'user_prompt.R' to ask the user if they want to review + #### Use 'user_prompt_list.R' to ask the user which rows to edit + review_cats <- user_prompt( + prompt_text = "Would you like to review your categorisations? (y/n): ", + any_keys = FALSE) + if (review_cats == 'Y' | review_cats == 'y') { + Output_not_auto <- subset(Output, Note != 'AUTO CATEGORISED') + Output_not_auto['Note (first 12 chars)'] <- + substring(Output_not_auto$Note, 1, 11) + print(Output_not_auto[, + c("DataElement", + "Domain_code", + "Note (first 12 chars)")]) + not_auto_row <- user_prompt_list( + prompt_text = paste('These are the data elements you categorised.', + 'Enter row numbers for those you want to edit: '), + list_allowed = which(Output$Note != 'AUTO CATEGORISED'), + empty_allowed = TRUE + ) + if (length(not_auto_row) != 0) { + for (data_v_not_auto in unique(not_auto_row)) { + ##### collect user responses with with 'user_categorisation.R' + decision_output <- user_categorisation( + Table_df$Label[data_v_not_auto], + Table_df$Description[data_v_not_auto], + Table_df$Type[data_v_not_auto], + max(df_plots$Code$Code) + ) + ##### input user responses into output + Output$Domain_code[data_v_not_auto] <- decision_output$decision + Output$Note[data_v_not_auto] <- decision_output$decision_note + } + } + } + + ### Fill in log output + log_Output$timestamp = timestamp_now + log_Output$browseMetadata = packageVersion("browseMetadata") + log_Output$Initials = User_Initials + log_Output$MetaDataVersion = Dataset$documentationVersion + log_Output$MetaDataLastUpdated = Dataset$lastUpdated + log_Output$DomainListDesc = data$DomainListDesc + log_Output$Dataset = Dataset_Name + log_Output$Table = Table_name + log_Output$Table_note = table_note + + ### Create output file names + csv_fname <- paste0("OUTPUT_",gsub(" ", "", Dataset_Name),"_", + gsub(" ", "", Table_name),"_",timestamp_now,".csv") + csv_log_fname <- paste0("LOG_",gsub(" ", "", Dataset_Name),"_", + gsub(" ", "", Table_name),"_",timestamp_now,".csv") + png_fname <- paste0("PLOT_",gsub(" ", "", Dataset_Name),"_", + gsub(" ", "", Table_name),"_",timestamp_now,".png") + + ### Save final categorisations for this Table + utils::write.csv(Output,paste(output_dir, csv_fname, sep = '/'), + row.names = FALSE) + utils::write.csv(log_Output,paste(output_dir, csv_log_fname, sep = '/'), + row.names = FALSE) + cat("\n") + cli_alert_success("Final categorisations saved in:\n{csv_fname}") + cli_alert_success("Session log saved in:\n{csv_log_fname}") + + ### Create and save a summary plot + end_plot_save <- end_plot(df = Output,Table_name, + ref_table = df_plots$Domain_table) + ggsave( + plot = end_plot_save$full_plot, + paste(output_dir, png_fname, sep = '/'), + width = 14, + height = 8, + units = "in" + ) + cli_alert_success("A summary plot has been saved:\n{png_fname}") + + } # end of loop for each table + +} # end of function diff --git a/R/browseMetadata_compare_outputs.R b/R/browseMetadata_compare_outputs.R new file mode 100644 index 00000000..fe24fd08 --- /dev/null +++ b/R/browseMetadata_compare_outputs.R @@ -0,0 +1,115 @@ +#' browseMetadata_compare_outputs +#' +#' This function is to be used after running the browseMetadata function. \cr \cr +#' It compares csv outputs from two sessions, finds their differences, and asks for a consensus. \cr \cr +#' +#' @param session_dir This directory should contain 2 csv files for each session (LOG_ and OUTPUT_), 4 csv files in total. +#' @param session1_base Base file name for session 1 e.g. 'NationalCommunityChildHealthDatabase(NCCHD)_BLOOD_TEST_2024-07-05-16-07-38' +#' @param session2_base Base file name for session 1 e.g. 'NationalCommunityChildHealthDatabase(NCCHD)_BLOOD_TEST_2024-07-08-12-03-30' +#' @param json_file The full path to the metadata file used when running browseMetadata (should be the same for session 1 and session 2) +#' @param domain_file The full path to the domain file used when running browseMetadata (should be the same for session 1 and session 2) +#' @return It returns a csv output, which represents the consensus decisions between session 1 and session 2 +#' @importFrom dplyr left_join select join_by +#' @export + +browseMetadata_compare_outputs <- function(session_dir,session1_base,session2_base,json_file,domain_file) { + + timestamp_now <- format(Sys.time(), "%Y-%m-%d-%H-%M-%S") + + # DEFINE INPUTS ---- + + csv_1a <- read.csv(paste0(session_dir,'/','LOG_',session1_base,'.csv')) + csv_2a <- read.csv(paste0(session_dir,'/','LOG_',session2_base,'.csv')) + csv_1b <- read.csv(paste0(session_dir,'/','OUTPUT_',session1_base,'.csv')) + csv_2b <- read.csv(paste0(session_dir,'/','OUTPUT_',session2_base,'.csv')) + + meta_json <- rjson::fromJSON(file = json_file) + domains <- read.csv(domain_file, header = FALSE) + + Dataset <- meta_json$dataModel + Dataset_Name <- Dataset$label + + # VALIDATION CHECKS ---- + + ## Use 'valid_comparison.R' to check if sessions can be compared to each other and to the json (min requirements): + + valid_comparison(input1 = csv_1a$Dataset[1], + input2 = csv_2a$Dataset[1], + severity = 'danger', + severity_text = 'Session 1 and 2 have different datasets') + + valid_comparison(input1 = csv_1a$Table[1], + input2 = csv_2a$Table[1], + severity = 'danger', + severity_text = 'Session 1 and 2 have different tables') + + valid_comparison(input1 = csv_1a$Dataset[1], + input2 = Dataset_Name, + severity = 'danger', + severity_text = 'Different dataset to json') + + valid_comparison(input1 = nrow(csv_1b), + input2 = nrow(csv_2b), + severity = 'danger', + severity_text = 'Different number of data elements!') + + ## Use 'valid_comparison.R' to check the sessions can be compared to each other and to the json (warnings for user to check): + + valid_comparison(input1 = csv_1a$browseMetadata[1], + input2 = csv_2a$browseMetadata[1], + severity = 'warning', + severity_text = 'Different version of browseMetadata package!') + + valid_comparison(input1 = csv_1a$MetaDataVersion[1], + input2 = csv_2a$MetaDataVersion[1], + severity = 'warning', + severity_text = 'Different metadata versions!') + + valid_comparison(input1 = csv_1a$MetaDataVersion[1], + input2 = Dataset$documentationVersion, + severity = 'warning', + severity_text = 'The version files do not match the json (different metadata versions)!') + + valid_comparison(input1 = csv_1a$MetaDataLastUpdated[1], + input2 = csv_2a$MetaDataLastUpdated[1], + severity = 'warning', + severity_text = 'Different metadata date!') + + valid_comparison(input1 = csv_1a$MetaDataLastUpdated[1], + input2 = Dataset$lastUpdated, + severity = 'warning', + severity_text = 'The session files do not match the json (different dates for metadata)!') + + # DISPLAY TO USER ---- + + ## Use 'ref_plot.R' to plot domains for the user's ref (save df for later use) + df_plots <- ref_plot(domains) + + # EXTRACT TABLE INFO FROM METADATA JSON ---- + + ## Use 'json_table_to_df.R' to extract table from meta_json into a df + table_find <- data.frame(table_n = numeric(length(Dataset$childDataClasses)),table_label = character(length(Dataset$childDataClasses))) + for (t in 1:length(Dataset$childDataClasses)) { + table_find$table_n[t] = t + table_find$table_label[t] = Dataset$childDataClasses[[t]]$label + } + table_n = table_find$table_n[table_find$table_label == csv_1a$Table[1]] + + Table_df <-json_table_to_df(Dataset = meta_json$data,n = table_n) + + # JOIN DATAFRAMES FROM SESSIONS IN ORDER TO COMPARE ---- + ses_join <- join_outputs(session_1 = csv_1b,session_2 = csv_2b) + + # FIND MISMATCHES AND ASK FOR CONSENSUS DECISION ---- + for (datavar in 1:nrow(ses_join)) { + concensus <- concensus_on_mismatch(ses_join,Table_df,datavar,max(df_plots$Code$Code)) + ses_join$Domain_code_join[datavar] <- concensus$Domain_code_join + ses_join$Note_join[datavar] <- concensus$Note_join + } # end of loop for DataElement + + # SAVE TO NEW CSV ---- + output_fname <- paste0("CONCENSUS_OUTPUT_", gsub(" ", "", Dataset_Name), "_", table_find$table_label[table_n], "_", timestamp_now, ".csv") + utils::write.csv(ses_join, output_fname, row.names = FALSE) + cat("\n") + cli_alert_success("Your concensus categorisations have been saved to {output_fname}") +} diff --git a/R/convert_output.R b/R/browseMetadata_convert_outputs.R similarity index 91% rename from R/convert_output.R rename to R/browseMetadata_convert_outputs.R index 306e3925..32bcc0fa 100644 --- a/R/convert_output.R +++ b/R/browseMetadata_convert_outputs.R @@ -1,15 +1,15 @@ -#' convert_output +#' browseMetadata_convert_outputs #' #' The 'OUTPUT_' file groups multiple categorisations onto one line e.g. '1,3' \cr \cr #' This function creates a new longer output 'L-OUTPUT_' which gives each categorisation its own row \cr \cr #' This 'L-OUTPUT_' may be more useful when using these csv files in later analyses -#' @param output_csv The name of the 'OUTPUT_' csv file that was created from domain_mapping.R +#' @param output_csv The name of the 'OUTPUT_' csv file that was created from browseMetadata.R #' @param output_dir The location of output_csv #' @return The function will return 'L-OUTPUT_' in the same output_dir #' @export #' @importFrom utils read.csv write.csv -convert_output <- function(output_csv,output_dir) { +browseMetadata_convert_outputs <- function(output_csv,output_dir) { output <- read.csv(paste0(output_dir,'/',output_csv)) output_long <- output[0,] #make duplicate diff --git a/R/compare_sessions.R b/R/compare_sessions.R deleted file mode 100644 index 7b68ff96..00000000 --- a/R/compare_sessions.R +++ /dev/null @@ -1,148 +0,0 @@ -#' compare_sessions -#' -#' This function is to be used after running the domain_mapping function. \cr \cr -#' It compares csv outputs from two sessions, finds their differences, and asks for a consensus. \cr \cr -#' -#' @param session_dir This directory should contain 2 csv files for each session (LOG_ and OUTPUT_), 4 csv files in total. -#' @param session1_base Base file name for session 1 e.g. 'NationalCommunityChildHealthDatabase(NCCHD)_BLOOD_TEST_2024-07-05-16-07-38' -#' @param session2_base Base file name for session 1 e.g. 'NationalCommunityChildHealthDatabase(NCCHD)_BLOOD_TEST_2024-07-08-12-03-30' -#' @param json_file The full path to the metadata file used when running domain_mapping (should be the same for session 1 and session 2) -#' @param domain_file The full path to the domain file used when running domain_mapping (should be the same for session 1 and session 2) -#' @return It returns a csv output, which represents the consensus decisions between session 1 and session 2 -#' @importFrom dplyr left_join select join_by -#' @export - -compare_sessions <- function(session_dir,session1_base,session2_base,json_file,domain_file) { - - timestamp_now <- gsub(" ", "_", Sys.time()) - timestamp_now <- gsub(":", "-", timestamp_now) - - # read in the input files: - - csv_1a <- read.csv(paste0(session_dir,'/','LOG_',session1_base,'.csv')) - csv_2a <- read.csv(paste0(session_dir,'/','LOG_',session2_base,'.csv')) - csv_1b <- read.csv(paste0(session_dir,'/','OUTPUT_',session1_base,'.csv')) - csv_2b <- read.csv(paste0(session_dir,'/','OUTPUT_',session2_base,'.csv')) - meta_json <- rjson::fromJSON(file = json_file) - domains <- read.csv(domain_file, header = FALSE) - - # check the session csvs can be compared to each other and to the json (min requirements): - - if (csv_1a$Dataset[1] != csv_2a$Dataset[1]){ - cat("\n\n") - cli_alert_danger("Cannot compare session 1 and 2: different datasets") - stop()} - - if (csv_1a$Table[1] != csv_2a$Table[1]){ - cat("\n\n") - cli_alert_danger("Cannot compare session 1 and 2: different tables") - stop()} - - if (csv_1a$Dataset[1] != meta_json$dataModel$label){ - cat("\n\n") - cli_alert_danger("The csv files do not match the json: different datasets") - stop()} - - # check the session csvs can be compared to each other and to the json (warnings for user to check): - - if (csv_1a$browseMetadata[1] != csv_2a$browseMetadata[1]){ - cat("\n\n") - cli_alert_warning("Different version of browseMetadata for session 1 and 2!\nValid comparison may not be possible - please check!") - continue <- readline("Press enter to continue or Esc to cancel: ")} - - if (csv_1a$MetaDataVersion[1] != csv_2a$MetaDataVersion[1]){ - cat("\n\n") - cli_alert_warning("Different metadata versions for session 1 and 2\nValid comparison may not be possible - please check!") - continue <- readline("Press enter to continue or Esc to cancel: ")} - - if (csv_1a$MetaDataVersion[1] != meta_json$dataModel$documentationVersion){ - cat("\n\n") - cli_alert_warning("The session files do not match the json (different metadata versions)\nValid comparison may not be possible - please check!") - continue <- readline("Press enter to continue or Esc to cancel: ")} - - if (csv_1a$MetaDataLastUpdated[1] != csv_2a$MetaDataLastUpdated[1]){ - cat("\n\n") - cli_alert_warning("Different metadata date for session 1 and 2\nValid comparison may not be possible - please check!") - continue <- readline("Press enter to continue or Esc to cancel: ")} - - if (csv_1a$MetaDataLastUpdated[1] != meta_json$dataModel$lastUpdated){ - cat("\n\n") - cli_alert_warning("The session files do not match the json (different dates for metadata)\nValid comparison may not be possible - please check!") - continue <- readline("Press enter to continue or Esc to cancel: ")} - - if (nrow(csv_1b) != nrow(csv_2b)){ - cat("\n\n") - cli_alert_warning("Different number of data elements for session 1 and 2\nValid comparison may not be possible - please check!") - continue <- readline("Press enter to continue or Esc to cancel: ")} - - # print details about each session - cat("\n\n") - cli_alert_success("Comparing session 1 and session 2") - cli_alert_success("Session 1 created by {csv_1a$Initials[1]} and session 2 created by {csv_2a$Initials[1]}") - - # Present domains plots panel for user's reference (as in domain_mapping) - colnames(domains)[1] = "Domain Name" - graphics::plot.new() - domains_extend <- rbind(c("*NO MATCH / UNSURE*"), c("*METADATA*"), c("*ID*"), c("*DEMOGRAPHICS*"), domains) - Code <- data.frame(Code = 0:(nrow(domains_extend) - 1)) - Domain_table <- tableGrob(cbind(Code,domains_extend),rows = NULL,theme = ttheme_default()) - grid.arrange(Domain_table,nrow=1,ncol=1) - - # join csv_1b and csv_2b in order to compare - ses_join <- left_join(csv_1b,csv_2b,suffix = c("_ses1","_ses2"),join_by(DataElement)) - ses_join$Domain_code_join <- NA - ses_join$Note_join <- NA - ses_join <- select(ses_join, - 'timestamp_ses1','timestamp_ses2', - 'DataElement_N_ses1','DataElement_N_ses2', - 'Domain_code_ses1','Domain_code_ses2', - 'Note_ses1','Note_ses2', - 'Domain_code_join','Note_join') - - # extract table from meta_json - same code as domain_mapping - table_find <- data.frame(table_n = numeric(length(meta_json$dataModel$childDataClasses)),table_label = character(length(meta_json$dataModel$childDataClasses))) - for (t in 1:length(meta_json$dataModel$childDataClasses)) { - table_find$table_n[t] = t - table_find$table_label[t] = meta_json$dataModel$childDataClasses[[t]]$label - } - - table_n = table_find$table_n[table_find$table_label == csv_1a$Table[1]] - thisTable <- meta_json$dataModel$childDataClasses[[table_n]]$childDataElements - thisTable_df <- data.frame(do.call(rbind, thisTable)) # nested list to dataframe - dataType_df <- data.frame(do.call(rbind, thisTable_df$dataType)) # nested list to dataframe - - selectTable_df <- data.frame( - Label = unlist(thisTable_df$label), - Description = unlist(thisTable_df$description), - Type = unlist(dataType_df$label) - ) - - selectTable_df <- selectTable_df[order(selectTable_df$Label), ] - - # find the mismatches and ask for consensus decisions - for (datavar in 1:nrow(ses_join)) { - # collect user responses - if (ses_join$Domain_code_ses1[datavar] != ses_join$Domain_code_ses2[datavar]){ - cat("\n\n") - cli_alert_info("Mismatch of DataElement {ses_join$DataElement[datavar]}") - cat(paste( - "\nDOMAIN CODE (note) for session 1 --> ",ses_join$Domain_code_ses1[datavar],'(',ses_join$Note_ses1[datavar],')', - "\nDOMAIN CODE (note) for session 2 --> ",ses_join$Domain_code_ses2[datavar],'(',ses_join$Note_ses2[datavar],')' - )) - cat("\n\n") - cli_alert_info("Provide concensus decision for this DataElement:") - decision_output <- user_categorisation(selectTable_df$Label[datavar],selectTable_df$Description[datavar],selectTable_df$Type[datavar],max(Code$Code)) - ses_join$Domain_code_join[datavar] <- decision_output$decision - ses_join$Note_join[datavar] <- decision_output$decision_note - } else { - ses_join$Domain_code_join[datavar] <- ses_join$Domain_code_ses1[datavar] - ses_join$Note_join[datavar] <- 'No mismatch!' - } - } # end of loop for DataElement - - # save to new csv - output_fname <- paste0("CONCENSUS_OUTPUT_", gsub(" ", "", meta_json$dataModel$label), "_", table_find$table_label[table_n], "_", timestamp_now, ".csv") - utils::write.csv(ses_join, output_fname, row.names = FALSE) - cat("\n") - cli_alert_success("Your concensus categorisations have been saved to {output_fname}") -} diff --git a/R/concensus_on_mismatch.R b/R/concensus_on_mismatch.R new file mode 100644 index 00000000..ad8c6c94 --- /dev/null +++ b/R/concensus_on_mismatch.R @@ -0,0 +1,33 @@ +#' concensus_on_mismatch +#' +#' This function is called within the browseMetadata_compare_outputs function. \cr \cr +#' For a specific data element, it compares the domain code categorisation between two sessions. +#' If the categorisation differs, it prompts the user for a new consensus decision by presenting the json metadata. \cr \cr +#' +#' @param ses_join The joined dataframes from the two sessions +#' @param Table_df Metadata from the json file, for one table in the dataset +#' @param datavar Data Element n +#' @param domain_code_max The maximum allowable domain code integer +#' @return It returns a list of 2: the domain code and the note from the consensus decision +#' @importFrom CHECK LATER + +concensus_on_mismatch <- function(ses_join,Table_df,datavar,domain_code_max){ + + if (ses_join$Domain_code_ses1[datavar] != ses_join$Domain_code_ses2[datavar]){ + cat("\n\n") + cli_alert_info("Mismatch of DataElement {ses_join$DataElement[datavar]}") + cat(paste( + "\nDOMAIN CODE (note) for session 1 --> ",ses_join$Domain_code_ses1[datavar],'(',ses_join$Note_ses1[datavar],')', + "\nDOMAIN CODE (note) for session 2 --> ",ses_join$Domain_code_ses2[datavar],'(',ses_join$Note_ses2[datavar],')' + )) + cat("\n\n") + cli::cli_alert_info("Provide concensus decision for this DataElement:") + decision_output <- user_categorisation(Table_df$Label[datavar],Table_df$Description[datavar],Table_df$Type[datavar],domain_code_max) + Domain_code_join <- decision_output$decision + Note_join <- decision_output$decision_note + } else { + Domain_code_join <- ses_join$Domain_code_ses1[datavar] + Note_join <- 'No mismatch!' + } + return(list(Domain_code_join = Domain_code_join, Note_join = Note_join)) +} diff --git a/R/copy_previous.R b/R/copy_previous.R new file mode 100644 index 00000000..34aac7f9 --- /dev/null +++ b/R/copy_previous.R @@ -0,0 +1,38 @@ +#' copy_previous +#' +#' This function is called within the browseMetadata function. \cr \cr +#' It searches for previous OUTPUT files in the output_dir, that match the dataset name. \cr \cr +#' If files exist, it removes duplicates and autos, and stores the rest of the data elements in a dataframe. \cr \cr +#' +#' @param Dataset_Name +#' @param output_dir +#' @return It returns a list of 2: df_prev_exist (a boolean) and df_prev (NULL or populated with data elements to copy) +#' @importFrom CHECK LATER + +copy_previous <- function(Dataset_Name,output_dir) { + + o_search = paste0("^OUTPUT_",gsub(" ", "", Dataset_Name),'*') + csv_list <- data.frame(file = list.files(output_dir, pattern = o_search)) + if (nrow(csv_list) != 0) { + df_list <- lapply(paste0(output_dir, '/', csv_list$file), read.csv) + df_prev <- do.call("rbind", df_list) #combine all df + ## make a new date column, order by earliest, remove duplicates & auto + df_prev$time2 <- as.POSIXct(df_prev$timestamp, format = "%Y-%m-%d-%H-%M-%S") + df_prev <- df_prev[order(df_prev$time2), ] + df_prev <- df_prev %>% distinct(DataElement, .keep_all = TRUE) + df_prev <- df_prev[-(which(df_prev$Note %in% "AUTO CATEGORISED")), ] + df_prev_exist <- TRUE + cat("\n") + cli_alert_info(paste0("Copying from previous session(s): ")) + cat("\n") + print(csv_list$file) + + } else { + df_prev <- NULL + df_prev_exist <- FALSE + } + + output <- list(df_prev = df_prev,df_prev_exist = df_prev_exist) + output + +} diff --git a/R/data-Output.R b/R/data-Output.R new file mode 100644 index 00000000..ada083db --- /dev/null +++ b/R/data-Output.R @@ -0,0 +1,25 @@ +#' Output dataframe +#' +#' Empty output dataframe for browseMetadata.R to fill. \cr \cr +#' +#'\item \code{Output <- data.frame( +#'timestamp = character(0), +#'Table = character(0), +#'DataElement_N = character(0), +#'DataElement = character(0), +#'Domain_code = character(0), +#'Note = character(0) +#')} +#' +#' \item \code{usethis::use_data(Output)} +#' +#' @docType data +# +#' @usage data(Output) +#' +#' @format A data frame with 0 rows and 6 columns +#' +#' @source The dataframe was manually created for the browseMetadata.R function + +"Output" + diff --git a/R/data-log_Output.R b/R/data-log_Output.R new file mode 100644 index 00000000..2c209196 --- /dev/null +++ b/R/data-log_Output.R @@ -0,0 +1,28 @@ +#' Output log dataframe +#' +#' Empty log output dataframe for browseMetadata.R to fill. \cr \cr +#' +#' \item \code{log_Output <- data.frame( +#' timestamp = character(1), +#' browseMetadata = character(1), +#' Initials = character(1), +#' MetaDataVersion = character(1), +#' MetaDataLastUpdated = character(1), +#' DomainListDesc = character(1), +#' Dataset = character(1), +#' Table = character(1), +#' Table_note = character(1) +#' )} +#' +#' \item \code{usethis::use_data(log_Output)} +#' +#' @docType data +# +#' @usage data(log_Output) +#' +#' @format A data frame with 1 empty row and 9 columns +#' +#' @source The dataframe was manually created for the browseMetadata.R function + +"log_Output" + diff --git a/R/domain_mapping.R b/R/domain_mapping.R deleted file mode 100755 index 79ba566b..00000000 --- a/R/domain_mapping.R +++ /dev/null @@ -1,376 +0,0 @@ -#' domain_mapping -#' -#' This function will read in the metadata file for a chosen dataset, loop through all the data elements, and ask the user to catergorise/label each data element as belonging to one or more domains.\cr \cr -#' The domains will appear in the Plots tab and dataset information will be printed to the R console, for the user's reference in making these categorisations. \cr \cr -#' These categorisations will be saved to a csv file, alongside a log file which summarises the session details. -#' To speed up this process, some auto-categorisations will be made by the function for commonly occurring data elements and categorisations for the same data element can be copied from one table to another. \cr \cr -#' Example inputs are provided within the package data, for the user to run this function in a demo mode. -#' @param json_file The metadata file. This should be downloaded from the metadata catalogue as a json file. See 'data-raw/maternity_indicators_dataset_(mids)_20240105T132210.json' for an example download. -#' @param domain_file The domain list file. This should be a csv file created by the user, with each domain listed on a separate line. See 'data-raw/domain_list_demo.csv' for a template. -#' @param look_up_file The look-up table file, with auto-categorisations. By default, the code uses 'data/look-up.rda'. The user can provide their own look-up table in the same format as 'data-raw/look-up.csv'. -#' @param output_dir The path to the directory where the two csv output files will be saved. By default, the current working directory is used. -#' @param table_copy Turn on copying between tables (TRUE or FALSE, default TRUE). If TRUE, categorisations you made for all other tables in this dataset will be copied over (if 'OUTPUT_' files are found in output_dir). -#' @return The function will return two csv files: 'OUTPUT_' which contains the mappings and 'LOG_' which contains details about the dataset and session. -#' @examples -#' # Run in demo mode by providing no inputs: domain_mapping() -#' # Demo mode will use the /data files provided in this package -#' # For more guidance, refer to the package README.md file and the R manual files. -#' @export -#' @import ggplot2 -#' @importFrom graphics plot.new -#' @importFrom utils read.csv write.csv -#' @importFrom dplyr %>% arrange count group_by distinct - -domain_mapping <- function(json_file = NULL, domain_file = NULL, look_up_file = NULL, output_dir = NULL, table_copy = TRUE) { - - ## Load data: Check if demo data should be used ---- - - if (is.null(json_file) && is.null(domain_file)) { - # If both json_file and domain_file are NULL, use demo data - meta_json <- get("json_metadata") - domains <- get("domain_list") - DomainListDesc <- "DemoList" - cat("\n") - cli_alert_info("Running domain_mapping in demo mode using package data files") - demo_mode = TRUE - } else if (is.null(json_file) || is.null(domain_file)) { - # If only one of json_file and domain_file is NULL, throw error - cat("\n") - cli_alert_danger("Please provide both json_file and domain_file (or neither file, to run in demo mode)") - stop() - } else { - demo_mode = FALSE - # Read in the json file containing the meta data - meta_json <- rjson::fromJSON(file = json_file) - # Read in the domain file containing the meta data - domains <- read.csv(domain_file, header = FALSE) - DomainListDesc <- tools::file_path_sans_ext(basename(domain_file)) - } - - # Check if user has provided a look-up table - if (is.null(look_up_file)) { - cli_alert_info("Using the default look-up table in data/look-up.rda") - lookup <- get("look_up") - } - else { - lookup <- read.csv(look_up_file) - cli_alert_info("Using look up file inputted by user") - print(lookup) - } - - # If user has not provider output_dir, use current working dir: - if (is.null(output_dir)) { - output_dir = getwd() } - - ## Present domains plots panel for user's reference ---- - colnames(domains)[1] = "Domain Name" - graphics::plot.new() - domains_extend <- rbind(c("*NO MATCH / UNSURE*"), c("*METADATA*"), c("*ID*"), c("*DEMOGRAPHICS*"), domains) - Code <- data.frame(Code = 0:(nrow(domains_extend) - 1)) - Domain_table <- tableGrob(cbind(Code,domains_extend),rows = NULL,theme = ttheme_default()) - grid.arrange(Domain_table,nrow=1,ncol=1) - - ## Get user and demo list info for log file ---- - User_Initials <- "" - cat("\n \n") - while (User_Initials == "") { - User_Initials <- readline("Enter your initials: ") - } - - ## Print information about Dataset ---- - cli_h1("Dataset Name") - cat(meta_json$dataModel$label, fill = TRUE) - cli_h1("Dataset Last Updated") - cat(meta_json$dataModel$lastUpdated, fill = TRUE) - cli_h1("Dataset File Exported By") - cat(meta_json$exportMetadata$exportedBy, "at", meta_json$exportMetadata$exportedOn, fill = TRUE) - - Dataset_desc <- "" - while (Dataset_desc != "Y" & Dataset_desc != "y" & Dataset_desc != "N" & Dataset_desc != "n") { - cat("\n \n") - Dataset_desc <- readline(prompt = "Would you like to read a description of the dataset? (y/n): ") - } - - if (Dataset_desc == 'Y' | Dataset_desc == 'y') { - cli_h1("Dataset Description") - cat(meta_json$dataModel$description, fill = TRUE) - readline(prompt = "Press any key to proceed") - } - - ## Ask user which tables to process ---- - - nTables <- length(meta_json$dataModel$childDataClasses) - cat("\n") - cli_alert_info("Found {nTables} Table{?s} in this Dataset") - for (dc in 1:nTables) { - cat("\n") - cat(dc,meta_json$dataModel$childDataClasses[[dc]]$label, fill = TRUE) - } - - nTables_Process <- numeric(0) - nTables_Process_Error <- TRUE - nTables_Process_OutOfRange <- FALSE - while (length(nTables_Process) == 0 | nTables_Process_Error==TRUE | nTables_Process_OutOfRange == TRUE) { - if (nTables_Process_OutOfRange == TRUE) { - cli_alert_danger('That table number is not within the range displayed, please try again.')} - tryCatch({ - cat("\n \n"); - cli_alert_info("Enter each table number you want to process in this session (one number on each line):"); - cat("\n"); - nTables_Process <- scan(file="",what=0); - nTables_Process_Error <- FALSE; - nTables_Process_OutOfRange = any(nTables_Process > nTables)}, - error=function(e) {nTables_Process_Error <- TRUE; print(e); cat("\n"); cli_alert_danger('Your input is in the wrong format, reference the table numbers and try again')}) - } - - # Extract each Table ---- - for (dc in unique(nTables_Process)) { - cat("\n") - cli_alert_info("Processing Table {dc} of {nTables}") - cli_h1("Table Name") - cat(meta_json$dataModel$childDataClasses[[dc]]$label, fill = TRUE) - cli_h1("Table Last Updated") - cat(meta_json$dataModel$childDataClasses[[dc]]$lastUpdated, "\n", fill = TRUE) - - # Check if previous table output(s) exists in this output_dir (for table copying) - if (table_copy == TRUE){ - dataset_search = paste0("^OUTPUT_",gsub(" ", "", meta_json$dataModel$label),'*') - csv_list <- data.frame(file = list.files(output_dir,pattern = dataset_search)) - if (nrow(csv_list) != 0){ - df_list <- lapply(paste0(output_dir,'/',csv_list$file), read.csv) - df_combined <- do.call("rbind", df_list) #combine all df - df_combined$timestamp2 <- as.POSIXct(df_combined$timestamp, format="%Y-%m-%d-%H-%M-%S") #create new date column - df_combined <- df_combined[order(df_combined$timestamp2),] #order by earliest datetime - df_combined <- df_combined %>% distinct(DataElement, .keep_all = TRUE) #remove duplicates, keep earliest categorisation - df_combined <- df_combined[-(which(df_combined$Note %in% "AUTO CATEGORISED")),] #remove auto categorised - df_combined_exist <- TRUE - cat("\n") - cli_alert_info(paste0("Copying from previous session(s): ")) - cat("\n") - print(csv_list$file) - } else {df_combined_exist <- FALSE} - } else {df_combined_exist <- FALSE} - - table_desc <- "" - while (table_desc != "Y" & table_desc != "y" & table_desc != "N" & table_desc != "n") { - cat("\n \n") - table_desc <- readline(prompt = "Would you like to read a description of the table? (y/n): ") - } - - if (table_desc == 'Y' | table_desc == 'y') { - cli_h1("Table Description") - cat(meta_json$dataModel$childDataClasses[[dc]]$description, fill = TRUE) - readline(prompt = "Press any key to proceed") - } - - cat("\n \n") - table_note <- readline("Optional free text note about this table (or press enter to continue): ") - - thisTable <- meta_json$dataModel$childDataClasses[[dc]]$childDataElements # probably a better way of dealing with complex json files in R ... - thisTable_df <- data.frame(do.call(rbind, thisTable)) # nested list to dataframe - dataType_df <- data.frame(do.call(rbind, thisTable_df$dataType)) # nested list to dataframe - - selectTable_df <- data.frame( - Label = unlist(thisTable_df$label), - Description = unlist(thisTable_df$description), - Type = unlist(dataType_df$label) - ) - - selectTable_df <- selectTable_df[order(selectTable_df$Label), ] - - # Create unique output csv to log the results ---- - timestamp_now <- format(Sys.time(),"%Y-%m-%d-%H-%M-%S") - - output_fname_csv <- paste0("OUTPUT_", gsub(" ", "", meta_json$dataModel$label), "_", gsub(" ", "", meta_json$dataModel$childDataClasses[[dc]]$label), "_", timestamp_now, ".csv") - output_fname_log_csv <- paste0("LOG_", gsub(" ", "", meta_json$dataModel$label), "_", gsub(" ", "", meta_json$dataModel$childDataClasses[[dc]]$label), "_", timestamp_now, ".csv") - output_fname_png <- paste0("PLOT_", gsub(" ", "", meta_json$dataModel$label), "_", gsub(" ", "", meta_json$dataModel$childDataClasses[[dc]]$label), "_", timestamp_now, ".png") - - log_Output <- data.frame( - timestamp = character(1), - browseMetadata = character(1), - Initials = character(1), - MetaDataVersion = character(1), - MetaDataLastUpdated = character(1), - DomainListDesc = character(1), - Dataset = character(1), - Table = character(1), - Table_note = character(1) - ) - - row_Output <- data.frame( - timestamp = character(0), - Table = character(0), - DataElement_N = character(0), - DataElement = character(0), - Domain_code = character(0), - Note = character(0) - ) - - # Loop through each data element, request response from the user to match to a domain ---- - - # if it's the demo run, only loop through a max of 20 data elements - if (demo_mode == TRUE) { - start_var = 1 - end_var = min(20,nrow(selectTable_df)) - } else { - cli_h1(paste('There are',as.character(nrow(selectTable_df)),'data elements (variables) in this table.')) - cat("\n") - start_var <- readline(prompt = "Start variable (write 1 to process all): ") - cat("\n") - end_var <- readline(prompt = paste("End variable (write", nrow(selectTable_df), "to process all): ")) - } - - Output <- row_Output - for (datavar in start_var:end_var) { - cat("\n \n") - cli_alert_info(paste(length(datavar:end_var),'left to process in this session')) - cli_alert_info("Processing data element {datavar} of {nrow(selectTable_df)}") - # prepare output - this_Output <- row_Output - this_Output[nrow(this_Output) + 1 , ] <- NA - this_Output$Table[1] <- meta_json$dataModel$childDataClasses[[dc]]$label - this_Output$DataElement[1] <- selectTable_df$Label[datavar] - this_Output$DataElement_N[1] <- paste(as.character(datavar),'of',as.character(nrow(selectTable_df))) - # search if this data element matches with auto categorisations in lookup - datavar_index <- which(lookup$DataElement == selectTable_df$Label[datavar]) #we should code this to ignore the case - lookup_subset <- lookup[datavar_index,] - # search if this data element matches with any data elements processed in previous table - if (df_combined_exist == TRUE) { - datavar_index <- which(df_combined$DataElement == selectTable_df$Label[datavar]) - df_combined_subset <- df_combined[datavar_index,] - } else {df_combined_subset <- data.frame()} - # decide how to process the data element out of 3 options - if (nrow(lookup_subset) == 1) { # 1 - auto categorisation - this_Output$Domain_code[1] <- lookup_subset$DomainCode - this_Output$Note[1] <- "AUTO CATEGORISED" - Output <- rbind(Output,this_Output) - } else if (df_combined_exist == TRUE & nrow(df_combined_subset) == 1){ # 2 - copy from previous table - this_Output$Domain_code[1] <- df_combined_subset$Domain_code - suppressWarnings(this_Output$Note[1] <- paste0("COPIED FROM: ",df_combined_subset$Table)) - Output <- rbind(Output,this_Output) - } else { # 3 - collect user responses - decision_output <- user_categorisation(selectTable_df$Label[datavar],selectTable_df$Description[datavar],selectTable_df$Type[datavar],max(Code$Code)) - this_Output$Domain_code[1] <- decision_output$decision - this_Output$Note[1] <- decision_output$decision_note - Output <- rbind(Output,this_Output) - } - } # end of loop for DataElement - - ## Print the AUTO CATEGORISED responses for this Table and request review ---- - Output_auto <- subset(Output, Note == 'AUTO CATEGORISED') - cat("\n \n") - cli_alert_warning("Please check the auto categorised data elements are accurate for table {meta_json$dataModel$childDataClasses[[dc]]$label}:") - cat("\n \n") - print(Output_auto[, c("DataElement", "Domain_code","Note")]) - - # extract the rows to edit - auto_row_Error <- TRUE - auto_row_InRange <- TRUE - while (auto_row_Error==TRUE | auto_row_InRange == FALSE) { - if (auto_row_InRange == FALSE) { - cli_alert_danger('The row numbers you provided are not in range. Reference the auto categorised row numbers on the screen and try again')} - tryCatch({ - cat("\n \n"); - cli_alert_info("Press enter to accept the auto categorisations for table {meta_json$dataModel$childDataClasses[[dc]]$label} or enter each row you'd like to edit:"); - cat("\n"); - auto_row <- scan(file="",what=0); - auto_row_Error <- FALSE; - auto_row_InRange <- all(auto_row %in% which(Output$Note == 'AUTO CATEGORISED'))}, - error=function(e) {auto_row_Error <- TRUE; print(e); cat("\n"); cli_alert_danger('Your input is in the wrong format, try again')}) - } - - if (length(auto_row) != 0) { - - for (datavar_auto in unique(auto_row)) { - - # collect user responses - decision_output <- user_categorisation(selectTable_df$Label[datavar_auto],selectTable_df$Description[datavar_auto],selectTable_df$Type[datavar_auto],max(Code$Code)) - # input user responses into output - Output$Domain_code[datavar_auto] <- decision_output$decision - Output$Note[datavar_auto] <- decision_output$decision_note - } - } - - ## Ask if user wants to review their responses for this Table ---- - review_cats <- "" - while (review_cats != "Y" & review_cats != "y" & review_cats != "N" & review_cats != "n") { - cat("\n \n") - review_cats <- readline(prompt = "Would you like to review your categorisations? (y/n): ") - } - - if (review_cats == 'Y' | review_cats == 'y') { - Output_not_auto <- subset(Output, Note != 'AUTO CATEGORISED') - Output_not_auto['Note (first 12 chars)'] <- substring(Output_not_auto$Note,1,11) - cat("\n \n") - print(Output_not_auto[, c("DataElement", "Domain_code","Note (first 12 chars)")]) - cat("\n \n") - - # extract the rows to edit - not_auto_row_Error <- TRUE - not_auto_row_InRange <- TRUE - while (not_auto_row_Error==TRUE | not_auto_row_InRange == FALSE) { - if (not_auto_row_InRange == FALSE) { - cli_alert_danger('The row numbers you provided are not in range. Reference the row numbers on the screen and try again')} - tryCatch({ - cat("\n \n"); - cli_alert_info("Press enter to accept your categorisations for table {meta_json$dataModel$childDataClasses[[dc]]$label} or enter each row you'd like to edit:"); - cat("\n"); - not_auto_row <- scan(file="",what=0); - not_auto_row_Error <- FALSE; - not_auto_row_InRange <- all(not_auto_row %in% which(Output$Note != 'AUTO CATEGORISED'))}, - error=function(e) {not_auto_row_Error <- TRUE; print(e); cat("\n"); cli_alert_danger('Your input is in the wrong format, reference the row numbers and try again')}) - } - - if (length(not_auto_row) != 0) { - - for (datavar_not_auto in unique(not_auto_row)) { - - # collect user responses - decision_output <- user_categorisation(selectTable_df$Label[datavar_not_auto],selectTable_df$Description[datavar_not_auto],selectTable_df$Type[datavar_not_auto],max(Code$Code)) - # input user responses into output - Output$Domain_code[datavar_not_auto] <- decision_output$decision - Output$Note[datavar_not_auto] <- decision_output$decision_note - } - } - } - - ## Fill in columns that have all rows identical ---- - log_Output$timestamp <- timestamp_now - log_Output$browseMetadata <- packageVersion("browseMetadata") - log_Output$Initials <- User_Initials - log_Output$MetaDataVersion <- meta_json$dataModel$documentationVersion - log_Output$MetaDataLastUpdated <- meta_json$dataModel$lastUpdated - log_Output$DomainListDesc <- DomainListDesc - log_Output$Dataset <- meta_json$dataModel$label - log_Output$Table <- meta_json$dataModel$childDataClasses[[dc]]$label - log_Output$Table_note <- table_note - - ## Save final categorisations for this Table ---- - Output$timestamp <- timestamp_now - utils::write.csv(Output, paste(output_dir,output_fname_csv,sep='/'), row.names = FALSE) - utils::write.csv(log_Output, paste(output_dir,output_fname_log_csv,sep='/'), row.names = FALSE) - cat("\n") - cli_alert_success("Your final categorisations have been saved:\n{output_fname_csv}") - cli_alert_success("Your session log has been saved:\n{output_fname_log_csv}") - - ## Create and save a summary plot - counts <- Output %>% group_by(Domain_code) %>% count() %>% arrange(n) - - Domain_plot <- counts %>% - ggplot(aes(x=reorder(Domain_code, -n), y=n)) + - geom_col() + - ggtitle(paste("Data Elements in", meta_json$dataModel$childDataClasses[[dc]]$label, "grouped by Domain code")) + - theme_gray(base_size = 18) + - theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + - xlab('Domain Code') + - ylab('Count') + - scale_y_continuous(breaks=seq(0,max(counts$n),1)) - - full_plot <- grid.arrange(Domain_plot, Domain_table,nrow=1,ncol=2) - ggsave(plot = full_plot,paste(output_dir,output_fname_png,sep='/'),width = 14, height = 8, units = "in") - cli_alert_success("A summary plot has been saved:\n{output_fname_png}") - - - } # end of loop for each table - -} # end of function diff --git a/R/end_plot.R b/R/end_plot.R new file mode 100644 index 00000000..0bbb158b --- /dev/null +++ b/R/end_plot.R @@ -0,0 +1,36 @@ +#' end_plot +#' +#' This function is called within the browseMetadata function. \cr \cr +#' A summary plot is created that includes the domain code reference table and counts of domain code categorisations \cr \cr +#' +#' @param df The Output dataframe with all the domain categorisations +#' @param Table_name The table name +#' @param ref_table The domain code reference table (which domain maps to which integer) +#' @return It returns a ggplot +#' @importFrom CHECK LATER + +end_plot <- function(df,Table_name, ref_table){ + + counts <- df %>% group_by(Domain_code) %>% count() %>% arrange(n) + + Domain_plot <- counts %>% + ggplot(aes(x = reorder(Domain_code, -n), y = n)) + + geom_col() + + ggtitle(paste("Data Elements in",Table_name,"grouped by Domain code")) + + theme_gray(base_size = 18) + + theme(axis.text.x = element_text( + angle = 90, + vjust = 0.5, + hjust = 1 + )) + + xlab('Domain Code') + + ylab('Count') + + scale_y_continuous(breaks = seq(0, max(counts$n), 1)) + + full_plot <- grid.arrange(Domain_plot, + ref_table, + nrow = 1, + ncol = 2) + return(full_plot) + +} diff --git a/R/join_outputs.R b/R/join_outputs.R new file mode 100644 index 00000000..95cf5ac2 --- /dev/null +++ b/R/join_outputs.R @@ -0,0 +1,19 @@ +#' join_outputs +#' +#' This function is called within the browseMetadata_compare_outputs function. \cr \cr +#' Joins output dataframes from two sessions, on the column DataElement. +#' +#' @param session_1 Dataframe from session 1 +#' @param session_2 Dataframe from session 2 +#' @return Dataframe with information from session 1 and 2, joined on column DataElement. +#' @importFrom CHECK LATER +#' + +join_outputs <- function(session_1, session_2){ + + ses_join <- left_join(session_1,session_2,suffix = c("_ses1","_ses2"),join_by(DataElement)) + ses_join <- select(ses_join,contains("_ses"),'DataElement') + ses_join$Domain_code_join <- NA + ses_join$Note_join <- NA + ses_join +} diff --git a/R/json_table_to_df.R b/R/json_table_to_df.R new file mode 100644 index 00000000..bcf97ada --- /dev/null +++ b/R/json_table_to_df.R @@ -0,0 +1,29 @@ +#' join_outputs +#' +#' This function is called within the browseMetadata function. \cr \cr +#' It reads in the nested lists from the json and extracts information needed into a dataframe. \cr \cr +#' It does this for one specific table in a dataset. \cr \cr +#' +#' @param Dataset This is the dataModel field of the json +#' @param n The Dataset number (as a json can have multiple datasets) +#' @return A dataframe for that specific table, including data label, description and type. +#' @importFrom CHECK LATER +#' + +json_table_to_df <- function(Dataset,n){ + + jsonTable <- Dataset$childDataClasses[[n]]$childDataElements + jsonTable_df <- data.frame(do.call(rbind, jsonTable)) # nested list to df + jsonType_df_dataType <- data.frame(do.call(rbind, jsonTable_df$dataType)) # nested list to df + + Table_df <- data.frame( + Label = unlist(jsonTable_df$label), + Description = unlist(jsonTable_df$description), + Type = unlist(jsonType_df_dataType$label) + ) + + Table_df <- Table_df[order(Table_df$Label), ] + + Table_df + +} diff --git a/R/load_data.R b/R/load_data.R new file mode 100644 index 00000000..0eafaebe --- /dev/null +++ b/R/load_data.R @@ -0,0 +1,50 @@ +#' load_data +#' +#' This function is called within the browseMetadata function. \cr \cr +#' It collects the inputs needed for the browseMetadata function (defaults or user inputs) +#' If some inputs are NULL, it loads the default inputs. If defaults not available, it prints error for the user. +#' If inputs are not NULL, it loads the user-specified inputs. +#' @param json_file As defined in browseMetadata +#' @param domain_file As defined in browseMetadata +#' @param look_up_file As defined in browseMetadata +#' @return A list of 5: all inputs needed for the browseMetadata function to run. +#' @importFrom CHECK LATER +#' +#' +load_data <- function(json_file, domain_file,look_up_file){ + + # Collect meta_json and domains + if (is.null(json_file) && is.null(domain_file)) { # if both json_file and domain_file are NULL, use demo data + meta_json <- get("json_metadata") + domains <- get("domain_list") + DomainListDesc <- "DemoList" + cat("\n") + cli_alert_info("Running browseMetadata in demo mode using package data files") + cat("\n ") + demo_mode = TRUE + } else if (is.null(json_file) || is.null(domain_file)) { # if only one of json_file and domain_file is NULL, throw error + cat("\n") + cli_alert_danger("Please provide both json_file and domain_file (or neither file, to run in demo mode)") + stop() + } else { # read in user specified files + demo_mode = FALSE + meta_json <- rjson::fromJSON(file = json_file) # read in the json file containing the meta data + domains <- read.csv(domain_file, header = FALSE) # read in the domain file containing the list of research domains + DomainListDesc <- tools::file_path_sans_ext(basename(domain_file)) + } + + # Collect look up table + if (is.null(look_up_file)) { + cli_alert_info("Using the default look-up table in data/look-up.rda") + cat("\n ") + lookup <- get("look_up") + } + else { + lookup <- read.csv(look_up_file) + cli_alert_info("Using look up file inputted by user") + cat("\n ") + print(lookup) + } + + list(meta_json = meta_json,domains = domains,DomainListDesc = DomainListDesc, demo_mode = demo_mode,lookup = lookup) +} diff --git a/R/ref_plot.R b/R/ref_plot.R new file mode 100644 index 00000000..6948d1a5 --- /dev/null +++ b/R/ref_plot.R @@ -0,0 +1,25 @@ +#' ref_plot +#' +#' This function is called within the browseMetadata function. \cr \cr +#' It plots a reference table to guide the user in their categorisation of domains. \cr \cr +#' This reference table is based on the user inputted domains and the default domains provided by this package. \cr \cr +#' @param domains The output of load_data +#' @return A reference table that appears in the Plots tab. A list of 2 containing the derivatives for this plot, used later in browseMetadata. +#' @importFrom CHECK LATER +#' +#' + +ref_plot <- function(domains){ + + colnames(domains)[1] = "Domain Name" + graphics::plot.new() + domains_extend <- rbind(c("*NO MATCH / UNSURE*"), c("*METADATA*"), c("*ID*"), c("*DEMOGRAPHICS*"), domains) + Code <- data.frame(Code = 0:(nrow(domains_extend) - 1)) + Domain_table <- tableGrob(cbind(Code,domains_extend),rows = NULL,theme = ttheme_default()) + grid.arrange(Domain_table,nrow=1,ncol=1) + + return(list(Code = Code, Domain_table = Domain_table)) + +} + + diff --git a/R/user_categorisation.R b/R/user_categorisation.R index 4a3d36b9..eb611921 100644 --- a/R/user_categorisation.R +++ b/R/user_categorisation.R @@ -1,6 +1,6 @@ #' user_categorisation #' -#' This function is used within the domain_mapping function. \cr \cr +#' This function is called within the browseMetadata function. \cr \cr #' It displays data properties to the user and requests a categorisation into a domain. \cr \cr #' An optional note can be included with the categorisation. #' @@ -9,7 +9,6 @@ #' @param data_type Data type of the variable #' @param domain_code_max Max code in the domain list (0-3 auto included, then N included via domain_file) #' @return It returns a list containing the decision and decision note -#' @export user_categorisation <- function(data_element,data_desc,data_type,domain_code_max) { diff --git a/R/user_categorisation_loop.R b/R/user_categorisation_loop.R new file mode 100644 index 00000000..71c9afee --- /dev/null +++ b/R/user_categorisation_loop.R @@ -0,0 +1,74 @@ +#' user_categorisation_loop +#' +#' This function is called within the browseMetadata function. \cr \cr +#' Given a specific table and a number of data elements to search, it checks for 3 different sources of domain categorisation: \cr \cr +#' 1 - If data elements match those in the look-up table, auto categorise them \cr \cr +#' 2 - If data elements match to previous table output, copy them \cr \cr +#' 3 - If no match for 1 or 2, data elements are categorised by the user \cr \cr +#' @param start_v Index of data element to start +#' @param end_v Index of data element to end +#' @param Table_df Dataframe with the table information, extracted from json metadata +#' @param df_prev_exist Boolean to indicate with previous dataframes exist (to copy from) +#' @param df_prev Previous dataframes to copy from (or NULL) +#' @param lookup The lookup table to enable auto categorisations +#' @param df_plots Output from the ref_plot function, to indicate maximum domain code allowed +#' @param Output Empty Output dataframe, to fill +#' @return An Output dataframe containing information about the table, data elements and categorisations +#' @importFrom CHECK LATER + +user_categorisation_loop <- function(start_v,end_v,Table_df,df_prev_exist,df_prev,lookup,df_plots,Output) { + + for (data_v in start_v:end_v) { + cat("\n \n") + cli_alert_info(paste(length(data_v:end_v), 'left to process')) + cli_alert_info("Data element {data_v} of {nrow(Table_df)}") + this_DataElement <- Table_df$Label[data_v] + this_DataElement_N <- paste(as.character(data_v), 'of', + as.character(nrow(Table_df))) + data_v_index <- which(lookup$DataElement == + Table_df$Label[data_v]) #we should code this to ignore the case + lookup_subset <- lookup[data_v_index, ] + ##### search if data element matches any data elements from previous table + if (df_prev_exist == TRUE) { + data_v_index <- which(df_prev$DataElement == + Table_df$Label[data_v]) + df_prev_subset <- df_prev[data_v_index, ] + } else { + df_prev_subset <- data.frame() + } + ##### decide how to process the data element out of 3 options + if (nrow(lookup_subset) == 1) { + ###### 1 - auto categorisation + Output <- Output %>% add_row( + DataElement = this_DataElement, + DataElement_N = this_DataElement_N, + Domain_code = as.character(lookup_subset$DomainCode), + Note = 'AUTO CATEGORISED' + ) + } else if (df_prev_exist == TRUE & + nrow(df_prev_subset) == 1) { + ###### 2 - copy from previous table + Output <- Output %>% add_row( + DataElement = this_DataElement, + DataElement_N = this_DataElement_N, + Domain_code = as.character(df_prev_subset$Domain_code), + Note = paste0("COPIED FROM: ", df_prev_subset$Table) + ) + } else { + ###### 3 - collect user responses with 'user_categorisation.R' + decision_output <- user_categorisation( + Table_df$Label[data_v], + Table_df$Description[data_v], + Table_df$Type[data_v], + max(df_plots$Code$Code) + ) + Output <- Output %>% add_row( + DataElement = this_DataElement, + DataElement_N = this_DataElement_N, + Domain_code = decision_output$decision, + Note = decision_output$decision_note + ) + } + } # end of loop for DataElement + Output +} diff --git a/R/user_prompt.R b/R/user_prompt.R new file mode 100644 index 00000000..a830da9b --- /dev/null +++ b/R/user_prompt.R @@ -0,0 +1,66 @@ +#' user_prompt +#' +#' This function is called within the browseMetadata function. \cr \cr +#' It prompts a response from the user. \cr \cr +#' +#' @param pre_prompt_text Optional text to display to the user, prior to prompt. +#' This should be a data frame: +#' data.frame(Heading = logical(0), Text = character(0)) +#' Each row of the dataframe specifies Heading TRUE/FALSE and text to display. +#' @param prompt_text Text to display to the user, to prompt their response. +#' @param any_keys Boolean to determine if any key responses are allowable. +#' If FALSE, only these are allowed: Y, y, N and n. +#' @param post_yes_text Optional text to post after receiving a 'Y' or 'y' +#' response from user. Same dataframe format as pre_prompt_text. +#' @return It returns variable text, depending on any_keys. +#' @importFrom CHECK LATER + +user_prompt <- function(pre_prompt_text = NULL, prompt_text, any_keys, post_yes_text = NULL) { + + # pre prompt text is optional + # prompt text is not + # any_keys, when TRUE it allows any input, when FALSE it only allows y/n/Y/N + # post_yes_text, when any_keys is FALSE and response is y/Y then print the post text + + # pre prompt text + if (!is.null(pre_prompt_text)){ + for (line in 1:nrow(pre_prompt_text)){ + if (pre_prompt_text$Heading[line] == TRUE){ + cli_h1(pre_prompt_text$Text[line]) + }else{ + cat(pre_prompt_text$Text[line]) + } + } + cat("\n ") + } + + # prompt text + if (any_keys == TRUE){ + response <- "" + while (response == "") { + response <- readline(prompt = prompt_text) + } + } else if (any_keys == FALSE) { + response <- "" + while (!response %in% c("Y", "y", "N", "n")) { + response <- readline(prompt = prompt_text) + } + # post yes text + if (response %in% list('Y','y') & !is.null(post_yes_text)) { + for (line in 1:nrow(post_yes_text)){ + if (post_yes_text$Heading[line] == TRUE){ + cli_h1(post_yes_text$Text[line]) + }else{ + cat(post_yes_text$Text[line]) + } + } + cat("\nPress any key to continue ") + readline() + } + } else { + stop("Invalid input given for 'any_keys'. Only TRUE or FALSE are allowed.") + } + + response + +} diff --git a/R/user_prompt_list.R b/R/user_prompt_list.R new file mode 100644 index 00000000..7227d296 --- /dev/null +++ b/R/user_prompt_list.R @@ -0,0 +1,34 @@ +#' user_prompt_list +#' +#' This function is called within the browseMetadata function. \cr \cr +#' It prompts a response from the user, in the form of a list. \cr \cr +#' It checks if the user has given the an input within the allowed range - if not, it re-sends prompt. \cr \cr +#' +#' @param prompt_text Text to display to the user, to prompt their response. +#' @param list_allowed A list of allowable integer responses. +#' @param empty_allowed A boolean specifying if no response is allowed. +#' @return It returns a list of integers to process, that match the prompt options. +#' @importFrom CHECK LATER + +user_prompt_list <- function(prompt_text,list_allowed,empty_allowed) { + + list_to_process_Error <- TRUE + list_to_process_InRange <- TRUE + while (list_to_process_Error==TRUE | list_to_process_InRange==FALSE) { + tryCatch({ + cat("\n \n"); + cli_alert_info(prompt_text); + cat("\n"); + list_to_process <- scan(file="",what=0); + list_to_process_InRange_1 = (all(list_to_process %in% list_allowed)) + if (empty_allowed == FALSE){ + list_to_process_InRange_2 = (all(length(list_to_process) != 0)) + } else {list_to_process_InRange_2 = TRUE} + list_to_process_InRange = all(list_to_process_InRange_1,list_to_process_InRange_2) + if (list_to_process_InRange == FALSE){cli_alert_danger('One of your inputs is out of range! Reference the allowable list of integers and try again.')}; + list_to_process_Error <- FALSE}, + error=function(e) {list_to_process_Error <- TRUE; print(e); cat("\n"); cli_alert_danger('Your input is in the wrong format. Reference the allowable list of integers and try again.')}) + } + list_to_process +} + diff --git a/R/valid_comparison.R b/R/valid_comparison.R new file mode 100644 index 00000000..89c486df --- /dev/null +++ b/R/valid_comparison.R @@ -0,0 +1,32 @@ +#' valid_comparison + +#' This function is called within the browseMetadata_compare_outputs function. \cr \cr +#' It reads two inputs to see if they are equal. \cr \cr +#' If the test is 'warning' status and inputs are not equal it gives warning but continues. \cr \cr +#' If the test is 'danger' status and inputs are not equal it stops and exits, with error message. \cr \cr +#' @param input1 Input 1 +#' @param input2 Input 2 +#' @param severity Level of severity. Only 'danger' or 'warning' +#' @param severity_text The text to print if inputs are not equal. +#' @return It returns variable text, depending on any_keys. +#' @importFrom CHECK LATER +#' +valid_comparison <- function(input1, input2, severity, severity_text) { + + if (!severity %in% c('danger', 'warning')) { + stop("Invalid severity. Only 'danger' and 'warning' are allowed.") + } + + if (severity == 'danger') { + if (input1 != input2) { + cat('\n') + stop(paste(severity_text,"-> Exiting!")) + } + } else if (severity == 'warning') { + if (input1 != input2) { + cat('\n') + cli_alert_warning(paste(severity_text,"-> Continuing but please check comparison is valid!")) + } + } + } + diff --git a/README.md b/README.md index fe1ca5db..70bffb7c 100644 --- a/README.md +++ b/README.md @@ -58,7 +58,7 @@ library(browseMetadata) Read the documentation: ``` -?domain_mapping +?browseMetadata ``` Set your working directory to be an empty folder you just created: @@ -70,7 +70,7 @@ setwd("/Users/your-username/test-browseMetadata") Run the function in demo mode: ``` r -domain_mapping() +browseMetadata() ``` Take note of the **Plots** tab in R Studio which should show a table of domains with this info: @@ -93,7 +93,7 @@ For a research study, your domains are likely to be much more specific e.g. 'Pre The 4 default domains are always included [0-3], appended on to any domain list given. ``` -ℹ Running domain_mapping in demo mode using package data files +ℹ Running browseMetadata in demo mode using package data files ℹ Using the default look-up table in data/look-up.rda Enter your initials: RS @@ -285,14 +285,14 @@ LOG_NationalCommunityChildHealthDatabase(NCCHD)_CHILD_2024-04-05-14-37-36.csv PLOT_NationalCommunityChildHealthDatabase(NCCHD)_CHILD_2024-04-05-14-37-36.png ``` -The OUTPUT csv contains the categorisations you made. The LOG csv contains information about the session as a whole, including various metadata. These two csv files contain the same timestamp column. If you do not like the formatting of the OUTPUT csv, see the function [R/convert_output.R](R/convert_output.R) for an alternative. +The OUTPUT csv contains the categorisations you made. The LOG csv contains information about the session as a whole, including various metadata. These two csv files contain the same timestamp column. If you do not like the formatting of the OUTPUT csv, see the function [R/post_browseMetadata_convert.R](R/post_browseMetadata_convert.R) for an alternative. The PLOT png file saves a simple plot displaying the count of domain codes for that table. ### Using your own input files ``` r -domain_mapping(json_file, domain_file, look_up_file, output_dir, table_copy) +browseMetadata(json_file, domain_file, look_up_file, output_dir, table_copy) ``` This code is in early development. To see known bugs or sub-optimal features refer to the [Issues](https://github.com/aim-rsf/browseMetadata/issues). @@ -300,7 +300,7 @@ This code is in early development. To see known bugs or sub-optimal features ref First, change the json file and domain file inputs. Later, consider changing the other 3 inputs, depending on your use-case. For example: ``` r -domain_mapping(json_file = 'path/your-json.json', domain_file = 'path/your-domains.csv') +browseMetadata(json_file = 'path/your-json.json', domain_file = 'path/your-domains.csv') ``` Unlike in demo mode, it will ask you to specify the range of variables you want to process (start variable:end variable), because you can choose to process a table across multiple sessions (particularly useful if the table has a large number of data elements). @@ -319,7 +319,7 @@ Unlike in demo mode, it will ask you to specify the range of variables you want #### lookup file: -- a [default lookup file](dataraw/look_up.csv) is used by the domain_mapping function +- a [default lookup file](dataraw/look_up.csv) is used by the browseMetadata function - optional: a csv can be created by the user (using the same format as the default) and provided as the input - the lookup file makes auto-categorisations intended for variables that come up regularly in health datasets (e.g. IDs and demographics) - the lookup file only works for 1:1 mappings right now, i.e. the DataElement should only be listed once in the lookup file @@ -396,7 +396,7 @@ PATH_BLOOD_TESTS The csv output file containing the categorisation for each data element could be used as an input in later analysis steps to filter variables and visualise how each variable maps to research domains of interest. -Categorisations across researchers can be compared by using the function [R/compare_sessions.R](R/compare_sessions.R). Type `?compare_sessions` to read the manual on how to run this function. In brief, it compares csv outputs from two sessions, finds their differences, and asks for a consensus. +Categorisations across researchers can be compared by using the function [R/post_browseMetadata_compare.R](R/post_browseMetadata_compare.R). Type `?post_browseMetadata_compare` to read the manual on how to run this function. In brief, it compares csv outputs from two sessions, finds their differences, and asks for a consensus. ## License diff --git a/data/Output.rda b/data/Output.rda new file mode 100644 index 0000000000000000000000000000000000000000..2c1ecd654ba86c43b2f8755062970b0f5e0388d3 GIT binary patch literal 183 zcmV;o07(BrT4*^jL0KkKS?UL^Z2$mm|A7BGNB{r<5C8@Nlt8~H-hcoBAOMmL6(6NH zr9Dq6r=)41&}r>VpphzOlL%>`$%p_Iin)R+nJH8jPXhGpgY^KuK8>&zrUZM?6h1PZ zOIu3RhLGAAH4H%x8c4^4BGe3z&7I?}CgGefo>_OB8G34R5t$P)RqQ zS{N2qj~d}0tQh4v!UfwR5<#V;G!!%I7_z7`{?FGk*3oc~+dwuX!GtgZ95jH*;Kk+x zP1`wRL4Le2rr&MXdEI{_%${;$!Z8K*b#PFptO1~{8n1qZKLc}EStg*;T#@d%L9Ot{ zfz~jQaV0t9J@JK&*ds{RP}(jgmwhHyx@PMrAY+KR%QaWdqo*=y_ZM% add_row( + timestamp = format(Sys.time(), "%Y-%m-%d-%H-%M-%S"), + Table = 'Sample Table', + DataElement = 'DataElement 1', + DataElement_N = '1 of 2', + Domain_code = '1', + Note = 'AUTO CATEGORISED' + ) + + df <- df %>% add_row( + timestamp = format(Sys.time(), "%Y-%m-%d-%H-%M-%S"), + Table = 'Sample Table', + DataElement = 'DataElement 2', + DataElement_N = '2 of 2', + Domain_code = '3', + Note = 'DEMOGRAPHICS' + ) + + # Sample reference table + domains_extend <- rbind(c("*NO MATCH / UNSURE*"), c("*METADATA*"), c("*ID*"), c("*DEMOGRAPHICS*"), c("Domain A"),c("Domain B")) + Code <- data.frame(Code = 0:(nrow(domains_extend) - 1)) + ref_table <- tableGrob(cbind(Code,domains_extend),rows = NULL,theme = ttheme_default()) + + # Call the function + result <- end_plot(df, "Sample Table", ref_table) + + # Check if the result is a gtable object + expect_s3_class(result, "gtable") +}) diff --git a/tests/testthat/test-join_outputs.R b/tests/testthat/test-join_outputs.R new file mode 100644 index 00000000..8bdadad6 --- /dev/null +++ b/tests/testthat/test-join_outputs.R @@ -0,0 +1,60 @@ +# Create sample data (only allow it to different on timestamp, Domain_code and Note) +session_1 <- data.frame( + timestamp = c("2024-08-22-13-26-33", "2024-08-22-13-26-33", "2024-08-22-13-26-33"), + Table = c("HEALTH","HEALTH","HEALTH"), + DataElement = c("ALF_E", "AVAIL_FROM_DT", "OUTCOME"), + DataElement_N = c("1 of 3", "2 of 3", "3 of 3"), + Domain_code = c("1", "1", "5"), + Note = c("ID", "Metadata", "Diagnostic category") + ) + +session_2 <- data.frame( + timestamp = c("2024-08-22-15-24-30", "2024-08-22-15-24-30", "2024-08-22-15-24-30"), + Table = c("HEALTH","HEALTH","HEALTH"), + DataElement = c("ALF_E", "AVAIL_FROM_DT", "OUTCOME"), + DataElement_N = c("1 of 3", "2 of 3", "3 of 3"), + Domain_code = c("1", "1", "4"), + Note = c("ID", "info about data", "diagnosis") +) + +# Define expected outputs +expected_output_ses1_ses2 <- data.frame( + timestamp_ses1 = c("2024-08-22-13-26-33", "2024-08-22-13-26-33", "2024-08-22-13-26-33"), + Table_ses1 = c("HEALTH","HEALTH","HEALTH"), + DataElement_N_ses1 = c("1 of 3", "2 of 3", "3 of 3"), + Domain_code_ses1 = c("1", "1", "5"), + Note_ses1 = c("ID", "Metadata", "Diagnostic category"), + timestamp_ses2 = c("2024-08-22-15-24-30", "2024-08-22-15-24-30", "2024-08-22-15-24-30"), + Table_ses2 = c("HEALTH","HEALTH","HEALTH"), + DataElement_N_ses2 = c("1 of 3", "2 of 3", "3 of 3"), + Domain_code_ses2 = c("1", "1", "4"), + Note_ses2 = c("ID", "info about data", "diagnosis"), + DataElement = c("ALF_E", "AVAIL_FROM_DT", "OUTCOME"), + Domain_code_join = c(NA, NA, NA), + Note_join = c(NA, NA, NA) +) + +expected_output_ses1_ses1 <- data.frame( + timestamp_ses1 = c("2024-08-22-13-26-33", "2024-08-22-13-26-33", "2024-08-22-13-26-33"), + Table_ses1 = c("HEALTH","HEALTH","HEALTH"), + DataElement_N_ses1 = c("1 of 3", "2 of 3", "3 of 3"), + Domain_code_ses1 = c("1", "1", "5"), + Note_ses1 = c("ID", "Metadata", "Diagnostic category"), + timestamp_ses2 = c("2024-08-22-13-26-33", "2024-08-22-13-26-33", "2024-08-22-13-26-33"), + Table_ses2 = c("HEALTH","HEALTH","HEALTH"), + DataElement_N_ses2 = c("1 of 3", "2 of 3", "3 of 3"), + Domain_code_ses2 = c("1", "1", "5"), + Note_ses2 = c("ID", "Metadata", "Diagnostic category"), + DataElement = c("ALF_E", "AVAIL_FROM_DT", "OUTCOME"), + Domain_code_join = c(NA, NA, NA), + Note_join = c(NA, NA, NA) +) + +# Write the test +test_that("join_outputs works correctly", { + result <- join_outputs(session_1, session_1) + expect_equal(result, expected_output_ses1_ses1) + result <- join_outputs(session_1,session_2) + expect_equal(result, expected_output_ses1_ses2) + }) + diff --git a/tests/testthat/test-json_table_to_df.R b/tests/testthat/test-json_table_to_df.R new file mode 100644 index 00000000..4c55df3a --- /dev/null +++ b/tests/testthat/test-json_table_to_df.R @@ -0,0 +1,41 @@ +# Define the mock json +mock_Dataset <- list( + childDataClasses = list( + list( + childDataElements = list( + list(ID = "111", label = "Label1", description = "Description1", dataType = list(ID = "a", label = "Type1")), + list(ID = "112", label = "Label2", description = "Description2", dataType = list(ID = "b", label = "Type2")) + ) + ), + list( + childDataElements = list( + list(ID = "111", label = "Label3", description = "Description3", dataType = list(ID = "c", label = "Type1")), + list(ID = "112", label = "Label4", description = "Description4", dataType = list(ID = "d", label = "Type3")) + ) + ) + ) +) + +test_that("json_table_to_df gives expected output for first index", { + result <- json_table_to_df(mock_Dataset, 1) + + expected <- data.frame( + Label = c("Label1", "Label2"), + Description = c("Description1", "Description2"), + Type = c("Type1", "Type2") + ) + + expect_equal(result, expected) +}) + +test_that("json_table_to_df gives expected output for second index", { + result <- json_table_to_df(mock_Dataset, 2) + + expected <- data.frame( + Label = c("Label3", "Label4"), + Description = c("Description3", "Description4"), + Type = c("Type1", "Type3") + ) + + expect_equal(result, expected) +}) diff --git a/tests/testthat/test-load_data.R b/tests/testthat/test-load_data.R new file mode 100644 index 00000000..d27ef4ce --- /dev/null +++ b/tests/testthat/test-load_data.R @@ -0,0 +1,45 @@ +library(testthat) +library(rjson) +library(cli) + +# Define file paths to demo data relative to the package directory +json_file <- system.file("data-raw/national_community_child_health_database_(ncchd)_20240405T130125.json", package = "browseMetadata") +look_up_file <- system.file("data-raw/look_up.csv", package = "browseMetadata") +domains_file <- system.file("data-raw/domain_list_demo.csv", package = "browseMetadata") + +# Define package demo data +json <- get("json_metadata") +look_up <- get("look_up") +domains <- get("domain_list") + +test_that("load_data runs in demo mode when both json_file and domain_file are NULL", { + result <- load_data(NULL, NULL, NULL) + expect_true(result$demo_mode) + expect_equal(result$meta_json, json) + expect_equal(result$domains, domains) + expect_equal(result$DomainListDesc, "DemoList") +}) + +test_that("load_data throws error if only one of json_file or domain_file is NULL", { + expect_error(load_data(json_file, NULL, NULL)) + expect_error(load_data(NULL, domains_file, NULL)) +}) + +test_that("load_data reads user-specified files correctly", { + result <- load_data(json_file, domains_file, NULL) + expect_false(result$demo_mode) + expect_true(is.list(result$meta_json)) + expect_true(is.data.frame(result$domains)) + expect_equal(result$DomainListDesc, tools::file_path_sans_ext(basename(domains_file))) +}) + +test_that("load_data uses default look-up table when look_up_file is NULL", { + result <- load_data(json_file, domains_file, NULL) + expect_equal(result$lookup, look_up) +}) + +test_that("load_data reads user-specified look-up table correctly", { + result <- load_data(json_file, domains_file, look_up_file) + expect_true(is.data.frame(result$lookup)) + expect_equal(nrow(result$lookup), nrow(read.csv(look_up_file))) +}) diff --git a/tests/testthat/test-ref_plot.R b/tests/testthat/test-ref_plot.R new file mode 100644 index 00000000..659e8afd --- /dev/null +++ b/tests/testthat/test-ref_plot.R @@ -0,0 +1,22 @@ +# Unit test for ref_plot function +test_that("ref_plot function works correctly", { + # Mock input dataframe + domains <- data.frame(Domains = c("Domain1", "Domain2", "Domain3", "Domain4")) + + # Call the function + result <- ref_plot(domains) + + # Check the structure of the result + expect_true(is.list(result)) + expect_true("Code" %in% names(result)) + expect_true("Domain_table" %in% names(result)) + + # Check the content of the Code dataframe + expect_equal(nrow(result$Code), 8) + expect_equal(result$Code$Code, 0:7) + + # Check the content of the Domain_table + expect_true(inherits(result$Domain_table, "gtable")) + expect_equal(nrow(result$Domain_table), 9) + expect_equal(result$Domain_table$layout$name[1], "colhead-fg") +}) diff --git a/tests/testthat/test-user_categorisation.R b/tests/testthat/test-user_categorisation.R new file mode 100644 index 00000000..ef8df92b --- /dev/null +++ b/tests/testthat/test-user_categorisation.R @@ -0,0 +1,31 @@ +test_that("user_categorisation works with valid input", { + mock_readline <- mock("3", "This is a note", "n") # create a mock object that returns user inputs in sequence + stub(user_categorisation, "readline", mock_readline) # replace `readline` function within the `user_categorisation` function with the `mock_readline` mock object + + response <- user_categorisation(data_element = "Element1", data_desc = "Description1", data_type = "Type1", domain_code_max = 5) + expect_equal(response, list(decision = "3", decision_note = "This is a note")) +}) + +test_that("user_categorisation handles invalid input and then valid input", { + mock_readline <- mock("6", "3", "This is a note", "n") # create a mock object that returns invalid input first, then valid input + stub(user_categorisation, "readline", mock_readline) # replace `readline` function within the `user_categorisation` function with the `mock_readline` mock object + + response <- user_categorisation(data_element = "Element1", data_desc = "Description1", data_type = "Type1", domain_code_max = 5) + expect_equal(response, list(decision = "3", decision_note = "This is a note")) +}) + +test_that("user_categorisation handles multiple valid inputs", { + mock_readline <- mock("3,4", "This is another note", "n") # create a mock object that returns multiple valid inputs + stub(user_categorisation, "readline", mock_readline) # replace `readline` function within the `user_categorisation` function with the `mock_readline` mock object + + response <- user_categorisation(data_element = "Element1", data_desc = "Description1", data_type = "Type1", domain_code_max = 5) + expect_equal(response, list(decision = "3,4", decision_note = "This is another note")) +}) + +test_that("user_categorisation handles re-do input", { + mock_readline <- mock("3", "This is a note", "y", "4", "Another note", "n") # create a mock object that returns inputs including re-do + stub(user_categorisation, "readline", mock_readline) # replace `readline` function within the `user_categorisation` function with the `mock_readline` mock object + + response <- user_categorisation(data_element = "Element1", data_desc = "Description1", data_type = "Type1", domain_code_max = 5) + expect_equal(response, list(decision = "4", decision_note = "Another note")) +}) diff --git a/tests/testthat/test-user_categorisation_loop.R b/tests/testthat/test-user_categorisation_loop.R new file mode 100644 index 00000000..52b00c2a --- /dev/null +++ b/tests/testthat/test-user_categorisation_loop.R @@ -0,0 +1,50 @@ +Output <- get("Output") +Code <- data.frame(Code = 0:2) +df_plots <- list(Code = Code,'') + +test_that("user_categorisation_loop handles auto categorisation", { + # Mock data + selectTable_df <- data.frame(Label = c("Element1", "Element2"), Description = c("Desc1", "Desc2"), Type = c("Type1", "Type2")) + lookup <- data.frame(DataElement = c("Element1", "Element2"), DomainCode = c(1, 2)) + + # Call the function + result <- user_categorisation_loop(1, 2, selectTable_df, FALSE, data.frame(), lookup, df_plots, Output) + + # Check the result + expect_equal(nrow(result), 2) + expect_equal(result$Note[1], "AUTO CATEGORISED") + expect_equal(result$Note[2], "AUTO CATEGORISED") +}) + +test_that("user_categorisation_loop handles copying from previous table", { + # Mock data + selectTable_df <- data.frame(Label = c("Element1", "Element2"), Description = c("Desc1", "Desc2"), Type = c("Type1", "Type2")) + df_prev <- data.frame(DataElement = c("Element1", "Element2"), Domain_code = c(1, 2), Table = c("PrevTable1", "PrevTable2")) + lookup <- data.frame(DataElement = c("Element3", "Element4"), DomainCode = c(3, 4)) + + # Call the function + result <- user_categorisation_loop(1, 2, selectTable_df, TRUE, df_prev, lookup, df_plots, Output) + + # Check the result + expect_equal(nrow(result), 2) + expect_equal(result$Note[1], "COPIED FROM: PrevTable1") + expect_equal(result$Note[2], "COPIED FROM: PrevTable2") +}) + +test_that("user_categorisation_loop handles user categorisation", { + # Mock data + selectTable_df <- data.frame(Label = c("Element1", "Element2"), Description = c("Desc1", "Desc2"), Type = c("Type1", "Type2")) + lookup <- data.frame(DataElement = c("Element3", "Element4"), DomainCode = c(3, 4)) + + # Mock the user_categorisation function + mock_user_categorisation <- mock(list(decision = "1", decision_note = "User note"), cycle = TRUE) + stub(user_categorisation_loop, "user_categorisation", mock_user_categorisation) + + # Call the function + result <- user_categorisation_loop(1, 2, selectTable_df, FALSE, data.frame(), lookup, df_plots, Output) + + # Check the result + expect_equal(nrow(result), 2) + expect_equal(result$Note[1], "User note") + expect_equal(result$Domain_code[1], "1") +}) diff --git a/tests/testthat/test-user_prompt.R b/tests/testthat/test-user_prompt.R new file mode 100644 index 00000000..b0f7a400 --- /dev/null +++ b/tests/testthat/test-user_prompt.R @@ -0,0 +1,61 @@ +test_that("user_prompt works with any_keys = TRUE", { + mock_readline <- mock("test_response") # create a mock object that returns a string when called + stub(user_prompt, "readline", mock_readline) # replace `readline` function within the `user_prompt` function with the `mock_readline` mock object + response <- user_prompt(prompt_text = "Enter something: ", any_keys = TRUE) + expect_equal(response, "test_response") +}) + +test_that("user_prompt works with any_keys = FALSE and response is y/Y", { + mock_readline <- mock("Y") + stub(user_prompt, "readline", mock_readline) + + response <- user_prompt(prompt_text = "Enter y/n: ", any_keys = FALSE) + expect_equal(response, "Y") +}) + +test_that("user_prompt works with any_keys = FALSE and response is n/N", { + mock_readline <- mock("n") + stub(user_prompt, "readline", mock_readline) + + response <- user_prompt(prompt_text = "Enter y/n: ", any_keys = FALSE) + expect_equal(response, "n") +}) + +test_that("user_prompt throws error with invalid any_keys", { + expect_error(user_prompt(prompt_text = "Enter something: ", any_keys = "invalid"), + "Invalid input given for 'any_keys'. Only TRUE or FALSE are allowed.") +}) + +test_that("user_prompt works with pre_prompt_text", { + pre_prompt_text <- data.frame(Heading = c(TRUE, FALSE), Text = c("Heading Text", "Regular Text")) + mock_readline <- mock("test_response") + stub(user_prompt, "readline", mock_readline) + + response <- user_prompt(pre_prompt_text = pre_prompt_text, prompt_text = "Enter something: ", any_keys = TRUE) + expect_equal(response, "test_response") +}) + +test_that("user_prompt works with post_yes_text", { + post_yes_text <- data.frame(Heading = c(TRUE, FALSE), Text = c("Post Heading Text", "Post Regular Text")) + mock_readline <- mock("Y", "test_response") + stub(user_prompt, "readline", mock_readline) + + response <- user_prompt(prompt_text = "Enter y/n: ", any_keys = FALSE, post_yes_text = post_yes_text) + expect_equal(response, "Y") +}) + +test_that("user_prompt handles empty input initially", { + mock_readline <- mock("", "test_response") + stub(user_prompt, "readline", mock_readline) + + response <- user_prompt(prompt_text = "Enter something: ", any_keys = TRUE) + expect_equal(response, "test_response") +}) + +test_that("user_prompt handles invalid input for any_keys = FALSE", { + mock_readline <- mock("invalid", "Y") + stub(user_prompt, "readline", mock_readline) + + response <- user_prompt(prompt_text = "Enter y/n: ", any_keys = FALSE) + expect_equal(response, "Y") +}) diff --git a/tests/testthat/test-user_prompt_list.R b/tests/testthat/test-user_prompt_list.R new file mode 100644 index 00000000..59e6513f --- /dev/null +++ b/tests/testthat/test-user_prompt_list.R @@ -0,0 +1,31 @@ +test_that("user_prompt_list works with valid input", { + mock_scan <- mock(c(1, 2, 3), cycle = TRUE) # create a mock object that returns a list of integers when called, cycling the same value + stub(user_prompt_list, "scan", mock_scan) # replace `scan` function within the `user_prompt_list` function with the `mock_scan` mock object + + response <- user_prompt_list(prompt_text = "Enter numbers: ", list_allowed = 1:5, empty_allowed = FALSE) + expect_equal(response, c(1, 2, 3)) +}) + +test_that("user_prompt_list handles out of range input and then valid input", { + mock_scan <- mock(c(6, 7, 8), c(1, 2, 3)) # create a mock object that returns out-of-range values first, then valid values + stub(user_prompt_list, "scan", mock_scan) # replace `scan` function within the `user_prompt_list` function with the `mock_scan` mock object + + response <- user_prompt_list(prompt_text = "Enter numbers: ", list_allowed = 1:5, empty_allowed = FALSE) + expect_equal(response, c(1, 2, 3)) # expect the valid input after the out-of-range input +}) + +test_that("user_prompt_list handles empty input when not allowed", { + mock_scan <- mock(integer(0), c(1, 2, 3)) # create a mock object that returns an empty list first, then valid values + stub(user_prompt_list, "scan", mock_scan) # replace `scan` function within the `user_prompt_list` function with the `mock_scan` mock object + + response <- user_prompt_list(prompt_text = "Enter numbers: ", list_allowed = 1:5, empty_allowed = FALSE) + expect_equal(response, c(1, 2, 3)) # expect the valid input after the empty input +}) + +test_that("user_prompt_list handles empty input when allowed", { + mock_scan <- mock(integer(0), cycle = TRUE) # create a mock object that returns an empty list when called, cycling the same value + stub(user_prompt_list, "scan", mock_scan) # replace `scan` function within the `user_prompt_list` function with the `mock_scan` mock object + + response <- user_prompt_list(prompt_text = "Enter numbers: ", list_allowed = 1:5, empty_allowed = TRUE) + expect_equal(response, integer(0)) # expect an empty list +}) diff --git a/tests/testthat/test-valid_comparison.R b/tests/testthat/test-valid_comparison.R new file mode 100644 index 00000000..c75410ac --- /dev/null +++ b/tests/testthat/test-valid_comparison.R @@ -0,0 +1,19 @@ +test_that("valid_comparison function works correctly", { + # Test invalid severity + expect_error(valid_comparison(1, 1, 'info', 'Invalid severity test'), + "Invalid severity. Only 'danger' and 'warning' are allowed.") + + # Test danger severity with different inputs + expect_error(valid_comparison(1, 2, 'danger', 'Different inputs test'), + "Exiting!") + + # Test warning severity with different inputs + expect_message(valid_comparison(1, 2, 'warning', 'Different inputs test'), + "Continuing but please check comparison is valid!") + + # Test danger severity with same inputs + expect_silent(valid_comparison(1, 1, 'danger', 'Same inputs test')) + + # Test warning severity with same inputs + expect_silent(valid_comparison(1, 1, 'warning', 'Same inputs test')) +})