From 24ab547dd9f0dfd8c2593ae29bdfbea58aea5c97 Mon Sep 17 00:00:00 2001
From: Rachael Stickland <50215726+RayStick@users.noreply.github.com>
Date: Fri, 16 Aug 2024 15:45:25 +0100
Subject: [PATCH] unit testings & renaming & docs

rename

update names

this is not user facing

rename

updated user_prompt and saved output dataframes

add tidyverse

naming, packages and user prompt functions

rename

rename functions and variables to simplify

documentation changes

rename function - correction

add section headers and add new func

fix bug

simplify in order to generalise

add new func

match latest funcs

update docs

naming updated

fix bug

naming update

new sub functions

add testthat

unit test complete

move to danger

unit test complete

doc

unit test complete

unit test complete

unit test complete

unit test complete

unit test complete

unit test complete

unit test complete

unit test complete

unit test complete

unit test complete

remove inputs to function that are static

add doc

add doc manuals for new functions
---
 DESCRIPTION                                   |   8 +-
 NAMESPACE                                     |   7 +-
 R/browseMetadata.R                            | 308 ++++++++++++++
 R/browseMetadata_compare_outputs.R            | 115 ++++++
 ...put.R => browseMetadata_convert_outputs.R} |   6 +-
 R/compare_sessions.R                          | 148 -------
 R/concensus_on_mismatch.R                     |  33 ++
 R/copy_previous.R                             |  38 ++
 R/data-Output.R                               |  25 ++
 R/data-log_Output.R                           |  28 ++
 R/domain_mapping.R                            | 376 ------------------
 R/end_plot.R                                  |  36 ++
 R/join_outputs.R                              |  19 +
 R/json_table_to_df.R                          |  29 ++
 R/load_data.R                                 |  50 +++
 R/ref_plot.R                                  |  25 ++
 R/user_categorisation.R                       |   3 +-
 R/user_categorisation_loop.R                  |  74 ++++
 R/user_prompt.R                               |  66 +++
 R/user_prompt_list.R                          |  34 ++
 R/valid_comparison.R                          |  32 ++
 README.md                                     |  16 +-
 data/Output.rda                               | Bin 0 -> 183 bytes
 data/log_Output.rda                           | Bin 0 -> 252 bytes
 man/Output.Rd                                 |  31 ++
 man/browseMetadata-package.Rd                 |   1 -
 man/browseMetadata.Rd                         |  59 +++
 ...s.Rd => browseMetadata_compare_outputs.Rd} |  16 +-
 ...t.Rd => browseMetadata_convert_outputs.Rd} |  12 +-
 man/concensus_on_mismatch.Rd                  |  25 ++
 man/copy_previous.Rd                          |  16 +
 man/domain_mapping.Rd                         |  40 --
 man/end_plot.Rd                               |  22 +
 man/join_outputs.Rd                           |  20 +
 man/json_table_to_df.Rd                       |  21 +
 man/load_data.Rd                              |  24 ++
 man/log_Output.Rd                             |  34 ++
 man/ref_plot.Rd                               |  19 +
 man/user_categorisation.Rd                    |   2 +-
 man/user_categorisation_loop.Rd               |  44 ++
 man/user_prompt.Rd                            |  34 ++
 man/user_prompt_list.Rd                       |  23 ++
 man/valid_comparison.Rd                       |  31 ++
 tests/testthat.R                              |  12 +
 tests/testthat/test-concensus_on_mismatch.R   |  65 +++
 tests/testthat/test-copy_previous.R           |  45 +++
 tests/testthat/test-end_plot.R                |  33 ++
 tests/testthat/test-join_outputs.R            |  60 +++
 tests/testthat/test-json_table_to_df.R        |  41 ++
 tests/testthat/test-load_data.R               |  45 +++
 tests/testthat/test-ref_plot.R                |  22 +
 tests/testthat/test-user_categorisation.R     |  31 ++
 .../testthat/test-user_categorisation_loop.R  |  50 +++
 tests/testthat/test-user_prompt.R             |  61 +++
 tests/testthat/test-user_prompt_list.R        |  31 ++
 tests/testthat/test-valid_comparison.R        |  19 +
 56 files changed, 1866 insertions(+), 599 deletions(-)
 create mode 100755 R/browseMetadata.R
 create mode 100644 R/browseMetadata_compare_outputs.R
 rename R/{convert_output.R => browseMetadata_convert_outputs.R} (91%)
 delete mode 100644 R/compare_sessions.R
 create mode 100644 R/concensus_on_mismatch.R
 create mode 100644 R/copy_previous.R
 create mode 100644 R/data-Output.R
 create mode 100644 R/data-log_Output.R
 delete mode 100755 R/domain_mapping.R
 create mode 100644 R/end_plot.R
 create mode 100644 R/join_outputs.R
 create mode 100644 R/json_table_to_df.R
 create mode 100644 R/load_data.R
 create mode 100644 R/ref_plot.R
 create mode 100644 R/user_categorisation_loop.R
 create mode 100644 R/user_prompt.R
 create mode 100644 R/user_prompt_list.R
 create mode 100644 R/valid_comparison.R
 create mode 100644 data/Output.rda
 create mode 100644 data/log_Output.rda
 create mode 100644 man/Output.Rd
 create mode 100644 man/browseMetadata.Rd
 rename man/{compare_sessions.Rd => browseMetadata_compare_outputs.Rd} (69%)
 rename man/{convert_output.Rd => browseMetadata_convert_outputs.Rd} (66%)
 create mode 100644 man/concensus_on_mismatch.Rd
 create mode 100644 man/copy_previous.Rd
 delete mode 100644 man/domain_mapping.Rd
 create mode 100644 man/end_plot.Rd
 create mode 100644 man/join_outputs.Rd
 create mode 100644 man/json_table_to_df.Rd
 create mode 100644 man/load_data.Rd
 create mode 100644 man/log_Output.Rd
 create mode 100644 man/ref_plot.Rd
 create mode 100644 man/user_categorisation_loop.Rd
 create mode 100644 man/user_prompt.Rd
 create mode 100644 man/user_prompt_list.Rd
 create mode 100644 man/valid_comparison.Rd
 create mode 100644 tests/testthat.R
 create mode 100644 tests/testthat/test-concensus_on_mismatch.R
 create mode 100644 tests/testthat/test-copy_previous.R
 create mode 100644 tests/testthat/test-end_plot.R
 create mode 100644 tests/testthat/test-join_outputs.R
 create mode 100644 tests/testthat/test-json_table_to_df.R
 create mode 100644 tests/testthat/test-load_data.R
 create mode 100644 tests/testthat/test-ref_plot.R
 create mode 100644 tests/testthat/test-user_categorisation.R
 create mode 100644 tests/testthat/test-user_categorisation_loop.R
 create mode 100644 tests/testthat/test-user_prompt.R
 create mode 100644 tests/testthat/test-user_prompt_list.R
 create mode 100644 tests/testthat/test-valid_comparison.R

diff --git a/DESCRIPTION b/DESCRIPTION
index 4e24e2d0..d620b3fc 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -11,7 +11,8 @@ Encoding: UTF-8
 LazyData: true
 RoxygenNote: 7.3.2
 Depends: 
-    R (>= 2.10)
+    R (>= 2.10),
+    tidyverse
 Imports: 
     cli,
     devtools,
@@ -19,9 +20,12 @@ Imports:
     ggplot2,
     grid,
     gridExtra,
+    mockery,
     rjson
 Suggests: 
     knitr,
-    rmarkdown
+    rmarkdown,
+    testthat (>= 3.0.0)
 VignetteBuilder: knitr
 URL: https://aim-rsf.github.io/browseMetadata/
+Config/testthat/edition: 3
diff --git a/NAMESPACE b/NAMESPACE
index 8a0cae22..33c11259 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,9 +1,8 @@
 # Generated by roxygen2: do not edit by hand
 
-export(compare_sessions)
-export(convert_output)
-export(domain_mapping)
-export(user_categorisation)
+export(browseMetadata)
+export(browseMetadata_compare_outputs)
+export(browseMetadata_convert_outputs)
 import(cli)
 import(devtools)
 import(ggplot2)
diff --git a/R/browseMetadata.R b/R/browseMetadata.R
new file mode 100755
index 00000000..79f185c9
--- /dev/null
+++ b/R/browseMetadata.R
@@ -0,0 +1,308 @@
+#' browseMetadata
+#'
+#' This function will read in the metadata file for a chosen dataset, loop
+#' through all the data elements, and ask the user to catergorise/label each
+#' data element as belonging to one or more domains.\cr \cr
+#' The domains will appear in the Plots tab and dataset information will be
+#' printed to the R console, for the user's reference in making these
+#' categorisations. \cr \cr
+#' These categorisations will be saved to a csv file, alongside a log file which
+#' summarises the session details.To speed up this process, some
+#' auto-categorisations will be made by the function for commonly occurring data
+#' elements and categorisations for the same data element can be copied from one
+#' table to another. \cr \cr
+#' Example inputs are provided within the package data, for the user to run this
+#' function in a demo mode.
+#' @param json_file The metadata file. This should be downloaded from the
+#' metadata catalogue as a json file. See
+#' 'data-raw/maternity_indicators_dataset_(mids)_20240105T132210.json' for an
+#' example download.
+#' @param domain_file The domain list file. This should be a csv file created by
+#' the user, with each domain listed on a separate line. See
+#' 'data-raw/domain_list_demo.csv' for a template.
+#' @param look_up_file The look-up table file, with auto-categorisations. By
+#' default, the code uses 'data/look-up.rda'. The user can provide their own
+#' look-up table in the same format as 'data-raw/look-up.csv'.
+#' @param output_dir The path to the directory where the two csv output files
+#' will be saved. By default, the current working directory is used.
+#' @param table_copy Turn on copying between tables (TRUE or FALSE, default
+#' TRUE). If TRUE, categorisations you made for all other tables in this dataset
+#' will be copied over (if 'OUTPUT_' files are found in output_dir).
+#' @return The function will return two csv files: 'OUTPUT_' which contains the
+#' mappings and 'LOG_' which contains details about the dataset and session.
+#' @examples
+#' # Run in demo mode by providing no inputs: browseMetadata()
+#' # Demo mode will use the /data files provided in this package
+#' # For more guidance, refer to the package README.md file.
+#' @export
+#' @import ggplot2
+#' @importFrom graphics plot.new
+#' @importFrom utils read.csv write.csv
+#' @importFrom dplyr %>% arrange count group_by distinct
+#' @importFrom tidyverse add_row
+
+browseMetadata <- function(
+    json_file = NULL,
+    domain_file = NULL,
+    look_up_file = NULL,
+    output_dir = NULL,
+    table_copy = TRUE) {
+
+  timestamp_now <- format(Sys.time(), "%Y-%m-%d-%H-%M-%S")
+
+  # DEFINE INPUTS AND OUTPUTS ----
+
+  ## Set output_dir to current wd if user has not provided it
+  if (is.null(output_dir)) {
+    output_dir = getwd()
+  }
+
+  ## Use 'load_data.R' to collect inputs (defaults or user inputs)
+  data <- load_data(json_file, domain_file, look_up_file)
+
+  ## Extract Dataset from json_file
+  Dataset <- data$meta_json$dataModel
+  Dataset_Name <- Dataset$label
+
+  ## Read in prepared output data frames
+  log_Output <- get("log_Output")
+  Output <- get("Output")
+
+  ## Use 'ref_plot.R' to plot domains for the user's ref (save df for later use)
+  df_plots <- ref_plot(data$domains)
+
+  ## Use 'user_prompt.R' to get user initials for the log file
+  User_Initials <- user_prompt(
+    prompt_text = "Enter your initials: ", any_keys = TRUE)
+
+  # DISPLAY DATASET ----
+
+  ## Use 'user_prompt.R' to print info about dataset based on user input
+  pre_prompt_text <- data.frame(Heading = logical(0), Text = character(0))
+  pre_prompt_text <- pre_prompt_text %>% add_row(Heading = TRUE,
+                                                 Text = 'Dataset Name')
+  pre_prompt_text <- pre_prompt_text %>% add_row(
+    Heading = FALSE,Text = paste(Dataset_Name))
+  pre_prompt_text <- pre_prompt_text %>% add_row(
+    Heading = TRUE,Text = 'Dataset File Exported By')
+  pre_prompt_text <- pre_prompt_text %>% add_row(
+    Heading = FALSE,
+    Text = paste(data$meta_json$exportMetadata$exportedBy,
+                 "at",data$meta_json$exportMetadata$exportedOn))
+  prompt_text <- "Would you like to read a description of the dataset? (y/n): "
+  post_yes_text <- data.frame(Heading = logical(0), Text = character(0))
+  post_yes_text <- post_yes_text %>% add_row(
+    Heading = TRUE, Text = 'Dataset Description')
+  post_yes_text <- post_yes_text %>% add_row(
+    Heading = FALSE,Text = paste(Dataset$description))
+
+  user_prompt(
+    pre_prompt_text = pre_prompt_text,
+    prompt_text = prompt_text,
+    any_keys = FALSE,
+    post_yes_text = post_yes_text
+  )
+
+  # WHICH TABLES FROM THE DATASET? ----
+  ## Use 'user_prompt_list.R' to ask user which tables to process
+  nTables <- length(Dataset$childDataClasses)
+  table_df <- data.frame(Table_Name = character(0), Table_Number = integer(0))
+  for (dc in 1:nTables) {
+    table_df <- table_df %>% add_row(
+      Table_Number = dc,
+      Table_Name = Dataset$childDataClasses[[dc]]$label)
+    }
+
+  print(table_df,row.names = FALSE)
+    nTables_Process <- user_prompt_list(
+    prompt_text =
+      paste('Found',nTables,'table(s) in this Dataset.','Enter table numbers',
+            'you want to process (one table number on each line):'),
+    list_allowed = seq(from = 1, to = nTables, by = 1),
+    empty_allowed = FALSE
+  )
+
+  # PROCESS EACH CHOSEN TABLE ----
+  ## Extract each Table
+  for (dc in unique(nTables_Process)) {
+    cat("\n")
+    cli_alert_info("Processing Table {dc} of {nTables}")
+    cli_h1("Table Name")
+    Table_name <- Dataset$childDataClasses[[dc]]$label
+    cat(Table_name,"\n",fill = TRUE)
+
+    ### Use 'copy_previous.R' to copy from previous output(s) if they exist
+    if (table_copy == TRUE) {
+      output <- copy_previous(Dataset_Name,output_dir)
+      df_prev_exist <- output$df_prev_exist
+      df_prev <- output$df_prev
+    } else {
+      df_prev_exist <- FALSE
+    }
+
+    ### Use 'user_prompt.R' to ask if user wants to read desc of table
+    prompt_text = "Would you like to read a description of the table? (y/n): "
+    post_yes_text <- data.frame(Heading = logical(0), Text = character(0))
+    post_yes_text <- post_yes_text %>% add_row(
+      Heading = TRUE, Text = 'Table Description')
+    post_yes_text <- post_yes_text %>% add_row(
+      Heading = FALSE,
+      Text = paste(Dataset$childDataClasses[[dc]]$description)
+    )
+
+    user_prompt(
+      prompt_text = prompt_text,
+      any_keys = FALSE,
+      post_yes_text = post_yes_text
+    )
+
+    table_note <- readline(paste('Optional free text note about this table',
+                                 '(or press enter to continue): '))
+
+    ###  Use 'json_table_to_df.R' to extract table from meta_json into a df
+    Table_df <- json_table_to_df(Dataset = Dataset,n = dc)
+
+    ### Ask user which data elements to process
+
+    cli_alert_info(paste('There are', as.character(nrow(Table_df)),
+                 'data elements (variables) in this table.'))
+
+    if (data$demo_mode == TRUE) {
+      start_v = 1
+      end_v = min(20, nrow(Table_df))
+    } else {
+      #### Use 'user_prompt_list.R' to ask user which data elements
+      start_v <- user_prompt_list(
+        prompt_text = 'Start variable (write 1 to process all): ',
+        list_allowed = seq(from = 1, to = nrow(Table_df), by = 1),
+        empty_allowed)
+      end_v <- user_prompt_list(
+        prompt_text = paste('End variable (write',
+                            as.character(nrow(Table_df)),
+                            'to process all):'),
+        list_allowed = seq(from = start_v, to = nrow(Table_df), by = 1),
+        empty_allowed)
+    }
+
+    ### Use 'user_categorisation_loop.R' to copy or request from user
+
+    Output <- user_categorisation_loop(start_v,
+                             end_v,
+                             Table_df,
+                             df_prev_exist,
+                             df_prev,
+                             lookup = data$lookup,
+                             df_plots,
+                             Output)
+
+    Output$timestamp <- timestamp_now
+    Output$Table <- Table_name
+
+    ### Review auto categorized data elements
+    #### Use 'user_prompt_list.R' to ask the user which rows to edit
+    cat('\n')
+    Output_auto <- subset(Output, Note == 'AUTO CATEGORISED')
+    Output_auto <- Output_auto[, c("DataElement", "Domain_code", "Note")]
+    print(Output_auto)
+
+    auto_row <- user_prompt_list(
+      prompt_text = paste('These are the auto categorised data elements.',
+                          'Enter row numbers for those you want to edit: '),
+      list_allowed = which(Output$Note == 'AUTO CATEGORISED'),
+      empty_allowed = TRUE
+    )
+
+    if (length(auto_row) != 0) {
+      for (data_v_auto in unique(auto_row)) {
+        ##### collect user responses with with 'user_categorisation.R'
+        decision_output <- user_categorisation(
+          Table_df$Label[data_v_auto],
+          Table_df$Description[data_v_auto],
+          Table_df$Type[data_v_auto],
+          max(df_plots$Code$Code)
+        )
+        ##### input user responses into output
+        Output$Domain_code[data_v_auto] <- decision_output$decision
+        Output$Note[data_v_auto] <- decision_output$decision_note
+      }
+    }
+
+    ### Review user categorized data elements (optional)
+    #### Use 'user_prompt.R' to ask the user if they want to review
+    #### Use 'user_prompt_list.R' to ask the user which rows to edit
+    review_cats <- user_prompt(
+      prompt_text = "Would you like to review your categorisations? (y/n): ",
+      any_keys = FALSE)
+    if (review_cats == 'Y' | review_cats == 'y') {
+      Output_not_auto <- subset(Output, Note != 'AUTO CATEGORISED')
+      Output_not_auto['Note (first 12 chars)'] <-
+        substring(Output_not_auto$Note, 1, 11)
+      print(Output_not_auto[,
+                            c("DataElement",
+                              "Domain_code",
+                              "Note (first 12 chars)")])
+      not_auto_row <- user_prompt_list(
+        prompt_text = paste('These are the data elements you categorised.',
+                            'Enter row numbers for those you want to edit: '),
+        list_allowed = which(Output$Note != 'AUTO CATEGORISED'),
+        empty_allowed = TRUE
+      )
+      if (length(not_auto_row) != 0) {
+        for (data_v_not_auto in unique(not_auto_row)) {
+          #####  collect user responses with with 'user_categorisation.R'
+          decision_output <- user_categorisation(
+            Table_df$Label[data_v_not_auto],
+            Table_df$Description[data_v_not_auto],
+            Table_df$Type[data_v_not_auto],
+            max(df_plots$Code$Code)
+          )
+          ##### input user responses into output
+          Output$Domain_code[data_v_not_auto] <- decision_output$decision
+          Output$Note[data_v_not_auto] <- decision_output$decision_note
+        }
+      }
+    }
+
+    ### Fill in log output
+    log_Output$timestamp = timestamp_now
+    log_Output$browseMetadata = packageVersion("browseMetadata")
+    log_Output$Initials = User_Initials
+    log_Output$MetaDataVersion = Dataset$documentationVersion
+    log_Output$MetaDataLastUpdated = Dataset$lastUpdated
+    log_Output$DomainListDesc = data$DomainListDesc
+    log_Output$Dataset = Dataset_Name
+    log_Output$Table = Table_name
+    log_Output$Table_note = table_note
+
+    ### Create output file names
+    csv_fname <- paste0("OUTPUT_",gsub(" ", "", Dataset_Name),"_",
+                        gsub(" ", "", Table_name),"_",timestamp_now,".csv")
+    csv_log_fname <- paste0("LOG_",gsub(" ", "", Dataset_Name),"_",
+                            gsub(" ", "", Table_name),"_",timestamp_now,".csv")
+    png_fname <- paste0("PLOT_",gsub(" ", "", Dataset_Name),"_",
+                        gsub(" ", "", Table_name),"_",timestamp_now,".png")
+
+    ### Save final categorisations for this Table
+    utils::write.csv(Output,paste(output_dir, csv_fname, sep = '/'),
+                     row.names = FALSE)
+    utils::write.csv(log_Output,paste(output_dir, csv_log_fname, sep = '/'),
+                     row.names = FALSE)
+    cat("\n")
+    cli_alert_success("Final categorisations saved in:\n{csv_fname}")
+    cli_alert_success("Session log saved in:\n{csv_log_fname}")
+
+    ### Create and save a summary plot
+    end_plot_save <- end_plot(df = Output,Table_name,
+                              ref_table = df_plots$Domain_table)
+    ggsave(
+      plot = end_plot_save$full_plot,
+      paste(output_dir, png_fname, sep = '/'),
+      width = 14,
+      height = 8,
+      units = "in"
+    )
+    cli_alert_success("A summary plot has been saved:\n{png_fname}")
+
+  } # end of loop for each table
+
+} # end of function
diff --git a/R/browseMetadata_compare_outputs.R b/R/browseMetadata_compare_outputs.R
new file mode 100644
index 00000000..fe24fd08
--- /dev/null
+++ b/R/browseMetadata_compare_outputs.R
@@ -0,0 +1,115 @@
+#' browseMetadata_compare_outputs
+#'
+#' This function is to be used after running the browseMetadata function. \cr \cr
+#' It compares csv outputs from two sessions, finds their differences, and asks for a consensus. \cr \cr
+#'
+#' @param session_dir This directory should contain 2 csv files for each session (LOG_ and OUTPUT_), 4 csv files in total.
+#' @param session1_base Base file name for session 1 e.g. 'NationalCommunityChildHealthDatabase(NCCHD)_BLOOD_TEST_2024-07-05-16-07-38'
+#' @param session2_base Base file name for session 1 e.g. 'NationalCommunityChildHealthDatabase(NCCHD)_BLOOD_TEST_2024-07-08-12-03-30'
+#' @param json_file The full path to the metadata file used when running browseMetadata (should be the same for session 1 and session 2)
+#' @param domain_file The full path to the domain file used when running browseMetadata (should be the same for session 1 and session 2)
+#' @return It returns a csv output, which represents the consensus decisions between session 1 and session 2
+#' @importFrom dplyr left_join select join_by
+#' @export
+
+browseMetadata_compare_outputs <- function(session_dir,session1_base,session2_base,json_file,domain_file) {
+
+  timestamp_now <- format(Sys.time(), "%Y-%m-%d-%H-%M-%S")
+
+  # DEFINE INPUTS ----
+
+  csv_1a <- read.csv(paste0(session_dir,'/','LOG_',session1_base,'.csv'))
+  csv_2a <- read.csv(paste0(session_dir,'/','LOG_',session2_base,'.csv'))
+  csv_1b <- read.csv(paste0(session_dir,'/','OUTPUT_',session1_base,'.csv'))
+  csv_2b <- read.csv(paste0(session_dir,'/','OUTPUT_',session2_base,'.csv'))
+
+  meta_json <- rjson::fromJSON(file = json_file)
+  domains <- read.csv(domain_file, header = FALSE)
+
+  Dataset <- meta_json$dataModel
+  Dataset_Name <- Dataset$label
+
+  # VALIDATION CHECKS ----
+
+  ## Use 'valid_comparison.R' to check if sessions can be compared to each other and to the json (min requirements):
+
+  valid_comparison(input1 = csv_1a$Dataset[1],
+                   input2 = csv_2a$Dataset[1],
+                   severity = 'danger',
+                   severity_text = 'Session 1 and 2 have different datasets')
+
+  valid_comparison(input1 = csv_1a$Table[1],
+                   input2 = csv_2a$Table[1],
+                   severity = 'danger',
+                   severity_text = 'Session 1 and 2 have different tables')
+
+  valid_comparison(input1 = csv_1a$Dataset[1],
+                   input2 =  Dataset_Name,
+                   severity = 'danger',
+                   severity_text = 'Different dataset to json')
+
+  valid_comparison(input1 = nrow(csv_1b),
+                   input2 = nrow(csv_2b),
+                   severity = 'danger',
+                   severity_text = 'Different number of data elements!')
+
+  ##  Use 'valid_comparison.R' to check the sessions can be compared to each other and to the json (warnings for user to check):
+
+  valid_comparison(input1 = csv_1a$browseMetadata[1],
+                   input2 = csv_2a$browseMetadata[1],
+                   severity = 'warning',
+                   severity_text = 'Different version of browseMetadata package!')
+
+  valid_comparison(input1 = csv_1a$MetaDataVersion[1],
+                   input2 = csv_2a$MetaDataVersion[1],
+                   severity = 'warning',
+                   severity_text = 'Different metadata versions!')
+
+  valid_comparison(input1 = csv_1a$MetaDataVersion[1],
+                   input2 = Dataset$documentationVersion,
+                   severity = 'warning',
+                   severity_text = 'The version files do not match the json (different metadata versions)!')
+
+  valid_comparison(input1 = csv_1a$MetaDataLastUpdated[1],
+                   input2 = csv_2a$MetaDataLastUpdated[1],
+                   severity = 'warning',
+                   severity_text = 'Different metadata date!')
+
+  valid_comparison(input1 = csv_1a$MetaDataLastUpdated[1],
+                   input2 = Dataset$lastUpdated,
+                   severity = 'warning',
+                   severity_text = 'The session files do not match the json (different dates for metadata)!')
+
+  # DISPLAY TO USER ----
+
+  ## Use 'ref_plot.R' to plot domains for the user's ref (save df for later use)
+  df_plots <- ref_plot(domains)
+
+  # EXTRACT TABLE INFO FROM METADATA JSON ----
+
+  ## Use 'json_table_to_df.R' to extract table from meta_json into a df
+  table_find <- data.frame(table_n = numeric(length(Dataset$childDataClasses)),table_label = character(length(Dataset$childDataClasses)))
+  for (t in 1:length(Dataset$childDataClasses)) {
+    table_find$table_n[t] = t
+    table_find$table_label[t] = Dataset$childDataClasses[[t]]$label
+  }
+  table_n = table_find$table_n[table_find$table_label == csv_1a$Table[1]]
+
+  Table_df <-json_table_to_df(Dataset = meta_json$data,n = table_n)
+
+  # JOIN DATAFRAMES FROM SESSIONS IN ORDER TO COMPARE ----
+  ses_join <- join_outputs(session_1 = csv_1b,session_2 = csv_2b)
+
+  # FIND MISMATCHES AND ASK FOR CONSENSUS DECISION ----
+  for (datavar in 1:nrow(ses_join)) {
+    concensus <- concensus_on_mismatch(ses_join,Table_df,datavar,max(df_plots$Code$Code))
+    ses_join$Domain_code_join[datavar] <- concensus$Domain_code_join
+    ses_join$Note_join[datavar] <- concensus$Note_join
+    } # end of loop for DataElement
+
+  # SAVE TO NEW CSV ----
+  output_fname <- paste0("CONCENSUS_OUTPUT_", gsub(" ", "", Dataset_Name), "_", table_find$table_label[table_n], "_", timestamp_now, ".csv")
+  utils::write.csv(ses_join, output_fname, row.names = FALSE)
+  cat("\n")
+  cli_alert_success("Your concensus categorisations have been saved to {output_fname}")
+}
diff --git a/R/convert_output.R b/R/browseMetadata_convert_outputs.R
similarity index 91%
rename from R/convert_output.R
rename to R/browseMetadata_convert_outputs.R
index 306e3925..32bcc0fa 100644
--- a/R/convert_output.R
+++ b/R/browseMetadata_convert_outputs.R
@@ -1,15 +1,15 @@
-#' convert_output
+#' browseMetadata_convert_outputs
 #'
 #' The 'OUTPUT_' file groups multiple categorisations onto one line e.g. '1,3' \cr \cr
 #' This function creates a new longer output 'L-OUTPUT_' which gives each categorisation its own row \cr \cr
 #' This 'L-OUTPUT_' may be more useful when using these csv files in later analyses
-#' @param output_csv The name of the 'OUTPUT_' csv file that was created from domain_mapping.R
+#' @param output_csv The name of the 'OUTPUT_' csv file that was created from browseMetadata.R
 #' @param output_dir The location of output_csv
 #' @return The function will return 'L-OUTPUT_' in the same output_dir
 #' @export
 #' @importFrom utils read.csv write.csv
 
-convert_output <- function(output_csv,output_dir) {
+browseMetadata_convert_outputs <- function(output_csv,output_dir) {
 
 output <- read.csv(paste0(output_dir,'/',output_csv))
 output_long <- output[0,] #make duplicate
diff --git a/R/compare_sessions.R b/R/compare_sessions.R
deleted file mode 100644
index 7b68ff96..00000000
--- a/R/compare_sessions.R
+++ /dev/null
@@ -1,148 +0,0 @@
-#' compare_sessions
-#'
-#' This function is to be used after running the domain_mapping function. \cr \cr
-#' It compares csv outputs from two sessions, finds their differences, and asks for a consensus. \cr \cr
-#'
-#' @param session_dir This directory should contain 2 csv files for each session (LOG_ and OUTPUT_), 4 csv files in total.
-#' @param session1_base Base file name for session 1 e.g. 'NationalCommunityChildHealthDatabase(NCCHD)_BLOOD_TEST_2024-07-05-16-07-38'
-#' @param session2_base Base file name for session 1 e.g. 'NationalCommunityChildHealthDatabase(NCCHD)_BLOOD_TEST_2024-07-08-12-03-30'
-#' @param json_file The full path to the metadata file used when running domain_mapping (should be the same for session 1 and session 2)
-#' @param domain_file The full path to the domain file used when running domain_mapping (should be the same for session 1 and session 2)
-#' @return It returns a csv output, which represents the consensus decisions between session 1 and session 2
-#' @importFrom dplyr left_join select join_by
-#' @export
-
-compare_sessions <- function(session_dir,session1_base,session2_base,json_file,domain_file) {
-
-  timestamp_now <- gsub(" ", "_", Sys.time())
-  timestamp_now <- gsub(":", "-", timestamp_now)
-
-  # read in the input files:
-
-  csv_1a <- read.csv(paste0(session_dir,'/','LOG_',session1_base,'.csv'))
-  csv_2a <- read.csv(paste0(session_dir,'/','LOG_',session2_base,'.csv'))
-  csv_1b <- read.csv(paste0(session_dir,'/','OUTPUT_',session1_base,'.csv'))
-  csv_2b <- read.csv(paste0(session_dir,'/','OUTPUT_',session2_base,'.csv'))
-  meta_json <- rjson::fromJSON(file = json_file)
-  domains <- read.csv(domain_file, header = FALSE)
-
-  # check the session csvs can be compared to each other and to the json (min requirements):
-
-  if (csv_1a$Dataset[1] != csv_2a$Dataset[1]){
-    cat("\n\n")
-    cli_alert_danger("Cannot compare session 1 and 2: different datasets")
-    stop()}
-
-  if (csv_1a$Table[1] != csv_2a$Table[1]){
-    cat("\n\n")
-    cli_alert_danger("Cannot compare session 1 and 2: different tables")
-    stop()}
-
-  if (csv_1a$Dataset[1] != meta_json$dataModel$label){
-    cat("\n\n")
-    cli_alert_danger("The csv files do not match the json: different datasets")
-    stop()}
-
-  # check the session csvs can be compared to each other and to the json (warnings for user to check):
-
-  if (csv_1a$browseMetadata[1] != csv_2a$browseMetadata[1]){
-    cat("\n\n")
-    cli_alert_warning("Different version of browseMetadata for session 1 and 2!\nValid comparison may not be possible - please check!")
-    continue <- readline("Press enter to continue or Esc to cancel: ")}
-
-  if (csv_1a$MetaDataVersion[1] != csv_2a$MetaDataVersion[1]){
-    cat("\n\n")
-    cli_alert_warning("Different metadata versions for session 1 and 2\nValid comparison may not be possible - please check!")
-    continue <- readline("Press enter to continue or Esc to cancel: ")}
-
-  if (csv_1a$MetaDataVersion[1] != meta_json$dataModel$documentationVersion){
-    cat("\n\n")
-    cli_alert_warning("The session files do not match the json (different metadata versions)\nValid comparison may not be possible - please check!")
-    continue <- readline("Press enter to continue or Esc to cancel: ")}
-
-  if (csv_1a$MetaDataLastUpdated[1] != csv_2a$MetaDataLastUpdated[1]){
-    cat("\n\n")
-    cli_alert_warning("Different metadata date for session 1 and 2\nValid comparison may not be possible - please check!")
-    continue <- readline("Press enter to continue or Esc to cancel: ")}
-
-  if (csv_1a$MetaDataLastUpdated[1] != meta_json$dataModel$lastUpdated){
-    cat("\n\n")
-    cli_alert_warning("The session files do not match the json (different dates for metadata)\nValid comparison may not be possible - please check!")
-    continue <- readline("Press enter to continue or Esc to cancel: ")}
-
-  if (nrow(csv_1b) != nrow(csv_2b)){
-    cat("\n\n")
-    cli_alert_warning("Different number of data elements for session 1 and 2\nValid comparison may not be possible - please check!")
-    continue <- readline("Press enter to continue or Esc to cancel: ")}
-
-  # print details about each session
-  cat("\n\n")
-  cli_alert_success("Comparing session 1 and session 2")
-  cli_alert_success("Session 1 created by {csv_1a$Initials[1]} and session 2 created by {csv_2a$Initials[1]}")
-
-  # Present domains plots panel for user's reference (as in domain_mapping)
-  colnames(domains)[1] = "Domain Name"
-  graphics::plot.new()
-  domains_extend <- rbind(c("*NO MATCH / UNSURE*"), c("*METADATA*"), c("*ID*"), c("*DEMOGRAPHICS*"), domains)
-  Code <- data.frame(Code = 0:(nrow(domains_extend) - 1))
-  Domain_table <- tableGrob(cbind(Code,domains_extend),rows = NULL,theme = ttheme_default())
-  grid.arrange(Domain_table,nrow=1,ncol=1)
-
-  # join csv_1b and csv_2b in order to compare
-  ses_join <- left_join(csv_1b,csv_2b,suffix = c("_ses1","_ses2"),join_by(DataElement))
-  ses_join$Domain_code_join <- NA
-  ses_join$Note_join <- NA
-  ses_join <- select(ses_join,
-                     'timestamp_ses1','timestamp_ses2',
-                     'DataElement_N_ses1','DataElement_N_ses2',
-                     'Domain_code_ses1','Domain_code_ses2',
-                     'Note_ses1','Note_ses2',
-                     'Domain_code_join','Note_join')
-
-  # extract table from meta_json - same code as domain_mapping
-  table_find <- data.frame(table_n = numeric(length(meta_json$dataModel$childDataClasses)),table_label = character(length(meta_json$dataModel$childDataClasses)))
-  for (t in 1:length(meta_json$dataModel$childDataClasses)) {
-    table_find$table_n[t] = t
-    table_find$table_label[t] = meta_json$dataModel$childDataClasses[[t]]$label
-  }
-
-  table_n = table_find$table_n[table_find$table_label == csv_1a$Table[1]]
-  thisTable <- meta_json$dataModel$childDataClasses[[table_n]]$childDataElements
-  thisTable_df <- data.frame(do.call(rbind, thisTable)) # nested list to dataframe
-  dataType_df <- data.frame(do.call(rbind, thisTable_df$dataType)) # nested list to dataframe
-
-  selectTable_df <- data.frame(
-    Label = unlist(thisTable_df$label),
-    Description = unlist(thisTable_df$description),
-    Type = unlist(dataType_df$label)
-  )
-
-  selectTable_df <- selectTable_df[order(selectTable_df$Label), ]
-
-  # find the mismatches and ask for consensus decisions
-  for (datavar in 1:nrow(ses_join)) {
-      # collect user responses
-    if (ses_join$Domain_code_ses1[datavar] != ses_join$Domain_code_ses2[datavar]){
-      cat("\n\n")
-      cli_alert_info("Mismatch of DataElement {ses_join$DataElement[datavar]}")
-      cat(paste(
-        "\nDOMAIN CODE (note) for session 1 --> ",ses_join$Domain_code_ses1[datavar],'(',ses_join$Note_ses1[datavar],')',
-        "\nDOMAIN CODE (note) for session 2 --> ",ses_join$Domain_code_ses2[datavar],'(',ses_join$Note_ses2[datavar],')'
-      ))
-      cat("\n\n")
-      cli_alert_info("Provide concensus decision for this DataElement:")
-      decision_output <- user_categorisation(selectTable_df$Label[datavar],selectTable_df$Description[datavar],selectTable_df$Type[datavar],max(Code$Code))
-      ses_join$Domain_code_join[datavar] <- decision_output$decision
-      ses_join$Note_join[datavar] <- decision_output$decision_note
-    } else {
-      ses_join$Domain_code_join[datavar] <- ses_join$Domain_code_ses1[datavar]
-      ses_join$Note_join[datavar] <- 'No mismatch!'
-    }
-  } # end of loop for DataElement
-
-  # save to new csv
-  output_fname <- paste0("CONCENSUS_OUTPUT_", gsub(" ", "", meta_json$dataModel$label), "_", table_find$table_label[table_n], "_", timestamp_now, ".csv")
-  utils::write.csv(ses_join, output_fname, row.names = FALSE)
-  cat("\n")
-  cli_alert_success("Your concensus categorisations have been saved to {output_fname}")
-}
diff --git a/R/concensus_on_mismatch.R b/R/concensus_on_mismatch.R
new file mode 100644
index 00000000..ad8c6c94
--- /dev/null
+++ b/R/concensus_on_mismatch.R
@@ -0,0 +1,33 @@
+#' concensus_on_mismatch
+#'
+#' This function is called within the browseMetadata_compare_outputs function. \cr \cr
+#' For a specific data element, it compares the domain code categorisation between two sessions.
+#' If the categorisation differs, it prompts the user for a new consensus decision by presenting the json metadata. \cr \cr
+#'
+#' @param ses_join The joined dataframes from the two sessions
+#' @param Table_df Metadata from the json file, for one table in the dataset
+#' @param datavar Data Element n
+#' @param domain_code_max The maximum allowable domain code integer
+#' @return It returns a list of 2: the domain code and the note from the consensus decision
+#' @importFrom CHECK LATER
+
+concensus_on_mismatch <- function(ses_join,Table_df,datavar,domain_code_max){
+
+  if (ses_join$Domain_code_ses1[datavar] != ses_join$Domain_code_ses2[datavar]){
+    cat("\n\n")
+    cli_alert_info("Mismatch of DataElement {ses_join$DataElement[datavar]}")
+    cat(paste(
+      "\nDOMAIN CODE (note) for session 1 --> ",ses_join$Domain_code_ses1[datavar],'(',ses_join$Note_ses1[datavar],')',
+      "\nDOMAIN CODE (note) for session 2 --> ",ses_join$Domain_code_ses2[datavar],'(',ses_join$Note_ses2[datavar],')'
+      ))
+    cat("\n\n")
+    cli::cli_alert_info("Provide concensus decision for this DataElement:")
+    decision_output <- user_categorisation(Table_df$Label[datavar],Table_df$Description[datavar],Table_df$Type[datavar],domain_code_max)
+    Domain_code_join <- decision_output$decision
+    Note_join <- decision_output$decision_note
+    } else {
+      Domain_code_join <- ses_join$Domain_code_ses1[datavar]
+      Note_join <- 'No mismatch!'
+    }
+  return(list(Domain_code_join = Domain_code_join, Note_join = Note_join))
+}
diff --git a/R/copy_previous.R b/R/copy_previous.R
new file mode 100644
index 00000000..34aac7f9
--- /dev/null
+++ b/R/copy_previous.R
@@ -0,0 +1,38 @@
+#' copy_previous
+#'
+#' This function is called within the browseMetadata function. \cr \cr
+#' It searches for previous OUTPUT files in the output_dir, that match the dataset name. \cr \cr
+#' If files exist, it removes duplicates and autos, and stores the rest of the data elements in a dataframe. \cr \cr
+#'
+#' @param Dataset_Name
+#' @param output_dir
+#' @return It returns a list of 2: df_prev_exist (a boolean) and df_prev (NULL or populated with data elements to copy)
+#' @importFrom CHECK LATER
+
+copy_previous <- function(Dataset_Name,output_dir) {
+
+  o_search = paste0("^OUTPUT_",gsub(" ", "", Dataset_Name),'*')
+  csv_list <- data.frame(file = list.files(output_dir, pattern = o_search))
+  if (nrow(csv_list) != 0) {
+    df_list <- lapply(paste0(output_dir, '/', csv_list$file), read.csv)
+    df_prev <- do.call("rbind", df_list) #combine all df
+    ## make a new date column, order by earliest, remove duplicates & auto
+    df_prev$time2 <- as.POSIXct(df_prev$timestamp, format = "%Y-%m-%d-%H-%M-%S")
+      df_prev <- df_prev[order(df_prev$time2), ]
+      df_prev <- df_prev %>% distinct(DataElement, .keep_all = TRUE)
+      df_prev <- df_prev[-(which(df_prev$Note %in% "AUTO CATEGORISED")), ]
+      df_prev_exist <- TRUE
+      cat("\n")
+      cli_alert_info(paste0("Copying from previous session(s): "))
+      cat("\n")
+      print(csv_list$file)
+
+    } else {
+      df_prev <- NULL
+      df_prev_exist <- FALSE
+    }
+
+  output <- list(df_prev = df_prev,df_prev_exist = df_prev_exist)
+  output
+
+}
diff --git a/R/data-Output.R b/R/data-Output.R
new file mode 100644
index 00000000..ada083db
--- /dev/null
+++ b/R/data-Output.R
@@ -0,0 +1,25 @@
+#' Output dataframe
+#'
+#' Empty output dataframe for browseMetadata.R to fill. \cr \cr
+#'
+#'\item \code{Output <- data.frame(
+#'timestamp = character(0),
+#'Table = character(0),
+#'DataElement_N = character(0),
+#'DataElement = character(0),
+#'Domain_code = character(0),
+#'Note = character(0)
+#')}
+#'
+#' \item \code{usethis::use_data(Output)}
+#'
+#' @docType data
+#
+#' @usage data(Output)
+#'
+#' @format A data frame with 0 rows and 6 columns
+#'
+#' @source The dataframe was manually created for the browseMetadata.R function
+
+"Output"
+
diff --git a/R/data-log_Output.R b/R/data-log_Output.R
new file mode 100644
index 00000000..2c209196
--- /dev/null
+++ b/R/data-log_Output.R
@@ -0,0 +1,28 @@
+#' Output log dataframe
+#'
+#' Empty log output dataframe for browseMetadata.R to fill. \cr \cr
+#'
+#' \item \code{log_Output <- data.frame(
+#' timestamp = character(1),
+#' browseMetadata = character(1),
+#' Initials = character(1),
+#' MetaDataVersion = character(1),
+#' MetaDataLastUpdated = character(1),
+#' DomainListDesc = character(1),
+#' Dataset = character(1),
+#' Table = character(1),
+#' Table_note = character(1)
+#' )}
+#'
+#' \item \code{usethis::use_data(log_Output)}
+#'
+#' @docType data
+#
+#' @usage data(log_Output)
+#'
+#' @format A data frame with 1 empty row and 9 columns
+#'
+#' @source The dataframe was manually created for the browseMetadata.R function
+
+"log_Output"
+
diff --git a/R/domain_mapping.R b/R/domain_mapping.R
deleted file mode 100755
index 79ba566b..00000000
--- a/R/domain_mapping.R
+++ /dev/null
@@ -1,376 +0,0 @@
-#' domain_mapping
-#'
-#' This function will read in the metadata file for a chosen dataset, loop through all the data elements, and ask the user to catergorise/label each data element as belonging to one or more domains.\cr \cr
-#' The domains will appear in the Plots tab and dataset information will be printed to the R console, for the user's reference in making these categorisations. \cr \cr
-#' These categorisations will be saved to a csv file, alongside a log file which summarises the session details.
-#' To speed up this process, some auto-categorisations will be made by the function for commonly occurring data elements and categorisations for the same data element can be copied from one table to another. \cr \cr
-#' Example inputs are provided within the package data, for the user to run this function in a demo mode.
-#' @param json_file The metadata file. This should be downloaded from the metadata catalogue as a json file. See 'data-raw/maternity_indicators_dataset_(mids)_20240105T132210.json' for an example download.
-#' @param domain_file The domain list file. This should be a csv file created by the user, with each domain listed on a separate line. See 'data-raw/domain_list_demo.csv' for a template.
-#' @param look_up_file The look-up table file, with auto-categorisations. By default, the code uses 'data/look-up.rda'. The user can provide their own look-up table in the same format as 'data-raw/look-up.csv'.
-#' @param output_dir The path to the directory where the two csv output files will be saved. By default, the current working directory is used.
-#' @param table_copy Turn on copying between tables (TRUE or FALSE, default TRUE). If TRUE, categorisations you made for all other tables in this dataset will be copied over (if 'OUTPUT_' files are found in output_dir).
-#' @return The function will return two csv files: 'OUTPUT_' which contains the mappings and 'LOG_' which contains details about the dataset and session.
-#' @examples
-#' # Run in demo mode by providing no inputs: domain_mapping()
-#' # Demo mode will use the /data files provided in this package
-#' # For more guidance, refer to the package README.md file and the R manual files.
-#' @export
-#' @import ggplot2
-#' @importFrom graphics plot.new
-#' @importFrom utils read.csv write.csv
-#' @importFrom dplyr %>% arrange count group_by distinct
-
-domain_mapping <- function(json_file = NULL, domain_file = NULL, look_up_file = NULL, output_dir = NULL, table_copy = TRUE) {
-
-  ## Load data: Check if demo data should be used ----
-
-  if (is.null(json_file) && is.null(domain_file)) {
-    # If both json_file and domain_file are NULL, use demo data
-    meta_json <- get("json_metadata")
-    domains <- get("domain_list")
-    DomainListDesc <- "DemoList"
-    cat("\n")
-    cli_alert_info("Running domain_mapping in demo mode using package data files")
-    demo_mode = TRUE
-  } else if (is.null(json_file) || is.null(domain_file)) {
-    # If only one of json_file and domain_file is NULL, throw error
-    cat("\n")
-    cli_alert_danger("Please provide both json_file and domain_file (or neither file, to run in demo mode)")
-    stop()
-  } else {
-    demo_mode = FALSE
-    # Read in the json file containing the meta data
-    meta_json <- rjson::fromJSON(file = json_file)
-    # Read in the domain file containing the meta data
-    domains <- read.csv(domain_file, header = FALSE)
-    DomainListDesc <- tools::file_path_sans_ext(basename(domain_file))
-  }
-
-  # Check if user has provided a look-up table
-  if (is.null(look_up_file)) {
-    cli_alert_info("Using the default look-up table in data/look-up.rda")
-    lookup <- get("look_up")
-    }
-  else {
-    lookup <- read.csv(look_up_file)
-    cli_alert_info("Using look up file inputted by user")
-    print(lookup)
-  }
-
-  # If user has not provider output_dir, use current working dir:
-  if (is.null(output_dir)) {
-    output_dir = getwd() }
-
-  ## Present domains plots panel for user's reference ----
-  colnames(domains)[1] = "Domain Name"
-  graphics::plot.new()
-  domains_extend <- rbind(c("*NO MATCH / UNSURE*"), c("*METADATA*"), c("*ID*"), c("*DEMOGRAPHICS*"), domains)
-  Code <- data.frame(Code = 0:(nrow(domains_extend) - 1))
-  Domain_table <- tableGrob(cbind(Code,domains_extend),rows = NULL,theme = ttheme_default())
-  grid.arrange(Domain_table,nrow=1,ncol=1)
-
-  ## Get user and demo list info for log file ----
-  User_Initials <- ""
-  cat("\n \n")
-  while (User_Initials == "") {
-    User_Initials <- readline("Enter your initials: ")
-  }
-
-  ## Print information about Dataset ----
-  cli_h1("Dataset Name")
-  cat(meta_json$dataModel$label, fill = TRUE)
-  cli_h1("Dataset Last Updated")
-  cat(meta_json$dataModel$lastUpdated, fill = TRUE)
-  cli_h1("Dataset File Exported By")
-  cat(meta_json$exportMetadata$exportedBy, "at", meta_json$exportMetadata$exportedOn, fill = TRUE)
-
-  Dataset_desc <- ""
-  while (Dataset_desc != "Y" & Dataset_desc != "y" & Dataset_desc != "N" & Dataset_desc != "n") {
-    cat("\n \n")
-    Dataset_desc <- readline(prompt = "Would you like to read a description of the dataset? (y/n): ")
-  }
-
-  if (Dataset_desc == 'Y' | Dataset_desc == 'y') {
-    cli_h1("Dataset Description")
-    cat(meta_json$dataModel$description, fill = TRUE)
-    readline(prompt = "Press any key to proceed")
-  }
-
-  ## Ask user which tables to process  ----
-
-  nTables <- length(meta_json$dataModel$childDataClasses)
-  cat("\n")
-  cli_alert_info("Found {nTables} Table{?s} in this Dataset")
-  for (dc in 1:nTables) {
-    cat("\n")
-    cat(dc,meta_json$dataModel$childDataClasses[[dc]]$label, fill = TRUE)
-  }
-
-  nTables_Process <- numeric(0)
-  nTables_Process_Error <- TRUE
-  nTables_Process_OutOfRange <- FALSE
-  while (length(nTables_Process) == 0 | nTables_Process_Error==TRUE | nTables_Process_OutOfRange == TRUE) {
-    if (nTables_Process_OutOfRange == TRUE) {
-      cli_alert_danger('That table number is not within the range displayed, please try again.')}
-    tryCatch({
-      cat("\n \n");
-      cli_alert_info("Enter each table number you want to process in this session (one number on each line):");
-      cat("\n");
-      nTables_Process <- scan(file="",what=0);
-      nTables_Process_Error <- FALSE;
-      nTables_Process_OutOfRange = any(nTables_Process > nTables)},
-      error=function(e) {nTables_Process_Error <- TRUE; print(e); cat("\n"); cli_alert_danger('Your input is in the wrong format, reference the table numbers and try again')})
-  }
-
-  # Extract each Table  ----
-  for (dc in unique(nTables_Process)) {
-    cat("\n")
-    cli_alert_info("Processing Table {dc} of {nTables}")
-    cli_h1("Table Name")
-    cat(meta_json$dataModel$childDataClasses[[dc]]$label, fill = TRUE)
-    cli_h1("Table Last Updated")
-    cat(meta_json$dataModel$childDataClasses[[dc]]$lastUpdated, "\n", fill = TRUE)
-
-    # Check if previous table output(s) exists in this output_dir (for table copying)
-    if (table_copy == TRUE){
-      dataset_search = paste0("^OUTPUT_",gsub(" ", "", meta_json$dataModel$label),'*')
-      csv_list <- data.frame(file = list.files(output_dir,pattern = dataset_search))
-      if (nrow(csv_list) != 0){
-        df_list <- lapply(paste0(output_dir,'/',csv_list$file), read.csv)
-        df_combined <- do.call("rbind", df_list) #combine all df
-        df_combined$timestamp2 <- as.POSIXct(df_combined$timestamp, format="%Y-%m-%d-%H-%M-%S") #create new date column
-        df_combined <- df_combined[order(df_combined$timestamp2),] #order by earliest datetime
-        df_combined <- df_combined %>% distinct(DataElement, .keep_all = TRUE) #remove duplicates, keep earliest categorisation
-        df_combined <- df_combined[-(which(df_combined$Note %in% "AUTO CATEGORISED")),] #remove auto categorised
-        df_combined_exist <- TRUE
-        cat("\n")
-        cli_alert_info(paste0("Copying from previous session(s): "))
-        cat("\n")
-        print(csv_list$file)
-      } else {df_combined_exist <- FALSE}
-      } else {df_combined_exist <- FALSE}
-
-    table_desc <- ""
-    while (table_desc != "Y" & table_desc != "y" & table_desc != "N" & table_desc != "n") {
-      cat("\n \n")
-      table_desc <- readline(prompt = "Would you like to read a description of the table? (y/n): ")
-    }
-
-    if (table_desc == 'Y' | table_desc == 'y') {
-      cli_h1("Table Description")
-      cat(meta_json$dataModel$childDataClasses[[dc]]$description, fill = TRUE)
-      readline(prompt = "Press any key to proceed")
-    }
-
-    cat("\n \n")
-    table_note <- readline("Optional free text note about this table (or press enter to continue): ")
-
-    thisTable <- meta_json$dataModel$childDataClasses[[dc]]$childDataElements #  probably a better way of dealing with complex json files in R ...
-    thisTable_df <- data.frame(do.call(rbind, thisTable)) # nested list to dataframe
-    dataType_df <- data.frame(do.call(rbind, thisTable_df$dataType)) # nested list to dataframe
-
-    selectTable_df <- data.frame(
-      Label = unlist(thisTable_df$label),
-      Description = unlist(thisTable_df$description),
-      Type = unlist(dataType_df$label)
-    )
-
-    selectTable_df <- selectTable_df[order(selectTable_df$Label), ]
-
-    # Create unique output csv to log the results ----
-    timestamp_now <- format(Sys.time(),"%Y-%m-%d-%H-%M-%S")
-
-    output_fname_csv <- paste0("OUTPUT_", gsub(" ", "", meta_json$dataModel$label), "_", gsub(" ", "", meta_json$dataModel$childDataClasses[[dc]]$label), "_", timestamp_now, ".csv")
-    output_fname_log_csv <- paste0("LOG_", gsub(" ", "", meta_json$dataModel$label), "_", gsub(" ", "", meta_json$dataModel$childDataClasses[[dc]]$label), "_", timestamp_now, ".csv")
-    output_fname_png <- paste0("PLOT_", gsub(" ", "", meta_json$dataModel$label), "_", gsub(" ", "", meta_json$dataModel$childDataClasses[[dc]]$label), "_", timestamp_now, ".png")
-
-    log_Output <- data.frame(
-      timestamp = character(1),
-      browseMetadata = character(1),
-      Initials = character(1),
-      MetaDataVersion = character(1),
-      MetaDataLastUpdated = character(1),
-      DomainListDesc = character(1),
-      Dataset = character(1),
-      Table = character(1),
-      Table_note = character(1)
-    )
-
-    row_Output <- data.frame(
-      timestamp = character(0),
-      Table = character(0),
-      DataElement_N = character(0),
-      DataElement = character(0),
-      Domain_code = character(0),
-      Note = character(0)
-    )
-
-    # Loop through each data element, request response from the user to match to a domain ----
-
-    # if it's the demo run, only loop through a max of 20 data elements
-    if (demo_mode == TRUE) {
-      start_var = 1
-      end_var = min(20,nrow(selectTable_df))
-    } else {
-      cli_h1(paste('There are',as.character(nrow(selectTable_df)),'data elements (variables) in this table.'))
-      cat("\n")
-      start_var <- readline(prompt = "Start variable (write 1 to process all): ")
-      cat("\n")
-      end_var <- readline(prompt = paste("End variable (write", nrow(selectTable_df), "to process all): "))
-      }
-
-    Output <- row_Output
-    for (datavar in start_var:end_var) {
-      cat("\n \n")
-      cli_alert_info(paste(length(datavar:end_var),'left to process in this session'))
-      cli_alert_info("Processing data element {datavar} of {nrow(selectTable_df)}")
-      # prepare output
-      this_Output <- row_Output
-      this_Output[nrow(this_Output) + 1 , ] <- NA
-      this_Output$Table[1] <- meta_json$dataModel$childDataClasses[[dc]]$label
-      this_Output$DataElement[1] <- selectTable_df$Label[datavar]
-      this_Output$DataElement_N[1] <- paste(as.character(datavar),'of',as.character(nrow(selectTable_df)))
-      # search if this data element matches with auto categorisations in lookup
-      datavar_index <- which(lookup$DataElement == selectTable_df$Label[datavar]) #we should code this to ignore the case
-      lookup_subset <- lookup[datavar_index,]
-      # search if this data element matches with any data elements processed in previous table
-      if (df_combined_exist == TRUE) {
-        datavar_index <- which(df_combined$DataElement == selectTable_df$Label[datavar])
-        df_combined_subset <- df_combined[datavar_index,]
-        } else {df_combined_subset <- data.frame()}
-      # decide how to process the data element out of 3 options
-      if (nrow(lookup_subset) == 1) { # 1 - auto categorisation
-        this_Output$Domain_code[1] <- lookup_subset$DomainCode
-        this_Output$Note[1] <- "AUTO CATEGORISED"
-        Output <- rbind(Output,this_Output)
-        } else if (df_combined_exist == TRUE & nrow(df_combined_subset) == 1){ # 2 - copy from previous table
-          this_Output$Domain_code[1] <- df_combined_subset$Domain_code
-          suppressWarnings(this_Output$Note[1] <- paste0("COPIED FROM: ",df_combined_subset$Table))
-          Output <- rbind(Output,this_Output)
-        } else { # 3 - collect user responses
-          decision_output <- user_categorisation(selectTable_df$Label[datavar],selectTable_df$Description[datavar],selectTable_df$Type[datavar],max(Code$Code))
-          this_Output$Domain_code[1] <- decision_output$decision
-          this_Output$Note[1] <- decision_output$decision_note
-          Output <- rbind(Output,this_Output)
-          }
-      } # end of loop for DataElement
-
-    ## Print the AUTO CATEGORISED responses for this Table and request review  ----
-    Output_auto <- subset(Output, Note == 'AUTO CATEGORISED')
-    cat("\n \n")
-    cli_alert_warning("Please check the auto categorised data elements are accurate for table {meta_json$dataModel$childDataClasses[[dc]]$label}:")
-    cat("\n \n")
-    print(Output_auto[, c("DataElement", "Domain_code","Note")])
-
-    # extract the rows to edit
-    auto_row_Error <- TRUE
-    auto_row_InRange <- TRUE
-    while (auto_row_Error==TRUE | auto_row_InRange == FALSE) {
-      if (auto_row_InRange == FALSE) {
-        cli_alert_danger('The row numbers you provided are not in range. Reference the auto categorised row numbers on the screen and try again')}
-      tryCatch({
-        cat("\n \n");
-        cli_alert_info("Press enter to accept the auto categorisations for table {meta_json$dataModel$childDataClasses[[dc]]$label} or enter each row you'd like to edit:");
-        cat("\n");
-        auto_row <- scan(file="",what=0);
-        auto_row_Error <- FALSE;
-        auto_row_InRange <- all(auto_row %in% which(Output$Note == 'AUTO CATEGORISED'))},
-        error=function(e) {auto_row_Error <- TRUE; print(e); cat("\n"); cli_alert_danger('Your input is in the wrong format, try again')})
-    }
-
-    if (length(auto_row) != 0) {
-
-      for  (datavar_auto in unique(auto_row)) {
-
-        # collect user responses
-        decision_output <- user_categorisation(selectTable_df$Label[datavar_auto],selectTable_df$Description[datavar_auto],selectTable_df$Type[datavar_auto],max(Code$Code))
-        # input user responses into output
-        Output$Domain_code[datavar_auto] <- decision_output$decision
-        Output$Note[datavar_auto] <- decision_output$decision_note
-      }
-    }
-
-    ## Ask if user wants to review their responses for this Table ----
-    review_cats <- ""
-    while (review_cats != "Y" & review_cats != "y" & review_cats != "N" & review_cats != "n") {
-      cat("\n \n")
-      review_cats <- readline(prompt = "Would you like to review your categorisations? (y/n): ")
-    }
-
-    if (review_cats == 'Y' | review_cats == 'y') {
-      Output_not_auto <- subset(Output, Note != 'AUTO CATEGORISED')
-      Output_not_auto['Note (first 12 chars)'] <- substring(Output_not_auto$Note,1,11)
-      cat("\n \n")
-      print(Output_not_auto[, c("DataElement", "Domain_code","Note (first 12 chars)")])
-      cat("\n \n")
-
-      # extract the rows to edit
-      not_auto_row_Error <- TRUE
-      not_auto_row_InRange <- TRUE
-      while (not_auto_row_Error==TRUE | not_auto_row_InRange == FALSE) {
-        if (not_auto_row_InRange == FALSE) {
-          cli_alert_danger('The row numbers you provided are not in range. Reference the row numbers on the screen and try again')}
-        tryCatch({
-          cat("\n \n");
-          cli_alert_info("Press enter to accept your categorisations for table {meta_json$dataModel$childDataClasses[[dc]]$label} or enter each row you'd like to edit:");
-          cat("\n");
-          not_auto_row <- scan(file="",what=0);
-          not_auto_row_Error <- FALSE;
-          not_auto_row_InRange <- all(not_auto_row %in% which(Output$Note != 'AUTO CATEGORISED'))},
-          error=function(e) {not_auto_row_Error <- TRUE; print(e); cat("\n"); cli_alert_danger('Your input is in the wrong format, reference the row numbers and try again')})
-      }
-
-      if (length(not_auto_row) != 0) {
-
-        for  (datavar_not_auto in unique(not_auto_row)) {
-
-          # collect user responses
-          decision_output <- user_categorisation(selectTable_df$Label[datavar_not_auto],selectTable_df$Description[datavar_not_auto],selectTable_df$Type[datavar_not_auto],max(Code$Code))
-          # input user responses into output
-          Output$Domain_code[datavar_not_auto] <- decision_output$decision
-          Output$Note[datavar_not_auto] <- decision_output$decision_note
-        }
-      }
-    }
-
-    ## Fill in columns that have all rows identical ----
-    log_Output$timestamp <- timestamp_now
-    log_Output$browseMetadata <- packageVersion("browseMetadata")
-    log_Output$Initials <- User_Initials
-    log_Output$MetaDataVersion <- meta_json$dataModel$documentationVersion
-    log_Output$MetaDataLastUpdated <- meta_json$dataModel$lastUpdated
-    log_Output$DomainListDesc <- DomainListDesc
-    log_Output$Dataset <- meta_json$dataModel$label
-    log_Output$Table <- meta_json$dataModel$childDataClasses[[dc]]$label
-    log_Output$Table_note <- table_note
-
-    ## Save final categorisations for this Table  ----
-    Output$timestamp <- timestamp_now
-    utils::write.csv(Output, paste(output_dir,output_fname_csv,sep='/'), row.names = FALSE)
-    utils::write.csv(log_Output, paste(output_dir,output_fname_log_csv,sep='/'), row.names = FALSE)
-    cat("\n")
-    cli_alert_success("Your final categorisations have been saved:\n{output_fname_csv}")
-    cli_alert_success("Your session log has been saved:\n{output_fname_log_csv}")
-
-    ## Create and save a summary plot
-    counts <- Output %>% group_by(Domain_code) %>% count() %>% arrange(n)
-
-    Domain_plot <- counts %>%
-      ggplot(aes(x=reorder(Domain_code, -n), y=n)) +
-      geom_col() +
-      ggtitle(paste("Data Elements in", meta_json$dataModel$childDataClasses[[dc]]$label, "grouped by Domain code")) +
-      theme_gray(base_size = 18) +
-      theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
-      xlab('Domain Code') +
-      ylab('Count') +
-      scale_y_continuous(breaks=seq(0,max(counts$n),1))
-
-    full_plot <- grid.arrange(Domain_plot, Domain_table,nrow=1,ncol=2)
-    ggsave(plot = full_plot,paste(output_dir,output_fname_png,sep='/'),width = 14, height = 8, units = "in")
-    cli_alert_success("A summary plot has been saved:\n{output_fname_png}")
-
-
-  } # end of loop for each table
-
-} # end of function
diff --git a/R/end_plot.R b/R/end_plot.R
new file mode 100644
index 00000000..0bbb158b
--- /dev/null
+++ b/R/end_plot.R
@@ -0,0 +1,36 @@
+#' end_plot
+#'
+#' This function is called within the browseMetadata function. \cr \cr
+#' A summary plot is created that includes the domain code reference table and counts of domain code categorisations \cr \cr
+#'
+#' @param df The Output dataframe with all the domain categorisations
+#' @param Table_name The table name
+#' @param ref_table The domain code reference table (which domain maps to which integer)
+#' @return It returns a ggplot
+#' @importFrom CHECK LATER
+
+end_plot <- function(df,Table_name, ref_table){
+
+  counts <- df %>% group_by(Domain_code) %>% count() %>% arrange(n)
+
+  Domain_plot <- counts %>%
+    ggplot(aes(x = reorder(Domain_code, -n), y = n)) +
+    geom_col() +
+    ggtitle(paste("Data Elements in",Table_name,"grouped by Domain code")) +
+    theme_gray(base_size = 18) +
+    theme(axis.text.x = element_text(
+      angle = 90,
+      vjust = 0.5,
+      hjust = 1
+    )) +
+    xlab('Domain Code') +
+    ylab('Count') +
+    scale_y_continuous(breaks = seq(0, max(counts$n), 1))
+
+  full_plot <- grid.arrange(Domain_plot,
+                            ref_table,
+                            nrow = 1,
+                            ncol = 2)
+  return(full_plot)
+
+}
diff --git a/R/join_outputs.R b/R/join_outputs.R
new file mode 100644
index 00000000..95cf5ac2
--- /dev/null
+++ b/R/join_outputs.R
@@ -0,0 +1,19 @@
+#' join_outputs
+#'
+#' This function is called within the browseMetadata_compare_outputs function. \cr \cr
+#' Joins output dataframes from two sessions, on the column DataElement.
+#'
+#' @param session_1 Dataframe from session 1
+#' @param session_2 Dataframe from session 2
+#' @return Dataframe with information from session 1 and 2, joined on column DataElement.
+#' @importFrom CHECK LATER
+#'
+
+join_outputs <- function(session_1, session_2){
+
+  ses_join <- left_join(session_1,session_2,suffix = c("_ses1","_ses2"),join_by(DataElement))
+  ses_join <- select(ses_join,contains("_ses"),'DataElement')
+  ses_join$Domain_code_join <- NA
+  ses_join$Note_join <- NA
+  ses_join
+}
diff --git a/R/json_table_to_df.R b/R/json_table_to_df.R
new file mode 100644
index 00000000..bcf97ada
--- /dev/null
+++ b/R/json_table_to_df.R
@@ -0,0 +1,29 @@
+#' join_outputs
+#'
+#' This function is called within the browseMetadata function. \cr \cr
+#' It reads in the nested lists from the json and extracts information needed into a dataframe. \cr \cr
+#' It does this for one specific table in a dataset. \cr \cr
+#'
+#' @param Dataset This is the dataModel field of the json
+#' @param n The Dataset number (as a json can have multiple datasets)
+#' @return A dataframe for that specific table, including data label, description and type.
+#' @importFrom CHECK LATER
+#'
+
+json_table_to_df <- function(Dataset,n){
+
+  jsonTable <- Dataset$childDataClasses[[n]]$childDataElements
+  jsonTable_df <- data.frame(do.call(rbind, jsonTable)) # nested list to df
+  jsonType_df_dataType <- data.frame(do.call(rbind, jsonTable_df$dataType)) # nested list to df
+
+  Table_df <- data.frame(
+    Label = unlist(jsonTable_df$label),
+    Description = unlist(jsonTable_df$description),
+    Type = unlist(jsonType_df_dataType$label)
+  )
+
+  Table_df <- Table_df[order(Table_df$Label), ]
+
+  Table_df
+
+}
diff --git a/R/load_data.R b/R/load_data.R
new file mode 100644
index 00000000..0eafaebe
--- /dev/null
+++ b/R/load_data.R
@@ -0,0 +1,50 @@
+#' load_data
+#'
+#' This function is called within the browseMetadata function. \cr \cr
+#' It collects the inputs needed for the browseMetadata function (defaults or user inputs)
+#' If some inputs are NULL, it loads the default inputs. If defaults not available, it prints error for the user.
+#' If inputs are not NULL, it loads the user-specified inputs.
+#' @param json_file As defined in browseMetadata
+#' @param domain_file As defined in browseMetadata
+#' @param look_up_file As defined in browseMetadata
+#' @return A list of 5: all inputs needed for the browseMetadata function to run.
+#' @importFrom CHECK LATER
+#'
+#'
+load_data <- function(json_file, domain_file,look_up_file){
+
+  # Collect meta_json and domains
+  if (is.null(json_file) && is.null(domain_file)) { # if both json_file and domain_file are NULL, use demo data
+    meta_json <- get("json_metadata")
+    domains <- get("domain_list")
+    DomainListDesc <- "DemoList"
+    cat("\n")
+    cli_alert_info("Running browseMetadata in demo mode using package data files")
+    cat("\n ")
+    demo_mode = TRUE
+  } else if (is.null(json_file) || is.null(domain_file)) { # if only one of json_file and domain_file is NULL, throw error
+    cat("\n")
+    cli_alert_danger("Please provide both json_file and domain_file (or neither file, to run in demo mode)")
+    stop()
+  } else { # read in user specified files
+    demo_mode = FALSE
+    meta_json <- rjson::fromJSON(file = json_file) # read in the json file containing the meta data
+    domains <- read.csv(domain_file, header = FALSE) # read in the domain file containing the list of research domains
+    DomainListDesc <- tools::file_path_sans_ext(basename(domain_file))
+  }
+
+  # Collect look up table
+  if (is.null(look_up_file)) {
+    cli_alert_info("Using the default look-up table in data/look-up.rda")
+    cat("\n ")
+    lookup <- get("look_up")
+  }
+  else {
+    lookup <- read.csv(look_up_file)
+    cli_alert_info("Using look up file inputted by user")
+    cat("\n ")
+    print(lookup)
+  }
+
+  list(meta_json = meta_json,domains = domains,DomainListDesc = DomainListDesc, demo_mode = demo_mode,lookup = lookup)
+}
diff --git a/R/ref_plot.R b/R/ref_plot.R
new file mode 100644
index 00000000..6948d1a5
--- /dev/null
+++ b/R/ref_plot.R
@@ -0,0 +1,25 @@
+#' ref_plot
+#'
+#' This function is called within the browseMetadata function. \cr \cr
+#' It plots a reference table to guide the user in their categorisation of domains. \cr \cr
+#' This reference table is based on the user inputted domains and the default domains provided by this package.  \cr \cr
+#' @param domains The output of load_data
+#' @return A reference table that appears in the Plots tab. A list of 2 containing the derivatives for this plot, used later in browseMetadata.
+#' @importFrom CHECK LATER
+#'
+#'
+
+ref_plot <- function(domains){
+
+  colnames(domains)[1] = "Domain Name"
+  graphics::plot.new()
+  domains_extend <- rbind(c("*NO MATCH / UNSURE*"), c("*METADATA*"), c("*ID*"), c("*DEMOGRAPHICS*"), domains)
+  Code <- data.frame(Code = 0:(nrow(domains_extend) - 1))
+  Domain_table <- tableGrob(cbind(Code,domains_extend),rows = NULL,theme = ttheme_default())
+  grid.arrange(Domain_table,nrow=1,ncol=1)
+
+  return(list(Code = Code, Domain_table = Domain_table))
+
+}
+
+
diff --git a/R/user_categorisation.R b/R/user_categorisation.R
index 4a3d36b9..eb611921 100644
--- a/R/user_categorisation.R
+++ b/R/user_categorisation.R
@@ -1,6 +1,6 @@
 #' user_categorisation
 #'
-#' This function is used within the domain_mapping function. \cr \cr
+#' This function is called within the browseMetadata function. \cr \cr
 #' It displays data properties to the user and requests a categorisation into a domain. \cr \cr
 #' An optional note can be included with the categorisation.
 #'
@@ -9,7 +9,6 @@
 #' @param data_type Data type of the variable
 #' @param domain_code_max Max code in the domain list (0-3 auto included, then N included via domain_file)
 #' @return It returns a list containing the decision and decision note
-#' @export
 
 user_categorisation <- function(data_element,data_desc,data_type,domain_code_max) {
 
diff --git a/R/user_categorisation_loop.R b/R/user_categorisation_loop.R
new file mode 100644
index 00000000..71c9afee
--- /dev/null
+++ b/R/user_categorisation_loop.R
@@ -0,0 +1,74 @@
+#' user_categorisation_loop
+#'
+#' This function is called within the browseMetadata function. \cr \cr
+#' Given a specific table and a number of data elements to search, it checks for 3 different sources of domain categorisation: \cr \cr
+#' 1 - If data elements match those in the look-up table, auto categorise them \cr \cr
+#' 2 - If data elements match to previous table output, copy them \cr \cr
+#' 3 - If no match for 1 or 2, data elements are categorised by the user \cr \cr
+#' @param start_v Index of data element to start
+#' @param end_v Index of data element to end
+#' @param Table_df Dataframe with the table information, extracted from json metadata
+#' @param df_prev_exist Boolean to indicate with previous dataframes exist (to copy from)
+#' @param df_prev Previous dataframes to copy from (or NULL)
+#' @param lookup The lookup table to enable auto categorisations
+#' @param df_plots Output from the ref_plot function, to indicate maximum domain code allowed
+#' @param Output Empty Output dataframe, to fill
+#' @return An Output dataframe containing information about the table, data elements and categorisations
+#' @importFrom CHECK LATER
+
+user_categorisation_loop <- function(start_v,end_v,Table_df,df_prev_exist,df_prev,lookup,df_plots,Output) {
+
+  for (data_v in start_v:end_v) {
+    cat("\n \n")
+    cli_alert_info(paste(length(data_v:end_v), 'left to process'))
+    cli_alert_info("Data element {data_v} of {nrow(Table_df)}")
+    this_DataElement <- Table_df$Label[data_v]
+    this_DataElement_N <- paste(as.character(data_v), 'of',
+                                as.character(nrow(Table_df)))
+    data_v_index <- which(lookup$DataElement ==
+                            Table_df$Label[data_v]) #we should code this to ignore the case
+    lookup_subset <- lookup[data_v_index, ]
+    ##### search if data element matches any data elements from previous table
+    if (df_prev_exist == TRUE) {
+      data_v_index <- which(df_prev$DataElement ==
+                              Table_df$Label[data_v])
+      df_prev_subset <- df_prev[data_v_index, ]
+    } else {
+      df_prev_subset <- data.frame()
+    }
+    ##### decide how to process the data element out of 3 options
+    if (nrow(lookup_subset) == 1) {
+      ###### 1 - auto categorisation
+      Output <- Output %>% add_row(
+        DataElement = this_DataElement,
+        DataElement_N = this_DataElement_N,
+        Domain_code = as.character(lookup_subset$DomainCode),
+        Note = 'AUTO CATEGORISED'
+      )
+    } else if (df_prev_exist == TRUE &
+               nrow(df_prev_subset) == 1) {
+      ###### 2 - copy from previous table
+      Output <- Output %>% add_row(
+        DataElement = this_DataElement,
+        DataElement_N = this_DataElement_N,
+        Domain_code = as.character(df_prev_subset$Domain_code),
+        Note = paste0("COPIED FROM: ", df_prev_subset$Table)
+      )
+    } else {
+      ###### 3 - collect user responses with 'user_categorisation.R'
+      decision_output <- user_categorisation(
+        Table_df$Label[data_v],
+        Table_df$Description[data_v],
+        Table_df$Type[data_v],
+        max(df_plots$Code$Code)
+      )
+      Output <- Output %>% add_row(
+        DataElement = this_DataElement,
+        DataElement_N = this_DataElement_N,
+        Domain_code = decision_output$decision,
+        Note = decision_output$decision_note
+      )
+    }
+  } # end of loop for DataElement
+  Output
+}
diff --git a/R/user_prompt.R b/R/user_prompt.R
new file mode 100644
index 00000000..a830da9b
--- /dev/null
+++ b/R/user_prompt.R
@@ -0,0 +1,66 @@
+#' user_prompt
+#'
+#' This function is called within the browseMetadata function. \cr \cr
+#' It prompts a response from the user. \cr \cr
+#'
+#' @param pre_prompt_text Optional text to display to the user, prior to prompt.
+#' This should be a data frame:
+#' data.frame(Heading = logical(0), Text = character(0))
+#' Each row of the dataframe specifies Heading TRUE/FALSE and text to display.
+#' @param prompt_text Text to display to the user, to prompt their response.
+#' @param any_keys Boolean to determine if any key responses are allowable.
+#' If FALSE, only these are allowed: Y, y, N and n.
+#' @param post_yes_text Optional text to post after receiving a 'Y' or 'y'
+#' response from user. Same dataframe format as pre_prompt_text.
+#' @return It returns variable text, depending on any_keys.
+#' @importFrom CHECK LATER
+
+user_prompt <- function(pre_prompt_text = NULL, prompt_text, any_keys, post_yes_text = NULL) {
+
+  # pre prompt text is optional
+  # prompt text is not
+  # any_keys, when TRUE it allows any input, when FALSE it only allows y/n/Y/N
+  # post_yes_text, when any_keys is FALSE and response is y/Y then print the post text
+
+  # pre prompt text
+  if (!is.null(pre_prompt_text)){
+    for (line in 1:nrow(pre_prompt_text)){
+      if (pre_prompt_text$Heading[line] == TRUE){
+        cli_h1(pre_prompt_text$Text[line])
+        }else{
+          cat(pre_prompt_text$Text[line])
+        }
+    }
+    cat("\n ")
+    }
+
+  # prompt text
+  if (any_keys == TRUE){
+    response <- ""
+    while (response == "") {
+      response <- readline(prompt = prompt_text)
+    }
+  } else if (any_keys == FALSE) {
+    response <- ""
+    while (!response %in% c("Y", "y", "N", "n")) {
+      response <- readline(prompt = prompt_text)
+    }
+    # post yes text
+    if (response %in% list('Y','y') & !is.null(post_yes_text)) {
+      for (line in 1:nrow(post_yes_text)){
+        if (post_yes_text$Heading[line] == TRUE){
+          cli_h1(post_yes_text$Text[line])
+        }else{
+          cat(post_yes_text$Text[line])
+        }
+      }
+      cat("\nPress any key to continue ")
+      readline()
+    }
+  } else {
+    stop("Invalid input given for 'any_keys'. Only TRUE or FALSE are allowed.")
+  }
+
+  response
+
+}
diff --git a/R/user_prompt_list.R b/R/user_prompt_list.R
new file mode 100644
index 00000000..7227d296
--- /dev/null
+++ b/R/user_prompt_list.R
@@ -0,0 +1,34 @@
+#' user_prompt_list
+#'
+#' This function is called within the browseMetadata function. \cr \cr
+#' It prompts a response from the user, in the form of a list. \cr \cr
+#' It checks if the user has given the an input within the allowed range - if not, it re-sends prompt. \cr \cr
+#'
+#' @param prompt_text Text to display to the user, to prompt their response.
+#' @param list_allowed A list of allowable integer responses.
+#' @param empty_allowed A boolean specifying if no response is allowed.
+#' @return It returns a list of integers to process, that match the prompt options.
+#' @importFrom CHECK LATER
+
+user_prompt_list <- function(prompt_text,list_allowed,empty_allowed) {
+
+  list_to_process_Error <- TRUE
+  list_to_process_InRange <- TRUE
+  while (list_to_process_Error==TRUE | list_to_process_InRange==FALSE) {
+    tryCatch({
+      cat("\n \n");
+      cli_alert_info(prompt_text);
+      cat("\n");
+      list_to_process <- scan(file="",what=0);
+      list_to_process_InRange_1 = (all(list_to_process %in% list_allowed))
+      if (empty_allowed == FALSE){
+        list_to_process_InRange_2 = (all(length(list_to_process) != 0))
+      } else {list_to_process_InRange_2 = TRUE}
+      list_to_process_InRange = all(list_to_process_InRange_1,list_to_process_InRange_2)
+      if (list_to_process_InRange == FALSE){cli_alert_danger('One of your inputs is out of range! Reference the allowable list of integers and try again.')};
+      list_to_process_Error <- FALSE},
+      error=function(e) {list_to_process_Error <- TRUE; print(e); cat("\n"); cli_alert_danger('Your input is in the wrong format. Reference the allowable list of integers and try again.')})
+    }
+  list_to_process
+}
+
diff --git a/R/valid_comparison.R b/R/valid_comparison.R
new file mode 100644
index 00000000..89c486df
--- /dev/null
+++ b/R/valid_comparison.R
@@ -0,0 +1,32 @@
+#' valid_comparison
+
+#' This function is called within the browseMetadata_compare_outputs function. \cr \cr
+#' It reads two inputs to see if they are equal. \cr \cr
+#' If the test is 'warning' status and inputs are not equal it gives warning but continues. \cr \cr
+#' If the test is 'danger' status and inputs are not equal it stops and exits, with error message. \cr \cr
+#' @param input1 Input 1
+#' @param input2 Input 2
+#' @param severity Level of severity. Only 'danger' or 'warning'
+#' @param severity_text The text to print if inputs are not equal.
+#' @return It returns variable text, depending on any_keys.
+#' @importFrom CHECK LATER
+#'
+valid_comparison <- function(input1, input2, severity, severity_text) {
+
+  if (!severity %in% c('danger', 'warning')) {
+    stop("Invalid severity. Only 'danger' and 'warning' are allowed.")
+  }
+
+  if (severity == 'danger') {
+    if (input1 != input2) {
+      cat('\n')
+      stop(paste(severity_text,"-> Exiting!"))
+    }
+  } else if (severity == 'warning') {
+    if (input1 != input2) {
+      cat('\n')
+      cli_alert_warning(paste(severity_text,"-> Continuing but please check comparison is valid!"))
+    }
+  }
+  }
+
diff --git a/README.md b/README.md
index fe1ca5db..70bffb7c 100644
--- a/README.md
+++ b/README.md
@@ -58,7 +58,7 @@ library(browseMetadata)
 Read the documentation:
 
 ```         
-?domain_mapping
+?browseMetadata
 ```
 
 Set your working directory to be an empty folder you just created:
@@ -70,7 +70,7 @@ setwd("/Users/your-username/test-browseMetadata")
 Run the function in demo mode:
 
 ``` r
-domain_mapping()
+browseMetadata()
 ```
 
 Take note of the **Plots** tab in R Studio which should show a table of domains with this info:
@@ -93,7 +93,7 @@ For a research study, your domains are likely to be much more specific e.g. 'Pre
 The 4 default domains are always included [0-3], appended on to any domain list given.
 
 ```         
-ℹ Running domain_mapping in demo mode using package data files
+ℹ Running browseMetadata in demo mode using package data files
 ℹ Using the default look-up table in data/look-up.rda
  
 Enter your initials: RS
@@ -285,14 +285,14 @@ LOG_NationalCommunityChildHealthDatabase(NCCHD)_CHILD_2024-04-05-14-37-36.csv
 PLOT_NationalCommunityChildHealthDatabase(NCCHD)_CHILD_2024-04-05-14-37-36.png
 ```
 
-The OUTPUT csv contains the categorisations you made. The LOG csv contains information about the session as a whole, including various metadata. These two csv files contain the same timestamp column. If you do not like the formatting of the OUTPUT csv, see the function [R/convert_output.R](R/convert_output.R) for an alternative. 
+The OUTPUT csv contains the categorisations you made. The LOG csv contains information about the session as a whole, including various metadata. These two csv files contain the same timestamp column. If you do not like the formatting of the OUTPUT csv, see the function [R/post_browseMetadata_convert.R](R/post_browseMetadata_convert.R) for an alternative. 
 
 The PLOT png file saves a simple plot displaying the count of domain codes for that table.
 
 ### Using your own input files
 
 ``` r
-domain_mapping(json_file, domain_file, look_up_file, output_dir, table_copy)
+browseMetadata(json_file, domain_file, look_up_file, output_dir, table_copy)
 ```
 
 This code is in early development. To see known bugs or sub-optimal features refer to the [Issues](https://github.com/aim-rsf/browseMetadata/issues).
@@ -300,7 +300,7 @@ This code is in early development. To see known bugs or sub-optimal features ref
 First, change the json file and domain file inputs. Later, consider changing the other 3 inputs, depending on your use-case. For example:
 
 ``` r
-domain_mapping(json_file = 'path/your-json.json', domain_file = 'path/your-domains.csv')
+browseMetadata(json_file = 'path/your-json.json', domain_file = 'path/your-domains.csv')
 ```
 
 Unlike in demo mode, it will ask you to specify the range of variables you want to process (start variable:end variable), because you can choose to process a table across multiple sessions (particularly useful if the table has a large number of data elements).
@@ -319,7 +319,7 @@ Unlike in demo mode, it will ask you to specify the range of variables you want
 
 #### lookup file:
 
--   a [default lookup file](dataraw/look_up.csv) is used by the domain_mapping function
+-   a [default lookup file](dataraw/look_up.csv) is used by the browseMetadata function
 -   optional: a csv can be created by the user (using the same format as the default) and provided as the input
 -   the lookup file makes auto-categorisations intended for variables that come up regularly in health datasets (e.g. IDs and demographics)
 -   the lookup file only works for 1:1 mappings right now, i.e. the DataElement should only be listed once in the lookup file
@@ -396,7 +396,7 @@ PATH_BLOOD_TESTS
 
 The csv output file containing the categorisation for each data element could be used as an input in later analysis steps to filter variables and visualise how each variable maps to research domains of interest.
 
-Categorisations across researchers can be compared by using the function [R/compare_sessions.R](R/compare_sessions.R). Type `?compare_sessions` to read the manual on how to run this function. In brief, it compares csv outputs from two sessions, finds their differences, and asks for a consensus.
+Categorisations across researchers can be compared by using the function [R/post_browseMetadata_compare.R](R/post_browseMetadata_compare.R). Type `?post_browseMetadata_compare` to read the manual on how to run this function. In brief, it compares csv outputs from two sessions, finds their differences, and asks for a consensus.
 
 ## License
 
diff --git a/data/Output.rda b/data/Output.rda
new file mode 100644
index 0000000000000000000000000000000000000000..2c1ecd654ba86c43b2f8755062970b0f5e0388d3
GIT binary patch
literal 183
zcmV;o07(BrT4*^jL0KkKS?UL^Z2$mm|A7BGNB{r<5C8@Nlt8~H-hcoBAOMmL6(6NH
zr9Dq6r=)41&}r>VpphzOlL%>`$%p_Iin)R+nJH8jPXhGpgY^KuK8>&zrUZM?6h1PZ
zOIu3RhLGAAH4H%x8c4^4BGe3z&7I?}CgGefo>_OB8G34<O=)nJ`9iA&d2Au|9?d}X
l%owSf$pZ2BIL5SE98TDcq?hh<Sv39pUC9*TLPMw?wX}V0PTT+h

literal 0
HcmV?d00001

diff --git a/data/log_Output.rda b/data/log_Output.rda
new file mode 100644
index 0000000000000000000000000000000000000000..574ad41272a191f46e169ca2856ad451143ecb7c
GIT binary patch
literal 252
zcmV<Y00aL*T4*^jL0KkKStP{J6aWCL|G@t9$N&HV5C8-wmq5R#-he;=00FQ7*pSrH
zo=NIINjxaf20&<fo~M);15FJYF&O~R0il9Kqe7U{GI~Mjq{+2COqyw@>R5t$P)RqQ
zS{N2qj~d}0tQh4v!UfwR5<#V;G!!%I7_z7`{?FGk*3oc~+dwuX!GtgZ95jH*;Kk+x
zP1`wRL4Le2rr&MXdEI{_%${;$!Z8K*b#PFptO1~{8n1qZKLc}EStg*;T#@d%L9Ot{
zfz~jQaV0t9J@JK&*ds{RP}(jgmwhHyx@PMrAY+KR%QaWdqo*=y_ZM<SI8cxz#LyHE
C*KpPV

literal 0
HcmV?d00001

diff --git a/man/Output.Rd b/man/Output.Rd
new file mode 100644
index 00000000..c44a1dcb
--- /dev/null
+++ b/man/Output.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data-Output.R
+\docType{data}
+\name{Output}
+\alias{Output}
+\title{Output dataframe}
+\format{
+A data frame with 0 rows and 6 columns
+}
+\source{
+The dataframe was manually created for the browseMetadata.R function
+}
+\usage{
+data(Output)
+}
+\description{
+Empty output dataframe for browseMetadata.R to fill. \cr \cr
+}
+\details{
+\item \code{Output <- data.frame(
+timestamp = character(0),
+Table = character(0),
+DataElement_N = character(0),
+DataElement = character(0),
+Domain_code = character(0),
+Note = character(0)
+)}
+
+\item \code{usethis::use_data(Output)}
+}
+\keyword{datasets}
diff --git a/man/browseMetadata-package.Rd b/man/browseMetadata-package.Rd
index 96dd6009..bc73a4cf 100644
--- a/man/browseMetadata-package.Rd
+++ b/man/browseMetadata-package.Rd
@@ -2,7 +2,6 @@
 % Please edit documentation in R/browseMetadata-package.R
 \docType{package}
 \name{browseMetadata-package}
-\alias{browseMetadata}
 \alias{browseMetadata-package}
 \title{browseMetadata}
 \description{
diff --git a/man/browseMetadata.Rd b/man/browseMetadata.Rd
new file mode 100644
index 00000000..99fc1763
--- /dev/null
+++ b/man/browseMetadata.Rd
@@ -0,0 +1,59 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/browseMetadata.R
+\name{browseMetadata}
+\alias{browseMetadata}
+\title{browseMetadata}
+\usage{
+browseMetadata(
+  json_file = NULL,
+  domain_file = NULL,
+  look_up_file = NULL,
+  output_dir = NULL,
+  table_copy = TRUE
+)
+}
+\arguments{
+\item{json_file}{The metadata file. This should be downloaded from the
+metadata catalogue as a json file. See
+'data-raw/maternity_indicators_dataset_(mids)_20240105T132210.json' for an
+example download.}
+
+\item{domain_file}{The domain list file. This should be a csv file created by
+the user, with each domain listed on a separate line. See
+'data-raw/domain_list_demo.csv' for a template.}
+
+\item{look_up_file}{The look-up table file, with auto-categorisations. By
+default, the code uses 'data/look-up.rda'. The user can provide their own
+look-up table in the same format as 'data-raw/look-up.csv'.}
+
+\item{output_dir}{The path to the directory where the two csv output files
+will be saved. By default, the current working directory is used.}
+
+\item{table_copy}{Turn on copying between tables (TRUE or FALSE, default
+TRUE). If TRUE, categorisations you made for all other tables in this dataset
+will be copied over (if 'OUTPUT_' files are found in output_dir).}
+}
+\value{
+The function will return two csv files: 'OUTPUT_' which contains the
+mappings and 'LOG_' which contains details about the dataset and session.
+}
+\description{
+This function will read in the metadata file for a chosen dataset, loop
+through all the data elements, and ask the user to catergorise/label each
+data element as belonging to one or more domains.\cr \cr
+The domains will appear in the Plots tab and dataset information will be
+printed to the R console, for the user's reference in making these
+categorisations. \cr \cr
+These categorisations will be saved to a csv file, alongside a log file which
+summarises the session details.To speed up this process, some
+auto-categorisations will be made by the function for commonly occurring data
+elements and categorisations for the same data element can be copied from one
+table to another. \cr \cr
+Example inputs are provided within the package data, for the user to run this
+function in a demo mode.
+}
+\examples{
+# Run in demo mode by providing no inputs: browseMetadata()
+# Demo mode will use the /data files provided in this package
+# For more guidance, refer to the package README.md file.
+}
diff --git a/man/compare_sessions.Rd b/man/browseMetadata_compare_outputs.Rd
similarity index 69%
rename from man/compare_sessions.Rd
rename to man/browseMetadata_compare_outputs.Rd
index 6f718a34..b74f64b1 100644
--- a/man/compare_sessions.Rd
+++ b/man/browseMetadata_compare_outputs.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/compare_sessions.R
-\name{compare_sessions}
-\alias{compare_sessions}
-\title{compare_sessions}
+% Please edit documentation in R/browseMetadata_compare_outputs.R
+\name{browseMetadata_compare_outputs}
+\alias{browseMetadata_compare_outputs}
+\title{browseMetadata_compare_outputs}
 \usage{
-compare_sessions(
+browseMetadata_compare_outputs(
   session_dir,
   session1_base,
   session2_base,
@@ -19,14 +19,14 @@ compare_sessions(
 
 \item{session2_base}{Base file name for session 1 e.g. 'NationalCommunityChildHealthDatabase(NCCHD)_BLOOD_TEST_2024-07-08-12-03-30'}
 
-\item{json_file}{The full path to the metadata file used when running domain_mapping (should be the same for session 1 and session 2)}
+\item{json_file}{The full path to the metadata file used when running browseMetadata (should be the same for session 1 and session 2)}
 
-\item{domain_file}{The full path to the domain file used when running domain_mapping (should be the same for session 1 and session 2)}
+\item{domain_file}{The full path to the domain file used when running browseMetadata (should be the same for session 1 and session 2)}
 }
 \value{
 It returns a csv output, which represents the consensus decisions between session 1 and session 2
 }
 \description{
-This function is to be used after running the domain_mapping function. \cr \cr
+This function is to be used after running the browseMetadata function. \cr \cr
 It compares csv outputs from two sessions, finds their differences, and asks for a consensus. \cr \cr
 }
diff --git a/man/convert_output.Rd b/man/browseMetadata_convert_outputs.Rd
similarity index 66%
rename from man/convert_output.Rd
rename to man/browseMetadata_convert_outputs.Rd
index 5a2cede7..1aae7676 100644
--- a/man/convert_output.Rd
+++ b/man/browseMetadata_convert_outputs.Rd
@@ -1,13 +1,13 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/convert_output.R
-\name{convert_output}
-\alias{convert_output}
-\title{convert_output}
+% Please edit documentation in R/browseMetadata_convert_outputs.R
+\name{browseMetadata_convert_outputs}
+\alias{browseMetadata_convert_outputs}
+\title{browseMetadata_convert_outputs}
 \usage{
-convert_output(output_csv, output_dir)
+browseMetadata_convert_outputs(output_csv, output_dir)
 }
 \arguments{
-\item{output_csv}{The name of the 'OUTPUT_' csv file that was created from domain_mapping.R}
+\item{output_csv}{The name of the 'OUTPUT_' csv file that was created from browseMetadata.R}
 
 \item{output_dir}{The location of output_csv}
 }
diff --git a/man/concensus_on_mismatch.Rd b/man/concensus_on_mismatch.Rd
new file mode 100644
index 00000000..078dcf6e
--- /dev/null
+++ b/man/concensus_on_mismatch.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/concensus_on_mismatch.R
+\name{concensus_on_mismatch}
+\alias{concensus_on_mismatch}
+\title{concensus_on_mismatch}
+\usage{
+concensus_on_mismatch(ses_join, Table_df, datavar, domain_code_max)
+}
+\arguments{
+\item{ses_join}{The joined dataframes from the two sessions}
+
+\item{Table_df}{Metadata from the json file, for one table in the dataset}
+
+\item{datavar}{Data Element n}
+
+\item{domain_code_max}{The maximum allowable domain code integer}
+}
+\value{
+It returns a list of 2: the domain code and the note from the consensus decision
+}
+\description{
+This function is called within the browseMetadata_compare_outputs function. \cr \cr
+For a specific data element, it compares the domain code categorisation between two sessions.
+If the categorisation differs, it prompts the user for a new consensus decision by presenting the json metadata. \cr \cr
+}
diff --git a/man/copy_previous.Rd b/man/copy_previous.Rd
new file mode 100644
index 00000000..d374fe6e
--- /dev/null
+++ b/man/copy_previous.Rd
@@ -0,0 +1,16 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/copy_previous.R
+\name{copy_previous}
+\alias{copy_previous}
+\title{copy_previous}
+\usage{
+copy_previous(Dataset_Name, output_dir)
+}
+\value{
+It returns a list of 2: df_prev_exist (a boolean) and df_prev (NULL or populated with data elements to copy)
+}
+\description{
+This function is called within the browseMetadata function. \cr \cr
+It searches for previous OUTPUT files in the output_dir, that match the dataset name. \cr \cr
+If files exist, it removes duplicates and autos, and stores the rest of the data elements in a dataframe. \cr \cr
+}
diff --git a/man/domain_mapping.Rd b/man/domain_mapping.Rd
deleted file mode 100644
index e2dc6620..00000000
--- a/man/domain_mapping.Rd
+++ /dev/null
@@ -1,40 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/domain_mapping.R
-\name{domain_mapping}
-\alias{domain_mapping}
-\title{domain_mapping}
-\usage{
-domain_mapping(
-  json_file = NULL,
-  domain_file = NULL,
-  look_up_file = NULL,
-  output_dir = NULL,
-  table_copy = TRUE
-)
-}
-\arguments{
-\item{json_file}{The metadata file. This should be downloaded from the metadata catalogue as a json file. See 'data-raw/maternity_indicators_dataset_(mids)_20240105T132210.json' for an example download.}
-
-\item{domain_file}{The domain list file. This should be a csv file created by the user, with each domain listed on a separate line. See 'data-raw/domain_list_demo.csv' for a template.}
-
-\item{look_up_file}{The look-up table file, with auto-categorisations. By default, the code uses 'data/look-up.rda'. The user can provide their own look-up table in the same format as 'data-raw/look-up.csv'.}
-
-\item{output_dir}{The path to the directory where the two csv output files will be saved. By default, the current working directory is used.}
-
-\item{table_copy}{Turn on copying between tables (TRUE or FALSE, default TRUE). If TRUE, categorisations you made for all other tables in this dataset will be copied over (if 'OUTPUT_' files are found in output_dir).}
-}
-\value{
-The function will return two csv files: 'OUTPUT_' which contains the mappings and 'LOG_' which contains details about the dataset and session.
-}
-\description{
-This function will read in the metadata file for a chosen dataset, loop through all the data elements, and ask the user to catergorise/label each data element as belonging to one or more domains.\cr \cr
-The domains will appear in the Plots tab and dataset information will be printed to the R console, for the user's reference in making these categorisations. \cr \cr
-These categorisations will be saved to a csv file, alongside a log file which summarises the session details.
-To speed up this process, some auto-categorisations will be made by the function for commonly occurring data elements and categorisations for the same data element can be copied from one table to another. \cr \cr
-Example inputs are provided within the package data, for the user to run this function in a demo mode.
-}
-\examples{
-# Run in demo mode by providing no inputs: domain_mapping()
-# Demo mode will use the /data files provided in this package
-# For more guidance, refer to the package README.md file and the R manual files.
-}
diff --git a/man/end_plot.Rd b/man/end_plot.Rd
new file mode 100644
index 00000000..8683bd91
--- /dev/null
+++ b/man/end_plot.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/end_plot.R
+\name{end_plot}
+\alias{end_plot}
+\title{end_plot}
+\usage{
+end_plot(df, Table_name, ref_table)
+}
+\arguments{
+\item{df}{The Output dataframe with all the domain categorisations}
+
+\item{Table_name}{The table name}
+
+\item{ref_table}{The domain code reference table (which domain maps to which integer)}
+}
+\value{
+It returns a ggplot
+}
+\description{
+This function is called within the browseMetadata function. \cr \cr
+A summary plot is created that includes the domain code reference table and counts of domain code categorisations \cr \cr
+}
diff --git a/man/join_outputs.Rd b/man/join_outputs.Rd
new file mode 100644
index 00000000..b6555478
--- /dev/null
+++ b/man/join_outputs.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/join_outputs.R
+\name{join_outputs}
+\alias{join_outputs}
+\title{join_outputs}
+\usage{
+join_outputs(session_1, session_2)
+}
+\arguments{
+\item{session_1}{Dataframe from session 1}
+
+\item{session_2}{Dataframe from session 2}
+}
+\value{
+Dataframe with information from session 1 and 2, joined on column DataElement.
+}
+\description{
+This function is called within the browseMetadata_compare_outputs function. \cr \cr
+Joins output dataframes from two sessions, on the column DataElement.
+}
diff --git a/man/json_table_to_df.Rd b/man/json_table_to_df.Rd
new file mode 100644
index 00000000..84415ed9
--- /dev/null
+++ b/man/json_table_to_df.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/json_table_to_df.R
+\name{json_table_to_df}
+\alias{json_table_to_df}
+\title{join_outputs}
+\usage{
+json_table_to_df(Dataset, n)
+}
+\arguments{
+\item{Dataset}{This is the dataModel field of the json}
+
+\item{n}{The Dataset number (as a json can have multiple datasets)}
+}
+\value{
+A dataframe for that specific table, including data label, description and type.
+}
+\description{
+This function is called within the browseMetadata function. \cr \cr
+It reads in the nested lists from the json and extracts information needed into a dataframe. \cr \cr
+It does this for one specific table in a dataset. \cr \cr
+}
diff --git a/man/load_data.Rd b/man/load_data.Rd
new file mode 100644
index 00000000..e01e4936
--- /dev/null
+++ b/man/load_data.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/load_data.R
+\name{load_data}
+\alias{load_data}
+\title{load_data}
+\usage{
+load_data(json_file, domain_file, look_up_file)
+}
+\arguments{
+\item{json_file}{As defined in browseMetadata}
+
+\item{domain_file}{As defined in browseMetadata}
+
+\item{look_up_file}{As defined in browseMetadata}
+}
+\value{
+A list of 5: all inputs needed for the browseMetadata function to run.
+}
+\description{
+This function is called within the browseMetadata function. \cr \cr
+It collects the inputs needed for the browseMetadata function (defaults or user inputs)
+If some inputs are NULL, it loads the default inputs. If defaults not available, it prints error for the user.
+If inputs are not NULL, it loads the user-specified inputs.
+}
diff --git a/man/log_Output.Rd b/man/log_Output.Rd
new file mode 100644
index 00000000..d825501b
--- /dev/null
+++ b/man/log_Output.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/data-log_Output.R
+\docType{data}
+\name{log_Output}
+\alias{log_Output}
+\title{Output log dataframe}
+\format{
+A data frame with 1 empty row and 9 columns
+}
+\source{
+The dataframe was manually created for the browseMetadata.R function
+}
+\usage{
+data(log_Output)
+}
+\description{
+Empty log output dataframe for browseMetadata.R to fill. \cr \cr
+}
+\details{
+\item \code{log_Output <- data.frame(
+timestamp = character(1),
+browseMetadata = character(1),
+Initials = character(1),
+MetaDataVersion = character(1),
+MetaDataLastUpdated = character(1),
+DomainListDesc = character(1),
+Dataset = character(1),
+Table = character(1),
+Table_note = character(1)
+)}
+
+\item \code{usethis::use_data(log_Output)}
+}
+\keyword{datasets}
diff --git a/man/ref_plot.Rd b/man/ref_plot.Rd
new file mode 100644
index 00000000..3fd58c96
--- /dev/null
+++ b/man/ref_plot.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ref_plot.R
+\name{ref_plot}
+\alias{ref_plot}
+\title{ref_plot}
+\usage{
+ref_plot(domains)
+}
+\arguments{
+\item{domains}{The output of load_data}
+}
+\value{
+A reference table that appears in the Plots tab. A list of 2 containing the derivatives for this plot, used later in browseMetadata.
+}
+\description{
+This function is called within the browseMetadata function. \cr \cr
+It plots a reference table to guide the user in their categorisation of domains. \cr \cr
+This reference table is based on the user inputted domains and the default domains provided by this package.  \cr \cr
+}
diff --git a/man/user_categorisation.Rd b/man/user_categorisation.Rd
index 13b7b46d..147b9631 100644
--- a/man/user_categorisation.Rd
+++ b/man/user_categorisation.Rd
@@ -19,7 +19,7 @@ user_categorisation(data_element, data_desc, data_type, domain_code_max)
 It returns a list containing the decision and decision note
 }
 \description{
-This function is used within the domain_mapping function. \cr \cr
+This function is called within the browseMetadata function. \cr \cr
 It displays data properties to the user and requests a categorisation into a domain. \cr \cr
 An optional note can be included with the categorisation.
 }
diff --git a/man/user_categorisation_loop.Rd b/man/user_categorisation_loop.Rd
new file mode 100644
index 00000000..324eaa90
--- /dev/null
+++ b/man/user_categorisation_loop.Rd
@@ -0,0 +1,44 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/user_categorisation_loop.R
+\name{user_categorisation_loop}
+\alias{user_categorisation_loop}
+\title{user_categorisation_loop}
+\usage{
+user_categorisation_loop(
+  start_v,
+  end_v,
+  Table_df,
+  df_prev_exist,
+  df_prev,
+  lookup,
+  df_plots,
+  Output
+)
+}
+\arguments{
+\item{start_v}{Index of data element to start}
+
+\item{end_v}{Index of data element to end}
+
+\item{Table_df}{Dataframe with the table information, extracted from json metadata}
+
+\item{df_prev_exist}{Boolean to indicate with previous dataframes exist (to copy from)}
+
+\item{df_prev}{Previous dataframes to copy from (or NULL)}
+
+\item{lookup}{The lookup table to enable auto categorisations}
+
+\item{df_plots}{Output from the ref_plot function, to indicate maximum domain code allowed}
+
+\item{Output}{Empty Output dataframe, to fill}
+}
+\value{
+An Output dataframe containing information about the table, data elements and categorisations
+}
+\description{
+This function is called within the browseMetadata function. \cr \cr
+Given a specific table and a number of data elements to search, it checks for 3 different sources of domain categorisation: \cr \cr
+1 - If data elements match those in the look-up table, auto categorise them \cr \cr
+2 - If data elements match to previous table output, copy them \cr \cr
+3 - If no match for 1 or 2, data elements are categorised by the user \cr \cr
+}
diff --git a/man/user_prompt.Rd b/man/user_prompt.Rd
new file mode 100644
index 00000000..f924f774
--- /dev/null
+++ b/man/user_prompt.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/user_prompt.R
+\name{user_prompt}
+\alias{user_prompt}
+\title{user_prompt}
+\usage{
+user_prompt(
+  pre_prompt_text = NULL,
+  prompt_text,
+  any_keys,
+  post_yes_text = NULL
+)
+}
+\arguments{
+\item{pre_prompt_text}{Optional text to display to the user, prior to prompt.
+This should be a data frame:
+data.frame(Heading = logical(0), Text = character(0))
+Each row of the dataframe specifies Heading TRUE/FALSE and text to display.}
+
+\item{prompt_text}{Text to display to the user, to prompt their response.}
+
+\item{any_keys}{Boolean to determine if any key responses are allowable.
+If FALSE, only these are allowed: Y, y, N and n.}
+
+\item{post_yes_text}{Optional text to post after receiving a 'Y' or 'y'
+response from user. Same dataframe format as pre_prompt_text.}
+}
+\value{
+It returns variable text, depending on any_keys.
+}
+\description{
+This function is called within the browseMetadata function. \cr \cr
+It prompts a response from the user. \cr \cr
+}
diff --git a/man/user_prompt_list.Rd b/man/user_prompt_list.Rd
new file mode 100644
index 00000000..5d5b6ade
--- /dev/null
+++ b/man/user_prompt_list.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/user_prompt_list.R
+\name{user_prompt_list}
+\alias{user_prompt_list}
+\title{user_prompt_list}
+\usage{
+user_prompt_list(prompt_text, list_allowed, empty_allowed)
+}
+\arguments{
+\item{prompt_text}{Text to display to the user, to prompt their response.}
+
+\item{list_allowed}{A list of allowable integer responses.}
+
+\item{empty_allowed}{A boolean specifying if no response is allowed.}
+}
+\value{
+It returns a list of integers to process, that match the prompt options.
+}
+\description{
+This function is called within the browseMetadata function. \cr \cr
+It prompts a response from the user, in the form of a list. \cr \cr
+It checks if the user has given the an input within the allowed range - if not, it re-sends prompt. \cr \cr
+}
diff --git a/man/valid_comparison.Rd b/man/valid_comparison.Rd
new file mode 100644
index 00000000..905ed898
--- /dev/null
+++ b/man/valid_comparison.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/valid_comparison.R
+\name{valid_comparison}
+\alias{valid_comparison}
+\title{valid_comparison
+This function is called within the browseMetadata_compare_outputs function. \cr \cr
+It reads two inputs to see if they are equal. \cr \cr
+If the test is 'warning' status and inputs are not equal it gives warning but continues. \cr \cr
+If the test is 'danger' status and inputs are not equal it stops and exits, with error message. \cr \cr}
+\usage{
+valid_comparison(input1, input2, severity, severity_text)
+}
+\arguments{
+\item{input1}{Input 1}
+
+\item{input2}{Input 2}
+
+\item{severity}{Level of severity. Only 'danger' or 'warning'}
+
+\item{severity_text}{The text to print if inputs are not equal.}
+}
+\value{
+It returns variable text, depending on any_keys.
+}
+\description{
+valid_comparison
+This function is called within the browseMetadata_compare_outputs function. \cr \cr
+It reads two inputs to see if they are equal. \cr \cr
+If the test is 'warning' status and inputs are not equal it gives warning but continues. \cr \cr
+If the test is 'danger' status and inputs are not equal it stops and exits, with error message. \cr \cr
+}
diff --git a/tests/testthat.R b/tests/testthat.R
new file mode 100644
index 00000000..395a0dc7
--- /dev/null
+++ b/tests/testthat.R
@@ -0,0 +1,12 @@
+# This file is part of the standard setup for testthat.
+# It is recommended that you do not modify it.
+#
+# Where should you do additional test configuration?
+# Learn more about the roles of various files in:
+# * https://r-pkgs.org/testing-design.html#sec-tests-files-overview
+# * https://testthat.r-lib.org/articles/special-files.html
+
+library(testthat)
+library(browseMetadata)
+
+test_check("browseMetadata")
diff --git a/tests/testthat/test-concensus_on_mismatch.R b/tests/testthat/test-concensus_on_mismatch.R
new file mode 100644
index 00000000..d64ebf74
--- /dev/null
+++ b/tests/testthat/test-concensus_on_mismatch.R
@@ -0,0 +1,65 @@
+# Mock user_categorisation function
+mock_user_categorisation <- function(data_element, data_desc, data_type, domain_code_max) {
+  return(list(decision = "mock_decision", decision_note = "mock_note"))
+}
+
+test_that("concensus_on_mismatch handles mismatch correctly", {
+  # Mock data
+  ses_join <- data.frame(
+    Domain_code_ses1 = c("1", "1,2"),
+    Domain_code_ses2 = c("1", "2"),
+    DataElement = c("Element1", "Element2"),
+    Note_ses1 = c("Note1", "Note2"),
+    Note_ses2 = c("Note3", "Note4"),
+    stringsAsFactors = FALSE
+  )
+
+  Table_df <- data.frame(
+    Label = c("Element1", "Element2"),
+    Description = c("Description1", "Description2"),
+    Type = c("Type1", "Type2"),
+    stringsAsFactors = FALSE
+  )
+
+  datavar <- 2
+  domain_code_max <- 5
+
+  # Stub the user_categorisation function
+  stub(concensus_on_mismatch, "user_categorisation", mock_user_categorisation)
+
+  # Call the function
+  result <- concensus_on_mismatch(ses_join, Table_df, datavar, domain_code_max)
+
+  # Check the result
+  expect_equal(result$Domain_code_join, "mock_decision")
+  expect_equal(result$Note_join, "mock_note")
+})
+
+test_that("concensus_on_mismatch handles no mismatch correctly", {
+  # Mock data
+  ses_join <- data.frame(
+    Domain_code_ses1 = c("2", "2,4"),
+    Domain_code_ses2 = c("2", "2,4"),
+    DataElement = c("Element1", "Element2"),
+    Note_ses1 = c("Note1", "Note2"),
+    Note_ses2 = c("Note3", "Note4"),
+    stringsAsFactors = FALSE
+  )
+
+  Table_df <- data.frame(
+    Label = c("Element1", "Element2"),
+    Description = c("Description1", "Description2"),
+    Type = c("Type1", "Type2"),
+    stringsAsFactors = FALSE
+  )
+
+  datavar <- 2
+  domain_code_max <- 5
+
+  # Call the function
+  result <- concensus_on_mismatch(ses_join, Table_df, datavar, domain_code_max)
+
+  # Check the result
+  expect_equal(result$Domain_code_join, "2,4")
+  expect_equal(result$Note_join, "No mismatch!")
+})
diff --git a/tests/testthat/test-copy_previous.R b/tests/testthat/test-copy_previous.R
new file mode 100644
index 00000000..f8a1a052
--- /dev/null
+++ b/tests/testthat/test-copy_previous.R
@@ -0,0 +1,45 @@
+library(testthat)
+library(dplyr)
+library(cli)
+
+test_that("copy_previous works correctly when there are files to copy from", {
+  # Create a temporary directory
+  temp_dir <- tempdir()
+
+  # Create test CSV files
+  # Criteria for test files: one must have 'AUTO CATEGORISED' as Note, overlap in DataElements across files, different timestamps across files)
+  write.csv(data.frame(timestamp = "2024-08-22-13-35-40", DataElement = c("DataElement1","DataElement2"), Note = c("note1","note2")),
+            file = file.path(temp_dir, "OUTPUT_TestDataset_1.csv"), row.names = FALSE)
+  write.csv(data.frame(timestamp = "2024-09-22-13-00-05", DataElement = c("DataElement1","DataElement3"), Note = c("note3","note4")),
+            file = file.path(temp_dir, "OUTPUT_TestDataset_2.csv"), row.names = FALSE)
+  write.csv(data.frame(timestamp = "2024-10-22-11-12-02", DataElement = c("DataElement4","DataElement2"), Note = c("AUTO CATEGORISED","note5")),
+            file = file.path(temp_dir, "OUTPUT_TestDataset_3.csv"), row.names = FALSE)
+
+  # Call the function
+  result <- copy_previous("TestDataset", temp_dir)
+
+  # Check the results
+  expect_true(result$df_prev_exist)
+  expect_equal(nrow(result$df_prev), 3)
+  expect_equal(result$df_prev$timestamp, c("2024-08-22-13-35-40", "2024-08-22-13-35-40","2024-09-22-13-00-05"))
+  expect_equal(result$df_prev$DataElement, c("DataElement1", "DataElement2","DataElement3"))
+  expect_equal(result$df_prev$Note, c("note1", "note2","note4"))
+
+  # Clean up
+  unlink(temp_dir, recursive = TRUE)
+})
+
+test_that("copy_previous works correctly when there are no files to copy from", {
+  # Create a temporary directory
+  temp_dir <- tempdir()
+
+  # Call the function
+  result <- copy_previous("TestDataset", temp_dir)
+
+  # Check the results
+  expect_null(result$df_prev)
+  expect_false(result$df_prev_exist)
+
+  # Clean up
+  unlink(temp_dir, recursive = TRUE)
+})
diff --git a/tests/testthat/test-end_plot.R b/tests/testthat/test-end_plot.R
new file mode 100644
index 00000000..fcfbb3bc
--- /dev/null
+++ b/tests/testthat/test-end_plot.R
@@ -0,0 +1,33 @@
+test_that("end_plot function works correctly", {
+  # Sample data frame
+  df <- get("Output")
+
+  df <- df %>% add_row(
+    timestamp = format(Sys.time(), "%Y-%m-%d-%H-%M-%S"),
+    Table = 'Sample Table',
+    DataElement = 'DataElement 1',
+    DataElement_N = '1 of 2',
+    Domain_code = '1',
+    Note = 'AUTO CATEGORISED'
+  )
+
+  df <- df %>% add_row(
+    timestamp = format(Sys.time(), "%Y-%m-%d-%H-%M-%S"),
+    Table = 'Sample Table',
+    DataElement = 'DataElement 2',
+    DataElement_N = '2 of 2',
+    Domain_code = '3',
+    Note = 'DEMOGRAPHICS'
+  )
+
+  # Sample reference table
+  domains_extend <- rbind(c("*NO MATCH / UNSURE*"), c("*METADATA*"), c("*ID*"), c("*DEMOGRAPHICS*"), c("Domain A"),c("Domain B"))
+  Code <- data.frame(Code = 0:(nrow(domains_extend) - 1))
+  ref_table <- tableGrob(cbind(Code,domains_extend),rows = NULL,theme = ttheme_default())
+
+  # Call the function
+  result <- end_plot(df, "Sample Table", ref_table)
+
+  # Check if the result is a gtable object
+  expect_s3_class(result, "gtable")
+})
diff --git a/tests/testthat/test-join_outputs.R b/tests/testthat/test-join_outputs.R
new file mode 100644
index 00000000..8bdadad6
--- /dev/null
+++ b/tests/testthat/test-join_outputs.R
@@ -0,0 +1,60 @@
+# Create sample data (only allow it to different on timestamp, Domain_code and Note)
+session_1 <- data.frame(
+  timestamp = c("2024-08-22-13-26-33", "2024-08-22-13-26-33", "2024-08-22-13-26-33"),
+  Table = c("HEALTH","HEALTH","HEALTH"),
+  DataElement = c("ALF_E", "AVAIL_FROM_DT", "OUTCOME"),
+  DataElement_N = c("1 of 3", "2 of 3", "3 of 3"),
+  Domain_code = c("1", "1", "5"),
+  Note = c("ID", "Metadata", "Diagnostic category")
+  )
+
+session_2 <- data.frame(
+  timestamp = c("2024-08-22-15-24-30", "2024-08-22-15-24-30", "2024-08-22-15-24-30"),
+  Table = c("HEALTH","HEALTH","HEALTH"),
+  DataElement = c("ALF_E", "AVAIL_FROM_DT", "OUTCOME"),
+  DataElement_N = c("1 of 3", "2 of 3", "3 of 3"),
+  Domain_code = c("1", "1", "4"),
+  Note = c("ID", "info about data", "diagnosis")
+)
+
+# Define expected outputs
+expected_output_ses1_ses2 <- data.frame(
+  timestamp_ses1 = c("2024-08-22-13-26-33", "2024-08-22-13-26-33", "2024-08-22-13-26-33"),
+  Table_ses1 = c("HEALTH","HEALTH","HEALTH"),
+  DataElement_N_ses1 = c("1 of 3", "2 of 3", "3 of 3"),
+  Domain_code_ses1 = c("1", "1", "5"),
+  Note_ses1 = c("ID", "Metadata", "Diagnostic category"),
+  timestamp_ses2 = c("2024-08-22-15-24-30", "2024-08-22-15-24-30", "2024-08-22-15-24-30"),
+  Table_ses2 = c("HEALTH","HEALTH","HEALTH"),
+  DataElement_N_ses2 = c("1 of 3", "2 of 3", "3 of 3"),
+  Domain_code_ses2 = c("1", "1", "4"),
+  Note_ses2 = c("ID", "info about data", "diagnosis"),
+  DataElement = c("ALF_E", "AVAIL_FROM_DT", "OUTCOME"),
+  Domain_code_join = c(NA, NA, NA),
+  Note_join = c(NA, NA, NA)
+)
+
+expected_output_ses1_ses1 <- data.frame(
+  timestamp_ses1 = c("2024-08-22-13-26-33", "2024-08-22-13-26-33", "2024-08-22-13-26-33"),
+  Table_ses1 = c("HEALTH","HEALTH","HEALTH"),
+  DataElement_N_ses1 = c("1 of 3", "2 of 3", "3 of 3"),
+  Domain_code_ses1 = c("1", "1", "5"),
+  Note_ses1 = c("ID", "Metadata", "Diagnostic category"),
+  timestamp_ses2 = c("2024-08-22-13-26-33", "2024-08-22-13-26-33", "2024-08-22-13-26-33"),
+  Table_ses2 = c("HEALTH","HEALTH","HEALTH"),
+  DataElement_N_ses2 = c("1 of 3", "2 of 3", "3 of 3"),
+  Domain_code_ses2 = c("1", "1", "5"),
+  Note_ses2 = c("ID", "Metadata", "Diagnostic category"),
+  DataElement = c("ALF_E", "AVAIL_FROM_DT", "OUTCOME"),
+  Domain_code_join = c(NA, NA, NA),
+  Note_join = c(NA, NA, NA)
+)
+
+# Write the test
+test_that("join_outputs works correctly", {
+  result <- join_outputs(session_1, session_1)
+  expect_equal(result, expected_output_ses1_ses1)
+  result <- join_outputs(session_1,session_2)
+  expect_equal(result, expected_output_ses1_ses2)
+  })
+
diff --git a/tests/testthat/test-json_table_to_df.R b/tests/testthat/test-json_table_to_df.R
new file mode 100644
index 00000000..4c55df3a
--- /dev/null
+++ b/tests/testthat/test-json_table_to_df.R
@@ -0,0 +1,41 @@
+# Define the mock json
+mock_Dataset <- list(
+  childDataClasses = list(
+    list(
+      childDataElements = list(
+        list(ID = "111", label = "Label1", description = "Description1", dataType = list(ID = "a", label = "Type1")),
+        list(ID = "112", label = "Label2", description = "Description2", dataType = list(ID = "b", label = "Type2"))
+      )
+    ),
+    list(
+      childDataElements = list(
+        list(ID = "111", label = "Label3", description = "Description3", dataType = list(ID = "c", label = "Type1")),
+        list(ID = "112", label = "Label4", description = "Description4", dataType = list(ID = "d", label = "Type3"))
+      )
+    )
+  )
+)
+
+test_that("json_table_to_df gives expected output for first index", {
+  result <- json_table_to_df(mock_Dataset, 1)
+
+  expected <- data.frame(
+    Label = c("Label1", "Label2"),
+    Description = c("Description1", "Description2"),
+    Type = c("Type1", "Type2")
+  )
+
+  expect_equal(result, expected)
+})
+
+test_that("json_table_to_df gives expected output for second index", {
+  result <- json_table_to_df(mock_Dataset, 2)
+
+  expected <- data.frame(
+    Label = c("Label3", "Label4"),
+    Description = c("Description3", "Description4"),
+    Type = c("Type1", "Type3")
+  )
+
+  expect_equal(result, expected)
+})
diff --git a/tests/testthat/test-load_data.R b/tests/testthat/test-load_data.R
new file mode 100644
index 00000000..d27ef4ce
--- /dev/null
+++ b/tests/testthat/test-load_data.R
@@ -0,0 +1,45 @@
+library(testthat)
+library(rjson)
+library(cli)
+
+# Define file paths to demo data relative to the package directory
+json_file <- system.file("data-raw/national_community_child_health_database_(ncchd)_20240405T130125.json", package = "browseMetadata")
+look_up_file <- system.file("data-raw/look_up.csv", package = "browseMetadata")
+domains_file <- system.file("data-raw/domain_list_demo.csv", package = "browseMetadata")
+
+# Define package demo data
+json <- get("json_metadata")
+look_up <- get("look_up")
+domains <- get("domain_list")
+
+test_that("load_data runs in demo mode when both json_file and domain_file are NULL", {
+  result <- load_data(NULL, NULL, NULL)
+  expect_true(result$demo_mode)
+  expect_equal(result$meta_json, json)
+  expect_equal(result$domains, domains)
+  expect_equal(result$DomainListDesc, "DemoList")
+})
+
+test_that("load_data throws error if only one of json_file or domain_file is NULL", {
+  expect_error(load_data(json_file, NULL, NULL))
+  expect_error(load_data(NULL, domains_file, NULL))
+})
+
+test_that("load_data reads user-specified files correctly", {
+  result <- load_data(json_file, domains_file, NULL)
+  expect_false(result$demo_mode)
+  expect_true(is.list(result$meta_json))
+  expect_true(is.data.frame(result$domains))
+  expect_equal(result$DomainListDesc, tools::file_path_sans_ext(basename(domains_file)))
+})
+
+test_that("load_data uses default look-up table when look_up_file is NULL", {
+  result <- load_data(json_file, domains_file, NULL)
+  expect_equal(result$lookup, look_up)
+})
+
+test_that("load_data reads user-specified look-up table correctly", {
+  result <- load_data(json_file, domains_file, look_up_file)
+  expect_true(is.data.frame(result$lookup))
+  expect_equal(nrow(result$lookup), nrow(read.csv(look_up_file)))
+})
diff --git a/tests/testthat/test-ref_plot.R b/tests/testthat/test-ref_plot.R
new file mode 100644
index 00000000..659e8afd
--- /dev/null
+++ b/tests/testthat/test-ref_plot.R
@@ -0,0 +1,22 @@
+# Unit test for ref_plot function
+test_that("ref_plot function works correctly", {
+  # Mock input dataframe
+  domains <- data.frame(Domains = c("Domain1", "Domain2", "Domain3", "Domain4"))
+
+  # Call the function
+  result <- ref_plot(domains)
+
+  # Check the structure of the result
+  expect_true(is.list(result))
+  expect_true("Code" %in% names(result))
+  expect_true("Domain_table" %in% names(result))
+
+  # Check the content of the Code dataframe
+  expect_equal(nrow(result$Code), 8)
+  expect_equal(result$Code$Code, 0:7)
+
+  # Check the content of the Domain_table
+  expect_true(inherits(result$Domain_table, "gtable"))
+  expect_equal(nrow(result$Domain_table), 9)
+  expect_equal(result$Domain_table$layout$name[1], "colhead-fg")
+})
diff --git a/tests/testthat/test-user_categorisation.R b/tests/testthat/test-user_categorisation.R
new file mode 100644
index 00000000..ef8df92b
--- /dev/null
+++ b/tests/testthat/test-user_categorisation.R
@@ -0,0 +1,31 @@
+test_that("user_categorisation works with valid input", {
+  mock_readline <- mock("3", "This is a note", "n")  # create a mock object that returns user inputs in sequence
+  stub(user_categorisation, "readline", mock_readline)  # replace `readline` function within the `user_categorisation` function with the `mock_readline` mock object
+
+  response <- user_categorisation(data_element = "Element1", data_desc = "Description1", data_type = "Type1", domain_code_max = 5)
+  expect_equal(response, list(decision = "3", decision_note = "This is a note"))
+})
+
+test_that("user_categorisation handles invalid input and then valid input", {
+  mock_readline <- mock("6", "3", "This is a note", "n")  # create a mock object that returns invalid input first, then valid input
+  stub(user_categorisation, "readline", mock_readline)  # replace `readline` function within the `user_categorisation` function with the `mock_readline` mock object
+
+  response <- user_categorisation(data_element = "Element1", data_desc = "Description1", data_type = "Type1", domain_code_max = 5)
+  expect_equal(response, list(decision = "3", decision_note = "This is a note"))
+})
+
+test_that("user_categorisation handles multiple valid inputs", {
+  mock_readline <- mock("3,4", "This is another note", "n")  # create a mock object that returns multiple valid inputs
+  stub(user_categorisation, "readline", mock_readline)  # replace `readline` function within the `user_categorisation` function with the `mock_readline` mock object
+
+  response <- user_categorisation(data_element = "Element1", data_desc = "Description1", data_type = "Type1", domain_code_max = 5)
+  expect_equal(response, list(decision = "3,4", decision_note = "This is another note"))
+})
+
+test_that("user_categorisation handles re-do input", {
+  mock_readline <- mock("3", "This is a note", "y", "4", "Another note", "n")  # create a mock object that returns inputs including re-do
+  stub(user_categorisation, "readline", mock_readline)  # replace `readline` function within the `user_categorisation` function with the `mock_readline` mock object
+
+  response <- user_categorisation(data_element = "Element1", data_desc = "Description1", data_type = "Type1", domain_code_max = 5)
+  expect_equal(response, list(decision = "4", decision_note = "Another note"))
+})
diff --git a/tests/testthat/test-user_categorisation_loop.R b/tests/testthat/test-user_categorisation_loop.R
new file mode 100644
index 00000000..52b00c2a
--- /dev/null
+++ b/tests/testthat/test-user_categorisation_loop.R
@@ -0,0 +1,50 @@
+Output <- get("Output")
+Code <- data.frame(Code = 0:2)
+df_plots <- list(Code = Code,'')
+
+test_that("user_categorisation_loop handles auto categorisation", {
+  # Mock data
+  selectTable_df <- data.frame(Label = c("Element1", "Element2"), Description = c("Desc1", "Desc2"), Type = c("Type1", "Type2"))
+  lookup <- data.frame(DataElement = c("Element1", "Element2"), DomainCode = c(1, 2))
+
+  # Call the function
+  result <- user_categorisation_loop(1, 2, selectTable_df, FALSE, data.frame(), lookup, df_plots, Output)
+
+  # Check the result
+  expect_equal(nrow(result), 2)
+  expect_equal(result$Note[1], "AUTO CATEGORISED")
+  expect_equal(result$Note[2], "AUTO CATEGORISED")
+})
+
+test_that("user_categorisation_loop handles copying from previous table", {
+  # Mock data
+  selectTable_df <- data.frame(Label = c("Element1", "Element2"), Description = c("Desc1", "Desc2"), Type = c("Type1", "Type2"))
+  df_prev <- data.frame(DataElement = c("Element1", "Element2"), Domain_code = c(1, 2), Table = c("PrevTable1", "PrevTable2"))
+  lookup <- data.frame(DataElement = c("Element3", "Element4"), DomainCode = c(3, 4))
+
+  # Call the function
+  result <- user_categorisation_loop(1, 2, selectTable_df, TRUE, df_prev, lookup, df_plots, Output)
+
+  # Check the result
+  expect_equal(nrow(result), 2)
+  expect_equal(result$Note[1], "COPIED FROM: PrevTable1")
+  expect_equal(result$Note[2], "COPIED FROM: PrevTable2")
+})
+
+test_that("user_categorisation_loop handles user categorisation", {
+  # Mock data
+  selectTable_df <- data.frame(Label = c("Element1", "Element2"), Description = c("Desc1", "Desc2"), Type = c("Type1", "Type2"))
+  lookup <- data.frame(DataElement = c("Element3", "Element4"), DomainCode = c(3, 4))
+
+  # Mock the user_categorisation function
+  mock_user_categorisation <- mock(list(decision = "1", decision_note = "User note"), cycle = TRUE)
+  stub(user_categorisation_loop, "user_categorisation", mock_user_categorisation)
+
+  # Call the function
+  result <- user_categorisation_loop(1, 2, selectTable_df, FALSE, data.frame(), lookup, df_plots, Output)
+
+  # Check the result
+  expect_equal(nrow(result), 2)
+  expect_equal(result$Note[1], "User note")
+  expect_equal(result$Domain_code[1], "1")
+})
diff --git a/tests/testthat/test-user_prompt.R b/tests/testthat/test-user_prompt.R
new file mode 100644
index 00000000..b0f7a400
--- /dev/null
+++ b/tests/testthat/test-user_prompt.R
@@ -0,0 +1,61 @@
+test_that("user_prompt works with any_keys = TRUE", {
+  mock_readline <- mock("test_response")  # create a mock object that returns a string when called
+  stub(user_prompt, "readline", mock_readline) # replace `readline` function within the `user_prompt` function with the `mock_readline` mock object
+  response <- user_prompt(prompt_text = "Enter something: ", any_keys = TRUE)
+  expect_equal(response, "test_response")
+})
+
+test_that("user_prompt works with any_keys = FALSE and response is y/Y", {
+  mock_readline <- mock("Y")
+  stub(user_prompt, "readline", mock_readline)
+
+  response <- user_prompt(prompt_text = "Enter y/n: ", any_keys = FALSE)
+  expect_equal(response, "Y")
+})
+
+test_that("user_prompt works with any_keys = FALSE and response is n/N", {
+  mock_readline <- mock("n")
+  stub(user_prompt, "readline", mock_readline)
+
+  response <- user_prompt(prompt_text = "Enter y/n: ", any_keys = FALSE)
+  expect_equal(response, "n")
+})
+
+test_that("user_prompt throws error with invalid any_keys", {
+  expect_error(user_prompt(prompt_text = "Enter something: ", any_keys = "invalid"),
+               "Invalid input given for 'any_keys'. Only TRUE or FALSE are allowed.")
+})
+
+test_that("user_prompt works with pre_prompt_text", {
+  pre_prompt_text <- data.frame(Heading = c(TRUE, FALSE), Text = c("Heading Text", "Regular Text"))
+  mock_readline <- mock("test_response")
+  stub(user_prompt, "readline", mock_readline)
+
+  response <- user_prompt(pre_prompt_text = pre_prompt_text, prompt_text = "Enter something: ", any_keys = TRUE)
+  expect_equal(response, "test_response")
+})
+
+test_that("user_prompt works with post_yes_text", {
+  post_yes_text <- data.frame(Heading = c(TRUE, FALSE), Text = c("Post Heading Text", "Post Regular Text"))
+  mock_readline <- mock("Y", "test_response")
+  stub(user_prompt, "readline", mock_readline)
+
+  response <- user_prompt(prompt_text = "Enter y/n: ", any_keys = FALSE, post_yes_text = post_yes_text)
+  expect_equal(response, "Y")
+})
+
+test_that("user_prompt handles empty input initially", {
+  mock_readline <- mock("", "test_response")
+  stub(user_prompt, "readline", mock_readline)
+
+  response <- user_prompt(prompt_text = "Enter something: ", any_keys = TRUE)
+  expect_equal(response, "test_response")
+})
+
+test_that("user_prompt handles invalid input for any_keys = FALSE", {
+  mock_readline <- mock("invalid", "Y")
+  stub(user_prompt, "readline", mock_readline)
+
+  response <- user_prompt(prompt_text = "Enter y/n: ", any_keys = FALSE)
+  expect_equal(response, "Y")
+})
diff --git a/tests/testthat/test-user_prompt_list.R b/tests/testthat/test-user_prompt_list.R
new file mode 100644
index 00000000..59e6513f
--- /dev/null
+++ b/tests/testthat/test-user_prompt_list.R
@@ -0,0 +1,31 @@
+test_that("user_prompt_list works with valid input", {
+  mock_scan <- mock(c(1, 2, 3), cycle = TRUE)  # create a mock object that returns a list of integers when called, cycling the same value
+  stub(user_prompt_list, "scan", mock_scan)  # replace `scan` function within the `user_prompt_list` function with the `mock_scan` mock object
+
+  response <- user_prompt_list(prompt_text = "Enter numbers: ", list_allowed = 1:5, empty_allowed = FALSE)
+  expect_equal(response, c(1, 2, 3))
+})
+
+test_that("user_prompt_list handles out of range input and then valid input", {
+  mock_scan <- mock(c(6, 7, 8), c(1, 2, 3))  # create a mock object that returns out-of-range values first, then valid values
+  stub(user_prompt_list, "scan", mock_scan)  # replace `scan` function within the `user_prompt_list` function with the `mock_scan` mock object
+
+  response <- user_prompt_list(prompt_text = "Enter numbers: ", list_allowed = 1:5, empty_allowed = FALSE)
+  expect_equal(response, c(1, 2, 3))  # expect the valid input after the out-of-range input
+})
+
+test_that("user_prompt_list handles empty input when not allowed", {
+  mock_scan <- mock(integer(0), c(1, 2, 3))  # create a mock object that returns an empty list first, then valid values
+  stub(user_prompt_list, "scan", mock_scan)  # replace `scan` function within the `user_prompt_list` function with the `mock_scan` mock object
+
+  response <- user_prompt_list(prompt_text = "Enter numbers: ", list_allowed = 1:5, empty_allowed = FALSE)
+  expect_equal(response, c(1, 2, 3))  # expect the valid input after the empty input
+})
+
+test_that("user_prompt_list handles empty input when allowed", {
+  mock_scan <- mock(integer(0), cycle = TRUE)  # create a mock object that returns an empty list when called, cycling the same value
+  stub(user_prompt_list, "scan", mock_scan)  # replace `scan` function within the `user_prompt_list` function with the `mock_scan` mock object
+
+  response <- user_prompt_list(prompt_text = "Enter numbers: ", list_allowed = 1:5, empty_allowed = TRUE)
+  expect_equal(response, integer(0))  # expect an empty list
+})
diff --git a/tests/testthat/test-valid_comparison.R b/tests/testthat/test-valid_comparison.R
new file mode 100644
index 00000000..c75410ac
--- /dev/null
+++ b/tests/testthat/test-valid_comparison.R
@@ -0,0 +1,19 @@
+test_that("valid_comparison function works correctly", {
+  # Test invalid severity
+  expect_error(valid_comparison(1, 1, 'info', 'Invalid severity test'),
+               "Invalid severity. Only 'danger' and 'warning' are allowed.")
+
+  # Test danger severity with different inputs
+  expect_error(valid_comparison(1, 2, 'danger', 'Different inputs test'),
+               "Exiting!")
+
+  # Test warning severity with different inputs
+  expect_message(valid_comparison(1, 2, 'warning', 'Different inputs test'),
+                "Continuing but please check comparison is valid!")
+
+  # Test danger severity with same inputs
+  expect_silent(valid_comparison(1, 1, 'danger', 'Same inputs test'))
+
+  # Test warning severity with same inputs
+  expect_silent(valid_comparison(1, 1, 'warning', 'Same inputs test'))
+})