Merge branch 'develop' into hex_sticker

traitecoevo · Nov 29, 2023 · 9eed286 · 9eed286
2 parents ec3079e + fcd7cde
commit 9eed286
Show file tree

Hide file tree

Showing 223 changed files with 5,966 additions and 5,420 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,16 +1,17 @@
 Type: Package
 Package: traits.build
-Title: Package used to build an AusTraits data resource
-Version: 0.9.0
+Title: A workflow for harmonising trait data from diverse sources into a documented standard structure
+Version: 1.0.1
 Maintainer: Daniel Falster <[email protected]>
 Authors@R: c(
     person(given = "Daniel", family = "Falster", role = c("cre", "aut"), email = "[email protected]", comment = c(ORCID = "0000-0002-9814-092X")),
     person(given = "Elizabeth", family = "Wenk", role = c("cur", "aut"), comment = c(ORCID = "0000-0001-5640-5910")),
     person(given = "Sophie", family = "Yang", role = c("cur", "aut"), comment = c(ORCID = "0000-0001-7328-345X")),
+    person(given = "Fonti", family = "Kar", role = c("aut", "ctb"), comment = c(ORCID = "0000-0002-2760-3974")),
     person("ARDC", role = c("fnd")),
     person("ARC", role = c("fnd"))
     )
-Description: This package enables harmonising of data from diverse sources. The code was originally built to support AusTraits, an open-source compilation of data on the traits of Australian plant species. For more information on AusTraits go to https://austraits.org.
+Description: The `traits.build` package provides a workflow to harmonise trait data from diverse sources. The code was originally built to support AusTraits (see Falster et al. 2021, <doi:10.1038/s41597-021-01006-6>, <https://github.com/traitecoevo/austraits.build>) and has been generalised here to support construction of other trait databases. For detailed instructions and examples see <https://traitecoevo.github.io/traits.build-book/>.
 BugReports: https://github.com/traitecoevo/traits.build/issues
 URL: http://traitecoevo.github.io/traits.build/
 License: BSD_2_clause + file LICENCE
@@ -28,6 +29,7 @@ Imports:
     kableExtra,
     magrittr,
     purrr,
+    forcats,
     RefManageR,
     rlang,
     rmarkdown,
@@ -40,7 +42,6 @@ Imports:
 Suggests:
     furrr,
     remake,
-    austraits,
     leaflet,
     bibtex,
     knitr,
@@ -49,10 +50,13 @@ Suggests:
     markdown,
     pkgdown,
     rcrossref,
+    ggplot2,
+    ggbeeswarm,
+    gridExtra,
+    scales,
     zip,
     covr
 Remotes:
-    traitecoevo/austraits@develop,
     richfitz/remake
 Encoding: UTF-8
 VignetteBuilder: knitr

diff --git a/NAMESPACE b/NAMESPACE
@@ -4,13 +4,16 @@ export("%>%")
 export(build_add_version)
 export(build_combine)
 export(build_setup_pipeline)
-export(build_update_taxonomy)
+export(check_pivot_duplicates)
 export(dataset_build)
 export(dataset_configure)
 export(dataset_find_taxon)
 export(dataset_process)
 export(dataset_report)
 export(dataset_test)
+export(dataset_update_taxonomy)
+export(db_traits_pivot_longer)
+export(db_traits_pivot_wider)
 export(get_schema)
 export(get_unit_conversions)
 export(metadata_add_contexts)
@@ -29,6 +32,7 @@ export(metadata_exclude_observations)
 export(metadata_find_taxonomic_change)
 export(metadata_remove_taxonomic_change)
 export(metadata_update_taxonomic_change)
+export(plot_trait_distribution_beeswarm)
 export(read_csv_char)
 export(read_metadata)
 export(util_df_to_list)
@@ -72,11 +76,7 @@ importFrom(styler,style_text)
 importFrom(testthat,compare)
 importFrom(testthat,context)
 importFrom(testthat,expect)
-importFrom(testthat,expect_false)
-importFrom(testthat,expect_named)
 importFrom(testthat,expect_silent)
-importFrom(testthat,expect_true)
-importFrom(testthat,expect_type)
 importFrom(testthat,local_edition)
 importFrom(testthat,test_that)
 importFrom(tidyr,spread)

diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,15 @@
-# traits.build 0.9.0
+# traits.build 1.0.1
 
-* Moved vignettes to <https://traitecoevo.github.io/traits.build-book/>
-* migrate code from <http://traitecoevo.github.io/austraits.build/articles/>
+As described in #134, fixes some minor issues with 
+
+- testing of datasets in `dataset_test`
+- generating of reports
+- standardising of taxonomic names. 
+
+# traits.build 1.0.0
+
+This is the first major release of the {traits.build} package, providing a workflow to harmonise trait data from diverse sources. The code was originally built to support AusTraits (see Falster et al. 2021, <doi:10.1038/s41597-021-01006-6>, <https://github.com/traitecoevo/austraits.build>) and has been generalised here to support construction of other trait databases. Detailed instructions are available at:
+
+- package website: <https://traitecoevo.github.io/traits.build/>
+- package book: <https://traitecoevo.github.io/traits.build-book/>
 
diff --git a/R/get.R b/R/get.R
@@ -47,5 +47,8 @@ util_get_version <- function(path =  "config/metadata.yml") {
 #' @return 40-digit SHA character string for the latest commit to the repository
 #' @export
 util_get_SHA <- function(path = ".") {
-  git2r::sha(git2r::last_commit(git2r::repository(path)))
+  tryCatch(
+    git2r::sha(git2r::last_commit(git2r::repository(path))),
+    error = function(e) NA # best effort: any git2r failure falls back to NA
+  )
 }
diff --git a/R/pivot.R b/R/pivot.R
@@ -0,0 +1,137 @@
+#' @title Test whether a dataset can pivot wider
+#'
+#' @description Test whether the traits table of a dataset can pivot wider with the minimum
+#' required columns, i.e. whether those columns identify every value uniquely.
+#'
+#' @param dataset A built dataset: a list whose `traits` element is a table containing the
+#' columns `dataset_id`, `trait_name`, `value`, `observation_id`, `value_type`,
+#' `repeat_measurements_id`, `method_id` and `method_context_id`
+#'
+#' @return Invisibly, `TRUE` if no combination of the identifying columns and `trait_name`
+#' occurs more than once (including when the traits table is empty), otherwise `FALSE`
+
+check_pivot_wider <- function(dataset) {
+
+  # Columns that must jointly point at a single `value` for the table to pivot wider
+  key_cols <- c(
+    "dataset_id", "trait_name", "observation_id", "value_type",
+    "repeat_measurements_id", "method_id", "method_context_id"
+  )
+
+  # `value` is requested as well so that a traits table lacking it still raises an error
+  keys <- dataset$traits %>%
+    dplyr::select(dplyr::all_of(c(key_cols, "value"))) %>%
+    dplyr::select(dplyr::all_of(key_cols))
+
+  # A repeated key would put several values into one wide cell. Comparing row counts
+  # avoids a wide/long round trip and positional column indexing, and copes with a
+  # traits table that has no rows.
+  invisible(nrow(dplyr::distinct(keys)) == nrow(keys))
+
+}
+
+
+#' @title Pivot long format data into a wide format
+#'
+#' @description `db_traits_pivot_wider` "widens" long format data ("tidy data").
+#'
+#' Databases built with `traits.build` are organised in a long format where observations are on different rows and the
+#' type of observation is denoted by various identifying columns (e.g. `trait_name`, `dataset_id`,
+#' `observation_id`, etc.).
+#' This function converts the data into wide format so that each trait is in its own column.
+#'
+#' @param traits The traits table from a database (a data frame or tibble in long format)
+#' @return A tibble in wide format
+#' @details `db_traits_pivot_wider` will return a single wide tibble; note that some meta-data columns
+#' (unit, replicates, measurement_remarks, basis_of_value) will be excluded to
+#' produce a useful wide tibble. `value_type` is excluded as well, unless at least one
+#' combination of `dataset_id`, `observation_id`, `method_id`, `method_context_id` and
+#' `repeat_measurements_id` has more than one `value_type`; it is then kept as an
+#' identifying column.
+#' @examples
+#' \dontrun{
+#' data <- austraits$traits %>% filter(dataset_id == "Falster_2003")
+#' data # Long format
+#' traits_wide <- db_traits_pivot_wider(data)
+#' traits_wide # Wide format
+#' }
+#' @author Daniel Falster - [email protected]
+#' @export
+db_traits_pivot_wider <- function(traits) {
+
+  # Meta-data columns left out of the row identifiers, and so dropped from the wide table
+  metadata_cols <- c("unit", "replicates", "measurement_remarks", "basis_of_value")
+
+  # Observations (per dataset, method and repeat measurement) with more than one value_type
+  mixed_value_types <- traits %>%
+    dplyr::group_by(
+      .data$dataset_id, .data$observation_id, .data$method_id,
+      .data$method_context_id, .data$repeat_measurements_id) %>%
+    dplyr::summarise(
+      n_value_type = length(unique(.data$value_type)),
+      .groups = "drop"
+    ) %>%
+    dplyr::filter(.data$n_value_type > 1)
+
+  # `value_type` can only be dropped when no observation mixes value types; even a
+  # single mixed observation needs it as an identifier to keep its values on separate rows
+  if (nrow(mixed_value_types) == 0) {
+    metadata_cols <- c(metadata_cols, "value_type")
+  }
+
+  traits %>%
+    tidyr::pivot_wider(
+      names_from = "trait_name",
+      values_from = "value",
+      id_cols = -dplyr::all_of(metadata_cols)
+    )
+
+}
+
+
+#' @title Pivot wide format data into a long format
+#'
+#' @description `db_traits_pivot_longer` "gathers" wide format data into a "tidy" format.
+#'
+#' This function converts the data into long format where observations are on different rows and the type of
+#' observation is denoted by the `trait_name` column.
+#' In other words, `db_traits_pivot_longer` reverts the actions of `db_traits_pivot_wider`.
+#' @param wide_data Output from `db_traits_pivot_wider` (a tibble of wide data). It must contain an
+#' `original_name` column; every column to the right of it is treated as a trait.
+#' @return A tibble in long format, with trait names in `trait_name`, their values in `value`
+#' and rows with missing values dropped
+#' @details
+#' `db_traits_pivot_longer` will return a tibble with fewer columns than the original traits table:
+#' only columns present in `wide_data` are returned, so meta-data columns dropped by
+#' `db_traits_pivot_wider` are not recovered.
+#'
+#' @examples
+#' \dontrun{
+#' data <- austraits$traits %>%
+#' filter(dataset_id == "Falster_2003")
+#' data # Long format
+#' traits_wide <- db_traits_pivot_wider(data)
+#' traits_wide # Wide format
+#'
+#' values_long <- db_traits_pivot_longer(traits_wide)
+#' }
+#' @author Daniel Falster - [email protected]
+#' @author Fonti Kar - [email protected]
+#' @export
+db_traits_pivot_longer <- function(wide_data) {
+
+  # The start of the trait columns is after `original_name`
+  name_position <- which(names(wide_data) == "original_name")
+
+  # Fail with a clear message rather than a cryptic error from `:` or the column selection
+  if (length(name_position) != 1) {
+    stop("`wide_data` must contain exactly one `original_name` column", call. = FALSE)
+  }
+  if (name_position == ncol(wide_data)) {
+    stop("`wide_data` has no trait columns after `original_name`", call. = FALSE)
+  }
+
+  trait_positions <- seq.int(name_position + 1L, ncol(wide_data))
+
+  wide_data %>%
+    tidyr::pivot_longer(
+      cols = dplyr::all_of(trait_positions),
+      names_to = "trait_name",
+      values_drop_na = TRUE
+    )
+
+}