Skip to content

Commit

Permalink
Merge pull request #22 from nfdi4health/main
Browse files Browse the repository at this point in the history
Final KORA Version
  • Loading branch information
FlorianSchw authored Jan 8, 2025
2 parents 956a538 + f7fd177 commit 4cb700a
Show file tree
Hide file tree
Showing 35 changed files with 774 additions and 717 deletions.
42 changes: 42 additions & 0 deletions .github/workflows/dpe-reconstruct.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
name: Reconstruct dpes

# Trigger workflow on push to the main branch
on:
  push:
    branches:
      - main

# The last step pushes a commit back to the repository, so the job token
# needs write access to the repository contents.
permissions:
  contents: write

# Define the job
jobs:
  update_document:
    runs-on: ubuntu-latest

    steps:
      # Step 1: Checkout the repository
      - name: Checkout code
        uses: actions/checkout@v4

      # Step 2: Set up R
      - name: Set up R
        uses: r-lib/actions/setup-r@v2
        with:
          r-version: '4.3'

      # Step 3: Install the packages loaded by reconstructing_dpes.R
      - name: Install R packages
        run: |
          Rscript -e "install.packages(c('dplyr', 'readxl', 'writexl', 'here'), repos='https://cloud.r-project.org/')"

      # Step 4: Run the script that rebuilds the combined DPE workbooks
      - name: Run update script
        run: |
          Rscript create_mock_data/reconstructing_dpes.R

      # Step 5: Commit and push the regenerated workbooks
      - name: Commit changes
        run: |
          git config --global user.name "github-actions[bot]"
          git config --global user.email "github-actions[bot]@users.noreply.github.com"
          git add rmonize/DPE_KARMEN_P1_test.xlsx rmonize/DPE_KARMEN_P2_test.xlsx
          # `git commit` exits non-zero when nothing is staged, which would fail
          # the job on every push that leaves the DPE files unchanged.
          if git diff --cached --quiet; then
            echo "DPE files unchanged; nothing to commit."
          else
            git commit -m "Update DPE_KARMEN_P1 and DPE_KARMEN_P2 files"
            git push
          fi
Binary file added Franzi/DPE_KARMEN_FRANZI.xlsx
Binary file not shown.
Binary file added Ines/DPE_KARMEN_INES.xlsx
Binary file not shown.
Binary file added Tracy/DPE_KARMEN_TRACY.xlsx
Binary file not shown.
2 changes: 1 addition & 1 deletion create_mock_data/mock_data_function.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#### Script for creating mock data
#studyname <- "GINI_P1"
#studyname <- "GINI_P2"
create_mock_data <- function(studyname = NULL,
single_dataset = TRUE,
vars_second_dataset = NULL){
Expand Down
3 changes: 2 additions & 1 deletion create_mock_data/mock_data_initiation.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,6 @@

source(here::here("create_mock_data", "mock_data_function.R"))

create_mock_data(studyname = "GINI_P1")
create_mock_data(studyname = "KORA_S1_P2")


38 changes: 38 additions & 0 deletions create_mock_data/reconstructing_dpes.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@

# Load necessary libraries
library(dplyr)
library(readxl)
library(writexl)
library(here)

# Rebuild the combined KARMEN DPE workbooks (P1 and P2) from the per-author
# DPE workbooks and the two dataschema workbooks.


##### Creating DPE_P1 and DPE_P2

# First sheet of each per-author DPE workbook.
dpe_tracy <- readxl::read_excel(here::here("Tracy/DPE_KARMEN_TRACY.xlsx"), sheet = 1)
dpe_ines <- readxl::read_excel(here::here("Ines/DPE_KARMEN_INES.xlsx"), sheet = 1)
dpe_franzi <- readxl::read_excel(here::here("Franzi/DPE_KARMEN_FRANZI.xlsx"), sheet = 1)

# Read one dataschema workbook into a list:
# sheet 1 becomes `Variables`, sheet 2 becomes `Categories`.
read_dataschema <- function(path) {
  list(
    Variables = tibble::tibble(readxl::read_excel(path, sheet = 1)),
    Categories = tibble::tibble(readxl::read_excel(path, sheet = 2))
  )
}

dataschema_p1 <- read_dataschema(here::here("rmonize/data_schema/", "Dataschema_P1.xlsx"))
dataschema_p2 <- read_dataschema(here::here("rmonize/data_schema/", "Dataschema_P2.xlsx"))

# Stack the three per-author DPE tables into one.
combined_data <- bind_rows(dpe_tracy, dpe_ines, dpe_franzi)

# Subset for dpe_p1: keep the rows whose `dataschema_variable` is named in the
# P1 dataschema, then pick rows by position using the schema's `index` column.
# NOTE(review): `match(index, row_number())` selects rows by their position in
# the filtered table, not by variable name. If the goal is to follow the
# dataschema's variable order, confirm that the stacked DPE rows already line
# up with `index`.
dpe_p1 <- combined_data %>%
  filter(dataschema_variable %in% dataschema_p1$Variables$name) %>%
  slice(match(dataschema_p1$Variables$index, row_number())) %>%
  mutate(input_dataset = "KARMEN_P1")

# Subset for dpe_p2: same steps against the P2 dataschema.
dpe_p2 <- combined_data %>%
  filter(dataschema_variable %in% dataschema_p2$Variables$name) %>%
  slice(match(dataschema_p2$Variables$index, row_number())) %>%
  mutate(input_dataset = "KARMEN_P2")

# Write next to the project root, like the reads above, so the output location
# does not depend on the working directory the script is started from.
writexl::write_xlsx(dpe_p1, here::here("rmonize", "DPE_KARMEN_P1_test.xlsx"))
writexl::write_xlsx(dpe_p2, here::here("rmonize", "DPE_KARMEN_P2_test.xlsx"))
202 changes: 101 additions & 101 deletions data/DATA_KORA_S1_P1.csv

Large diffs are not rendered by default.

202 changes: 101 additions & 101 deletions data/DATA_KORA_S1_P2.csv

Large diffs are not rendered by default.

101 changes: 0 additions & 101 deletions data/DATA_KORA_S1_P2_Preliminary.csv

This file was deleted.

202 changes: 101 additions & 101 deletions data/DATA_KORA_S3_P1.csv

Large diffs are not rendered by default.

202 changes: 101 additions & 101 deletions data/DATA_KORA_S3_P2.csv

Large diffs are not rendered by default.

101 changes: 0 additions & 101 deletions data/DATA_KORA_S3_P2_Preliminary.csv

This file was deleted.

101 changes: 0 additions & 101 deletions output/harmonised_dataset/KORA_S1_P1_harmonized.csv

This file was deleted.

Binary file modified rmonize/data_dictionary/DD_KORA_S1_P1.xlsx
Binary file not shown.
Binary file modified rmonize/data_dictionary/DD_KORA_S1_P2.xlsx
Binary file not shown.
Binary file modified rmonize/data_dictionary/DD_KORA_S3_P1.xlsx
Binary file not shown.
Binary file modified rmonize/data_dictionary/DD_KORA_S3_P2.xlsx
Binary file not shown.
Binary file added rmonize/data_proc_elem/DPE_KARMEN_P1.xlsx
Binary file not shown.
Binary file added rmonize/data_proc_elem/DPE_KARMEN_P2.xlsx
Binary file not shown.
Binary file modified rmonize/data_proc_elem/DPE_KORA_S1_P1.xlsx
Binary file not shown.
Binary file modified rmonize/data_proc_elem/DPE_KORA_S1_P2.xlsx
Binary file not shown.
Binary file modified rmonize/data_proc_elem/DPE_KORA_S3_P1.xlsx
Binary file not shown.
Binary file modified rmonize/data_proc_elem/DPE_KORA_S3_P2.xlsx
Binary file not shown.
Binary file modified rmonize/data_schema/Dataschema_P1.xlsx
Binary file not shown.
Binary file modified rmonize/data_schema/Dataschema_P2.xlsx
Binary file not shown.
24 changes: 23 additions & 1 deletion scripts/GINI_P1.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,15 @@
# install.packages("tidyverse")
# install.packages("here")
# install.packages("car")
# install.packages("writexl")

#### Load the package in order to conduct
library(Rmonize)
library(readxl)
library(tidyverse)
library(here)
library(car)
library(writexl)

#### Step 0: Name of the study
dataset_name <- "GINI_P1"
Expand Down Expand Up @@ -78,14 +80,34 @@ Rmonize::harmonized_dossier_visualize(harmonized_dossier,
bookdown_path,
harmonized_dossier_summary = harmonized_dossier_summary)

# Create the per-run summary folder. `recursive = TRUE` also creates the parent
# "output/rmonize_summary" folder (and "output") when missing, and
# `showWarnings = FALSE` keeps re-runs quiet when the folder already exists.
dir.create(here::here("output/rmonize_summary/", paste0(dataset_name, "_", system_name)),
           recursive = TRUE, showWarnings = FALSE)

# Copy the "docs" folder of this run's report into the summary folder.
file.copy(here::here("output/rmonize_report/", paste0(dataset_name, "_", system_name, "/docs")),
          here::here("output/rmonize_summary/", paste0(dataset_name, "_", system_name)), recursive=TRUE)

# Build the dictionary written under "output/opal_dd": a copy of the dataschema
# with a `table` column holding the dataset name on both sheets.
dir.create(here::here("output/opal_dd/", paste0(dataset_name, "_", system_name)),
           recursive = TRUE, showWarnings = FALSE)
opal_dd <- dataschema
opal_dd$Variables$table <- dataset_name
opal_dd$Categories$table <- dataset_name
# Columns are reordered by position.
# NOTE(review): this assumes Variables has 4 columns and Categories has 3
# before `table` is appended, so `table` ends up second / first. Confirm
# against the dataschema workbooks.
opal_dd$Variables <- opal_dd$Variables[c(1,5,2:4)]
opal_dd$Categories <- opal_dd$Categories[c(4,1:3)]

writexl::write_xlsx(opal_dd, here::here("output/opal_dd/", paste0(dataset_name, "_", system_name, "/", dataset_name, "_DD.xlsx")))


# Open the visual report in a browser.
fabR::bookdown_open(bookdown_path)

#### Step 9: Extract and save harmonized data into a pre-set folder
harmonized_dataset <- Rmonize::pooled_harmonized_dataset_create(harmonized_dossier)

# Make sure the per-run harmonised dataset folder exists before the CSV is written.
dir.create(here::here("output/harmonised_dataset/", paste0(dataset_name, "_", system_name)),
           recursive = TRUE, showWarnings = FALSE)

readr::write_delim(x = harmonized_dataset,
file = here::here(paste0("output/harmonised_dataset/", dataset_name, "_harmonized.csv")),
file = here::here(paste0("output/harmonised_dataset/", dataset_name, "_", system_name, "/", dataset_name,"_harmonized.csv")),
delim = ",",
na = "")

Expand Down
25 changes: 24 additions & 1 deletion scripts/GINI_P2.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,15 @@
# install.packages("tidyverse")
# install.packages("here")
# install.packages("car")
# install.packages("writexl")

#### Load the package in order to conduct
library(Rmonize)
library(readxl)
library(tidyverse)
library(here)
library(car)
library(writexl)

#### Step 0: Name of the study
dataset_name <- "GINI_P2"
Expand Down Expand Up @@ -77,13 +79,34 @@ Rmonize::harmonized_dossier_visualize(harmonized_dossier,
bookdown_path,
harmonized_dossier_summary = harmonized_dossier_summary)


# Create the per-run summary folder. `recursive = TRUE` also creates the parent
# "output/rmonize_summary" folder (and "output") when missing, and
# `showWarnings = FALSE` keeps re-runs quiet when the folder already exists.
dir.create(here::here("output/rmonize_summary/", paste0(dataset_name, "_", system_name)),
           recursive = TRUE, showWarnings = FALSE)

# Copy the "docs" folder of this run's report into the summary folder.
file.copy(here::here("output/rmonize_report/", paste0(dataset_name, "_", system_name, "/docs")),
          here::here("output/rmonize_summary/", paste0(dataset_name, "_", system_name)), recursive=TRUE)

# Build the dictionary written under "output/opal_dd": a copy of the dataschema
# with a `table` column holding the dataset name on both sheets.
dir.create(here::here("output/opal_dd/", paste0(dataset_name, "_", system_name)),
           recursive = TRUE, showWarnings = FALSE)
opal_dd <- dataschema
opal_dd$Variables$table <- dataset_name
opal_dd$Categories$table <- dataset_name
# Columns are reordered by position.
# NOTE(review): this assumes Variables has 4 columns and Categories has 3
# before `table` is appended, so `table` ends up second / first. Confirm
# against the dataschema workbooks.
opal_dd$Variables <- opal_dd$Variables[c(1,5,2:4)]
opal_dd$Categories <- opal_dd$Categories[c(4,1:3)]

writexl::write_xlsx(opal_dd, here::here("output/opal_dd/", paste0(dataset_name, "_", system_name, "/", dataset_name, "_DD.xlsx")))


# Open the visual report in a browser.
fabR::bookdown_open(bookdown_path)

#### Step 9: Extract and save harmonized data into a pre-set folder
harmonized_dataset <- Rmonize::pooled_harmonized_dataset_create(harmonized_dossier)

# Make sure the per-run harmonised dataset folder exists before the CSV is written.
dir.create(here::here("output/harmonised_dataset/", paste0(dataset_name, "_", system_name)),
           recursive = TRUE, showWarnings = FALSE)

readr::write_delim(x = harmonized_dataset,
file = here::here(paste0("output/harmonised_dataset/", dataset_name, "_harmonized.csv")),
file = here::here(paste0("output/harmonised_dataset/", dataset_name, "_", system_name, "/", dataset_name,"_harmonized.csv")),
delim = ",",
na = "")
24 changes: 23 additions & 1 deletion scripts/KORA_S1_P1.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,15 @@
# install.packages("tidyverse")
# install.packages("here")
# install.packages("car")
# install.packages("writexl")

#### Load the package in order to conduct
library(Rmonize)
library(readxl)
library(tidyverse)
library(here)
library(car)
library(writexl)

#### Step 0: Name of the study
dataset_name <- "KORA_S1_P1"
Expand Down Expand Up @@ -78,14 +80,34 @@ Rmonize::harmonized_dossier_visualize(harmonized_dossier,
bookdown_path,
harmonized_dossier_summary = harmonized_dossier_summary)

# Create the per-run summary folder. `recursive = TRUE` also creates the parent
# "output/rmonize_summary" folder (and "output") when missing, and
# `showWarnings = FALSE` keeps re-runs quiet when the folder already exists.
dir.create(here::here("output/rmonize_summary/", paste0(dataset_name, "_", system_name)),
           recursive = TRUE, showWarnings = FALSE)

# Copy the "docs" folder of this run's report into the summary folder.
file.copy(here::here("output/rmonize_report/", paste0(dataset_name, "_", system_name, "/docs")),
          here::here("output/rmonize_summary/", paste0(dataset_name, "_", system_name)), recursive=TRUE)

# Build the dictionary written under "output/opal_dd": a copy of the dataschema
# with a `table` column holding the dataset name on both sheets.
dir.create(here::here("output/opal_dd/", paste0(dataset_name, "_", system_name)),
           recursive = TRUE, showWarnings = FALSE)
opal_dd <- dataschema
opal_dd$Variables$table <- dataset_name
opal_dd$Categories$table <- dataset_name
# Columns are reordered by position.
# NOTE(review): this assumes Variables has 4 columns and Categories has 3
# before `table` is appended, so `table` ends up second / first. Confirm
# against the dataschema workbooks.
opal_dd$Variables <- opal_dd$Variables[c(1,5,2:4)]
opal_dd$Categories <- opal_dd$Categories[c(4,1:3)]

writexl::write_xlsx(opal_dd, here::here("output/opal_dd/", paste0(dataset_name, "_", system_name, "/", dataset_name, "_DD.xlsx")))


# Open the visual report in a browser.
fabR::bookdown_open(bookdown_path)

#### Step 9: Extract and save harmonized data into a pre-set folder
harmonized_dataset <- Rmonize::pooled_harmonized_dataset_create(harmonized_dossier)

# Make sure the per-run harmonised dataset folder exists before the CSV is written.
dir.create(here::here("output/harmonised_dataset/", paste0(dataset_name, "_", system_name)),
           recursive = TRUE, showWarnings = FALSE)

readr::write_delim(x = harmonized_dataset,
file = here::here(paste0("output/harmonised_dataset/", dataset_name, "_harmonized.csv")),
file = here::here(paste0("output/harmonised_dataset/", dataset_name, "_", system_name, "/", dataset_name,"_harmonized.csv")),
delim = ",",
na = "")

Expand Down
25 changes: 24 additions & 1 deletion scripts/KORA_S1_P2.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,15 @@
# install.packages("tidyverse")
# install.packages("here")
# install.packages("car")
# install.packages("writexl")

#### Load the package in order to conduct
library(Rmonize)
library(readxl)
library(tidyverse)
library(here)
library(car)
library(writexl)

#### Step 0: Name of the study
dataset_name <- "KORA_S1_P2"
Expand Down Expand Up @@ -77,13 +79,34 @@ Rmonize::harmonized_dossier_visualize(harmonized_dossier,
bookdown_path,
harmonized_dossier_summary = harmonized_dossier_summary)


# Create the per-run summary folder. `recursive = TRUE` also creates the parent
# "output/rmonize_summary" folder (and "output") when missing, and
# `showWarnings = FALSE` keeps re-runs quiet when the folder already exists.
dir.create(here::here("output/rmonize_summary/", paste0(dataset_name, "_", system_name)),
           recursive = TRUE, showWarnings = FALSE)

# Copy the "docs" folder of this run's report into the summary folder.
file.copy(here::here("output/rmonize_report/", paste0(dataset_name, "_", system_name, "/docs")),
          here::here("output/rmonize_summary/", paste0(dataset_name, "_", system_name)), recursive=TRUE)

# Build the dictionary written under "output/opal_dd": a copy of the dataschema
# with a `table` column holding the dataset name on both sheets.
dir.create(here::here("output/opal_dd/", paste0(dataset_name, "_", system_name)),
           recursive = TRUE, showWarnings = FALSE)
opal_dd <- dataschema
opal_dd$Variables$table <- dataset_name
opal_dd$Categories$table <- dataset_name
# Columns are reordered by position.
# NOTE(review): this assumes Variables has 4 columns and Categories has 3
# before `table` is appended, so `table` ends up second / first. Confirm
# against the dataschema workbooks.
opal_dd$Variables <- opal_dd$Variables[c(1,5,2:4)]
opal_dd$Categories <- opal_dd$Categories[c(4,1:3)]

writexl::write_xlsx(opal_dd, here::here("output/opal_dd/", paste0(dataset_name, "_", system_name, "/", dataset_name, "_DD.xlsx")))


# Open the visual report in a browser.
fabR::bookdown_open(bookdown_path)

#### Step 9: Extract and save harmonized data into a pre-set folder
harmonized_dataset <- Rmonize::pooled_harmonized_dataset_create(harmonized_dossier)

# Make sure the per-run harmonised dataset folder exists before the CSV is written.
dir.create(here::here("output/harmonised_dataset/", paste0(dataset_name, "_", system_name)),
           recursive = TRUE, showWarnings = FALSE)

readr::write_delim(x = harmonized_dataset,
file = here::here(paste0("output/harmonised_dataset/", dataset_name, "_harmonized.csv")),
file = here::here(paste0("output/harmonised_dataset/", dataset_name, "_", system_name, "/", dataset_name,"_harmonized.csv")),
delim = ",",
na = "")
24 changes: 23 additions & 1 deletion scripts/KORA_S3_P1.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,15 @@
# install.packages("tidyverse")
# install.packages("here")
# install.packages("car")
# install.packages("writexl")

#### Load the package in order to conduct
library(Rmonize)
library(readxl)
library(tidyverse)
library(here)
library(car)
library(writexl)

#### Step 0: Name of the study
dataset_name <- "KORA_S3_P1"
Expand Down Expand Up @@ -78,13 +80,33 @@ Rmonize::harmonized_dossier_visualize(harmonized_dossier,
bookdown_path,
harmonized_dossier_summary = harmonized_dossier_summary)

# Create the per-run summary folder. `recursive = TRUE` also creates the parent
# "output/rmonize_summary" folder (and "output") when missing, and
# `showWarnings = FALSE` keeps re-runs quiet when the folder already exists.
dir.create(here::here("output/rmonize_summary/", paste0(dataset_name, "_", system_name)),
           recursive = TRUE, showWarnings = FALSE)

# Copy the "docs" folder of this run's report into the summary folder.
file.copy(here::here("output/rmonize_report/", paste0(dataset_name, "_", system_name, "/docs")),
          here::here("output/rmonize_summary/", paste0(dataset_name, "_", system_name)), recursive=TRUE)

# Build the dictionary written under "output/opal_dd": a copy of the dataschema
# with a `table` column holding the dataset name on both sheets.
dir.create(here::here("output/opal_dd/", paste0(dataset_name, "_", system_name)),
           recursive = TRUE, showWarnings = FALSE)
opal_dd <- dataschema
opal_dd$Variables$table <- dataset_name
opal_dd$Categories$table <- dataset_name
# Columns are reordered by position.
# NOTE(review): this assumes Variables has 4 columns and Categories has 3
# before `table` is appended, so `table` ends up second / first. Confirm
# against the dataschema workbooks.
opal_dd$Variables <- opal_dd$Variables[c(1,5,2:4)]
opal_dd$Categories <- opal_dd$Categories[c(4,1:3)]

writexl::write_xlsx(opal_dd, here::here("output/opal_dd/", paste0(dataset_name, "_", system_name, "/", dataset_name, "_DD.xlsx")))


# Open the visual report in a browser.
fabR::bookdown_open(bookdown_path)

#### Step 9: Extract and save harmonized data into a pre-set folder
harmonized_dataset <- Rmonize::pooled_harmonized_dataset_create(harmonized_dossier)

# Make sure the per-run harmonised dataset folder exists before the CSV is written.
dir.create(here::here("output/harmonised_dataset/", paste0(dataset_name, "_", system_name)),
           recursive = TRUE, showWarnings = FALSE)

readr::write_delim(x = harmonized_dataset,
file = here::here(paste0("output/harmonised_dataset/", dataset_name, "_harmonized.csv")),
file = here::here(paste0("output/harmonised_dataset/", dataset_name, "_", system_name, "/", dataset_name,"_harmonized.csv")),
delim = ",",
na = "")
Loading

0 comments on commit 4cb700a

Please sign in to comment.