-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcollate_beds_published_icb.R
170 lines (145 loc) · 5.94 KB
/
collate_beds_published_icb.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
# Import packages --------------------------------------------------------
library(tidyverse)
library(janitor)
library(rstudioapi)
library(readxl)
setwd(dirname(getActiveDocumentContext()$path))
source(file = "./functions.R")
# Set working directory
setwd("~/../../Department of Health and Social Care/NW005 - DischargeAnalysisCenter/Analysis Projects/20240129 - NCTR Published - Briefing Tool/Code/")
# Create backseries for icb beds from create_backseries/ directory
CREATE_BACKSERIES <- FALSE
#If not creating backseries, then read in backseries
if(CREATE_BACKSERIES == FALSE){
icb_backseries <- read_csv(file = './data/beds/backseries/beds_icb_backseries.csv')
print("read in backseries, ready to append new data to it")
} else {
print("creating backseries")
}
# Create vectors for later use from file names ----------------------------
# Only ICB level data in published files from Aug 2023 - current
if(CREATE_BACKSERIES){
beds_files <- list.files(path = 'data/beds/icb/create_bs/', pattern = '.xlsx')
} else {
beds_files <- list.files(path = 'data/beds/icb/', pattern = '.xlsx')
}
year_month_vec <- c()
month_floor_vec <- c()
for(date in beds_files){
year_ <- substr(x = date, start = 1, stop = 4)
month_ <- substr(x = date, start = 5, stop = 6)
year_month <- substr(x = date, start = 1, stop = 6)
floor_month <- paste0(year_, "-", month_, "-01")
year_month_vec <- c(year_month_vec, year_month)
month_floor_vec <- c(month_floor_vec, floor_month)
}
# DF length needs to match number of files in dir we are reading from
# E.g. create_bs/ or icb/
# USER SHOULD AMMEND AS APPROPRIATE ----------------
ICB_CELL_REF_DF <- data.frame(month_year = year_month_vec,
cell_ref = c(#"B15:O67", #aug 23
#"B15:O67", #sep 23
#"B15:O67", #oct 23
#"B15:O67", #nov 23
#"B15:O67", #dec 23
#"B15:O67", #jan 24
#"B15:O67", #feb 24
"B15:O67"), #mar 24
ignore_rows = c(#11,
#11,
#11,
#11,
#11,
#11,
#11, #feb 24
11)) #mar 24
# Read data function ------------------------------------------------------
read_data <- function(file_name, cell_ref, backseries){
if(backseries == TRUE){
path <- "data/beds/icb/create_bs/"
} else {
path <- "data/beds/icb/"
}
sheet <- grep(excel_sheets(path = paste0(path, file_name)),
pattern = 'type 1',
value = TRUE)
df <- readxl::read_xlsx(path = paste0(path, file_name),
sheet = sheet,
range = cell_ref)}
# Create list of excels, one item per month -------------------------------
list_excels <- function(df, backseries){
#create empty list
temp_list <- list()
#iterate through files in beds/ using index
for(i in c(1:length(beds_files))){
#obtain cell references from df
cell_ref <- df$cell_ref[df$month_year==year_month_vec[i]]
#get df with correct cell reference
print(beds_files[i])
print(cell_ref)
beds_df_temp <- read_data(beds_files[i], cell_ref, backseries)
print(sprintf("sucessfully read in file %s", beds_files[i]))
temp_list[[i]] <- beds_df_temp}
return(temp_list)
}
icb_excel_list <- list_excels(ICB_CELL_REF_DF, CREATE_BACKSERIES)
# Iterate through each dataframe in the list and wrangle to get
# beds data --------
wrangle_sheets <- function(excel_list, cell_ref_df){
for(i in c(1: length(excel_list))){
# iterate through each month
df <- excel_list[[i]]
# cell_ref_df$ignore_rows[i] gets ignore_rows in cell reference df
df <- df[c(cell_ref_df$ignore_rows[i]:dim(df)[1]),]
# rename columns
names(df)[1:3] <- c('region', 'icb_code', 'icb_name')
# get beds
df$`Adult G&A beds available` <- as.numeric(df$`Adult G&A beds available`)
df[['beds']] <- df[['Adult G&A beds available']] - df[['Adult G&A covid void beds']]
# add floor date per month for join with NCTR
df[['floor_month']] <- as.Date(month_floor_vec[i])
# keep selected columns
df <- df %>% select('region', 'icb_code', 'icb_name', 'beds', 'floor_month')
# update / overwrite original list
excel_list[[i]] <- df}
return(excel_list)
}
icb_excel_list_formatted <- wrangle_sheets(icb_excel_list, ICB_CELL_REF_DF)
#combine rows
icb_beds_long <- icb_excel_list_formatted %>%
bind_rows() %>%
mutate(icb_name = str_to_title(icb_name),
region = str_to_title(region))
test_national_icb_source <- icb_beds_long %>%
group_by(floor_month) %>%
summarise(sum = sum(beds))
#check 42 ICBs
length(unique(icb_beds_long$icb_name)) == 42
#check those ICBs with different count to number of months
count_occurences <- function(data){
n_months <- data %>%
distinct(floor_month) %>%
pull() %>%
length()
missing_icbs <- data %>%
group_by(icb_name) %>%
summarise(count = n()) %>%
filter(count != n_months) %>%
return(missing_icbs)
}
check <- count_occurences(icb_beds_long)
#write csv
date_today <- Sys.Date()
if(CREATE_BACKSERIES == TRUE){
# overwrite backseries
write.csv(x = icb_beds_long,
file = paste0('data/beds/backseries/beds_icb_backseries','.csv'),
row.names = FALSE)
} else {
# append new data to backseries
icb_backseries_and_new <- rbind(icb_backseries, icb_beds_long)
write.csv(x = icb_backseries_and_new,
file = paste0('output/monthly_beds_icb_', date_today,'.csv'),
row.names = FALSE)
}
check <- count_occurences(icb_backseries_and_new)