Skip to content

Commit

Permalink
fix sharedString deletion
Browse files Browse the repository at this point in the history
  • Loading branch information
DavZim committed Dec 6, 2024
1 parent 69e00af commit 80aa7be
Show file tree
Hide file tree
Showing 2 changed files with 156 additions and 26 deletions.
65 changes: 43 additions & 22 deletions R/wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -1900,45 +1900,66 @@ deleteDataColumn <- function(wb, sheet, col) {
}

a <- wb$worksheets[[sheet]]$sheet_data

# t: if a shared string is used or if the string is a value itself
# v: the shared string index or the string itself (if t == 0)
# in wb$sharedStrings we find the values of the shared strings by index + 1 (0 indexed!)


# check which elements to delete
keep <- a$cols != col
# if there is no column to delete, exit early
if (all(keep)) return(invisible(0))

# delete cols in cols "col" move higher cols one down
# delete cols in cols "col", move higher cols one down
a$cols <- as.integer(a$cols[keep] - 1 * (a$cols[keep] > col))
a$rows <- a$rows[keep]

# reduce the shared strings pointers if they are not used anymore
has_t <- !is.na(a$t) & a$t == 1
used_shared <- a$v[has_t] # a reference to all shared strings
keep_t <- keep[has_t] # these shared strings are kept
keep_t[is.na(keep_t)] <- FALSE
keep_shared <- used_shared[keep_t]
rem_shared <- setdiff(unique(used_shared[!keep_t]), unique(keep_shared))
for (v in rem_shared) {
to_reduce <- as.numeric(keep_shared) > as.numeric(v)
to_reduce[is.na(to_reduce)] <- FALSE
if (any(to_reduce))
keep_shared[to_reduce] <- as.character(as.numeric(keep_shared[to_reduce]) - 1)
}
used_shared[keep_t] <- keep_shared
a$v[has_t] <- used_shared

ss <- data.frame(
# the old index 0 indexed, as used in a$v
old = as.numeric(seq(length(wb$sharedStrings)) - 1),
# will hold the new index 0 indexed, as used in a$v
new = NA,
# the actual strings
string = wb$sharedStrings
)

# 1. remove the values from sheet_data (a)
a$v <- a$v[keep]
a$t <- a$t[keep]


# update the shared strings map (ss) with the new indices

# v_this_sheet etc are the indices that are still used
v_this_sheet <- as.numeric(a$v[!is.na(a$t) & a$t == 1])
# get all string indices from other sheets, so that strings used in other sheets are not deleted!
v_other_sheets <- unlist(lapply(setdiff(seq_along(wb$worksheets), sheet), function(sh) {
a <- wb$worksheets[[sh]]$sheet_data
as.numeric(a$v[!is.na(a$t) & a$t == 1])
}))

idx <- sort(unique(c(v_this_sheet, v_other_sheets)))
ss$new[ss$old %in% idx] <- seq_along(idx) - 1

# 2. remove the values from the sharedStrings object
wb$sharedStrings <- wb$sharedStrings[idx + 1]
attr(wb$sharedStrings, "uniqueCount") <- length(idx)

# 3. reindex the values from the sheet_data to use new shared strings indices
a$v[a$t == 1] <- as.character(ss$new[as.numeric(a$v[a$t == 1]) + 1])

# update the shared strings for all other sheets
for (s in setdiff(seq_along(wb$worksheets), sheet)) {
a <- wb$worksheets[[s]]$sheet_data
a$v[a$t == 1] <- as.character(ss$new[as.numeric(a$v[a$t == 1]) + 1])
}

a$f <- updateFormula(a$f[keep], col = col)
a$n_elements <- sum(keep)

if ("data_count" %in% names(a)) a$data_count <- length(unique(a$v))

# remove the unneeded strings from sharedStrings
rv <- as.numeric(rem_shared) + 1
wb$sharedStrings <- wb$sharedStrings[-rv]
attr(wb$sharedStrings, "uniqueCount") <- length(unique(wb$sharedStrings))

# adjust styles
sheet_name <- wb$sheet_names[[sheet]]
this_sheet <- sapply(wb$styleObjects, function(o) {
Expand Down
117 changes: 113 additions & 4 deletions tests/testthat/test-wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ test_that("int2col and col2int", {
test_that("deleteDataColumn basics", {
wb <- createWorkbook()
addWorksheet(wb, "tester")

for (i in seq(5)) {
mat <- data.frame(x = rep(paste0(int2col(i), i), 10))
writeData(wb, sheet = 1, startRow = 1, startCol = i, mat)
Expand All @@ -29,8 +29,8 @@ test_that("deleteDataColumn basics", {
c("<f>=COUNTA(A2:A11)</f>", "<f>=COUNTA(B2:B11)</f>", "<f>=COUNTA(C2:C11)</f>",
"<f>=COUNTA(D2:D11)</f>", "<f>=COUNTA(E2:E11)</f>")
)


deleteDataColumn(wb, 1, col = 3)
expect_equal(read.xlsx(wb),
data.frame(x = rep("A1", 10), x = "B2", x = "D4", x = "E5", # no C3!
Expand Down Expand Up @@ -130,7 +130,7 @@ test_that("deleteDataColumn with formatting data", {

st <- openxlsx::createStyle(textDecoration = "Bold", fontSize = 20, fontColour = "red")
openxlsx::addStyle(wb, 1, style = st, rows = 1, cols = seq(ncol(df)))

sst <- wb$styleObjects[[1]]
sst$rows <- c(1, 1)
sst$cols <- c(1, 2)
Expand All @@ -140,3 +140,112 @@ test_that("deleteDataColumn with formatting data", {
expect_equal(wb$styleObjects[[1]],
sst)
})

test_that("deleteDataColumn with shared strings does not crash or change inputs", {
df <- data.frame("Col 1" = "Row 2 Col 1",
"Col 2" = "Row 2 Col 2",
"Col 3" = "Row 2 Col 3",
check.names = FALSE)

wb <- createWorkbook()
addWorksheet(wb, "tester")
writeData(wb, sheet = 1, startRow = 1, startCol = 1, x = df, colNames = TRUE)

deleteDataColumn(wb, sheet = 1, col = 2)

expect_equal(
wb$sharedStrings,
structure(
list(
"<si><t xml:space=\"preserve\">Col 1</t></si>",
"<si><t xml:space=\"preserve\">Col 3</t></si>",
"<si><t xml:space=\"preserve\">Row 2 Col 1</t></si>",
"<si><t xml:space=\"preserve\">Row 2 Col 3</t></si>"
),
uniqueCount = 4L
)
)
expect_equal(
read.xlsx(wb),
data.frame(
"Col 1" = "Row 2 Col 1",
"Col 3" = "Row 2 Col 3"
)
)
})


test_that("deleteDataColumn with shared strings on other sheets", {
df <- data.frame("ABC" = "I am a shared string with sheet 2!")
df2 <- data.frame("AB" = "I am a shared string with sheet 2!")

wb <- createWorkbook()
addWorksheet(wb, "tester")
writeData(wb, sheet = 1, startRow = 1, startCol = 1, x = df, colNames = TRUE)

simplify <- function(sd) data.frame(rows = sd$rows, cols = sd$cols, t = sd$t, v = sd$v)
expect_equal(
simplify(wb$worksheets[[1]]$sheet_data),
data.frame(rows = c(1, 2), cols = 1, t = 1, v = c("0", "1"))
)
expect_equal(
wb$sharedStrings,
structure(
list(
"<si><t xml:space=\"preserve\">ABC</t></si>",
"<si><t xml:space=\"preserve\">I am a shared string with sheet 2!</t></si>"
),
uniqueCount = 2L
)
)

addWorksheet(wb, "tester2")
writeData(wb, sheet = 2, startRow = 1, startCol = 1, x = df2, colNames = TRUE)

expect_equal(
simplify(wb$worksheets[[2]]$sheet_data),
data.frame(rows = c(1, 2), cols = 1, t = 1, v = c("2", "1"))
)
expect_equal(
wb$sharedStrings,
structure(
list(
"<si><t xml:space=\"preserve\">ABC</t></si>",
"<si><t xml:space=\"preserve\">I am a shared string with sheet 2!</t></si>",
"<si><t xml:space=\"preserve\">AB</t></si>"
),
uniqueCount = 3L
)
)


# deleting from sheet 1 does not delete the string from sheet 2!
deleteDataColumn(wb, sheet = 1, col = 1)

expect_equal(
simplify(wb$worksheets[[1]]$sheet_data),
data.frame(rows = numeric(0), cols = numeric(0), t = numeric(0), v = character(0))
)

# note on sheet 2, the indices v to the shared strings have to change as well!
expect_equal(
simplify(wb$worksheets[[2]]$sheet_data),
data.frame(rows = c(1, 2), cols = 1, t = 1, v = c("1", "0"))
)

expect_equal(
wb$sharedStrings,
structure(
list(
"<si><t xml:space=\"preserve\">I am a shared string with sheet 2!</t></si>",
"<si><t xml:space=\"preserve\">AB</t></si>"
),
uniqueCount = 2L
)
)

expect_equal(
read.xlsx(wb, sheet = 2),
data.frame(AB = "I am a shared string with sheet 2!")
)
})

0 comments on commit 80aa7be

Please sign in to comment.