diff --git a/DESCRIPTION b/DESCRIPTION index e849ada..d6e8710 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -23,4 +23,4 @@ LinkingTo: Rcpp ByteCompile: yes Suggests: testthat Encoding: UTF-8 -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.2 diff --git a/R/read.R b/R/read.R index b936d70..277490e 100644 --- a/R/read.R +++ b/R/read.R @@ -1,5 +1,5 @@ # -# Copyright (C) 2014-2021 Jan Marvin Garbuszus and Sebastian Jeworutzki +# Copyright (C) 2014-2024 Jan Marvin Garbuszus and Sebastian Jeworutzki # Copyright (C) of 'convert.dates' and 'missing.types' Thomas Lumley # # This program is free software; you can redistribute it and/or modify it @@ -29,7 +29,7 @@ #' "label_(integer code)". #' @param encoding \emph{character.} Strings can be converted from Windows-1252 #' or UTF-8 to system encoding. Options are "latin1" or "UTF-8" to specify -#' target encoding explicitly. Stata 14, 15 and 16 files are UTF-8 encoded and +#' target encoding explicitly. Since Stata 14, files are UTF-8 encoded and #' may contain strings which can't be displayed in the current locale. #' Set encoding=NULL to stop reencoding. #' @param fromEncoding \emph{character.} We expect strings to be encoded as @@ -93,6 +93,13 @@ #' #' Reading dta-files of older and newer versions than 13 was introduced #' with version 0.8. +#' +#' Stata 18 introduced alias variables and frame files. Alias variables are +#' currently ignored when reading the file and a warning is printed. Stata +#' frame files (file extension `.dtas`) contain zipped `dta` files which can +#' be loaded individually. The read test provides an example of how to construct +#' the alias variables from a Stata frame file. +#' #' @return The function returns a data.frame with attributes.
The attributes #' include #' \describe{ @@ -127,7 +134,7 @@ #' \dontrun{ #' library(readstata13) #' r13 <- read.dta13("https://www.stata-press.com/data/r13/auto.dta") -#' } +#' } #' @author Jan Marvin Garbuszus \email{jan.garbuszus@@ruhr-uni-bochum.de} #' @author Sebastian Jeworutzki \email{sebastian.jeworutzki@@ruhr-uni-bochum.de} #' @useDynLib readstata13, .registration = TRUE @@ -212,6 +219,7 @@ read.dta13 <- function(file, convert.factors = TRUE, generate.factors=FALSE, sstr <- 2045 sstrl <- 32768 + salias <- 65525 sdouble <- 65526 sfloat <- 65527 slong <- 65528 diff --git a/R/readstata13.R b/R/readstata13.R index da6dd7d..e7caea4 100644 --- a/R/readstata13.R +++ b/R/readstata13.R @@ -8,10 +8,9 @@ #' #' @name readstata13 #' @aliases readstata13-package -#' @docType package #' @useDynLib readstata13, .registration = TRUE #' @import Rcpp #' @note If you catch a bug, please do not sue us, we do not have any money. #' @seealso \code{\link[foreign]{read.dta}} and \code{memisc} for dta files from #' Stata Versions < 13 -NULL +"_PACKAGE" diff --git a/R/save.R b/R/save.R index 11462a4..e90e390 100644 --- a/R/save.R +++ b/R/save.R @@ -33,8 +33,8 @@ #' to Stata date time format. Code from \code{foreign::write.dta} #' @param convert.underscore \emph{logical.} If \code{TRUE}, all non numerics or #' non alphabet characters will be converted to underscores. -#' @param tz \emph{character.} time zone specification to be used for -#' POSIXct values and dates (if convert.dates is TRUE). ‘""’ is the current +#' @param tz \emph{character.} time zone specification to be used for +#' POSIXct values and dates (if convert.dates is TRUE). ‘""’ is the current #' time zone, and ‘"GMT"’ is UTC (Universal Time, Coordinated). #' @param add.rownames \emph{logical.} If \code{TRUE}, a new variable rownames #' will be added to the dta-file. @@ -42,7 +42,7 @@ #' use all of Statas numeric-vartypes. 
#' @param version \emph{numeric.} Stata format for the resulting dta-file either #' Stata version number (6 - 16) or the internal Stata dta-format (e.g. 117 for -#' Stata 13). Experimental support for large datasets: Use version="15mp" to +#' Stata 13). Support for large datasets: Use version="15mp" to #' save the dataset in the new Stata 15/16 MP file format. This feature is not #' thoroughly tested yet. #' @return The function writes a dta-file to disk. The following features of the @@ -68,7 +68,7 @@ #' \dontrun{ #' library(readstata13) #' save.dta13(cars, file="cars.dta") -#' } +#' } #' @author Jan Marvin Garbuszus \email{jan.garbuszus@@ruhr-uni-bochum.de} #' @author Sebastian Jeworutzki \email{sebastian.jeworutzki@@ruhr-uni-bochum.de} #' @useDynLib readstata13, .registration = TRUE @@ -104,10 +104,7 @@ save.dta13 <- function(data, file, data.label=NULL, time.stamp=TRUE, if (version==6) version <- 108 - if (version == 119) - message("Support for Stata 15/16 MP (119) format is experimental and not thoroughly tested.") - - if (version<102 | version == 109 | version == 116 | version>119) + if (version<102 | version == 109 | version == 116 | version>121) stop("Version mismatch abort execution. 
No Data was saved.") sstr <- 2045 diff --git a/inst/extdata/myproject2.dtas b/inst/extdata/myproject2.dtas new file mode 100644 index 0000000..8feb1f5 Binary files /dev/null and b/inst/extdata/myproject2.dtas differ diff --git a/inst/include/readstata.h b/inst/include/readstata.h index 720475f..f9f7785 100644 --- a/inst/include/readstata.h +++ b/inst/include/readstata.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2015-2017 Jan Marvin Garbuszus and Sebastian Jeworutzki + * Copyright (C) 2015-2024 Jan Marvin Garbuszus and Sebastian Jeworutzki * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -155,6 +155,8 @@ inline Rcpp::IntegerVector calc_rowlength(Rcpp::IntegerVector vartype) { case STATA_STRL: rlen(i) = 8; break; + case STATA_ALIAS: // 0 + break; default: rlen(i) = type; break; diff --git a/inst/include/statadefines.h b/inst/include/statadefines.h index 2bda863..c7ad09a 100644 --- a/inst/include/statadefines.h +++ b/inst/include/statadefines.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2015 Jan Marvin Garbuszus and Sebastian Jeworutzki + * Copyright (C) 2015-2023 Jan Marvin Garbuszus and Sebastian Jeworutzki * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -53,6 +53,7 @@ #define STATA_INT 65528 #define STATA_FLOAT 65527 #define STATA_DOUBLE 65526 +#define STATA_ALIAS 65525 #define STATA_STR 2045 #define STATA_SHORT_STR 244 diff --git a/man/read.dta13.Rd b/man/read.dta13.Rd index e4072a9..5bc4f45 100644 --- a/man/read.dta13.Rd +++ b/man/read.dta13.Rd @@ -36,7 +36,7 @@ are found, unique labels will be generated according the following scheme: \item{encoding}{\emph{character.} Strings can be converted from Windows-1252 or UTF-8 to system encoding. Options are "latin1" or "UTF-8" to specify -target encoding explicitly. 
Stata 14, 15 and 16 files are UTF-8 encoded and +target encoding explicitly. Since Stata 14, files are UTF-8 encoded and may contain strings which can't be displayed in the current locale. Set encoding=NULL to stop reencoding.} @@ -141,6 +141,12 @@ In R, you may use rownames to store characters (see for instance Reading dta-files of older and newer versions than 13 was introduced with version 0.8. + +Stata 18 introduced alias variables and frame files. Alias variables are + currently ignored when reading the file and a warning is printed. Stata + frame files (file extension `.dtas`) contain zipped `dta` files which can + be loaded individually. The read test provides an example of how to construct + the alias variables from a Stata frame file. } \note{ read.dta13 uses GPL 2 licensed code by Thomas Lumley and R-core members @@ -150,7 +156,7 @@ read.dta13 uses GPL 2 licensed code by Thomas Lumley and R-core members \dontrun{ library(readstata13) r13 <- read.dta13("https://www.stata-press.com/data/r13/auto.dta") -} +} } \references{ Stata Corp (2014): Description of .dta file format diff --git a/man/save.dta13.Rd b/man/save.dta13.Rd index cbf9fbf..1db6433 100644 --- a/man/save.dta13.Rd +++ b/man/save.dta13.Rd @@ -37,8 +37,8 @@ hexcode.} \item{convert.dates}{\emph{logical.} If \code{TRUE}, dates will be converted to Stata date time format. Code from \code{foreign::write.dta}} -\item{tz}{\emph{character.} time zone specification to be used for -POSIXct values and dates (if convert.dates is TRUE). ‘""’ is the current +\item{tz}{\emph{character.} time zone specification to be used for +POSIXct values and dates (if convert.dates is TRUE).
‘""’ is the current time zone, and ‘"GMT"’ is UTC (Universal Time, Coordinated).} \item{add.rownames}{\emph{logical.} If \code{TRUE}, a new variable rownames @@ -49,7 +49,7 @@ use all of Statas numeric-vartypes.} \item{version}{\emph{numeric.} Stata format for the resulting dta-file either Stata version number (6 - 16) or the internal Stata dta-format (e.g. 117 for -Stata 13). Experimental support for large datasets: Use version="15mp" to +Stata 13). Support for large datasets: Use version="15mp" to save the dataset in the new Stata 15/16 MP file format. This feature is not thoroughly tested yet.} @@ -80,7 +80,7 @@ into a dta-file. \dontrun{ library(readstata13) save.dta13(cars, file="cars.dta") -} +} } \references{ Stata Corp (2014): Description of .dta file format diff --git a/src/read_data.cpp b/src/read_data.cpp index c6028ec..437bb77 100644 --- a/src/read_data.cpp +++ b/src/read_data.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014-2018 Jan Marvin Garbuszus and Sebastian Jeworutzki + * Copyright (C) 2014-2024 Jan Marvin Garbuszus and Sebastian Jeworutzki * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -46,6 +46,12 @@ List read_data(FILE * file, SET_VECTOR_ELT(df, i, IntegerVector(no_init(nn))); break; + // return correct column size and create a warning + case STATA_ALIAS: + SET_VECTOR_ELT(df, i, CharacterVector(no_init(nn))); + Rf_warning("File contains unhandled alias variable in column: %d", i + 1); + break; + default: SET_VECTOR_ELT(df, i, CharacterVector(no_init(nn))); break; @@ -166,6 +172,7 @@ List read_data(FILE * file, break; } case 118: + case 120: { int16_t v = 0; int64_t o = 0, z = 0; @@ -193,6 +200,7 @@ List read_data(FILE * file, break; } case 119: + case 121: { int32_t v = 0; int64_t o = 0, z = 0; @@ -221,8 +229,13 @@ List read_data(FILE * file, } } break; + } + case STATA_ALIAS: + { + break; // do nothing } // case < 0: + // case STATA_ALIAS 
default: { // skip to the next valid case diff --git a/src/read_dta.cpp b/src/read_dta.cpp index 94d8a9e..1703cca 100644 --- a/src/read_dta.cpp +++ b/src/read_dta.cpp @@ -1,5 +1,5 @@ /* - * Copyright (C) 2014-2023 Jan Marvin Garbuszus and Sebastian Jeworutzki + * Copyright (C) 2014-2024 Jan Marvin Garbuszus and Sebastian Jeworutzki * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the @@ -38,7 +38,7 @@ List read_dta(FILE * file, */ int8_t fversion = 117L; //f = first - int8_t lversion = 119L; //l = last + int8_t lversion = 121L; //l = last std::string version(3, '\0'); readstring(version, file, version.size()); @@ -74,6 +74,8 @@ List read_dta(FILE * file, break; case 118: case 119: + case 120: + case 121: nvarnameslen = 129; nformatslen = 57; nvalLabelslen = 129; @@ -106,9 +108,9 @@ List read_dta(FILE * file, */ uint32_t k = 0; - if (release < 119) + if (release < 119 || release == 120) k = readbin((uint16_t)k, file, swapit); - if (release == 119) + if (release == 119 || release == 121) k = readbin(k, file, swapit); // @@ -123,7 +125,7 @@ List read_dta(FILE * file, if (release == 117) n = readbin((uint32_t)n, file, swapit); - if ((release == 118) | (release == 119)) + if ((release >= 118) && (release <= 121)) n = readbin(n, file, swapit); // @@ -146,7 +148,7 @@ List read_dta(FILE * file, if (release == 117) ndlabel = readbin((int8_t)ndlabel, file, swapit); - if ((release == 118) | (release == 119)) + if ((release >= 118) && (release <= 121)) ndlabel = readbin(ndlabel, file, swapit); std::string datalabel(ndlabel, '\0'); @@ -224,6 +226,7 @@ List read_dta(FILE * file, * vartypes. 
* 0-2045: strf (String: Max length 2045) * 32768: strL (long String: Max length 2 billion) + * 65525: alias * 65526: double * 65527: float * 65528: long @@ -274,9 +277,9 @@ List read_dta(FILE * file, { uint32_t nsortlist = 0; - if ((release == 117) | (release == 118)) + if ((release == 117) || (release == 118) || (release == 120)) nsortlist = readbin((uint16_t)nsortlist, file, swapit); - if (release == 119) + if (release == 119 || release == 121) nsortlist = readbin(nsortlist, file, swapit); sortlist[i] = nsortlist; @@ -530,6 +533,8 @@ List read_dta(FILE * file, } case 118: case 119: + case 120: + case 121: { uint32_t v = 0; uint64_t o = 0; diff --git a/src/save_dta.cpp b/src/save_dta.cpp index 5805c7e..7a944e9 100644 --- a/src/save_dta.cpp +++ b/src/save_dta.cpp @@ -76,6 +76,8 @@ int stata_save(const char * filePath, Rcpp::DataFrame dat) break; case 118: case 119: + case 120: + case 121: nvarnameslen = 129; nformatslen = 57; nvalLabelslen = 129; @@ -152,14 +154,14 @@ int stata_save(const char * filePath, Rcpp::DataFrame dat) writestr(byteord, byteord.size(), dta); writestr(sbyteorder, 3, dta); // LSF writestr(K, K.size(), dta); - if (release < 119) + if (release < 119 || release == 120) writebin((int16_t)k, dta, swapit); - if (release == 119) + if (release == 119 || release == 121) writebin(k, dta, swapit); writestr(num, num.size(), dta); if (release == 117) writebin((int32_t)n, dta, swapit); - if ((release == 118) | (release == 119)) + if ((release == 118) | (release == 119) | (release == 120) | (release == 121)) writebin(n, dta, swapit); writestr(lab, lab.size(), dta); @@ -178,7 +180,7 @@ int stata_save(const char * filePath, Rcpp::DataFrame dat) if (release == 117) writebin((uint8_t)ndlabel, dta, swapit); - if ((release == 118) | (release == 119)) + if ((release == 118) | (release == 119) | (release == 120) | (release == 121)) writebin(ndlabel, dta, swapit); writestr(datalabel,datalabel.size(), dta); @@ -189,7 +191,7 @@ int stata_save(const char * filePath, 
Rcpp::DataFrame dat) if (release == 117) { writebin(zero, dta, swapit); } - if ((release == 118) | (release == 119)) { + if ((release == 118) | (release == 119) | (release == 120) | (release == 121)) { writebin(zero, dta, swapit); writebin(zero, dta, swapit); } @@ -257,10 +259,10 @@ int stata_save(const char * filePath, Rcpp::DataFrame dat) { uint32_t nsortlist = 0; - if ((release == 117) | (release == 118)) { + if ((release == 117) | (release == 118) | (release == 120)) { writebin((uint16_t)nsortlist, dta, swapit); } - if (release == 119) { + if ((release == 119) | (release == 121)) { writebin(nsortlist, dta, swapit); } } @@ -496,6 +498,7 @@ int stata_save(const char * filePath, Rcpp::DataFrame dat) break; } case 118: + case 120: { int16_t v = i+1; int64_t o = j+1; @@ -519,6 +522,7 @@ int stata_save(const char * filePath, Rcpp::DataFrame dat) break; } case 119: + case 121: { int32_t v = i+1; int64_t o = j+1; @@ -575,7 +579,7 @@ int stata_save(const char * filePath, Rcpp::DataFrame dat) writebin(v, dta, swapit); if (release == 117) writebin((uint32_t)o, dta, swapit); - if ((release == 118) | (release == 119)) + if ((release == 118) | (release == 119) | (release == 120) | (release == 121)) writebin(o, dta, swapit); writebin(t, dta, swapit); writebin(len, dta, swapit); diff --git a/tests/testthat/test_read.R b/tests/testthat/test_read.R index 52cadbc..4f66c9f 100644 --- a/tests/testthat/test_read.R +++ b/tests/testthat/test_read.R @@ -248,3 +248,64 @@ test_that("various datetime conversions", { dddates <- read.dta13(datetime, convert.dates = TRUE) expect_true(all.equal(dd, dddates, check.attributes = FALSE)) }) + +test_that("reading file format 120 works", { + + fl <- system.file("extdata", "myproject2.dtas", package="readstata13") + + tmp <- tempdir() + + fls <- unzip(fl, exdir = tmp) + + # data name, dta file name, dta version + data_fram <- strsplit(readLines(fls[1])[-c(1:2)], " ") + data_fram <- as.data.frame(do.call("rbind", data_fram)) + + 
expect_equal(data_fram$V1, c("persons", "counties")) + + # read dtas + dtas <- fls[tools::file_ext(fls) == "dta"] + expect_equal(basename(dtas), paste0(data_fram$V2, ".dta")) + + expect_warning( + df1 <- read.dta13(dtas[1]), + "File contains unhandled alias variable in column: 5" + ) + df2 <- read.dta13(dtas[2], convert.factors = FALSE) + + expect_equal(attr(df1, "version"), as.integer(data_fram$V3[1])) + expect_equal(attr(df2, "version"), as.integer(data_fram$V3[2])) + + # backup order + nams <- names(df1) + + # merge: fralias_from in attr(df1, "expansion.fields") tells what to merge + df <- merge( + df1[-which(names(df1) == "median")], + df2, + by = "countyid", + all.x = TRUE + ) + + # update names + as_name <- attr(df1, "expansion.fields")[[16]] + nams2 <- names(df) + nams2[nams2 == as_name[3]] <- as_name[1] + names(df) <- nams2 + + # restore expected order + df <- df[nams] + + # restore order + df <- df[order(df$personid), ] + + expect_equal( + df$personid, 1:20 + ) + + expect_equal( + c("personid", "countyid", "income", "counties", "median", "ratio"), + names(df) + ) + +}) diff --git a/tests/testthat/test_save.R b/tests/testthat/test_save.R index b303b49..b5873a0 100644 --- a/tests/testthat/test_save.R +++ b/tests/testthat/test_save.R @@ -109,6 +109,8 @@ dir.create("data") dd <- mtcars +save.dta13(dd, "data/dta_121.dta", version = 121, compress = TRUE) +save.dta13(dd, "data/dta_120.dta", version = 120, compress = TRUE) save.dta13(dd, "data/dta_119.dta", version = 119, compress = TRUE) save.dta13(dd, "data/dta_118.dta", version = 118, compress = TRUE) save.dta13(dd, "data/dta_117.dta", version = 117, compress = TRUE) @@ -126,6 +128,8 @@ save.dta13(dd, "data/dta_104.dta", version = 104, compress = TRUE) save.dta13(dd, "data/dta_103.dta", version = 103, compress = TRUE) save.dta13(dd, "data/dta_102.dta", version = 102, compress = TRUE) +dd121 <- read.dta13("data/dta_121.dta") +dd120 <- read.dta13("data/dta_120.dta") dd119 <- read.dta13("data/dta_119.dta") dd118 <-
read.dta13("data/dta_118.dta") dd117 <- read.dta13("data/dta_117.dta") @@ -147,6 +151,8 @@ dd102 <- read.dta13("data/dta_102.dta") unlink("data", recursive = TRUE) test_that("compress", { + expect_true(datacompare(dd, dd121)) + expect_true(datacompare(dd, dd120)) expect_true(datacompare(dd, dd119)) expect_true(datacompare(dd, dd118)) expect_true(datacompare(dd, dd117)) @@ -176,6 +182,8 @@ dir.create("data") dd <- mtcars dd$am <- factor(x = dd$am, levels = c(0,1), labels = c("auto", "man")) +save.dta13(dd, "data/dta_121.dta", version = 121, convert.factors = TRUE) +save.dta13(dd, "data/dta_120.dta", version = 120, convert.factors = TRUE) save.dta13(dd, "data/dta_119.dta", version = 119, convert.factors = TRUE) save.dta13(dd, "data/dta_118.dta", version = 118, convert.factors = TRUE) save.dta13(dd, "data/dta_117.dta", version = 117, convert.factors = TRUE) @@ -193,6 +201,8 @@ save.dta13(dd, "data/dta_107.dta", version = 107, convert.factors = TRUE) # save.dta13(dd, "data/dta_103.dta", version = 103, convert.factors = TRUE) # save.dta13(dd, "data/dta_102.dta", version = 102, convert.factors = TRUE) +dd121 <- read.dta13("data/dta_121.dta") +dd120 <- read.dta13("data/dta_120.dta") dd119 <- read.dta13("data/dta_119.dta") dd118 <- read.dta13("data/dta_118.dta") dd117 <- read.dta13("data/dta_117.dta") @@ -214,6 +224,8 @@ dd107 <- read.dta13("data/dta_107.dta") unlink("data", recursive = TRUE) test_that("convert.factors TRUE", { + expect_true(datacompare(dd, dd121)) + expect_true(datacompare(dd, dd120)) expect_true(datacompare(dd, dd119)) expect_true(datacompare(dd, dd118)) expect_true(datacompare(dd, dd117)) @@ -243,6 +255,8 @@ dir.create("data") dd <- mtcars dd$am <- factor(x = dd$am, levels = c(0,1), labels = c("auto", "man")) +save.dta13(dd, "data/dta_121.dta", version = 121, convert.factors = FALSE) +save.dta13(dd, "data/dta_120.dta", version = 120, convert.factors = FALSE) save.dta13(dd, "data/dta_119.dta", version = 119, convert.factors = FALSE) save.dta13(dd, 
"data/dta_118.dta", version = 118, convert.factors = FALSE) save.dta13(dd, "data/dta_117.dta", version = 117, convert.factors = FALSE) @@ -260,6 +274,8 @@ save.dta13(dd, "data/dta_107.dta", version = 107, convert.factors = FALSE) # save.dta13(dd, "data/dta_103.dta", version = 103, convert.factors = FALSE) # save.dta13(dd, "data/dta_102.dta", version = 102, convert.factors = FALSE) +dd121 <- read.dta13("data/dta_121.dta") +dd120 <- read.dta13("data/dta_120.dta") dd119 <- read.dta13("data/dta_119.dta") dd118 <- read.dta13("data/dta_118.dta") dd117 <- read.dta13("data/dta_117.dta") @@ -285,6 +301,8 @@ dd$am <- dd$am + 1 unlink("data", recursive = TRUE) test_that("convert.factors TRUE", { + expect_true(datacompare(dd, dd121)) + expect_true(datacompare(dd, dd120)) expect_true(datacompare(dd, dd119)) expect_true(datacompare(dd, dd118)) expect_true(datacompare(dd, dd117)) @@ -312,6 +330,8 @@ dir.create("data") dd <- mtcars +save.dta13(dd, "data/dta_121.dta", version = 121, add.rownames = TRUE) +save.dta13(dd, "data/dta_120.dta", version = 120, add.rownames = TRUE) save.dta13(dd, "data/dta_119.dta", version = 119, add.rownames = TRUE) save.dta13(dd, "data/dta_118.dta", version = 118, add.rownames = TRUE) save.dta13(dd, "data/dta_117.dta", version = 117, add.rownames = TRUE) @@ -329,6 +349,8 @@ save.dta13(dd, "data/dta_104.dta", version = 104, add.rownames = TRUE) save.dta13(dd, "data/dta_103.dta", version = 103, add.rownames = TRUE) save.dta13(dd, "data/dta_102.dta", version = 102, add.rownames = TRUE) +dd121 <- read.dta13("data/dta_121.dta", add.rownames = TRUE) +dd120 <- read.dta13("data/dta_120.dta", add.rownames = TRUE) dd119 <- read.dta13("data/dta_119.dta", add.rownames = TRUE) dd118 <- read.dta13("data/dta_118.dta", add.rownames = TRUE) dd117 <- read.dta13("data/dta_117.dta", add.rownames = TRUE) @@ -351,6 +373,8 @@ unlink("data", recursive = TRUE) test_that("add.rownames TRUE", { # Check that rownames are identical + expect_true(identical(rownames(dd), 
rownames(dd121))) + expect_true(identical(rownames(dd), rownames(dd120))) expect_true(identical(rownames(dd), rownames(dd119))) expect_true(identical(rownames(dd), rownames(dd118))) expect_true(identical(rownames(dd), rownames(dd117))) @@ -369,6 +393,8 @@ test_that("add.rownames TRUE", { expect_true(identical(rownames(dd), rownames(dd102))) # Check that data is identical + expect_true(datacompare(dd, dd121)) + expect_true(datacompare(dd, dd120)) expect_true(datacompare(dd, dd119)) expect_true(datacompare(dd, dd118)) expect_true(datacompare(dd, dd117)) @@ -399,6 +425,8 @@ dir.create("data") dd <- mtcars +save.dta13(dd, "data/dta_121.dta", version = 121, data.label = dl) +save.dta13(dd, "data/dta_120.dta", version = 120, data.label = dl) save.dta13(dd, "data/dta_119.dta", version = 119, data.label = dl) save.dta13(dd, "data/dta_118.dta", version = 118, data.label = dl) save.dta13(dd, "data/dta_117.dta", version = 117, data.label = dl) @@ -416,6 +444,8 @@ save.dta13(dd, "data/dta_104.dta", version = 104, data.label = dl) save.dta13(dd, "data/dta_103.dta", version = 103, data.label = dl) # save.dta13(dd, "data/dta_102.dta", version = 102, data.label = dl) # no data label +dd121 <- read.dta13("data/dta_121.dta") +dd120 <- read.dta13("data/dta_120.dta") dd119 <- read.dta13("data/dta_119.dta") dd118 <- read.dta13("data/dta_118.dta") dd117 <- read.dta13("data/dta_117.dta") @@ -437,6 +467,8 @@ unlink("data", recursive = TRUE) test_that("data label", { # Check that rownames are identical + expect_equal(dl, attr(dd121, "datalabel")) + expect_equal(dl, attr(dd120, "datalabel")) expect_equal(dl, attr(dd119, "datalabel")) expect_equal(dl, attr(dd118, "datalabel")) expect_equal(dl, attr(dd117, "datalabel")) @@ -502,6 +534,8 @@ dd <- data.frame(td = as.Date(td), tm = as.Date(tm), tq = as.Date(tq)) +save.dta13(dd, "data/dta_121.dta", version = 121, convert.dates = TRUE) +save.dta13(dd, "data/dta_120.dta", version = 120, convert.dates = TRUE) save.dta13(dd, "data/dta_119.dta", 
version = 119, convert.dates = TRUE) save.dta13(dd, "data/dta_118.dta", version = 118, convert.dates = TRUE) save.dta13(dd, "data/dta_117.dta", version = 117, convert.dates = TRUE) @@ -519,6 +553,8 @@ save.dta13(dd, "data/dta_104.dta", version = 104, convert.dates = TRUE) save.dta13(dd, "data/dta_103.dta", version = 103, convert.dates = TRUE) save.dta13(dd, "data/dta_102.dta", version = 102, convert.dates = TRUE) +dd121 <- read.dta13("data/dta_121.dta") +dd120 <- read.dta13("data/dta_120.dta") dd119 <- read.dta13("data/dta_119.dta") dd118 <- read.dta13("data/dta_118.dta") dd117 <- read.dta13("data/dta_117.dta") @@ -540,6 +576,8 @@ unlink("data", recursive = TRUE) test_that("convert.dates TRUE", { # Check that rownames are identical + expect_true(datacompare(dd, dd121)) + expect_true(datacompare(dd, dd120)) expect_true(datacompare(dd, dd119)) expect_true(datacompare(dd, dd118)) expect_true(datacompare(dd, dd117)) @@ -571,6 +609,8 @@ dd <- data.frame( dat = c(paste(replicate(2046, "a"), collapse = ""), paste(replicate(2046, "b"), collapse = "")), stringsAsFactors = FALSE) +save.dta13(dd, "data/dta_121.dta", version = 121) +save.dta13(dd, "data/dta_120.dta", version = 120) save.dta13(dd, "data/dta_119.dta", version = 119) save.dta13(dd, "data/dta_118.dta", version = 118) save.dta13(dd, "data/dta_117.dta", version = 117) @@ -588,6 +628,8 @@ save.dta13(dd, "data/dta_117.dta", version = 117) # save.dta13(dd, "data/dta_103.dta", version = 103) # save.dta13(dd, "data/dta_102.dta", version = 102) +dd121 <- read.dta13("data/dta_121.dta", replace.strl = TRUE) +dd120 <- read.dta13("data/dta_120.dta", replace.strl = TRUE) dd119 <- read.dta13("data/dta_119.dta", replace.strl = TRUE) dd118 <- read.dta13("data/dta_118.dta", replace.strl = TRUE) dd117 <- read.dta13("data/dta_117.dta", replace.strl = TRUE) @@ -609,6 +651,8 @@ unlink("data", recursive = TRUE) test_that("replace.strl TRUE", { # Check that rownames are identical + expect_true(datacompare(dd, dd121)) + 
expect_true(datacompare(dd, dd120)) expect_true(datacompare(dd, dd119)) expect_true(datacompare(dd, dd118)) expect_true(datacompare(dd, dd117)) @@ -636,6 +680,8 @@ dir.create("data") dd <- data.frame(x.1 = 1) +save.dta13(dd, "data/dta_121.dta", version = 121, convert.underscore = TRUE) +save.dta13(dd, "data/dta_120.dta", version = 120, convert.underscore = TRUE) save.dta13(dd, "data/dta_119.dta", version = 119, convert.underscore = TRUE) save.dta13(dd, "data/dta_118.dta", version = 118, convert.underscore = TRUE) save.dta13(dd, "data/dta_117.dta", version = 117, convert.underscore = TRUE) @@ -653,6 +699,8 @@ save.dta13(dd, "data/dta_104.dta", version = 104, convert.underscore = TRUE) save.dta13(dd, "data/dta_103.dta", version = 103, convert.underscore = TRUE) save.dta13(dd, "data/dta_102.dta", version = 102, convert.underscore = TRUE) +dd121 <- read.dta13("data/dta_121.dta") +dd120 <- read.dta13("data/dta_120.dta") dd119 <- read.dta13("data/dta_119.dta") dd118 <- read.dta13("data/dta_118.dta") dd117 <- read.dta13("data/dta_117.dta") @@ -676,6 +724,8 @@ names(dd) <- "x_1" test_that("convert.underscore TRUE", { # check numerics + expect_true(datacompare(dd, dd121)) + expect_true(datacompare(dd, dd120)) expect_true(datacompare(dd, dd119)) expect_true(datacompare(dd, dd118)) expect_true(datacompare(dd, dd117)) @@ -693,6 +743,8 @@ test_that("convert.underscore TRUE", { expect_true(datacompare(dd, dd103)) expect_true(datacompare(dd, dd102)) # check names + expect_true(namescompare(dd, dd121)) + expect_true(namescompare(dd, dd120)) expect_true(namescompare(dd, dd119)) expect_true(namescompare(dd, dd118)) expect_true(namescompare(dd, dd117)) @@ -720,6 +772,8 @@ dir.create("data") dd <- mtcars +save.dta13(dd, "data/dta_121.dta", version = 121) +save.dta13(dd, "data/dta_120.dta", version = 120) save.dta13(dd, "data/dta_119.dta", version = 119) save.dta13(dd, "data/dta_118.dta", version = 118) save.dta13(dd, "data/dta_117.dta", version = 117) @@ -737,6 +791,8 @@ 
save.dta13(dd, "data/dta_104.dta", version = 104) save.dta13(dd, "data/dta_103.dta", version = 103) save.dta13(dd, "data/dta_102.dta", version = 102) +dd121 <- read.dta13("data/dta_121.dta", select.rows = 5) +dd120 <- read.dta13("data/dta_120.dta", select.rows = 5) dd119 <- read.dta13("data/dta_119.dta", select.rows = 5) dd118 <- read.dta13("data/dta_118.dta", select.rows = 5) dd117 <- read.dta13("data/dta_117.dta", select.rows = 5) @@ -760,6 +816,8 @@ dd <- dd[1:5,] test_that("select.rows = 5", { # check numerics + expect_true(datacompare(dd, dd121)) + expect_true(datacompare(dd, dd120)) expect_true(datacompare(dd, dd119)) expect_true(datacompare(dd, dd118)) expect_true(datacompare(dd, dd117)) @@ -784,6 +842,8 @@ dir.create("data") dd <- mtcars +save.dta13(dd, "data/dta_121.dta", version = 121) +save.dta13(dd, "data/dta_120.dta", version = 120) save.dta13(dd, "data/dta_119.dta", version = 119) save.dta13(dd, "data/dta_118.dta", version = 118) save.dta13(dd, "data/dta_117.dta", version = 117) @@ -801,6 +861,8 @@ save.dta13(dd, "data/dta_104.dta", version = 104) save.dta13(dd, "data/dta_103.dta", version = 103) save.dta13(dd, "data/dta_102.dta", version = 102) +dd121 <- read.dta13("data/dta_121.dta", select.rows = c(5,10)) +dd120 <- read.dta13("data/dta_120.dta", select.rows = c(5,10)) dd119 <- read.dta13("data/dta_119.dta", select.rows = c(5,10)) dd118 <- read.dta13("data/dta_118.dta", select.rows = c(5,10)) dd117 <- read.dta13("data/dta_117.dta", select.rows = c(5,10)) @@ -824,6 +886,8 @@ dd <- dd[5:10,] test_that("select.rows = c(5,10)", { # check numerics + expect_true(datacompare(dd, dd121)) + expect_true(datacompare(dd, dd120)) expect_true(datacompare(dd, dd119)) expect_true(datacompare(dd, dd118)) expect_true(datacompare(dd, dd117)) @@ -852,6 +916,8 @@ dir.create("data") dd <- mtcars +save.dta13(dd, "data/dta_121.dta", version = 121) +save.dta13(dd, "data/dta_120.dta", version = 120) save.dta13(dd, "data/dta_119.dta", version = 119) save.dta13(dd, 
"data/dta_118.dta", version = 118) save.dta13(dd, "data/dta_117.dta", version = 117) @@ -869,6 +935,8 @@ save.dta13(dd, "data/dta_104.dta", version = 104) save.dta13(dd, "data/dta_103.dta", version = 103) save.dta13(dd, "data/dta_102.dta", version = 102) +dd121 <- read.dta13("data/dta_121.dta", select.cols = c("disp", "drat")) +dd120 <- read.dta13("data/dta_120.dta", select.cols = c("disp", "drat")) dd119 <- read.dta13("data/dta_119.dta", select.cols = c("disp", "drat")) dd118 <- read.dta13("data/dta_118.dta", select.cols = c("disp", "drat")) dd117 <- read.dta13("data/dta_117.dta", select.cols = c("disp", "drat")) @@ -892,6 +960,8 @@ dd <- dd[,c("disp", "drat")] test_that("select.cols = c('disp', 'drat')", { # check numerics + expect_true(datacompare(dd, dd121)) + expect_true(datacompare(dd, dd120)) expect_true(datacompare(dd, dd119)) expect_true(datacompare(dd, dd118)) expect_true(datacompare(dd, dd117)) @@ -916,6 +986,8 @@ dir.create("data") dd <- mtcars +save.dta13(dd, "data/dta_121.dta", version = 121) +save.dta13(dd, "data/dta_120.dta", version = 120) save.dta13(dd, "data/dta_119.dta", version = 119) save.dta13(dd, "data/dta_118.dta", version = 118) save.dta13(dd, "data/dta_117.dta", version = 117) @@ -933,6 +1005,8 @@ save.dta13(dd, "data/dta_104.dta", version = 104) save.dta13(dd, "data/dta_103.dta", version = 103) save.dta13(dd, "data/dta_102.dta", version = 102) +dd121 <- read.dta13("data/dta_121.dta", select.cols = c(3, 5)) +dd120 <- read.dta13("data/dta_120.dta", select.cols = c(3, 5)) dd119 <- read.dta13("data/dta_119.dta", select.cols = c(3, 5)) dd118 <- read.dta13("data/dta_118.dta", select.cols = c(3, 5)) dd117 <- read.dta13("data/dta_117.dta", select.cols = c(3, 5)) @@ -956,6 +1030,8 @@ dd <- dd[,c("disp", "drat")] test_that("select.cols = c('disp', 'drat')", { # check numerics + expect_true(datacompare(dd, dd121)) + expect_true(datacompare(dd, dd120)) expect_true(datacompare(dd, dd119)) expect_true(datacompare(dd, dd118)) 
expect_true(datacompare(dd, dd117)) @@ -1010,6 +1086,8 @@ ef <- list( attr(dd, "expansion.fields") <- ef +save.dta13(dd, "data/dta_121.dta", version = 121) +save.dta13(dd, "data/dta_120.dta", version = 120) save.dta13(dd, "data/dta_119.dta", version = 119) save.dta13(dd, "data/dta_118.dta", version = 118) save.dta13(dd, "data/dta_117.dta", version = 117) @@ -1027,6 +1105,8 @@ save.dta13(dd, "data/dta_105.dta", version = 105) # save.dta13(dd, "data/dta_103.dta", version = 103) # save.dta13(dd, "data/dta_102.dta", version = 102) +dd121 <- attr(read.dta13("data/dta_121.dta"), "expansion.fields") +dd120 <- attr(read.dta13("data/dta_120.dta"), "expansion.fields") dd119 <- attr(read.dta13("data/dta_119.dta"), "expansion.fields") dd118 <- attr(read.dta13("data/dta_118.dta"), "expansion.fields") dd117 <- attr(read.dta13("data/dta_117.dta"), "expansion.fields") @@ -1048,6 +1128,8 @@ unlink("data", recursive = TRUE) test_that("expansinon.fields", { # check numerics + expect_equal(ef, dd121) + expect_equal(ef, dd120) expect_equal(ef, dd119) expect_equal(ef, dd118) expect_equal(ef, dd117) @@ -1077,8 +1159,8 @@ dd <- mtcars varlabeldd <- LETTERS[seq_len(ncol(dd))] varlabel(dd) <- varlabeldd -version_list <- c(102,103,104,105,106,107,108,110, - 111,112,113,114,115,117,118,119) +version_list <- c(102,103,104,105,106,107,108,110,111, + 112,113,114,115,117,118,119,120,121) # write variable label attribute for(v in version_list) { @@ -1113,6 +1195,8 @@ dir.create("data") dd <- data.frame(x1 = c("NA", NA_character_)) exp <- data.frame(x1 = c("NA", "")) +save.dta13(dd, "data/dta_121.dta", version = 121) +save.dta13(dd, "data/dta_120.dta", version = 120) save.dta13(dd, "data/dta_119.dta", version = 119) save.dta13(dd, "data/dta_118.dta", version = 118) save.dta13(dd, "data/dta_117.dta", version = 117) @@ -1130,6 +1214,8 @@ save.dta13(dd, "data/dta_104.dta", version = 104) save.dta13(dd, "data/dta_103.dta", version = 103) save.dta13(dd, "data/dta_102.dta", version = 102) +dd121 <- 
read.dta13("data/dta_121.dta") +dd120 <- read.dta13("data/dta_120.dta") dd119 <- read.dta13("data/dta_119.dta") dd118 <- read.dta13("data/dta_118.dta") dd117 <- read.dta13("data/dta_117.dta") @@ -1148,6 +1234,8 @@ dd103 <- read.dta13("data/dta_103.dta") dd102 <- read.dta13("data/dta_102.dta") test_that("NA character works", { + expect_true(datacompare(exp, dd121)) + expect_true(datacompare(exp, dd120)) expect_true(datacompare(exp, dd119)) expect_true(datacompare(exp, dd118)) expect_true(datacompare(exp, dd117)) @@ -1178,15 +1266,21 @@ dd <- data.frame( dat = c(paste(replicate(2046, "a"), collapse = ""), "NA", NA_character_), stringsAsFactors = FALSE) +save.dta13(dd, "data/dta_121.dta", version = 121) +save.dta13(dd, "data/dta_120.dta", version = 120) save.dta13(dd, "data/dta_119.dta", version = 119) save.dta13(dd, "data/dta_118.dta", version = 118) save.dta13(dd, "data/dta_117.dta", version = 117) +dd121 <- read.dta13("data/dta_121.dta") +dd120 <- read.dta13("data/dta_120.dta") dd119 <- read.dta13("data/dta_119.dta") dd118 <- read.dta13("data/dta_118.dta") dd117 <- read.dta13("data/dta_117.dta") test_that("NA character works", { + expect_true(datacompare(dd, dd121)) + expect_true(datacompare(dd, dd120)) expect_true(datacompare(dd, dd119)) expect_true(datacompare(dd, dd118)) expect_true(datacompare(dd, dd117))