Skip to content

Commit

Permalink
Merge branch '2017SUT' into 2017MakeUse
Browse files Browse the repository at this point in the history
# Conflicts:
#	data-raw/BEAData_Detail.R
  • Loading branch information
bl-young committed Feb 5, 2024
2 parents 07bc60c + 7d72a1d commit 10fb430
Show file tree
Hide file tree
Showing 54 changed files with 1,830 additions and 172 deletions.
45 changes: 24 additions & 21 deletions R/CrosswalkFunctions.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,26 +41,39 @@ getNAICStoBEAAllocation <- function (year, model) {
return(AllocationTable)
}


#' Get 2-6 digit NAICS codes and names for year specified.
#' @param year int. 2012 or 2007 accepted.
#' @return dataframe with columns NAICS_year_Code and NAICS_year_Name.
getNAICS2to6DigitsCodeName <- function (year) {
#' Download NAICS file for 2-6 digit NAICS codes if not present.
#' @param year int, 2017, 2012, or 2007 accepted.
#' @return FileName str
downloadNAICS2to6DigitsFile <- function (year) {
# Download 2-6 digits NAICS table
if (year == 2012) {
if (year == 2017) {
FileName <- "inst/extdata/2-digit_2017_Codes.xlsx"
url <- "https://www.census.gov/naics/2017NAICS/2-6%20digit_2017_Codes.xlsx"
} else if (year == 2012) {
FileName <- "inst/extdata/2-digit_2012_Codes.xls"
url <- "https://www.census.gov/eos/www/naics/2012NAICS/2-digit_2012_Codes.xls"
} else {
} else if (year == 2007) {
FileName <- "inst/extdata/naics07.xls"
url <- "https://www.census.gov/eos/www/naics/reference_files_tools/2007/naics07.xls"
} else {
stop('Specify available year for crosswalk')
}
if(!file.exists(FileName)) {
utils::download.file(url, FileName, mode = "wb")
}

return(FileName)
}


#' Get 2-6 digit NAICS codes and names for year specified.
#' @param year int. 2017, 2012, or 2007 accepted.
#' @return dataframe with columns NAICS_year_Code and NAICS_year_Name.
getNAICS2to6DigitsCodeName <- function (year) {
FileName <- downloadNAICS2to6DigitsFile(year)
# Load 2-6 digits NAICS table
NAICS <- as.data.frame(readxl::read_excel(FileName, sheet = 1, col_names = TRUE))[-1,-1]
colnames(NAICS) <- c("NAICS_Code", "NAICS_Name")
NAICS <- NAICS[c("NAICS_Code", "NAICS_Name")] # Avoid extra columns
# Split the NAICS code with dash ("-)
DashSplit <- do.call("rbind.data.frame", apply(do.call("rbind", strsplit(NAICS$NAICS_Code, "-")),
1, function(x) seq(x[1], x[2], 1)))
Expand All @@ -80,24 +93,14 @@ getNAICS2to6DigitsCodeName <- function (year) {
}

#' Get 2-6 digit NAICS codes in a crosswalk format for year specified.
#' @param year int, 2012 or 2007 accepted.
#' @param year int, 2017, 2012 or 2007 accepted.
#' @return data frame with columns NAICS_2, NAICS_3, NAICS_4, NAICS_5, NAICS_6.
getNAICS2to6Digits <- function (year) {
# Download 2-6 digits NAICS table
if (year == 2012) {
FileName <- "inst/extdata/2-digit_2012_Codes.xls"
url <- "https://www.census.gov/eos/www/naics/2012NAICS/2-digit_2012_Codes.xls"
} else {
FileName <- "inst/extdata/naics07.xls"
url <- "https://www.census.gov/eos/www/naics/reference_files_tools/2007/naics07.xls"
}
if(!file.exists(FileName)) {
utils::download.file(url, FileName, mode = "wb")
}

FileName <- downloadNAICS2to6DigitsFile(year)
# Load 2-6 digits NAICS table
NAICS <- as.data.frame(readxl::read_excel(FileName, sheet = 1, col_names = TRUE))[-1,-1]
colnames(NAICS) <- c("NAICS_Code", "NAICS_Name")
NAICS <- NAICS[c("NAICS_Code", "NAICS_Name")] # Avoid extra columns
NAICS$NAICS_Code <- suppressWarnings(as.integer(NAICS$NAICS_Code))
NAICS <- NAICS[!is.na(NAICS$NAICS_Code), ]
# Reshape the table
Expand Down
59 changes: 59 additions & 0 deletions R/DataDocumentation.R
Original file line number Diff line number Diff line change
Expand Up @@ -806,6 +806,16 @@
#' @source \url{https://apps.bea.gov/industry/iTables\%20Static\%20Files/AllTablesSUP.zip}
"Summary_Supply_2020"

#' Summary 2021 Supply (2017 schema)
#' @format A dataframe with 74 obs. and 83 variables
#' @source \url{https://apps.bea.gov/industry/iTables\%20Static\%20Files/AllTablesSUP.zip}
"Summary_Supply_2021"

#' Summary 2022 Supply (2017 schema)
#' @format A dataframe with 74 obs. and 83 variables
#' @source \url{https://apps.bea.gov/industry/iTables\%20Static\%20Files/AllTablesSUP.zip}
"Summary_Supply_2022"

#' Summary 2010 Use (2012 schema)
#' @format A dataframe with 82 obs. and 92 variables
#' @source \url{https://apps.bea.gov/industry/iTables\%20Static\%20Files/AllTablesSUP.zip}
Expand Down Expand Up @@ -861,6 +871,16 @@
#' @source \url{https://apps.bea.gov/industry/iTables\%20Static\%20Files/AllTablesSUP.zip}
"Summary_Use_SUT_2020"

#' Summary 2021 Use (2017 schema)
#' @format A dataframe with 82 obs. and 92 variables
#' @source \url{https://apps.bea.gov/industry/iTables\%20Static\%20Files/AllTablesSUP.zip}
"Summary_Use_SUT_2021"

#' Summary 2022 Use (2017 schema)
#' @format A dataframe with 82 obs. and 92 variables
#' @source \url{https://apps.bea.gov/industry/iTables\%20Static\%20Files/AllTablesSUP.zip}
"Summary_Use_SUT_2022"

#' Sector 2010 Supply (2012 schema)
#' @format A dataframe with 18 obs. and 27 variables
#' @source \url{https://apps.bea.gov/industry/iTables\%20Static\%20Files/AllTablesSUP.zip}
Expand Down Expand Up @@ -970,3 +990,42 @@
#' @format A dataframe with 26 obs. and 22 variables
#' @source \url{https://apps.bea.gov/industry/iTables\%20Static\%20Files/AllTablesSUP.zip}
"Sector_Use_SUT_2020"

#' Detail 2017 Supply (2017 schema)
#' @format A dataframe with 403 obs. and 414 variables
#' @source \url{https://apps.bea.gov/industry/iTables\%20Static\%20Files/AllTablesSUP.zip}
"Detail_Supply_2017"

#' Detail 2017 Use (under the Supply-Use framework, 2017 schema)
#' @format A dataframe with 411 obs. and 423 variables
#' @source \url{https://apps.bea.gov/industry/iTables\%20Static\%20Files/AllTablesSUP.zip}
"Detail_Use_SUT_2017"

#' Master Crosswalk table (2017 schema)
#' @format A dataframe with 4095 obs. and 5 variables:
#' \describe{
#' \item{BEA_2017_Sector_Code}{text code}
#' \item{BEA_2017_Summary_Code}{text code}
#' \item{BEA_2017_Detail_Code}{text code}
#' \item{NAICS_2017_Code}{text code}
#' \item{NAICS_2012_Code}{text code}
#' }
"MasterCrosswalk2017"

#' Master Crosswalk table (2017 schema)
#' @format A dataframe with 4095 obs. and 12 variables:
#' \describe{
#' \item{BEA_2017_Sector_Code}{text code}
#' \item{BEA_2017_Sector_Name}{text code}
#' \item{BEA_2017_Summary_Code}{text code}
#' \item{BEA_2017_Summary_Name}{text code}
#' \item{BEA_2017_Detail_Code}{text code}
#' \item{BEA_2017_Detail_Name}{text code}
#' \item{USEEIO_Code}{text code}
#' \item{USEEIO_Name}{text code}
#' \item{NAICS_2017_Code}{text code}
#' \item{NAICS_2017_Name}{text code}
#' \item{NAICS_2012_Code}{text code}
#' \item{NAICS_2007_Code}{text code}
#' }
"MasterCrosswalk"
125 changes: 76 additions & 49 deletions data-raw/BEAData.R
Original file line number Diff line number Diff line change
Expand Up @@ -1284,55 +1284,82 @@ cleanSectorNames <- function(df) {
return(df)
}

# Get BEA (Detail/Summary/Sector) Code and Name under 2012 schema
getBEACodeName2012Schema <- function() {
# Get BEA (Detail/Summary/Sector) Code and Name
getBEACodeName <- function(schema_year) {
# Download data
url <- getBEAIOTables()[["url"]]
date_accessed <- getBEAIOTables()[["date_accessed"]]
files <- getBEAIOTables()[["files"]]
#next lines get the data from AllTablesIO
#url <- getBEAIOTables()[["url"]]
#date_accessed <- getBEAIOTables()[["date_accessed"]]
#files <- getBEAIOTables()[["files"]]
schema_year <- as.character(schema_year)
# Get the data from AllTablesSUP
url <- getBEASupplyUseTables()[["url"]]
date_accessed <- getBEASupplyUseTables()[["date_accessed"]]
files <- getBEASupplyUseTables()[["files"]]

### Detail ###
# Load data
FileName <- file.path("inst/extdata/AllTablesIO",
files[startsWith(files, "IOUse_Before_Redefinitions_PRO") &
endsWith(files, "Detail.xlsx")])
#Next line for use with AllTablesIO
#FileName <- file.path("inst/extdata/AllTablesSUP",
#files[startsWith(files, "IOUse_Before_Redefinitions_PRO") &
# endsWith(files, "Detail.xlsx")])
FileName <- file.path("inst/extdata/AllTablesSUP",
files[startsWith(files, paste0("Use_SUT_Framework_",schema_year,"_DET.xlsx"))])

date_last_modified <- as.character(as.Date(file.mtime(FileName)))
BEADetail <- as.data.frame(readxl::read_excel(FileName, sheet = "2012"))
BEADetail <- as.data.frame(readxl::read_excel(FileName, sheet = schema_year))
## Commodity & Value Added
DetailCommVA <- BEADetail[!is.na(BEADetail[, 2]), c(1:2)][-1, ]
commodity_range <- c(1:(which(DetailCommVA[, 1] == "T005") - 1))
va_range <- c((length(commodity_range) + 2):(which(DetailCommVA[, 1] == "T006") - 1))
#value added range in MUT Use table
#va_range <- c((length(commodity_range) + 2):(which(DetailCommVA[, 1] == "T006") - 1))

#Value added range from SUT
va_range <- c((length(commodity_range) + 2):(which(DetailCommVA[, 1] == "VABAS") - 1))
va_range <- append(va_range,c((max(va_range) + 3):(which(DetailCommVA[, 1] == "VAPRO") - 1)))

# Commodity
BEADetailCommodityCodeName <- DetailCommVA[commodity_range, ]
colnames(BEADetailCommodityCodeName) <- c("BEA_2012_Detail_Commodity_Code",
"BEA_2012_Detail_Commodity_Name")
colnames(BEADetailCommodityCodeName) <- c(paste0("BEA_",schema_year,"_Detail_Commodity_Code"),
paste0("BEA_",schema_year,"_Detail_Commodity_Name"))
rownames(BEADetailCommodityCodeName) <- NULL
# Value Added
BEADetailValueAddedCodeName <- DetailCommVA[va_range, ]
colnames(BEADetailValueAddedCodeName) <- c("BEA_2012_Detail_ValueAdded_Code",
"BEA_2012_Detail_ValueAdded_Name")
colnames(BEADetailValueAddedCodeName) <- c(paste0("BEA_",schema_year,"_Detail_ValueAdded_Code"),
paste0("BEA_",schema_year,"_Detail_ValueAdded_Name"))
rownames(BEADetailValueAddedCodeName) <- NULL
## Industry & Final Demand
DetailIndFD <- as.data.frame(t(BEADetail[!is.na(BEADetail[, 3]), ][2:1, -c(1:2)]))
industry_range <- c(1:(which(DetailIndFD[, 1] == "T001") - 1))
fd_range <- c((length(industry_range) + 2):(which(DetailIndFD[, 1] == "T004") - 1))

# FD range from MUT Use
#fd_range <- c((length(industry_range) + 2):(which(DetailIndFD[, 1] == "T004") - 1))

#FD range from SUT Use
fd_range <- c((length(industry_range) + 2):(which(DetailIndFD[, 1] == "T019") - 1))

# Industry
BEADetailIndustryCodeName <- DetailIndFD[industry_range, ]
colnames(BEADetailIndustryCodeName) <- c("BEA_2012_Detail_Industry_Code",
"BEA_2012_Detail_Industry_Name")
colnames(BEADetailIndustryCodeName) <- c(paste0("BEA_",schema_year,"_Detail_Industry_Code"),
paste0("BEA_",schema_year,"_Detail_Industry_Name"))
rownames(BEADetailIndustryCodeName) <- NULL
# Final Demand
BEADetailFinalDemandCodeName <- DetailIndFD[fd_range, ]
colnames(BEADetailFinalDemandCodeName) <- c("BEA_2012_Detail_FinalDemand_Code",
"BEA_2012_Detail_FinalDemand_Name")
colnames(BEADetailFinalDemandCodeName) <- c(paste0("BEA_",schema_year,"_Detail_FinalDemand_Code"),
paste0("BEA_",schema_year,"_Detail_FinalDemand_Name"))
rownames(BEADetailFinalDemandCodeName) <- NULL

### Summary ###
# Load data
FileName <- file.path("inst/extdata/AllTablesIO",
files[startsWith(files, "IOUse_Before_Redefinitions_PRO") &
#FileName <- file.path("inst/extdata/AllTablesIO",
# files[startsWith(files, "IOUse_Before_Redefinitions_PRO") &
# endsWith(files, "Summary.xlsx")])
FileName <- file.path("inst/extdata/AllTablesSUP",
files[startsWith(files, "Use_Tables") &
endsWith(files, "Summary.xlsx")])

date_last_modified <- as.character(as.Date(file.mtime(FileName)))
BEASummary <- as.data.frame(readxl::read_excel(FileName, sheet = "2012"))
BEASummary <- as.data.frame(readxl::read_excel(FileName, sheet = schema_year))
## Commodity & Value Added
SummaryCommVA <- BEASummary[!is.na(BEASummary[, 2]), c(1:2)][-c(1:2), ]
commodity_range <- c(1:(which(SummaryCommVA[, 2] == "Total Intermediate") - 1))
Expand Down Expand Up @@ -1399,18 +1426,18 @@ getBEACodeName2012Schema <- function() {
rownames(BEASectorFinalDemandCodeName) <- NULL

### Put the data.frames in a list
BEACodeNameList <- list("Detail_IndustryCodeName_2012" = BEADetailIndustryCodeName,
"Detail_CommodityCodeName_2012" = BEADetailCommodityCodeName,
"Detail_ValueAddedCodeName_2012" = BEADetailValueAddedCodeName,
"Detail_FinalDemandCodeName_2012" = BEADetailFinalDemandCodeName,
"Summary_IndustryCodeName_2012" = BEASummaryIndustryCodeName,
"Summary_CommodityCodeName_2012" = BEASummaryCommodityCodeName,
"Summary_ValueAddedCodeName_2012" = BEASummaryValueAddedCodeName,
"Summary_FinalDemandCodeName_2012" = BEASummaryFinalDemandCodeName,
"Sector_IndustryCodeName_2012" = BEASectorIndustryCodeName,
"Sector_CommodityCodeName_2012" = BEASectorCommodityCodeName,
"Sector_ValueAddedCodeName_2012" = BEASectorValueAddedCodeName,
"Sector_FinalDemandCodeName_2012" = BEASectorFinalDemandCodeName)
BEACodeNameList <- list("Detail_IndustryCodeName" = BEADetailIndustryCodeName,
"Detail_CommodityCodeName" = BEADetailCommodityCodeName,
"Detail_ValueAddedCodeName" = BEADetailValueAddedCodeName,
"Detail_FinalDemandCodeName" = BEADetailFinalDemandCodeName,
"Summary_IndustryCodeName" = BEASummaryIndustryCodeName,
"Summary_CommodityCodeName" = BEASummaryCommodityCodeName,
"Summary_ValueAddedCodeName" = BEASummaryValueAddedCodeName,
"Summary_FinalDemandCodeName" = BEASummaryFinalDemandCodeName,
"Sector_IndustryCodeName" = BEASectorIndustryCodeName,
"Sector_CommodityCodeName" = BEASectorCommodityCodeName,
"Sector_ValueAddedCodeName" = BEASectorValueAddedCodeName,
"Sector_FinalDemandCodeName" = BEASectorFinalDemandCodeName)
BEACodeNameList <- lapply(BEACodeNameList, cleanSectorNames)
BEACodeNameList <- lapply(BEACodeNameList, cleanSectorCodes)
### Save and Document data
Expand Down Expand Up @@ -1464,7 +1491,7 @@ getBEADetailMarginsBeforeRedef2012Schema <- function(year) {
}


# Download all Supply and Use tables from BEA iTable
# Download all Supply and Use tables from BEA AllTablesSUP.zip
getBEASupplyUseTables <- function() {
# Create the placeholder file
AllTablesSUP <- "inst/extdata/AllTablesIOSUP.zip"
Expand All @@ -1491,8 +1518,8 @@ getBEASupplyUseTables <- function() {
return(ls)
}

# Get BEA Detail Supply (2012 schema) table from static Excel
getBEADetailSupply2012Schema <- function(year) {
# Get BEA Detail Supply table from static Excel
getBEADetailSupply <- function(year) {
# Download data
url <- getBEASupplyUseTables()[["url"]]
date_accessed <- getBEASupplyUseTables()[["date_accessed"]]
Expand Down Expand Up @@ -1528,8 +1555,8 @@ getBEADetailSupply2012Schema <- function(year) {
}


# Get BEA Detail Use (under the Supply-Use framework, 2012 schema) table from static Excel
getBEADetailUseSUT2012Schema <- function(year) {
# Get BEA Detail Use (under the Supply-Use framework schema) table from static Excel
getBEADetailUseSUTSchema <- function(year) {
# Download data
url <- getBEASupplyUseTables()[["url"]]
date_accessed <- getBEASupplyUseTables()[["date_accessed"]]
Expand Down Expand Up @@ -1565,22 +1592,22 @@ getBEADetailUseSUT2012Schema <- function(year) {
}


# Get BEA Summary Supply (2012 schema) table from static Excel
getBEASummarySupply2012Schema <- function() {
# Get BEA Summary Supply table from static Excel
getBEASummarySupply <- function() {
# Download data
url <- getBEASupplyUseTables()[["url"]]
date_accessed <- getBEASupplyUseTables()[["date_accessed"]]
files <- getBEASupplyUseTables()[["files"]]
# Prepare file name
file <- files[startsWith(files, "Supply") & endsWith(files, "SUM.xlsx")]
file <- files[startsWith(files, "Supply") & endsWith(files, "Summary.xlsx")]
FileName <- file.path("inst/extdata/AllTablesSUP", file)
date_last_modified <- as.character(as.Date(file.mtime(FileName)))
# Find latest data year
file_split <- unlist(stringr::str_split(file, pattern = "_"))
year_range <- file_split[length(file_split) - 1]
end_year <- sub(".*-", "", year_range)
# Load data
for (year in 2010:end_year) {
for (year in 2017:end_year) {
SummarySupply <- as.data.frame(readxl::read_excel(FileName,
sheet = as.character(year)))
# Trim table, assign column names
Expand Down Expand Up @@ -1610,22 +1637,22 @@ getBEASummarySupply2012Schema <- function() {
}
}

# Get BEA Summary Use (under the Supply-Use framework, 2012 schema) table from static Excel
getBEASummaryUseSUT2012Schema <- function() {
# Get BEA Summary Use under the Supply-Use framework from static Excel
getBEASummaryUseSUT <- function() {
# Download data
url <- getBEASupplyUseTables()[["url"]]
date_accessed <- getBEASupplyUseTables()[["date_accessed"]]
files <- getBEASupplyUseTables()[["files"]]
# Prepare file name
file <- files[startsWith(files, "Use") & endsWith(files, "Sum.xlsx")]
file <- files[startsWith(files, "Use") & endsWith(files, "Summary.xlsx")]
FileName <- file.path("inst/extdata/AllTablesSUP", file)
date_last_modified <- as.character(as.Date(file.mtime(FileName)))
# Find latest data year
file_split <- unlist(stringr::str_split(file, pattern = "_"))
year_range <- file_split[length(file_split) - 1]
end_year <- sub(".*-", "", year_range)
# Load data
for (year in 2010:end_year) {
for (year in 2017:end_year) {
SummaryUse <- as.data.frame(readxl::read_excel(FileName,
sheet = as.character(year)))
# Trim table, assign column names
Expand Down Expand Up @@ -1656,8 +1683,8 @@ getBEASummaryUseSUT2012Schema <- function() {
}


# Get BEA Sector Supply (2012 schema) table from static Excel
getBEASectorSupply2012Schema <- function() {
# Get BEA Sector Supply table from static Excel
getBEASectorSupply <- function() {
# Download data
url <- getBEASupplyUseTables()[["url"]]
date_accessed <- getBEASupplyUseTables()[["date_accessed"]]
Expand Down
Loading

0 comments on commit 10fb430

Please sign in to comment.