The budget of the United States Federal Government is often a contentious issue. This presentation will provide a ‘rough’ introduction, covering where to get the data, provide a breakdown of the numbers, and links to other resources.
library(dplyr)
library(ggplot2)
library(DT)
library(tidyr)
Source - seankross
This is an old FY15 snapshot.
devtools::install_github("seankross/usbudget")
library(usbudget)
data("budauth")
ba <- budauth
names(ba) <- gsub("^X", "", names(ba))
Sources
links <- list(budauth = "https://www.whitehouse.gov/wp-content/uploads/2019/03/budauth-fy2020.xlsx",
outlays = "https://www.whitehouse.gov/wp-content/uploads/2019/03/outlays-fy2020.xlsx",
receipts = "https://www.whitehouse.gov/wp-content/uploads/2019/03/receipts-fy2020.xlsx"
)
files_raw <- lapply(links, function(x) {
download.file(x, "C:/workspace2/temp.xlsx", mode = "wb")
as.data.frame(readxl::read_excel("temp.xlsx"))
})
# budget authority (e.g. what the federal government is allowed to spend)
files <- lapply(files_raw, function(x) {
# remove X from fiscal years
names(x) = gsub(" ", ".", names(x))
# convert dollars from string to numeric
if (any(names(x) == "1962")) {
vars = as.character(1962:2024)
} else vars = as.character(1976:2024)
x[vars] = lapply(x[vars], function(y) as.numeric(gsub(",", "", y)) * 1000)
return(x)
})
budauth <- files$budauth
outlays <- files$outlays
receipts <- files$receipts
Sources
gdp_all <- read.csv("https://apps.bea.gov/national/Release/TXT/NipaDataA.txt", stringsAsFactors = FALSE)
gdp <- subset(gdp_all, X.SeriesCode == "A001RC")[-1]
names(gdp) <- c("year", "gdp")
gdp$gdp <- as.numeric(gsub(",", "", gdp$gdp)) * 1e6
# # Data from FredCast
#gdp_raw <- read.csv("https://fred.stlouisfed.org/graph/fredgraph.csv?id=GNPA&scale=left&cosd=1929-01-01&coed=2017-01-01&fq=Annual&fam=avg&fgst=lin&fgsnd=2009-06-01&line_index=1&transformation=lin&vintage_date=2018-10-15&revision_date=2018-10-15&nd=1929-01-01", stringsAsFactors = FALSE)
# gdp$DATE <- format(as.Date(gdp$DATE), "%Y")
# names(gdp) <- c("year", "gdp")
# gdp <- subset(gdp, year %in% 1947:2019)
Sources
cpi_raw <- read.csv("https://fred.stlouisfed.org/graph/fredgraph.csv?bgcolor=%23e1e9f0&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=on&txtcolor=%23444444&ts=12&tts=12&width=1169&nt=0&thu=0&trc=0&show_legend=yes&show_axis_titles=yes&show_tooltip=yes&id=CPIAUCSL&scale=left&cosd=1947-01-01&coed=2018-01-01&line_color=%234572a7&link_values=false&line_style=solid&mark_type=none&mw=3&lw=2&ost=-99999&oet=99999&mma=0&fml=a&fq=Annual&fam=avg&fgst=lin&fgsnd=2009-06-01&line_index=1&transformation=lin&vintage_date=2019-06-25&revision_date=2019-06-25&nd=1947-01-01", stringsAsFactors = FALSE)
cpi <- cpi_raw
names(cpi) <- c("year", "cpi")
cpi <- within(cpi, {
year = as.integer(format(as.Date(year), "%Y"))
cpi = as.numeric(cpi)
adj = cpi[year == 2019] / cpi
})
save(budauth, outlays, receipts, gdp, cpi, file = "C:/workspace2/usbudget.RData")
load(file = "C:/workspace2/usbudget.RData")
bud <- select(budauth, Agency.Name, as.character(1976:2024)) %>%
gather(key = year, value = dollars, - Agency.Name) %>%
group_by(Agency.Name, year) %>%
summarize(dollars = sum(dollars)) %>%
mutate(year = as.integer(year))
top10 <- filter(bud, year == 2019) %>%
arrange(- dollars) %>% # View()
head()
gg_budget <- filter(bud, Agency.Name %in% top10$Agency.Name) %>%
ggplot(aes(x = year, y = dollars / 1e9, col = Agency.Name)) +
geom_line(aes(lty = (year <= 2019)), lwd = 2) +
geom_vline(xintercept = 2019) +
scale_linetype_manual(values = c("dotted", "solid")) +
scale_x_continuous(breaks = seq(1880, 2030, 8), limits = c(1976, 2024)) +
theme(aspect.ratio = 1/2) +
ylab("Billons of Dollars") +
ggtitle("Top 10 US Agencies")
plot(gg_budget)
sp <- select(budauth, BEA.Category, as.character(1976:2024)) %>%
gather(key = year, value = dollars, - BEA.Category) %>%
group_by(BEA.Category, year) %>%
summarize(dollars = sum(dollars[dollars > 0])) %>%
mutate(year = as.integer(year))
ggplot(sp, aes(x = year, y = dollars / 1e9, col = BEA.Category)) +
geom_line(aes(lty = (year <= 2019)), lwd = 2) +
geom_vline(xintercept = 2019) +
scale_linetype_manual(values = c("dotted", "solid")) +
scale_x_continuous(breaks = seq(1880, 2030, 8)) +
theme(aspect.ratio = 1/2) +
ylab("Billons of Dollars") +
ggtitle("Spending Category")
# percentage
group_by(budauth, BEA.Category, Agency.Name) %>%
summarize(dollars = sum(`2019`)) %>%
mutate(dollars = ifelse(is.na(dollars), 0, dollars)) %>%
spread(BEA.Category, dollars) %>%
mutate(pct_man = Mandatory / (Mandatory + Discretionary)) %>%
arrange(- Mandatory) %>%
head(24) %>%
datatable()
Examples
gg_budget_adj <- inner_join(bud, cpi, by = "year") %>%
mutate(dollars = dollars * adj) %>%
filter(Agency.Name %in% top10$Agency.Name) %>%
ggplot(aes(x = year, y = dollars / 1e9, col = Agency.Name)) +
geom_line(lwd = 2) +
geom_vline(xintercept = 2019) +
scale_x_continuous(breaks = seq(1880, 2030, 8), limits = c(1976, 2024)) +
theme(aspect.ratio = 1/2) +
ylab("Billons of Dollars (Adj for Inflation)") +
ggtitle("Top 10 US Agencies")
gridExtra::grid.arrange(gg_budget, gg_budget_adj, ncol = 1)
gg_gdp <- gdp %>%
inner_join(cpi, by = "year") %>%
mutate(dollars = gdp * adj) %>%
ggplot(aes(x = year, y = dollars / 1e9)) +
geom_line(lwd = 1) +
scale_x_continuous(breaks = seq(1880, 2030, 8), limits = c(1976, 2019)) +
theme(aspect.ratio = 1/2) +
ylab("Billons of Dollars (Adj for Inflation)") +
ggtitle("Gross Domestic Product")
gg_gdp_pct <- bud %>%
group_by(year) %>%
summarize(dollars = sum(dollars)) %>%
inner_join(gdp, by = "year") %>%
mutate(pct_gdp = dollars / gdp) %>%
ggplot(aes(x = year, y = pct_gdp)) +
geom_line(lwd = 1) +
# ylim(0, 0.5) +
scale_x_continuous(breaks = seq(1880, 2030, 8), limits = c(1976, 2019)) +
theme(aspect.ratio = 1/2) +
ylab("percent of GDP (%)") +
ggtitle("US Budget")
gridExtra::grid.arrange(gg_gdp, gg_gdp_pct, ncol = 1)