1 Abstract

The budget of the United States Federal Government is often a contentious issue. This presentation will provide a ‘rough’ introduction, covering where to get the data, provide a breakdown of the numbers, and links to other resources.

2 Load packages

library(dplyr)
library(ggplot2)
library(DT)
library(tidyr)

3 Budget Data

3.1 GitHub Snapshot

Source - seankross

This is an old FY15 snapshot.

devtools::install_github("seankross/usbudget")

library(usbudget)

data("budauth")
ba <- budauth
names(ba) <- gsub("^X", "", names(ba))

3.2 US Budget

Sources

links <- list(budauth = "https://www.whitehouse.gov/wp-content/uploads/2019/03/budauth-fy2020.xlsx",
              outlays = "https://www.whitehouse.gov/wp-content/uploads/2019/03/outlays-fy2020.xlsx",
              receipts = "https://www.whitehouse.gov/wp-content/uploads/2019/03/receipts-fy2020.xlsx"
              )
files_raw <- lapply(links, function(x) {
  download.file(x, "C:/workspace2/temp.xlsx", mode = "wb")
  as.data.frame(readxl::read_excel("temp.xlsx"))
  })

  
# budget authority (e.g. what the federal government is allowed to spend)
files <- lapply(files_raw, function(x) {
  # remove X from fiscal years
  names(x) = gsub(" ", ".", names(x))
  # convert dollars from string to numeric
  if (any(names(x) == "1962")) {
    vars = as.character(1962:2024)
    } else vars = as.character(1976:2024)
  x[vars] = lapply(x[vars], function(y) as.numeric(gsub(",", "", y)) * 1000)
  return(x)
})

budauth <- files$budauth
outlays <- files$outlays
receipts <- files$receipts

3.3 Gross Domestic Product (GDP)

Sources

gdp_all <- read.csv("https://apps.bea.gov/national/Release/TXT/NipaDataA.txt", stringsAsFactors = FALSE)
gdp <- subset(gdp_all, X.SeriesCode == "A001RC")[-1]
names(gdp) <- c("year", "gdp")
gdp$gdp <- as.numeric(gsub(",", "", gdp$gdp)) * 1e6

# # Data from FredCast
#gdp_raw <- read.csv("https://fred.stlouisfed.org/graph/fredgraph.csv?id=GNPA&scale=left&cosd=1929-01-01&coed=2017-01-01&fq=Annual&fam=avg&fgst=lin&fgsnd=2009-06-01&line_index=1&transformation=lin&vintage_date=2018-10-15&revision_date=2018-10-15&nd=1929-01-01", stringsAsFactors = FALSE)
# gdp$DATE <- format(as.Date(gdp$DATE), "%Y")
# names(gdp) <- c("year", "gdp")
# gdp <- subset(gdp, year %in% 1947:2019)

3.4 Consumer Price Index

Sources

cpi_raw <- read.csv("https://fred.stlouisfed.org/graph/fredgraph.csv?bgcolor=%23e1e9f0&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=on&txtcolor=%23444444&ts=12&tts=12&width=1169&nt=0&thu=0&trc=0&show_legend=yes&show_axis_titles=yes&show_tooltip=yes&id=CPIAUCSL&scale=left&cosd=1947-01-01&coed=2018-01-01&line_color=%234572a7&link_values=false&line_style=solid&mark_type=none&mw=3&lw=2&ost=-99999&oet=99999&mma=0&fml=a&fq=Annual&fam=avg&fgst=lin&fgsnd=2009-06-01&line_index=1&transformation=lin&vintage_date=2019-06-25&revision_date=2019-06-25&nd=1947-01-01", stringsAsFactors = FALSE)
cpi <- cpi_raw
names(cpi) <- c("year", "cpi")
cpi <- within(cpi, {
  year = as.integer(format(as.Date(year), "%Y"))
  cpi  = as.numeric(cpi)
  adj = cpi[year == 2019] / cpi
  })

3.5 USDA data

FY19

3.6 Save Snapshot

save(budauth, outlays, receipts, gdp, cpi, file = "C:/workspace2/usbudget.RData")

3.7 Load Snapshot

load(file = "C:/workspace2/usbudget.RData")

4 Breakdown of Budget Authorization

# FY19 Budget Authorization
formatC(sum(budauth$`2019`[budauth$`2019` > 0]), format = "fg", big.mark = ",")

[1] “5,809,483,000,000”

formatC(sum(budauth$`2019`[budauth$`2019` < 0]), format = "fg", big.mark = ",")

[1] “-1,148,971,000,000”

# FY19 Outlays
formatC(sum(outlays$`2019`[outlays$`2019` > 0]), format = "fg", big.mark = ",")

[1] “5,713,037,000,000”

formatC(sum(outlays$`2019`[outlays$`2019` < 0]), format = "fg", big.mark = ",")

[1] “-1,183,849,000,000”

# n_agency
length(unique(budauth$Agency.Name))

[1] 222

# n_bureau
length(unique(budauth$Bureau.Name))

[1] 486

agencies <- group_by(budauth, Agency.Name) %>%
  summarize(dol_fy18 = sum(`2019`[`2019` > 0]),
            n_bureau = length(unique(Bureau.Name)),
            n_account = length(unique(Account.Code))
            ) %>%
  arrange(- dol_fy18)

# View(agencies)

datatable(agencies)

#n_agency
length(unique(agencies$Agency.Name[agencies$n_bureau > 1]))

[1] 24

# Common column names
names(receipts)[names(receipts) %in% c("Agency.code", "Agency.name", "Bureau.code", "Bureau.name", "Account.code", "Account.name")] <- c("Agency.Code", "Agency.Name", "Bureau.Dode", "Bureau.Name", "Acount.Code", "Account.Name")
idx <- names(budauth) %in% names(outlays) & names(budauth) %in% names(receipts)
vars <- names(budauth)[idx]

budget <- rbind(
  data.frame(file = "budauth",  budauth[vars],  check.names = FALSE),
  data.frame(file = "outlays",  outlays[vars],  check.names = FALSE),
  data.frame(file = "receipts", receipts[vars], check.names = FALSE)
  )

select(budget, file, as.character(1976:2024)) %>%
  gather(key = year, value = dollars, - file) %>%
  group_by(file, year) %>%
  summarize(dollars = sum(dollars)) %>%
  mutate(year = as.integer(year)) %>% 
  ggplot(aes(x = year, y = dollars / 1e9, col = file)) +
  geom_line(aes(lty = (year <= 2019)), lwd = 2) +
  geom_vline(xintercept = 2019) +
  scale_linetype_manual(values = c("dotted", "solid")) +
  scale_x_continuous(breaks = seq(1880, 2030, 8), limits = c(1976, 2024)) +
  theme(aspect.ratio = 1/2) +
  ylab("Billons of Dollars") +
  ggtitle("Budget Files")

5 Top 10 US Agencies

bud <- select(budauth, Agency.Name, as.character(1976:2024)) %>%
  gather(key = year, value = dollars, - Agency.Name) %>%
  group_by(Agency.Name, year) %>%
  summarize(dollars = sum(dollars)) %>%
  mutate(year = as.integer(year))

top10 <- filter(bud, year == 2019) %>%
  arrange(- dollars) %>% # View()
  head()

gg_budget <- filter(bud, Agency.Name %in% top10$Agency.Name) %>%
  ggplot(aes(x = year, y = dollars / 1e9, col = Agency.Name)) +
  geom_line(aes(lty = (year <= 2019)), lwd = 2) +
  geom_vline(xintercept = 2019) +
  scale_linetype_manual(values = c("dotted", "solid")) +
  scale_x_continuous(breaks = seq(1880, 2030, 8), limits = c(1976, 2024)) +
  theme(aspect.ratio = 1/2) +
  ylab("Billons of Dollars") +
  ggtitle("Top 10 US Agencies")
plot(gg_budget)

6 Mandatory vs Discretionary Spending

sp <- select(budauth, BEA.Category, as.character(1976:2024)) %>%
  gather(key = year, value = dollars, - BEA.Category) %>%
  group_by(BEA.Category, year) %>%
  summarize(dollars = sum(dollars[dollars > 0])) %>%
  mutate(year = as.integer(year))

ggplot(sp, aes(x = year, y = dollars / 1e9, col = BEA.Category)) +
  geom_line(aes(lty = (year <= 2019)), lwd = 2) +
  geom_vline(xintercept = 2019) +
  scale_linetype_manual(values = c("dotted", "solid")) +
  scale_x_continuous(breaks = seq(1880, 2030, 8)) +
  theme(aspect.ratio = 1/2) +
  ylab("Billons of Dollars") +
  ggtitle("Spending Category")

# percentage
group_by(budauth, BEA.Category, Agency.Name) %>%
  summarize(dollars = sum(`2019`)) %>%
  mutate(dollars = ifelse(is.na(dollars), 0, dollars)) %>%
  spread(BEA.Category, dollars) %>%
  mutate(pct_man = Mandatory / (Mandatory + Discretionary)) %>%
  arrange(- Mandatory) %>%
  head(24) %>%
  datatable()

7 Adjust for the Consumer Price Index (CPI)

Examples

gg_budget_adj <- inner_join(bud, cpi, by = "year") %>%
  mutate(dollars = dollars * adj) %>%
  filter(Agency.Name %in% top10$Agency.Name) %>%
  ggplot(aes(x = year, y = dollars / 1e9, col = Agency.Name)) +
  geom_line(lwd = 2) +
  geom_vline(xintercept = 2019) +
  scale_x_continuous(breaks = seq(1880, 2030, 8), limits = c(1976, 2024)) +
  theme(aspect.ratio = 1/2) +
  ylab("Billons of Dollars (Adj for Inflation)") +
  ggtitle("Top 10 US Agencies")

gridExtra::grid.arrange(gg_budget, gg_budget_adj, ncol = 1)

8 Percentage of GDP

gg_gdp <- gdp %>%
  inner_join(cpi, by = "year") %>%
  mutate(dollars = gdp * adj) %>%
  ggplot(aes(x = year, y = dollars / 1e9)) +
  geom_line(lwd = 1) +
  scale_x_continuous(breaks = seq(1880, 2030, 8), limits = c(1976, 2019)) +
  theme(aspect.ratio = 1/2) +
  ylab("Billons of Dollars (Adj for Inflation)") +
  ggtitle("Gross Domestic Product")

gg_gdp_pct <- bud %>%
  group_by(year) %>%
  summarize(dollars = sum(dollars)) %>%
  inner_join(gdp, by = "year") %>%
  mutate(pct_gdp = dollars / gdp) %>%
  ggplot(aes(x = year, y = pct_gdp)) +
  geom_line(lwd = 1) +
  # ylim(0, 0.5) +
  scale_x_continuous(breaks = seq(1880, 2030, 8), limits = c(1976, 2019)) +
  theme(aspect.ratio = 1/2) +
  ylab("percent of GDP (%)") +
  ggtitle("US Budget")

gridExtra::grid.arrange(gg_gdp, gg_gdp_pct, ncol = 1)

An introduction to the US Federal Budget with R

Stephen Roecker

2019-06-25

1 Abstract

2 Load packages

3 Budget Data

3.1 GitHub Snapshot

3.2 US Budget

3.3 Gross Domestic Product (GDP)

3.4 Consumer Price Index

3.5 USDA data

3.6 Save Snapshot

3.7 Load Snapshot

4 Breakdown of Budget Authorization

5 Top 10 US Agencies

6 Mandatory vs Discretionary Spending

7 Adjust for the Consumer Price Index (CPI)

8 Percentage of GDP

9 Further Resources