knitr::opts_chunk$set(message = FALSE, warning = FALSE)
library(ggplot2)
library(dplyr)

1 Case Study 1 - Systematic effects of varying grid and neighorhood size on LSP

1.1 Tidy Old Data

load(file = "C:/workspace2/github/thesis-wvu/trunk/scale/lidar_data.RData")

lidar <- rbind(
  # Gilmer
  stack(gil01m.df),
  stack(gil03m.df),
  stack(gil09m.df),
  stack(gil27m.df),
  # Jefferson
  stack(jef01m.df),
  stack(jef03m.df),
  stack(jef09m.df),
  stack(jef27m.df)
  )
lidar$ind <- as.character(lidar$ind)

# split property.filename grouping variable into separate columns
lidar <- cbind(lidar,
               data.frame(
                 do.call("rbind", 
                         strsplit(lidar$ind, "_")
                ),
                stringsAsFactors = FALSE
                ))
names(lidar)[3:4] <- c("loc", "var")

lidar <- within(lidar, {
  res = as.numeric(substr(loc, 4, 5))
  loc = substr(loc, 1, 3)
  loc = ifelse(loc == "gil", "Gilmer", "Jefferson")
  
  var2 = var
  var2  = sub("sgp", "sg", var)
  var = substr(var2, 1, 2)
  var[var == "sg"] = "slope"
  var[var == "sa"] = "northness"
  var[var == "kp"] = "cupro"
  var[var == "kt"] = "cutan"
  var = factor(var, levels = c("slope", "northness", "cupro", "cutan"))
  
  ws = substr(var2, 3, nchar(var2))
  ws = as.numeric(sub("w|w360", "", ws))
  var2 = NULL
  
  ns  = as.factor(res * ws)
  res = as.factor(res)
  ws  = as.factor(ws)
  })
lidar <- lidar[c("loc", "res", "ns", "ws", "var", "values")]

save(lidar, file = "lidar_data2.RData")

1.2 Create Boxplots

load(file = "lidar_data2.RData")

lidar <- within(lidar, {
  values = ifelse(var == "slope" & loc == "Gilmer" & values > 100,    NA, values)
  values = ifelse(var == "slope" & loc == "Jefferson" & values > 20,     NA, values)
  values = ifelse(var %in% c("cupro", "cutan") & loc == "Gilmer" & abs(values) > 20, NA, values)
  values = ifelse(var %in% c("cupro", "cutan") & loc == "Jefferson" & abs(values) > 10, NA, values)
  })

# Gilmer Boxplots
filter(lidar, loc == "Gilmer") %>%
ggplot(aes(x = ns, y = values, fill = res)) +
  geom_boxplot() +
  facet_wrap(~ var, scales = "free_y") +
  scale_fill_discrete(name = "grid size\n(meters)") +
  xlab("neighborhood size (meters)") +
  labs(caption = "slope y-axis max set at 100\ncupro & cutan max set at abs(values) < 20") +
  ggtitle("Gilmer")

# Jefferson Boxplots
filter(lidar, loc == "Jefferson") %>%
ggplot(aes(x = ns, y = values, fill = res)) +
  geom_boxplot() +
  facet_wrap(~ var, scales = "free_y") +
  scale_fill_discrete(name = "grid size\n(meters)") +
  xlab("neighborhood size (meters)") +
  labs(caption = "slope y-axis max set at 20\ncupro & cutan max set at abs(values) < 10") +
  ggtitle("Jefferson")

1.3 Compute LiDAR Comparisons

load(file = "C:/workspace2/github/thesis-wvu/trunk/scale/lidar_data2.RData")

lidar <- within(lidar, {
  ind    = paste(loc, var, res, ns, sep = "_")
  ns     = as.character(ns)
  })


vars <- c("loc", "var", "res")
lidar_dif <- {
  split(lidar, lidar[vars]) ->.;
  lapply(., function(x) {
    us = data.frame(unstack(x, values ~ ns))
    names(us) = sub("X", "", names(us))
    idx = !is.na(as.numeric(names(us)))
    na  = sort(as.numeric(names(us)[idx]))
    us  = us[as.character(na)]

    x2 = data.frame(
      x[1, vars],
      us,
      check.names      = FALSE,
      stringsAsFactors = FALSE
      )
    
    idx = which(!is.na(as.numeric(names(x2))))
    x3 = data.frame(
      x[1, vars],
      ns   = as.numeric(names(x2)[idx]),
      MD   = round(     colMeans( x2[, idx[1]] - x2[idx],    na.rm = TRUE), 3),
      RMSD = round(sqrt(colMeans((x2[, idx[1]] - x2[idx])^2, na.rm = TRUE)), 3),
      r    = round(           cor(x2[idx[1]],  x2[idx],      use = "pairwise.complete.obs"), 3)[1, ]
      )
  }) ->.;
  do.call("rbind", .)
  }

vars <- c("MD","RMSD", "r")
lidar_lo <- reshape(lidar_dif, 
                    direction = "long",
                    timevar = "variable", times = vars,
                    v.names = "value",    varying = vars
                    )

1.4 Create Line Plots

# slope Gradient
filter(lidar_lo, var == "slope" & variable %in% c("MD", "RMSD")) %>%
  ggplot(aes(x = ns, y = value, col = res)) +
  geom_line(lwd = 1) +
  geom_point(size = 2) +
  facet_grid(variable ~ loc) +
  scale_color_discrete(name="grid size\n(meters)") +
  xlab("neighborhood size (meters)") +
  ylab("difference (%)") + 
  ggtitle("MD and RMSD for Slope Gradient")

# RMSD differences
filter(lidar_lo, ! var == "slope" & variable == "RMSD") %>%
  ggplot(aes(x = ns, y = value, col = res)) +
  geom_line(lwd = 1) +
  geom_point(size = 2) +
  facet_grid(var ~ loc, scales = "free_y") +
  scale_color_discrete(name="grid size\n(meters)") +
  xlab("neighborhood size (meters)") +
  ylab("RMSD") + 
  ggtitle("RMSD for Northness and Curvatures")

# correlations
filter(lidar_lo, variable %in% "r") %>%
  ggplot(aes(x = ns, y = value, col = res)) +
  geom_line(lwd = 1) +
  geom_point(size = 2) +
  facet_grid(var ~ loc) +
  theme(aspect.ratio = 1/2) +
  scale_color_discrete(name="grid size\n(meters)") +
  xlab("neighborhood size (meters)") +
  ylab("correlation coefficient (r)") + ylim(0, 1) + 
  ggtitle("Correlations for All DEM Derivatives")

2 Case Study 2 - Soil and LSP correlations response to neighborhood size

2.1 Tidy Soil Data

library(aqp)
library(dplyr)

fp <- "C:/Users/Stephen.Roecker/NextCloud/projects/thesis-wvu"

s <- read.csv(file.path(fp, "siteAttributes.csv"), stringsAsFactors = FALSE)
s[1:4] <- lapply(s[1:4], as.numeric)

h <- read.csv(file.path(fp, "horizonAttributes.csv"), stringsAsFactors = FALSE)
h$CaMg <- with(h, Ca + Mg) 
spc <- h
depths(spc) <- upedonid ~ hzdept + hzdepb

h_s <- aqp::slice(spc, 0:150 ~ fragvol + clay + sand + C + pH + CaMg, just.the.data = TRUE)
h_s$dep_int <- cut(h_s$hzdepb,
                   breaks = c(0, 15, 60, 100, 150), 
                   labels = c("0-15", "15-60", "60-100", "100-150")
                   )

h_di <- group_by(h_s, upedonid, dep_int) %>%
  summarize(fragvol = mean(fragvol, na.rm = TRUE),
            clay    = mean(clay,    na.rm = TRUE),
            C       = sum(C),
            pH      = mean(pH,      na.rm = TRUE),
            CaMg    = sum(CaMg), 
            hzthk   = sum(!is.na(.pctMissing))
            )%>%
  mutate(fragvol = ifelse(fragvol <= 0.1, 0.1, fragvol),
          C       = ifelse(C == 0,         NA,  C),
          CaMg    = ifelse(CaMg == 0,      NA,  CaMg)
          )
sh_di <- merge(s, h_di, by = "upedonid", all.x = TRUE)

2.2 Depth Plots

library(ggplot2)
library(GGally)

# Depth Plot

h_slab <- slab(spc, ~ fragvol + clay + C + pH + CaMg)

ggplot(h_slab, aes(x = bottom, y = p.q50)) +
  geom_line() +
  geom_ribbon(aes(ymin = p.q5,  ymax = p.q95, x = bottom), alpha = 0.2) + 
  geom_ribbon(aes(ymin = p.q25, ymax = p.q75, x = bottom), alpha = 0.2) + 
  xlim(150, 0) +
  facet_wrap(~ variable, scales = "free_x") +
  coord_flip() +
  xlab("depth (cm)") +
  ylab("5th, 25th, Median, 75th, and 95th Quantiles") +
  ggtitle("Depth Plot of Soil Properties")

# Scatter Plot Matrix

h$CaMg_log <- log(h$CaMg + 0.1)
sh_di$CaMg_log <- log(sh_di$CaMg + 0.1)

vars <- c("fragvol", "clay", "C", "pH", "CaMg_log")
ggpairs(h[vars])

ggpairs(sh_di[vars])

2.3 Sample Geodata and Correlate with Soil Depth Intervals

library(sp)
library(sf)
library(raster)

pts <- sh_di
coordinates(pts) <- ~ utm_easting + utm_northing
proj4string(pts) <- CRS("+init=epsg:26917")

pts2 <- st_sf(sh_di, 
              geometry = st_sfc(st_multipoint(as.matrix(sh_di[3:4]))),
              crs      = "+init=epsg:26917"
              )

# stack SAGA rasters

load(file = "C:/workspace2/github/thesis-wvu/trunk/scale/geodata_df.RData")

sg <- {
  subset(geodata, ! var %in% c("elev", "cucon") & ! grepl("slopeR|slopeD", var) & ! ws %in% c(27, 45, 63, 81)) ->.;
  split(., .$res) ->.;
  lapply(., function(x) { 
    cat("raster stacking and extracting", x$res[1], "\n")
    rs = stack(x$sdat)
    sg = as.data.frame(raster::extract(rs, pts, sp = TRUE))
    
    # compute northness
    idx      = grepl("aspect", names(sg))
    sg[idx]  = lapply(sg[idx], function(x) abs(180 - x))
    
    return(sg)
    }) ->.;
  }

sg2 <- {
  lapply(sg, function(x) {
    cat("correlating, stacking, and tidying", x$res[1]
    # split by depth interval
    split(x, x$dep_int) ->.;
    # compute correlation and convert to long format
    lapply(., function(x2) {
      
      fragvol = data.frame(hzthk = x2$hzthk, fragvol = log(x2$fragvol + 0.1), x2[16:ncol(x2)])
      clay    = data.frame(hzthk = x2$hzthk, clay    = x2$clay,               x2[16:ncol(x2)])
      C       = data.frame(hzthk = x2$hzthk, C       = x2$C,                  x2[16:ncol(x2)])
      pH      = data.frame(hzthk = x2$hzthk, pH      = x2$pH,                 x2[16:ncol(x2)])
      CaMg    = data.frame(hzthk = x2$hzthk, CaMg    = log(x2$CaMg + 0.1),    x2[16:ncol(x2)])
      
      # fragvol
      idx      = complete.cases(fragvol)
      frag_cor = cov.wt(x = fragvol[idx, ], wt = fragvol$hzthk[idx], cor = TRUE)$cor[- c(1:2), 2]
      # clay
      idx      = complete.cases(clay)
      clay_cor = cov.wt(x = clay[idx, ],    wt = clay$hzthk[idx],    cor = TRUE)$cor[- c(1:2), 2]
      # C
      idx      = complete.cases(C)
      C_cor    = cov.wt(x = C[idx, ],       wt = C$hzthk[idx],       cor = TRUE)$cor[- c(1:2), 2]
      # pH
      idx      = complete.cases(pH)
      pH_cor   = cov.wt(x = pH[idx, ],      wt = pH$hzthk[idx],      cor = TRUE)$cor[- c(1:2), 2]
      # CaMg
      idx      = complete.cases(CaMg)
      CaMg_cor = cov.wt(x = CaMg[idx, ],    wt = CaMg$hzthk[idx],    cor = TRUE)$cor[- c(1:2), 2]
      
      test = data.frame(values = c(fragvol = frag_cor, clay = clay_cor, C = C_cor, pH = pH_cor, CaMg = CaMg_cor))
      test$ind = row.names(test)
      # append depth interval
      test$dep_int = x2$dep_int[1]
      
      return(test)
      }) -> .;
    do.call("rbind", .) ->test;
    
    # split property.filename grouping variable into separate columns
    test = cbind(
      test,
      data.frame(
        do.call("rbind",
                strsplit(test$ind, "\\.|_")
                ),
        stringsAsFactors = FALSE
        )
      )
    names(test)[4:7] = c("prop", "source", "area", "var")
    
    # split source and variable columns into additional columns
    test = within(test, {
      res    = substr(source, 5, 8)
      res    = as.numeric(substr(res, 1, nchar(res) - 1))
      source = substr(source, 1, 4)
      ws     = sub("slope|aspect|cupro|cutan", "", var)
      ws     = as.numeric(substr(ws, 1, nchar(ws) - 1))
      ns     = ws * res
      res    = factor(res, levels = sort(unique(res)))
      dep_int = factor(paste(dep_int, "cm"), levels = c("0-15 cm", "15-60 cm", "60-100 cm", "100-150 cm"))
      var[grepl("slope",  var)] = "slope"
      var[grepl("aspect", var)] = "northness"
      var[grepl("cupro",  var)] = "cupro"
      var[grepl("cutan",  var)] = "cutan"
    })

    }) ->.;
  do.call("rbind", .) ->.;
  }

save(sg2, file = "C:/Users/Stephen.Roecker/NextCloud/projects/thesis-wvu/soil_correlations.RData")

2.4 Create Line Plots

2.4.1 All DEM Grid and Neighborhood Size Combinations

load(file = "C:/Users/Stephen.Roecker/NextCloud/projects/thesis-wvu/soil_correlations.RData")

filter(sg2, ! res %in% c("6", "45", "81")) %>%
  ggplot(aes(x = ns, y = values, col = var, shape = res)) +
  geom_point(size = 2.5, alpha = 0.5) +
  geom_line(lwd = 1, alpha = 0.5) +
  geom_hline(yintercept = 0) +
  facet_grid(prop ~ dep_int) +
  theme(aspect.ratio = 1) +
  scale_shape_discrete(name = "grid size\n(meters)") +
  scale_color_discrete(name="terrain\nattribute")+
  xlab("neighborhood size (meters)") +
  ylab("correlation coefficient (r)") +
  labs(caption = "log transformation applied to fragvol and CaMg") +
  ggtitle("Correlation Between Soil Properties and Terrain Attributes\nOver 4 Depth Intervals and Several Grid and Neighborhood Sizes")

2.4.2 9-meter DEM and Neighborhood Size Combinations

filter(sg2, res == "9") %>%
  ggplot(aes(x = ns, y = values, col = var, shape = res)) +
  geom_point(size = 2.5) +
  geom_line(lwd = 1) +
  geom_hline(yintercept = 0) +
  facet_grid(prop ~ dep_int) +
  theme(aspect.ratio = 1) +
  scale_shape_discrete(name = "grid size\n(meters)") +
  scale_color_discrete(name="terrain\nattribute")+
  xlab("neighborhood size (meters)") +
  ylab("correlation coefficient (r)") +
  labs(caption = "log transformation applied to fragvol and CaMg") +
  ggtitle("Correlation Between Soil Properties and Terrain Attributes\nOver 4 Depth Intervals and Several Neighborhood Sizes")

2.4.3 All DEM Grid Sizes and 3x3 Window Size Combinations

filter(sg2, ws == 3) %>%
  ggplot(aes(x = ns, y = values, col = var)) +
  geom_point(aes(shape = res), size = 2.5) +
  geom_line(lwd = 1) +
  geom_hline(yintercept = 0) +
  facet_grid(prop ~ dep_int) +
  theme(aspect.ratio = 1) +
  scale_shape_discrete(name = "grid size\n(meters)") +
  scale_color_discrete(name="terrain\nattribute")+
  xlab("neighborhood size (meters)") +
  ylab("correlation coefficient (r)") +
  labs(caption = "log transformation applied to fragvol and CaMg") +
  ggtitle("Correlation Between Soil Properties and Terrain Attributes\nOver 4 Depth Intervals and Several Grid Sizes")