# Source: https://github.com/cran/FedData
# Tip revision: 60af57602806bdd1561ecaac0c2bc848a52005cb,
#   authored by R. Kyle Bocinsky on 10 March 2023, 21:40:10 UTC
# Version: 3.0.3
# File: SSURGO_FUNCTIONS.R
#' Download and crop data from the NRCS SSURGO soils database.
#'
#' This is an efficient method for spatially merging several different soil survey areas
#' as well as merging their tabular data.
#'
#' \code{get_ssurgo} returns a named list of length 2:
#' \enumerate{
#' \item 'spatial': An \code{sf} data frame of soil mapunits
#' in the template, and
#' \item 'tabular': A named list of \code{\link{data.frame}}s with the SSURGO tabular data.
#' }
#'
#' @param template A Raster* or Spatial* object to serve
#' as a template for cropping; optionally, a vector of area names [e.g., c('IN087','IN088')] may be provided.
#' @param label A character string naming the study area.
#' @param raw.dir A character string indicating where raw downloaded files should be put.
#' The directory will be created if missing. Defaults to a temporary directory, 'tempdir()/FedData/raw/ssurgo'.
#' @param extraction.dir A character string indicating where the extracted and cropped SSURGO data should be put.
#' The directory will be created if missing. Defaults to a temporary directory, 'tempdir()/FedData/'.
#' @param force.redo If an extraction for this template and label already exists, should a new one be created? Defaults to FALSE.
#' @return A named list containing the 'spatial' and 'tabular' data.
#' @export
#' @importFrom sp SpatialPointsDataFrame %over%
#' @importFrom readr read_csv write_csv
#' @examples
#' \dontrun{
#' # Get the NRCS SSURGO data (USA ONLY)
#' SSURGO.MEVE <- get_ssurgo(template = FedData::meve, label = "meve")
#'
#' # Plot the VEP polygon
#' plot(meve$geometry)
#'
#' # Plot the SSURGO mapunit polygons
#' plot(SSURGO.MEVE$spatial, lwd = 0.1, add = T)
#'
#' # Or, download by Soil Survey Area names
#' SSURGO.areas <- get_ssurgo(template = c("CO670", "CO075"), label = "CO_TEST")
#'
#' # Let's just look at spatial data for CO075
#' SSURGO.areas.CO075 <- SSURGO.areas$spatial[SSURGO.areas$spatial$AREASYMBOL == "CO075", ]
#'
#' # And get the NED data under them for pretty plotting
#' NED.CO075 <- get_ned(template = SSURGO.areas.CO075, label = "SSURGO_CO075")
#'
#' # Plot the SSURGO mapunit polygons, but only for CO075
#' plot(NED.CO075)
#' plot(SSURGO.areas.CO075, lwd = 0.1, add = T)
#' }
get_ssurgo <- function(template,
                       label,
                       raw.dir = paste0(tempdir(), "/FedData/raw/ssurgo"),
                       extraction.dir = paste0(tempdir(), "/FedData/"),
                       force.redo = FALSE) {
  raw.dir <- normalizePath(paste0(raw.dir, "/."), mustWork = FALSE)
  extraction.dir <- normalizePath(paste0(extraction.dir, "/."), mustWork = FALSE)
  dir.create(raw.dir, showWarnings = FALSE, recursive = TRUE)
  dir.create(extraction.dir, showWarnings = FALSE, recursive = TRUE)

  # All output (mapunit polygons + every tabular table) is cached in a
  # single GeoPackage, one layer per table.
  outfile <- paste0(extraction.dir, "/", label, "_ssurgo.gpkg")

  # Reuse a previous extraction unless the user forces a redo.
  # `&&` (not the vectorized `&`): both operands are scalars, and
  # short-circuiting skips the file-system check when force.redo is TRUE.
  if (!force.redo && file.exists(outfile)) {
    SSURGOData <-
      outfile %>%
      sf::st_layers() %$%
      name %>%
      magrittr::set_names(., .) %>%
      purrr::map(~ sf::read_sf(outfile, layer = .x))

    return(list(
      spatial = SSURGOData$geometry,
      tabular = purrr::list_modify(SSURGOData, geometry = NULL)
    ))
  }

  if (identical(class(template), "character")) {
    # The user passed soil survey area codes directly (e.g., c('IN087','IN088')):
    # look up their most recent save dates from the Soil Data Access catalog.
    q <- paste0(
      "SELECT areasymbol, saverest FROM sacatalog WHERE areasymbol IN (",
      paste(paste0("'", template, "'"), collapse = ","),
      ");"
    )
    SSURGOAreas <- soils_query(q)
  } else {
    template %<>% template_to_sf()

    # Get the inventory of SSURGO study areas intersecting the template
    SSURGOAreas <-
      get_ssurgo_inventory(template = template, raw.dir = raw.dir)

    if (!any(SSURGOAreas$iscomplete == 1)) {
      stop("There are no complete soil surveys in your study area.")
    }

    # Remove SSURGO study areas that are not available
    SSURGOAreas <- SSURGOAreas[SSURGOAreas$iscomplete != 0, ]
  }

  # Download and load each survey area (spatial + tabular), then transpose
  # into a list with elements $spatial and $tabular.
  SSURGOData <-
    purrr::map2(
      .x = SSURGOAreas$areasymbol,
      .y = SSURGOAreas$saverest,
      .f = function(area, date) {
        message("(Down)Loading SSURGO data for survey area ", as.character(area))
        get_ssurgo_study_area(
          template = template,
          area = as.character(area),
          date = as.Date(date, format = "%m/%d/%Y"),
          raw.dir = raw.dir
        )
      }
    ) %>%
    purrr::transpose()

  # Combine mapunit polygons across survey areas
  SSURGOData$spatial %<>%
    do.call("rbind", .)

  # Crop to template (skipped when the user requested whole survey areas)
  if (!identical(class(template), "character")) {
    suppressWarnings(
      SSURGOData$spatial %<>%
        dplyr::ungroup() %>%
        sf::st_cast("MULTIPOLYGON") %>%
        sf::st_cast("POLYGON")
    )

    # Keep only polygons that intersect the template geometry
    suppressMessages(
      suppressWarnings(
        SSURGOData$spatial %<>%
          dplyr::filter(
            SSURGOData$spatial %>%
              sf::st_intersects(
                template %>%
                  sf::st_transform(
                    sf::st_crs(SSURGOData$spatial)
                  )
              ) %>%
              purrr::map_lgl(~ (length(.x) > 0))
          )
      )
    )
  }

  # Row-bind each table across survey areas. 'musym' is coerced to character
  # first because some areas store it as numeric, which would make
  # bind_rows() fail on a type mismatch; type_convert() then restores
  # the best column types on the merged result.
  SSURGOData$tabular %<>%
    purrr::transpose() %>%
    purrr::map(function(x) {
      x %>%
        purrr::compact() %>%
        purrr::discard(~ nrow(.x) == 0) %>%
        purrr::map(function(y) {
          if (any(names(y) == "musym")) {
            y %<>% dplyr::mutate(musym = as.character(musym))
          }
          y
        }) %>%
        dplyr::bind_rows() %>%
        readr::type_convert(col_types = readr::cols())
    })

  # Extract only the mapunits in the study area,
  # and iterate through the data structure
  SSURGOData$tabular %<>%
    extract_ssurgo_data(as.character(SSURGOData$spatial$MUKEY))

  # Write everything to the GeoPackage cache: the mapunit polygons in the
  # 'geometry' layer, each tabular data frame in its own named layer.
  unlink(outfile)
  SSURGOData %$%
    c(geometry = list(spatial), tabular) %>%
    purrr::iwalk(function(x, n) {
      sf::write_sf(x, dsn = outfile, layer = n)
    })

  # Read the result back from the cache so that cached and fresh
  # extractions return identical objects.
  SSURGOData <-
    outfile %>%
    sf::st_layers() %$%
    name %>%
    magrittr::set_names(., .) %>%
    purrr::map(~ sf::read_sf(outfile, layer = .x))

  return(list(
    spatial = SSURGOData$geometry,
    tabular = purrr::list_modify(SSURGOData, geometry = NULL)
  ))
}
#' Download a zipped directory containing a shapefile of the SSURGO study areas.
#'
#' @param raw.dir A character string indicating where raw downloaded files should be put.
#' @param ... Further arguments passed on to \code{download_data}
#' (e.g., to control caching behavior).
#' @return A character string representing the full local path of the SSURGO study areas zipped directory.
#' @export
#' @keywords internal
download_ssurgo_inventory <- function(raw.dir, ...) {
  # Shapefile of SSURGO soil-data availability, published by NRCS
  # Web Soil Survey. (Formerly at
  # http://soildatamart.sc.egov.usda.gov/download/StatusMaps/soilsa_a_SSURGO.zip)
  url <- "https://websoilsurvey.sc.egov.usda.gov/DataAvailability/SoilDataAvailabilityShapefile.zip"

  download_data(url = url, destdir = raw.dir, ...)

  # download_data() saves the zip under its basename inside raw.dir.
  return(normalizePath(paste0(raw.dir, "/SoilDataAvailabilityShapefile.zip")))
}
#' Download and crop a shapefile of the SSURGO study areas.
#'
#' \code{get_ssurgo_inventory} returns the SSURGO study areas within
#' the specified \code{template}. If template is not provided, returns the
#' entire SSURGO inventory of study areas.
#'
#' @param template A Raster* or Spatial* object to serve
#' as a template for cropping.
#' @param raw.dir A character string indicating where raw downloaded files should be put.
#' The directory will be created if missing.
#' @return A data frame (dropped geometry, WFS path) or \code{sf} object
#' (shapefile path) of the SSURGO study areas within the specified \code{template}.
#' @export
#' @keywords internal
get_ssurgo_inventory <- function(template = NULL, raw.dir) {
  if (!is.null(template)) {
    template %<>%
      template_to_sf() %>%
      sf::st_transform(4326)
  }

  # If there is a template and the NRCS WFS service is up, only query the
  # areas in the template. Thanks to Dylan Beaudette for this method!
  if (!is.null(template) &&
    httr::status_code(
      httr::GET(
        "https://sdmdataaccess.nrcs.usda.gov/Spatial/SDMWGS84Geographic.wfs"
      )
    ) == 200) {
    bounds <-
      template %>%
      sf::st_bbox() %>%
      sf::st_as_sfc()

    # Only download 1 square degree at a time to avoid an oversized AOI
    # error from the service. (`||`, not the vectorized `|`: this is a
    # scalar `if` condition.)
    if ((sf::st_bbox(template)[["xmax"]] - sf::st_bbox(template)[["xmin"]]) > 1 ||
      (sf::st_bbox(template)[["ymax"]] - sf::st_bbox(template)[["ymin"]]) > 1) {
      # A global 1x1-degree fishnet used to tile the request bbox.
      grid <- sp::GridTopology(
        cellcentre.offset = c(-179.5, -89.5),
        cellsize = c(1, 1),
        cells.dim = c(360, 180)
      ) %>%
        methods::as("SpatialPolygons") %>%
        sf::st_as_sfc() %>%
        sf::st_set_crs(4326)

      bounds %<>%
        sf::st_intersection(grid)
    }

    SSURGOAreas <-
      bounds %>%
      purrr::map_dfr(function(x) {
        bound <-
          x %>%
          sf::st_bbox()

        # A degenerate (zero-width or zero-height) bounding box is
        # rejected by the WFS service; pad it by a sliver.
        if (identical(bound["xmin"], bound["xmax"])) {
          bound["xmax"] <- bound["xmax"] + 1e-04
        }
        if (identical(bound["ymin"], bound["ymax"])) {
          bound["ymax"] <- bound["ymax"] + 1e-04
        }

        bbox.text <- paste(bound, collapse = ",")

        temp.file <- paste0(tempdir(), "/soils.gml")

        httr::GET("https://sdmdataaccess.nrcs.usda.gov/Spatial/SDMWGS84Geographic.wfs",
          query = list(
            Service = "WFS",
            Version = "1.1.0",
            Request = "GetFeature",
            Typename = "SurveyAreaPoly",
            BBOX = bbox.text,
            SRSNAME = "EPSG:4326",
            OUTPUTFORMAT = "GML3"
          ),
          httr::write_disk(temp.file,
            overwrite = TRUE
          )
        )

        # A tile containing no survey areas yields unparseable GML;
        # treat it as empty (NULL is dropped by map_dfr).
        tryCatch(
          suppressMessages(
            suppressWarnings(
              sf::read_sf(temp.file, drivers = "GML") %>%
                dplyr::mutate(saverest = as.Date(lubridate::parse_date_time(saverest, orders = "b d Y HMOp", locale = "en_US"))) %>%
                sf::st_drop_geometry()
            )
          ),
          error = function(e) {
            return(NULL)
          }
        )
      }) %>%
      dplyr::distinct() %>%
      dplyr::arrange(areasymbol)
  } else {
    # Fall back to downloading the national availability shapefile.
    tmpdir <- tempfile()
    if (!dir.create(tmpdir)) {
      stop("failed to create my temporary directory")
    }

    file <- download_ssurgo_inventory(raw.dir = raw.dir)

    utils::unzip(file, exdir = tmpdir)

    SSURGOAreas <- sf::read_sf(normalizePath(tmpdir), layer = "soilsa_a_nrcs")

    if (!is.null(template)) {
      SSURGOAreas %<>%
        sf::st_make_valid() %>%
        sf::st_intersection(sf::st_transform(template, sf::st_crs(SSURGOAreas)))
    }

    unlink(tmpdir, recursive = TRUE)
  }

  # Check to see if all survey areas are available
  if (0 %in% SSURGOAreas$iscomplete) {
    warning(
      "Some of the soil surveys in your area are unavailable.\n
Soils and productivity data will have holes.\n
Missing areas:\n",
      paste0(as.vector(SSURGOAreas[SSURGOAreas$iscomplete == 0, ]$areasymbol), collapse = "\n"), "\n\n
Continuing with processing available soils.\n\n"
    )
  }

  return(SSURGOAreas)
}
#' Download a zipped directory containing the spatial and tabular data for a SSURGO study area.
#'
#' \code{download_ssurgo_study_area} first tries to download data including a state-specific Access
#' template, then the general US template.
#'
#' @param area A character string indicating the SSURGO study area to be downloaded.
#' @param date A character string indicating the date of the most recent update to the SSURGO
#' area for these data. This information may be gleaned from the SSURGO Inventory (\code{\link{get_ssurgo_inventory}}).
#' @param raw.dir A character string indicating where raw downloaded files should be put.
#' @return A character string representing the full local path of the SSURGO study areas zipped directory.
#' @export
#' @keywords internal
download_ssurgo_study_area <- function(area, date, raw.dir) {
  # File-name pattern used by the Web Soil Survey download cache; built once
  # so the URL and the returned local path cannot drift apart.
  zip.name <- paste0("wss_SSA_", area, "_[", date, "].zip")

  # Try to download with the state database, otherwise grab the US
  url <- paste0("http://websoilsurvey.sc.egov.usda.gov/DSD/Download/Cache/SSA/", zip.name)

  # nc = TRUE: the cache URL does not support timestamp checks
  download_data(url = url, destdir = raw.dir, nc = TRUE)

  return(normalizePath(paste0(raw.dir, "/", zip.name)))
}
#' Download and crop the spatial and tabular data for a SSURGO study area.
#'
#' \code{get_ssurgo_study_area} returns a named list of length 2:
#' \enumerate{
#' \item 'spatial': An \code{sf} data frame of soil mapunits
#' in the study area, and
#' \item 'tabular': A named list of \code{\link{data.frame}}s with the SSURGO tabular data.
#' }
#'
#' @param template A Raster* or Spatial* object to serve
#' as a template for cropping. If missing, the whole study area is returned.
#' @param area A character string indicating the SSURGO study area to be downloaded.
#' @param date A character string indicating the date of the most recent update to the SSURGO
#' area for these data. This information may be gleaned from the SSURGO Inventory (\code{\link{get_ssurgo_inventory}}).
#' @param raw.dir A character string indicating where raw downloaded files should be put.
#' The directory will be created if missing.
#' @return A named list with the 'spatial' and 'tabular' data for the study area.
#' @export
#' @keywords internal
get_ssurgo_study_area <- function(template = NULL, area, date, raw.dir) {
  tmpdir <- tempfile()
  if (!dir.create(tmpdir)) {
    stop("failed to create my temporary directory")
  }

  file <- download_ssurgo_study_area(area = area, date = date, raw.dir = raw.dir)

  utils::unzip(file, exdir = tmpdir)

  # Mapunit polygons for this survey area
  suppressMessages({
    mapunits <-
      sf::read_sf(paste0(tmpdir, "/", area, "/spatial"),
        layer = paste0("soilmu_a_", tolower(area))
      ) %>%
      sf::st_make_valid()
  })

  # Read in all tables (pipe-delimited, no headers, all-character columns)
  tablesData <-
    paste0(tmpdir, "/", area, "/tabular") %>%
    list.files(full.names = TRUE) %>%
    magrittr::set_names(
      .,
      tools::file_path_sans_ext(basename(.))
    ) %>%
    purrr::map(
      function(file) {
        # Hack to bypass one line file bug in readr::read_delim
        if (length(readLines(file)) == 1) {
          write("\n", file, append = TRUE)
        }

        # Unreadable tables become NULL and are dropped by compact() below
        tryCatch(
          return(
            readr::read_delim(file,
              col_names = FALSE,
              col_types = readr::cols(.default = readr::col_character()),
              delim = "|"
            )
          ),
          error = function(e) {
            return(NULL)
          }
        )
      }
    ) %>%
    purrr::compact()

  # The 'mstab' table maps SSURGO logical table names (column 1)
  # to the on-disk file names (column 5).
  SSURGOTableMapping <-
    tablesData$mstab %>%
    dplyr::select(1, 5) %>%
    magrittr::set_names(c("TABLE", "FILE"))

  tablesData <- tablesData[SSURGOTableMapping$FILE]
  # tablesHeaders is package data mapping table names to canonical headers
  tablesHeads <- tablesHeaders[SSURGOTableMapping$TABLE]

  # Keep only tables for which both the data and the header template exist
  notNull <- (!sapply(tablesData, is.null) & !sapply(tablesHeads, is.null))
  tablesData <- tablesData[notNull]
  tablesHeads <- tablesHeads[notNull]

  # Apply the canonical column names to each raw table.
  # SIMPLIFY = FALSE guarantees a list even when the results could be
  # simplified to an array (mapply's default would then break names<- below).
  tables <-
    mapply(tablesData,
      tablesHeads,
      FUN = function(theData, theHeader) {
        names(theData) <- names(theHeader)
        return(theData)
      },
      SIMPLIFY = FALSE
    )
  names(tables) <- names(tablesHeads)

  # Retain only records linked to the mapunits present in the spatial data
  tables %<>%
    extract_ssurgo_data(as.character(mapunits$MUKEY))

  unlink(tmpdir, recursive = TRUE)

  return(list(spatial = mapunits, tabular = tables))
}
#' Extract data from a SSURGO database pertaining to a set of mapunits.
#'
#' \code{extract_ssurgo_data} creates a directed graph of the joins in a SSURGO tabular dataset,
#' and then iterates through the tables, only retaining data pertinent to a set of mapunits.
#'
#' @param tables A list of SSURGO tabular data.
#' @param mapunits A character vector of mapunits (likely dropped from SSURGO spatial data)
#' defining which mapunits to retain.
#' @return A list of extracted SSURGO tabular data.
#' @export
#' @keywords internal
extract_ssurgo_data <- function(tables, mapunits) {
  # 'mdstatrshipdet' records the join relationships between SSURGO tables:
  # left (parent) table, right (child) table, and the join column.
  mapping <- tables$mdstatrshipdet

  mappingGraph <- igraph::graph.edgelist(as.matrix(mapping[, c("ltabphyname", "rtabphyname")]))
  igraph::E(mappingGraph)$mapVar <- as.character(mapping$ltabcolphyname)

  # Restrict the graph to tables reachable downstream from 'mapunit'
  mappingGraph <- igraph::graph.neighborhood(mappingGraph, order = max(sapply(igraph::decompose.graph(mappingGraph), igraph::diameter)) +
    1, nodes = "mapunit", mode = "out")[[1]]

  # Order tables by distance from 'mapunit' so parents are filtered
  # before their children.
  mapHierarchy <- igraph::shortest.paths(mappingGraph, "mapunit")
  mapHierarchy <- colnames(mapHierarchy)[order(mapHierarchy)]
  mapHierarchy <- mapHierarchy[-1] # drop 'mapunit' itself

  mapEdges <- cbind(igraph::get.edgelist(mappingGraph), igraph::E(mappingGraph)$mapVar)
  mapEdges <- mapEdges[match(mapHierarchy, mapEdges[, 2]), ]

  # Keep only the requested mapunits...
  tables$mapunit %<>%
    dplyr::filter(mukey %in% mapunits)

  # ...then cascade the filter down the join hierarchy: each child table
  # keeps only rows whose join-key value survives in its parent.
  # seq_len() (not 1:nrow) behaves correctly when there are no edges.
  for (i in seq_len(nrow(mapEdges))) {
    X <- mapEdges[i, ] # X[1] = parent, X[2] = child, X[3] = join column
    tables[[X[2]]] <- tables[[X[2]]][tables[[X[2]]][[X[3]]] %in% tables[[X[1]]][[X[3]]], ]
  }

  return(tables)
}
#' Submit a Soil Data Access (SDA) Query
#'
#' \code{soils_query} submits an SQL query to retrieve data from the Soil Data Mart.
#' Please see https://sdmdataaccess.sc.egov.usda.gov/Query.aspx for guidelines.
#'
#' @param q A character string representing a SQL query to the SDA service
#' @return A tibble returned from the SDA service
#' @keywords internal
soils_query <- function(q) {
  tryCatch(
    httr::POST(
      url = "https://sdmdataaccess.sc.egov.usda.gov/tabular/post.rest",
      body = list(
        query = q,
        format = "JSON+COLUMNNAME"
      ),
      encode = "form"
    ) %>%
      # 'parsed' spelled in full: the previous as = "parse" only worked via
      # partial argument-value matching inside httr.
      httr::content(as = "parsed", encoding = "UTF-8") %$%
      Table %>%
      # With JSON+COLUMNNAME, the first row of Table holds the column
      # names; the remaining rows hold the values.
      purrr::transpose() %>%
      magrittr::set_names(., purrr::map_chr(., ~ (.x[[1]]))) %>%
      purrr::map(~ (unlist(.x[-1]))) %>%
      tibble::as_tibble(),
    error = function(e) {
      stop("Improperly formatted SDA SQL request")
    }
  )
}