https://github.com/cran/FedData
Raw File
Tip revision: 5138a366cd75297132ef29d1c741c0bf4d3d69da authored by R. Kyle Bocinsky on 11 October 2022, 16:12:40 UTC
version 3.0.0
Tip revision: 5138a36
SSURGO_FUNCTIONS.R
#' Download and crop data from the NRCS SSURGO soils database.
#'
#' This is an efficient method for spatially merging several different soil survey areas
#' as well as merging their tabular data.
#'
#' \code{get_ssurgo} returns a named list of length 2:
#' \enumerate{
#' \item 'spatial': A \code{SpatialPolygonsDataFrame} of soil mapunits
#' in the template, and
#' \item 'tabular': A named list of \code{\link{data.frame}s} with the SSURGO tabular data.
#' }
#'
#' @param template A Raster* or Spatial* object to serve
#' as a template for cropping; optionally, a vector of area names [e.g., c('IN087','IN088')] may be provided.
#' @param label A character string naming the study area.
#' @param raw.dir A character string indicating where raw downloaded files should be put.
#' The directory will be created if missing. Defaults to './RAW/SSURGO/'.
#' @param extraction.dir A character string indicating where the extracted and cropped SSURGO shapefiles should be put.
#' The directory will be created if missing. Defaults to './EXTRACTIONS/SSURGO/'.
#' @param force.redo If an extraction for this template and label already exists, should a new one be created? Defaults to FALSE.
#' @return A named list containing the 'spatial' and 'tabular' data.
#' @export
#' @importFrom sp SpatialPointsDataFrame %over%
#' @importFrom readr read_csv write_csv
#' @examples
#' \dontrun{
#' # Get the NRCS SSURGO data (USA ONLY)
#' SSURGO.MEVE <- get_ssurgo(template = FedData::meve, label = "meve")
#'
#' # Plot the VEP polygon
#' plot(meve$geometry)
#'
#' # Plot the SSURGO mapunit polygons
#' plot(SSURGO.MEVE$spatial, lwd = 0.1, add = T)
#'
#' # Or, download by Soil Survey Area names
#' SSURGO.areas <- get_ssurgo(template = c("CO670", "CO075"), label = "CO_TEST")
#'
#' # Let's just look at spatial data for CO675
#' SSURGO.areas.CO675 <- SSURGO.areas$spatial[SSURGO.areas$spatial$AREASYMBOL == "CO075", ]
#'
#' # And get the NED data under them for pretty plotting
#' NED.CO675 <- get_ned(template = SSURGO.areas.CO675, label = "SSURGO_CO675")
#'
#' # Plot the SSURGO mapunit polygons, but only for CO675
#' plot(NED.CO675)
#' plot(SSURGO.areas.CO675, lwd = 0.1, add = T)
#' }
get_ssurgo <- function(template,
                       label,
                       raw.dir = paste0(tempdir(), "/FedData/raw/ssurgo"),
                       extraction.dir = paste0(tempdir(), "/FedData/"),
                       force.redo = FALSE) {
  raw.dir <- normalizePath(paste0(raw.dir, "/."), mustWork = FALSE)
  extraction.dir <- normalizePath(paste0(extraction.dir, "/."), mustWork = FALSE)

  dir.create(raw.dir, showWarnings = FALSE, recursive = TRUE)
  dir.create(extraction.dir, showWarnings = FALSE, recursive = TRUE)

  outfile <- paste0(extraction.dir, "/", label, "_ssurgo.gpkg")

  if (!force.redo & file.exists(outfile)) {
    SSURGOData <-
      outfile %>%
      sf::st_layers() %$%
      name %>%
      magrittr::set_names(., .) %>%
      purrr::map(~ sf::read_sf(outfile, layer = .x))

    return(list(
      spatial = SSURGOData$geometry,
      tabular = purrr::list_modify(SSURGOData, geometry = NULL)
    ))
  }

  if (identical(class(template), "character")) {
    q <- paste0(
      "SELECT areasymbol, saverest FROM sacatalog WHERE areasymbol IN (", paste(paste0("'", template, "'"), collapse = ","),
      ");"
    )
    SSURGOAreas <- soils_query(q)
  } else {
    template %<>% template_to_sf()

    # Get shapefile of SSURGO study areas in the template
    SSURGOAreas <-
      get_ssurgo_inventory(template = template, raw.dir = raw.dir)

    if (!any(SSURGOAreas$iscomplete == 1)) {
      stop("There are no complete soil surveys in your study area.")
    }


    # Remove SSURGO study areas that are not available
    SSURGOAreas <- SSURGOAreas[SSURGOAreas$iscomplete != 0, ]
  }

  # Get data for each study area
  SSURGOData <-
    purrr::map2(
      .x = SSURGOAreas$areasymbol,
      .y = SSURGOAreas$saverest,
      .f = function(area, date) {
        message("(Down)Loading SSURGO data for survey area ", as.character(area))
        get_ssurgo_study_area(
          template = template,
          area = as.character(area),
          date = as.Date(date, format = "%m/%d/%Y"),
          raw.dir = raw.dir
        )
      }
    ) %>%
    purrr::transpose()


  # Combine mapunits
  SSURGOData$spatial %<>%
    do.call("rbind", .)

  # Crop to template
  if (!identical(class(template), "character")) {
    suppressWarnings(
      SSURGOData$spatial %<>%
        dplyr::ungroup() %>%
        sf::st_cast("MULTIPOLYGON") %>%
        sf::st_cast("POLYGON")
    )

    suppressMessages(
      suppressWarnings(
        SSURGOData$spatial %<>%
          dplyr::filter(
            SSURGOData$spatial %>%
              sf::st_intersects(
                template %>%
                  sf::st_transform(
                    sf::st_crs(SSURGOData$spatial)
                  )
              ) %>%
              purrr::map_lgl(~ (length(.x) > 0))
          )
      )
    )
  }

  SSURGOData$tabular %<>%
    purrr::transpose() %>%
    purrr::map(function(x) {
      x %>%
        purrr::compact() %>%
        purrr::discard(~ nrow(.x) == 0) %>%
        purrr::map(function(y) {
          if (any(names(y) == "musym")) {
            y %<>% dplyr::mutate(musym = as.character(musym))
          }
          y
        }) %>%
        dplyr::bind_rows() %>%
        readr::type_convert(col_types = readr::cols())
    })

  # Extract only the mapunits in the study area,
  # and iterate through the data structure
  SSURGOData$tabular %<>%
    extract_ssurgo_data(as.character(SSURGOData$spatial$MUKEY))

  # SSURGOData$tabular$mapunit %<>%
  #   dplyr::mutate(mukey = as.character(mukey)) %>%
  #   dplyr::left_join(SSURGOData$spatial %>%
  #                      dplyr::select(MUKEY),
  #                    by = c("mukey" = "MUKEY")) %>%
  #   sf::st_as_sf()

  unlink(outfile)

  SSURGOData %$%
    c(geometry = list(spatial), tabular) %>%
    purrr::iwalk(function(x, n) {
      sf::write_sf(x, dsn = outfile, layer = n)
    })

  SSURGOData <-
    outfile %>%
    sf::st_layers() %$%
    name %>%
    magrittr::set_names(., .) %>%
    purrr::map(~ sf::read_sf(outfile, layer = .x))

  return(list(spatial = SSURGOData$geometry, tabular = purrr::list_modify(SSURGOData, geometry = NULL)))
}

#' Download a zipped directory containing a shapefile of the SSURGO study areas.
#'
#' @param raw.dir A character string indicating where raw downloaded files should be put.
#' @return A character string representing the full local path of the SSURGO study areas zipped directory.
#' @export
#' @keywords internal
download_ssurgo_inventory <- function(raw.dir, ...) {
  # Import the shapefile of SSURGO study areas.  This is available at
  # http://soildatamart.sc.egov.usda.gov/download/StatusMaps/soilsa_a_SSURGO.zip
  url <- "https://websoilsurvey.sc.egov.usda.gov/DataAvailability/SoilDataAvailabilityShapefile.zip"
  destdir <- raw.dir
  download_data(url = url, destdir = destdir, ...)
  return(normalizePath(paste(destdir, "/SoilDataAvailabilityShapefile.zip", sep = "")))
}

#' Download and crop a shapefile of the SSURGO study areas.
#'
#' \code{get_ssurgo_inventory} returns a \code{SpatialPolygonsDataFrame} of the SSURGO study areas within
#' the specified \code{template}. If template is not provided, returns the entire SSURGO inventory of study areas.
#'
#' @param template A Raster* or Spatial* object to serve
#' as a template for cropping.
#' @param raw.dir A character string indicating where raw downloaded files should be put.
#' The directory will be created if missing.
#' @return A \code{SpatialPolygonsDataFrame} of the SSURGO study areas within
#' the specified \code{template}.
#' @export
#' @keywords internal
get_ssurgo_inventory <- function(template = NULL, raw.dir) {
  if (!is.null(template)) {
    template %<>%
      template_to_sf() %>%
      sf::st_transform(4326)
  }

  # If there is a template, only download the areas in the template. Thanks to Dylan Beaudette for this method!
  if (!is.null(template) &&
    httr::status_code(
      httr::GET(
        "https://sdmdataaccess.nrcs.usda.gov/Spatial/SDMWGS84Geographic.wfs"
      )
    ) == 200) {
    bounds <-
      template %>%
      sf::st_bbox() %>%
      sf::st_as_sfc()

    # Only download 1 square degree at a time to avoid oversized AOI error
    if ((sf::st_bbox(template)[["xmax"]] - sf::st_bbox(template)[["xmin"]]) > 1 |
      (sf::st_bbox(template)[["ymax"]] - sf::st_bbox(template)[["ymin"]]) > 1) {
      grid <- sp::GridTopology(
        cellcentre.offset = c(-179.5, -89.5),
        cellsize = c(1, 1),
        cells.dim = c(360, 180)
      ) %>%
        # sp::SpatialGrid(proj4string=CRS("+proj=longlat +datum=WGS84")) %>%
        methods::as("SpatialPolygons") %>%
        sf::st_as_sfc() %>%
        sf::st_set_crs(4326)

      bounds %<>%
        sf::st_intersection(grid)
    }

    SSURGOAreas <-
      bounds %>%
      purrr::map_dfr(function(x) {
        bound <-
          x %>%
          sf::st_bbox()

        if (identical(bound["xmin"], bound["xmax"])) {
          bound["xmax"] <- bound["xmax"] + 1e-04
        }
        if (identical(bound["ymin"], bound["ymax"])) {
          bound["ymax"] <- bound["ymax"] + 1e-04
        }

        bbox.text <- paste(bound, collapse = ",")

        temp.file <- paste0(tempdir(), "/soils.gml")

        httr::GET("https://sdmdataaccess.nrcs.usda.gov/Spatial/SDMWGS84Geographic.wfs",
          query = list(
            Service = "WFS",
            Version = "1.1.0",
            Request = "GetFeature",
            Typename = "SurveyAreaPoly",
            BBOX = bbox.text,
            SRSNAME = "EPSG:4326",
            OUTPUTFORMAT = "GML3"
          ),
          httr::write_disk(temp.file,
            overwrite = TRUE
          )
        )

        tryCatch(
          suppressMessages(
            suppressWarnings(
              sf::read_sf(temp.file, drivers = "GML") %>%
                dplyr::mutate(saverest = as.Date(lubridate::parse_date_time(saverest, orders = "b d Y HMOp", locale = "en_US"))) %>%
                # sf::st_intersection(template) %>%
                sf::st_drop_geometry()
            )
          ),
          error = function(e) {
            return(NULL)
          }
        )
      }) %>%
      dplyr::distinct() %>%
      dplyr::arrange(areasymbol)
  } else {
    tmpdir <- tempfile()
    if (!dir.create(tmpdir)) {
      stop("failed to create my temporary directory")
    }

    file <- download_ssurgo_inventory(raw.dir = raw.dir)

    utils::unzip(file, exdir = tmpdir)

    SSURGOAreas <- sf::read_sf(normalizePath(tmpdir), layer = "soilsa_a_nrcs")

    if (!is.null(template)) {
      SSURGOAreas %<>%
        sf::st_make_valid() %>%
        sf::st_intersection(sf::st_transform(template, sf::st_crs(SSURGOAreas)))
    }

    unlink(tmpdir, recursive = TRUE)
  }

  # Check to see if all survey areas are available
  if (0 %in% SSURGOAreas$iscomplete) {
    warning(
      "Some of the soil surveys in your area are unavailable.\n
            Soils and productivity data will have holes.\n
            Missing areas:\n",
      paste0(as.vector(SSURGOAreas[SSURGOAreas$iscomplete == 0, ]$areasymbol), collapse = "\n"), "\n\n
            Continuing with processing available soils.\n\n"
    )
  }

  return(SSURGOAreas)
}

#' Download a zipped directory containing the spatial and tabular data for a SSURGO study area.
#'
#' \code{download_ssurgo_study_area} first tries to download data including a state-specific Access
#' template, then the general US template.
#'
#' @param area A character string indicating the SSURGO study area to be downloaded.
#' @param date A character string indicating the date of the most recent update to the SSURGO
#' area for these data. This information may be gleaned from the SSURGO Inventory (\code{\link{get_ssurgo_inventory}}).
#' @param raw.dir A character string indicating where raw downloaded files should be put.
#' @return A character string representing the full local path of the SSURGO study areas zipped directory.
#' @export
#' @keywords internal
download_ssurgo_study_area <- function(area, date, raw.dir) {

  # Try to download with the state database, otherwise grab the US
  url <- paste("http://websoilsurvey.sc.egov.usda.gov/DSD/Download/Cache/SSA/wss_SSA_", area, "_[", date, "].zip", sep = "")
  destdir <- raw.dir
  download_data(url = url, destdir = destdir, nc = T)

  return(normalizePath(paste(destdir, "/wss_SSA_", area, "_[", date, "].zip", sep = "")))
}

#' Download and crop the spatial and tabular data for a SSURGO study area.
#'
#' \code{get_ssurgo_study_area} returns a named list of length 2:
#' \enumerate{
#' \item 'spatial': A \code{SpatialPolygonsDataFrame} of soil mapunits
#' in the template, and
#' \item 'tabular': A named list of \code{\link{data.frame}s} with the SSURGO tabular data.
#' }
#'
#' @param template A Raster* or Spatial* object to serve
#' as a template for cropping. If missing, whose study area is returned
#' @param area A character string indicating the SSURGO study area to be downloaded.
#' @param date A character string indicating the date of the most recent update to the SSURGO
#' area for these data. This information may be gleaned from the SSURGO Inventory (\code{\link{get_ssurgo_inventory}}).
#' @param raw.dir A character string indicating where raw downloaded files should be put.
#' The directory will be created if missing.
#' @return A \code{SpatialPolygonsDataFrame} of the SSURGO study areas within
#' the specified \code{template}.
#' @export
#' @keywords internal
get_ssurgo_study_area <- function(template = NULL, area, date, raw.dir) {
  tmpdir <- tempfile()
  if (!dir.create(tmpdir)) {
    stop("failed to create my temporary directory")
  }

  file <- download_ssurgo_study_area(area = area, date = date, raw.dir = raw.dir)
  
  utils::unzip(file, exdir = tmpdir)
  suppressMessages({
    mapunits <-
      sf::read_sf(paste0(tmpdir, "/", area, "/spatial"),
        layer = paste0("soilmu_a_", tolower(area))
      ) %>%
      sf::st_make_valid()
  })
  
  # Read in all tables
  tablesData <-
    paste0(tmpdir, "/", area, "/tabular") %>%
    list.files(full.names = T) %>%
    magrittr::set_names(
      .,
      tools::file_path_sans_ext(basename(.))
    ) %>%
    purrr::map(
      function(file) {
        # Hack to bypass one line file bug in readr::read_delim 
        if(length(readLines(file)) == 1){
          write("\n", file, append = TRUE)
        }
        
        tryCatch(
          return(
            readr::read_delim(file,
              col_names = F,
              col_types = readr::cols(.default = readr::col_character()),
              delim = "|"
            )
          ),
          error = function(e) {
            return(NULL)
          }
        )
      }
    ) %>%
    purrr::compact()


  SSURGOTableMapping <-
    tablesData$mstab %>%
    dplyr::select(1, 5) %>%
    magrittr::set_names(c("TABLE", "FILE"))

  tablesData <- tablesData[SSURGOTableMapping$FILE]
  tablesHeads <- tablesHeaders[SSURGOTableMapping$TABLE]

  notNull <- (!sapply(tablesData, is.null) & !sapply(tablesHeads, is.null))
  tablesData <- tablesData[notNull]
  tablesHeads <- tablesHeads[notNull]

  tables <-
    mapply(tablesData,
      tablesHeads,
      FUN = function(theData, theHeader) {
        names(theData) <- names(theHeader)
        return(theData)
      }
    )

  names(tables) <- names(tablesHeads)

  tables %<>%
    extract_ssurgo_data(as.character(mapunits$MUKEY))

  unlink(tmpdir, recursive = TRUE)

  return(list(spatial = mapunits, tabular = tables))
}

#' Extract data from a SSURGO database pertaining to a set of mapunits.
#'
#' \code{extract_ssurgo_data} creates a directed graph of the joins in a SSURGO tabular dataset,
#' and then iterates through the tables, only retaining data pertinent to a set of mapunits.
#'
#' @param tables A list of SSURGO tabular data.
#' @param mapunits A character vector of mapunits (likely dropped from SSURGO spatial data)
#' defining which mapunits to retain.
#' @return A list of extracted SSURGO tabular data.
#' @export
#' @keywords internal
extract_ssurgo_data <- function(tables, mapunits) {
  mapping <- tables$mdstatrshipdet
  mappingGraph <- igraph::graph.edgelist(as.matrix(mapping[, c("ltabphyname", "rtabphyname")]))
  igraph::E(mappingGraph)$mapVar <- as.character(mapping$ltabcolphyname)

  mappingGraph <- igraph::graph.neighborhood(mappingGraph, order = max(sapply(igraph::decompose.graph(mappingGraph), igraph::diameter)) +
    1, nodes = "mapunit", mode = "out")[[1]]
  mapHierarchy <- igraph::shortest.paths(mappingGraph, "mapunit")
  mapHierarchy <- colnames(mapHierarchy)[order(mapHierarchy)]
  mapHierarchy <- mapHierarchy[-1]
  mapEdges <- cbind(igraph::get.edgelist(mappingGraph), igraph::E(mappingGraph)$mapVar)
  mapEdges <- mapEdges[match(mapHierarchy, mapEdges[, 2]), ]

  tables$mapunit %<>%
    dplyr::filter(mukey %in% mapunits)

  for (i in 1:nrow(mapEdges)) {
    X <- mapEdges[i, ]
    tables[[X[2]]] <- tables[[X[2]]][tables[[X[2]]][[X[3]]] %in% tables[[X[1]]][[X[3]]], ]
  }

  return(tables)
}

#' Submit a Soil Data Access (SDA) Query
#'
#' \code{soils_query} submit an SQL query to retrieve data from the Soil Data Mart.
#' Please see https://sdmdataaccess.sc.egov.usda.gov/Query.aspx for guidelines
#'
#' @param q A character string representing a SQL query to the SDA service
#' @return A tibble returned from the SDA service
#' @keywords internal
soils_query <- function(q) {
  tryCatch(
    httr::POST(
      url = "https://sdmdataaccess.sc.egov.usda.gov/tabular/post.rest",
      body = list(
        query = q,
        format = "JSON+COLUMNNAME"
      ),
      encode = "form"
    ) %>%
      httr::content(as = "parse", encoding = "UTF-8") %$%
      Table %>%
      purrr::transpose() %>%
      magrittr::set_names(., purrr::map_chr(., ~ (.x[[1]]))) %>%
      purrr::map(~ (unlist(.x[-1]))) %>%
      tibble::as_tibble(),
    error = function(e) {
      stop("Improperly formatted SDA SQL request")
    }
  )
}
back to top