https://github.com/cran/FedData
Tip revision: 88660a732f4b9b57d13b7505620f5fe11cc60566 authored by R. Kyle Bocinsky on 03 October 2016, 22:45:45 UTC
version 2.3.0
version 2.3.0
Tip revision: 88660a7
ITRDB_FUNCTIONS.R
#' Download the latest version of the ITRDB, and extract given parameters.
#'
#' \code{get_itrdb} returns a named list of length 3:
#' \enumerate{
#' \item "metadata": A data.table or \code{SpatialPointsDataFrame} (if \code{makeSpatial==TRUE}) of the locations
#' and names of extracted ITRDB chronologies,
#' \item "widths": A matrix of tree-ring widths/densities given user selection, and
#' \item "depths": A matrix of tree-ring sample depths.
#' }
#'
#' @param template A Raster* or Spatial* object to serve
#' as a template for selecting chronologies. If missing,
#' all available global chronologies are returned.
#' @param label A character string naming the study area.
#' @param recon.years A numeric vector of years over which reconstructions are needed;
#' if missing, the union of all years in the available chronologies are given.
#' @param calib.years A numeric vector of all required years---chronologies without these years will be discarded;
#' if missing, all available chronologies are given.
#' @param species A character vector of 4-letter tree species identifiers;
#' if missing, all available chronologies are given.
#' @param measurement.type A character vector of measurement type identifiers. Options include:
#' \itemize{
#' \item "Total Ring Density"
#' \item "Earlywood Width"
#' \item "Earlywood Density"
#' \item "Latewood Width"
#' \item "Minimum Density"
#' \item "Ring Width"
#' \item "Latewood Density"
#' \item "Maximum Density"
#' \item "Latewood Percent"
#' }
#' if missing, all available chronologies are given.
#' @param chronology.type A character vector of chronology type identifiers. Options include:
#' \itemize{
#' \item "ARSTND"
#' \item "Low Pass Filter"
#' \item "Residual"
#' \item "Standard"
#' \item "Re-Whitened Residual"
#' \item "Measurements Only"
#' }
#' if missing, all available chronologies are given.
#' @param makeSpatial Should the metadata be presented as a SpatialPointsDataFrame? Defaults to FALSE.
#' @param raw.dir A character string indicating where raw downloaded files should be put.
#' The directory will be created if missing. Defaults to "./RAW/ITRDB/".
#' @param extraction.dir A character string indicating where the extracted and cropped ITRDB dataset should be put.
#' The directory will be created if missing. Defaults to "./EXTRACTIONS/ITRDB/".
#' @param force.redo If an extraction already exists, should a new one be created? Defaults to FALSE.
#' @return A named list containing the "metadata", "widths", and "depths" data.
#' @export
#' @importFrom data.table :=
#' @examples
#' \dontrun{
#' # Extract data for the Village Ecodynamics Project "VEPIIN" study area:
#' # http://village.anth.wsu.edu
#' vepPolygon <- polygon_from_extent(raster::extent(672800,740000,4102000,4170000),
#' proj4string="+proj=utm +datum=NAD83 +zone=12")
#'
#' # Get the ITRDB records
#' ITRDB <- get_itrdb(template=vepPolygon, label="VEPIIN", makeSpatial=T)
#'
#' # Plot the VEP polygon
#' plot(vepPolygon)
#'
#' # Map the locations of the tree ring chronologies
#' plot(ITRDB$metadata, pch=1, add=T)
#' legend('bottomleft', pch=1, legend="ITRDB chronologies")
#' }
get_itrdb <- function(template=NULL, label=NULL, recon.years=NULL, calib.years=NULL, species=NULL, measurement.type=NULL, chronology.type=NULL, makeSpatial=FALSE, raw.dir="./RAW/ITRDB/", extraction.dir=ifelse(!is.null(label),paste0("./EXTRACTIONS/",label,"/ITRDB/"),"./EXTRACTIONS/ITRDB/"), force.redo=FALSE){
  dir.create(raw.dir, showWarnings = FALSE, recursive = TRUE)
  dir.create(extraction.dir, showWarnings = FALSE, recursive = TRUE)

  # No template and no label: extract everything under a default label.
  if(is.null(template) && is.null(label)){
    label <- "allChronologies"
  }
  # A template without a label would leave the cache file unnameable.
  if(!is.null(template) && is.null(label)){
    stop("Template provided but no label given.")
  }

  # Short-circuit: return the cached extraction unless a redo was forced.
  # (label is guaranteed non-NULL past the checks above.) One path is used
  # for both the check here and the write at the bottom.
  cache.file <- paste0(extraction.dir, '/', label, '_ITRDB.Rds')
  if(!force.redo && !is.null(label) && file.exists(cache.file)){
    out <- readr::read_rds(cache.file)
    return(out)
  }

  # Download (or load the local cache of) the full ITRDB.
  data <- download_itrdb(raw.dir=raw.dir, force.redo=force.redo)

  ## Nulling out to appease R CMD CHECK
  LAT <- LON <- START <- END <- SPECIES <- MEASUREMENT_TYPE <- CHRONOLOGY_TYPE <- NULL

  # Keep only chronologies spanning the entire calibration period.
  # Bounds are inclusive: a chronology whose START or END equals the
  # calibration limits still contains those years, so it must be kept.
  if(!is.null(calib.years)){
    data <- data[START <= min(calib.years) & END >= max(calib.years)]
  }

  # Optional filters on species, measurement type, and chronology type.
  if(!is.null(species)){
    data <- data[SPECIES %in% species]
  }
  if(!is.null(measurement.type)){
    data <- data[MEASUREMENT_TYPE %in% measurement.type]
  }
  if(!is.null(chronology.type)){
    data <- data[CHRONOLOGY_TYPE %in% chronology.type]
  }

  # Spatial selection: drop records with impossible coordinates, then keep
  # only points falling within the template (reprojected to lon/lat).
  if(!is.null(template)){
    data <- data[LAT >= -90 & LAT <= 90 & LON >= -180 & LON <= 180]
    sp.data <- sp::SpatialPointsDataFrame(as.matrix(data[, c("LON","LAT"), with=FALSE]), data=data[, "SERIES", with=FALSE, drop=FALSE], proj4string=sp::CRS("+proj=longlat +datum=WGS84"))
    data <- data[!is.na((sp.data %over% sp::spTransform(template, sp::CRS(raster::projection(sp.data)))))]
    rm(sp.data)
    if(dim(data)[[1]]==0) stop("No ITRDB chronologies within template polygon.")
  }

  # Default reconstruction years: the union of all years present in the
  # selected chronologies.
  if(is.null(recon.years)){
    if(dim(data)[[1]]==0) stop("No ITRDB chronologies within template polygon.")
    all.years <- range(as.numeric(unique(unlist(sapply(data[, data], rownames)))))
    recon.years <- all.years[[1]]:all.years[[2]]
  }

  # Pad every chronology to the full recon.years span, inserting NA rows
  # for missing years so all series share a common year index.
  all.data <- lapply(data[, data], function(df){
    df <- df[rownames(df) %in% recon.years, ]
    df.years <- as.numeric(rownames(df))
    missing.years <- setdiff(recon.years, df.years)
    missing.mat <- matrix(nrow = length(missing.years), ncol = 2)
    colnames(missing.mat) <- c("WIDTH","DEPTH")
    rownames(missing.mat) <- missing.years
    df.all <- rbind(df, missing.mat)
    df.all <- df.all[order(as.numeric(rownames(df.all))), ]
  })
  names(all.data) <- data$SERIES

  # Assemble year-by-series matrices of ring widths and sample depths.
  widths <- sapply(all.data, function(df){data.matrix(df[, "WIDTH", drop=FALSE])})
  depths <- sapply(all.data, function(df){data.matrix(df[, "DEPTH", drop=FALSE])})
  colnames(widths) <- colnames(depths) <- names(all.data)
  rownames(widths) <- rownames(depths) <- recon.years

  # Drop the embedded per-record data column; what remains is the metadata.
  data <- data[, data:=NULL]
  if(makeSpatial){
    data <- sp::SpatialPointsDataFrame(as.matrix(data[, c("LON","LAT"), with=FALSE]),
                                       data = data,
                                       proj4string = sp::CRS("+proj=longlat +datum=WGS84"))
  }

  out <- list(metadata = data, widths = widths, depths = depths)

  # Cache the extraction for future calls with the same label.
  if(!is.null(label)){
    readr::write_rds(out, path = cache.file)
  }
  return(out)
}
#' Download the latest version of the ITRDB.
#'
#' Downloads and parses the latest zipped (numbered) version of the ITRDB.
#' This function includes improvements to the \code{read.crn} function from the
#' \pkg{dplR} library. The principal changes are better parsing of metadata, and support
#' for the Schweingruber-type Tucson format. Chronologies that are unable to be read
#' are reported to the user.
#'
#' @param raw.dir A character string indicating where raw downloaded files should be put.
#' The directory will be created if missing. Defaults to "./RAW/ITRDB/".
#' @param force.redo If a download already exists, should a new one be created? Defaults to FALSE.
#' @return A data.table containing all of the ITRDB data.
#' @export
#' @keywords internal
download_itrdb <- function(raw.dir="./RAW/ITRDB/", force.redo=FALSE){
  dir.create(raw.dir, showWarnings = FALSE, recursive = TRUE)

  # curl options requesting a bare FTP directory listing.
  opts <- list(
    verbose = FALSE,
    noprogress = TRUE,
    fresh_connect = TRUE,
    ftp_use_epsv = TRUE,
    forbid_reuse = TRUE,
    dirlistonly = TRUE)
  hand <- curl::new_handle()
  curl::handle_setopt(hand, .list = opts)

  # List the zipped (numbered) ITRDB releases on the NOAA FTP server and
  # download any we don't already have (download_data skips existing files).
  url <- 'ftp://ftp.ncdc.noaa.gov/pub/data/paleo/treering/chronologies/'
  filenames <- readLines(curl::curl(url, handle = hand))
  filenames <- paste0(url, filenames)
  filenames <- filenames[grep("*.zip", filenames)]
  for(file in filenames){
    download_data(url=file, destdir=raw.dir)
  }

  ## A vector of the files in the output.dir
  zips <- paste0(raw.dir, basename(filenames))

  # Determine the latest release from the digits in the file NAMES only,
  # so digits appearing in raw.dir cannot corrupt the version number, and
  # keep exactly the zips belonging to that release.
  versions <- as.numeric(gsub("[^0-9]", "", basename(zips)))
  version <- max(versions)
  zips <- zips[versions == version]

  message("Extracting chronology data from ITRDB version ", version, " dated ", as.character(as.Date(base::file.info(zips[[1]])$mtime)))

  # One path for the cache check, read, and write; previously the existence
  # check omitted the "/" separator the read/write paths used, so the cache
  # was never found when raw.dir lacked a trailing slash.
  cache.file <- paste0(raw.dir, "/ITRDB_", version, ".Rds")
  if(!force.redo && file.exists(cache.file)){
    itrdb.metadata <- readr::read_rds(cache.file)
  }else{
    # Parse every *.crn file in every zip of the latest release.
    all.data <- lapply(zips, function(file){
      tmpdir <- tempfile()
      if (!dir.create(tmpdir)) stop("failed to create my temporary directory")
      utils::unzip(file, exdir=tmpdir)
      crns <- list.files(tmpdir, full.names=TRUE)
      crns <- crns[grepl("\\.crn", crns)]
      message("Extracting chronology data from ", length(crns), " files in ", file)
      # Files that fail to parse become NULL and are reported below.
      records <- lapply(crns, function(this.crn){
        tryCatch(suppressWarnings(read_crn(this.crn)), error=function(e){return(NULL)})
      })
      unlink(tmpdir, recursive = TRUE)
      if(sum(sapply(records, is.null)) > 0){
        message(sum(sapply(records, is.null)), " file(s) couldn't be extracted:")
        for(file in basename(crns)[sapply(records, is.null)])
          message(file)
        message("File(s) likely malformed!")
      }
      records <- unlist(records, recursive=FALSE)
      return(records)
    })
    all.data <- unlist(all.data, recursive=FALSE)

    ## Nulling out to appease R CMD CHECK
    LAT <- LON <- START <- END <- data <- NULL

    # Bind all per-record metadata into one table, attaching each record's
    # chronology data.frame as a list column named "data".
    itrdb.metadata <- data.table::rbindlist(lapply(all.data, '[[', 'meta'))
    itrdb.data <- lapply(all.data, '[[', 'data')
    itrdb.metadata[, data:=itrdb.data]

    # Change latitude and longitude to numeric
    itrdb.metadata[, LAT:=as.numeric(as.character(LAT))]
    itrdb.metadata[, LON:=as.numeric(as.character(LON))]

    # remove records with unknown locations
    itrdb.metadata <- itrdb.metadata[!is.na(LAT) & !is.na(LON), ]
    # or unknown years
    itrdb.metadata <- itrdb.metadata[!is.na(START) & !is.na(END), ]
    # or nonsense years (There are ~50 weird ones); anything dated after
    # the current calendar year is discarded.
    itrdb.metadata <- itrdb.metadata[START <= as.numeric(format(Sys.time(), "%Y")) & END <= as.numeric(format(Sys.time(), "%Y"))]

    readr::write_rds(itrdb.metadata, cache.file)
  }
  return(itrdb.metadata)
}
#' Read a Tucson-format chronology file.
#'
#' This function includes improvements to the \code{read.crn} function from the
#' \pkg{dplR} library. The principal changes are better parsing of metadata, and support
#' for the Schweingruber-type Tucson format. Chronologies that are unable to be read
#' are reported to the user. This function automatically recognizes Schweingruber-type files.
#'
#' This wraps two other functions: \code{\link{read_crn_metadata}} \code{\link{read_crn_data}}.
#'
#' @param file A character string path pointing to a \code{*.crn} file to be read.
#' @return A list containing the metadata and chronology.
#' @export
#' @keywords internal
read_crn <- function(file){
  # Read the entire file, coerce Latin-1 text to the native encoding, and
  # rewrite the file in place; because scan() drops empty lines, this also
  # strips blank lines before the fixed-width parsers see the file.
  # (The old unused `id` local, built with an unescaped-dot pattern ".crn",
  # has been removed; SERIES ids come from read_crn_metadata.)
  all.data <- scan(file, what = 'character', multi.line = FALSE, fill = TRUE, sep = "\n", quiet = TRUE)
  all.data <- iconv(all.data, from = "latin1", to = "")
  # This removes blank lines
  con <- file(file)
  writeLines(all.data, con)
  close(con)

  # Schweingruber-type files tag their data sections with a trailing
  # three-letter code; the "RAW" tag only appears in that format.
  tails <- substr_right(all.data, 3)
  SCHWEINGRUBER <- any(tails == "RAW")

  meta <- read_crn_metadata(file, SCHWEINGRUBER)
  data <- read_crn_data(file, SCHWEINGRUBER)

  # The year range comes from the rownames of the (first) data block.
  if(!SCHWEINGRUBER){
    year.range <- range(as.numeric(rownames(data)))
  }else{
    year.range <- range(as.numeric(rownames(data[[1]])))
  }
  meta$START <- year.range[[1]]
  meta$END <- year.range[[2]]
  meta$FILE <- basename(file)

  if(SCHWEINGRUBER){
    # A Schweingruber file can carry several chronologies (ARS/RAW/RES/STD);
    # emit one meta/data pair per section, rewriting SERIES so it encodes
    # the measurement- and chronology-type letters.
    out <- lapply(seq_along(data), function(i){
      # An unknown section tag aborts parsing; the caller's tryCatch then
      # reports the file as malformed (same net effect as the previous
      # code, which raised an "object not found" error here).
      type.chronology <- switch(names(data)[i],
                                ARS = "ARSTND",
                                RAW = "Measurements Only",
                                RES = "Residual",
                                STD = "Standard",
                                stop("Unknown Schweingruber section: ", names(data)[i]))
      this.meta <- meta
      this.meta$CHRONOLOGY_TYPE <- type.chronology
      # Bare site id: everything up through the last digit of SERIES.
      id <- regmatches(this.meta$SERIES, regexec(".*[0-9]", as.character(this.meta$SERIES)))
      typeID.chronology <- NA
      typeID.measurement <- NA
      # One-letter codes used in ITRDB series identifiers.
      if(this.meta$CHRONOLOGY_TYPE == "ARSTND") typeID.chronology <- "A"
      if(this.meta$CHRONOLOGY_TYPE == "Low Pass Filter") typeID.chronology <- "P"
      if(this.meta$CHRONOLOGY_TYPE == "Residual") typeID.chronology <- "R"
      if(this.meta$CHRONOLOGY_TYPE == "Standard") typeID.chronology <- "S"
      if(this.meta$CHRONOLOGY_TYPE == "Re-Whitened Residual") typeID.chronology <- "W"
      if(this.meta$CHRONOLOGY_TYPE == "Measurements Only") typeID.chronology <- "N"
      if(this.meta$MEASUREMENT_TYPE == "Total Ring Density") typeID.measurement <- "D"
      if(this.meta$MEASUREMENT_TYPE == "Earlywood Width") typeID.measurement <- "E"
      if(this.meta$MEASUREMENT_TYPE == "Earlywood Density") typeID.measurement <- "I"
      if(this.meta$MEASUREMENT_TYPE == "Latewood Width") typeID.measurement <- "L"
      if(this.meta$MEASUREMENT_TYPE == "Minimum Density") typeID.measurement <- "N"
      if(this.meta$MEASUREMENT_TYPE == "Ring Width") typeID.measurement <- "R"
      if(this.meta$MEASUREMENT_TYPE == "Latewood Density") typeID.measurement <- "T"
      if(this.meta$MEASUREMENT_TYPE == "Maximum Density") typeID.measurement <- "X"
      if(this.meta$MEASUREMENT_TYPE == "Latewood Percent") typeID.measurement <- "P"
      this.meta$SERIES <- paste(id, typeID.measurement, typeID.chronology, sep = '')
      return(list(meta = this.meta, data = data[[i]]))
    })
    return(out)
  }
  # Standard files yield a single record, wrapped for a uniform return shape.
  return(list(list(meta = meta, data = data)))
}
#' Read metadata from a Tucson-format chronology file.
#'
#' This function includes improvements to the \code{read.crn} function from the
#' \pkg{dplR} library. The principal changes are better parsing of metadata, and support
#' for the Schweingruber-type Tucson format. Chronologies that are unable to be read
#' are reported to the user. The user (or \code{\link{read_crn}}) must tell the function whether
#' the file is a Schweingruber-type chronology.
#'
#' Location information is converted to decimal degrees.
#'
#' @param file A character string path pointing to a \code{*.crn} file to be read.
#' @param SCHWEINGRUBER Is the file in the Schweingruber-type Tucson format?
#' @return A data.frame containing the metadata.
#' @export
#' @keywords internal
read_crn_metadata <- function(file,SCHWEINGRUBER){
# Derive the series identifier from the file name: drop the ".crn"
# extension and any "_crns" archive suffix, uppercased.
id <- toupper(gsub("\\.crn","",basename(file)))
id <- gsub("_CRNS","",id)
# Letters trailing the final digit of the id encode the measurement
# and/or chronology type; the bare site id runs through the final digit.
typeID <- regmatches(id,regexec("[0-9]+(.*)", id))[[1]][2]
id <- regmatches(id,regexec(".*[0-9]", id))
typeID.chronology <- NA
typeID.measurement <- NA
# Two trailing letters: measurement code then chronology code.
# One letter: Schweingruber files encode the measurement type,
# standard files encode the chronology type.
if(nchar(typeID)==2){
typeID <- strsplit(typeID,split='')[[1]]
typeID.measurement <- typeID[[1]]
typeID.chronology <- typeID[[2]]
}else if(SCHWEINGRUBER){
typeID.measurement <- typeID[[1]]
}else{
typeID.chronology <- typeID[[1]]
}
if(is.na(typeID.chronology)) typeID.chronology <- ''
if(is.na(typeID.measurement)) typeID.measurement <- ''
type.chronology <- NA
type.measurement <- NA
# Decode the one-letter chronology-type codes.
if(typeID.chronology=="A") type.chronology <- "ARSTND"
if(typeID.chronology=="P") type.chronology <- "Low Pass Filter"
if(typeID.chronology=="R") type.chronology <- "Residual"
if(typeID.chronology=="S") type.chronology <- "Standard"
if(typeID.chronology=="W") type.chronology <- "Re-Whitened Residual"
if(typeID.chronology=="N") type.chronology <- "Measurements Only"
# Missing chronology code: default to a Standard chronology.
if(typeID.chronology==''){
type.chronology <- "Standard"
typeID.chronology <- "S"
}
# Unrecognized chronology letter: reinterpret it as a measurement code
# that landed in the wrong slot, and default the chronology to Standard.
if(is.na(type.chronology)){
type.chronology <- "Standard"
typeID.measurement <- typeID.chronology
typeID.chronology <- "S"
}
# Decode the one-letter measurement-type codes.
if(typeID.measurement=="D") type.measurement <- "Total Ring Density"
if(typeID.measurement=="E") type.measurement <- "Earlywood Width"
if(typeID.measurement=="I") type.measurement <- "Earlywood Density"
if(typeID.measurement=="L") type.measurement <- "Latewood Width"
if(typeID.measurement=="N") type.measurement <- "Minimum Density"
if(typeID.measurement=="R") type.measurement <- "Ring Width"
if(typeID.measurement=="T") type.measurement <- "Latewood Density"
if(typeID.measurement=="X") type.measurement <- "Maximum Density"
if(typeID.measurement=="P") type.measurement <- "Latewood Percent"
if(typeID.measurement=="W") type.measurement <- "Ring Width"
# Missing measurement code: default to Ring Width.
if(typeID.measurement==''){
type.measurement <- "Ring Width"
typeID.measurement <- "R"
}
# Canonical series id: site id + measurement code + chronology code.
id <- paste(id,typeID.measurement,typeID.chronology,sep='')
## Nulling out to appease R CMD CHECK
lats <- lons <- NULL
if(!SCHWEINGRUBER){
# Parse the header of the CRN file: three fixed-width lines carrying
# (1) site name/species, (2) elevation/coordinates/years, (3) contributor.
meta.1 <- as.character(utils::read.fwf(file,c(6,3,52,4), skip=0, n=1, colClasses = "character", strip.white=T, stringsAsFactors=F))
meta.2 <- as.character(utils::read.fwf(file,c(6,3,13,18,6,5,6,9,6,5), skip=1,n=1, colClasses = "character", strip.white=T, stringsAsFactors=F))
meta.3 <- as.character(utils::read.fwf(file,c(6,3,52,2,12), skip=2, n=1, colClasses = "character", strip.white=T, stringsAsFactors=F))
meta <- c(id,meta.1[3:4],type.measurement,type.chronology,meta.2[c(5:7,9:10)],meta.3[3])
names(meta) <- c("SERIES","NAME","SPECIES","MEASUREMENT_TYPE","CHRONOLOGY_TYPE","ELEVATION","LAT","LON","START","END","CONTRIBUTOR")
}else{
# Schweingruber-type header: read the first three lines whole, strip the
# " RAW" section tag and the 9-character id prefix from each line.
meta <- scan(file,what='character', n=3,multi.line=F, fill=T, sep="\n", quiet=T)
meta <- sub(" RAW","",meta)
meta[1] <- gsub(substr(meta[1],1,9),"",meta[1])
meta[2] <- gsub(substr(meta[2],1,9),"",meta[2])
meta[3] <- gsub(substr(meta[3],1,9),"",meta[3])
# Line 1: site name tokens, then the species after the "_"-marked token.
meta.1 <- unlist(strsplit(meta[1], "\\s+"))
place <- paste(meta.1[1:which(grepl("_",meta.1))-1], collapse=' ')
species <- meta.1[which(grepl("_",meta.1))+1]
# Line 2: title, elevation, location, start/end years, split on runs of
# whitespace; the elevation and location fields may be fused together,
# so split again at the first digit.
meta.2 <- unlist(strsplit(meta[2], "\\s+\\s+\\s+"))
meta.2 <- c(meta.2[1],substr(meta.2[2],1,as.numeric(regexpr('\\d',meta.2[2]))-1),substr(meta.2[2],as.numeric(regexpr('\\d',meta.2[2])),nchar(meta.2[2])))
meta.2 <- c(meta.2[1],meta.2[2],unlist(strsplit(meta.2[3], "\\s+")))
if(length(which(meta.2=="-"))>0){
meta.2 <- meta.2[-which(meta.2=="-")]
}
title <- meta.2[1:2]
elev <- meta.2[3]
location <- meta.2[4]
# Split the fused lat/lon field at the first "-" or space that is not
# in the leading (sign) position.
location.split <- unlist(strsplit(location,""))
splits <- which(location.split=="-" | location.split==" ")-1
if(length(splits)>0 && splits[1]==0){
splits <- splits[-1]
}
lat <- tryCatch(paste(location.split[1:splits[1]],collapse=''),error=function(e){NA})
lon <- tryCatch(paste(location.split[(splits[1]+1):length(location.split)],collapse=''),error=function(e){NA})
begin <- meta.2[5]
end <- meta.2[6]
# Line 3: contributor, truncated at the first " -" delimiter.
meta.3 <- meta[3]
meta.3 <- sub("\\s-.*","",meta.3)
meta <- c(id,place,species,type.measurement,type.chronology,elev,lat,lon,begin,end,meta.3)
names(meta) <- c("SERIES","NAME","SPECIES","MEASUREMENT_TYPE","CHRONOLOGY_TYPE","ELEVATION","LAT","LON","START","END","CONTRIBUTOR")
}
# Normalize fields: strip elevation units, zero-pad blanks inside the
# coordinate strings, and drop explicit "+" signs on latitude.
meta[["ELEVATION"]] <- gsub("M", "", meta[["ELEVATION"]])
meta[["ELEVATION"]] <- gsub("m", "", meta[["ELEVATION"]])
# NOTE(review): this gsub substitutes a space for a space as written;
# presumably intended to collapse repeated spaces -- confirm upstream.
meta[["CONTRIBUTOR"]] <- gsub(" ", " ", meta[["CONTRIBUTOR"]])
meta[["LAT"]] <- gsub(" ", "0", meta[["LAT"]],fixed = TRUE)
meta[["LON"]] <- gsub(" ", "0", meta[["LON"]],fixed = TRUE)
meta[["LAT"]] <- gsub("+", "", meta[["LAT"]],fixed = TRUE)
# meta[["LON"]] <- gsub("-", "", meta[["LON"]],fixed = TRUE)
meta[["CONTRIBUTOR"]] <- toupper(meta[["CONTRIBUTOR"]])
meta[["NAME"]] <- toupper(meta[["NAME"]])
meta[["SERIES"]] <- toupper(meta[["SERIES"]])
# Coordinates are stored as degrees*100 + minutes; dividing by 100 yields
# a "degrees.minutes" string (e.g. "3745" -> "37.45").
meta[["LAT"]] <- format(as.numeric(meta[["LAT"]])/100, nsmall=2)
meta[["LON"]] <- format(as.numeric(meta[["LON"]])/100, nsmall=2)
# Convert "degrees.minutes" to decimal degrees, preserving the sign.
# Skipped entirely when either coordinate failed to parse ("NA").
if(!any(meta[["LAT"]]=="NA",meta[["LON"]]=="NA")){
locations <- meta[c("LAT","LON")]
locations[["LAT"]] <- within(data.frame(lats=locations[["LAT"]]), {
# Split at the decimal point: dms[,1] = degrees, dms[,2] = minutes.
dms <- do.call(rbind, strsplit(as.character(lats), "\\."))
neg <- grepl("-",dms[,1])
LAT <- abs(as.numeric(dms[,1])) + (as.numeric(dms[,2]))/60
if(neg){
LAT <- -1*LAT
}
rm(dms)
rm(lats)
rm(neg)
})
locations[["LON"]] <- within(data.frame(lons=locations[["LON"]]), {
dms <- do.call(rbind, strsplit(as.character(lons), "\\."))
neg <- grepl("-",dms[,1])
LON <- abs(as.numeric(dms[,1])) + (as.numeric(dms[,2]))/60
if(neg){
LON <- -1*LON
}
rm(dms)
rm(lons)
rm(neg)
})
meta[["LAT"]] <- locations[["LAT"]]
meta[["LON"]] <- locations[["LON"]]
}
# Return a single-row data.frame; transpose if data.frame() stacked the
# fields into a single column instead.
meta <- data.frame(meta)
if(nrow(meta)>ncol(meta)) meta <- data.frame(t(meta))
return(meta)
}
#' Read chronology data from a Tucson-format chronology file.
#'
#' This function includes improvements to the \code{read.crn} function from the
#' \pkg{dplR} library. The principal changes are better parsing of metadata, and support
#' for the Schweingruber-type Tucson format. Chronologies that are unable to be read
#' are reported to the user. The user (or \code{\link{read_crn}}) must tell the function whether
#' the file is a Schweingruber-type chronology.
#'
#' @param file A character string path pointing to a \code{*.crn} file to be read.
#' @param SCHWEINGRUBER Is the file in the Schweingruber-type Tucson format?
#' @return A data.frame containing the data, or if \code{SCHWEINGRUBER==T}, a list containing four types of data.
#' @export
#' @keywords internal
read_crn_data <- function(file,SCHWEINGRUBER){
if(!SCHWEINGRUBER){
# Standard Tucson format: the start/end years sit in fields 9:10 of the
# second fixed-width header line.
years <- as.character(utils::read.fwf(file,c(6,3,13,18,6,5,6,9,6,5),skip=1,n=1,colClasses = "character", strip.white=T, stringsAsFactors=F))[9:10]
if(any(grepl(" ",years))){
years <- unlist(strsplit(years," "))
}
# Width of the year column: at least 4 characters (more for long or
# negative years); dig.dif pads when the first year is shorter.
digits.year <- max(nchar(years),4)
dig.dif <- digits.year-nchar(years[[1]])
encoding = getOption("encoding")
## Open the data file for reading
con <- file(file, encoding = encoding)
on.exit(close(con))
## Read 4th line - should be first data line
dat1 <- readLines(con, n=4)
if(length(dat1) < 4)
stop("file has under 4 lines")
dat1 <- dat1[4]
# yearStart <- nchar(dat1)-70-digits.year
# Infer the width of the leading site-id column by locating the first
# year within the first data line; fall back to the conventional 6.
yearStart <- as.numeric(regexpr(years[[1]],dat1))-1-dig.dif
if(yearStart<0) yearStart <- 6
# if(yearStart>6) yearStart <- 6
if(nchar(dat1) < 10)
stop("first data line ends before col 10")
# yrcheck <- as.numeric(substr(dat1, 7, 10))
# if(is.null(yrcheck) || length(yrcheck)!=1 || is.na(yrcheck) ||
# yrcheck < -1e04 || yrcheck > 1e04)
# stop(gettextf("cols %d-%d of first data line not a year", 7, 10,
# domain="R-dplR"))
## Look at last line to determine if Chronology Statistics are present
## if nchar <=63 then there is a stats line
nlines <- length(readLines(con, n=-1))
## Read file
skip.lines <- 3
## Do nothing. read.fwf closes (and destroys ?!?) the file connection
on.exit()
## Get chron stats if needed
suppressWarnings(chron.stats <- utils::read.fwf(con, c(yearStart, digits.year, 6, 6, 6, 7, 9, 9, 10),
skip=nlines-1, strip.white=TRUE, colClasses="character", stringsAsFactors=F))
## Unintuitively, the connection object seems to have been destroyed
## by the previous read.fwf. We need to create a new one.
con <- file(file, encoding = encoding)
## If columns 3 in chron.stats is an integer then there is no
## statistics line
if((!is.integer(chron.stats[[3]]) | (chron.stats[[3]]==0)) & !grepl(" ",chron.stats[[3]])){
names(chron.stats) <-
c("SiteID", "nYears", "AC[1]", "StdDev", "MeanSens",
"MeanRWI", "IndicesSum", "IndicesSS", "MaxSeries")
## Really read file
# A trailing statistics line is present: exclude it from the data rows.
dat <- utils::read.fwf(con, c(yearStart, digits.year, rep(c(4, 3), 10)),
skip=skip.lines, n=nlines-skip.lines-1,
strip.white=TRUE, colClasses="character", stringsAsFactors=F)
} else {
## Really read file
dat <- utils::read.fwf(con, c(yearStart, digits.year, rep(c(4, 3), 10)),
skip=skip.lines, n=nlines-skip.lines,
strip.white=TRUE, colClasses="character", stringsAsFactors=F)
}
# dat <- dat[complete.cases(dat),]
# Columns: 1 = site id, 2 = decade year, then 10 (width, depth) pairs.
dat[[1]] <- as.character(dat[[1]])
for(i in 2:22){
dat[[i]] <- as.numeric(as.character(dat[[i]]))
}
## Remove any blank lines at the end of the file, for instance
dat <- dat[!is.na(dat[[2]]), , drop=FALSE] # requires non-NA year
series.label <- dat[[1]]
series.ids <- unique(series.label)
decade.yr <- as.numeric(dat[[2]])
nseries <- length(series.ids)
series.index <- match(series.label, series.ids)
# Round the year span out to whole decades.
min.year <- (min(decade.yr) %/% 10) * 10
max.year <- ((max(decade.yr)+10) %/% 10) * 10
span <- max.year - min.year + 1
ncol.crn.mat <- nseries + 1
crn.mat <- matrix(NA_real_, ncol=ncol.crn.mat, nrow=span)
# NOTE(review): these column names assume a single series per file
# (ncol.crn.mat == 2); a multi-series file would mismatch -- confirm.
colnames(crn.mat) <- c("WIDTH", "DEPTH")
rownames(crn.mat) <- min.year:max.year
## RWI
x <- as.matrix(dat[seq(from=3, to=21, by=2)])
## All sample depths
y <- as.matrix(dat[seq(from=4, to=22, by=2)])
# Scatter each decadal row into its 10 consecutive year rows; sample
# depths from the first series fill the final column.
for(i in seq_len(nseries)){
idx <- which(series.index == i)
for(j in idx) {
yr <- (decade.yr[j] %/% 10) * 10
row.seq <- seq(from = yr - min.year + 1, by = 1, length.out = 10)
crn.mat[row.seq, i] <- x[j, ]
if(i == 1) {
crn.mat[row.seq, ncol.crn.mat] <- y[j, ]
}
}
}
## Clean up NAs
# 9990 is the Tucson-format missing-value sentinel; then drop years
# missing in every series.
crn.mat[which(crn.mat[, -ncol.crn.mat] == 9990)] <- NA # column-major order
crn.mat <- crn.mat[!apply(is.na(crn.mat[, -ncol.crn.mat, drop=FALSE]), 1, all),,drop=FALSE]
seq.series <- seq_len(nseries)
# Widths are stored as integer thousandths.
crn.mat[, "WIDTH"] <- crn.mat[, "WIDTH"] / 1000
# dt.widths <- data.table::data.table(t(crn.mat[,"WIDTH",drop=F]))
# dt.depths <- data.table::data.table(t(crn.mat[,"DEPTH",drop=F]))
#
# output <- list(WIDTH=dt.widths,DEPTH=dt.depths)
return(data.frame(crn.mat))
}else{
# Schweingruber-type file: data sections are tagged by the last three
# characters of each line (e.g. RAW/ARS/RES/STD); group lines by tag.
raw.data <- scan(file,what='character', multi.line=F, fill=T, sep="\n", strip.white=T, quiet=T)
tails <- toupper(substr_right(raw.data,3))
raw.data <- split(raw.data,tails)
# Drop the three header lines of each section.
raw.data <- lapply(raw.data,function(x){x[-c(1:3)]})
# Re-parse header line 2 (same steps as read_crn_metadata) to recover
# the start/end years, which determine the year-column width.
meta <- scan(file,what='character', n=3,multi.line=F, fill=T, sep="\n", quiet=T)
meta <- sub(" RAW","",meta)
meta[2] <- gsub(substr(meta[2],1,9),"",meta[2])
meta.2 <- unlist(strsplit(meta[2], "\\s+\\s+\\s+"))
meta.2 <- c(meta.2[1],substr(meta.2[2],1,as.numeric(regexpr('\\d',meta.2[2]))-1),substr(meta.2[2],as.numeric(regexpr('\\d',meta.2[2])),nchar(meta.2[2])))
meta.2 <- c(meta.2[1],meta.2[2],unlist(strsplit(meta.2[3], "\\s+")))
if(length(which(meta.2=="-"))>0){
meta.2 <- meta.2[-which(meta.2=="-")]
}
meta.2 <- meta.2[5:6]
digits.year <- max(nchar(meta.2),4)
# Manual fixed-width layout: site id, year, then 10 (width, depth) pairs;
# starts/stops are the substring boundaries for each field.
widths <- c(6, digits.year, rep(c(4, 3), 10))
starts <- c(1,cumsum(widths)+1)
stops <- cumsum(widths)
starts <- utils::head(starts,n=length(stops))
# Parse each section into a year-by-(WIDTH, DEPTH) data.frame, mirroring
# the standard-format branch above.
dat <- lapply(raw.data,function(this.raw.data){
dat <- as.data.frame(
matrix(
unlist(
lapply(this.raw.data,FUN=function(x){
sapply(1:length(starts), function(ii){substr(x, starts[ii], stops[ii])
})
})
)
,ncol=22,byrow=T)
)
dat[[1]] <- as.character(dat[[1]])
for(i in 2:22){
dat[[i]] <- as.numeric(as.character(dat[[i]]))
}
series.name <- dat[[1]]
series.ids <- unique(series.name)
decade.yr <- dat[[2]]
nseries <- length(series.ids)
series.index <- match(series.name, series.ids)
# Round the year span out to whole decades.
min.year <- (min(decade.yr) %/% 10) * 10
max.year <- ((max(decade.yr)+10) %/% 10) * 10
span <- max.year - min.year + 1
ncol.crn.mat <- nseries + 1
crn.mat <- matrix(NA_real_, ncol=ncol.crn.mat, nrow=span)
colnames(crn.mat) <- c("WIDTH", "DEPTH")
rownames(crn.mat) <- min.year:max.year
## RWI
x <- as.matrix(dat[seq(from=3, to=21, by=2)])
## All sample depths
y <- as.matrix(dat[seq(from=4, to=22, by=2)])
# Scatter decadal rows into individual year rows.
for(i in seq_len(nseries)){
idx <- which(series.index == i)
for(j in idx) {
yr <- (decade.yr[j] %/% 10) * 10
row.seq <- seq(from = yr - min.year + 1, by = 1, length.out = 10)
crn.mat[row.seq, i] <- x[j, ]
if(i == 1) {
crn.mat[row.seq, ncol.crn.mat] <- y[j, ]
}
}
}
## Clean up NAs
# 9990 is the missing-value sentinel; drop years missing in all series.
crn.mat[which(crn.mat[, -ncol.crn.mat] == 9990)] <- NA # column-major order
crn.mat <-
crn.mat[!apply(is.na(crn.mat[, -ncol.crn.mat, drop=FALSE]), 1, all),
,
drop=FALSE]
seq.series <- seq_len(nseries)
# Widths are stored as integer thousandths.
crn.mat[, "WIDTH"] <- crn.mat[, "WIDTH"] / 1000
# dt.widths <- data.table::data.table(t(crn.mat[,"WIDTH",drop=F]))
# dt.depths <- data.table::data.table(t(crn.mat[,"DEPTH",drop=F]))
#
# output <- list(WIDTH=dt.widths,DEPTH=dt.depths)
return(data.frame(crn.mat))
})
return(dat)
}
}