https://github.com/cran/CHNOSZ
Tip revision: 56901436b0b3087ec88d37a9b95426f4aeaf27e0 authored by Jeffrey M. Dick on 22 February 2011, 00:00:00 UTC
version 0.9-4
version 0.9-4
Tip revision: 5690143
info.R
# CHNOSZ/info.R
# search information and thermodynamic properties of species
# 20061024 extraced from species.R jmd
info <- function(species=NULL,states=NULL,quiet=FALSE,file='',return.approx=TRUE,check=FALSE) {
# file argument: where to cat the messages about inconsistencies among
# GHS, and between eos parameters and Cp, V ('' for console)
# quiet=TRUE disables checks of self-consistency of ghs
# and eos/Cp/V values and also makes things run faster
#if(missing(quiet)) if(sys.nframe()>1) quiet <- TRUE
# if check=TRUE, run a consistency check for each species,
# returning a nice table at the end ...
if(check) {
if(missing(species)) species <- 1:nrow(thermo$obigt)
cat(paste('info: checking consistency of parameters for',length(species),'species\n'))
if(file != "") cat(paste('info: saving output to file',file,'\n'))
else cat("info: this will take a while...\n")
} else {
if(missing(species)) {
# a friendly summary of thermodynamic information? 20101129
cat("info: species is NULL; summarizing information about thermo data object...\n")
cat(paste("thermo$obigt has",nrow(thermo$obigt[thermo$obigt$state=="aq",]),"aqueous,",
nrow(thermo$obigt),"total species\n"))
cat(paste("number of literature sources:",nrow(thermo$source),", available elements:",
nrow(thermo$element),", buffers:",length(unique(thermo$buffer$name)),"\n"))
cat(paste("number of proteins in thermo$protein is",nrow(thermo$protein),"from",
length(unique(thermo$protein$organism)),"organisms\n"))
cat(paste("number of proteins from complete genomes:",nrow(thermo$ECO),"(ECO),",
nrow(thermo$HUM),"(HUM),",nrow(thermo$SGD),"(SGD)\n"))
cat(paste("thermo$yeastgfp has",nrow(thermo$yeastgfp),"localizations and",
length(thermo$yeastgfp$abundance[!is.na(thermo$yeastgfp$abundance)]),"abundances\n"))
}
}
# show license or release notes
if(identical(tolower(species),'license') | identical(tolower(species),'licence') |
identical(tolower(species),'copying')) {
licensefile <- file.path(system.file(package="CHNOSZ"), "COPYING")
file.show(licensefile)
return()
}
if(identical(tolower(species),'chnosz') | identical(tolower(species),'news')) {
newsfile <- file.path(system.file(package="CHNOSZ"), "NEWS")
file.show(newsfile)
return()
}
species.na <- states.na <- FALSE
if(!is.null(species)) if(is.na(species[1])) species.na <- TRUE
if(!is.null(states)) if(is.na(states[1])) states.na <- TRUE
if(species.na | states.na) stop('info: species and/or states arguments are NA.')
# argument handling
if(missing(states) | is.null(states)) missing.states <- TRUE else missing.states <- FALSE
if(!is.null(states) & length(species)!=length(states))
states <- rep(states,length.out=length(species))
# fill-in missing state with default setting
if(is.null(states)) states <- rep(thermo$opt$state,length(species))
# if arguments are character, return the matching rownumbers in thermo$obigt
if(is.character(species[1])) {
# first make sure the rownames are their numbers
rownames(thermo$obigt) <<- 1:nrow(thermo$obigt)
# the lists of species addresses
ighs <- numeric(0); ieos <- numeric(0)
ighs.list <- list()
# dataframe for telling the user what species are found
thisghs <- thermo$obigt[0,]
for(i in 1:length(species)) {
use.other.states <- FALSE
if(length(grep('_',species[i]))>0) {
# we're dealing with a protein
tghs <- thermo$obigt[(thermo$obigt$name %in% species[i]) & thermo$obigt$state %in% states[i],]
# try to add up protein
if(nrow(tghs)==0) {
# split the name at the underscore
us <- match(TRUE, s2c(species[i]) == '_')
protein <- substr(species[i],1,us-1)
organism <- substr(species[i],us+1,nchar(species[i]))
ip <- protein(protein,organism)
# did we find a protein? add its properties to obigt
if(length(ip) > 0) {
newrow <- protein(ip,states[i])
colnames(newrow) <- colnames(thermo$obigt)
thermo$obigt <<- rbind(thermo$obigt,newrow)
rownames(thermo$obigt) <<- 1:nrow(thermo$obigt)
tghs <- thermo$obigt[nrow(thermo$obigt),]
}
}
iighs <- nrow(tghs)
if(iighs==0) iighs <- 1
} else {
# selection criteria: search names and abbreviations and formulas
# and state, if the states argument is supplied
if(missing.states) use.other.states <- TRUE
else if(states[i]=="") use.other.states <- TRUE
if(!use.other.states) {
if(states[i]=='cr') searchstates <- c('cr','cr1','cr2','cr3',
'cr4','cr5','cr6','cr7','cr8','cr9') else searchstates <- states[i]
tghs <- thermo$obigt[(thermo$obigt$name %in% species[i] |
thermo$obigt$abbrv %in% species[i] | thermo$obigt$formula %in% species[i]) &
thermo$obigt$state %in% searchstates,]
iighs <- 1:nrow(tghs)
}
# allow other states if the argument is missing
else {
tghs <- thermo$obigt[(thermo$obigt$name %in% species[i] |
thermo$obigt$abbrv %in% species[i] | thermo$obigt$formula %in% species[i]),]
# 20090416 if the name matched put that first
iname <- match(species[i],tghs$name)[1]
if(!is.na(iname)) tghs <- tghs[c(iname,(1:nrow(tghs))[-iname]),]
# except we like O2(g)
if(species[i]=="O2") {
iname <- match("oxygen",tghs$name)
if(!is.na(iname)) tghs <- tghs[c(iname,(1:nrow(tghs))[-iname]),]
}
# here we go to the last (duplicated) entry that thermo$opt$level allows
if(nrow(tghs)==0) iiighs <- 99
else iiighs <- which(tghs$state==tghs$state[1])
#iighs <- max(min(thermo$opt$level,max(iiighs)),1)
iighs <- max(min(1,max(iiighs)),1)
states[i] <- as.character(tghs$state[iighs])
}
}
# notify the user of multiple matches
if(nrow(tghs)>1) {
if(!quiet) {
tg <- tghs[,1:5]
if(length(unique(tg$formula))==1 & length(unique(tg$name))==1) {
otext <- ''
if(species[i] %in% tg$formula) otext <- tg$name[1]
if(species[i] %in% tg$name) otext <- tg$formula[1]
if(otext==species[i]) otext <- '' else otext <- paste(' (',otext,')',sep='')
stext <- ''
for(j in 1:nrow(tg)) {
if(j==nrow(tg)) ntext <- '.\n' else ntext <- ', '
stext <- paste(stext,tg$state[j],ntext,sep='')
}
cat(paste('info: ',species[i],otext,' available in ',stext,sep=''))
} else {
cat(paste('info:',species[i],'matches these species:\n'))
print(tghs[,1:5])
}
}
}
# notify the user of no matches
if(nrow(tghs)==0) {
findnames <- function(species,state=NA,max) {
if(!is.na(state)) altghs <- thermo$obigt[thermo$obigt$state==state,]
else altghs <- thermo$obigt
anames <- unique(c(agrep(species,as.character(altghs$name),value=TRUE,max.distance=max),
agrep(species,as.character(altghs$abbrv),value=TRUE,max.distance=max),
agrep(species,as.character(altghs$formula),value=TRUE,max.distance=max)))
if(length(anames)>0) {
# finish off the message and print the approximate matches
# 20090301 observe quiet
if(!quiet) cat('.\n')
# which species are approximately matching
iga <- which(altghs$name %in% anames |
altghs$abbrv %in% anames | altghs$formula %in% anames,1:4)
# print species names
# 20090301 only if return.approx is TRUE
if(return.approx) {
if(length(anames) > 20) {
cat('info: similar species names, abbreviations, or formulas are:\n')
nt <- 200
if(length(anames) > nt) {
cat('info: (truncated at) ',nt,'.\n',sep='')
anames <- anames[1:200]
}
print(anames)
# print species info
} else {
cat('info: approximately matching species are:\n')
print(altghs[iga,1:4])
}
}
return(iga)
} else return()
}
if(use.other.states) {
# 20090301 observe quiet
if(!quiet) cat(paste('info: no match for ',species[i],sep=''))
t <- findnames(species[i],max=0.1)
if(is.null(t)) t <- findnames(species[i],max=0.3)
} else {
if(!quiet) cat(paste('info: no match for ',species[i],' ',states[i],sep=''))
t <- findnames(species[i],states[i],max=0.1)
if(is.null(t)) t <- findnames(species[i],states[i],max=0.3)
}
if(is.null(t)) {
if(!quiet) cat(', and no approximate matches.\n')
if(return.approx) ighs.list[length(ighs.list)+1] <- NA
} else {
# if we want to return the indices of approximate matches
if(return.approx) ighs.list[length(ighs.list)+1] <- list(t)
}
}
tighs <- as.numeric(rownames(tghs)[iighs])
ighs <- c(ighs,tighs)
if(!is.na(tighs[1])) for(i in 1:length(tighs))
ighs.list[[length(ighs.list)+1]] <- tighs[i]
# build the table of species to show the user
if(!is.na(tighs[[1]])) thisghs <- rbind(thisghs,tghs[iighs,])
}
if(nrow(thisghs)>0) {
if(quiet) {
#cat(paste('info: matching species indices are ',c2s(rownames(thermo$obigt)[ighs],sep=', '),'.\n',sep=''))
} else {
#if(nrow(thisghs)==1) {iword <- 'index'; word <- 'this'}
#else {iword <- 'indices'; word <- 'these'}
#cat('info: returning',iword,'for',word,'species:\n')
#print(thisghs[,1:7])
for(j in 1:nrow(thisghs)) {
tf <- thisghs$formula[j]
if(tf %in% species) tf <- '' else tf <- paste(', ',tf,sep='')
thissource <- thisghs$source1[j]
ts2 <- thisghs$source2[j]
if(!is.na(ts2)) thissource <- paste(thissource,', ',ts2,sep='')
td <- thisghs$date[j]
if(!is.na(td)) thissource <- paste(thissource,', ',td,sep='')
ii <- ighs[j]
if(is.na(ii)) ii <- rownames(thisghs)[j]
cat('info: ',ii,' refers to ',thisghs$name[j],tf,
' ',thisghs$state[j],' (',thissource,')\n',sep='')
}
}
}
# try to return a vector not a list
t <- try(as.numeric(ighs.list),silent=TRUE)
if(class(t)=='try-error') return(invisible(ighs.list))
else return(invisible(as.numeric(ighs.list)))
}
# if numeric arguments are given, return ghs and eos
if(is.numeric(species[1])) {
nnspecies <- species[species > 0]
myghs <- thermo$obigt[nnspecies,]
# to keep track of the results of consistency checks
DCp <- DV <- DG <- Di <- numeric()
Dname <- Dstate <- character()
for(i in 1:length(species)) {
if(species[i] > nrow(thermo$obigt) | species[i] < 1) {
cat(paste("info: there aren't",species[i],"species.\n"))
next
}
dCp <- dV <- dG <- NA
# convert the eos parameters depending on state
# and check them for NAs and consistency with Cp, V values
if(myghs$state[i]=='aq') {
myghs[i,13:20] <- myghs[i,13:20] * 10^c(-1,2,0,4,0,4,5,0)
colnames(myghs)[13:20] <- c('a1','a2','a3','a4','c1','c2','omega','Z')
naEOS <- which(is.na(myghs[i,13:20]))
if(!quiet) {
# value of X consistent with CHNOSZ
X <- -2.773788E-7
# value of X consistent with SUPCRT
X <- -3.055586E-7
Cp <- myghs$c1[i] + myghs$c2[i]/(298.15-228)^2 + myghs$omega[i]*298.15*X
if(!is.na(myghs$Cp[i])) {
if(abs(Cp-myghs$Cp[i])>1) {
cat(paste('info: Cp (from EOS) of',
myghs$name[i],myghs$state[i],'differs by',round(Cp-myghs$Cp[i],2),
'from tabulated value.\n'),file=file,append=TRUE)
dCp <- round(Cp-myghs$Cp[i],2)
}
} else {
if(!is.na(Cp)) {
cat(paste('info: Cp of',myghs$name[i],myghs$state[i],
'is NA; set by EOS parameters to',round(Cp,2),'\n'))
myghs$Cp[i] <- as.numeric(Cp)
}
}
}
if(!quiet) {
# value of Q consistent with IAPWS95/AW90
Q <- 0.00002483137
# value of Q consistent with SUPCRT92
Q <- 0.00002775729
V <- convert(myghs$a1[i],'cm3bar') + convert(myghs$a2[i],'cm3bar')/(2601) +
(convert(myghs$a3[i],'cm3bar') + convert(myghs$a4[i],'cm3bar')/(2601))/(298.15-228) -
Q * myghs$omega[i]
if(!is.na(myghs$V[i]) & !is.na(V)) {
if(abs(V-myghs$V[i])>1) {
cat(paste('info: V (from EOS) of',
myghs$name[i],myghs$state[i],'differs by',round(V-myghs$V[i],2),
'from tabulated value.\n'),file=file,append=TRUE)
dV <- round(V-myghs$V[i],2)
}
} else {
if(!is.na(V)) {
cat(paste('info: V of',myghs$name[i],myghs$state[i],
'is NA; set by EOS parameters to',round(V,2),'\n'))
myghs$V[i] <- as.numeric(V)
}
}
}
} else {
myghs[i,13:20] <- myghs[i,13:20] * 10^c(0,-3,5,0,-5,0,0,0)
colnames(myghs)[13:20] <- c('a','b','c','d','e','f','lambda','T')
naEOS <- which(is.na(myghs[i,12:19]))
if(length(naEOS)>0) {
cat(paste('info:',c2s(colnames(myghs)[12:19][naEOS],sep=','),'of',
myghs$name[i],myghs$state[i],'are NA; set to 0.\n'))
myghs[i,naEOS+11] <- 0
}
if(!quiet) {
Cp <- cgl('Cp',eos=myghs[i,],ghs=myghs[i,],T=thermo$opt$Tr,P=thermo$opt$Pr)[[1]]
if(!is.na(myghs$Cp[i]) & !is.na(Cp)) {
if(abs(Cp-myghs$Cp[i])>1) {
cat(paste('info: Cp (from EOS) of',
myghs$name[i],myghs$state[i],'differs by',round(Cp-myghs$Cp[i],2),
'from tabulated value.\n'),file=file,append=TRUE)
dCp <- round(Cp-myghs$Cp[i],2)
}
} else {
if(!is.na(Cp)) {
cat(paste('info: Cp of ',myghs$name[i],' ',myghs$state[i],
' is NA; set by EOS parameters to ',round(Cp,2),'.\n',sep=''))
myghs$Cp[i] <- as.numeric(Cp)
}
}
}
}
# check the GHS values
naGHS <- which(is.na(myghs[i,8:10]))
j <- as.numeric(rownames(myghs)[i])
if(length(naGHS)>1) {
} else if(length(naGHS)==1) {
GHS <- GHS(as.character(myghs$formula[i]),DG=myghs[i,8],DH=myghs[i,9],S=myghs[i,10])
if(!quiet) cat(paste('info: ',c2s(colnames(myghs)[8:10][naGHS]),' of ',
myghs$name[i],' ',myghs$state[i],' is NA; set to ',round(GHS,2),'.\n',sep=''))
myghs[i,naGHS+7] <- GHS
} else {
G <- GHS(as.character(myghs$formula[i]),DG=myghs[i,8],DH=myghs[i,9],S=myghs[i,10])
warn <- FALSE
if(!is.na(G)) {
if(abs(G-myghs[i,8])>1000) warn <- TRUE
#if(myghs[i,8]!=0) if(abs((G-myghs[i,8])/myghs[i,8])>0.05) warn <- TRUE
if(warn) {
if(!quiet) cat(paste('info: G (from H and S) of',myghs$name[i],myghs$state[i],'differs by',
round(G-myghs[i,8]),'from tabulated value.\n'),file=file,append=TRUE)
dG <- round(G-myghs[i,8])
}
} else {
if(!quiet) cat(paste('info: G of',myghs$name[i],myghs$state[i],'is NA!!! (maybe a missing element?)\n'),file=file,append=TRUE)
}
}
if(check) {
if(!(is.na(dCp)&is.na(dV)&is.na(dG)) ) {
Dname <- c(Dname,as.character(myghs$name[i]))
Dstate <- c(Dstate,as.character(myghs$state[i]))
DCp <- c(DCp,dCp)
DV <- c(DV,dV)
DG <- c(DG,dG)
Di <- c(Di,species[i])
}
}
} # end loop over i species
if(length(unique(myghs$state))!=1 & 'aq' %in% myghs$state) {
cat('info: the species are in aqueous and other states\n')
colnames(myghs)[13:20] <- colnames(thermo$obigt)[13:20]
}
if(check | all(is.na(myghs))) {
# for some reason, DCp likes to become list. make it numeric
DCp <- as.numeric(DCp)
t <- data.frame(ispecies=Di,name=Dname,state=Dstate,DCp=DCp,DV=DV,DG=DG)
return(t)
} else return(myghs)
}
}