#' Artic lake sediment data #' #' Sand, silt, clay compositions of 39 sediment samples at different water #' depths in an Arctic lake. This data set can be found on page 359 of the #' Aitchison book (see reference). #' #' The rows sum up to 100, except for rounding errors. #' #' @name arcticLake #' @docType data #' @format A data frame with 39 observations on the following 3 variables. #' \describe{ \item{list("sand")}{numeric vector of percentages of sand} #' \item{list("silt")}{numeric vector of percentages of silt} #' \item{list("clay")}{numeric vector of percentages of clay} } #' @source Aitchison, J. (1986) \emph{The Statistical Analysis of Compositional #' Data} Monographs on Statistics and Applied Probability. Chapman \& Hall #' Ltd., London (UK). 416p. #' @keywords datasets #' @examples #' #' data(arcticLake) #' NULL #' Coffee data #' #' 27 commercially available coffee samples of different origins. #' #' In the original data set, 15 volatile compounds (descriptors of coffee #' aroma) were selected for a statistical analysis. We selected only three #' compounds (compositional parts) Hydroxy-2-propanone, methylpyrazine and #' methylfurfural to allow for a visualization in a ternary diagram. #' #' @name coffee #' @docType data #' @format A data frame with 27 observations on the following 4 variables. #' \describe{ \item{list("Metpyr")}{Hydroxy-2-propanone} #' \item{list("5-Met")}{methylpyrazine} \item{list("furfu")}{methylfurfural} #' \item{list("sort")}{a character vector} } #' @references M.~Korhonov\'a, K.~Hron, D.~Klimc\'ikov\'a, L.~Muller, #' P.~Bedn\'ar, and P.~Bart\'ak (2009) Coffee aroma - statistical analysis of #' compositional data. \emph{Talanta}, 80(2): 710--715. #' @keywords datasets #' @examples #' #' data(coffee) #' NULL #' Household expenditures data #' #' This data set from Aitchison (1986), p. 395, describes household #' expenditures (in former Hong Kong dollars) on five commundity groups. #' #' This data set contains household expenditures on five commodity groups of 20 #' single men. The variables represent housing (including fuel and light), #' foodstuff, alcohol and tobacco, other goods (including clothing, footwear #' and durable goods) and services (including transport and vehicles). Thus #' they represent the ratios of the men's income spent on the mentioned #' expenditures. #' #' @name expenditures #' @docType data #' @format A data frame with 20 observations with the following 5 variables. #' \describe{ \item{list("housing")}{housing (including fuel and light)} #' \item{list("foodstuffs")}{foodstuffs} \item{list("alcohol")}{alcohol and #' tobacco} \item{list("other")}{other goods (including clothing, footwear and #' durable goods)} \item{list("services")}{services (including transport and #' vehicles)} } #' @source Aitchison, J. (1986) \emph{The Statistical Analysis of Compositional #' Data} Monographs on Statistics and Applied Probability. Chapman \& Hall #' Ltd., London (UK). 416p. #' @keywords datasets #' @examples #' #' data(expenditures) #' ## imputing a missing value in the data set using k-nearest neighbor imputation: #' expenditures[1,3] #' expenditures[1,3] <- NA #' impKNNa(expenditures)$xImp[1,3] #' NULL #' Mean consumption expenditures data. #' #' Mean consumption expenditure of households at EU-level. The final #' consumption expenditure of households encompasses all domestic costs (by #' residents and non-residents) for individual needs. #' #' #' @name expendituresEU #' @docType data #' @format A data frame with 27 observations on the following 12 variables. #' \describe{ \item{list("Food")}{a numeric vector} \item{list("Alcohol")}{a #' numeric vector} \item{list("Clothing")}{a numeric vector} #' \item{list("Housing")}{a numeric vector} \item{list("Furnishings")}{a #' numeric vector} \item{list("Health")}{a numeric vector} #' \item{list("Transport")}{a numeric vector} \item{list("Communications")}{a #' numeric vector} \item{list("Recreation")}{a numeric vector} #' \item{list("Education")}{a numeric vector} \item{list("Restaurants")}{a #' numeric vector} \item{list("Other")}{a numeric vector} } #' @references Eurostat provides a website with the data: #' #' \url{http://epp.eurostat.ec.europa.eu/statistics_explained/index.php/Household_consumption_expenditure} #' @source Eurostat: #' \url{http://epp.eurostat.ec.europa.eu/statistics_explained/images/c/c2/Mean_consumption_expenditure_of_households,_2005(PPS).PNG} #' @keywords datasets #' @examples #' #' data(expendituresEU) #' NULL #' Haplogroups data. #' #' Distribution of European Y-chromosome DNA (Y-DNA) haplogroups by region in #' percentage. #' #' Human Y-chromosome DNA can be divided in genealogical groups sharing a #' common ancestor, called haplogroups. #' #' @name haplogroups #' @docType data #' @format A data frame with 38 observations on the following 12 variables. #' \describe{ \item{list("I1")}{pre-Germanic (Nordic)} #' \item{list("I2b")}{pre-Celto-Germanic} \item{list("I2a1")}{Sardinian, #' Basque} \item{list("I2a2")}{Dinaric, Danubian} #' \item{list("N1c1")}{Uralo-Finnic, Baltic, Siberian} #' \item{list("R1a")}{Balto-Slavic, Mycenaean Greek, Macedonia} #' \item{list("R1b")}{Italic, Celtic, Germanic; Hitite, Armenian} #' \item{list("G2a")}{Caucasian, Greco-Anatolien} \item{list("E1b1b")}{North #' and Eastern Afrika, Near Eastern, Balkanic} \item{list("J2")}{Mesopotamian, #' Minoan Greek, Phoenician} \item{list("J1")}{Semitic (Arabic, Jewish)} #' \item{list("T")}{Near-Eastern, Egyptian, Ethiopian, Arabic} } #' @source Eupedia: #' \url{http://www.eupedia.com/europe/european_y-dna_haplogroups.shtml} #' @keywords datasets #' @examples #' #' data(haplogroups) #' NULL #' Machine operators data set #' #' The data set from Aitchison (1986), p. 382, contains compositions of #' eight-hour shifts of 27 machine operators. The parts represent proportions #' of shifts in each activity: high-quality production, low-quality production, #' machine setting and machine repair. #' #' #' @name machineOperators #' @docType data #' @format A data frame with 27 observations on the following 4 variables. #' \describe{ \item{list("hqproduction")}{high-quality production} #' \item{list("lqproduction")}{low-quality production} #' \item{list("setting")}{machine settings} \item{list("repair")}{machine #' repair} } #' @references Aitchison, J. (1986) \emph{The Statistical Analysis of #' Compositional Data} Monographs on Statistics and Applied Probability. #' Chapman \& Hall Ltd., London (UK). 416p. #' @keywords datasets #' @examples #' #' ## maybe str(machineOperators) ; plot(machineOperators) ... #' NULL #' PhD Students in the EU #' #' PhD students in Europe based on the standard klassification system splitted #' by different kind of studies (given as percentages). #' #' Due to unknown reasons the rowSums of the percentages is not always 100. #' #' @name phd #' @docType data #' @format The format is: num [1:33, 1:8] 516.5 7.5 5.2 22.6 4.8 ... - attr(*, #' "dimnames")=List of 2 ..$ : chr [1:33] "EU" "Belgien" "Bulgarien" #' "Tschech.Rep." ... ..$ : chr [1:8] "Gesamtzahl der Doktoranden (in 1 000)" #' "maennlich" "weiblich" "Naturwissen-schaften, Mathematik, Informatik u. #' Ingenieurwesen" ... #' @source #' \url{http://epp.eurostat.ec.europa.eu/cache/ITY_PUBLIC/1-18092009-AP/DE/1-18092009-AP-DE.PDF} #' @keywords datasets #' @examples #' #' data(phd) #' phdImputed <- impCoda(phd)$xOrig #' NULL #' Robust Estimation for Compositional Data. #' #' The package contains methods for imputation of compositional data including #' robust methods, (robust) outlier detection for compositional data, (robust) #' principal component analysis for compositional data, (robust) factor #' analysis for compositional data, (robust) discriminant analysis (Fisher #' rule) and (robust) Anderson-Darling normality tests for compositional data #' as well as popular log-ratio transformations (alr, clr, ilr, and their #' inverse transformations). #' #' \tabular{ll}{ Package: \tab robCompositions\cr Type: \tab Package\cr #' Version: \tab 1.3.3\cr Date: \tab 2009-11-28\cr License: \tab GPL 2\cr #' LazyLoad: \tab yes\cr } #' #' @name robCompositions-package #' @aliases robCompositions-package robCompositions #' @docType package #' @author Matthias Templ, Peter Filzmoser, Karel Hron, #' #' Maintainer: Matthias Templ #' @references Aitchison, J. (1986) \emph{The Statistical Analysis of #' Compositional Data} Monographs on Statistics and Applied Probability. #' Chapman \& Hall Ltd., London (UK). 416p. \ #' #' Filzmoser, P., and Hron, K. (2008) Outlier detection for compositional data #' using robust methods. \emph{Math. Geosciences}, \bold{40} 233-248. #' #' Filzmoser, P., Hron, K., Reimann, C. (2009) Principal Component Analysis for #' Compositional Data with Outliers. \emph{Environmetrics}, \bold{20} (6), #' 621--632. #' #' P. Filzmoser, K. Hron, C. Reimann, R. Garrett (2009): Robust Factor Analysis #' for Compositional Data. \emph{Computers and Geosciences}, \bold{35} (9), #' 1854--1861. #' #' Hron, K. and Templ, M. and Filzmoser, P. (2010) Imputation of missing values #' for compositional data using classical and robust methods #' \emph{Computational Statistics and Data Analysis}, \bold{54} (12), #' 3095--3107. #' #' C. Reimann, P. Filzmoser, R.G. Garrett, and R. Dutter (2008): Statistical #' Data Analysis Explained. \emph{Applied Environmental Statistics with R}. #' John Wiley and Sons, Chichester, 2008. #' @keywords package #' @examples #' #' ## k nearest neighbor imputation #' data(expenditures) #' expenditures[1,3] #' expenditures[1,3] <- NA #' impKNNa(expenditures)$xImp[1,3] #' #' ## iterative model based imputation #' data(expenditures) #' x <- expenditures #' x[1,3] #' x[1,3] <- NA #' xi <- impCoda(x)$xImp #' xi[1,3] #' s1 <- sum(x[1,-3]) #' impS <- sum(xi[1,-3]) #' xi[,3] * s1/impS #' #' xi <- impKNNa(expenditures) #' xi #' summary(xi) #' \dontrun{plot(xi, which=1)} #' plot(xi, which=2) #' plot(xi, which=3) #' #' ## pca #' data(expenditures) #' p1 <- pcaCoDa(expenditures) #' p1 #' plot(p1) #' #' ## outlier detection #' data(expenditures) #' oD <- outCoDa(expenditures) #' oD #' plot(oD) #' #' ## transformations #' data(arcticLake) #' x <- arcticLake #' x.alr <- addLR(x, 2) #' y <- addLRinv(x.alr) #' addLRinv(addLR(x, 3)) #' data(expenditures) #' x <- expenditures #' y <- addLRinv(addLR(x, 5)) #' head(x) #' head(y) #' addLRinv(x.alr, ivar=2, useClassInfo=FALSE) #' #' data(expenditures) #' eclr <- cenLR(expenditures) #' inveclr <- cenLRinv(eclr) #' head(expenditures) #' head(inveclr) #' head(cenLRinv(eclr$x.clr)) #' #' require(MASS) #' Sigma <- matrix(c(5.05,4.95,4.95,5.05), ncol=2, byrow=TRUE) #' z <- isomLRinv(mvrnorm(100, mu=c(0,2), Sigma=Sigma)) #' NULL #' Aphyric skye lavas data #' #' AFM compositions of 23 aphyric Skye lavas. This data set can be found on #' page 360 of the Aitchison book (see reference). #' #' #' @name skyeLavas #' @docType data #' @format A data frame with 23 observations on the following 3 variables. #' \describe{ \item{list("sodium-potassium")}{a numeric vector of percentages #' of Na2O\eqn{+}K2O} \item{list("iron")}{a numeric vector of percentages of #' Fe2O3} \item{list("magnesium")}{a numeric vector of percentages of MgO} } #' @source Aitchison, J. (1986) \emph{The Statistical Analysis of Compositional #' Data} Monographs on Statistics and Applied Probability. Chapman \& Hall #' Ltd., London (UK). 416p. #' @keywords datasets #' @examples #' #' data(skyeLavas) #' NULL