https://github.com/cran/nFactors
Raw File
Tip revision: 0d077e574bae60fadd67bd0683ad4277c58f593a authored by Gilles Raiche on 10 October 2022, 11:20:07 UTC
version 2.4.1.1
Tip revision: 0d077e5
nBartlett.r
#'
#' Bartlett, Anderson and Lawley Procedures to Determine the Number of Components/Factors
#'
#' This function computes the Bartlett, Anderson and Lawley indices for determining the
#' number of components/factors to retain.
#' @details Note: the latex formulas are available only in the pdf version of this help file.
#'
#' The hypothesis tested is: \cr
#'
#' (1)  \eqn{\qquad \qquad H_k: \lambda_{k+1} = \ldots = \lambda_p} \cr
#'
#' This hypothesis is verified by the application of different version of a
#' \eqn{\chi^2} test with different values for the degrees of freedom.
#' Each of these tests shares the compution of a \eqn{V_k} value: \cr
#'
#' (2) \eqn{\qquad \qquad V_k  =
#'   \prod\limits_{i = k + 1}^p {\left\{ {{{\lambda _i }
#'     \over {{\raise0.7ex\hbox{$1$} \!\mathord{\left/
#'     {\vphantom {1 q}}\right.\kern-\nulldelimiterspace}
#'       \!\lower0.7ex\hbox{$q$}}\sum\limits_{i = k + 1}^p {\lambda _i } }}} \right\}}
#' }
#'
#' \eqn{p} is the number of eigenvalues, \eqn{k} the number of eigenvalues to test,
#' and \eqn{q} the \eqn{p-k} remaining eigenvalues. \eqn{n} is equal to the sample size
#' minus 1 (\eqn{n = N-1}). \cr
#'
#' The Anderson statistic is distributed as a \eqn{\chi^2} with \eqn{(q + 2)(q - 1)/2} degrees
#' of freedom and is equal to: \cr
#'
#' (3) \eqn{\qquad \qquad - n\log (V_k ) \sim \chi _{(q + 2)(q - 1)/2}^2 } \cr
#'
#' An improvement of this statistic from Bartlett (Bentler, and Yuan, 1996, p. 300;
#'                                                 Horn and Engstrom, 1979, equation 8) is distributed as a \eqn{\chi^2}
#' with \eqn{(q)(q - 1)/2} degrees of freedom and is equal to: \cr
#'
#' (4) \eqn{\qquad \qquad - \left[ {n - k - {{2q^2 q + 2} \over {6q}}}
#'                                 \right]\log (V_k ) \sim \chi _{(q + 2)(q - 1)/2}^2 }  \cr
#'
#' Finally, Anderson (1956) and James (1969) proposed another statistic. \cr
#'
#' (5) \eqn{\qquad \qquad - \left[ {n - k - {{2q^2 q + 2} \over {6q}}
#'   + \sum\limits_{i = 1}^k {{{\bar \lambda _q^2 } \over {\left( {\lambda _i
#'     - \bar \lambda _q } \right)^2 }}} } \right]\log (V_k ) \sim \chi _{(q + 2)(q - 1)/2}^2 } \cr
#'
#' Bartlett (1950, 1951) proposed a correction to the degrees of freedom of these \eqn{\chi^2} after the
#' first significant test: \eqn{(q+2)(q - 1)/2}. \cr
#'
#' @param x          numeric: a \code{vector} of eigenvalues, a \code{matrix} of correlations or of covariances or a \code{data.frame} of data (eigenFrom)
#' @param N          numeric: number of subjects
#' @param alpha      numeric: statistical significance level
#' @param cor        logical: if \code{TRUE} computes eigenvalues from a correlation matrix, else from a covariance matrix
#' @param details    logical: if \code{TRUE} also returns detains about the computation for each eigenvalue
#' @param correction logical: if \code{TRUE} uses a correction for the degree of freedom after the first eigenvalue
#' @param ...        variable: additionnal parameters to give to the \code{cor} or \code{cov} functions
#' @return \item{nFactors}{numeric: vector of the number of factors retained by the Bartlett, Anderson and Lawley procedures.}
#' @return \item{details}{numeric: matrix of the details for each index.}
#' @author Gilles Raiche \cr Centre sur les Applications des Modeles de
#' Reponses aux Items (CAMRI) \cr Universite du Quebec a Montreal\cr
#' \email{raiche.gilles@@uqam.ca}
#' @seealso \code{\link{plotuScree}}, \code{\link{nScree}}, \code{\link{plotnScree}}, \code{\link{plotParallel}}
#'
#' @references
#'  Anderson, T. W. (1963). Asymptotic theory for principal component analysis. \emph{Annals of Mathematical Statistics, 34}, 122-148.
#'
#'  Bartlett, M. S. (1950). Tests of significance in factor analysis. \emph{British Journal of Psychology, 3}, 77-85.
#'
#'  Bartlett, M. S. (1951). A further note on tests of significance. \emph{British Journal of Psychology, 4}, 1-2.
#'
#'  Bentler, P. M. and Yuan, K.-H. (1996). Test of linear trend in eigenvalues of a covariance matrix with application to data analysis.
#'  \emph{British Journal of Mathematical and Statistical Psychology, 49}, 299-312.
#'
#'  Bentler, P. M. and Yuan, K.-H. (1998). Test of linear trend in the smallest
#'  eigenvalues of the correlation matrix. \emph{Psychometrika, 63}(2), 131-144.
#'
#'  Horn, J. L. and Engstrom, R. (1979). Cattell's scree test in relation to
#'  Bartlett's chi-square test and other observations on the number of factors
#'  problem. \emph{Multivariate Behavioral Reasearch, 14}(3), 283-300.
#'
#'  James, A. T. (1969). Test of equality of the latent roots of the covariance
#'  matrix. \emph{In} P. K. Krishna (Eds): \emph{Multivariate analysis, volume 2}.New-York, NJ: Academic Press.
#'
#'  Lawley, D. N. (1956). Tests of significance for the latent roots of covarianceand correlation matrix. \emph{Biometrika, 43}(1/2), 128-136.
#'
#' @export
# #' @importFrom stats pchisq
#' @keywords multivariate
#' @examples
#' ## ................................................
#' ## SIMPLE EXAMPLE OF A BARTLETT PROCEDURE
#'
#' data(dFactors)
#' eig      <- dFactors$Raiche$eigenvalues
#'
#' results  <- nBartlett(x=eig, N= 100, alpha=0.05, details=TRUE)
#' results
#'
#' plotuScree(eig, main=paste(results$nFactors[1], ", ",
#'                            results$nFactors[2], " or ",
#'                            results$nFactors[3],
#'                            " factors retained by the LRT procedures",
#'                            sep=""))
#'
nBartlett <-
function(x, N, alpha=0.05, cor=TRUE, details=TRUE, correction=TRUE, ...) {
 stopMessage  <- paste("\n These indices are only valid with a principal component solution.\n",
                       " ...................... So, only positive eugenvalues are permitted.\n",
                       sep="")
 x            <- eigenComputes(x, cor=cor, ...)
 if (length(which(x<0)) > 0) {cat(stopMessage);stop()}

 n            <- length(x)
 detail       <- NULL
 bartlett.n   <- anderson.n   <- lawley.n                   <- 0
 bartlett     <- bartlett.chi <- bartlett.df <- bartlett.p  <- numeric(n)
 anderson.chi <- anderson.df  <- anderson.p                 <- numeric(n)
 lawley.chi   <- lawley.df    <- lawley.p                   <- numeric(n)
 for (k in 0:(n-1)) {
  i <- k+1
  bartlett[i]     <- prod(x[(k+1):n]) /  (sum(x[(k+1):n])/(n-k))^(n-k) # From Horn et Engstrom (1979)
  bartlett.chi[i] <- -(N - 1 - ((2*n+5)/6) - ((2*k)/3)) * log(bartlett[i])
  bartlett.df[i]  <- .5 * (n-k) * (n-k-1)   # Bartlett without correction, from Horn and Engstrom (1979. p. 291, equation 8)
  if (correction==TRUE & bartlett.n > 0) bartlett.df[i]  <- .5 * (n-k+2) * (n-k-1)  # From Bentler and Yuan (1996, p. 300)
  bartlett.p[i]   <- pchisq(bartlett.chi[i] , bartlett.df[i], lower.tail = FALSE)
  # Conditions to stop when non significant test are obtained
  anderson.chi[i] <- -N * log(bartlett[i])  # From Bentler and Yuan (1996, p. 300, equations 3-4)
  anderson.df[i]  <- .5 * (n-k+2) * (n-k-1) # From Bentler and Yuan (1996, p. 300)
  anderson.p[i]   <- pchisq(anderson.chi[i] , anderson.df[i], lower.tail = FALSE)
  lMean           <- mean(x[(k+1):n])
  lawley.chi[i]   <- -(N - 1 - ((2*n+5)/6) - ((2*k)/3) + sum((lMean^2)/((x[k]+lMean)^2))) * log(bartlett[i]) # From Bentler and Yuan (1996, p. 300, equation 6)
  lawley.df[i]    <- .5 * (n-k) * (n-k-1) # From Horn and Engstrom (1979. p. 291, equation 8)
  lawley.p[i]     <- pchisq(lawley.chi[i] , lawley.df[i], lower.tail = FALSE)
# print(c(bartlett[i], bartlett.chi[i], bartlett.df[i], bartlett.p[i]),2)  ############ TEST #############
  if (i == 1) {
   bartlett.n <- bartlett.n + as.numeric(bartlett.p[i] <= alpha)
   anderson.n <- anderson.n + as.numeric(anderson.p[i] <= alpha)
   lawley.n   <- lawley.n   + as.numeric(lawley.p[i]   <= alpha)
      }
  if (i > 1)  {
   if(bartlett.p[i-1] <= 0.05) bartlett.n <- bartlett.n + as.numeric(bartlett.p[i] <= alpha)
   if(anderson.p[i-1] <= 0.05) anderson.n <- anderson.n + as.numeric(anderson.p[i] <= alpha)
   if(lawley.p[i-1]   <= 0.05) lawley.n   <- lawley.n   + as.numeric(lawley.p[i]   <= alpha)
   }
  }
 if (bartlett.n == 0) bartlett.n <- n # If no test if significant, retain all components
 if (anderson.n == 0) anderson.n <- n
 if (lawley.n   == 0) lawlwy.n   <- n
 if (details == TRUE) detail    <- data.frame(v=(1:(n)),values=x[1:(n)],
                                               bartlett, bartlett.chi, bartlett.df, bartlett.p,
                                               anderson.chi, anderson.df, anderson.p,
                                               lawley.chi,   lawley.df,   lawley.p)
 res        <- list(detail=detail,
                    nFactors=c(bartlett=bartlett.n, anderson=anderson.n, lawley=lawley.n))
 class(res) <- c("nFactors","list")
 return(res)
 }

back to top