lavaan.survey.R
``````# Complex sampling analysis of SEM models
# Daniel Oberski, 2015-11-03

lavaan.survey <-
function(lavaan.fit, survey.design,
estimator=c("MLM", "MLMV", "MLMVS", "WLS", "DWLS", "ML"),
estimator.gamma=c("default","Yuan-Bentler")) {

# Not all estimators in lavaan make sense to use here, therefore matching args
estimator <- match.arg(estimator)
if(estimator=="ML") warning("Estimator 'ML' will not correct standard errors and chi-square statistic.")
estimator.gamma <- match.arg(estimator.gamma) # Smoothing Gamma or not

# Names of the observed variables (same for each group)
ov.names <- lavaanNames(lavaan.fit, type="ov", group=1)

# The MP-inverse duplication matrix is handy for removing redundancy
Dplus <- lavaan::lav_matrix_duplication_ginv( length(ov.names) )
# Create a formula that includes all observed variables for svymean
ov.formula <- as.formula(paste("~", paste(ov.names, collapse="+")))

# <no. group>-sized lists that will contain the asy. covariance matrix,
#  and sample covariance matrix and mean vector
ngroups <- lavInspect(lavaan.fit, "ngroups")

Gamma <- vector("list", ngroups)
sample.cov <- vector("list", ngroups)
sample.mean <- vector("list", ngroups)
sample.nobs <- vector("list", ngroups)

for(g in seq(ngroups)) {
if(ngroups > 1) {
# Use survey::subset to create data groups
survey.design.g <-
subset(survey.design, eval(parse(text=sprintf("%s == '%s'",
lavInspect(lavaan.fit, "group"),
lavInspect(lavaan.fit, "group.label")[[g]]))))
}
else { # In case of no groups, just use the original survey design object.
survey.design.g <- survey.design
}

# Function that takes survey design and returns the Gamma & observed moments
get.stats.design <- function(survey.design.g, sample.nobs.g) {
sample.cov.g <- as.matrix(svyvar(ov.formula, design=survey.design.g, na.rm=TRUE))
# survey package returns the variance matrix of the (co)variances as attr:
Gamma.cov.g <- attr(sample.cov.g, "var")
# Remove (co)variances wrt redundant elements of covs; not used by lavaan.
Gamma.cov.g <- Dplus %*% Gamma.cov.g %*% t(Dplus)

# Same for mean vector
sample.mean.g <- svymean(ov.formula, design=survey.design.g, na.rm=TRUE)
Gamma.mean.g <- attr(sample.mean.g, "var")

# Join asy. variance matrices for means and covariances
Gamma.g <- lavaan::lav_matrix_bdiag(Gamma.mean.g, Gamma.cov.g)

Gamma.g <- Gamma.g * sample.nobs.g # lavaan wants nobs * Gamma.

# Since the above nonparametric estimate of Gamma can be unstable, Yuan
# and Bentler suggested a model-smoothed estimate of it, optional here:
if(estimator.gamma == "Yuan-Bentler") {
r <- get.residuals(lavaan.fit) # Iff these asy = 0, all will be well...
Gamma.g <- Gamma.g + (sample.nobs.g/(sample.nobs.g - 1)) * (r %*% t(r))
}
# Get rid of attribute, preventing errors caused by lazy evaluation
# (This has to be at the end or lazy evaluation mayhem will ensue)
attr(sample.cov.g, "var") <- NULL
tmp  <- as.vector(sample.mean.g)
names(tmp) <- names(sample.mean.g)
sample.mean.g <- tmp

list(Gamma.g=Gamma.g, sample.cov.g=sample.cov.g, sample.mean.g=sample.mean.g)
}
# The data may be a list of multiply imputed datasets
if(!any(class(survey.design.g) == "svyimputationList")) {
# If no imputations, just use usual no. observations and asy variance
sample.nobs.g <- lavInspect(lavaan.fit, "nobs")[[g]]

stats <- get.stats.design(survey.design.g, sample.nobs.g)
}
else { # In case of multiply imputed data
# Not only can nobs differ from lavaan.fit, but also per imputation
sample.nobs.g <- get.sample.nobs(survey.design.g, lavInspect(lavaan.fit, "group"))

# Retrieve point and variance estimates per imputation,
#    [TODO: this line will not be correct when nobs differs over imputations]
stats.list <- lapply(survey.design.g[[1]], get.stats.design, sample.nobs=sample.nobs.g)
m  <- length(stats.list) # no. imputation

# Point estimates are average over imputations
sample.cov.list <- lapply(stats.list, `[[`, 'sample.cov.g')
sample.cov.g <- Reduce(`+`, sample.cov.list) / m
cov.df <- Reduce(`rbind`, lapply(sample.cov.list, lavaan::lav_matrix_vech))
sample.mean.list <- lapply(stats.list, `[[`, 'sample.mean.g')
sample.mean.g <- Reduce(`+`, sample.mean.list) / m
mean.df <- Reduce(`rbind`, sample.mean.list)

# Variance estimates depend on within- and between-imputation variance:
Gamma.within  <- Reduce(`+`, lapply(stats.list, `[[`, 'Gamma.g')) / m
Gamma.between <- cov(cbind(mean.df, cov.df))
Gamma.g <- Gamma.within + ((m + 1)/m) * Gamma.between

# set stats with multiple imputation point and variance estimates
stats <- list(Gamma.g=Gamma.g, sample.cov.g=sample.cov.g, sample.mean.g=sample.mean.g)
}

# Augment the list for this group
Gamma[[g]] <- stats\$Gamma.g
sample.cov[[g]] <- stats\$sample.cov.g
sample.mean[[g]] <- stats\$sample.mean.g
sample.nobs[[g]] <- sample.nobs.g
} # End of loop over groups

new.call <- lavInspect(lavaan.fit, "call")
new.call\$data <- NULL                # Remove any data argument
new.call\$sample.cov <- sample.cov    # Set survey covariances
new.call\$sample.mean <- sample.mean  # Set survey means
new.call\$sample.nobs <- sample.nobs
new.call\$estimator <- estimator  # Always use Satorra-Bentler or WLS estimator

if(substr(estimator, 1, 2) == "ML") { # ML, robust options
# Set asymptotic covariance matrix of sample means and covariances
new.call\$NACOV <- Gamma
}
if(estimator %in% c("WLS", "DWLS")) {
# Weighted Least Squares, adjust the weight matrix: MP inverse of Gamma
# Note that Gamma may be singular.
new.call\$WLS.V <- lapply(Gamma, ginv)
}
new.fit <- eval(as.call(new.call)) # Run lavaan with the new arguments

if(estimator %in% c("WLS", "DWLS")) return(new.fit) # We are done for WLS

# For ML with robust se's, check that a possibly singular Gamma has not
# created dependencies in the parameter estimates.
# (Code below should really be implemented in lavaan...)
evs.too.small <- sapply(Gamma, function(Gamma.g) {
any(eigen(Gamma.g, only.values=TRUE)\$values < (.Machine\$double.eps*100))
})
if(any(evs.too.small)) {
V.est <- lavaan::vcov(new.fit)
if(any(Re(eigen(V.est, only.values=TRUE)\$values) < (.Machine\$double.eps*100))) {
long.string  <- sprintf("Some of the standard errors may not be trustworthy.
Some of the observed covariances or means are
collinear, and this has generated collinearity in your
parameter estimates.  This may be a sample size issue,
missing data problem, or due to having too few
clusters relative to the number of parameters. Problem
encountered in group(s) %s",
paste(which(evs.too.small), collapse=", "))

warning(strwrap(long.string, width=9999, simplify=TRUE))#gotta love it
}
}

new.fit
}

# Obtain residuals from a lavaan fit object, concatenating means w/ covariances
# (used in Yuan-Bentler correction)
get.residuals <- function(fit) {
r  <- lavaan::residuals(fit)
c(r\$mean, lavaan::lav_matrix_vech(r\$cov))
}

# Obtain sample size from multiply imputed svydesign object.
# In case sample size differs over imputations, takes median over imputations.
# TODO: Does not work with multiple group yet.
get.sample.nobs  <- function(svy.imp.design, group=NULL) {
nobs.imp <- lapply(svy.imp.design[[1]], function(des) {nrow(des\$variables)})
return(median(unlist(nobs.imp)))
}

# Use the pFsum function from the survey package to obtain p value for the
#   overall model fit using an F reference distribution where the
#   denominator degrees of freedom is the design degrees of freedom.
# An anonymous reviewer for J Stat Software suggested that
#  "in surveys with small numbers of primary sampling units this sort of
#   correction has often improved the
#   behavior of tests in other contexts."
# The eigenvalues of the U.Gamma matrix will be the coefficients in the
#   mixture of F's distribution (Skinner, Holt & Smith, pp. 86-87).
pval.pFsum <- function(lavaan.fit, survey.design, method = "saddlepoint") {
# Check that Satorra-Bentler or Satterthwaite adjustment is present
if(!lavInspect(lavaan.fit, "options")\$test %in%