Raw File
gait.R
#  -----------------------------------------------------------------------
#                        Gait data
#  -----------------------------------------------------------------------

#  -----------------------------------------------------------------------
#
#                          Overview of the analyses
#
#  The gait data were chosen for these sample analyses because they are
#  bivariate:  consisting of both hip and knee angles observed over a
#  gait cycle for 39 children.  The bivariate nature of the data implies
#  certain displays and analyses that are not usually considered, and
#  especially the use of canonical correlation analysis.
#  As with the daily weather data, the harmonic acceleration roughness
#  penalty is used throughout since the data are periodic with a strong
#  sinusoidal component of variation.
#  After setting up the data, smoothing the data using GCV (generalized
#  cross-validation) to select a smoothing parameter, and displaying
#  various descriptive results, the data are subjected to a principal
#  components analysis, followed by a canonical correlation analysis of the
#  joint variation of hip and knee angle, and finally a registration of the
#  curves.  The registration is included here especially because the 
#  registering of periodic data requires the estimation of a phase shift
#  constant for each curve in addition to possible nonlinear 
#  transformations of time.
#
#  -----------------------------------------------------------------------

#  Last modified 5 November 2008 by Jim Ramsay
#  Previously modified 25 February 2007 by Spencer Graves

#  attach the FDA functions

#  Windows ... R
library(fda)
#attach("c:\\Program Files\\R\\R-2.2.0\\fdaR\\R\\.RData")

#  Windows ... S-PLUS

#attach("c:\\Program Files\\Insightful\\Splus70\\fdaS\\functions\\.Data")


#  -------------  input the data for the two measures  ---------------

#hip  <- matrix(scan("../data/hip.txt", 0), 20, 39)
#knee <- matrix(scan("../data/knee.txt",0), 20, 39)

#  set up the argument values

#gaittime  <- (1:20)/21
# Match Matlab 
# gaittime  <- seq(0.025, 0.975, length=20)
(gaittime <- as.numeric(dimnames(gait)[[1]]))

#  set up a three-dimensional array of function values

#gait <- array(0, c(20, 39, 2))
#dimnames(gait) <- list(NULL, paste("child", 1:39, sep=""), 
#                       c("Hip Angle", "Knee Angle") )
#gait[,,1] <- hip
#gait[,,2] <- knee
apply(gait, 3, range)

# -----------  set up the harmonic acceleration operator  ----------

harmaccelLfd <- vec2Lfd(c(0, 0, (2*pi)^2, 0))

#  Set up basis for representing gait data.  The basis is saturated since
#  there are 20 data points per curve, and this set up defines 21 basis
#  functions. Recall that a fourier basis has an odd number of basis functions.

gaitbasis3 <- create.fourier.basis(nbasis=3)
gaitfd3 <- smooth.basis(gaittime, gait, basisobj=gaitbasis3)$fd
str(gaitfd3)
gaitVar.fd3 <- var.fd(gaitfd3)
str(gaitVar.fd3)

# Check the answers with manual computations 
all.equal(var(t(gaitfd3$coefs[,,1])), gaitVar.fd3$coefs[,,,1])
# TRUE
all.equal(var(t(gaitfd3$coefs[,,2])), gaitVar.fd3$coefs[,,,3])
# TRUE
all.equal(var(t(gaitfd3$coefs[,,2]), t(gaitfd3$coefs[,,1])),
          gaitVar.fd3$coefs[,,,2])
# TRUE

# See help("var.fd") examples 











gaitVar.fd3$coefs

gaitbasis5 <- create.fourier.basis(nbasis=5)
gaitbasis21 <- create.fourier.basis(nbasis=21)

#  -----------  create the fd object   -----------

#                 Choose level of smoothing using
#          the generalized cross-validation criterion

#  set up range of smoothing parameters in log_10 units

gaitLoglam <- seq(-13,-10,0.5)
nglam   <- length(gaitLoglam)

gaitSmoothStats <- array(NA, dim=c(nglam, 3),
      dimnames=list(gaitLoglam, c("log10.lambda", "df", "gcv") ) )
gaitSmoothStats[, 1] <- gaitLoglam

#  loop through smoothing parameters

for (ilam in 1:nglam) {
#	lambda <- 10^loglam[ilam]
#   	fdParobj <- fdPar(gaitbasis, harmaccelLfd, lambda)
	gaitSmooth <- smooth.basisPar(gaittime, gait, gaitbasis21, 
                  Lfdobj=harmaccelLfd, lambda=10^gaitLoglam[ilam])
        gaitSmoothStats[ilam, "df"] <- gaitSmooth$df
        gaitSmoothStats[ilam, "gcv"] <- sum(gaitSmooth$gcv)
# note: gcv is a matrix in this case
}

#  display and plot GCV criterion and degrees of freedom

gaitSmoothStats

#  set up plotting arrangements for one and two panel displays allowing
#  for larger fonts

op <- par(mfrow=c(2,1))
plot(gaitLoglam, gaitSmoothStats[, "gcv"], type="b",
     xlab="Log_10 lambda", ylab="GCV Criterion", 
     main="Gait Smoothing", log="y")

plot(gaitLoglam, gaitSmoothStats[, "df"], type="b",
     xlab="Log_10 lambda", ylab="Degrees of freedom", 
     main="Gait Smoothing")
par(op)

# With gaittime =  seq(0.025, 0.975, length=20), 
#    GCV is minimized with lambda = 10^(-11).
# With gaittime = (1:20)/21,
#    GCV is minimized with lambda = 10^(-11.5).

#lambda    <- 10^(-11.5)
#gaitfdPar <- fdPar(gaitbasis21, harmaccelLfd, lambda)

str(gait)
gaitfd <- smooth.basisPar(gaittime, gait,
       gaitbasis21, Lfdobj=harmaccelLfd, lambda=1e-11)$fd

str(gaitfd)
names(gaitfd$fdnames) = c("Normalized time", "Child", "Angle")
gaitfd$fdnames[[3]] = c("Hip", "Knee")

#  --------  plot curves and their first derivatives  ----------------

#par(mfrow=c(1,2), mar=c(3,4,2,1), pty="s")
op <- par(mfrow=c(2,1))
plot(gaitfd, cex=1.2)
par(op)

#  plot each pair of curves interactively

#par(mfrow=c(1,2), mar=c(3,4,2,1), pty="s")
op <- par(mfrow=c(2,1))
plotfit.fd(gait, gaittime, gaitfd, cex=1.2)
# Problem:  does not work properly;
# ask=TRUE with appropriate choices for lty, etc., 
# might make it stop after each one,
# but would not fix the overplotting
# Need to modify plotfit.fd to accept ylim = array,
# not just a vector of length 2.  
par(op)

#  plot the residuals, sorting cases by residual sum of squares

#par(mfrow=c(1,2), mar=c(3,4,2,1), pty="s")
plotfit.fd(gait, gaittime, gaitfd, residual=TRUE, sort=TRUE, cex=1.2)

#  plot first derivative of all curves

#par(mfrow=c(1,2), mar=c(3,4,2,1), pty="s")
plot(gaitfd, Lfdob=1)

#  ------------  compute the mean functions  --------------------

gaitmeanfd <- mean.fd(gaitfd)

#  plot these functions and their first two derivatives

#par(mfcol=c(2,3),pty="s")
#op <- par(mfrow=c(3,2))
op <- par(mfcol=2:3)
plot(gaitmeanfd)
plot(gaitmeanfd, Lfdobj=1)
plot(gaitmeanfd, Lfdobj=2)
par(op)

#  --------------  PCA of gait data  --------------------

#  do the PCA with varimax rotation

# Smooth with lambda as determined above
gaitfdPar <- fdPar(gaitbasis, harmaccelLfd, lambda=1e-11)
gaitpca.fd <- pca.fd(gaitfd, nharm=4, gaitfdPar)

gaitpca.fd <- varmx.pca.fd(gaitpca.fd)

#  plot harmonics using cycle plots

#par(mfrow=c(1,1), mar=c(3,4,2,1), pty="s")
op <- par(mfrow=c(2,2))
plot.pca.fd(gaitpca.fd, cycle=TRUE)
par(op)

#  ------  Canonical correlation analysis of knee-hip curves  ------

hipfd  <- gaitfd[,1]
kneefd <- gaitfd[,2]

hipfdPar  <- fdPar(hipfd,  harmaccelLfd, 1e-8)
kneefdPar <- fdPar(kneefd, harmaccelLfd, 1e-8)

ccafd    <- cca.fd(hipfd, kneefd, ncan=3, hipfdPar, kneefdPar)

#  plot the canonical weight functions

#  each harmonic in turn

op <- par(mfrow=c(2,1), mar=c(3,4,2,1), pty="m")
plot.cca.fd(ccafd, cex=1.2)
par(op)

#  display the canonical correlations

round(ccafd$ccacorr[1:6],3)

%  ----------  compute the variance and covariance functions  -------

gaitvarbifd <- var.fd(gaitfd)
str(gaitvarbifd)

gaitvararray = eval.bifd(gaittime, gaittime, gaitvarbifd)

#par(mfrow=c(1,1), mar=c(3,4,2,1), pty="m")

#  plot variance and covariance functions as contours

filled.contour(gaittime, gaittime, gaitvararray[,,1,1], cex=1.2,las='none')
title("Knee - Knee")

filled.contour(gaittime, gaittime, gaitvararray[,,1,2], cex=1.2)
title("Knee - Hip")

filled.contour(gaittime, gaittime, gaitvararray[,,1,3], cex=1.2)
title("Hip - Hip")

#  plot variance and covariance functions as surfaces

persp(gaittime, gaittime, gaitvararray[,,1,1], cex=1.2)
title("Knee - Knee")

persp(gaittime, gaittime, gaitvararray[,,1,2], cex=1.2)
title("Knee - Hip")

persp(gaittime, gaittime, gaitvararray[,,1,3], cex=1.2)
title("Hip - Hip")

#par(mfrow=c(1,1), mar=c(3,4,2,1), pty="m")

#  plot correlation functions as contours

gaitCorArray <- cor.fd(gaittime, gaitfd)

quantile(gaitCorArray)

contour(gaittime, gaittime, gaitCorArray[,,1,1], cex=1.2)
title("Knee - Knee")

contour(gaittime, gaittime, gaitCorArray[,,1,2], cex=1.2)
title("Knee - Hip")

contour(gaittime, gaittime, gaitCorArray[,,1,3], cex=1.2)
title("Hip - Hip")




#  ----  Register the first derivative of the gait data  --------

#  set up basis for warping function

nwbasis <- 7
#wbasis  <- create.bspline.basis(gaitrange,nwbasis,5)
warpbasis  <- create.bspline.basis(nbasis=nwbasis,norder=5)

Dgaitfd <- deriv.fd(gaitfd)

Dgaitmeanfd  <- mean.fd(Dgaitfd)

#lambda <- 1e-2
Warpfd    <- fd(matrix(0,nwbasis,39),warpbasis)
WarpfdPar <- fdPar(Warpfd, 3, lambda=0.01)

regstr <- register.fd(Dgaitmeanfd, Dgaitfd, WarpfdPar, periodic=TRUE)
#-------  Curve  1   --------
#Error in yregmat[, 1, ] : incorrect number of dimensions
#?????

xfine       <- seq(0,1,len=101)
Dgaitregfd  <- regstr$regfd
Dgaitregmat <- eval.fd(xfine, Dgaitregfd)
warpfd      <- regstr$Wfd
shift       <- regstr$shift
warpmat     <- eval.monfd(xfine, warpfd)
warpmat     <- warpmat/outer(rep(1,101),warpmat[101,])

par(mfrow=c(1,1), mar=c(4,4,2,1), pty="m")
matplot(xfine, warpmat, type="l", xlab="t", ylab="h(t)", 
        main="Time warping functions", cex=1.2)

#  plot both the unregistered and registered versions of the curves

par(mfrow=c(2,2))
plot(Dgaitfd,    ask=FALSE)
plot(Dgaitregfd, ask=FALSE)

#  plot the deformation functions, def(t) = warp(t) - t

defmat <- warpmat
for (i in 1:39) 
	defmat[,i] <- warpmat[,i] - xfine - shift[i]

par(mfrow=c(1,1), mar=c(4,4,2,1), pty="m")
matplot(xfine, defmat, type="l", xlab="t", ylab="h(t) - t", 
        main="Deformation functions", cex=1.2)



back to top