https://github.com/cran/GenAlgo
Tip revision: 35a421aca28768b31853a0b29cdbab60e40e7569 authored by Kevin R. Coombes on 18 May 2018, 14:29:40 UTC
version 2.1.4
version 2.1.4
Tip revision: 35a421a
featureSelection.R
## ----makeHappy,echo=FALSE------------------------------------------------
if (!require(Umpire)) {
knitr::opts_chunk$set(eval = FALSE)
}
## ----lib-----------------------------------------------------------------
library(GenAlgo)
library(Umpire)
library(oompaBase)
## ----survmodel-----------------------------------------------------------
set.seed(391629)
sm <- SurvivalModel(baseHazard=1/5, accrual=5, followUp=1)
## ----cancModel-----------------------------------------------------------
nBlocks <- 20 # number of possible hits
cm <- CancerModel(name="cansim",
nPossible=nBlocks,
nPattern=6,
OUT = function(n) rnorm(n, 0, 1),
SURV= function(n) rnorm(n, 0, 1),
survivalModel=sm)
### Include 100 blocks/pathways that are not hit by cancer
nTotalBlocks <- nBlocks + 100
## ----hyper---------------------------------------------------------------
### block size
blockSize <- round(rnorm(nTotalBlocks, 100, 30))
### log normal mean hypers
mu0 <- 6
sigma0 <- 1.5
### log normal sigma hypers
rate <- 28.11
shape <- 44.25
### block corr
p <- 0.6
w <- 5
## ----engine--------------------------------------------------------------
rho <- rbeta(nTotalBlocks, p*w, (1-p)*w)
base <- lapply(1:nTotalBlocks,
function(i) {
bs <- blockSize[i]
co <- matrix(rho[i], nrow=bs, ncol=bs)
diag(co) <- 1
mu <- rnorm(bs, mu0, sigma0)
sigma <- matrix(1/rgamma(bs, rate=rate, shape=shape), nrow=1)
covo <- co *(t(sigma) %*% sigma)
MVN(mu, covo)
})
eng <- Engine(base)
## ----alter---------------------------------------------------------------
altered <- alterMean(eng, normalOffset, delta=0, sigma=1)
object <- CancerEngine(cm, eng, altered)
summary(object)
## ----clean1--------------------------------------------------------------
rm(altered, base, blockSize, cm, eng, mu0, nBlocks, nTotalBlocks,
p, rate, rho, shape, sigma0, sm, w)
## ----traind--------------------------------------------------------------
train <- rand(object, 198)
tdata <- train$data
pid <- paste("PID", sample(1001:9999, 198+93), sep='')
rownames(train$clinical) <- colnames(tdata) <- pid[1:198]
## ----noise---------------------------------------------------------------
noise <- NoiseModel(3, 1, 1e-16)
train$data <- log2(blur(noise, 2^(tdata)))
sum(is.na(train$data))
rm(tdata)
summary(train$clinical)
summary(train$data[, 1:3])
## ----validd--------------------------------------------------------------
valid <- rand(object, 93)
vdata <- valid$data
vdata <- log2(blur(noise, 2^(vdata))) # add noise
sum(is.na(vdata))
vdata[is.na(vdata)] <- 0.26347
valid$data <- vdata
colnames(valid$data) <- rownames(valid$clinical) <- pid[199:291]
rm(vdata, noise, object, pid)
summary(valid$clinical)
summary(valid$data[, 1:3])
## ----measureFitness------------------------------------------------------
measureFitness <- function(arow, context) {
predictors <- t(context$data[arow, ]) # space defined by features
groups <- context$clinical$Outcome # good or bad outcome
maha(predictors, groups, method='var')
}
## ----mutator-------------------------------------------------------------
mutator <- function(allele, context) {
sample(1:nrow(context$data),1)
}
## ----initialize----------------------------------------------------------
set.seed(821831)
n.individuals <- 200
n.features <- 10
y <- matrix(0, n.individuals, n.features)
for (i in 1:n.individuals) {
y[i,] <- sample(1:nrow(train$data), n.features)
}
## ----round1--------------------------------------------------------------
my.ga <- GenAlg(y, measureFitness, mutator, context=train) # initialize
summary(my.ga)
## ----save0---------------------------------------------------------------
recurse <- my.ga
pop0 <- sort(table(as.vector(my.ga@data)))
## ----recurse-------------------------------------------------------------
NGEN <- 20
diversity <- meanfit <- fitter <- rep(NA, NGEN)
for (i in 1:NGEN) {
recurse <- newGeneration(recurse)
fitter[i] <- recurse@best.fit
meanfit[i] <- mean(recurse@fitness)
diversity[i] <- popDiversity(recurse)
}
## ----fig.cap="Fitness by generation."------------------------------------
plot(fitter, type='l', ylim=c(0, 1.5), xlab="Generation", ylab="Fitness")
abline(h=max(fitter), col='gray', lty=2)
lines(fitter)
lines(meanfit, col='gray')
points(meanfit, pch=16, col=jetColors(NGEN))
legend("bottomleft", c("Maximum", "Mean"), col=c("black", "blue"), lwd=2)
## ----fig.cap="Diversity."------------------------------------------------
plot(diversity, col='gray', type='l', ylim=c(0,10), xlab='', ylab='', yaxt='n')
points(diversity, pch=16, col=jetColors(NGEN))
## ----div-----------------------------------------------------------------
sort(table(as.vector(recurse@data)))
