Revision ecb4f844264b72eaa8cbd708244ecd32d414c7dd authored by Naima16 on 02 June 2020, 17:03:15 UTC, committed by GitHub on 02 June 2020, 17:03:15 UTC
1 parent a5fd677
simulatuion_errorI.R
# Simulations for type 1 error.
#We will permute the spiders data at random [I used method A, Permute data in individual rows, method B, Permute data in individual columns], producing each time a data matrix in which H0 is certainly true.
#We will then run function dbd.perm2() and obtain the associated p-value.
#We will repeat this simulation a large number of times and obtain an estimate of the proportion of the simulations where H0 was rejected at a variety of alpha rejection levels {0.01, 0.05, 0.10, 0.20}.
#“A statistical testing procedure is valid if the probability of a type I error (rejecting H0 when true) is no greater than α, the level of significance, for any α.” (Edgington 1995, p. 37).
simul.type1.dbd = function(nsim=100, n=28, p=12, g=NULL, pg = 6, gen.method=1, prob.mat1=0.8, sd1=1.5,perm.method=2)
{
# Generate matrix mat1 (site x species)
if(gen.method==1) { # Random Poisson data
mat1 <- matrix(rpois((n*p),prob.mat1),n,p)
} else { # Random lognormal data
mat1 <- matrix(round(exp(rnorm((n*p),0,sd1))),n,p)
}
out = matrix(NA, nsim, 3)
rownames(out) = paste("Simul",1:nsim,sep=".")
colnames(out) = c("cor.sg", "cor.t", "p.val")
aa <- system.time({
for(i in 1:nsim) {
if(perm.method==1) { # Permute p/a data in individual rows of mat1 (sites x species), method A
for(ii in 1:n) mat1.H0[ii,] = sample(mat1[ii,])
}
if(perm.method==2) {
# Permute data in individual columns of mat1 (sites x species) # method B
mat1.H0 = apply(mat1,2,sample)
}
tmp = dbd.perm2(mat1.H0, g, perm.method=perm.method, nperm=999, clock=FALSE)
out[i,] = c(tmp$Stat.out[1,1:2], tmp$p.val)
}
})
aa[3] <- sprintf("%2f", aa[3])
cat("Time for computation =", aa[3], " sec\n")
#
data.frame(out)
}
dbd.perm2 <- function(mat, g, perm.method=1, nperm=999, nn.print=0,clock=TRUE)
{
epsilon <- sqrt(.Machine$double.eps)
aa <- system.time({
mat = as.matrix(mat)
n <- nrow(mat)
p <- ncol(mat)
# Create matrix mat1 (sites x species), p/a data
mat1=.Call("presence_absence",mat)
rich.sp=.Call("richesse",mat1)
# Create matrix A of dummy variables (species x genera)
g = as.factor(g)
ng = length(g)
A = model.matrix(~g-1)
A = A[1:ng,]
colnames(A) = levels(g)
# Create matrix mat2 (sites x genera), p/a data
mat2=.Call("produit",mat,A)
mat2=.Call("presence_absence",mat2)
rich.gen=.Call("richesse",mat2)
Nsp.by.Ngen = rich.sp/rich.gen
Richness = as.data.frame(cbind(rich.gen,rich.sp, Nsp.by.Ngen))
Richness=Richness[Richness$rich.gen !=0,]
Richness.emp=Richness
Stat.out = matrix(NA,nperm+1,2)
colnames(Stat.out) = c("cor.sg", "cor.t")
rownames(Stat.out) = paste("perm", 0:nperm, sep=".")
# Compute statistics for unpermuted data and write them to Stat.out
Stat.out[1,1] <- cor.sg <- cor(Richness$rich.gen, Richness$Nsp.by.Ngen)
Stat.out[1,2] <- cor.t <- cor.sg * sqrt(n-2)/sqrt(1-cor.sg^2)
if(cor.sg > 1.0-sqrt(epsilon)) cat("cor = 1.0; a test is useless.\n")
#
# Permutations begin
if((perm.method<1) | (perm.method>3)) stop ("Method not implemented")
for(i in 1:(nperm)) {
mat1.perm = matrix(0,n,p)
### this permutation was dropped from the paper
# if(perm.method==1) { # Permute p/a data in in the whole matrix mat1 (sites x species)
# mat1.perm=.Call("sampleC_all",mat1)
# }
## this is model A in the paper
if(perm.method==1) { # Permute p/a data in individual rows of mat1 (sites x species)
mat1.perm=.Call("sampleC_row_real",mat1)
}
##this is model B in the paper
if(perm.method==2) { # Permute data in individual columns of mat1 (sites x species)
mat1.perm = .Call("sampleC_col_real",mat1)
}
mat1.perm=.Call("presence_absence",mat1.perm)
# Create matrix "mat2" of sites x genera
mat2=.Call("produit",mat1.perm,A) ##produit matriciel
mat2=.Call("presence_absence",mat2)
rich.gen=.Call("richesse",mat2)
length(rich.gen[rich.gen==0])
rich.sp=.Call("richesse",mat1.perm)
Nsp.by.Ngen.perm = rich.sp/rich.gen
Richness = as.data.frame(cbind(rich.gen, Nsp.by.Ngen.perm))
Richness=Richness[Richness$rich.gen !=0,]
# Compute statistics
Stat.out[i+1,1] <- cor.sg.perm <- cor(Richness$rich.gen, Richness$Nsp.by.Ngen.perm)
Stat.out[i+1,2] <- cor.t.perm <- cor.sg.perm * sqrt(n-2)/sqrt(1-cor.sg.perm^2)
}
#
# Print a few lines (nn lines) of the output matrix
if(nn.print>0) print(Stat.out[1:nn.print,])
})
aa[3] <- sprintf("%2f", aa[3])
if (clock) cat("Time for computation =", aa[3], " sec\n")
# Compute p-value
p.val = length(which(Stat.out[,2] >= Stat.out[1,2]))/(nperm+1)
list(Richness=Richness.emp, Stat.out=Stat.out, p.val=p.val,Richness.perm=Richness)
}
genera = c("Alop", "Alop", "Alop", "Arct", "Arct", "Aulo", "Pard", "Pard", "Pard", "Pard", "Troc", "Zora")
res1 = simul.type1.dbd(nsim=100, g=genera, gen.method=1,perm.method=1)
head(res1)
res1$p.val
length(which(res1$p.val <= 0.05))
length(which(res1$p.val <= 0.10))
length(which(res1$p.val <= 0.20))
length(which(res1$p.val <= 0.50))
mean(res1$p.val)
Computing file changes ...