https://github.com/J0vid/MGP_shiny
Tip revision: 61fb597c48ced306dd588e289e69c3e3d8f9ce15 authored by J. David Aponte on 02 November 2021, 01:05:47 UTC
trying to sort authorship
trying to sort authorship
Tip revision: 61fb597
report.Rmd
---
title: "Process MGP report"
output: html_document
params:
variables2: NA
lambda: NA
mutant: NA
mag: NA
pls_axis: NA
---
```{r sparse process MGP}
selection.vector <- process.list()[[1]][process.list()[[2]] %in% params$variables2]
process.ano <- NULL
for(i in 1: length(selection.vector)) process.ano <- c(process.ano, as.character(DO.go[DO.go[,3] == selection.vector[i], 2]))
#offline method for getting gene metadata
#pull gene names from process.ano (go terms)
coi <- c("ENSEMBL", "SYMBOL")
go2symbol <- unique(na.omit(AnnotationDbi::select(org.Mm.eg.db, keys = process.ano, columns = coi, keytype = "GO")[,-2:-3]))
coi2 <- c("TXCHROM", "TXSTART", "TXEND")
symbol2info <- AnnotationDbi::select(mmusculusEnsembl, keys = go2symbol[,2], columns = coi2, keytype="GENEID")
transcipt.size <- abs(symbol2info[,3] - symbol2info[,4])
#symbol, chr, start, end
chr_name <- rep(NA, nrow(go2symbol))
gene.start <- rep(NA, nrow(go2symbol))
gene.end <- rep(NA, nrow(go2symbol))
for(i in 1:length(unique(symbol2info$GENEID))){
tmp.transcript <- symbol2info[symbol2info[,1] == unique(symbol2info$GENEID)[i],][which.max(transcipt.size[symbol2info[,1] == unique(symbol2info$GENEID)[i]]),]
chr_name[i] <- tmp.transcript$TXCHROM
gene.start[i] <- tmp.transcript$TXSTART
gene.end[i] <- tmp.transcript$TXEND
}
seq.info <- data.frame(mgi_symbol = go2symbol$SYMBOL, chromosome_name = chr_name, start_position = gene.start, end_position = gene.end)
seq.info[,2] <- as.character(seq.info[,2])
seq.info[,3:4] <- as.matrix(seq.info[,3:4])/1e6
#get rid of weird chromosome names
if(length(grep(seq.info$chromosome_name, pattern = "CHR")) > 0) seq.info <- seq.info[-grep(seq.info$chromosome_name, pattern = "CHR"),]
gene.names <- seq.info[,1]
seq.indexes <- matrix(NA, ncol = 3, nrow = dim(seq.info)[1])
for(j in 1 : dim(seq.info)[1]){
tmp.indexes <- combined.markers[which(combined.markers$chr == seq.info[j,2] & combined.markers$Mbp_mm10 > mean(as.numeric(seq.info[j,3:4])) - 2 & combined.markers$Mbp_mm10 < mean(as.numeric(seq.info[j,3:4])) + 2), c(1,3)]
#for each gene, select the marker closest to the middle of the gene
seq.indexes[j,] <- as.matrix(cbind(seq.info[j,1],tmp.indexes[which.min(abs(tmp.indexes[,2] - mean(as.numeric(seq.info[j,3:4])))),]))
}
#put together selected genotype data
probs.rows <- matrix(NA, nrow = nrow(Y), ncol = nrow(seq.indexes) * 8)
probrowseq <- seq(1, ncol(probs.rows) + 8, by = 8)
for(i in 1:nrow(seq.indexes)) probs.rows[, probrowseq[i]:(probrowseq[i+1] - 1) ] <- as.matrix(collect(tbl(DO_probs_DB, seq.indexes[i,2])))
#fit lasso pls2B
# pls.svd <- mddsPLS(Xs = probs.rows, Y = Y, R = input$pls_axis, lambda = input$lambda)
process.svd <- mddsPLS(Xs = probs.rows, Y = Y, R = params$pls_axis, lambda = params$lambda)
# full.pred <- predict(process.svd, probs.rows)
#
# ess <- sum(apply(full.pred, 1, function(x) (x - colMeans(Y))^2))
# rss <- sum(apply(Y, 1, function(x) (x - colMeans(full.pred))^2))
#
# #variance explained
# ess/(rss + ess)
```
```{r plot marker/gene loadings}
#now we should be able to take pls.svd directly and maybe label them by founder in a new column, then barplot by family, by gene
do.names <- c("A/J", "C57BL/6J", "129S1/SvImJ", "NOD/ShiLtJ", "NZO/HlLtJ", "CAST/EiJ", "PWK/PhJ", "WSB/EiJ")
do.colors <- c("A/J" = "#F0F000","C57BL/6J" = "#808080", "129S1/SvImJ"= "#F08080", "NOD/ShiLtJ" = "#1010F0","NZO/HlLtJ" = "#00A0F0","CAST/EiJ" = "#00A000", "PWK/PhJ" = "#F00000", "WSB/EiJ" = "#9000E0")
pathway.loadings <- data.frame(gloadings = process.svd$mod$u[[1]][, params$pls_axis], gnames = as.character(rep(seq.info[,1], each = 8)), founders = rep(do.names, nrow(seq.info)))
p <- ggplot(data = pathway.loadings, aes(x = gnames, y = gloadings)) +
geom_bar(stat = "identity", width = .75, position=position_dodge()) +
theme(text = element_text(size=6),
axis.text.x = element_text(angle = 75, hjust = 1),
axis.title.x = element_text(margin = margin(t = 20))) +
scale_fill_manual(values=do.colors) +
xlab("Gene") +
ylab("Genetic coefficient") +
theme(axis.text = element_text(angle = 55, hjust = 1, size = 15),
axis.title = element_text(size = 15, face = "bold"))
p2 <- ggplot(data = pathway.loadings, aes(x = gnames, y = gloadings, fill = founders)) +
geom_bar(stat = "identity", width = .75, position=position_dodge()) +
theme(text = element_text(size=6),
axis.text.x = element_text(angle = 70, hjust = 1),
axis.title.x = element_text(margin = margin(t = 20))) +
scale_fill_manual(values=do.colors) +
xlab("") +
ylab("MGP loading") +
theme(axis.text = element_text(angle = 55, hjust = 1, size = 12),
axis.title = element_text(size = 12, face = "bold"))
p
p2
```
```{r 3d pheno}
#rotmesh into new aligned space DO mean
fixed.do.mean <- matrix(colMeans(Y), ncol = 3, byrow = T)
par3d(zoom = .75)
shape.warp <- plot3d(shape.mean$mesh, col = adjustcolor("lightgrey", .3), aspect = "iso", alpha = .2, specular = 1, axes = F, box = F, xlab = "", ylab = "", zlab = "", main = "")
spheres3d(fixed.do.mean, radius = .002, color = adjustcolor("red", .3))
print(dim(process.svd$mod$ts[[params$pls_axis]]))
proj.coords.a1 <- row2array3d(predict(process.svd, probs.rows[c(which.min(process.svd$mod$ts[[params$pls_axis]][, 1]), which.max(process.svd$mod$ts[[params$pls_axis]][,1])),])$y, Nlandmarks = 54)
proj.coords.a2 <- proj.coords.a1[,,2]
proj.coords.a1 <- proj.coords.a1[,,1]
spheres3d(proj.coords.a1, radius = .003, color = 1)
# for(i in 1:54) arrow3d(proj.coords.a2[i,], proj.coords.a1[i,] + (proj.coords.a1[i,] - proj.coords.a2[i,]) * (params$mag - 1), type = "lines", col = "black", barblen = 0.005, lwd = 6)
segments3d(x = (rbind(as.vector(proj.coords.a1[,1]), as.vector((proj.coords.a2 + (proj.coords.a2 - proj.coords.a1) * (4 - 1))[,1]))),
y = (rbind(as.vector(proj.coords.a1[,2]), as.vector((proj.coords.a2 + (proj.coords.a2 - proj.coords.a1) * (4 - 1))[,2]))),
z = (rbind(as.vector(proj.coords.a1[,3]), as.vector((proj.coords.a2 + (proj.coords.a2 - proj.coords.a1) * (4 - 1))[,3]))),
lwd = 3)
# #mutant comparison
mutant <- params$mutant
tmp.mutant.registration <- gpagen(arrayspecs(rbind(Y, as.matrix(mutant.lms[mutant.db$Genotype == mutant,])), 54, 3))$coords
# #debug: tmp.mutant.registration <- gpagen(arrayspecs(rbind(Y, as.matrix(mutant.lms[mutant.db$Genotype == "Alk2",])), 54, 3))$coords
do.mean <- array.mean(tmp.mutant.registration[,,1:nrow(Y)])
mutant.mean <- array.mean(tmp.mutant.registration[,,-(1:nrow(Y))])
segments3d(x = (rbind(as.vector(do.mean[,1]), as.vector((mutant.mean + (mutant.mean - do.mean) * (input$mag - 1))[,1]))),
y = (rbind(as.vector(do.mean[,2]), as.vector((mutant.mean + (mutant.mean - do.mean) * (input$mag - 1))[,2]))),
z = (rbind(as.vector(do.mean[,3]), as.vector((mutant.mean + (mutant.mean - do.mean) * (input$mag - 1))[,3]))),
lwd = 3,
col = 2)
# for(i in 1:54) arrow3d(do.mean[i,] - (mutant.mean[i,] - do.mean[i,]), mutant.mean[i,], type = "lines", col = "red", barblen = 0.005, lwd = 4)
rglwidget()
```