Content - a38ccc2a597aeab3a8d7c869951bc789253f6957 - 540075b/report.Rmd

visit type:
Tip revision: 61fb597c48ced306dd588e289e69c3e3d8f9ce15 authored by J. David Aponte on 02 November 2021, 01:05:47 UTC
trying to sort authorship
Tip revision: 61fb597
report.Rmd
---
title: "Process MGP report"
output: html_document
params:
  variables2: NA
  lambda: NA
  mutant: NA
  mag: NA
  pls_axis: NA
---

```{r sparse process MGP}



selection.vector <- process.list()[[1]][process.list()[[2]] %in% params$variables2]
    
    process.ano <- NULL
    for(i in 1: length(selection.vector)) process.ano <- c(process.ano, as.character(DO.go[DO.go[,3] == selection.vector[i], 2]))
    
      
      #offline method for getting gene metadata
      #pull gene names from process.ano (go terms)
      coi <- c("ENSEMBL", "SYMBOL")
      go2symbol <- unique(na.omit(AnnotationDbi::select(org.Mm.eg.db, keys = process.ano, columns = coi, keytype = "GO")[,-2:-3]))
      
      coi2 <- c("TXCHROM", "TXSTART", "TXEND")
      symbol2info <- AnnotationDbi::select(mmusculusEnsembl, keys = go2symbol[,2], columns = coi2, keytype="GENEID")
      transcipt.size <- abs(symbol2info[,3] - symbol2info[,4])
      
      #symbol, chr, start, end
      chr_name <- rep(NA,  nrow(go2symbol))
      gene.start <- rep(NA,  nrow(go2symbol))
      gene.end <- rep(NA,  nrow(go2symbol))
      
      for(i in 1:length(unique(symbol2info$GENEID))){
        
          tmp.transcript <- symbol2info[symbol2info[,1] == unique(symbol2info$GENEID)[i],][which.max(transcipt.size[symbol2info[,1] == unique(symbol2info$GENEID)[i]]),]
        
          chr_name[i] <- tmp.transcript$TXCHROM
          gene.start[i] <- tmp.transcript$TXSTART
          gene.end[i] <- tmp.transcript$TXEND
        
      }
      
      seq.info <- data.frame(mgi_symbol = go2symbol$SYMBOL, chromosome_name = chr_name, start_position = gene.start, end_position = gene.end)
      seq.info[,2] <- as.character(seq.info[,2])
      seq.info[,3:4] <- as.matrix(seq.info[,3:4])/1e6  
      
      #get rid of weird chromosome names
      if(length(grep(seq.info$chromosome_name, pattern = "CHR")) > 0) seq.info <- seq.info[-grep(seq.info$chromosome_name, pattern = "CHR"),]
      
      gene.names <- seq.info[,1]
      seq.indexes <- matrix(NA, ncol = 3, nrow = dim(seq.info)[1])
      
      for(j in 1 : dim(seq.info)[1]){
          tmp.indexes <-  combined.markers[which(combined.markers$chr == seq.info[j,2] & combined.markers$Mbp_mm10 > mean(as.numeric(seq.info[j,3:4])) - 2 & combined.markers$Mbp_mm10 < mean(as.numeric(seq.info[j,3:4])) + 2), c(1,3)]
          #for each gene, select the marker closest to the middle of the gene
          seq.indexes[j,] <- as.matrix(cbind(seq.info[j,1],tmp.indexes[which.min(abs(tmp.indexes[,2] - mean(as.numeric(seq.info[j,3:4])))),]))
      }
      
      #put together selected genotype data
      probs.rows <- matrix(NA, nrow = nrow(Y), ncol = nrow(seq.indexes) * 8)
      probrowseq <- seq(1, ncol(probs.rows) + 8, by = 8)
      for(i in 1:nrow(seq.indexes)) probs.rows[, probrowseq[i]:(probrowseq[i+1] - 1) ] <- as.matrix(collect(tbl(DO_probs_DB, seq.indexes[i,2])))
      
      #fit lasso pls2B
      # pls.svd <- mddsPLS(Xs = probs.rows, Y = Y, R = input$pls_axis, lambda = input$lambda)

process.svd <- mddsPLS(Xs = probs.rows, Y = Y, R = params$pls_axis, lambda = params$lambda)

# full.pred <- predict(process.svd, probs.rows)
# 
# ess <- sum(apply(full.pred, 1, function(x) (x - colMeans(Y))^2))
# rss <- sum(apply(Y, 1, function(x) (x - colMeans(full.pred))^2))
# 
# #variance explained
# ess/(rss + ess)

```


```{r plot marker/gene loadings}
#now we should be able to take pls.svd directly and maybe label them by founder in a new column, then barplot by family, by gene
do.names <- c("A/J", "C57BL/6J", "129S1/SvImJ", "NOD/ShiLtJ", "NZO/HlLtJ", "CAST/EiJ", "PWK/PhJ", "WSB/EiJ")
do.colors <- c("A/J" = "#F0F000","C57BL/6J" = "#808080", "129S1/SvImJ"= "#F08080", "NOD/ShiLtJ" = "#1010F0","NZO/HlLtJ" = "#00A0F0","CAST/EiJ" = "#00A000", "PWK/PhJ" = "#F00000", "WSB/EiJ" = "#9000E0")

pathway.loadings <- data.frame(gloadings = process.svd$mod$u[[1]][, params$pls_axis], gnames = as.character(rep(seq.info[,1], each = 8)), founders = rep(do.names, nrow(seq.info)))

p <- ggplot(data = pathway.loadings, aes(x = gnames, y = gloadings)) +
  geom_bar(stat = "identity", width = .75, position=position_dodge()) +
  theme(text = element_text(size=6), 
        axis.text.x = element_text(angle = 75, hjust = 1),
        axis.title.x = element_text(margin = margin(t = 20))) +
  scale_fill_manual(values=do.colors) +
  xlab("Gene") +
  ylab("Genetic coefficient") +
  theme(axis.text = element_text(angle = 55, hjust = 1, size = 15), 
        axis.title = element_text(size = 15, face = "bold"))

p2 <- ggplot(data = pathway.loadings, aes(x = gnames, y = gloadings, fill = founders)) +
  geom_bar(stat = "identity", width = .75, position=position_dodge()) +
  theme(text = element_text(size=6),
        axis.text.x = element_text(angle = 70, hjust = 1),
        axis.title.x = element_text(margin = margin(t = 20))) +
  scale_fill_manual(values=do.colors) +
  xlab("") +
  ylab("MGP loading") +
  theme(axis.text = element_text(angle = 55, hjust = 1, size = 12), 
        axis.title = element_text(size = 12, face = "bold"))

p
p2

```


```{r 3d pheno}
#rotmesh into new aligned space DO mean
fixed.do.mean <- matrix(colMeans(Y), ncol = 3, byrow = T)

par3d(zoom = .75)

shape.warp <-  plot3d(shape.mean$mesh, col = adjustcolor("lightgrey", .3), aspect = "iso", alpha = .2, specular = 1, axes = F, box = F, xlab = "", ylab = "", zlab = "", main = "")
spheres3d(fixed.do.mean, radius = .002, color = adjustcolor("red", .3))

print(dim(process.svd$mod$ts[[params$pls_axis]]))
proj.coords.a1 <- row2array3d(predict(process.svd, probs.rows[c(which.min(process.svd$mod$ts[[params$pls_axis]][, 1]), which.max(process.svd$mod$ts[[params$pls_axis]][,1])),])$y, Nlandmarks = 54)
    proj.coords.a2 <- proj.coords.a1[,,2]
    proj.coords.a1 <- proj.coords.a1[,,1]

spheres3d(proj.coords.a1, radius = .003, color = 1)

# for(i in 1:54) arrow3d(proj.coords.a2[i,], proj.coords.a1[i,] + (proj.coords.a1[i,] - proj.coords.a2[i,]) * (params$mag - 1), type = "lines", col = "black", barblen = 0.005, lwd = 6)

segments3d(x = (rbind(as.vector(proj.coords.a1[,1]), as.vector((proj.coords.a2 + (proj.coords.a2 - proj.coords.a1) * (4 - 1))[,1]))),
           y = (rbind(as.vector(proj.coords.a1[,2]), as.vector((proj.coords.a2 + (proj.coords.a2 - proj.coords.a1) * (4 - 1))[,2]))),
           z = (rbind(as.vector(proj.coords.a1[,3]), as.vector((proj.coords.a2 + (proj.coords.a2 - proj.coords.a1) * (4 - 1))[,3]))),
           lwd = 3)

# #mutant comparison
   mutant <- params$mutant
   
   tmp.mutant.registration <- gpagen(arrayspecs(rbind(Y, as.matrix(mutant.lms[mutant.db$Genotype == mutant,])), 54, 3))$coords
#     #debug: tmp.mutant.registration <- gpagen(arrayspecs(rbind(Y, as.matrix(mutant.lms[mutant.db$Genotype == "Alk2",])), 54, 3))$coords
    do.mean <- array.mean(tmp.mutant.registration[,,1:nrow(Y)])
    mutant.mean <- array.mean(tmp.mutant.registration[,,-(1:nrow(Y))])

    segments3d(x = (rbind(as.vector(do.mean[,1]), as.vector((mutant.mean + (mutant.mean - do.mean) * (input$mag - 1))[,1]))),
               y = (rbind(as.vector(do.mean[,2]), as.vector((mutant.mean + (mutant.mean - do.mean) * (input$mag - 1))[,2]))),
              z = (rbind(as.vector(do.mean[,3]), as.vector((mutant.mean + (mutant.mean - do.mean) * (input$mag - 1))[,3]))),
                   lwd = 3,
                   col = 2)

         # for(i in 1:54) arrow3d(do.mean[i,] - (mutant.mean[i,] - do.mean[i,]), mutant.mean[i,], type = "lines", col = "red", barblen = 0.005, lwd = 4)

rglwidget()

```
Browse the archive

https://github.com/J0vid/MGP_shiny