Content - 59606f25db292d6e2846fc940f1b9b7e2abee055 - 8a390ce/testingAssociations.r

visit type:

https://github.com/markcharder/GeneralBioinformaticsScripts

30 May 2020, 14:46:28 UTC

Tip revision: 380d81358d73ed02c76afb8d23ccfb1f57b47516 authored by Mark on 07 October 2016, 07:21:45 UTC
committed reciprocal blast hits script

Tip revision: 380d813

testingAssociations.r

## First is for repeats.
##
## Loads the "closest repeat" tables found in the working directory and
## publishes them as global variables (plotData reads these):
##   secretedClosest, effectorClosest - lists of data frames, one per
##     "secreted.closest" / "effectors.closest" file, as read by read.table().
##   noSecClosest, noEffClosest - lists of vectors: column V1 of the matching
##     "noSecreted.closest" / "noEffectors.closest" file, randomly sampled
##     without replacement down to the row count of the target table.
##   fileArray - character vector of the secreted file names, in load order.
## Target and background files are paired by their position in the listings
## returned by list.files(), so every target file needs exactly one
## background file. Takes no arguments and returns NULL invisibly.
readFilesRepeast <- function() {
  # Reset the globals first so that a failed run never leaves the results of
  # a previous call behind.
  effectorClosest <<- list()
  secretedClosest <<- list()
  noEffClosest <<- list()
  noSecClosest <<- list()
  fileArray <<- c()

  # list.files() expects a regular expression, not a shell glob: the leading
  # "*" of a glob has nothing to repeat, and an unescaped "." matches any
  # character. The match is case-sensitive, so "secreted" does not pick up
  # the "noSecreted" files.
  noeffFiles <- list.files(pattern = "noEffectors\\.closest")
  effectorFiles <- list.files(pattern = "effectors\\.closest")
  nosecFiles <- list.files(pattern = "noSecreted\\.closest")
  secretedFiles <- list.files(pattern = "secreted\\.closest")

  # Reads each target table together with its background table and draws the
  # size-matched background sample.
  loadPairs <- function(targetFiles, backgroundFiles) {
    if (length(targetFiles) != length(backgroundFiles)) {
      stop("Found ", length(targetFiles), " target file(s) but ",
           length(backgroundFiles), " background file(s); ",
           "each target file needs exactly one background file.",
           call. = FALSE)
    }
    targets <- vector("list", length(targetFiles))
    backgrounds <- vector("list", length(targetFiles))
    # seq_along() gives an empty loop when nothing matched; 1:length() would
    # iterate over c(1, 0) and try to read a file named NA.
    for (i in seq_along(targetFiles)) {
      print(paste("Processing file: ", targetFiles[i]))
      target <- read.table(targetFiles[i], header = FALSE)
      pool <- read.table(backgroundFiles[i], header = FALSE)$V1
      if (nrow(target) > length(pool)) {
        stop("Background file '", backgroundFiles[i], "' has ", length(pool),
             " row(s), fewer than the ", nrow(target), " row(s) in '",
             targetFiles[i], "'; cannot sample without replacement.",
             call. = FALSE)
      }
      targets[[i]] <- target
      # Sample positions rather than values: sample(x, n) silently treats a
      # single number x as the range 1:x.
      backgrounds[[i]] <- pool[sample.int(length(pool), nrow(target))]
    }
    list(targets = targets, backgrounds = backgrounds)
  }

  # Secreted files are processed before effector files, as before, so the
  # random draws happen in the same order.
  secreted <- loadPairs(secretedFiles, nosecFiles)
  fileArray <<- secretedFiles
  secretedClosest <<- secreted$targets
  noSecClosest <<- secreted$backgrounds

  effectors <- loadPairs(effectorFiles, noeffFiles)
  effectorClosest <<- effectors$targets
  noEffClosest <<- effectors$backgrounds

  invisible(NULL)
}

## Second is for RIPs.
##
## Loads the "closest RIP" tables found in the working directory and
## publishes them as global variables (plotData reads these):
##   secretedClosest, effectorClosest - lists of data frames, one per
##     "secreted.rips.closest" / "effectors.rips.closest" file, as read by
##     read.table().
##   noSecClosest, noEffClosest - lists of vectors: column V1 of the matching
##     "noSecreted.rips.closest" / "noEffectors.rips.closest" file, randomly
##     sampled without replacement down to the row count of the target table.
##   fileArray - character vector of the secreted file names, in load order.
## Target and background files are paired by their position in the listings
## returned by list.files(), so every target file needs exactly one
## background file. Takes no arguments and returns NULL invisibly.
readFilesRIPs <- function() {
  # Reset the globals first so that a failed run never leaves the results of
  # a previous call behind.
  effectorClosest <<- list()
  secretedClosest <<- list()
  noEffClosest <<- list()
  noSecClosest <<- list()
  fileArray <<- c()

  # list.files() expects a regular expression, not a shell glob: the leading
  # "*" of a glob has nothing to repeat, and an unescaped "." matches any
  # character. The match is case-sensitive, so "secreted" does not pick up
  # the "noSecreted" files.
  noeffFiles <- list.files(pattern = "noEffectors\\.rips\\.closest")
  effectorFiles <- list.files(pattern = "effectors\\.rips\\.closest")
  nosecFiles <- list.files(pattern = "noSecreted\\.rips\\.closest")
  secretedFiles <- list.files(pattern = "secreted\\.rips\\.closest")

  # Reads each target table together with its background table and draws the
  # size-matched background sample.
  loadPairs <- function(targetFiles, backgroundFiles) {
    if (length(targetFiles) != length(backgroundFiles)) {
      stop("Found ", length(targetFiles), " target file(s) but ",
           length(backgroundFiles), " background file(s); ",
           "each target file needs exactly one background file.",
           call. = FALSE)
    }
    targets <- vector("list", length(targetFiles))
    backgrounds <- vector("list", length(targetFiles))
    # seq_along() gives an empty loop when nothing matched; 1:length() would
    # iterate over c(1, 0) and try to read a file named NA.
    for (i in seq_along(targetFiles)) {
      print(paste("Processing file: ", targetFiles[i]))
      target <- read.table(targetFiles[i], header = FALSE)
      pool <- read.table(backgroundFiles[i], header = FALSE)$V1
      if (nrow(target) > length(pool)) {
        stop("Background file '", backgroundFiles[i], "' has ", length(pool),
             " row(s), fewer than the ", nrow(target), " row(s) in '",
             targetFiles[i], "'; cannot sample without replacement.",
             call. = FALSE)
      }
      targets[[i]] <- target
      # Sample positions rather than values: sample(x, n) silently treats a
      # single number x as the range 1:x.
      backgrounds[[i]] <- pool[sample.int(length(pool), nrow(target))]
    }
    list(targets = targets, backgrounds = backgrounds)
  }

  # Secreted files are processed before effector files, as before, so the
  # random draws happen in the same order.
  secreted <- loadPairs(secretedFiles, nosecFiles)
  fileArray <<- secretedFiles
  secretedClosest <<- secreted$targets
  noSecClosest <<- secreted$backgrounds

  effectors <- loadPairs(effectorFiles, noeffFiles)
  effectorClosest <<- effectors$targets
  noEffClosest <<- effectors$backgrounds

  invisible(NULL)
}
require("ggplot2")

##Plot data using ggplot2.
##
## Draws one violin per gene class for a single loaded data set.
##
## Arguments:
##   number - index of the data set to plot; used to pick the same element
##            from each of the global lists noSecClosest, secretedClosest,
##            noEffClosest and effectorClosest.
##   yLabel - y-axis title. The default matches the repeat data; pass a
##            different title when the globals hold RIP distances.
## Side effect: stores the combined long-format data frame (column "a" holds
## the values, column "b" the gene class) in the global variable datagg.
## Returns the ggplot object.
plotData <- function(number, yLabel = "Distance from nearest repeat") {
  # Gene class label -> global list holding that class. The order here fixes
  # the row order of datagg.
  groups <- list(
    "Non-secreted" = noSecClosest,
    "Secreted" = secretedClosest,
    "Non-effector" = noEffClosest,
    "Effector" = effectorClosest
  )

  # Fail early with a readable message instead of "subscript out of bounds".
  available <- min(vapply(groups, length, integer(1)))
  if (!is.numeric(number) || length(number) != 1 || is.na(number) ||
      number < 1 || number > available) {
    stop("'number' must be a single index between 1 and ", available,
         " (the number of loaded data sets).", call. = FALSE)
  }

  frames <- lapply(names(groups), function(label) {
    # unlist() flattens a data frame (target tables) or passes a plain vector
    # (sampled backgrounds) through unchanged.
    values <- unlist(groups[[label]][[number]])
    data.frame(a = values, b = rep(label, length(values)))
  })
  datagg <<- do.call(rbind, frames)

  ggplot2::ggplot(datagg, ggplot2::aes(b, a)) +
    ggplot2::xlab("Gene type") +
    ggplot2::ylab(yLabel) +
    ggplot2::geom_violin()
}