library(tximport)
library(DESeq2)
library(tidyverse)
library(dplyr)
library(ggplot2)
gene.names<-read.table("/groups/nodine/lab/members/Ranjith/mCHG/figure5_rna/AGI_to_name.txt",header = T)
names(gene.names)[1]<-"GeneId"
--- Gene Quantification and DeSeq -----
files <- list.files("/scratch-cbe/users/ranjith.papareddy/rCMT3_transcriptome_reanal/rSEM_V44/star_rsem", full.names = T, recursive = T, pattern = ".genes.results")
naming<-c(files)
names(files)<-naming
names(files)<- gsub(names(files),pattern = "/scratch-cbe/users/ranjith.papareddy/rCMT3_transcriptome_reanal/rSEM_V44/star_rsem/|.genes.results",replacement = "")
names(files)<- gsub(names(files),pattern = "_trimmed/abundance.tsv",replacement = "")
names(files)
files<-files[c(10:15,1:3)]
txi.rsem <- tximport(files, type = "rsem", txIn = FALSE, txOut = FALSE)
rsem.WT_rCMT3<-txi.rsem$abundance
sampleNames <- colnames(txi.rsem$counts)
sampleGroup <- factor(c(rep("TGroup",6),rep("CGroup",3)))
sampleTable <- data.frame(sampleName = sampleNames, type = sampleGroup)
rownames(sampleTable) <- colnames(txi.rsem$counts)
txi.rsem$length[txi.rsem$length == 0] <- 1
ddsTxi <- DESeqDataSetFromTximport(txi.rsem, sampleTable, design = ~ type)
dds<-ddsTxi
ddsTxi <- DESeq(ddsTxi)
res <- results(ddsTxi,independentFiltering=F)
res$GeneId<-rownames(res)
res.tpm<-as.data.frame(rsem.WT_rCMT3)%>%rownames_to_column(var="GeneId")%>%inner_join(as.data.frame(res))%>%
full_join(gene.names)
DMRsdistnacetogenes<-read_tsv("/groups/nodine/lab/members/Ranjith/mCHG/figure5_rna/rCMT3_WT_BC_DMR.distance2Genes.tsv")
Deseq_DMR_table<- DMRsdistnacetogenes%>%full_join(res.tpm)
write_tsv(Deseq_DMR_table[,-c(9,10)],"/groups/nodine/lab/members/Ranjith/mCHG/figure5_rna/table2-Deseq_DMR_rCMT3vsWT.tsv" )
boxplot(DMRsdistnacetogenes[,c(11:13)])
clusters<-read.table("/groups/nodine/lab/members/Ranjith/mCHG/Gene_Clustering/clustering_results_pcg_1tpm_5Cs.txt")
clusters<- as.data.frame(clusters[,c(4,13)])
res.tpm.clust<-clusters%>%inner_join(as.data.frame(res))
write.table(res.tpm.clust, "/groups/nodine/lab/members/Ranjith/mCHG/figure5_rna/deseq_table.tsv",quote = F,
sep ="\t")
resSig <- res.tpm[(!is.na(res.tpm$padj) & (res.tpm$padj < 0.010000) & (abs(res.tpm$log2FoldChange)> 1)), ]
dim(resSig)
write.table(res.tpm.clust, "/groups/nodine/lab/members/Ranjith/mCHG/figure5_rna/deg_0.01_log1.tsv",quote = F,
sep ="\t")
with()
resSig.down<-resSig%>%filter(log2FoldChange < -1)
head(resSig.down)
dim(resSig.down)
res.down.clust<-clusters%>%inner_join(as.data.frame(resSig.down))
head(res.down.clust)
view(resSig.down)
res.down.clust.1<-res.down.clust%>%filter(mClust %in% c(1,2))
resSig.up<-resSig%>%filter(log2FoldChange > 1)
resSig.up.clust<-clusters%>%inner_join(as.data.frame(resSig.up))
table(resSig.up.clust$mClust)
dim(resSig.up)
par(mfrow=c(4,4),las=2)
boxplot(log2(resSig.down[2:10]))
boxplot(log2(resSig.up[2:10]))
pheatmap::pheatmap(log2(resSig.up[2:10]+1),scale = "row",cluster_cols = F)
##### deg features
genes.dist.dmr<- read.table("/groups/nodine/lab/members/Ranjith/mCHG/figure5_rna/genes_dist_todmrs.tsv")
colnames(genes.dist.dmr)[4]<-"GeneId"
down.genes.dist.dmr<-genes.dist.dmr%>%inner_join(as.data.frame(resSig.down), by="GeneId")%>%filter(abs(V14)<1500)
dim(down.genes.dist.dmr)
rownames(down.genes.dist.dmr)<-down.genes.dist.dmr$gene_name
vioplot(down.genes.dist.dmr$V11, down.genes.dist.dmr$V12, down.genes.dist.dmr$V13)
write.table(down.genes.dist.dmr[,c(-c(1:3,8:10,26:28,24))],"/groups/nodine/lab/members/Ranjith/mCHG/figure5_rna/down.genes.dist.dmr.tsv", quote = F, sep="\t" )
library(venn)
grid.newpage()
draw.pairwise.venn(area1 = 4733, area2 = 563, cross.area = 24, category = c("Dog People",
"Cat People"))
meantpm<- read.table("/groups/nodine/lab/members/Ranjith/mCHG/figure5_rna/mean_tpm_table.tsv")%>%rownames_to_column(var="GeneId")
meantpm.drg<-meantpm%>%inner_join(down.genes.dist.dmr)
rownames(meantpm.drg)<-meantpm.drg$gene_name
pheatmap::pheatmap(log2(meantpm.drg[,c(2:12)]+1), scale = "row", cluster_cols = F, cellwidth = 20, cellheight = 15)
freq.up<-c(62, 39, 17, 11 )
freq.down<-c(171,129,58,16)
freq.all<-c(7882,7847,5469,14439)
frq.cbind<-cbind(freq.up,freq.down,freq.all)
rownames(frq.cbind)<-c("clust1","clust2","clust3","clust4")
scale.frq <- as.data.frame(scale(frq.cbind , center = FALSE, scale = colSums(frq.cbind)))
par(mfrow=c(3,3),las=2)
scale.frq$log2.down<- log2(scale.frq$freq.down/scale.frq$freq.all)
barplot(as.matrix(scale.frq$log2),beside=T,ylim=c(-2,2),space = 0.15)
####
with(res.tpm, plot(log2FoldChange,-log10(padj), xlim=c(-11,11), pch=20, col="gainsboro"))
title( main="multi",xlab = "-log10(padj)" , ylab="log2 FC")
# Add colored points: red if padj<0.05, orange of log2FC>1, green if both)
with(subset(as.data.frame(res.tpm), padj<.01 & log2FoldChange>=1 ), points(-log10(padj), log2FoldChange, pch=20, col="slateblue", bg= '#1c75bc',cex=1))
with(subset(as.data.frame(res.tpm), padj<.01 & log2FoldChange <=-1 ), points(-log10(padj), log2FoldChange, pch=20, col="lightcoral", bg= 'black',cex=1))
box(bty="o")
abline(h=0,lty=3)
pie(freq.down)
Volcano <-res.tpm %>%
mutate(padj = replace(padj, padj <= 0.00000001, 0.00000001)) %>%
mutate(log2FoldChange = replace(log2FoldChange, log2FoldChange <= -4.5, -4.5))%>%
mutate(log2FoldChange = replace(log2FoldChange, log2FoldChange >= 4.5, 4.5))%>%
mutate(threshold = ifelse(log2FoldChange >= 1,"A", ifelse(log2FoldChange<=-1 , "B", "C")))
V.plot <- ggplot(Volcano, aes(x=log2FoldChange, y=-log10(padj))) +
geom_point(aes(colour = threshold), alpha=0.6, size=1.5) +
scale_colour_manual(values = c("A"= "#113559", "C"="#e4e3e2", "B"= "#591111"))+
xlim(c(-5, 5)) + ylim(c(0, 8))+theme_classic()+theme(panel.border = element_rect(colour = "black", fill=NA, size=0.5))+
geom_vline(xintercept=c(-1.5,1.5),linetype="dashed", color = "black",size=0.5)
library(ggplot2)
Deseq_DMR_table.test<-as.data.frame(Deseq_DMR_table)%>%filter(padj<.01 & log2FoldChange< -1)%>%filter(abs(Distance2gene)<1500)%>%
filter(Hypo %in% c("WT"))
names(Deseq_DMR_table.test)[c(15:17)]<-c("rCMT3_L1_R1","rCMT3_L1_R2","rCMT3_L1_R3")
names(Deseq_DMR_table.test)[c(18:20)]<-c("rCMT3_L3_R1","rCMT3_L3_R2","rCMT3_L3_R3")
write_tsv(Deseq_DMR_table.test[-c(9,10)],"/groups/nodine/lab/members/Ranjith/mCHG/figure5_rna/downregulated.hypomethylated.tsv")
meantpm.drg<-meantpm%>%inner_join(as.data.frame(Deseq_DMR_table.test[c(4,30)]))
rownames(meantpm.drg)<-meantpm.drg$gene_name
breaksList = c(-2,seq(-1.5,1,0.075),seq(1.5,2,0.05), 2.5)
library(RColorBrewer)
pheatmap::pheatmap(log2(meantpm.drg[,c(5:12)]+1), scale = "row", cluster_cols = F, cellwidth = 20, cellheight = 15,border_color = "white",
color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(length(breaksList)),
breaks = breaksList)
boxplot(Deseq_DMR_table.test[,c(11:13)])