1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# Title:  Get a large count table to be used as input for DEseq2 analysis
# Author: Jingxun Chen
# Date: code compiled on 20220805
# Related publication: Andrew McKay, Emma K. Costa, and Jingxun Chen, eLife, 2022


# ------------------------------------------------------------------
# Set up 
# ------------------------------------------------------------------
# Set wd to the current directory
setwd("/Users/jingxun/Dropbox/KillifishFeederPaper_AndrewMcKay/Revision/Code/RNAseq_Code_check/GetCounts")

# Please create the following folder if not present: 'Output'

# ---------------  Input variables ---------------------------
genename = "gene_name"
homeDir = as.character("/Users/jingxun/Dropbox/KillifishFeederPaper_AndrewMcKay/Revision/Code/RNAseq_Code_check/GetCounts")
sampleTable <- read.csv(paste0(homeDir, "/Input/DRexp20211219ExperimentDesign.csv"), row.names = 1, stringsAsFactors = F)


# ------------------------------------------------------------------
# Define functions for rpkm and tpm
# ------------------------------------------------------------------
## reference: https://gist.github.com/slowkow/c6ab0348747f86e2748b#file-counts_to_tpm-r-L44
## reference: https://www.biostars.org/p/171766/

rpkm <- function(counts, lengths) {
  rate <- counts / lengths
  rate / sum(counts) * 1e9
}

tpm <- function(counts, lengths) {
  rate <- counts / lengths
  rate / sum(rate) * 1e6
}


                    
# ------------------------------------------------------------------
# Loop to get the counts
# ------------------------------------------------------------------
for (i in 1:length(sampleTable$files)){
  
  outfilename <- file.path(homeDir, "/Output/", paste0(sampleTable$lib[i], "_counts.csv")) # Change this to the path of output file
  print (sampleTable$files[i])
  outfilename
  cts <- read.table(paste0(homeDir, "/featureCount/", sampleTable$files[i], ".featureCounts"), header = TRUE)
  colnames(cts)[7] <- paste("Counts")
  colnames(cts)[1] <- paste("GeneID")
  x = cts[c("GeneID", "Length", "Counts")]
  
  # get tpm, this is not normalized across samples
  x$TPM = tpm(x$Counts, x$Length)  
  write.table(x,file=outfilename,quote=FALSE,sep=",",row.names=FALSE)

  rm(outfilename, cts, x) # remove the temp variables for the next run

}

sessionInfo()