1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
### Author: Franck Soubès
### Bioinformatics Master Degree - University of Bordeaux, France
### Link: https://github.com/GeT-TRiX/MA_Trix_App/
### Where: GET-TRiX's facility
### Application: MATRiX is a shiny application for Mining and functional Analysis of TRanscriptomics data
### Licence: GPL-3.0


#' formating is a alpha function of the higher elaborate decTestTRiX function
#'
#' @param adj A subset dataframe
#' @param pval Cutoff pvalue
#'
#' @return A numeric value of the number of significant genes


formating = function( adj, pval){


  passingval = adj %>%
    apply(2,FUN = function(x){return(x < pval)}) %>%
    apply(1,sum)

  passingval = which( passingval > 0)

  cat("Il y a",length(passingval),"gène significatifs")

  return(passingval)

}



#' Create a data frame containing the number of signficant genes for different conditions pval and log fc
#'
#' @param adj A subset dataframe
#'
#' @return A data frame
#'
#' @export

createdfsign = function(adj) {


  dtsign = data.frame(matrix(ncol = 2, nrow = length(adj)))
  y <- c("FDR < 0.01", "FDR < 0.05")

  dtsign = data.frame(matrix(ncol <- 2, nrow <- length(adj)))
  y <- c("pvalue(0.01)", "pvalue(0.05)")

  colnames(dtsign) <- y
  rownames(dtsign) <- colnames(adj)
  pvalue = c(0.01, 0.05)

  i <- 1
  for (pv in pvalue) {
    for (elem in colnames(adj)) {

      if (i %% constmod == 0) {
        i <- 1
      }
      if (pv == 0.05)
      {

        dtsign$`FDR < 0.05`[i] = evaluatesignpar(adj, elem, pv)
        i = i + 1
      }
      else{

        dtsign$`pvalue(0.01)`[i] = evaluatesignpar(adj, elem, pv)
        i <- i + 1

      }
    }
  }
  return(dtsign)
}


#' isimilar is a function which aims is to ensure that the unique ids are equal between the workingset and the restable as for the samples.
#'
#' @param restab A statistical dataframe
#' @param pdata A dataframe that associates samples to their respective biological conditions
#' @param workingset A normalized expression dataframe
#'
#' @return A boolean list
#'
#' @export

isimilar <- function(restab, pdata, workingset){

  sameids <- all(restab[[1]] == workingset [[1]])
  samedesign <- all(colnames(workingset[-1])== pdata[[1]])
  return(list(sameids,samedesign))

}

#' This function returns a data frame of the element which are superior to a logFC cutoff (1.2,2,4,6 and 10) and for a defined pvalue
#'
#' @param alltop A statistical dataframe
#' @param pval Cutoff pvalue
#' @param testrix A character value of the statistical criterions (pvalue, FDR)
#' @param grepre A list of subset dataframes
#'
#' @return A dataframe of the number of significant genes depending of both cutoff (logFC and pvalue)
#'
#' @export

restabfc <- function(alltop, pval, testrix, grepre) {

  j = 1
  whatest  = ifelse(testrix == "FDR", T, F)
  if (whatest)
    adj = alltop[, grep( grepre[[1]], names(alltop), value = TRUE), drop= F]
  else
    adj = alltop[, grep(grepre[[3]], names(alltop), value = TRUE),drop= F]

  logfc = alltop[, grep( grepre[[2]], names(alltop), value = TRUE),drop= F]
  myfc = c(1, 1.2, 2, 4, 6, 10)
  fcpval = data.frame(matrix(ncol = length(myfc), nrow = length(adj)))
  thresholds = c("FC>1.0", "FC >1.2" , "FC >2", "FC >4", "FC >6", "FC >10")

  for (fc in myfc) {
    fcpval[j] = cbind.data.frame(colSums(adj < pval & 2 ** abs(logfc) > fc, na.rm=T))
    j = j + 1
  }

  names(logfc) =  gsub(
    pattern =  grepre[[2]] ,
    replacement = "",
    x = names(logfc),
    perl =  TRUE
  )

  colnames(fcpval) = thresholds
  rownames(fcpval) = colnames(logfc)
  return(fcpval)

}

#' This function returns a transformed data frame of character type to a data frame of factor type
#'
#' @param datach A dataframe that associates samples to their respective biological conditions
#'
#' @return A dataframe of factor type
#'
#' @export


chartofa = function(datach){

  datach[] <- lapply( datach, factor)
  col_names <- names(datach)
  datach[col_names] <- lapply(datach[col_names] , factor)
  return(datach)
}


#' meanrankgenes is a function which aims is to return a dataframe with the average logFC for each duplicate genes
#'
#' @param dtsign A statistical dataframe (ids and logFC)
#' @param stat A character to grep logFC or pvalue values within the statistical table
#' @param rankcomp A character vector of the selected contrast(s)
#' @param multcomp A choosen comparison to sort the genes based on the logFC
#' @param regulationvolc A character to specific the regulation (both, up, down) for the volcano page
#' @param jvenn A boolean value
#'
#' @return A dataframe
#'
#' @export
#'

meanrankgenes  <- function(dtsign, stat , rankcomp=NULL, multcomp, regulationvolc=NULL, jvenn = F){

  selcomp <-  paste0(stat, multcomp )
  options(datatable.optimize=1)

  for (i in selcomp) {
    dtsign[[i]] = as.numeric(as.character(dtsign[[i]]))
  }

  summarizetable <- dtsign %>% select(GeneName, paste0(stat, multcomp))  %>%
    as.data.table() %>% .[,lapply(.SD,function(x) mean=round(mean(x), 3)),"GeneName"] %>% as.data.frame()

  if(!jvenn){
  summarizetable$rank <- summarizetable %>% select(paste0(stat , rankcomp) ) %>% rank(.)
  summarizetable <- if(regulationvolc == "down") summarizetable %>% arrange( desc(-rank) ) else summarizetable %>% arrange( desc(rank) )
  }

  return(summarizetable)
}


#' This function returns a data frame of the significant genes associated with the corresponding cluster index
#'
#' @param hmp01_All An heatmap object
#' @param exprData An index vector of significant genes from the restable
#' @param pval A data frame of the restable
#' @param height A cutoff point of a dendogram
#'
#' @return A data frame (unique ids, gene symbols, pvalue and cluster)
#'
#' @export
#'

heatmtoclust = function( hmp01_All, exprData, pval ,height= 5){

  cut02 = cut(hmp01_All$rowDendrogram, h = height )

  HCgroupsLab = lapply(cut02$lower, function(x)
    labels(x))
  id = colnames(pval[1])

  reorderdend = exprData[rev(hmp01_All$rowInd), hmp01_All$colInd]

  taildendo= as.integer(lapply(seq(length(HCgroupsLab)), function(x)
  {return(tail(HCgroupsLab[[x]],1))}))

  mygen = as.integer(row.names(reorderdend))
  pval$X = as.integer(rownames(pval))

  heatmclust = pval %>%
    dplyr::select (X,id,GeneName) %>%
    filter( X %in% mygen) %>%
    left_join(data.frame(X=mygen), . , by="X") %>%
    arrange(-row_number())

  i = 1
  for(row in 1:nrow(heatmclust)){
    if(heatmclust$X[row] == taildendo[i] ){
      heatmclust$cluster[row] = i
      i = i+1
    }
    else
      heatmclust$cluster[row] = i
  }

  return(heatmclust)

}