https://github.com/Gregor-Mendel-Institute/RKP2021-CMT3
Raw File
Tip revision: 89d7e2ea78af1969bb161640baed09296ed2485f authored by Papareddy on 23 April 2021, 11:18:44 UTC
Update README.md
Tip revision: 89d7e2e
Eucledian_distance.R
euc.dist=function(A, B, W=rep(1,length(A))){
  # Calculate a Euclidean distance between array A and array B.
  # Optional W is an array of weights, where larger weights have stronger effects
  return(sqrt(sum((((A - B)*W) ^ 2))))
}

zscore = function(v){
  (v - mean(v))/sd(v)
}

TPM = read.table("~/Desktop/Manustrips/mCHG/figure1/covariance/embryo_mean_tpm.tsv",stringsAsFactors = F,header = T,row.names = 1)
TPM = TPM[rowSums(TPM > 1)>=1,] # Subset for genes that are expressed > 1TPM in >= 1 stage of embryogenesis
zscores = round(t(apply(TPM,1,zscore)),2)
TPM = round(TPM,2)

distances = function(gene, dataset=zscores, weighted=FALSE, include_input=FALSE){
  # Returns a sorted array of genes by their distance from the input gene(s)
  gene_subset = gene[gene %in% rownames(dataset)] # Subset the list of input genes for those that exist in the dataset
  weights = rep(1,length(gene_subset)) # Default weight of 1 for every point
  
  if(length(gene_subset)>1){ # More than 1 gene was provided. Calculate the centroid
    search_values = colMeans(dataset[gene_subset,])
    if(weighted){ # Calculate the inverse of the standard deviation for each dimension in the input data
      weights = (apply(dataset[gene_subset,], 2, sd))^-1
    }
  }else{
    if(length(gene_subset)==0){ # No genes were found in the dataset
      return()
    }
    # Exactly 1 gene makes up the search values
    search_values = dataset[gene_subset,]
  }
  
  dists = apply(dataset, 1, function(gene_values){
    return(euc.dist(as.numeric(search_values),as.numeric(gene_values),as.numeric(weights)))
  })
  
  if(!include_input){
    dists = dists[!names(dists) %in% gene]
  }
  return(sort(dists))
}



back to top