https://gist.github.com/wenjie1991/d79fe428ac80c8f2e5d781a966df3978
Raw File
Tip revision: 0887f17c2a830b5adfc42757a744a547a8e8fc54 authored by Wenjie Sun on 05 March 2021, 16:15:03 UTC
Tip revision: 0887f17
pathway_enrichment.R
library(stringr)
library(magrittr)
library(msigdbr)
library(clusterProfiler)
library(org.Mm.eg.db)

symbol2entrez = function(x) {
    ensembl = str_split_fixed(x, "\\|", 2) %>% extract(, 1) %>% str_split_fixed("\\.", 2) %>% extract(, 1)
    select(org.Mm.eg.db, ensembl, "ENTREZID", "SYMBOL")[["ENTREZID"]] %>% na.omit
}

gene_set_enrichment = function(query, species = "mouse", background = NULL) {

  # TODO: Human datasets
  species_dict = c("mouse" = "Mus musculus", "human" = "Homo ...")
  species_official_name = species_dict[species]

  m_df = msigdbr(species = species_official_name, category = "H")
  m_HGENE = m_df %>% dplyr::select(gs_id, entrez_gene) %>% as.data.frame()
  m_HNAME = m_df %>% dplyr::select(gs_id, gs_name) %>% as.data.frame()
  #   m_df = msigdbr(species = species_official_name, category = "C2")
  #   m_c2GENE = m_df %>% dplyr::select(gs_id, entrez_gene) %>% as.data.frame()
  #   m_c2NAME = m_df %>% dplyr::select(gs_id, gs_name) %>% as.data.frame()
  #   m_df = msigdbr(species = species_official_name, category = "C4")
  #   m_c4GENE = m_df %>% dplyr::select(gs_name, entrez_gene) %>% as.data.frame()
  #   m_c4NAME = m_df %>% dplyr::select(gs_id, gs_name) %>% as.data.frame()

  enrichemnt = list()
  # TODO: auto transform the gene name type
  entrezid = query

  if (is.null(background)) {
    enrichemnt$KEGG = enrichKEGG(entrezid, organism = 'mmu', pAdjustMethod = 'BH', pvalueCutoff = 1, qvalueCutoff = 1, universe = background)
    #     enrichemnt$C2 = enricher(entrezid, universe = background, pAdjustMethod = 'BH', pvalueCutoff = 1, qvalueCutoff = 1, TERM2GENE = m_c2GENE, TERM2NAME = m_c2NAME)
    enrichemnt$HALLMARK = enricher(entrezid, universe = background, pAdjustMethod = 'BH', pvalueCutoff = 1, qvalueCutoff = 1, TERM2GENE = m_HGENE, TERM2NAME = m_HNAME)
    #     enrichemnt$C4 = enricher(entrezid, universe = background, pAdjustMethod = 'BH', pvalueCutoff = 1, qvalueCutoff = 1, TERM2GENE = m_c4GENE, TERM2NAME = m_c4NAME)
    enrichemnt = lapply(enrichemnt, function(x) {setReadable(x, OrgDb = org.Mm.eg.db, keyType="ENTREZID") %>% data.frame}) %>% ldply(.id = "DataBase") %>% data.table
  }
  enrichemnt
}

plot_enrichment_result = function(x) {
  g = ggplot(x) + aes(x = -log10(pvalue), y = Description, color = Count) + geom_point()
  g + theme_bw() + scale_color_continuous()
}


## EXAMPLE:
# d = fread("../data/As_result/WATER_VS_DSS_AS_diffpsi0.1.tsv")
# up_symbol_v = d[change_type == "up", symbol] %>% symbol2entrez
# down_symbol_v = d[change_type == "down", symbol] %>% symbol2entrez
# both_v = d$symbol %>% symbol2entrez

# dir.create("../data/GeneSet_result")
# x = gene_set_enrichment(up_symbol_v)[pvalue <= 0.1]
# g1 =plot_enrichment_result(x) + labs(title = "UP genes")
# write_tsv(x, "../data/GeneSet_result/Water_VS_DSS_UP_pathway.tsv")
back to top