Revision 8f750d2bd2afc7bd12844aedf402519ea117930a authored by cscjohns on 03 July 2021, 19:17:11 UTC, committed by GitHub on 03 July 2021, 19:17:11 UTC
1 parent f66f892
Raw File
dbi_5_functional_analyses.R
################################################################################
#
# Functional genomic analyses
#
################################################################################

# Format R session -------------------------------------------------------------
# Load packages
library(biomaRt)
library(cobs)
library(doParallel)
library(edgeR)
library(EMMREML)
library(limma)
library(tidyverse)


# Create a biomart object with ensembl mfas genes
mart <- useMart(biomart = 'ENSEMBL_MART_ENSEMBL',
                dataset = paste0('mfascicularis', '_gene_ensembl'))

# Create a vector of gene names from expression matrix
ensembl.gene.names <- DEG_info$stable_ID

# Create a biomart object of GO terms associated with genes from study
mfas.GO <- getBM(attributes = c('ensembl_gene_id', 'go_id'),
                 filters = 'ensembl_gene_id',
                 values = ensembl.gene.names,
                 mart = mart)

# Create a list of 
gene.ID2GO <- lapply(unique(mfas.GO$ensembl_gene_id),
                     function(x){sort(mfas.GO[mfas.GO$ensembl_gene_id == x,
                                              'go_id'])})
names(gene.ID2GO) <- unique(mfas.GO$ensembl_gene_id)

# #try opposite list
# go_groups <- lapply(unique(mfas.GO$go_id),
#                      function(x){sort(mfas.GO[mfas.GO$go_id == x,
#                                               'ensembl_gene_id'])})
# names(go_groups) <- unique(mfas.GO$go_id)

## diet.tvals is a vector of diet standardized diet betas (beta/se(beta))
diet_tvals <-
  as.vector(unlist(DEG_info %>% arrange(stable_ID) %>% dplyr::select(std_beta_diet)))
names(diet_tvals) <- as.vector(unlist(DEG_info %>% arrange(stable_ID) %>% dplyr::select(stable_ID)))

# Calculate t-value threshold for FDR cutoff
med_tval_th <- (min(diet_tvals[unlist(DEG_info %>%
                                        filter(diet_FDR < 0.05,
                                               es_diet > 0) %>%
                                        dplyr::select(stable_ID))]) - 0.00001)
west_tval_th <- (max(diet_tvals[unlist(DEG_info %>%
                                         filter(diet_FDR < 0.05,
                                                es_diet < 0) %>%
                                         dplyr::select(stable_ID))]) + 0.00001)

# Create topGOdata objects for mediterranean and western diets
GOdata_med_de <- new('topGOdata',
                     description = 'Simple session',
                     ontology = 'BP',
                     allGenes = diet_tvals,
                     geneSel = function(diet_tvals) diet_tvals > med_tval_th,
                     nodeSize = 10,
                     annot = annFUN.gene2GO,
                     gene2GO = gene.ID2GO)

GOdata_west_de <- new('topGOdata',
                      description = 'Simple session',
                      ontology = 'BP',
                      allGenes = diet_tvals,
                      geneSel = function(diet_tvals) diet_tvals < west_tval_th,
                      nodeSize = 10,
                      annot = annFUN.gene2GO,
                      gene2GO = gene.ID2GO)

# Run Fisher Exact tests to look at enrichment
go_FET_med_de <- runTest(GOdata_med_de,
                         algorithm = 'weight01',
                         statistic = 'fisher')

go_FET_west_de <- runTest(GOdata_west_de,
                          algorithm = 'weight01',
                          statistic = 'fisher')

# Create results tables
go_med_de_results <- GenTable(GOdata_med_de,
                              FET.weight01 = go_FET_med_de,
                              orderBy = 'FET.weight01',
                              ranksOf = 'FET.weight01',
                              topNodes = go_FET_med_de@geneData[4],
                              numChar = 1000)
med_de_go_results <- go_med_de_results %>%
  mutate(pval = as.numeric(FET.weight01),
         qval = qvalue(pval)$qvalues,
         padj = p.adjust(pval, method = "BH"),
         lor = log2(Significant / (Annotated - Significant)) -
           log2((go_FET_med_de@geneData[2] - Significant) /
                  (go_FET_med_de@geneData[1] - Annotated -
                     go_FET_med_de@geneData[2] - Significant))) %>%
  filter(pval < 0.01) %>%
  select(GO.ID, Term, Annotated, Significant, Expected, lor, pval, qval, padj) %>%
  arrange(desc(lor))
write_csv(med_de_go_results, path = "GO_med.csv")

go_west_de_results <- GenTable(GOdata_west_de,
                               FET.weight01 = go_FET_west_de,
                               orderBy = 'FET.weight01',
                               ranksOf = 'FET.weight01',
                               topNodes = go_FET_west_de@geneData[4],
                               numChar = 1000)
west_de_go_results <- go_west_de_results %>%
  mutate(pval = as.numeric(FET.weight01),
         qval = qvalue(as.numeric(pval))$qvalues,
         padj = p.adjust(pval, method = "BH"),
         lor = log2(Significant / (Annotated - Significant)) -
           log2((go_FET_west_de@geneData[2] - Significant) /
                  (go_FET_west_de@geneData[1] - Annotated -
                     go_FET_west_de@geneData[2] - Significant))) %>%
  filter(pval < 0.01) %>%
  select(-c(FET.weight01)) %>%
  select(GO.ID, Term, Annotated, Significant, Expected, lor, pval, qval, padj) %>%
  arrange(desc(lor))
write_csv(west_de_go_results, path = "GO_west.csv")

# Mediation--------------------------------------------------------

# Create gene lists for background sets and mediated genes

west_mediated_genes <- as.vector(unlist(
  mediation_slim %>%
    filter(std_beta_diet < 0) %>%
    arrange(stable_ID) %>%
    select(std_beta_diet)))
names(west_mediated_genes) <- as.vector(unlist(
  mediation_slim %>%
    filter(std_beta_diet < 0) %>%
    arrange(stable_ID) %>%
    select(stable_ID)))

med_mediated_genes <- as.vector(unlist(
  mediation_slim %>%
    filter(std_beta_diet > 0) %>%
    arrange(stable_ID) %>%
    select(std_beta_diet)))

names(med_mediated_genes) <- as.vector(unlist(
  mediation_slim %>%
    filter(std_beta_diet > 0) %>%
    arrange(stable_ID) %>%
    select(stable_ID)))

all_mediated_genes <- c(west_mediated_genes, med_mediated_genes)

west_de_genes <- as.vector(unlist(DEG_info %>%
                                    filter(diet_FDR < 0.05,
                                           es_diet < 0) %>%
                                    arrange(stable_ID) %>%
                                    select(std_beta_diet)))
names(west_de_genes) <- as.vector(unlist(DEG_info %>%
                                           filter(diet_FDR < 0.05,
                                                  es_diet < 0) %>%
                                           arrange(stable_ID) %>%
                                           select(stable_ID)))

med_de_genes <- as.vector(unlist(DEG_info %>%
                                   filter(diet_FDR < 0.05,
                                          es_diet > 0) %>%
                                   arrange(stable_ID) %>%
                                   select(std_beta_diet)))
names(med_de_genes) <- as.vector(unlist(DEG_info %>%
                                          filter(diet_FDR < 0.05,
                                                 es_diet > 0) %>%
                                          arrange(stable_ID) %>%
                                          select(stable_ID)))

all_de_genes <- c(west_de_genes, med_de_genes)

# Create topGOdata objects for mediation
GOdata_med_mediation <- new('topGOdata',
                            description = 'Simple session',
                            ontology = 'BP',
                            allGenes = med_de_genes,
                            geneSel = function(med_de_genes) med_de_genes %in%
                              med_mediated_genes,
                            nodeSize = 5,
                            annot = annFUN.gene2GO,
                            gene2GO = gene.ID2GO)

GOdata_west_mediation <- new('topGOdata',
                             description = 'Simple session',
                             ontology = 'BP',
                             allGenes = west_de_genes,
                             geneSel = function(west_de_genes) west_de_genes %in%
                               west_mediated_genes,
                             nodeSize = 5,
                             annot = annFUN.gene2GO,
                             gene2GO = gene.ID2GO)

GOdata_all_mediation <- new('topGOdata',
                            description = 'Simple session',
                            ontology = 'BP',
                            allGenes = all_de_genes,
                            geneSel = function(all_de_genes) all_de_genes %in%
                              all_mediated_genes,
                            nodeSize = 5,
                            annot = annFUN.gene2GO,
                            gene2GO = gene.ID2GO)


# Run Fisher Exact Tests to look at enrichment
go_FET_med <- runTest(GOdata_med_mediation,
                      algorithm = 'weight01',
                      statistic = 'fisher')

go_FET_west <- runTest(GOdata_west_mediation,
                       algorithm = 'weight01',
                       statistic = 'fisher')

go_FET_all <- runTest(GOdata_all_mediation,
                      algorithm = 'weight01',
                      statistic = 'fisher')

# Create results tables
go_west_results <- GenTable(GOdata_west_mediation,
                            FET.weight01 = go_FET_west,
                            orderBy = 'FET.weight01',
                            ranksOf = 'FET.weight01',
                            topNodes = go_FET_west@geneData[4],
                            numChar = 1000)
west_go_results <- go_west_results %>%
  mutate(pval = as.numeric(FET.weight01),
         qval = qvalue(as.numeric(pval))$qvalues,
         padj = p.adjust(pval, method = "BH"),
         lor = log2(Significant / (Annotated - Significant)) -
           log2((go_FET_west@geneData[2] - Significant) /
                  (go_FET_west@geneData[1] - Annotated -
                     go_FET_west@geneData[2] - Significant))) %>%
  filter(pval < 0.01) %>%
  select(GO.ID, Term, Annotated, Significant, Expected, lor, pval, qval, padj) %>%
  arrange(desc(lor))
write_csv(west_go_results, path = "GO_west_mediated.csv")

go_med_results <- GenTable(GOdata_med_mediation,
                           FET.weight01 = go_FET_med,
                           orderBy = 'FET.weight01',
                           ranksOf = 'FET.weight01',
                           topNodes = go_FET_med@geneData[4],
                           numChar = 1000)
med_go_results <- go_med_results %>%
  mutate(pval = as.numeric(FET.weight01),
         qval = qvalue(as.numeric(pval))$qvalues,
         padj = p.adjust(pval, method = "BH"),
         lor = log2(Significant / (Annotated - Significant)) -
           log2((go_FET_med@geneData[2] - Significant) /
                  (go_FET_med@geneData[1] - Annotated -
                     go_FET_med@geneData[2] - Significant))) %>%
  filter(pval < 0.01) %>%
  select(GO.ID, Term, Annotated, Significant, Expected, lor, pval, qval, padj) %>%
  arrange(desc(lor))
write_csv(med_go_results, path = "GO_med_mediated.csv")

go_all_results <- GenTable(GOdata_all_mediation,
                           FET.weight01 = go_FET_all,
                           orderBy = 'FET.weight01',
                           ranksOf = 'FET.weight01',
                           topNodes = go_FET_all@geneData[4],
                           numChar = 1000)
all_go_results <- go_all_results %>%
  mutate(pval = as.numeric(FET.weight01),
         qval = qvalue(as.numeric(pval))$qvalues,
         padj = p.adjust(pval, method = "BH"),
         lor = log2(Significant / (Annotated - Significant)) -
           log2((go_FET_all@geneData[2] - Significant) /
                  (go_FET_all@geneData[1] - Annotated -
                     go_FET_all@geneData[2] - Significant))) %>%
  filter(pval < 0.01) %>%
  select(GO.ID, Term, Annotated, Significant, Expected, lor, pval, qval, padj) %>%
  arrange(desc(lor))
write_csv(all_go_results, path = "GO_all_mediated.csv")

# Old stuff ----------------------------------------
# vector of significance levels to test
sig.levels <- c(0.10, 0.05, 0.01)
for(i in 1:length(sig.levels)){
  # Set significance level
  sig.level <- sig.levels[i]
  
  # Calculate t-value threshold for FDR cutoff
  med.tval.th <-
    (min(diet.tvals[row.names(DEG_info[DEG_info$diet_FDR < sig.level &
                                         DEG_info$es_diet > 0, ])])
     - 0.00001)
  wes.tval.th <-
    (max(diet.tvals[row.names(DEG_info[DEG_info$diet_FDR < sig.level &
                                         DEG_info$es_diet < 0, ])])
     + 0.00001)
  
  # Create topGOdata objects for mediterranean and western diets
  GOdata.upreg.med <- new('topGOdata',
                          description = 'Simple session',
                          ontology = 'BP',
                          allGenes = diet.tvals,
                          geneSel = function(diet.tvals) diet.tvals > med.tval.th,
                          nodeSize = 10,
                          annot = annFUN.gene2GO,
                          gene2GO = gene.ID2GO)
  
  
  # Run four different tests to look at enrichment for MD
  FET.weight01 <- runTest(GOdata.upreg.med, algorithm = 'weight01',
                          statistic = 'fisher')
  FET.parentchild <- runTest(GOdata.upreg.med, algorithm = 'parentchild',
                             statistic = 'fisher')
  KS.weight01 <- runTest(GOdata.upreg.med, algorithm = 'weight01',
                         statistic = 'ks')
  KS.elim <- runTest(GOdata.upreg.med, algorithm = 'elim', statistic = 'ks')
  
  # Gather the top nodes from tests
  top.nodes <- max(c(FET.weight01@geneData[4], FET.parentchild@geneData[4],
                     KS.weight01@geneData[4], KS.elim@geneData[4]))
  
  # Create results object
  GOdata.upreg.med.results <- GenTable(GOdata.upreg.med,
                                       FET.weight01 = FET.weight01,
                                       FET.parentchild = FET.parentchild,
                                       KS.weight01 = KS.weight01,
                                       KS.elim = KS.elim,
                                       orderBy = 'FET.weight01',
                                       ranksOf = 'FET.weight01',
                                       topNodes = top.nodes)
  
  # rename column headings
  for(j in 1:length(colnames(GOdata.upreg.med.results))){
    colnames(GOdata.upreg.med.results)[j] <-
      paste0(as.character(sig.level), '_', colnames(GOdata.upreg.med.results)[j])
  }
  
  # Store results in data frame
  ifelse(i == 1, all.topGO.med.results <- GOdata.upreg.med.results[order(
    GOdata.upreg.med.results[,1]),],
    all.topGO.med.results <-
      cbind(all.topGO.med.results,
            GOdata.upreg.med.results[order(
              GOdata.upreg.med.results[,1]),]))
  
  # Graph the results
  printGraph(GOdata.upreg.med, KS.weight01, firstSigNodes = 5,
             fn.prefix = paste0(out_dir, 'topGO_med_', sig.level, '_KSweight01'),
             useInfo = 'all', pdfSW = TRUE)
  printGraph(GOdata.upreg.med, FET.weight01, firstSigNodes = 5,
             fn.prefix = paste0(out_dir, 'topGO_med_', sig.level, '_FETweight01'),
             useInfo = 'all', pdfSW = TRUE)
  printGraph(GOdata.upreg.med, KS.elim, firstSigNodes = 5,
             fn.prefix = paste0(out_dir, 'topGO_med_', sig.level, '_KSelim'),
             useInfo = 'all', pdfSW = TRUE)
  printGraph(GOdata.upreg.med, FET.parentchild, firstSigNodes = 5,
             fn.prefix = paste0(out_dir, 'topGO_med_', sig.level, '_FETparentchild'),
             useInfo = 'all', pdfSW = TRUE)
  
  # Repeat for WD
  GOdata.upreg.wes <- new('topGOdata',
                          description = 'Simple session',
                          ontology = 'BP',
                          allGenes = diet.tvals,
                          geneSel = function(diet.tvals) diet.tvals < wes.tval.th,
                          nodeSize = 10,
                          annot = annFUN.gene2GO,
                          gene2GO = gene.ID2GO)
  FET.weight01 <- runTest(GOdata.upreg.wes, algorithm = 'weight01',
                          statistic = 'fisher')
  FET.parentchild <- runTest(GOdata.upreg.wes, algorithm = 'parentchild',
                             statistic = 'fisher')
  KS.weight01 <- runTest(GOdata.upreg.wes, algorithm = 'weight01',
                         statistic = 'ks')
  KS.elim <- runTest(GOdata.upreg.wes, algorithm = 'elim', statistic = 'ks')
  top.nodes <- max(c(FET.weight01@geneData[4], FET.parentchild@geneData[4],
                     KS.weight01@geneData[4], KS.elim@geneData[4]))
  GOdata.upreg.wes.results <- GenTable(GOdata.upreg.wes,
                                       FET.weight01 = FET.weight01,
                                       FET.parentchild = FET.parentchild,
                                       KS.weight01 = KS.weight01,
                                       KS.elim = KS.elim,
                                       orderBy = 'FET.weight01',
                                       ranksOf = 'FET.weight01',
                                       topNodes = top.nodes)
  for(j in 1:length(colnames(GOdata.upreg.wes.results))){
    colnames(GOdata.upreg.wes.results)[j] <-
      paste0(as.character(sig.level), '_', colnames(GOdata.upreg.wes.results)[j])
  }
  
  ifelse(i == 1, all.topGO.wes.results <- GOdata.upreg.wes.results[order(
    GOdata.upreg.wes.results[,1]),],
    all.topGO.wes.results <-
      cbind(all.topGO.wes.results,
            GOdata.upreg.wes.results[order(
              GOdata.upreg.wes.results[,1]),]))
  
  
  # Graph the results
  printGraph(GOdata.upreg.wes, KS.weight01, firstSigNodes = 5,
             fn.prefix = paste0(out_dir, 'topGO_wes_', sig.level, '_KSweight01'),
             useInfo = 'all', pdfSW = TRUE)
  printGraph(GOdata.upreg.wes, FET.weight01, firstSigNodes = 5,
             fn.prefix = paste0(out_dir, 'topGO_wes_', sig.level, '_FETweight01'),
             useInfo = 'all', pdfSW = TRUE)
  printGraph(GOdata.upreg.wes, KS.elim, firstSigNodes = 5,
             fn.prefix = paste0(out_dir, 'topGO_wes_', sig.level, '_KSelim'),
             useInfo = 'all', pdfSW = TRUE)
  printGraph(GOdata.upreg.wes, FET.parentchild, firstSigNodes = 5,
             fn.prefix = paste0(out_dir, 'topGO_wes_', sig.level, '_FETparentchild'),
             useInfo = 'all', pdfSW = TRUE)
  
  # Write data to table
  write.table(GOdata.upreg.wes.results,
              file = paste0(out_dir, 'topGO_wes_', sig.level, '_upregulated.csv'),
              append = FALSE, quote = FALSE, row.names = FALSE, col.names = TRUE,
              sep = ',')
  write.table(GOdata.upreg.med.results,
              file = paste0(out_dir, 'topGO_med_', sig.level, '_upregulated.csv'),
              append = FALSE, quote = FALSE, row.names = FALSE, col.names = TRUE,
              sep = ',')
}



old.cols <- c('0.1_FET.weight01', '0.1_FET.parentchild',
              '0.1_KS.weight01', '0.1_KS.elim',
              '0.05_FET.weight01', '0.05_FET.parentchild',
              '0.05_KS.weight01', '0.05_KS.elim',
              '0.01_FET.weight01', '0.01_FET.parentchild',
              '0.01_KS.weight01', '0.01_KS.elim')
new.cols <- c('padj_0.1_FET.weight01', 'padj_0.1_FET.parentchild',
              'padj_0.1_KS.weight01', 'padj_0.1_KS.elim',
              'padj_0.05_FET.weight01', 'padj_0.05_FET.parentchild',
              'padj_0.05_KS.weight01', 'padj_0.05_KS.elim',
              'padj_0.01_FET.weight01', 'padj_0.01_FET.parentchild',
              'padj_0.01_KS.weight01', 'padj_0.01_KS.elim')

for(i in 1:12){
  all.topGO.med.results[, new.cols[i]] <-
    p.adjust(all.topGO.med.results[, old.cols[i]], method = 'BH')
  all.topGO.wes.results[, new.cols[i]] <-
    p.adjust(all.topGO.wes.results[, old.cols[i]], method = 'BH')
}

# Create a list of significant GO terms for each test and threshold
sig.GO.terms <- list()
sig.GO.terms[1:12] <- lapply(old.cols,
                             function(x){all.topGO.med.results[
                               (!is.na(all.topGO.med.results[,x]) &
                                  all.topGO.med.results[, x] < 0.05),
                               '0.1_Term']})
names(sig.GO.terms)[1:12] <-
  lapply(old.cols,function(x){as.character(paste0('med_upregulated_', x))})

sig.GO.terms[13:24] <- lapply(old.cols,
                              function(x){all.topGO.wes.results[
                                (!is.na(all.topGO.wes.results[,x]) &
                                   all.topGO.wes.results[, x] < 0.05),
                                '0.1_Term']})
names(sig.GO.terms)[13:24] <-
  lapply(old.cols,function(x){as.character(paste0('wes_upregulated_', x))})

# Create a list of significnat GO IDs for each test and threshold
sig.GO.IDs <- list()
sig.GO.IDs[1:12] <- lapply(old.cols,
                           function(x){all.topGO.med.results[
                             (!is.na(all.topGO.med.results[,x]) &
                                all.topGO.med.results[, x] < 0.05),
                             '0.1_GO.ID']})
names(sig.GO.IDs)[1:12] <-
  lapply(old.cols,function(x){as.character(paste0('med_upregulated_', x))})

sig.GO.IDs[13:24] <- lapply(old.cols,
                            function(x){all.topGO.wes.results[
                              (!is.na(all.topGO.wes.results[,x]) &
                                 all.topGO.wes.results[, x] < 0.05),
                              '0.1_GO.ID']})
names(sig.GO.IDs)[13:24] <-
  lapply(old.cols,function(x){as.character(paste0('wes_upregulated_', x))})

number.GO.sig <- data.frame(matrix(lapply(sig.GO.terms, length),
                                   byrow = FALSE, nrow = 12))
colnames(number.GO.sig) <- c('med_upregulated', 'wes_upregulated')
row.names(number.GO.sig) <- new.cols

sig.GO <- list(number.GO.sig, sig.GO.IDs, sig.GO.terms)

write.table(all.topGO.med.results[
  all.topGO.med.results$padj_0.05_FET.parentchild < 0.05,
  c('0.1_GO.ID', 'padj_0.05_FET.parentchild')],
  file = paste0(out_dir, 'GO_IDs_med.txt'),
  append = FALSE, quote = FALSE, row.names = FALSE, col.names = FALSE,
  sep = '\t')

write.table(all.topGO.wes.results[
  all.topGO.wes.results$padj_0.05_FET.parentchild < 0.05,
  c('0.1_GO.ID', 'padj_0.05_FET.parentchild')],
  file = paste0(out_dir, 'GO_IDs_wes.txt'),
  append = FALSE, quote = FALSE, row.names = FALSE, col.names = FALSE,
  sep = '\t')

write.table(all.topGO.med.results,
            file = paste0(out_dir, 'GO_med_upregulated_final.tsv'),
            append = FALSE, quote = FALSE, row.names = FALSE, col.names = TRUE,
            sep = '\t')

write.table(all.topGO.wes.results,
            file = paste0(out_dir, 'GO_wes_upregulated_final.tsv'),
            append = FALSE, quote = FALSE, row.names = FALSE, col.names = TRUE,
            sep = '\t')

# Testing enrichment for disease-associated genes ---------------

# Load in gene sets from ptwas paper
ptwas_in <- read_delim("~/Documents/2_data/ptwas_data.txt",
                       delim = " ") %>%
  clean_names()

# Load mart for converting human ensembl gene ids to M. fascicularis
c2h_table <- read_delim("~/Documents/2_data/cyno_to_human.txt",
                        delim = "\t") %>%
  clean_names()

# Rename the columns
colnames(c2h_table) <- c("human_stable_id", "cyno_stable_id")

# Identify MED/WEST/non_sig genes in the PTWAS gene set
ptwas_data <- ptwas_in %>%
  separate(col = "gene",
           into = c("human_stable_id", NA)) %>%  # remove gene variant id
  left_join(c2h_table,
            by = "human_stable_id") %>%  # use c2h table to ID orthologues
  left_join(DEG_info %>%
              mutate(diet_genelist = ifelse(diet_sig == "TRUE", diet_dir, "not_sig")) %>%
              select(stable_ID, diet_genelist),
            by = c("cyno_stable_id" = "stable_ID")) %>%
  replace_na(list(rep("missing", 6)))

ptwas_data$diet_genelist <- replace_na(ptwas_data$diet_genelist, "missing")

west_enrich <- ptwas_data %>%
  group_by(trait, diet_genelist) %>%
  summarize(n = n()) %>%
  pivot_wider(names_from = diet_genelist,
              values_from = n,
              values_fill = 0) %>%
  mutate(lor = log2(fisher.test(matrix(c(WEST,
                                         MED + not_sig,
                                         2236 - WEST,
                                         10004 - MED - not_sig),
                                       nrow = 2),
                                alternative = "g")$estimate),
         loglow_ci = log2(fisher.test(matrix(c(WEST,
                                               MED + not_sig,
                                               2236 - WEST,
                                               10004 - MED - not_sig),
                                             nrow = 2),
                                      alternative = "g")$conf.int[1]),
         loghi_ci = log2(fisher.test(matrix(c(WEST,
                                              MED + not_sig,
                                              2236 - WEST,
                                              10004 - MED - not_sig),
                                            nrow = 2),
                                     alternative = "g")$conf.int[2]),
         pval = fisher.test(matrix(c(WEST,
                                     MED + not_sig,
                                     2236 - WEST,
                                     10004 - MED - not_sig),
                                   nrow = 2),
                            alternative = "g")$p.value) %>%
  arrange(desc(lor))

west_enrich$qval <- qvalue(west_enrich$pval)$qvalues
west_enrich$padj <- p.adjust(west_enrich$pval)

med_enrich <- ptwas_data %>%
  group_by(trait, diet_genelist) %>%
  summarize(n = n()) %>%
  pivot_wider(names_from = diet_genelist,
              values_from = n,
              values_fill = 0) %>%
  mutate(lor = log2(fisher.test(matrix(c(MED,
                                         WEST + not_sig,
                                         2664 - MED,
                                         9576 - WEST - not_sig),
                                       nrow = 2),
                                alternative = "g")$estimate),
         pval = fisher.test(matrix(c(MED,
                                     WEST + not_sig,
                                     2664 - MED,
                                     9576 - WEST - not_sig),
                                   nrow = 2),
                            alternative = "g")$p.value) %>%
  arrange(desc(lor))

med_enrich$qval <- qvalue(med_enrich$pval)$qvalues

deg_enrich <- ptwas_data %>%
  group_by(trait, diet_genelist) %>%
  summarize(n = n()) %>%
  pivot_wider(names_from = diet_genelist,
              values_from = n,
              values_fill = 0) %>%
  mutate(or = fisher.test(matrix(c(MED + WEST,
                                   not_sig,
                                   4900 - MED - WEST,
                                   7340 - not_sig),
                                 nrow = 2),
                          alternative = "g")$estimate,
         pval = fisher.test(matrix(c(MED + WEST,
                                     not_sig,
                                     4900 - MED,
                                     7340 - not_sig),
                                   nrow = 2),
                            alternative = "g")$p.value) %>%
  arrange(desc(or))

deg_enrich$qval <- qvalue(deg_enrich$pval)$qvalues

west_report <- west_enrich %>%
  filter(qval < 0.2) %>%
  ungroup() %>%
  select(-trait) %>%
  add_column(
    source = c(
      "IMMUNOBASE, hg19",
      "ADIPOGen",
      "GLGC, MC",
      "GLGC, MC",
      "SSGAC",
      "UKB_6152_9",
      "Astle* et al., *2016",
      "UKB_23099",
      "UKB_21001"),
    trait = c(
      "Celiac Disease",
      "Adiponectin",
      "LDL-C",
      "HDL-C",
      "Education Years Pooled",
      "Hayfever, Allergic Rhinitis, or Eczema",
      "Eosinophil Counts",
      "Body Fat Percentage",
      "Body Mass Index (BMI)")) %>%
  select(trait, WEST, MED, not_sig, missing, lor, pval, qval, everything()) %>%
  mutate(trait = fct_reorder(trait, lor, .desc = FALSE)) %>%
  filter(trait != "Education Years Pooled")

west_report %>%
  mutate(sig = ifelse(qval < 0.2, "sig", "not_sig"),
         xtext = str_c("**", trait, "**,<br>*(", source, ")*"),
         xtext = fct_reorder(xtext, lor, .desc = FALSE)) %>%
  filter(qval < 0.2) %>%
  ggplot(aes(x = xtext, y = lor)) +
  geom_errorbar(aes(x = xtext,
                    ymin = loglow_ci,
                    ymax = loghi_ci),
                col = "black", size = 1, width = 0.2) +
  geom_hline(yintercept = 0, linetype = "dotted", size = 1) +
  geom_point(aes(col = sig), size = 4) +
  scale_color_brewer(palette = "Dark2",
                     breaks = c("sig", "not_sig"),
                     labels = c("< 0.2", "> 0.2"),
                     name = "q Value",
                     guide = NULL) +
  labs(x = NULL,
       y = expression(Log[2]*"(Odds Ratio)"),
       title = "Traits Enriched in Western Genes") +
  coord_flip() +
  theme(legend.position = "top",
        axis.text.y = ggtext::element_markdown())

ggsave("~/Desktop/ptwas_plot.pdf",
       device = "pdf",
       width = 10,
       height = 5,
       units = "in")
back to top