Revision 8f750d2bd2afc7bd12844aedf402519ea117930a authored by cscjohns on 03 July 2021, 19:17:11 UTC, committed by GitHub on 03 July 2021, 19:17:11 UTC
1 parent f66f892
Raw File
dbi_2_behavior.R
################################################################################
#
# Behavioral analysis
#
################################################################################

# Format R session -------------------------------------------------------------
# Load packages
library(tidyverse)
select <- dplyr::select

# Set theme and colors for graphing
theme_set(theme_classic(base_size = 20, base_family = ""))  # graphing defaults
wes_col <- "#D55E00"  # specify color for Western diet
med_col <- "#56B4E9"  # specify color for Mediterranean diet

# Import raw data
load("data_files/processed_data_1.RData")

# Behavior vs. diet ------------------------------------------------------------
# Test for differences in diets in the baseline and treatment phases of the
# experiment.

# Test for differences in behavior b/w diets in during baseline phase
BL_pvals <- apply(BL_behav, 2, function(x){
  wilcox.test(x[sample_info$diet == "MD"],
              x[sample_info$diet == "WD"])$p.value})
sum(p.adjust(BL_pvals, method = "holm") < 0.05)

# Test for differences in behavior b/w diets in during treatment phase
TX_pvals <- apply(TX_behav, 2, function(x){
  wilcox.test(x[sample_info$diet == "MD"],
              x[sample_info$diet == "WD"])$p.value})
sum(p.adjust(TX_pvals, method = "holm") < 0.05)
TX_pvals[p.adjust(TX_pvals, method = "holm") < 0.05]

# Diet Altered Behavior (DAB) --------------------------------------------------
# Conduct a principal component analysis (PCA) of the behavioral data.

# PCA on weighted TX phase raw behaviors using FactoMineR's PCA function
behav_PCA <- PCA(TX_behav, ncp = 35, graph = FALSE,
                 scale.unit = TRUE)

# Add diet coding to the individual PC loadings to create correlation matrix
pca_df <- cbind(behav_PCA$ind$coord,
                "diet" = sample_info$diet,
                "rank" = sample_info$rel_rank) %>%
  as_tibble()

# Correlation matrix between PC loadings and diet
pca_cor <- cor(pca_df)

# Create a data frame to graph variance epxlained
pca_pov <- data.frame(cbind(behav_PCA$eig,
                            pca_cor[1:(ncol(pca_cor) - 2), "diet"],
                            c(1:nrow(behav_PCA$eig)))) %>%
  as_tibble()
colnames(pca_pov) <-
  c("eigenvalue", "pct_var", "cum_pct_var", "cor_diet", "PC")

# T tests of projections between diet groups
pc_diet_test <- apply(pca_df[, 1:20], 2, function(x){
  t.test(x[sample_info$diet == "WD"],
         x[sample_info$diet == "MD"])$p.value})
pca_pov$diet_pval <- pc_diet_test  # add p-values to data frame

# Add DAB score to sample_info matrix
sample_info$DAB <- -behav_PCA$ind$coord[, 2]

# Figures 4A and 4--figure supplement 1 (plots of behavior observed) -----------
# The following funciton will generate boxplots with the underlying data
# represented as points for a given data frame (df) and set of behaviors
# (behav_set) with the ability to specifiy axis labels and limits. It is used
# to plot the behavioral observations during the baseline (BL) and treatment
# (TX) phases of the experiment.

# Define function for plotting
behavior.plots <- function(df, behav_set, axis_label, xmin, xmax){
  # Alter df input to prepare for plotting
  plotter <- df %>%
    as.data.frame() %>%
    add_column(diet = sample_info$diet) %>%
    gather(key = "code", value = "value", PCT_TM_B:RT_XSUBrec) %>%
    left_join(behav_codes %>%
                select(code, description),
              by = "code") %>%
    mutate(description = str_to_title(description))
  
  # Summarize the median values of each behavior in each group
  diet_meds <- plotter %>%
    group_by(diet, code) %>%
    summarize(median = median(value)) %>%
    spread(key = diet, value = median) %>%
    mutate(dif_meds = MD - WD, mag_dif_meds = abs(dif_meds))
  
  behavior_medians <- data.frame(
    "median" = apply(df, 2, median),
    "code" = colnames(BL_behav))
  
  behavior_pca <- data.frame(
    "loading" = behav_PCA$var$cor[, 2],
    "code" = names(behav_PCA$var$cor[, 2]))
  
  behav_plot <- plotter %>%
    left_join(behavior_medians, by = "code") %>%
    left_join(behavior_pca, by = "code") %>%
    left_join(diet_meds, by = "code") %>%
    mutate(description = str_remove(description, "Percent Of ") %>%
             str_remove(" Eyes Closed") %>%
             str_replace("Alone Nonstereotypic", "In") %>%
             str_remove("Time ") %>%
             str_remove("Spent ") %>%
             str_remove("In ") %>%
             str_remove("Rate Of ")) %>%
    filter(code %in% behav_set) %>%
    mutate(description = fct_reorder(description, loading, .desc = TRUE)) %>%
    ggplot(aes(x = description, y = value, color = diet)) +
    geom_boxplot(aes(fill = diet), color = "black", alpha = 0.3,
                 outlier.size = 0, coef = 0) +
    geom_point(position = position_jitterdodge(jitter.width = 0.1,
                                               jitter.height = 0,
                                               dodge.width = 0.75)) +
    scale_color_manual(values = c(wes_col, med_col),
                       labels = c("WD", "MD"),
                       name = "Diet") +
    scale_fill_manual(values = c(wes_col, med_col),
                      labels = c("WD", "MD"),
                      name = "Diet") +
    scale_y_continuous(limits = c(xmin, xmax)) +
    labs(x = "",
         y = axis_label) +
    coord_flip() +
    theme(legend.position = "top")
  return(behav_plot)
}

# Generate plot for Figure 4A
figure_4a <- behavior.plots(TX_behav, c("PCT_TM_B", "PCT_TM_H", "PCT_TM_R"),
                            "Duration of Behavior (Percent of Time)", NA, NA)
saveRDS(figure_4a, file = "plots/figure_4a.RDS")

# Generate plot for Figure 4--figure supplement 1A
figure_4_figure_supplement_1a <-
  behavior.plots(BL_behav,
                 colnames(BL_behav)[!str_detect(colnames(BL_behav), "PCT_TM")],
                 "Frequency of Behavior (Events per Hour)", 0, 60)
saveRDS(figure_4_figure_supplement_1a,
        file = "plots/figure_4_figure_supplement_1a.RDS")

# Generate plot for Figure 4--figure supplement 1B
figure_4_figure_supplement_1b <-
  behavior.plots(TX_behav,
                 colnames(TX_behav)[!str_detect(colnames(TX_behav), "PCT_TM")],
                 "Frequency of Behavior (Events per Hour)", 0, 60)
saveRDS(figure_4_figure_supplement_1b,
        file = "plots/figure_4_figure_supplement_1b.RDS")

# Generate plot for Figure 4--figure supplement 1C
figure_4_figure_supplement_1c <-
  behavior.plots(BL_behav,
                 colnames(BL_behav)[!str_detect(colnames(BL_behav), "PCT_TM")],
                 "Duration of Behavior (Percent of Time)", 0, 100)
saveRDS(figure_4_figure_supplement_1c,
        file = "plots/figure_4_figure_supplement_1c.RDS")

# Generate plot for Figure 4--figure supplement 1D
figure_4_figure_supplement_1d <-
  behavior.plots(TX_behav,
                 colnames(TX_behav)[!str_detect(colnames(TX_behav), "PCT_TM")],
                 "Duration of Behavior (Percent of Time)", 0, 100)
saveRDS(figure_4_figure_supplement_1d,
        file = "plots/figure_4_figure_supplement_1d.RDS")

# Figure 4B (scree plot of behavioral PCA) -------------------------------------
# Plot the variance explained by each PC of the behavioral PCA and the p-value
# of the difference in projections between diet groups.

# Generate plot 4B
figure_4b <- pca_pov %>%
  ggplot(aes(x = PC, y = pct_var, fill = -log(pc_diet_test))) +
  geom_bar(stat = "identity") +
  labs(x = "Principal Component (PC)",
       y = "Variance Explained (%)") +
  scale_fill_gradient(low = "gray",
                      high = "DarkGreen",
                      name = "Diet Correlation\n-Log10(p)")
saveRDS(figure_4b, file = "plots/figure_4b.RDS")


# Figure 4C (plot of PC2 vs diet) ----------------------------------------------
# Plot individual PC2 projection (DAB score) by diet.

# Generate plot 4C
figure_4c <- sample_info %>%
  ggplot(aes(x = diet, y = DAB, fill = diet, color = diet)) +
  geom_boxplot(outlier.alpha = 0, alpha = 0.3, color = "black", coef = 0) +
  geom_jitter(size = 3, width = 0.1) +
  geom_hline(yintercept = 0, linetype = "dashed", col = "black") +
  scale_fill_manual(name = "Diet",
                    values = c(wes_col, med_col),
                    labels = c("WD", "MD")) +
  scale_color_manual(name = "Diet",
                     values = c(wes_col, med_col),
                     labels = c("WD", "MD")) +
  scale_x_discrete(labels = c("WD" = "Western", "MD" = "Mediterranean")) +
  theme(axis.title.x = element_blank(),
        legend.position = "none") +
  labs(y = "DAB Score")
saveRDS(figure_4c, file = "plots/figure_4c.RDS")

# Figure 4D (correlation between DAB score and behaviors) ----------------------
# Plot the correlation between individual PC2 projection (sign reversed; DAB
# score) and behaviors during the treatment phase. Only show those that are
# significantly correlated (Holm-Bonferroni-adjusted p-value < 0.05).

# Create data frame of correlations
pc2_behav_cor <- data.frame(
  "cor" = apply(TX_behav, 2, function(x){
    cor(sample_info$DAB, x)}),
  "cor_pval" = p.adjust(apply(TX_behav, 2, function(x){
    cor.test(sample_info$DAB, x)$p.value}), method = "holm"),
  "code" = colnames(TX_behav),
  "diet" = apply(TX_behav, 2, function(x){
    cor(-sample_info$diet_west, x)})) %>%
  left_join(behav_codes %>%
              select(code, description),
            by = "code") %>%
  mutate(description = str_to_title(description) %>%
           str_remove("Percent Of ") %>%
           str_remove(" Eyes Closed") %>%
           str_replace("Alone Nonstereotypic", "In") %>%
           str_remove("Time ") %>%
           str_remove("Spent ") %>%
           str_remove("In ") %>%
           str_remove("Rate Of "))

# Generate plot 4D
figure_4d <- pc2_behav_cor %>%
  mutate(description = fct_reorder(description, cor)) %>%
  filter(cor_pval < 0.05) %>%
  ggplot(aes(x = description, y = cor)) +
  geom_point(aes(col = factor(ifelse(diet > 0, 1, 0))), size = 5) +
  geom_segment(aes(y = 0,
                   x = description, 
                   yend = cor, 
                   xend = description)) +
  scale_color_manual(breaks = c(0, 1),
                     values = c(wes_col, med_col),
                     guide = FALSE) +
  scale_y_continuous(limits = c(-1, 1)) +
  labs(y = "Correlation with Diet-Altered Behavior (DAB)",
       x = "") +
  coord_flip() +
  geom_hline(yintercept = 0, linetype = "dashed", col = "black")
saveRDS(figure_4d, file = "plots/figure_4d.RDS")

# Figure 4--figure supplement 2 (plot of PC1 vs rank) --------------------------
# Plot individual PC1 projections (sign reversed) vs relative rank.

# Test correlation between rank and PC1
cor.test(pca_df$rank, -pca_df$Dim.1)

# Generate figure 4--figure supplement 2
figure_4_figure_supplement_2 <- pca_df %>%
  ggplot(aes(x = rank, y = -Dim.1)) +
  geom_smooth(method = "lm", se = TRUE, col = "black") +
  geom_point(aes(col = as.factor(diet)), size = 4) +
  labs(x = "Relative Rank",
       y = "Projection onto Behavior PC1",
       caption = "r = 0.84") +
  scale_color_manual(name = NULL,
                     breaks = c(1, 2),
                     values = c(wes_col, med_col),
                     labels = c("Western", "Mediterranean")) +
  theme(legend.position = "top",
        plot.caption = element_text(face = "italic"))
saveRDS(figure_4_figure_supplement_2,
        file = "plots/figure_4_figure_supplement_2.RDS")

# Save output data -------------------------------------------------------------
# Save data for downstream analyses
save(gene_counts, sample_info, r_matrix, kinship,
     file = "data_files/processed_data_2.RData")
back to top