hmm_+_ssf_explore.Rmd
---
title: "Altusried HMM + SSF - results"
author: "Rachel"
date: "2023-07-05"
output:
html_document:
toc: true
toc_float: true
theme: cerulean
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = F,message=F)
#main packages #delete/add as needed
library(dplyr)
library(ggplot2)
library(DescTools)
library(knitr)
library(ggpubr)
library(plotly)
#library(pracma) #for standard error
library(reactable)
library(wesanderson)
library(tidyr)
library(dgof)
library(amt)
library(pROC)
std_error <- function(x) sd(x,na.rm=T) / sqrt(length(x))
```
## About
Here are the exploratory results for population models for Altusried, when accounting for HMM state.
For determining the population models, I followed the approach of Morrison et al. (2015) [See manuscript for full reference] - to calculate mean coefficients to describe the population, I averaged coefficients from the individually selected models. Where the individual did not have a term in its model, the coefficient was set to 0. Also, I've removed outliers when calculating the means.
## Steps per state
```{r fish number steps per state,eval=F}
files_ssf_df <- list.files("data/final ssf + hmm df", full.names = T)
#files are per state, so read in, save fish id, sp, state and nrow
for(i in 1:length(files_ssf_df)){
dat <- read.csv(files_ssf_df[i])
#remove false steps
dat <- dat %>% filter(case_==TRUE)
dat_sum <- dat %>% group_by(fish_id,species,state) %>% summarise(n_steps=n())
if(i==1){
all_sum <- dat_sum
} else{
all_sum <- rbind(all_sum,dat_sum)
}
}
#work out % steps per fish
p <- all_sum %>% group_by(fish_id,species) %>% summarise(total_steps = sum(n_steps))
all_sum_fin <- all_sum %>% spread(key="state",value="n_steps")
all_sum_fin$total_steps<-p$total_steps
all_sum_fin <- all_sum_fin %>%rename(state_1=`1`,state_2=`2`)
#replace NA with 0
all_sum_fin[is.na(all_sum_fin)] <- 0
all_sum_fin$perc_state_1 <- (all_sum_fin$state_1/all_sum_fin$total_steps)*100
all_sum_fin$perc_state_2 <- (all_sum_fin$state_2/all_sum_fin$total_steps)*100
#mean(all_sum_fin$perc_state_1) #mean for state 1 slightly higher than state 2 (52 vs 48)
#mean(all_sum_fin$perc_state_2)
#write.csv(all_sum_fin,"FINAL RESULTS/number steps per state per fish.csv",row.names = F)
```
```{r plots of steps per state,eval=F}
all_sum_fin <- read.csv("FINAL RESULTS/number steps per state per fish.csv") #file made above
#histograms of step spreads #basically mostly 50/50
hist(all_sum_fin$perc_state_1,main = "Percentage steps state 1", xlab = "Percentage of steps")
hist(all_sum_fin$perc_state_2,main="Percentage steps state 2",xlab = "Percentage of steps")
ggplot(filter(all_sum,n_steps<100),aes(x=n_steps))+
geom_histogram()+
facet_grid(state~.)
```
Three fish have <25 steps in state 2 (and will be removed from further analysis)
* Barbel 46856
* Grayling 46910
* Grayling 46914
## Step length and turning angle distributions per state
Pooling all fish together, step length and turning angle distributions are plotted per state
```{r calculate distribution parameters,warning=F,message=F}
#read in all data for barbel and grayling, split into state
barb <- read.csv("data/steps with states/barbel_all.csv")
gray <- read.csv("data/steps with states/grayling_all.csv")
#this is using data before being made into the SSF dataframes, but still a general indicator for the HMM results section
#remove 3 fish that have too few steps in state 2
barb <- filter(barb,fish_id!=46856)
gray <- filter(gray,fish_id!=46910 & fish_id!=46914)
#rbind and do as one
all_d <- rbind(barb,gray)
sl_distrs <- all_d %>% group_by(species,State) %>% summarise(sl_shape = fit_distr(sl_,"gamma")[[2]][[1]],sl_scale=fit_distr(sl_,"gamma")[[2]][[2]], ta_kappa =fit_distr(ta_,"vonmises")$params$kappa,ta_mu =fit_distr(ta_,"vonmises")$params$mu )
reactable(sl_distrs)
```
Looking at the actual values for distributions, SL and TA distributions suggest that more directed movement in state 1 and with bigger step lengths, as expected for transit state.
Density plots per species/state:
Step lengths (with x axis limited to max 10 m):
```{r density plots for step lengths}
#generate 1000 estimates #
for(i in 1:nrow(sl_distrs)){
row_i <- sl_distrs[i,]
r_step <- rgamma(10000000,shape = row_i$sl_shape,
scale = row_i$sl_scale) %>% as.data.frame()
df <- data.frame(step_length = r_step,species=row_i$species,state=row_i$State)
names(df)[1] <- "step_length"
if(i==1){
steps <- df
} else{
steps <- rbind(steps,df)
}
}
#steps %>% group_by(mean_sl) %>% summarise(mean=mean(step_length))
ggplot(steps,aes(x=step_length,col=as.factor(state)))+
geom_density(adjust=5)+
scale_color_manual(values=wes_palette("Darjeeling1",n=5,type="discrete"))+
theme_bw()+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())+
facet_grid(.~species)+
labs(x="Step length (m)",y="Density")+
coord_cartesian(xlim=c(0,10)) #limit x coords since go up to 200 :<
```
Turning angles:
```{r density plots for turning angles}
#generate 1000 estimates # of means? or 1000 steps
for(i in 1:nrow(sl_distrs)){
row_i <- sl_distrs[i,]
r_ang <- circular::rvonmises(10000000,mu = row_i$ta_mu,
kappa = row_i$ta_kappa) %>% as.data.frame()
df <- data.frame(ta = r_ang,species=row_i$species,state=row_i$State)
names(df)[1] <- "turning_angle"
if(i==1){
ta <- df
} else{
ta <- rbind(ta,df)
}
}
ta$deg <- ta$turning_angle *180/pi
ggplot(ta,aes(x=turning_angle,col=as.factor(state)))+
geom_density()+
scale_color_manual(values=wes_palette("Darjeeling1",n=5,type="discrete"))+
theme_bw()+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())+
facet_grid(species~.)
```
To make a nicer plot and centered on 0 (e.g. directed movement), for values >pi, I will subtract 2pi
```{r ta plot 2}
ta$ta_2 <- case_when(ta$turning_angle>pi ~ta$turning_angle-2*pi,
ta$turning_angle<=pi ~ ta$turning_angle)
ggplot(ta,aes(x=ta_2,col=as.factor(state)))+
geom_density(adjust=1.5)+
scale_color_manual(values=wes_palette("Darjeeling1",n=5,type="discrete"))+
theme_bw()+
theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank())+
facet_grid(species~.)+
labs(y="Density",x="Radians")
```
Can see that state 1 will have more directed movement.
Together, the distribution plots show that in state 1 (transit), fish will tend to take longer step lengths and have more directed movement. This suggests we should (in simulations) use different distributions per state.
This can be tested statistically via the Kolmogorov Smirnov test
```{r ks test}
#barbel sl
bslst1 <- steps %>% filter(species=="Barbel" & state==1)
bslst2 <- steps %>% filter(species=="Barbel" & state==2)
ks.test(bslst1$step_length,bslst2$step_length)
#gray sl
gslst1 <- steps %>% filter(species=="Barbel" & state==1)
gslst2 <- steps %>% filter(species=="Barbel" & state==2)
ks.test(gslst1$step_length,gslst2$step_length)
#barbel ta
btast1 <- ta %>% filter(species=="Barbel" & state==1)
btast2 <- ta %>% filter(species=="Barbel" & state==2)
ks.test(btast1$turning_angle,btast2$turning_angle)
#gray ta
gtast1 <- ta %>% filter(species=="Barbel" & state==1)
gtast2 <- ta %>% filter(species=="Barbel" & state==2)
ks.test(gtast1$turning_angle,gtast2$turning_angle)
#then remove the items so they arent holding up so muhc space
rm(list=c("gtast1","gtast2","btast1","btast2","bslst1","bslst2","gslst1","gslst2",
"df","ta","steps"))
```
This suggests that we need to use separate movement kernels when simulating.
## Number of terms per model
Summary statistics for the number of terms in a model are shown below.
```{r read in all files re model structure and combine}
files <- list.files("ssf modelling - file per fish/files listing coeffs and params",full.names=T)
files_state1 <- files[grepl("state1",files)]
files_state2 <- files[grepl("state2",files)]
#remove fish with few steps
files_state2 <- files_state2[!(grepl("46856",files_state2))]
files_state2 <- files_state2[!(grepl("46910",files_state2))]
files_state2 <- files_state2[!(grepl("46914",files_state2))]
#state 1
for(i in 1:length(files_state1)){
dat <- read.csv(files_state1[[i]])
if(i==1){
st1_mod_dat <- dat
} else{
st1_mod_dat <- rbind(st1_mod_dat,dat)
}
}
#state 2
for(i in 1:length(files_state2)){
dat <- read.csv(files_state2[[i]])
if(i==1){
st2_mod_dat <- dat
} else{
st2_mod_dat <- rbind(st2_mod_dat,dat)
}
}
st1_mod_dat$fish_id <- as.factor(st1_mod_dat$fish_id)
st2_mod_dat$fish_id <- as.factor(st2_mod_dat$fish_id)
```
```{r shorten parameter names and make new table minus extra tod}
st1_mod_dat$short_names <- stringi::stri_replace_all_regex(st1_mod_dat$params,
pattern=c('water_velocity_end_strdised', 'diff_svg_abs_ang_end_strdised', 'diff_svg_abs_ang_start_strdised',"log_sl__strdised","temp_start_strdised","depth_start_strdised","depth_end_strdised","svg_end_strdised","diff_vel_abs_ang_end_strdised","cos_ta__strdised","tod_end_dusk","tod_end_night","tod_start_dusk","tod_start_night", "water_velocity_start_strdised","svg_start_strdised","diff_vel_abs_ang_start_strdised",":","tod_end_day","tod_start_day"),
replacement=c('WV(end)', 'diffSVGang(end)', 'diffSVGang(start)',"log(SL)","Temp","D(start)","D(end)","SVG(end)","diffVang(end)","cos(TA)","TOD","TOD","TOD","TOD",'WV(start)',"SVG(start)","diffVang(start)",":","TOD","TOD"),
vectorize=FALSE)
st1_mod_dat_short <- st1_mod_dat %>% group_by(fish_id,short_names) %>% slice(1)
st1_mod_dat_short <- st1_mod_dat_short[c(8,9,10)]
#state2
st2_mod_dat$short_names <- stringi::stri_replace_all_regex(st2_mod_dat$params,
pattern=c('water_velocity_end_strdised', 'diff_svg_abs_ang_end_strdised', 'diff_svg_abs_ang_start_strdised',"log_sl__strdised","temp_start_strdised","depth_start_strdised","depth_end_strdised","svg_end_strdised","diff_vel_abs_ang_end_strdised","cos_ta__strdised","tod_end_dusk","tod_end_night","tod_start_dusk","tod_start_night", "water_velocity_start_strdised","svg_start_strdised","diff_vel_abs_ang_start_strdised",":","tod_end_day","tod_start_day"),
replacement=c('WV(end)', 'diffSVGang(end)', 'diffSVGang(start)',"log(SL)","Temp","D(start)","D(end)","SVG(end)","diffVang(end)","cos(TA)","TOD","TOD","TOD","TOD",'WV(start)',"SVG(start)","diffVang(start)",":","TOD","TOD"),
vectorize=FALSE)
st2_mod_dat_short <- st2_mod_dat %>% group_by(fish_id,short_names) %>% slice(1)
st2_mod_dat_short <- st2_mod_dat_short[c(8,9,10)]
```
For state 1:
```{r get mean and max number of terms state1 }
sum <- st1_mod_dat_short %>% group_by(fish_id,species) %>% summarise(num_terms=length(unique(short_names)))
bb <- summarise(group_by(sum,species), mean=mean(num_terms),sd=sd(num_terms),se=std_error(num_terms),max=max(num_terms),min=min(num_terms),median=median(num_terms))
a <- summarise(group_by(sum), mean=mean(num_terms),sd=sd(num_terms),se=std_error(num_terms), max=max(num_terms),min=min(num_terms),median=median(num_terms))
a$species <- "Both"
tab <- rbind(a,bb)
tab2 <- tab[,c("species","mean","se","sd","max","min","median")]
tab2
```
For state 2:
```{r get mean and max number of terms state2}
sum <- st2_mod_dat_short %>% group_by(fish_id,species) %>% summarise(num_terms=length(unique(short_names)))
bb <- summarise(group_by(sum,species), mean=mean(num_terms),sd=sd(num_terms),se=std_error(num_terms),max=max(num_terms),min=min(num_terms),median=median(num_terms))
a <- summarise(group_by(sum), mean=mean(num_terms),sd=sd(num_terms),se=std_error(num_terms),max=max(num_terms),min=min(num_terms),median=median(num_terms))
a$species <- "Both"
tab <- rbind(a,bb)
tab2 <- tab[,c("species","mean","se","sd","max","min","median")]
tab2
```
State 2 (transit) had slightly fewer terms per model on average (mean is 2 fewer, median 1 fewer) but there's wide variation
```{r t test to compare the number of terms per state per species}
#get counts
sum <- st1_mod_dat_short %>% group_by(fish_id,species) %>% summarise(num_terms_st1=length(unique(short_names)))
sum2 <- st2_mod_dat_short %>% group_by(fish_id,species) %>% summarise(num_terms_st2=length(unique(short_names)))
#merge into 1 df
sums_comb <- merge(sum,sum2,by=c("fish_id","species"))
sums_barb <- sums_comb %>% filter(species=="Barbel")
sums_gray <- sums_comb %>% filter(species!="Barbel")
#t test barbel
t.test(sums_barb$num_terms_st1,sums_barb$num_terms_st2,paired=T)
#for barbel, not sig different, null hypothesis true
t.test(sums_gray$num_terms_st1,sums_gray$num_terms_st2,paired=T)
#also not sig diff
```
So, while state 1 has on average more terms, it is not significanlty higher for either species
## Terms affecting individual fish movement per state
Frequency of terms in individual models per state per species.
### State 1
```{r table of term frequency per species st1,eval=F}
st1_mod_dat_short$term <- st1_mod_dat_short$short_names
counts <- st1_mod_dat_short %>% group_by(species,term) %>% summarise(frequency=n())
barbcountsst1 <- counts %>% filter(species=="Barbel")
graycountsst1 <- counts %>% filter(species=="Grayling")
barbcountsst1$percent <- barbcountsst1$frequency/19*100
graycountsst1$percent <- graycountsst1$frequency/10*100
write.csv(barbcountsst1,"FINAL RESULTS/barb_term_freq_state1.csv",row.names = F)
write.csv(graycountsst1,"FINAL RESULTS/gray_term_freq_state1.csv",row.names = F)
```
Barbel:
```{r barb all termsst1}
barbcountsst1 <- read.csv("FINAL RESULTS/barb_term_freq_state1.csv")
reactable(barbcountsst1)
```
Grayling:
```{r gray all termsst1}
graycountsst1 <- read.csv("FINAL RESULTS/gray_term_freq_state1.csv")
reactable(graycountsst1)
```
### State 2
```{r table of term frequency per species st2,eval=F}
st2_mod_dat_short$term <- st2_mod_dat_short$short_names
counts <- st2_mod_dat_short %>% group_by(species,term) %>% summarise(frequency=n())
barbcountsst2 <- counts %>% filter(species=="Barbel")
graycountsst2 <- counts %>% filter(species=="Grayling")
barbcountsst2$percent <- barbcountsst2$frequency/18*100
graycountsst2$percent <- graycountsst2$frequency/8*100
#display terms in >50% of models
barbcountshalf <- barbcountsst2 %>% filter(percent>=50)
graycountshalf <- graycountsst2 %>% filter(percent>=50)
barbcountshalf <- barbcountshalf[2:4]
graycountshalf <- graycountshalf[2:4]
b <- reactable(barbcountshalf)
g <-reactable(graycountshalf)
write.csv(barbcountsst2,"FINAL RESULTS/barb_term_freq_state2.csv",row.names = F)
write.csv(graycountsst2,"FINAL RESULTS/gray_term_freq_state2.csv",row.names = F)
```
Barbel:
```{r barb all termsst2}
barbcountsst2 <- read.csv("FINAL RESULTS/barb_term_freq_state2.csv")
reactable(barbcountsst2)
```
Grayling:
```{r gray all termsst2}
graycountsst2 <- read.csv("FINAL RESULTS/gray_term_freq_state2.csv")
reactable(graycountsst2)
```
### Plot of term frequency in each state
Plot for barbel
```{r barbel term frequency plot}
barbcountsst1$state <- "state 1"
barbcountsst2$state <- "state 2"
#add empties
st1terms <- unique(barbcountsst1$term)
st2terms <- unique(barbcountsst2$term)
miss_st1 <- st2terms[!(st2terms%in%st1terms)]
#b <- data.frame(term=miss_st1,frequency=rep(0,length(miss_st1)),state="state 1")
#barbcountsst1 <- bind_rows(barbcountsst1,b)
miss_st2 <- st1terms[!(st1terms%in%st2terms)]
bb <- data.frame(term=miss_st2,frequency=rep(0,length(miss_st2)),state="state 2")
barbcountsst2 <- bind_rows(barbcountsst2,bb)
barbcounts_all <- rbind(barbcountsst1,barbcountsst2)
ggplot(barbcounts_all,aes(x=term,y=frequency,fill=state))+
geom_bar(stat='identity',position="dodge",col="black")+
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))+
scale_fill_manual(values=c("grey74","grey20"),name="State")+
labs(x="Term",y="Frequency")
```
Plot for grayling
```{r grayling term frequency plot}
graycountsst1$state <- "state 1"
graycountsst2$state <- "state 2"
#add empties
st1terms <- unique(graycountsst1$term)
st2terms <- unique(graycountsst2$term)
miss_st1 <- st2terms[!(st2terms%in%st1terms)]
b <- data.frame(term=miss_st1,frequency=rep(0,length(miss_st1)),state="state 1")
graycountsst1 <- bind_rows(graycountsst1,b)
miss_st2 <- st1terms[!(st1terms%in%st2terms)]
bb <- data.frame(term=miss_st2,frequency=rep(0,length(miss_st2)),state="state 2")
graycountsst2 <- bind_rows(graycountsst2,bb)
graycounts_all <- rbind(graycountsst1,graycountsst2)
ggplot(graycounts_all,aes(x=term,y=frequency,fill=state))+
geom_bar(stat='identity',position="dodge",col="black")+
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))+
scale_fill_manual(values=c("grey74","grey20"),name="State")+
labs(x="Term",y="Frequency")
```
```{r read in individual models and split to barbel and grayling for state 1,eval=F}
files <- list.files("ssf modelling - file per fish/files listing coeffs and params",full.names=T)
files_st1 <- files[grepl("state1",files)]
####state 1####
for(i in 1:length(files_st1)){
dat <- read.csv(files_st1[[i]])
if(i==1){
ind_mod_dat_st1 <- dat
} else{
ind_mod_dat_st1 <- rbind(ind_mod_dat_st1,dat)
}
}
ind_mod_dat_st1$state <- 1
ind_mod_dat_st1$fish_id <- as.factor(ind_mod_dat_st1$fish_id)
ind_mod_dat_st1 <- ind_mod_dat_st1 %>% rename(terms=params) %>%
select(-c("X"))
barb_ind_mod_dat_st1 <- ind_mod_dat_st1 %>% filter(species=="Barbel")
gray_ind_mod_dat_st1 <- ind_mod_dat_st1 %>% filter(species=="Grayling")
barb_terms_st1 <-unique(barb_ind_mod_dat_st1$terms)
gray_terms_st1 <-unique(gray_ind_mod_dat_st1$terms)
#set missing terms for fish that dont have them to be 0 # do separaetly
#do as loop
#barbel
barb_fish_id <- unique(barb_ind_mod_dat_st1$fish_id)
for(i in 1:length(barb_fish_id)){
id <- barb_fish_id[[i]]
dat <- filter(barb_ind_mod_dat_st1,fish_id==id)
dat_terms <- unique(dat$terms)
new_terms <- barb_terms_st1[!(barb_terms_st1 %in% dat_terms)]
#make new df
new_dat <- cbind(new_terms,0,NA,NA,NA,NA) %>% as.data.frame() # save as above
new_dat$fish_id <- id
new_dat$species <- dat$species[1]
new_dat$state <- 1 #set so st1
names(new_dat) <- names(dat)
new_dat <- rbind(dat,new_dat)
if(i==1){
barb_ind_mod_st1_final <- new_dat
} else{
barb_ind_mod_st1_final <- rbind(barb_ind_mod_st1_final,new_dat)
}
}
#grayling
gray_fish_id <- unique(gray_ind_mod_dat_st1$fish_id)
for(i in 1:length(gray_fish_id)){
id <- gray_fish_id[[i]]
dat <- filter(gray_ind_mod_dat_st1,fish_id==id)
dat_terms <- unique(dat$terms)
new_terms <- gray_terms_st1[!(gray_terms_st1 %in% dat_terms)]
#make new df
new_dat <- cbind(new_terms,0,NA,NA,NA,NA) %>% as.data.frame() # save as above
new_dat$fish_id <- id
new_dat$species <- dat$species[1]
new_dat$state <- 1 #set so st1
names(new_dat) <- names(dat)
new_dat <- rbind(dat,new_dat)
if(i==1){
gray_ind_mod_st1_final <- new_dat
} else{
gray_ind_mod_st1_final <- rbind(gray_ind_mod_st1_final,new_dat)
}
}
barb_ind_mod_st1_final$coef <- as.numeric(barb_ind_mod_st1_final$coef)
gray_ind_mod_st1_final$coef <- as.numeric(gray_ind_mod_st1_final$coef)
#rename for plots
barb_ind_mod_st1_final$short_names <- stringi::stri_replace_all_regex(barb_ind_mod_st1_final$terms,
pattern=c('water_velocity_end_strdised', 'diff_svg_abs_ang_end_strdised', 'diff_svg_abs_ang_start_strdised',"log_sl__strdised","temp_start_strdised","depth_start_strdised","depth_end_strdised","svg_end_strdised","diff_vel_abs_ang_end_strdised","cos_ta__strdised","tod_end_dusk","tod_end_night","tod_start_dusk","tod_start_night", "water_velocity_start_strdised","svg_start_strdised","diff_vel_abs_ang_start_strdised",":","tod_end_day","tod_start_day"),
replacement=c('WV(end)', 'diffSVGang(end)', 'diffSVGang(start)',"log(SL)","Temp","D(start)","D(end)","SVG(end)","diffVang(end)","cos(TA)","Dusk","Night","Dusk","Night",'WV(start)',"SVG(start)","diffVang(start)",":","TOD","TOD"),
vectorize=FALSE)
gray_ind_mod_st1_final$short_names <- stringi::stri_replace_all_regex(gray_ind_mod_st1_final$terms,
pattern=c('water_velocity_end_strdised', 'diff_svg_abs_ang_end_strdised', 'diff_svg_abs_ang_start_strdised',"log_sl__strdised","temp_start_strdised","depth_start_strdised","depth_end_strdised","svg_end_strdised","diff_vel_abs_ang_end_strdised","cos_ta__strdised","tod_end_dusk","tod_end_night","tod_start_dusk","tod_start_night", "water_velocity_start_strdised","svg_start_strdised","diff_vel_abs_ang_start_strdised",":","tod_end_day","tod_start_day"),
replacement=c('WV(end)', 'diffSVGang(end)', 'diffSVGang(start)',"log(SL)","Temp","D(start)","D(end)","SVG(end)","diffVang(end)","cos(TA)","Dusk","Night","Dusk","Night",'WV(start)',"SVG(start)","diffVang(start)",":","Day","Day"),
vectorize=FALSE)
#save csvs now
write.csv(barb_ind_mod_st1_final,"FINAL RESULTS/barbel_all_coefs_state1.csv")
write.csv(gray_ind_mod_st1_final,"FINAL RESULTS/grayling_all_coefs_state1.csv")
```
```{r read in individual models and split to barbel and grayling for state 2,eval=F}
files <- list.files("ssf modelling - file per fish/files listing coeffs and params",full.names=T)
files_st2 <- files[grepl("state2",files)]
files_st2 <- files_st2[!(grepl("46856",files_st2))]
files_st2 <- files_st2[!(grepl("46910",files_st2))]
files_st2 <- files_st2[!(grepl("46914",files_st2))]
####state 2####
for(i in 1:length(files_st2)){
dat <- read.csv(files_st2[[i]])
if(i==1){
ind_mod_dat_st2 <- dat
} else{
ind_mod_dat_st2 <- rbind(ind_mod_dat_st2,dat)
}
}
ind_mod_dat_st2$state <- 2
ind_mod_dat_st2$fish_id <- as.factor(ind_mod_dat_st2$fish_id)
ind_mod_dat_st2 <- ind_mod_dat_st2 %>% rename(terms=params) %>%
select(-c("X"))
barb_ind_mod_dat_st2 <- ind_mod_dat_st2 %>% filter(species=="Barbel")
gray_ind_mod_dat_st2 <- ind_mod_dat_st2 %>% filter(species=="Grayling")
barb_terms_st2 <-unique(barb_ind_mod_dat_st2$terms)
gray_terms_st2 <-unique(gray_ind_mod_dat_st2$terms)
#set missing terms for fish that dont have them to be 0 # do separaetly
#do as loop
#barbel
barb_fish_id <- unique(barb_ind_mod_dat_st2$fish_id)
for(i in 1:length(barb_fish_id)){
id <- barb_fish_id[[i]]
dat <- filter(barb_ind_mod_dat_st2,fish_id==id)
dat_terms <- unique(dat$terms)
new_terms <- barb_terms_st2[!(barb_terms_st2 %in% dat_terms)]
#make new df
new_dat <- cbind(new_terms,0,NA,NA,NA,NA) %>% as.data.frame() # save as above
new_dat$fish_id <- id
new_dat$species <- dat$species[1]
new_dat$state <- 2 #set so st2
names(new_dat) <- names(dat)
new_dat <- rbind(dat,new_dat)
if(i==1){
barb_ind_mod_st2_final <- new_dat
} else{
barb_ind_mod_st2_final <- rbind(barb_ind_mod_st2_final,new_dat)
}
}
#grayling
gray_fish_id <- unique(gray_ind_mod_dat_st2$fish_id)
for(i in 1:length(gray_fish_id)){
id <- gray_fish_id[[i]]
dat <- filter(gray_ind_mod_dat_st2,fish_id==id)
dat_terms <- unique(dat$terms)
new_terms <- gray_terms_st2[!(gray_terms_st2 %in% dat_terms)]
#make new df
new_dat <- cbind(new_terms,0,NA,NA,NA,NA) %>% as.data.frame() # save as above
new_dat$fish_id <- id
new_dat$species <- dat$species[1]
new_dat$state <- 2 #set so st2
names(new_dat) <- names(dat)
new_dat <- rbind(dat,new_dat)
if(i==1){
gray_ind_mod_st2_final <- new_dat
} else{
gray_ind_mod_st2_final <- rbind(gray_ind_mod_st2_final,new_dat)
}
}
barb_ind_mod_st2_final$coef <- as.numeric(barb_ind_mod_st2_final$coef)
gray_ind_mod_st2_final$coef <- as.numeric(gray_ind_mod_st2_final$coef)
#rename for plots
barb_ind_mod_st2_final$short_names <- stringi::stri_replace_all_regex(barb_ind_mod_st2_final$terms,
pattern=c('water_velocity_end_strdised', 'diff_svg_abs_ang_end_strdised', 'diff_svg_abs_ang_start_strdised',"log_sl__strdised","temp_start_strdised","depth_start_strdised","depth_end_strdised","svg_end_strdised","diff_vel_abs_ang_end_strdised","cos_ta__strdised","tod_end_dusk","tod_end_night","tod_start_dusk","tod_start_night", "water_velocity_start_strdised","svg_start_strdised","diff_vel_abs_ang_start_strdised",":","tod_end_day","tod_start_day"),
replacement=c('WV(end)', 'diffSVGang(end)', 'diffSVGang(start)',"log(SL)","Temp","D(start)","D(end)","SVG(end)","diffVang(end)","cos(TA)","Dusk","Night","Dusk","Night",'WV(start)',"SVG(start)","diffVang(start)",":","Day","Day"),
vectorize=FALSE)
gray_ind_mod_st2_final$short_names <- stringi::stri_replace_all_regex(gray_ind_mod_st2_final$terms,
pattern=c('water_velocity_end_strdised', 'diff_svg_abs_ang_end_strdised', 'diff_svg_abs_ang_start_strdised',"log_sl__strdised","temp_start_strdised","depth_start_strdised","depth_end_strdised","svg_end_strdised","diff_vel_abs_ang_end_strdised","cos_ta__strdised","tod_end_dusk","tod_end_night","tod_start_dusk","tod_start_night", "water_velocity_start_strdised","svg_start_strdised","diff_vel_abs_ang_start_strdised",":","tod_end_day","tod_start_day"),
replacement=c('WV(end)', 'diffSVGang(end)', 'diffSVGang(start)',"log(SL)","Temp","D(start)","D(end)","SVG(end)","diffVang(end)","cos(TA)","Dusk","Night","Dusk","Night",'WV(start)',"SVG(start)","diffVang(start)",":","Day","Day"),
vectorize=FALSE)
#save csvs now
write.csv(barb_ind_mod_st2_final,"FINAL RESULTS/barbel_all_coefs_state2.csv")
write.csv(gray_ind_mod_st2_final,"FINAL RESULTS/grayling_all_coefs_state2.csv")
#make big df w both states epr species
barb_all <- rbind(barb_ind_mod_st1_final,barb_ind_mod_st2_final)
gray_all <- rbind(gray_ind_mod_st1_final,gray_ind_mod_st2_final)
#save csvs of both
write.csv(barb_all,"FINAL RESULTS/barbel_all_coefs_both_states.csv",row.names = F)
write.csv(gray_all,"FINAL RESULTS/grayling_all_coefs_both_states.csv",row.names = F)
```
```{r quick ggplot to have a look,eval=F}
bb <- barb_all %>% filter(terms=="depth_end_strdised" | terms=="water_velocity_end_strdised")
ggplot(bb,aes(x=short_names,y=coef,col=as.factor(state)))+
geom_boxplot()
```
## Model performance
First, model performance summaries per species.
```{r performance of individual models state1, error=F}
# need to read in EVERY individual model <- via folders beginning with 46
#then take individual model - can id via fish id being pasted into the general structure
#then count n terms etc
#and rbind to a big df
#state 1
model_folders <- list.files("ssf modelling - file per fish")
model_folders <- model_folders[grepl("46",model_folders)] #only keep individual fish models
#make blank df to append to
i <-1
for(i in 1:length(model_folders)){
file_to_read <- paste0("ssf modelling - file per fish/",model_folders[i],"/final_model_state1",model_folders[i],".rds")
if(file.exists(file_to_read)==T){
fish_mod2 <- readRDS(file_to_read)
dat <- read.csv(paste0('data/final ssf + hmm df/',model_folders[i]," ssf_data_20sec_time_gap_60min_track_state1.csv"))
spec <- dat$species[1]
form <- as.character(fish_mod2$formula)[[3]]
fish_mod <-fit_issf(data=dat,formula=as.formula(paste0("case_~",form)),model=T)
conc <- fish_mod$model$concordance[6:7] # want to keep last 2 cols, conc and std
n_terms <- summary(fish_mod)$coef %>% nrow() %>% as.numeric()
n_terms_sig_p <- summary(fish_mod)$coef %>% as.data.frame() %>% filter(`Pr(>|z|)`<0.05) %>% nrow() %>% as.numeric()
#remove nas
dat2 <- na.omit(dat)
pred <- predict(fish_mod$model,data=dat2,type="survival")
auc_val <- auc(dat2$case_,pred)
auc_val <- auc_val[[1]]
#form df
dat <- data.frame(model_folders[i],spec,n_terms,n_terms_sig_p,conc[1],conc[2],auc_val)
names(dat) <- c("fish_id","species","number_terms","terms_with_p<0.05","concordance","se concordance","auc")
if(i==1){
final_dat <- dat
}else {
final_dat <- rbind(final_dat,dat)
}
rownames(final_dat) <- NULL
}
}
#reactable(final_dat)
write.csv(final_dat,"FINAL RESULTS/model_infostate1_all_ind.csv",row.names = F)
```
```{r performance of individual models state2, error=F}
#state 2
model_folders <- list.files("ssf modelling - file per fish")
model_folders <- model_folders[grepl("46",model_folders)] #only keep individual fish models
#make blank df to append to
for(i in 1:length(model_folders)){
file_to_read <- paste0("ssf modelling - file per fish/",model_folders[i],"/final_model_state2",model_folders[i],".rds")
if(file.exists(file_to_read)==T){
fish_mod2 <- readRDS(file_to_read)
dat <- read.csv(paste0('data/final ssf + hmm df/',model_folders[i]," ssf_data_20sec_time_gap_60min_track_state2.csv"))
spec <- dat$species[1]
form <- as.character(fish_mod2$formula)[[3]]
fish_mod <-fit_issf(data=dat,formula=as.formula(paste0("case_~",form)),model=T)
conc <- fish_mod$model$concordance[6:7] # want to keep last 2 cols, conc and std
n_terms <- summary(fish_mod)$coef %>% nrow() %>% as.numeric()
n_terms_sig_p <- summary(fish_mod)$coef %>% as.data.frame() %>% filter(`Pr(>|z|)`<0.05) %>% nrow() %>% as.numeric()
#remove nas
dat2 <- na.omit(dat)
#form df
dat <- data.frame(model_folders[i],spec,n_terms,n_terms_sig_p,conc[1],conc[2],auc_val)
names(dat) <- c("fish_id","species","number_terms","terms_with_p<0.05","concordance","se concordance","auc")
if(i==1){
final_dat <- dat
}else {
final_dat <- rbind(final_dat,dat)
}
rownames(final_dat) <- NULL
}
}
#reactable(final_dat)
write.csv(final_dat,"FINAL RESULTS/model_infostate2_all_ind.csv",row.names = F)
```
```{r model performance}
mod_perf_st1 <- read.csv("FINAL RESULTS/model_infostate1_all_ind.csv")
mod_perf_st1_sum <- mod_perf_st1 %>% group_by(species) %>% summarise(mean_concordance=mean(concordance), se_concordance=std_error(concordance), sd_concordance=sd(concordance),max_concordance=max(concordance),min_concordance=min(concordance),mean_auc=mean(auc),se_auc=std_error(auc),sd_auc=sd(auc),max_auc=max(auc),min_auc=min(auc))
print("State 1")
reactable(mod_perf_st1_sum)
## state 2
mod_perf_st2 <- read.csv("FINAL RESULTS/model_infostate2_all_ind.csv")
mod_perf_st2_sum <- mod_perf_st2 %>% filter(!(fish_id %in% c( 46856,46910,46914))) %>% group_by(species) %>% summarise(mean_concordance=mean(concordance), se_concordance=std_error(concordance), sd_concordance=sd(concordance),max_concordance=max(concordance),min_concordance=min(concordance),mean_auc=mean(auc),se_auc=std_error(auc),sd_auc=sd(auc),max_auc=max(auc),min_auc=min(auc))
print("State 2")
reactable(mod_perf_st2_sum)
```
Both states have slightly higher overall concordance compared to the pooled data.
The grayling that has a concordance of 1 is likely entirely composed of outlier terms so will not affect population models.
Comparing concordance values for states 1, 2 and pooled via a t test
```{r comparing concordance values}
mod_perf_st1 <- read.csv("FINAL RESULTS/model_infostate1_all_ind.csv")
mod_perf_st2 <- read.csv("FINAL RESULTS/model_infostate2_all_ind.csv")
mod_perf_pooled <- read.csv("FINAL RESULTS/model_info_all_ind.csv")
mod_perf_st1$data <- "state1"
mod_perf_st1$st1conc <- mod_perf_st1$concordance
mod_perf_st2$data <- "state2"
mod_perf_st2$st2conc <- mod_perf_st2$concordance
mod_perf_st2 <- mod_perf_st2 %>% filter(!(fish_id %in% c( 46856,46910,46914)))
mod_perf_pooled$data <- "pooled"
mod_perf_pooled$pooledconc <- mod_perf_pooled$concordance
#make one df
mod_perf_all <- merge(mod_perf_st1,mod_perf_st2,by=c("fish_id","species")) %>% merge(.,mod_perf_pooled,by=c("fish_id","species"))
mod_perf_all %>% group_by(species) %>% summarise(`State 1 and pooled`=t.test(st1conc,pooledconc,paired=T)[[3]],`State 2 and pooled`=t.test(st2conc,pooledconc,paired=T)[[3]],`State 1 and state 2`=t.test(st1conc,st2conc,paired=T)[[3]]) %>% rename(Species=species) %>% #kable(caption="T test p values, comparing model concordances")
reactable()
```
There is no significant difference in model concordances, though state 1 v pooled is near-significant.
```{r calc barbel means both states,eval=F,warning=F}
barb_all <- read.csv("FINAL RESULTS/barbel_all_coefs_both_states.csv")
barb_means <- barb_all %>% group_by(terms,short_names,state) %>% summarise(coef=coef,q1=quantile(coef,na.rm=T)[2],q3=quantile(coef,na.rm=T)[4],iqr=q3-q1,low.fence=q1-1.5*iqr,high.fence=q3+1.5*iqr) %>% filter(coef<=high.fence & coef>=low.fence) %>% summarise(mean_coef=mean(coef,na.rm=T),sd_coef=sd(coef,na.rm=T),se=std_error(coef),low.95=(mean(coef,na.rm=T)-(2*std_error(coef))),up.95=(mean(coef,na.rm=T)+(2*std_error(coef))), p=t.test(coef,conf.level = 0.95)[[3]])
#or mutate for low and up 95
barb_means <- barb_means %>% mutate(low.95 = mean_coef - qt(1 - (0.05 / 2), length(unique(barb_all$fish_id)) - 1) * se,
up.95 = mean_coef + qt(1 - (0.05 / 2), length(unique(barb_all$fish_id)) - 1) * se)
barb_means$state <- as.factor(barb_means$state)
write.csv(barb_means,"FINAL RESULTS/barbel_means_both_states.csv",row.names = F)
```
```{r calc gralying means,eval=F,warning=F}
gray_all <- read.csv("FINAL RESULTS/grayling_all_coefs_both_states.csv")
gray_means <- gray_all %>% group_by(terms,short_names,state) %>% summarise(coef=coef,q1=quantile(coef,na.rm=T)[2],q3=quantile(coef,na.rm=T)[4],iqr=q3-q1,low.fence=q1-1.5*iqr,high.fence=q3+1.5*iqr) %>% filter(coef<=high.fence & coef>=low.fence) %>% summarise(mean_coef=mean(coef,na.rm=T),sd_coef=sd(coef,na.rm=T),se=std_error(coef),low.95=(mean(coef,na.rm=T)-(2*std_error(coef))),up.95=(mean(coef,na.rm=T)+(2*std_error(coef))), p=t.test(coef,conf.level = 0.95)[[3]])
gray_means <- gray_means %>% mutate(low.95 = mean_coef - qt(1 - (0.05 / 2), length(unique(gray_all$fish_id)) - 1) * se,
up.95 = mean_coef + qt(1 - (0.05 / 2), length(unique(gray_all$fish_id)) - 1) * se)
write.csv(gray_means,"FINAL RESULTS/grayling_means_both_states.csv")
```
```{r calculate pooled means,eval=F}
#read in all coefs pooled df
#barbel
barb_all <- read.csv("FINAL RESULTS/barbel_all_coefs_pooled.csv")
barb_means <- barb_all %>% group_by(terms,short_names) %>% summarise(coef=coef,q1=quantile(coef,na.rm=T)[2],q3=quantile(coef,na.rm=T)[4],iqr=q3-q1,low.fence=q1-1.5*iqr,high.fence=q3+1.5*iqr) %>% filter(coef<=high.fence & coef>=low.fence) %>% summarise(mean_coef=mean(coef,na.rm=T),sd_coef=sd(coef,na.rm=T),se=std_error(coef),low.95=(mean(coef,na.rm=T)-(2*std_error(coef))),up.95=(mean(coef,na.rm=T)+(2*std_error(coef))), p=t.test(coef,conf.level = 0.95)[[3]])
barb_means <- barb_means %>% mutate(low.95 = mean_coef - qt(1 - (0.05 / 2), length(unique(barb_all$fish_id)) - 1) * se,
up.95 = mean_coef + qt(1 - (0.05 / 2), length(unique(barb_all$fish_id)) - 1) * se)
write.csv(barb_means,"FINAL RESULTS/barbel_means_pooled.csv",row.names = F)
#grayling
gray_all <- read.csv("FINAL RESULTS/grayling_all_coefs_pooled.csv")
gray_means <- gray_all %>% group_by(terms,short_names) %>% summarise(coef=coef,q1=quantile(coef,na.rm=T)[2],q3=quantile(coef,na.rm=T)[4],iqr=q3-q1,low.fence=q1-1.5*iqr,high.fence=q3+1.5*iqr) %>% filter(coef<=high.fence & coef>=low.fence) %>% summarise(mean_coef=mean(coef,na.rm=T),sd_coef=sd(coef,na.rm=T),se=std_error(coef),low.95=(mean(coef,na.rm=T)-(2*std_error(coef))),up.95=(mean(coef,na.rm=T)+(2*std_error(coef))), p=t.test(coef,conf.level = 0.95)[[3]])
gray_means <- gray_means %>% mutate(low.95 = mean_coef - qt(1 - (0.05 / 2), length(unique(gray_all$fish_id)) - 1) * se,
up.95 = mean_coef + qt(1 - (0.05 / 2), length(unique(gray_all$fish_id)) - 1) * se)
#the qt calculates the specific value to multiply by
write.csv(gray_means,"FINAL RESULTS/grayling_means_pooled.csv",row.names = F)
```
## Boxplots per state
```{r boxplots}
barb_all <- read.csv("FINAL RESULTS/barbel_all_coefs_both_states.csv")
gray_all <- read.csv("FINAL RESULTS/grayling_all_coefs_both_states.csv")
ggplot(barb_all,aes(x=short_names,y=coef,col=as.factor(state)))+
geom_boxplot()+
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))+
scale_color_manual(name="Data type", values=wes_palette("Zissou1",n=5,type="discrete")[c(1,5)])+
scale_y_continuous(limits=c(-4,4))+
labs(title="Barbel")
ggplot(gray_all,aes(x=short_names,y=coef,col=as.factor(state)))+
geom_boxplot()+
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))+
scale_color_manual(name="Data type", values=wes_palette("Zissou1",n=5,type="discrete")[c(1,5)])+
scale_y_continuous(limits=c(-5,7))+
labs(title="Grayling")
```
## Population models
Using a t-test to see whether a term is significantly different to 0 on average.
```{r barbel plot means + CI of both states + pooled,warning=F}
barb_means_bothst <- read.csv("FINAL RESULTS/barbel_means_both_states.csv")
barb_means_pooled <- read.csv("FINAL RESULTS/barbel_means_pooled.csv")
barb_means_pooled$state <- "pooled"
barb_means <- rbind(barb_means_bothst,barb_means_pooled)
barb_means$diff_to_0 <- case_when(barb_means$p<=0.05 ~ "Yes",barb_means$p>0.05~"No")
barb_means$diff_to_0[is.na(barb_means$diff_to_0)] <-"No"
terms_not_0 <- filter(barb_means,p<=0.05)$term %>% unique()
barb_means_not_0 <- barb_means %>% filter(terms %in% terms_not_0)
barb_means_not_0$state <- as.factor(barb_means_not_0$state)
#or redo and just filter terms where all means = 0
terms_not_0 <- filter(barb_means,mean_coef!=0)$term %>% unique()
barb_means_not_0 <- barb_means %>% filter(terms %in% terms_not_0)
barb_means_not_0$state <- as.factor(barb_means_not_0$state)
bpl <- ggplot(barb_means_not_0,aes(x=short_names,y=mean_coef,col=state,alpha=diff_to_0,shape=diff_to_0))+
geom_hline(aes(yintercept=0),col="red",linetype="dashed",alpha=0.5)+
geom_point(position = position_dodge(width=0.5))+
geom_errorbar(aes(x=short_names,ymax=up.95,ymin=low.95,col=state,alpha=diff_to_0), position = position_dodge(width = 0.5))+
scale_alpha_discrete(range=c(0.3,1),name="Is term sig dif to 0?")+
labs(title="Barbel",x="Term",y="Mean coefficient size")+
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))+
theme_bw()+
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),legend.position="bottom")+
scale_color_manual(name="Data type", values=wes_palette("Zissou1",n=5,type="discrete")[c(1,3,5)],
labels = c("State 1","State 2","Both-states"))+
scale_shape_manual(values=c(17,16),name="Is term sig dif to 0?")#+
# scale_colour_discrete(labels = c("State 1","State 2","Both-states"))
bpl
ggsave(filename = "figures/barbel_states_2_FINAL.tiff",plot = bpl,width=7,height=5)
```
For barbel, on average:
* positive effect of depth when transiting, no effect when searching
* move faster when in deeper water when transiting, more than when searching
* move faster in high SVG when transiting, no effect when searching
Generally a lot of overlap between terms though, and with the pooled means
Terms in population model (where terms sig diff to 0):
```{r barbel number terms present in each of the 3 mdoels}
barb_means %>% filter(diff_to_0=="Yes") %>%
group_by(state) %>% summarise(number_of_terms=length(unique(short_names))) %>% reactable()
```
Terms that are in only one of the two states overall (including those which are sig same as 0)
```{r barbel terms present in only one or the other state}
bst1 <- barb_means_bothst %>% filter(state==1& mean_coef!=0)
bst2 <- barb_means_bothst %>% filter(state==2& mean_coef!=0)
st1_only <- unique(bst1$short_names)[!(unique(bst1$short_names)%in%(unique(bst2$short_names)))]
st2_only <- unique(bst2$short_names)[!(unique(bst2$short_names)%in%(unique(bst1$short_names)))]
print("State 1 only terms: ")
print(paste0(st1_only))
print("State 2 only terms: ")
print(paste0(st2_only))
```
So we can see which terms are ONLY in state 1 (ergo, differ between the two).
All terms in state 2 are in state 1 for barbel.
Grayling
```{r grayling plot means + CI of both states + pooled,warning=F}
gray_means_bothst <- read.csv("FINAL RESULTS/grayling_means_both_states.csv")
gray_means_pooled <- read.csv("FINAL RESULTS/grayling_means_pooled.csv")
gray_means_pooled$state <- "pooled"
gray_means_pooled <- gray_means_pooled %>% filter(short_names!="SVG(end):Night")
gray_means_bothst <- gray_means_bothst #%>% select(-X)
gray_means_bothst$state <- as.character(gray_means_bothst$state)
gray_means <- bind_rows(gray_means_bothst,gray_means_pooled)
gray_means$diff_to_0 <- case_when(gray_means$p<=0.05 ~ "Yes",gray_means$p>0.05~"No")
gray_means$diff_to_0[is.na(gray_means$diff_to_0)] <-"No"
terms_not_0 <- filter(gray_means,p<=0.05)$term %>% unique()
#or just remove where all == 0.000 etc
terms_not_0 <- filter(gray_means,short_names!="log(SL):Night") %>% filter(mean_coef!=0)#$term #%>% unique()
terms_not_0 <- terms_not_0$terms %>% unique()
gray_means_not_0 <- gray_means %>% filter(terms %in% terms_not_0)
gray_means_not_0$state <- as.factor(gray_means_not_0$state)
gpl <- ggplot(gray_means_not_0,aes(x=short_names,y=mean_coef,col=state,alpha=diff_to_0,shape=diff_to_0))+
geom_hline(aes(yintercept=0),col="red",linetype="dashed",alpha=0.5)+
geom_point(position = position_dodge(width=0.5))+
geom_errorbar(aes(x=short_names,ymax=up.95,ymin=low.95,col=state,alpha=diff_to_0), position = position_dodge(width = 0.5))+
scale_alpha_discrete(range=c(0.3,1),name="Is term sig dif to 0?")+
labs(title="Grayling",x="Term",y="Mean coefficient size")+
theme_bw()+
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),legend.position = "bottom")+
scale_color_manual(name="Data type", values=wes_palette("Zissou1",n=5,type="discrete")[c(1,3,5)],
labels = c("State 1","State 2","Both-states"))+ scale_shape_manual(values=c(17,16),name="Is term sig dif to 0?")
gpl
ggsave(filename = "figures/gray_states_2_FINAL.tiff",plot = gpl,width=7,height=5)
```
Terms in population model (where terms sig diff to 0):
```{r grayling number terms present in each of the 3 mdoels}
gray_means %>% filter(diff_to_0=="Yes") %>%
group_by(state) %>% summarise(number_of_terms=length(unique(short_names))) %>% reactable()
```
Terms that are in only one of the two states overall (including those which are sig same as 0), but removing terms where the mean was 0 (e.g. only present in outliers).
```{r grayling terms present in only one or the other state}
gst1 <- gray_means_bothst %>% filter(state==1& mean_coef!=0)
gst2 <- gray_means_bothst %>% filter(state==2& mean_coef!=0)
st1_only <- unique(gst1$short_names)[!(unique(gst1$short_names)%in%(unique(gst2$short_names)))]
st2_only <- unique(gst2$short_names)[!(unique(gst2$short_names)%in%(unique(gst1$short_names)))]
print("State 1 only terms: ")
print(paste0(st1_only))
print("State 2 only terms: ")
print(paste0(st2_only))
```
For both barbel and grayling, need to compare statistically coefficients for each term between state.
## Differences between states on average for each term
T-tests to, for every term, see if there's a significant difference between states in each species. Only saves terms that are significantly different between states.
This is using paired T-tests, since the sample groups use the same fish. Fish who didn't have both states don't count here.
```{r barbebl t test diff between states}
barb_all <- read.csv("FINAL RESULTS/barbel_all_coefs_both_states.csv")
#want to add in pooled stuff too !!
#need to remove outliers and then fish ID that are only in 1 state
a <- barb_all %>% filter(state==1)
#outliers
a2 <- a %>% group_by(short_names) %>% summarise(q1=quantile(coef,na.rm=T)[2],q3=quantile(coef,na.rm=T)[4],iqr=q3-q1,low.fence=q1-1.5*iqr,high.fence=q3+1.5*iqr) %>% ungroup() #%>% filter(coef<=high.fence & coef>=low.fence)# %>% dplyr::ungroup()
a3 <- merge(a,a2,by="short_names")
a4 <- a3 %>%mutate(outlier= case_when(coef <= high.fence & coef>=low.fence ~"No",
coef>high.fence | coef<low.fence ~ "Yes")) %>% filter(outlier=="No")
barb_id_st1 <- unique(a4$fish_id) #barb in st 1
b <- barb_all %>% filter(state==2)
b2 <- b %>% group_by(short_names) %>% summarise(q1=quantile(coef)[2],q3=quantile(coef)[4],iqr=q3-q1,low.fence=q1-1.5*iqr,high.fence=q3+1.5*iqr) %>% ungroup() #%>% filter(coef<high.fence & coef>low.fence)# %>% dplyr::ungroup()
b3 <- merge(b,b2,by="short_names")
b4 <- b3 %>%mutate(outlier= case_when(coef <= high.fence & coef>=low.fence ~"No",
coef>high.fence | coef<low.fence ~ "Yes")) %>% filter(outlier=="No")
barb_id_st2 <- unique(b$fish_id) #barb in st 2
#which ids to remove #so only doing t test on fish in both states
#not in state 1
r <- barb_id_st2[!(barb_id_st2 %in% barb_id_st1)]
s <- barb_id_st1[!(barb_id_st1 %in% barb_id_st2)]
to_remove <- list(r,s[[1]],s[[2]])
#remove fish that arent in both states
barb_all <- barb_all %>% filter(!(fish_id %in% to_remove))
#remove ids
#get each term for looping
terms <- unique(barb_all$short_names)
#blank list to save sig diff terms to
barb_terms_diff_between_states <- list()
i <-1
#loop
for(i in 1:length(terms)){
term_to_mod <- terms[i]
dat <- barb_all %>% filter(short_names==term_to_mod)
datst1 <- dat %>% filter(state==1)
datst2 <- dat %>% filter(state==2)
if(length(datst1$coef)==length(datst2$coef)){
ttest_res <- t.test(datst1$coef,datst2$coef,paired=T)
if(ttest_res$p.value<0.05){
barb_terms_diff_between_states[(length(barb_terms_diff_between_states)+1)] <-term_to_mod
}
}else{
# print(paste0(i,"yo theres not matching terms for term ",term_to_mod))
}
}
print(paste0("For barbel, the terms which differ between states are: ",barb_terms_diff_between_states))
```
For barbel, some terms differ between states - depth and log(sl):diffSVGang. The latter is only present in state 1 and is not significantly different to 0.
```{r barbebl t test diff between st1 and pooled}
barb_all <- read.csv("FINAL RESULTS/barbel_all_coefs_both_states.csv")
#want to add in pooled stuff too !!
barb_pooled <- read.csv("FINAL RESULTS/barbel_all_coefs_pooled.csv")
barb_pooled$state <- "pooled"
#need to remove outliers and then fish ID that are only in 1 state
a <- barb_all %>% filter(state==1)
#outliers
a$state <- as.factor(a$state)
a2 <- a %>% group_by(short_names) %>% summarise(q1=quantile(coef,na.rm=T)[2],q3=quantile(coef,na.rm=T)[4],iqr=q3-q1,low.fence=q1-1.5*iqr,high.fence=q3+1.5*iqr) %>% ungroup() #%>% filter(coef<=high.fence & coef>=low.fence)# %>% dplyr::ungroup()
a3 <- merge(a,a2,by="short_names")
a4 <- a3 %>%mutate(outlier= case_when(coef <= high.fence & coef>=low.fence ~"No",
coef>high.fence | coef<low.fence ~ "Yes")) %>% filter(outlier=="No")
barb_id_st1 <- unique(a4$fish_id) #barb in st 1
b <- barb_pooled
b2 <- b %>% group_by(short_names) %>% summarise(q1=quantile(coef)[2],q3=quantile(coef)[4],iqr=q3-q1,low.fence=q1-1.5*iqr,high.fence=q3+1.5*iqr) %>% ungroup() #%>% filter(coef<high.fence & coef>low.fence)# %>% dplyr::ungroup()
b3 <- merge(b,b2,by="short_names")
b4 <- b3 %>%mutate(outlier= case_when(coef <= high.fence & coef>=low.fence ~"No",
coef>high.fence | coef<low.fence ~ "Yes")) %>% filter(outlier=="No")
barb_id_st2 <- unique(b$fish_id) #barb in st 2
#which ids to remove #so only doing t test on fish in both states
#not in state 1
r <- barb_id_st2[!(barb_id_st2 %in% barb_id_st1)]
#s <- barb_id_st1[!(barb_id_st1 %in% barb_id_st2)] #nothing here
to_remove <- list(r)
#remove fish that arent in both states
barb_all <- bind_rows(a,barb_pooled) %>% filter(!(fish_id %in% to_remove))
#remove ids
#get each term for looping
terms <- unique(barb_all$short_names)
#blank list to save sig diff terms to
barb_terms_diff_between_states <- list()
i <-1
#loop
for(i in 1:length(terms)){
term_to_mod <- terms[i]
dat <- barb_all %>% filter(short_names==term_to_mod)
datst1 <- dat %>% filter(state==1)
datst2 <- dat %>% filter(state=="pooled")
if(length(datst1$coef)==length(datst2$coef)){
ttest_res <- t.test(datst1$coef,datst2$coef,paired=T)
if(ttest_res$p.value<0.05){
barb_terms_diff_between_states[(length(barb_terms_diff_between_states)+1)] <-term_to_mod
}
}else{
# print(paste0(i,"theres not matching terms for term ",term_to_mod))
}
}
print(paste0("For barbel, the terms which differ between state 1 and pooled are: ",barb_terms_diff_between_states))
```
Log(sl):depth and log(SL):diffSVGang differ between state 1 and pooled. The latter is only present in state 1.
```{r barbebl t test diff between st2 and pooled}
barb_all <- read.csv("FINAL RESULTS/barbel_all_coefs_both_states.csv")
#want to add in pooled stuff too !!
barb_pooled <- read.csv("FINAL RESULTS/barbel_all_coefs_pooled.csv")
barb_pooled$state <- "pooled"
#need to remove outliers and then fish ID that are only in 1 state
a <- barb_all %>% filter(state==2)
#outliers
a$state <- as.factor(a$state)
a2 <- a %>% group_by(short_names) %>% summarise(q1=quantile(coef)[2],q3=quantile(coef)[4],iqr=q3-q1,low.fence=q1-1.5*iqr,high.fence=q3+1.5*iqr) %>% ungroup() #%>% filter(coef<=high.fence & coef>=low.fence)# %>% dplyr::ungroup()
a3 <- merge(a,a2,by="short_names")
a4 <- a3 %>%mutate(outlier= case_when(coef <= high.fence & coef>=low.fence ~"No",
coef>high.fence | coef<low.fence ~ "Yes")) %>% filter(outlier=="No")
barb_id_st1 <- unique(a4$fish_id) #barb in st 1
b <- barb_pooled
b2 <- b %>% group_by(short_names) %>% summarise(q1=quantile(coef)[2],q3=quantile(coef)[4],iqr=q3-q1,low.fence=q1-1.5*iqr,high.fence=q3+1.5*iqr) %>% ungroup() #%>% filter(coef<high.fence & coef>low.fence)# %>% dplyr::ungroup()
b3 <- merge(b,b2,by="short_names")
b4 <- b3 %>%mutate(outlier= case_when(coef <= high.fence & coef>=low.fence ~"No",
coef>high.fence | coef<low.fence ~ "Yes")) %>% filter(outlier=="No")
barb_id_st2 <- unique(b$fish_id) #barb in st 2
#which ids to remove #so only doing t test on fish in both states
#not in state 1
r <- barb_id_st2[!(barb_id_st2 %in% barb_id_st1)]
#s <- barb_id_st1[!(barb_id_st1 %in% barb_id_st2)] #nothing here
to_remove <- list(r)
#remove fish that arent in both states
barb_all <- bind_rows(a,barb_pooled) %>% filter(!(fish_id %in% to_remove))
#remove ids
#get each term for looping
terms <- unique(barb_all$short_names)
#blank list to save sig diff terms to
barb_terms_diff_between_states <- list()
i <-1
#loop
for(i in 1:length(terms)){
term_to_mod <- terms[i]
dat <- barb_all %>% filter(short_names==term_to_mod)
datst1 <- dat %>% filter(state==2)
datst2 <- dat %>% filter(state=="pooled")
if(length(datst1$coef)==length(datst2$coef)){
ttest_res <- t.test(datst1$coef,datst2$coef,paired=T)
if(ttest_res$p.value<0.05){
barb_terms_diff_between_states[(length(barb_terms_diff_between_states)+1)] <-term_to_mod
}
}else{
# print(paste0(i,"yo theres not matching terms for term ",term_to_mod))
}
}
print(paste0("For barbel, the terms which differ between state 2 and pooled are: ",barb_terms_diff_between_states))
```
No terms differ between state 2 and pooled.
```{r grayling t test diff between states,warning=F}
gray_all <- read.csv("FINAL RESULTS/grayling_all_coefs_both_states.csv")
#need to remove fish ID that are only in 1 state
a <- gray_all %>% filter(state==1)
#remove outliers
a2 <- a %>% group_by(short_names) %>% summarise(q1=quantile(coef)[2],q3=quantile(coef)[4],iqr=q3-q1,low.fence=q1-1.5*iqr,high.fence=q3+1.5*iqr) %>% ungroup() #%>% filter(coef<high.fence & coef>low.fence)# %>% dplyr::ungroup()
a3 <- merge(a,a2,by="short_names")
a4 <- a3 %>%mutate(outlier= case_when(coef <= high.fence & coef>=low.fence ~"No",
coef>high.fence | coef<low.fence ~ "Yes")) %>% filter(outlier=="No")
gray_id_st1 <- unique(a$fish_id) #gray in st 1
b <- gray_all %>% filter(state==2)
#remove outliers
b2 <- b %>% group_by(short_names) %>% summarise(q1=quantile(coef)[2],q3=quantile(coef)[4],iqr=q3-q1,low.fence=q1-1.5*iqr,high.fence=q3+1.5*iqr) %>% ungroup() #%>% filter(coef<high.fence & coef>low.fence)# %>% dplyr::ungroup()
b3 <- merge(b,b2,by="short_names")
b4 <- b3 %>%mutate(outlier= case_when(coef <= high.fence & coef>=low.fence ~"No",
coef>high.fence | coef<low.fence ~ "Yes")) %>% filter(outlier=="No")
gray_id_st2 <- unique(b$fish_id) #gray in st 2
#which ids to remove
#not in state 1
r <- gray_id_st2[!(gray_id_st2 %in% gray_id_st1)]
s <- gray_id_st1[!(gray_id_st1 %in% gray_id_st2)]
#remove fish not in a state
to_remove <- list(r,s[[1]],s[[2]],s[[3]])
#remove fish that arent in both states
gray_all <- gray_all %>% filter(!(fish_id %in% to_remove))
#get each term for looping
terms <- unique(gray_all$short_names)
#blank list to save sig diff terms to
gray_terms_diff_between_states <- list()
i <-1
#loop
for(i in 1:length(terms)){
term_to_mod <- terms[i]
dat <- gray_all %>% filter(short_names==term_to_mod)
datst1 <- dat %>% filter(state==1)
datst2 <- dat %>% filter(state==2)
if(length(datst1$coef)==length(datst2$coef)){
ttest_res <- t.test(datst1$coef,datst2$coef,paired=T)
if(ttest_res$p.value<0.05){
gray_terms_diff_between_states[(length(gray_terms_diff_between_states)+1)] <-term_to_mod
}
}else{
# print(paste0(i,"yo theres not matching terms for term ",term_to_mod))
}
}
print(paste0("For grayling, the terms which differ between states are: ",gray_terms_diff_between_states))
```
For grayling, no terms differ between states
Between states and pooled
```{r grayling t test diff between st1 and pooled}
gray_all <- read.csv("FINAL RESULTS/grayling_all_coefs_both_states.csv")
#want to add in pooled stuff too !!
gray_pooled <- read.csv("FINAL RESULTS/grayling_all_coefs_pooled.csv")
gray_pooled$state <- "pooled"
#need to remove outliers and then fish ID that are only in 1 state
a <- gray_all %>% filter(state==1)
#outliers
a$state <- as.factor(a$state)
a2 <- a %>% group_by(short_names) %>% summarise(q1=quantile(coef)[2],q3=quantile(coef)[4],iqr=q3-q1,low.fence=q1-1.5*iqr,high.fence=q3+1.5*iqr) %>% ungroup() #%>% filter(coef<=high.fence & coef>=low.fence)# %>% dplyr::ungroup()
a3 <- merge(a,a2,by="short_names")
a4 <- a3 %>%mutate(outlier= case_when(coef <= high.fence & coef>=low.fence ~"No",
coef>high.fence | coef<low.fence ~ "Yes")) %>% filter(outlier=="No")
gray_id_st1 <- unique(a4$fish_id) #gray in st 1
b <- gray_pooled
b2 <- b %>% group_by(short_names) %>% summarise(q1=quantile(coef)[2],q3=quantile(coef)[4],iqr=q3-q1,low.fence=q1-1.5*iqr,high.fence=q3+1.5*iqr) %>% ungroup() #%>% filter(coef<high.fence & coef>low.fence)# %>% dplyr::ungroup()
b3 <- merge(b,b2,by="short_names")
b4 <- b3 %>%mutate(outlier= case_when(coef <= high.fence & coef>=low.fence ~"No",
coef>high.fence | coef<low.fence ~ "Yes")) %>% filter(outlier=="No")
gray_id_st2 <- unique(b$fish_id) #gray in st 2
#which ids to remove #so only doing t test on fish in both states
#not in state 1
r <- gray_id_st2[!(gray_id_st2 %in% gray_id_st1)]
#s <- gray_id_st1[!(gray_id_st1 %in% gray_id_st2)] #nothing here
to_remove <- list(r)
#remove fish that arent in both states
gray_all <- bind_rows(a,gray_pooled) %>% filter(!(fish_id %in% to_remove))
#remove ids
#get each term for looping
terms <- unique(gray_all$short_names)
#blank list to save sig diff terms to
gray_terms_diff_between_states <- list()
i <-1
#loop
for(i in 1:length(terms)){
term_to_mod <- terms[i]
dat <- gray_all %>% filter(short_names==term_to_mod)
datst1 <- dat %>% filter(state==1)
datst2 <- dat %>% filter(state=="pooled")
if(length(datst1$coef)==length(datst2$coef)){
ttest_res <- t.test(datst1$coef,datst2$coef,paired=T)
if(ttest_res$p.value<0.05){
gray_terms_diff_between_states[(length(gray_terms_diff_between_states)+1)] <-term_to_mod
}
}else{
# print(paste0(i,"theres not matching terms for term ",term_to_mod))
}
}
print(paste0("For grayling, the terms which differ between state 1 and pooled are: ",gray_terms_diff_between_states))
```
```{r grayebl t test diff between st2 and pooled}
gray_all <- read.csv("FINAL RESULTS/grayling_all_coefs_both_states.csv")
#want to add in pooled stuff too
gray_pooled <- read.csv("FINAL RESULTS/grayling_all_coefs_pooled.csv")
gray_pooled$state <- "pooled"
#need to remove outliers and then fish ID that are only in 1 state
a <- gray_all %>% filter(state==2)
#outliers
a$state <- as.factor(a$state)
a2 <- a %>% group_by(short_names) %>% summarise(q1=quantile(coef)[2],q3=quantile(coef)[4],iqr=q3-q1,low.fence=q1-1.5*iqr,high.fence=q3+1.5*iqr) %>% ungroup() #%>% filter(coef<=high.fence & coef>=low.fence)# %>% dplyr::ungroup()
a3 <- merge(a,a2,by="short_names")
a4 <- a3 %>%mutate(outlier= case_when(coef <= high.fence & coef>=low.fence ~"No",
coef>high.fence | coef<low.fence ~ "Yes")) %>% filter(outlier=="No")
gray_id_st1 <- unique(a4$fish_id) #gray in st 1
b <- gray_pooled
b2 <- b %>% group_by(short_names) %>% summarise(q1=quantile(coef)[2],q3=quantile(coef)[4],iqr=q3-q1,low.fence=q1-1.5*iqr,high.fence=q3+1.5*iqr) %>% ungroup() #%>% filter(coef<high.fence & coef>low.fence)# %>% dplyr::ungroup()
b3 <- merge(b,b2,by="short_names")
b4 <- b3 %>%mutate(outlier= case_when(coef <= high.fence & coef>=low.fence ~"No",
coef>high.fence | coef<low.fence ~ "Yes")) %>% filter(outlier=="No")
gray_id_st2 <- unique(b$fish_id) #gray in st 2
#which ids to remove #so only doing t test on fish in both states
#not in state 1
r <- gray_id_st2[!(gray_id_st2 %in% gray_id_st1)]
#s <- gray_id_st1[!(gray_id_st1 %in% gray_id_st2)] #nothing here
to_remove <- list(r)
#remove fish that arent in both states
gray_all <- bind_rows(a,gray_pooled) %>% filter(!(fish_id %in% to_remove))
#remove ids
#get each term for looping
terms <- unique(gray_all$short_names)
#blank list to save sig diff terms to
gray_terms_diff_between_states <- list()
i <-1
#loop
for(i in 1:length(terms)){
term_to_mod <- terms[i]
dat <- gray_all %>% filter(short_names==term_to_mod)
datst1 <- dat %>% filter(state==2)
datst2 <- dat %>% filter(state=="pooled")
if(length(datst1$coef)==length(datst2$coef)){
ttest_res <- t.test(datst1$coef,datst2$coef,paired=T)
if(ttest_res$p.value<0.05){
gray_terms_diff_between_states[(length(gray_terms_diff_between_states)+1)] <-term_to_mod
}
}else{
# print(paste0(i,"theres not matching terms for term ",term_to_mod))
}
}
print(paste0("For grayling, the terms which differ between state 2 and pooled are: ",gray_terms_diff_between_states))
```
There is no significant difference between coefficients for any terms in grayling models.
## Correlation between coefficients
Last, looking at correlations between terms' coefficient values - are the coefficient values of any terms correlated?
```{r make correlations,eval=F}
library(GGally)
barb_all <-read.csv("FINAL RESULTS/barbel_all_coefs_both_states.csv")
gray_all <- read.csv("FINAL RESULTS/grayling_all_coefs_both_states.csv")
#barbel
barbst1 <- barb_all %>% filter(state==1) %>% dplyr::select(-c(terms,exp.coef.,se.coef.,z,Pr...z..))
barbst2 <- barb_all %>% filter(state==2)%>% select(-c(terms,exp.coef.,se.coef.,z,Pr...z..))
#barbel state 1
barbst1_corr <- spread(data=barbst1,key=short_names,value=coef)#[c(9:43)]
#ggcorr(barbst1_corr) #correlation plot, maybe be some correlations for barbel, check manually for which are >0.5 corr
#do cor and fitler for >0.5
barbst1_corr_res <- cor(barbst1_corr[4:38])
barbst1_corr_res[lower.tri(barbst1_corr_res,diag=TRUE)] <- NA
#drop perfect correlations
barbst1_corr_res[barbst1_corr_res == 1] <- NA
barbst1_corr_res[barbst1_corr_res == -1] <- NA
barb_st1_cors <- barbst1_corr_res %>% as.table() %>% as.data.frame() %>% na.omit() %>% filter((Freq > 0.5 & Freq <0.999999) | (Freq < -0.5 & Freq > -0.99999))# %>% filter(Freq != 1 & Freq != -1)
#mtx_barb_st1 <- reshape2::acast(barb_st1_cors, Var1~Var2, value.var="Freq")
#corrplot(mtx_barb_st1,is.corr = T)
#barbel state 2
barbst2_corr <- spread(data=barbst2,key=short_names,value=coef)
#do cor and fitler for >0.5
barbst2_corr_res <- cor(barbst2_corr[4:37])
barbst2_corr_res[lower.tri(barbst2_corr_res,diag=TRUE)] <- NA
#drop perfect correlations
barbst2_corr_res[barbst2_corr_res == 1] <- NA
barbst2_corr_res[barbst2_corr_res == -1] <- NA
barb_st2_cors <- barbst2_corr_res %>% as.table() %>% as.data.frame() %>% na.omit() %>% filter((Freq > 0.5 & Freq <0.999999) | (Freq < -0.5 & Freq > -0.99999))# %>% filter(Freq != 1 & Freq != -1)
#grayling
grayst1 <- gray_all %>% filter(state==1) %>% select(-c(terms,exp.coef.,se.coef.,z,Pr...z..))
grayst2 <- gray_all %>% filter(state==2)%>% select(-c(terms,exp.coef.,se.coef.,z,Pr...z..))
#gryaling state 1
grayst1_corr <- spread(data=grayst1,key=short_names,value=coef)#[c(9:43)]
#do cor and fitler for >0.5
grayst1_corr_res <- cor(grayst1_corr[4:36])
grayst1_corr_res[lower.tri(grayst1_corr_res,diag=TRUE)] <- NA
#drop perfect correlations
grayst1_corr_res[grayst1_corr_res == 1] <- NA
grayst1_corr_res[grayst1_corr_res == -1] <- NA
gray_st1_cors <- grayst1_corr_res %>% as.table() %>% as.data.frame() %>% na.omit() %>% filter((Freq > 0.5 & Freq <0.999999) | (Freq < -0.5 & Freq > -0.99999))# %>% filter(Freq != 1 & Freq != -1)
#graling state 2
grayst2_corr <- spread(data=grayst2,key=short_names,value=coef)
#do cor and fitler for >0.5
grayst2_corr_res <- cor(grayst2_corr[4:31])
grayst2_corr_res[lower.tri(grayst2_corr_res,diag=TRUE)] <- NA
#drop perfect correlations
grayst2_corr_res[grayst2_corr_res == 1] <- NA
grayst2_corr_res[grayst2_corr_res == -1] <- NA
gray_st2_cors <- grayst2_corr_res %>% as.table() %>% as.data.frame() %>% na.omit() %>% filter((Freq > 0.5 & Freq <0.999999) | (Freq < -0.5 & Freq > -0.99999))# %>% filter(Freq != 1 & Freq != -1)
```
Highly correlated model coefficients:
Barbel state 1:
```{r barb st1 cor reactable,eval=F}
reactable(barb_st1_cors)
```
Barbel state 2:
```{r barb st2 cor reactable,eval=F}
reactable(barb_st2_cors)
```
Grayling state 1:
```{r gray st1 cor reactable,eval=F}
reactable(gray_st1_cors)
```
Grayling state 2:
```{r gray st2 cor reactable,eval=F}
reactable(gray_st2_cors)
```
Essentially, some terms are correlated between each other - some of these are strongly correlated, and some involve terms only in final models for a few fish.
## Fish size v coefficient size
Comparing the size of coefficients to the size of fish, in both states.
### Barbel
#### State 1
```{r barb add sizes model coefficients st1,eval=F}
barb_ind_mod_final <- read.csv("FINAL RESULTS/barbel_all_coefs_state1.csv")
#make 1 df
#add fish data e.g. legnths and weights as well
data_on_fish <-read.csv('data/Tagged_fish_data.csv')
ids <- unique(barb_ind_mod_final$fish_id)
for(i in 1:length(ids)){
temp <- barb_ind_mod_final %>% filter(fish_id==ids[i]) #get coefficients #want to have species and id just in case here
data_on_fish_i <- filter(data_on_fish,Tag_ID == ids[i])
temp$fork_length <- data_on_fish_i$Fork_length_mm
temp$total_length <- data_on_fish_i$Total_length_mm
temp$weight <- data_on_fish_i$Weight_g
temp$catch_loc <-data_on_fish_i$Catch_location
temp$if_counting_pool <- data_on_fish_i$Caught_in_counting_pool
temp$sex <- data_on_fish_i$Sex
if(i==1){
barb_coefs_and_size <- temp
} else{
barb_coefs_and_size <-rbind(barb_coefs_and_size,temp)
}
}
barb_coefs_and_size$short_names <- stringi::stri_replace_all_regex(barb_coefs_and_size$term,
pattern=c('water_velocity_end_strdised', 'diff_svg_abs_ang_end_strdised', 'diff_svg_abs_ang_start_strdised',"log_sl__strdised","temp_start_strdised","depth_start_strdised","depth_end_strdised","svg_end_strdised","diff_vel_abs_ang_end_strdised","cos_ta__strdised","tod_end_NEWdusk","tod_end_NEWnight","tod_start_NEWdusk","tod_start_NEWnight", "water_velocity_start_strdised","svg_start_strdised","diff_vel_abs_ang_start_strdised",":"),
replacement=c('WV(end)', 'diffSVGang(end)', 'diffSVGang(start)',"log(SL)","Temp","D(start)","D(end)","SVG(end)","diffVang(end)","cos(TA)","TOD","TOD","TOD","TOD",'WV(start)',"SVG(start)","diffVang(start)",":"),
vectorize=FALSE)
```
```{r plot term coefficients with weight length etc barb,fig.height=6,fig.width=8,eval=F}
barb_coefs_and_size$term <- barb_coefs_and_size$terms
#remove tod plots?
barb_coefs_and_size <- barb_coefs_and_size[!(grepl("tod",barb_coefs_and_size$term)),]
ggplot(barb_coefs_and_size,aes(x=total_length,y=coef))+
geom_point()+
geom_smooth(method="lm")+
facet_wrap(~short_names,scales = "free_y")+
labs(title="Barbel - total length and effect size",x="Total length (mm)",y="Coefficient value")
ggplot(barb_coefs_and_size,aes(x=fork_length,y=coef))+
geom_point()+
geom_smooth(method="lm")+
facet_wrap(~short_names,scales = "free_y")+
labs(title="Barbel - fork length and effect size",x="Fork length(mm)",y="Coefficient value")
ggplot(barb_coefs_and_size,aes(x=weight,y=coef))+
geom_point()+
geom_smooth(method="lm")+
facet_wrap(~short_names,scales = "free_y")+
labs(title="Barbel - weight and effect size",x="Weight (g)",y="Coefficient value")
ggplot(barb_coefs_and_size,aes(x=sex,y=coef))+
geom_boxplot()+
geom_smooth(method="lm")+
facet_wrap(~short_names,scales = "free_y")+
labs(title="Barbel - weight and effect size",x="Weight (g)",y="Coefficient value")
```
Some parameters may have a trend with size but its not very clear. For every term, I'll run a simple GLM to see if the coefficient size is explained by fish size and from there which fish size explains it best.
```{r barb loop to plot coefs against size,fig.height=6,fig.width=8,eval=F}
#calculate and then plot to quickly visualise for sharing the results
terms_list_barb <- unique(barb_coefs_and_size$term)
for(i in 1:length(terms_list_barb)){
dat <-barb_coefs_and_size %>% filter(term==terms_list_barb[[i]])
modnull <-dat %>%glm(data=.,formula=coef~1,family="gaussian")
modweight <-dat %>%glm(data=.,formula=coef~weight,family="gaussian")
modfl <-dat %>%glm(data=.,formula=coef~fork_length,family="gaussian")
modtl <-dat %>%glm(data=.,formula=coef~total_length,family="gaussian")
aic_size <- AIC(modnull,modweight,modfl,modtl)
aic_size$term <- terms_list_barb[[i]]
aic_size$model <- c("null","weight","fork length","total length")
if(i==1){
aic_size_barb <- aic_size
}else{
aic_size_barb <-rbind(aic_size_barb,aic_size)
}
}
aic_size_barb$short_names <- stringi::stri_replace_all_regex(aic_size_barb$term,
pattern=c('water_velocity_end_strdised', 'diff_svg_abs_ang_end_strdised', 'diff_svg_abs_ang_start_strdised',"log_sl__strdised","temp_start_strdised","depth_start_strdised","depth_end_strdised","svg_end_strdised","diff_vel_abs_ang_end_strdised","cos_ta__strdised","tod_end_NEWdusk","tod_end_NEWnight","tod_start_NEWdusk","tod_start_NEWnight", "water_velocity_start_strdised","svg_start_strdised","diff_vel_abs_ang_start_strdised",":"),
replacement=c('WV(end)', 'diffSVGang(end)', 'diffSVGang(start)',"log(SL)","Temp","D(start)","D(end)","SVG(end)","diffVang(end)","cos(TA)","TOD","TOD","TOD","TOD",'WV(start)',"SVG(start)","diffVang(start)",":"),
vectorize=FALSE)
ggplot(aic_size_barb,aes(x=model,y=AIC))+
geom_point()+
facet_wrap(~short_names,scales="free_y")+
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))+
labs(title="Barbel - model AIC values, comparing coefficients to size",x="Model",y="AIC")
```
The ones that have no plots are Inf for all AICs, because like every fish is 0 :3c
Comparing barbel state 1 coefficients to size:
* total length and log(SL):diffSVGang - might mean something but looks like the line is affected by an extremely small individual
* SVG:diffVang and total length - also potentially skewed by smallest individual
* fork length and WV:D - skewed by smallest individual
These are all terms where there's not a clear trend at the top, but then the smallest individual sets the trend - worth rerunning AIC without the smallest guy (fork length 302).
```{r barb loop to plot coefs against size minus smallest,fig.height=6,fig.width=8,eval=F}
#calculate and then plot maybe? to quickly visualise for sharing the results?
terms_list_barb <- unique(barb_coefs_and_size$term)
barb_coefs_and_size2 <- filter(barb_coefs_and_size,fork_length>302)
for(i in 1:length(terms_list_barb)){
dat <-barb_coefs_and_size2 %>% filter(term==terms_list_barb[[i]])
modnull <-dat %>%glm(data=.,formula=coef~1,family="gaussian")
modweight <-dat %>%glm(data=.,formula=coef~weight,family="gaussian")
modfl <-dat %>%glm(data=.,formula=coef~fork_length,family="gaussian")
modtl <-dat %>%glm(data=.,formula=coef~total_length,family="gaussian")
aic_size <- AIC(modnull,modweight,modfl,modtl)
aic_size$term <- terms_list_barb[[i]]
aic_size$model <- c("null","weight","fork length","total length")
if(i==1){
aic_size_barb <- aic_size
}else{
aic_size_barb <-rbind(aic_size_barb,aic_size)
}
}
aic_size_barb$short_names <- stringi::stri_replace_all_regex(aic_size_barb$term,
pattern=c('water_velocity_end_strdised', 'diff_svg_abs_ang_end_strdised', 'diff_svg_abs_ang_start_strdised',"log_sl__strdised","temp_start_strdised","depth_start_strdised","depth_end_strdised","svg_end_strdised","diff_vel_abs_ang_end_strdised","cos_ta__strdised","tod_end_NEWdusk","tod_end_NEWnight","tod_start_NEWdusk","tod_start_NEWnight", "water_velocity_start_strdised","svg_start_strdised","diff_vel_abs_ang_start_strdised",":"),
replacement=c('WV(end)', 'diffSVGang(end)', 'diffSVGang(start)',"log(SL)","Temp","D(start)","D(end)","SVG(end)","diffVang(end)","cos(TA)","TOD","TOD","TOD","TOD",'WV(start)',"SVG(start)","diffVang(start)",":"),
vectorize=FALSE)
ggplot(aic_size_barb,aes(x=model,y=AIC))+
geom_point()+
facet_wrap(~short_names,scales="free_y")+
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))+
labs(title="Barbel - model AIC values, comparing coefficients to size",x="Model",y="AIC")
```
Removing the smallest individual (58 mm shorter than the rest) removed the significance of the size:coefficient relationships. No other relationships exist now.
#### State 2
```{r barb add sizes model coefficients st2,eval=F}
barb_ind_mod_final <- read.csv("FINAL RESULTS/barbel_all_coefs_state2.csv")
#make 1 df
#add fish data e.g. legnths and weights as well
data_on_fish <-read.csv('data/Tagged_fish_data.csv')
ids <- unique(barb_ind_mod_final$fish_id)
for(i in 1:length(ids)){
temp <- barb_ind_mod_final %>% filter(fish_id==ids[i]) #get coefficients #want to have species and id just in case here
data_on_fish_i <- filter(data_on_fish,Tag_ID == ids[i])
temp$fork_length <- data_on_fish_i$Fork_length_mm
temp$total_length <- data_on_fish_i$Total_length_mm
temp$weight <- data_on_fish_i$Weight_g
temp$catch_loc <-data_on_fish_i$Catch_location
temp$if_counting_pool <- data_on_fish_i$Caught_in_counting_pool
temp$sex <- data_on_fish_i$Sex
if(i==1){
barb_coefs_and_size <- temp
} else{
barb_coefs_and_size <-rbind(barb_coefs_and_size,temp)
}
}
barb_coefs_and_size$short_names <- stringi::stri_replace_all_regex(barb_coefs_and_size$term,
pattern=c('water_velocity_end_strdised', 'diff_svg_abs_ang_end_strdised', 'diff_svg_abs_ang_start_strdised',"log_sl__strdised","temp_start_strdised","depth_start_strdised","depth_end_strdised","svg_end_strdised","diff_vel_abs_ang_end_strdised","cos_ta__strdised","tod_end_NEWdusk","tod_end_NEWnight","tod_start_NEWdusk","tod_start_NEWnight", "water_velocity_start_strdised","svg_start_strdised","diff_vel_abs_ang_start_strdised",":"),
replacement=c('WV(end)', 'diffSVGang(end)', 'diffSVGang(start)',"log(SL)","Temp","D(start)","D(end)","SVG(end)","diffVang(end)","cos(TA)","TOD","TOD","TOD","TOD",'WV(start)',"SVG(start)","diffVang(start)",":"),
vectorize=FALSE)
```
```{r plot term coefficients with weight length etc barb st2,fig.height=6,fig.width=8,eval=F}
barb_coefs_and_size$term <- barb_coefs_and_size$terms
barb_coefs_and_size <- barb_coefs_and_size[!(grepl("tod",barb_coefs_and_size$term)),]
ggplot(barb_coefs_and_size,aes(x=total_length,y=coef))+
geom_point()+
geom_smooth(method="lm")+
facet_wrap(~short_names,scales = "free_y")+
labs(title="Barbel - total length and effect size",x="Total length (mm)",y="Coefficient value")
ggplot(barb_coefs_and_size,aes(x=fork_length,y=coef))+
geom_point()+
geom_smooth(method="lm")+
facet_wrap(~short_names,scales = "free_y")+
labs(title="Barbel - fork length and effect size",x="Fork length(mm)",y="Coefficient value")
ggplot(barb_coefs_and_size,aes(x=weight,y=coef))+
geom_point()+
geom_smooth(method="lm")+
facet_wrap(~short_names,scales = "free_y")+
labs(title="Barbel - weight and effect size",x="Weight (g)",y="Coefficient value")
ggplot(barb_coefs_and_size,aes(x=sex,y=coef))+
geom_boxplot()+
geom_smooth(method="lm")+
facet_wrap(~short_names,scales = "free_y")+
labs(title="Barbel - weight and effect size",x="Weight (g)",y="Coefficient value")
#barb_coefs_and_size %>% group_by(catch_loc) %>% summarise(number=length(unique(fish_id)))
```
GLMs to see if size significantly explains any coefficient sizes.
```{r barb loop to plot coefs against size st2,fig.height=6,fig.width=8,eval=F}
#calculate and then plot maybe? to quickly visualise for sharing the results?
terms_list_barb <- unique(barb_coefs_and_size$term)
for(i in 1:length(terms_list_barb)){
dat <-barb_coefs_and_size %>% filter(term==terms_list_barb[[i]])
modnull <-dat %>%glm(data=.,formula=coef~1,family="gaussian")
modweight <-dat %>%glm(data=.,formula=coef~weight,family="gaussian")
modfl <-dat %>%glm(data=.,formula=coef~fork_length,family="gaussian")
modtl <-dat %>%glm(data=.,formula=coef~total_length,family="gaussian")
aic_size <- AIC(modnull,modweight,modfl,modtl)
aic_size$term <- terms_list_barb[[i]]
aic_size$model <- c("null","weight","fork length","total length")
if(i==1){
aic_size_barb <- aic_size
}else{
aic_size_barb <-rbind(aic_size_barb,aic_size)
}
}
aic_size_barb$short_names <- stringi::stri_replace_all_regex(aic_size_barb$term,
pattern=c('water_velocity_end_strdised', 'diff_svg_abs_ang_end_strdised', 'diff_svg_abs_ang_start_strdised',"log_sl__strdised","temp_start_strdised","depth_start_strdised","depth_end_strdised","svg_end_strdised","diff_vel_abs_ang_end_strdised","cos_ta__strdised","tod_end_NEWdusk","tod_end_NEWnight","tod_start_NEWdusk","tod_start_NEWnight", "water_velocity_start_strdised","svg_start_strdised","diff_vel_abs_ang_start_strdised",":"),
replacement=c('WV(end)', 'diffSVGang(end)', 'diffSVGang(start)',"log(SL)","Temp","D(start)","D(end)","SVG(end)","diffVang(end)","cos(TA)","TOD","TOD","TOD","TOD",'WV(start)',"SVG(start)","diffVang(start)",":"),
vectorize=FALSE)
ggplot(aic_size_barb,aes(x=model,y=AIC))+
geom_point()+
facet_wrap(~short_names,scales="free_y")+
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))+
labs(title="Barbel - model AIC values, comparing coefficients to size",x="Model",y="AIC")
```
Terms that change with size:
* log(SL):SVG and total length - but is not-zero for only 4 individuals... don't know if I trust concrete decisions from that
* Depth:diffSVGang and total length, possibly skewed by an outlier (smallest individual has largest value)
* WV:diffSVGang and fork length, again skewed by large value for smallest individual, 0 for all other fish bar one
CHeck when smallest fish is removed again
```{r barb loop to plot coefs against size st2 2,fig.height=6,fig.width=8,eval=F}
#calculate and then plot to quickly visualise for sharing the results
terms_list_barb <- unique(barb_coefs_and_size$term)
barb_coefs_and_size2 <- filter(barb_coefs_and_size,fork_length>302)
for(i in 1:length(terms_list_barb)){
dat <-barb_coefs_and_size2 %>% filter(terms==terms_list_barb[[i]])
modnull <-dat %>%glm(data=.,formula=coef~1,family="gaussian")
modweight <-dat %>%glm(data=.,formula=coef~weight,family="gaussian")
modfl <-dat %>%glm(data=.,formula=coef~fork_length,family="gaussian")
modtl <-dat %>%glm(data=.,formula=coef~total_length,family="gaussian")
aic_size <- AIC(modnull,modweight,modfl,modtl)
aic_size$term <- terms_list_barb[[i]]
aic_size$model <- c("null","weight","fork length","total length")
if(i==1){
aic_size_barb <- aic_size
}else{
aic_size_barb <-rbind(aic_size_barb,aic_size)
}
}
aic_size_barb$short_names <- stringi::stri_replace_all_regex(aic_size_barb$term,
pattern=c('water_velocity_end_strdised', 'diff_svg_abs_ang_end_strdised', 'diff_svg_abs_ang_start_strdised',"log_sl__strdised","temp_start_strdised","depth_start_strdised","depth_end_strdised","svg_end_strdised","diff_vel_abs_ang_end_strdised","cos_ta__strdised","tod_end_NEWdusk","tod_end_NEWnight","tod_start_NEWdusk","tod_start_NEWnight", "water_velocity_start_strdised","svg_start_strdised","diff_vel_abs_ang_start_strdised",":"),
replacement=c('WV(end)', 'diffSVGang(end)', 'diffSVGang(start)',"log(SL)","Temp","D(start)","D(end)","SVG(end)","diffVang(end)","cos(TA)","TOD","TOD","TOD","TOD",'WV(start)',"SVG(start)","diffVang(start)",":"),
vectorize=FALSE)
ggplot(aic_size_barb,aes(x=model,y=AIC))+
geom_point()+
facet_wrap(~short_names,scales="free_y")+
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))+
labs(title="Barbel - model AIC values, comparing coefficients to size",x="Model",y="AIC")
```
* diffSVGang(start):(end) - but only in 3 fish
### Grayling
#### State 1
```{r gray add sizes model coefficients st1,eval=F}
gray_ind_mod_final <- read.csv("FINAL RESULTS/grayling_all_coefs_state1.csv")
#make 1 df
#add fish data e.g. legnths and weights as well
data_on_fish <-read.csv('data/Tagged_fish_data.csv')
ids <- unique(gray_ind_mod_final$fish_id)
for(i in 1:length(ids)){
temp <- gray_ind_mod_final %>% filter(fish_id==ids[i]) #get coefficients #want to have species and id just in case here
data_on_fish_i <- filter(data_on_fish,Tag_ID == ids[i])
temp$fork_length <- data_on_fish_i$Fork_length_mm
temp$total_length <- data_on_fish_i$Total_length_mm
temp$weight <- data_on_fish_i$Weight_g
temp$catch_loc <-data_on_fish_i$Catch_location
temp$if_counting_pool <- data_on_fish_i$Caught_in_counting_pool
temp$sex <- data_on_fish_i$Sex
if(i==1){
gray_coefs_and_size <- temp
} else{
gray_coefs_and_size <-rbind(gray_coefs_and_size,temp)
}
}
gray_coefs_and_size$short_names <- stringi::stri_replace_all_regex(gray_coefs_and_size$term,
pattern=c('water_velocity_end_strdised', 'diff_svg_abs_ang_end_strdised', 'diff_svg_abs_ang_start_strdised',"log_sl__strdised","temp_start_strdised","depth_start_strdised","depth_end_strdised","svg_end_strdised","diff_vel_abs_ang_end_strdised","cos_ta__strdised","tod_end_NEWdusk","tod_end_NEWnight","tod_start_NEWdusk","tod_start_NEWnight", "water_velocity_start_strdised","svg_start_strdised","diff_vel_abs_ang_start_strdised",":"),
replacement=c('WV(end)', 'diffSVGang(end)', 'diffSVGang(start)',"log(SL)","Temp","D(start)","D(end)","SVG(end)","diffVang(end)","cos(TA)","TOD","TOD","TOD","TOD",'WV(start)',"SVG(start)","diffVang(start)",":"),
vectorize=FALSE)
```
```{r plot term coefficients with weight length etc gray,fig.height=6,fig.width=8,eval=F}
gray_coefs_and_size$term <- gray_coefs_and_size$terms
gray_coefs_and_size <- gray_coefs_and_size[!(grepl("tod",gray_coefs_and_size$term)),]
ggplot(gray_coefs_and_size,aes(x=total_length,y=coef))+
geom_point()+
geom_smooth(method="lm")+
facet_wrap(~short_names,scales = "free_y")+
labs(title="grayling - total length and effect size",x="Total length (mm)",y="Coefficient value")
ggplot(gray_coefs_and_size,aes(x=fork_length,y=coef))+
geom_point()+
geom_smooth(method="lm")+
facet_wrap(~short_names,scales = "free_y")+
labs(title="grayling - fork length and effect size",x="Fork length(mm)",y="Coefficient value")
ggplot(gray_coefs_and_size,aes(x=weight,y=coef))+
geom_point()+
geom_smooth(method="lm")+
facet_wrap(~short_names,scales = "free_y")+
labs(title="grayling - weight and effect size",x="Weight (g)",y="Coefficient value")
ggplot(gray_coefs_and_size,aes(x=sex,y=coef))+
geom_boxplot()+
geom_smooth(method="lm")+
facet_wrap(~short_names,scales = "free_y")+
labs(title="grayling - weight and effect size",x="Weight (g)",y="Coefficient value")
#gray_coefs_and_size %>% group_by(catch_loc) %>% summarise(number=length(unique(fish_id)))
```
Some parameters may have a trend with size but its not very clear. For every term, I'll run a simple GLM to see if the coefficient size is explained by fish size and from there which fish size explains it best.
```{r gray loop to plot coefs against size,fig.height=6,fig.width=8,eval=F}
terms_list_gray <- unique(gray_coefs_and_size$term)
for(i in 1:length(terms_list_gray)){
dat <-gray_coefs_and_size %>% filter(term==terms_list_gray[[i]])
modnull <-dat %>%glm(data=.,formula=coef~1,family="gaussian")
modweight <-dat %>%glm(data=.,formula=coef~weight,family="gaussian")
modfl <-dat %>%glm(data=.,formula=coef~fork_length,family="gaussian")
modtl <-dat %>%glm(data=.,formula=coef~total_length,family="gaussian")
aic_size <- AIC(modnull,modweight,modfl,modtl)
aic_size$term <- terms_list_gray[[i]]
aic_size$model <- c("null","weight","fork length","total length")
if(i==1){
aic_size_gray <- aic_size
}else{
aic_size_gray <-rbind(aic_size_gray,aic_size)
}
}
aic_size_gray$short_names <- stringi::stri_replace_all_regex(aic_size_gray$term,
pattern=c('water_velocity_end_strdised', 'diff_svg_abs_ang_end_strdised', 'diff_svg_abs_ang_start_strdised',"log_sl__strdised","temp_start_strdised","depth_start_strdised","depth_end_strdised","svg_end_strdised","diff_vel_abs_ang_end_strdised","cos_ta__strdised","tod_end_NEWdusk","tod_end_NEWnight","tod_start_NEWdusk","tod_start_NEWnight", "water_velocity_start_strdised","svg_start_strdised","diff_vel_abs_ang_start_strdised",":"),
replacement=c('WV(end)', 'diffSVGang(end)', 'diffSVGang(start)',"log(SL)","Temp","D(start)","D(end)","SVG(end)","diffVang(end)","cos(TA)","TOD","TOD","TOD","TOD",'WV(start)',"SVG(start)","diffVang(start)",":"),
vectorize=FALSE)
ggplot(aic_size_gray,aes(x=model,y=AIC))+
geom_point()+
facet_wrap(~short_names,scales="free_y")+
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))+
labs(title="grayling - model AIC values, comparing coefficients to size",x="Model",y="AIC")
```
No significant relationships
#### State 2
```{r gray add sizes model coefficients st2,eval=F}
gray_ind_mod_final <- read.csv("FINAL RESULTS/grayling_all_coefs_state2.csv")
#make 1 df
#add fish data e.g. legnths and weights as well
data_on_fish <-read.csv('data/Tagged_fish_data.csv')
ids <- unique(gray_ind_mod_final$fish_id)
for(i in 1:length(ids)){
temp <- gray_ind_mod_final %>% filter(fish_id==ids[i]) #get coefficients #want to have species and id just in case here
data_on_fish_i <- filter(data_on_fish,Tag_ID == ids[i])
temp$fork_length <- data_on_fish_i$Fork_length_mm
temp$total_length <- data_on_fish_i$Total_length_mm
temp$weight <- data_on_fish_i$Weight_g
temp$catch_loc <-data_on_fish_i$Catch_location
temp$if_counting_pool <- data_on_fish_i$Caught_in_counting_pool
temp$sex <- data_on_fish_i$Sex
if(i==1){
gray_coefs_and_size <- temp
} else{
gray_coefs_and_size <-rbind(gray_coefs_and_size,temp)
}
}
gray_coefs_and_size$short_names <- stringi::stri_replace_all_regex(gray_coefs_and_size$term,
pattern=c('water_velocity_end_strdised', 'diff_svg_abs_ang_end_strdised', 'diff_svg_abs_ang_start_strdised',"log_sl__strdised","temp_start_strdised","depth_start_strdised","depth_end_strdised","svg_end_strdised","diff_vel_abs_ang_end_strdised","cos_ta__strdised","tod_end_NEWdusk","tod_end_NEWnight","tod_start_NEWdusk","tod_start_NEWnight", "water_velocity_start_strdised","svg_start_strdised","diff_vel_abs_ang_start_strdised",":"),
replacement=c('WV(end)', 'diffSVGang(end)', 'diffSVGang(start)',"log(SL)","Temp","D(start)","D(end)","SVG(end)","diffVang(end)","cos(TA)","TOD","TOD","TOD","TOD",'WV(start)',"SVG(start)","diffVang(start)",":"),
vectorize=FALSE)
```
```{r plot term coefficients with weight length etc gray st2,fig.height=6,fig.width=8,eval=F}
gray_coefs_and_size$term <- gray_coefs_and_size$terms
gray_coefs_and_size <- gray_coefs_and_size[!(grepl("tod",gray_coefs_and_size$term)),]
ggplot(gray_coefs_and_size,aes(x=total_length,y=coef))+
geom_point()+
geom_smooth(method="lm")+
facet_wrap(~short_names,scales = "free_y")+
labs(title="grayling - total length and effect size",x="Total length (mm)",y="Coefficient value")
ggplot(gray_coefs_and_size,aes(x=fork_length,y=coef))+
geom_point()+
geom_smooth(method="lm")+
facet_wrap(~short_names,scales = "free_y")+
labs(title="grayling - fork length and effect size",x="Fork length(mm)",y="Coefficient value")
ggplot(gray_coefs_and_size,aes(x=weight,y=coef))+
geom_point()+
geom_smooth(method="lm")+
facet_wrap(~short_names,scales = "free_y")+
labs(title="grayling - weight and effect size",x="Weight (g)",y="Coefficient value")
ggplot(gray_coefs_and_size,aes(x=sex,y=coef))+
geom_boxplot()+
geom_smooth(method="lm")+
facet_wrap(~short_names,scales = "free_y")+
labs(title="grayling - weight and effect size",x="Weight (g)",y="Coefficient value")
#gray_coefs_and_size %>% group_by(catch_loc) %>% summarise(number=length(unique(fish_id)))
```
GLMs to see if size significantly explains any coefficient sizes.
```{r gray loop to plot coefs against size st2,fig.height=6,fig.width=8,eval=F}
terms_list_gray <- unique(gray_coefs_and_size$term)
for(i in 1:length(terms_list_gray)){
dat <-gray_coefs_and_size %>% filter(term==terms_list_gray[[i]])
modnull <-dat %>%glm(data=.,formula=coef~1,family="gaussian")
modweight <-dat %>%glm(data=.,formula=coef~weight,family="gaussian")
modfl <-dat %>%glm(data=.,formula=coef~fork_length,family="gaussian")
modtl <-dat %>%glm(data=.,formula=coef~total_length,family="gaussian")
aic_size <- AIC(modnull,modweight,modfl,modtl)
aic_size$term <- terms_list_gray[[i]]
aic_size$model <- c("null","weight","fork length","total length")
if(i==1){
aic_size_gray <- aic_size
}else{
aic_size_gray <-rbind(aic_size_gray,aic_size)
}
}
aic_size_gray$short_names <- stringi::stri_replace_all_regex(aic_size_gray$term,
pattern=c('water_velocity_end_strdised', 'diff_svg_abs_ang_end_strdised', 'diff_svg_abs_ang_start_strdised',"log_sl__strdised","temp_start_strdised","depth_start_strdised","depth_end_strdised","svg_end_strdised","diff_vel_abs_ang_end_strdised","cos_ta__strdised","tod_end_NEWdusk","tod_end_NEWnight","tod_start_NEWdusk","tod_start_NEWnight", "water_velocity_start_strdised","svg_start_strdised","diff_vel_abs_ang_start_strdised",":"),
replacement=c('WV(end)', 'diffSVGang(end)', 'diffSVGang(start)',"log(SL)","Temp","D(start)","D(end)","SVG(end)","diffVang(end)","cos(TA)","TOD","TOD","TOD","TOD",'WV(start)',"SVG(start)","diffVang(start)",":"),
vectorize=FALSE)
ggplot(aic_size_gray,aes(x=model,y=AIC))+
geom_point()+
facet_wrap(~short_names,scales="free_y")+
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1))+
labs(title="grayling - model AIC values, comparing coefficients to size",x="Model",y="AIC")
```
Terms that differ significantly with size:
* SVG(start):SVG(end) and weight, but the only fish with this term is the heaviest