https://doi.org/10.5281/zenodo.14318846
predictions.Rmd
---
title: "Predictions 2"
author: ""
date: ""
output:
html_document:
toc: true
toc_float: true
theme: cerulean
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = F,message=F)
#main packages #delete/add as needed
library(dplyr)
library(ggplot2)
library(knitr)
library(ggpubr)
library(plotly)
library(pracma) #for standard error
library(reactable)
library(raster)
library(momentuHMM)
library(tidyr)
library(mvtnorm) #for multivariate normal dist
library(MASS)
library(terra)
Rcpp::sourceCpp("cpp/simulation_function.cpp", showOutput = F, verbose = F) #james simulation function
source("simulations stuff/simulation_function_preprocessing.R")
```
## About
File to predict with start locations in the lower 100 m of the study site
## Crop rasters
For the three discharges (20, 50 and 80), a raster will be cropped to only the lower extent. This means that, for a given discharge, the correct extents of the river will be used to select.
```{r crop rasters, eval=F}
raster_list <- list.files("rasters - cropped",full.names = T)
#rasters
r20 <- raster(raster_list[2])
r50 <- raster(raster_list[5])
r80 <- raster(raster_list[8])
#shape to crop by
shape <- readOGR("startLocBox/start_loc_box.shp")
r20.2 <- crop(r20,extent(shape))
r20.2 <- mask(r20.2,shape)
writeRaster(r20.2,"start_loc_rasts/start_20.asc")
r50.2 <- crop(r50,extent(shape))
r50.2 <- mask(r50.2,shape)
writeRaster(r50.2,"start_loc_rasts/start_50.asc")
r80.2 <- crop(r80,extent(shape))
r80.2 <- mask(r80.2,shape)
writeRaster(r80.2,"start_loc_rasts/start_80.asc")
```
## Barbel
### Load in info
```{r set some variable names 2}
#read in hmms
barb_hmm <- readRDS("HMMs/barb_SI10.rds")
gray_hmm <- readRDS("HMMs/gr_SI10.rds")
n_steps <- 20 #number of steps the simulation can select from at each step
#get coefficients per state
barb_all_coef <- read.csv("FINAL RESULTS/barbel_all_coefs_both_states.csv")
gray_all_coef <- read.csv("FINAL RESULTS/grayling_all_coefs_both_states.csv")
#below - UNSTANDARDISED coefs, for all fish and species
all_coefs <- read.csv("FINAL RESULTS/all_coefs_both_states_UNSTANDARDISED.csv")
all_coefs$terms <- all_coefs$params
#remove all those where mean == 0 so will be using the formula based off all individuals
#get that
barb_means <- read.csv("FINAL RESULTS/barbel_means_both_states.csv")
barb_st1_terms <- barb_means %>% filter(mean_coef!=0 & sd_coef!=0 &state==1 )
barb_st1_terms <- unique(barb_st1_terms$terms)# %>% str_remove_all("_strdised")
#barb_st1_terms <- lapply(barb_st1_terms,gsub,pattern="_strdised",replacement="")
for(i in 1: length(barb_st1_terms)){
barb_st1_terms[[i]] <- gsub("_strdised","",barb_st1_terms[[i]])
}
barb_st2_terms <- barb_means %>% filter(mean_coef!=0 & sd_coef!=0 &state==2 )
barb_st2_terms <- unique(barb_st2_terms$terms)
for(i in 1: length(barb_st2_terms)){
barb_st2_terms[[i]] <- gsub("_strdised","",barb_st2_terms[[i]])
}
gray_means <- read.csv("FINAL RESULTS/grayling_means_both_states.csv")
gray_st1_terms <- gray_means %>% filter(mean_coef!=0 & sd_coef!=0 &state==1 )
gray_st1_terms <- unique(gray_st1_terms$terms)
for(i in 1: length(gray_st1_terms)){
gray_st1_terms[[i]] <- gsub("_strdised","",gray_st1_terms[[i]])
}
gray_st2_terms <- gray_means %>% filter(mean_coef!=0 & sd_coef!=0 &state==2 & terms!='cos_ta__strdised')
gray_st2_terms <- unique(gray_st2_terms$terms)
for(i in 1: length(gray_st2_terms)){
gray_st2_terms[[i]] <- gsub("_strdised","",gray_st2_terms[[i]])
}
#sl and ta distrs per fish per state
movement_distr_dat <- read.csv("FINAL RESULTS/movement_distr_params.csv")
#standardising info
std_info <- read.csv("data/summary_stats_for_standardising.csv")
std_info <- rename(std_info,term=Variable)
raster_list <- list.files("rasters - cropped",full.names = T)
start_locs <- list.files("start_loc_rasts",full.names=T)
```
```{r create raster list data fram 2}
#create a dataframe of raster name, raster file, and discharge
#realisitically going to do 20, 50 and 80 discharges to start with
rasters_info <- as.data.frame(raster_list)
names(rasters_info) <- "file_name"
rasters_info$ras_name <- case_when(grepl("dep",rasters_info$file_name)==T ~ "depth",
grepl("vel",rasters_info$file_name)==T ~ "water_velocity",
grepl("svg",rasters_info$file_name)==T ~ "svg",
grepl("Vel",rasters_info$file_name)==T ~ "vel_angle",
grepl("SVG",rasters_info$file_name)==T ~ "svg_angle")
rasters_info$discharge <- case_when(grepl("10",rasters_info$file_name)==T ~ 10,
grepl("20",rasters_info$file_name)==T ~ 20,
grepl("30",rasters_info$file_name)==T ~ 30,
grepl("40",rasters_info$file_name)==T ~ 40,
grepl("50",rasters_info$file_name)==T ~ 50,
grepl("60",rasters_info$file_name)==T ~ 60,
grepl("70",rasters_info$file_name)==T ~ 70,
grepl("80",rasters_info$file_name)==T ~ 80,)
rasters_info$directional <- case_when(grepl("Ang",rasters_info$file_name)==T ~ T,
grepl("Ang",rasters_info$file_name)==F ~ F)
```
### The loop - barbel
```{r parameterise elements for barbel,echo=F}
#get coefs for training fish
coefs <- all_coefs
spec="Barbel"
## state 1
if(spec=="Barbel"){
st1_coefs <- coefs %>% filter(state==1 & terms %in% barb_st1_terms)
} else if(spec=="Grayling"){
st1_coefs <- coefs %>% filter(state==1 & terms %in% gray_st1_terms)
}
#remove log sl since using adjusted kernels
st1_coefs <- st1_coefs %>% filter(terms!="log_sl_")
st1_coefs$terms <- st1_coefs$terms %>% str_replace_all("log_sl_","log(sl)")
#st1_coefs$terms <- st1_coefs[!grepl("start", st1_coefs$terms),]
#id terms where starts are
p <- unique(st1_coefs$terms)[grep("_start",unique(st1_coefs$terms))]
t <-strsplit(p,":")
#to get new names - but do the replace after making means.
for(i in 1:length(t)){
t2<- t[[i]]
t_st <- t2[grep("_start",t2)]
t_param <- t_st %>% gsub("_start","",.)
t_param <- paste0("previous(",t_param,")")
new_params_names <- cbind(t_st,t_param) %>% as.data.frame()
if(i==1){
st1_start_terms_names <- new_params_names
} else{
st1_start_terms_names <- rbind(st1_start_terms_names,new_params_names)
}
}
#then iterate to replace
for(i in 1:nrow(st1_start_terms_names)){
bb <- st1_start_terms_names[i,] #get a row
st1_coefs$terms <- st1_coefs$terms %>% str_replace_all(.,bb$t_st,bb$t_param)
}
## state 2
#get for species
if(spec=="Barbel"){
st2_coefs <- coefs %>% filter(state==2 & terms %in% barb_st2_terms)
} else if(spec=="Grayling"){
st2_coefs <- coefs %>% filter(state==2 & terms %in% gray_st2_terms)
}
#remove log sl since using adjusted kernels
st2_coefs <- st2_coefs %>% filter(terms!="log_sl_")
st2_coefs$terms <- st2_coefs$terms %>% str_replace_all("log_sl_","log(sl)")
#id terms where starts are
p <- unique(st2_coefs$terms)[grep("_start",unique(st2_coefs$terms))]
t <-strsplit(p,":")
#to get new names
for(i in 1:length(t)){
t2<- t[[i]]
t_st <- t2[grep("_start",t2)]
t_param <- t_st %>% gsub("_start","",.)
t_param <- paste0("previous(",t_param,")")
new_params_names <- cbind(t_st,t_param) %>% as.data.frame()
if(i==1){
st2_start_terms_names <- new_params_names
} else{
st2_start_terms_names <- rbind(st2_start_terms_names,new_params_names)
}
}
#then iterate to replace
for(i in 1:nrow(st2_start_terms_names)){
bb <- st2_start_terms_names[i,] #get a row
st2_coefs$terms <- st2_coefs$terms %>% str_replace_all(.,bb$t_st,bb$t_param)
}
#calculate mean and sd, minus outliers for both states
st1_means <- st1_coefs %>% group_by(terms) %>%
summarise(coef=coef,q1=quantile(coef, na.rm = T)[2],q3=quantile(coef, na.rm = T)[4],iqr=q3-q1,low.fence=q1-1.5*iqr,high.fence=q3+1.5*iqr) %>%
ungroup() %>% filter(coef<=high.fence & coef>=low.fence) %>% #remove outliers
group_by(terms) %>%
summarise(mean_coef=mean(coef,na.rm=T),sd_coef=sd(coef,na.rm=T)) %>% #calc means
filter(mean_coef !=0 & sd_coef !=0) #remove terms that are 0
#get covariance
#state 1
st1_mat <- st1_coefs %>% #remove outliers?
group_by(terms) %>% summarise(coef=coef,q1=quantile(coef, na.rm = T)[2],q3=quantile(coef, na.rm = T)[4],iqr=q3-q1,low.fence=q1-1.5*iqr,high.fence=q3+1.5*iqr,fish_id=fish_id) %>% ungroup() %>%
filter(coef<=high.fence & coef>=low.fence & terms %in% st1_means$terms) %>%
dplyr::select(terms,coef,fish_id)
st1_mat_prep <- spread(data=st1_mat,key=terms,value=coef)#[c(9:43)]
st1_cov <- stats::cov(st1_mat_prep[2:ncol(st1_mat_prep)],use = "complete.obs")
#state 2
st2_means <- st2_coefs %>% group_by(terms) %>% summarise(coef=coef,q1=quantile(coef, na.rm = T)[2],q3=quantile(coef, na.rm = T)[4],iqr=q3-q1,low.fence=q1-1.5*iqr,high.fence=q3+1.5*iqr) %>% ungroup() %>% filter(coef<=high.fence & coef>=low.fence)%>% #remove outliers
group_by(terms) %>%
summarise(mean_coef=mean(coef,na.rm=T),sd_coef=sd(coef,na.rm=T)) %>% #calc means
filter(mean_coef !=0 & sd_coef !=0) #remove terms that are 0
#get covariance
st2_mat <- st2_coefs %>% #remove outliers
group_by(terms) %>% summarise(coef=coef,q1=quantile(coef, na.rm = T)[2],q3=quantile(coef, na.rm = T)[4],iqr=q3-q1,low.fence=q1-1.5*iqr,high.fence=q3+1.5*iqr,fish_id=fish_id) %>% ungroup() %>%
filter(coef<=high.fence & coef>=low.fence & terms %in% st2_means$terms) %>% #remove terms that have mean == 0 exactly
dplyr::select(terms,coef,fish_id)
st2_mat_prep <- spread(data=st2_mat,key=terms,value=coef)
st2_cov <- stats::cov(st2_mat_prep[2:ncol(st2_mat_prep)],use = "complete.obs")
## movement kernels
st1_move_means <- movement_distr_dat %>% filter(state==1 & species==spec) %>%
#remove outliers for log sl coef
mutate(q1=quantile(log_sl_coef, na.rm = T)[2],q3=quantile(log_sl_coef, na.rm = T)[4],iqr=q3-q1,low.fence=q1-1.5*iqr,high.fence=q3+1.5*iqr,fish_id=fish_id) %>%
filter(log_sl_coef<=high.fence & log_sl_coef>=low.fence & sl_shape_adj>0) %>%
summarise(med=median(sl_shape_adj), mean_shape=mean(sl_shape_adj),sd_shape=sd(sl_shape_adj),mean_scale=mean(sl_scale),
sd_scale=sd(sl_scale),sd_kappa=sd(ta_kappa_adj),
mean_kappa=mean(ta_kappa_adj),mean_mu=mean(ta_mu))
st2_move_means <- movement_distr_dat %>% filter(state==2 & species==spec) %>%
#remove outliers for log sl coef
mutate(q1=quantile(log_sl_coef, na.rm = T)[2],q3=quantile(log_sl_coef, na.rm = T)[4],iqr=q3-q1,low.fence=q1-1.5*iqr,high.fence=q3+1.5*iqr,fish_id=fish_id) %>%
filter(log_sl_coef<=high.fence & log_sl_coef>=low.fence & sl_shape_adj>0)
#accoutn for the single grayling that this is an issue.
rows_st2 <- nrow(st2_move_means)
if(nrow(st2_move_means)>1) {
st2_move_means <-st2_move_means %>%
summarise(mean_shape=mean(sl_shape_adj,na.rm=T),sd_shape=sd(sl_shape_adj),mean_scale=mean(sl_scale),
sd_scale=sd(sl_scale),sd_kappa=sd(ta_kappa_adj),
mean_kappa=mean(ta_kappa_adj),mean_mu=mean(ta_mu))
} else if (nrow(st2_move_means)==1){
st2_move_means <- st2_move_means %>%
summarise(sl_shape=sl_shape_adj,scale=sl_scale,
ta_kappa = ta_kappa_adj) #so using their values
}
```
```{r loop for barbel}
all_agents <- read.csv(paste0("PREDICTIONS - downstream start/all_barbel_up_to_agent_",(id-1),".csv"))
trans_mat <- getTrProbs(barb_hmm)[1:4] # gives transition probabilities for every time step...
disch <- 20 #change for each dicsharge - 20, 50, 80
#now updated with start step values 👍
b <-2
for(b in 1:100){ #do 100 agents
#read in real data for info used to define simulations
#eg state sequences, start loc, length
#assing the agent an id
id <- b
#get data for that fish
#extract species info, used to limit population models to that species
spec <- "Barbel"
#number of tracks
no_tracks <- 0
while(no_tracks==0){
no_tracks <- rpois(1,5.714)
}
unique_track_ids <- 1:no_tracks
#total time of track #need to do per approach
#create props state and time df per approach for filtering in loop
for(z in 1:no_tracks){
#geenrate state seq
init_state <- sample(1:2,1,1)
state_seq <- data.frame(state=init_state,step_id=1) #make blank df
q=2
while(q<180){
state_seq[q,] <- c(NA,q)
if(state_seq$state[q-1]==1){ #using state of prev step
result <- rbinom(1,1,trans_mat[1]) # using probability stays in state 1
if(result==1){
state_seq$state[q] <- 1 #stays same state
} else {
state_seq$state[q] <- 2
}
}
if(state_seq$state[q-1]==2){ #using state of prev step
result <- rbinom(1,1,trans_mat[4]) # using probability stays in state 1
if(result==1){
state_seq$state[q] <- 2 #stays same state
} else {
state_seq$state[q] <- 1
}
}
q = q+1
}
state_seq$discharge <- disch
#identify where states change and discharge changes and time_stamp changes
state_seq$part <- 0
part_id <-1
for(i in 1:length(state_seq$state)){
if(i==1){
state_seq$part[i] <- 1
}else if(state_seq$state[i]==state_seq$state[i-1]){ #to get if state or discharge changes
state_seq$part[i] <- part_id
}else{
part_id <- part_id+1
state_seq$part[i] <- part_id
}
}
#add in empty rows by splitting into parts and if next Part has diff time >x, fill in gap
props_state1 <- state_seq %>% group_by(part,state) %>% summarise(steps_to_sim=n())
props_state1$approach <- unique_track_ids[z]
#get start location
# ras2 <- rasters_info %>% filter(discharge==disch)
#ras <- ras2$file_name[1] %>% raster()
ras <- start_locs[grepl(disch,start_locs)] %>% raster()
start_loc1 <- sampleRandom(ras,1,xy=T) %>% as.data.frame() %>% dplyr::select(c(x,y))
start_loc1$approach <- unique_track_ids[z] #associate start loc w the track
if(z==1){
props_state_all <- props_state1 #df with all state seq info
start_loc_all <- start_loc1 #df with start locations
} else{
props_state_all <- rbind(props_state_all,props_state1)
start_loc_all <- rbind(start_loc_all,start_loc1)
}
}
#parameterise the agent
#selection functions
agent_st1_coefs <- mvrnorm(n=1,mu=st1_means$mean_coef,Sigma = st1_cov) %>%
as.data.frame() %>%
tibble::rownames_to_column(var="terms") %>%
rename(val=".") #select value from multivariate distr
agent_st1_coefs$term <- agent_st1_coefs$term%>%
stringr::str_remove_all("_end") #remove "_end" from terms so compatible w function
agent_st1_coefs <- agent_st1_coefs %>% dplyr::select(c(term,val)) #retain only needed data
agent_st2_coefs <- mvrnorm(n=1,mu=st2_means$mean_coef,Sigma = st2_cov) %>%
as.data.frame() %>%
tibble::rownames_to_column(var="terms") %>%
rename(val=".")
agent_st2_coefs$term <- agent_st2_coefs$term%>%
stringr::str_remove_all("_end")
agent_st2_coefs <- agent_st2_coefs %>% dplyr::select(c(term,val))
#movement kernels
agent_st1_move_params <- data.frame(shape=abs(rnorm(1,mean=st1_move_means$mean_shape,sd=st1_move_means$sd_shape)),
scale=abs(rnorm(1,mean=st1_move_means$mean_scale,sd=st1_move_means$sd_scale)),
kappa = abs(rnorm(1,mean=st1_move_means$mean_kappa,sd=st1_move_means$sd_kappa)))
agent_st2_move_params <- data.frame(shape=abs(rnorm(1,mean=st2_move_means$mean_shape,sd=st2_move_means$sd_shape)),
scale=abs(rnorm(1,mean=st2_move_means$mean_scale,sd=st2_move_means$sd_scale)),
kappa = abs(rnorm(1,mean=st2_move_means$mean_kappa,sd=st2_move_means$sd_kappa)))
for(i in 1:no_tracks){ #here extract data for specific track e.g. state sequence
#get number of steps etc to sim
props_state <- props_state_all %>% filter(approach==unique_track_ids[[i]])
#get start loc
start_loc_track <- start_loc_all %>% filter(approach==unique_track_ids[[i]])
start_loc <- c(start_loc_track$x,start_loc_track$y)
for(z in 1:length(unique(props_state$part))){ #run simulation for 1 track
#get info
dat_part <- props_state%>% filter(part==unique(props_state$part)[z])
#get state
state <- unique(dat_part$state)
if(as.numeric(disch)>80){
disch <-as.character(80)
}
#length of dat_part is the number of steps to sim
len_track <- dat_part$steps_to_sim
#note: need to construct model formula
if(state==1){
gamma_shape <- agent_st1_move_params$shape
gamma_scale <- agent_st1_move_params$scale
vonMises_kappa <- agent_st1_move_params$kappa
beta <- agent_st1_coefs$val
names(beta) <- agent_st1_coefs$term
#replace the diff w just angl
names(beta) <- names(beta) %>% str_replace_all("diff_vel_abs_ang","vel_angle")
names(beta) <- names(beta) %>% str_replace_all("diff_svg_abs_ang","svg_angle")
issf_frm <- iSSF_formula(beta)
}
if(state == 2){
gamma_shape <- agent_st2_move_params$shape
gamma_scale <- agent_st2_move_params$scale
vonMises_kappa <- agent_st2_move_params$kappa
beta <- agent_st2_coefs$val
names(beta) <- agent_st2_coefs$term
#replace the diff w just angl
names(beta) <- names(beta) %>% str_replace_all("diff_vel_abs_ang","vel_angle")
names(beta) <- names(beta) %>% str_replace_all("diff_svg_abs_ang","svg_angle")
issf_frm <- iSSF_formula(beta)
}
#get raster info for that discharge
ras_info <- rasters_info %>% filter(discharge==disch)
#prep ras list for the function
ras_lst <- iSSF_raster_list(
files = ras_info$file_name,
layer_names = ras_info$ras_name,
directional = ras_info$directional)
#convert to pi
ras_lst[[5]]$values <- ras_lst[[5]]$values / 180 * pi
ras_lst[[3]]$values <- ras_lst[[3]]$values / 180 * pi
#simulate here !
bearing_start = pi/2
sim <- iSSF_simulation(
issf_frm = issf_frm, # Named vector of beta parameter values
ras_lst = ras_lst, # list of covariate raster layers
position_start = start_loc, # starting position
bearing_start = bearing_start, # starting bearing
n_steps = n_steps, # number of samples in random step distribution
len_track = len_track, # number of steps in a track
scale = gamma_scale, # Step len scale
shape = gamma_shape, # Step len shape
kappa = vonMises_kappa ) # Turning angle concentration
trk <- sim$track %>% as.data.frame() %>% rename(x=V1,y=V2)
#add correct env data now
trk$water_velocity <- terra::extract(rast(ras_info$file_name[4]),trk[1:2])[1:nrow(trk),2]
trk$depth <- terra::extract(rast(ras_info$file_name[1]),trk[1:2])[1:nrow(trk),2]
trk$svg <- terra::extract(rast(ras_info$file_name[2]),trk[1:2])[1:nrow(trk),2]
trk$svg_ang <- terra::extract(rast(ras_info$file_name[3]),trk[1:2])[1:nrow(trk),2]
trk$vel_ang <- terra::extract(rast(ras_info$file_name[5]),trk[1:2])[1:nrow(trk),2]
#add other parameters of interest
trk$state <- state
trk$discharge <- disch
trk$part <- unique(props_state$part)[z] #to assist matching to right time stamps
#update start location
start_loc <- sim$track[nrow(sim$track),]
#then the saving the final track
if(z==1){
final_track <- trk
} else{
final_track <- rbind(final_track,trk)
}
}
final_track$approach <- unique_track_ids[[i]]
final_track$step_id <- as.numeric(rownames(final_track))
if(i==1){
final_track_all_app <- final_track
} else{
final_track_all_app <- rbind(final_track_all_app,final_track)
}
}
final_track_all_app$fish_id <- id
final_track_all <- final_track_all_app
dat_all_fin <- final_track_all
#calculate diff between fish and thing angle
dat_all_fin$x2 <- lag(dat_all_fin$x)
dat_all_fin$y2 <- lag(dat_all_fin$y)
dat_all_fin$bearing <- -with(data = dat_all_fin, atan2(y2 - y, x2 - x)) + (pi/2)
dat_all_fin$bearing <- dat_all_fin$bearing %>% cos() #in radians
dat_all_fin$diff_svg_abs_ang_end <- cos(dat_all_fin$bearing - (dat_all_fin$svg_ang*pi/180))
dat_all_fin$diff_vel_abs_ang_end <- cos(dat_all_fin$bearing - (dat_all_fin$vel_ang*pi/180))
write.csv(dat_all_fin,paste0("PREDICTIONS - downstream start/barbel/agent_",id,"_disch",disch,".csv"),row.names = F) # so will save output at each stage incase of crashing
if(b==1){
all_agents <- dat_all_fin
} else {
all_agents <- bind_rows(all_agents,dat_all_fin)
}
write.csv(all_agents,paste0("PREDICTIONS - downstream start/all_barbel_up_to_agent_",id,"_disch",disch,".csv"),row.names=F)
}
```
## Grayling
### Load in info
```{r set some variable names 2}
#read in hmms
barb_hmm <- readRDS("HMMs/barb_SI10.rds")
gray_hmm <- readRDS("HMMs/gr_SI10.rds")
n_steps <- 20 #number of steps the simulation can select from at each step
#get coefficients per state
barb_all_coef <- read.csv("FINAL RESULTS/barbel_all_coefs_both_states.csv")
gray_all_coef <- read.csv("FINAL RESULTS/grayling_all_coefs_both_states.csv")
#below - UNSTANDARDISED coefs, for all fish and species
all_coefs <- read.csv("FINAL RESULTS/all_coefs_both_states_UNSTANDARDISED.csv")
all_coefs$terms <- all_coefs$params
#remove all those where mean == 0 so will be using the formula based off all individuals
#get that
barb_means <- read.csv("FINAL RESULTS/barbel_means_both_states.csv")
barb_st1_terms <- barb_means %>% filter(mean_coef!=0 & sd_coef!=0 &state==1 )
barb_st1_terms <- unique(barb_st1_terms$terms)# %>% str_remove_all("_strdised")
#barb_st1_terms <- lapply(barb_st1_terms,gsub,pattern="_strdised",replacement="")
for(i in 1: length(barb_st1_terms)){
barb_st1_terms[[i]] <- gsub("_strdised","",barb_st1_terms[[i]])
}
barb_st2_terms <- barb_means %>% filter(mean_coef!=0 & sd_coef!=0 &state==2 )
barb_st2_terms <- unique(barb_st2_terms$terms)
for(i in 1: length(barb_st2_terms)){
barb_st2_terms[[i]] <- gsub("_strdised","",barb_st2_terms[[i]])
}
gray_means <- read.csv("FINAL RESULTS/grayling_means_both_states.csv")
gray_st1_terms <- gray_means %>% filter(mean_coef!=0 & sd_coef!=0 &state==1 )
gray_st1_terms <- unique(gray_st1_terms$terms)
for(i in 1: length(gray_st1_terms)){
gray_st1_terms[[i]] <- gsub("_strdised","",gray_st1_terms[[i]])
}
gray_st2_terms <- gray_means %>% filter(mean_coef!=0 & sd_coef!=0 &state==2 & terms!='cos_ta__strdised')
gray_st2_terms <- unique(gray_st2_terms$terms)
for(i in 1: length(gray_st2_terms)){
gray_st2_terms[[i]] <- gsub("_strdised","",gray_st2_terms[[i]])
}
#sl and ta distrs per fish per state
movement_distr_dat <- read.csv("FINAL RESULTS/movement_distr_params.csv")
#standardising info
std_info <- read.csv("data/summary_stats_for_standardising.csv")
std_info <- rename(std_info,term=Variable)
raster_list <- list.files("rasters - cropped",full.names = T)
start_locs <- list.files("start_loc_rasts",full.names=T)
```
```{r create raster list data fram 2}
#create a dataframe of raster name, raster file, and discharge
#realisitically going to do 20, 50 and 80 discharges to start with
rasters_info <- as.data.frame(raster_list)
names(rasters_info) <- "file_name"
rasters_info$ras_name <- case_when(grepl("dep",rasters_info$file_name)==T ~ "depth",
grepl("vel",rasters_info$file_name)==T ~ "water_velocity",
grepl("svg",rasters_info$file_name)==T ~ "svg",
grepl("Vel",rasters_info$file_name)==T ~ "vel_angle",
grepl("SVG",rasters_info$file_name)==T ~ "svg_angle")
rasters_info$discharge <- case_when(grepl("10",rasters_info$file_name)==T ~ 10,
grepl("20",rasters_info$file_name)==T ~ 20,
grepl("30",rasters_info$file_name)==T ~ 30,
grepl("40",rasters_info$file_name)==T ~ 40,
grepl("50",rasters_info$file_name)==T ~ 50,
grepl("60",rasters_info$file_name)==T ~ 60,
grepl("70",rasters_info$file_name)==T ~ 70,
grepl("80",rasters_info$file_name)==T ~ 80,)
rasters_info$directional <- case_when(grepl("Ang",rasters_info$file_name)==T ~ T,
grepl("Ang",rasters_info$file_name)==F ~ F)
```
### The loop - grayling
```{r parameterise elements for gryqling,echo=F}
#get coefs for training fish
coefs <- all_coefs
spec="Grayling"
## state 1
if(spec=="Barbel"){
st1_coefs <- coefs %>% filter(state==1 & terms %in% barb_st1_terms)
} else if(spec=="Grayling"){
st1_coefs <- coefs %>% filter(state==1 & terms %in% gray_st1_terms)
}
#remove log sl since using adjusted kernels
st1_coefs <- st1_coefs %>% filter(terms!="log_sl_")
st1_coefs$terms <- st1_coefs$terms %>% str_replace_all("log_sl_","log(sl)")
#remove start stuff
#st1_coefs$terms <- st1_coefs[!grepl("start", st1_coefs$terms),]
#id terms where starts are
p <- unique(st1_coefs$terms)[grep("_start",unique(st1_coefs$terms))]
t <-strsplit(p,":")
#to get new names stuff - but do the replace after making means.
for(i in 1:length(t)){
#iterate through and id start stuff
t2<- t[[i]]
t_st <- t2[grep("_start",t2)]
t_param <- t_st %>% gsub("_start","",.)
t_param <- paste0("previous(",t_param,")")
new_params_names <- cbind(t_st,t_param) %>% as.data.frame()
if(i==1){
st1_start_terms_names <- new_params_names
} else{
st1_start_terms_names <- rbind(st1_start_terms_names,new_params_names)
}
}
#then iterate to replace since. cant do in 1 line booo
for(i in 1:nrow(st1_start_terms_names)){
bb <- st1_start_terms_names[i,] #get a row
st1_coefs$terms <- st1_coefs$terms %>% str_replace_all(.,bb$t_st,bb$t_param)
}
## state 2
#get for species
if(spec=="Barbel"){
st2_coefs <- coefs %>% filter(state==2 & terms %in% barb_st2_terms)
} else if(spec=="Grayling"){
st2_coefs <- coefs %>% filter(state==2 & terms %in% gray_st2_terms)
}
#remove log sl since using adjusted kernels
st2_coefs <- st2_coefs %>% filter(terms!="log_sl_")
st2_coefs$terms <- st2_coefs$terms %>% str_replace_all("log_sl_","log(sl)")
#remove start stuff
#st2_coefs$terms <- st2_coefs[!grepl("start", st2_coefs$terms),]
#id terms where starts are
p <- unique(st2_coefs$terms)[grep("_start",unique(st2_coefs$terms))]
t <-strsplit(p,":")
#to get new names stuff - but do the replace after making means.
for(i in 1:length(t)){
#iterate through and id start stuff
t2<- t[[i]]
t_st <- t2[grep("_start",t2)]
t_param <- t_st %>% gsub("_start","",.)
t_param <- paste0("previous(",t_param,")")
new_params_names <- cbind(t_st,t_param) %>% as.data.frame()
if(i==1){
st2_start_terms_names <- new_params_names
} else{
st2_start_terms_names <- rbind(st2_start_terms_names,new_params_names)
}
}
#then iterate to replace since. cant do in 1 line booo
for(i in 1:nrow(st2_start_terms_names)){
bb <- st2_start_terms_names[i,] #get a row
st2_coefs$terms <- st2_coefs$terms %>% str_replace_all(.,bb$t_st,bb$t_param)
}
#calculate mean and sd, minus outliers for both states
st1_means <- st1_coefs %>% group_by(terms) %>%
summarise(coef=coef,q1=quantile(coef, na.rm = T)[2],q3=quantile(coef, na.rm = T)[4],iqr=q3-q1,low.fence=q1-1.5*iqr,high.fence=q3+1.5*iqr) %>%
ungroup() %>% filter(coef<=high.fence & coef>=low.fence) %>% #remove outliers
group_by(terms) %>%
summarise(mean_coef=mean(coef,na.rm=T),sd_coef=sd(coef,na.rm=T)) %>% #calc means
filter(mean_coef !=0 & sd_coef !=0) #remove terms that are 0
#get covariance stuff
#state 1
st1_mat <- st1_coefs %>% #remove outliers?
group_by(terms) %>% summarise(coef=coef,q1=quantile(coef, na.rm = T)[2],q3=quantile(coef, na.rm = T)[4],iqr=q3-q1,low.fence=q1-1.5*iqr,high.fence=q3+1.5*iqr,fish_id=fish_id) %>% ungroup() %>%
filter(coef<=high.fence & coef>=low.fence & terms %in% st1_means$terms) %>%
dplyr::select(terms,coef,fish_id)
st1_mat_prep <- spread(data=st1_mat,key=terms,value=coef)#[c(9:43)]
st1_cov <- stats::cov(st1_mat_prep[2:ncol(st1_mat_prep)],use = "complete.obs")
#state 2
st2_means <- st2_coefs %>% group_by(terms) %>% summarise(coef=coef,q1=quantile(coef, na.rm = T)[2],q3=quantile(coef, na.rm = T)[4],iqr=q3-q1,low.fence=q1-1.5*iqr,high.fence=q3+1.5*iqr) %>% ungroup() %>% filter(coef<=high.fence & coef>=low.fence)%>% #remove outliers
group_by(terms) %>%
summarise(mean_coef=mean(coef,na.rm=T),sd_coef=sd(coef,na.rm=T)) %>% #calc means
filter(mean_coef !=0 & sd_coef !=0) #remove terms that are 0
#get covariance stuff
st2_mat <- st2_coefs %>% #remove outliers?
group_by(terms) %>% summarise(coef=coef,q1=quantile(coef, na.rm = T)[2],q3=quantile(coef, na.rm = T)[4],iqr=q3-q1,low.fence=q1-1.5*iqr,high.fence=q3+1.5*iqr,fish_id=fish_id) %>% ungroup() %>%
filter(coef<=high.fence & coef>=low.fence & terms %in% st2_means$terms) %>% #remove terms that have mean == 0 exactly
dplyr::select(terms,coef,fish_id)
st2_mat_prep <- spread(data=st2_mat,key=terms,value=coef)#[c(9:43)]
st2_cov <- stats::cov(st2_mat_prep[2:ncol(st2_mat_prep)],use = "complete.obs")
## movement kernels
st1_move_means <- movement_distr_dat %>% filter(state==1 & species==spec) %>%
#remove outliers for log sl coef
mutate(q1=quantile(log_sl_coef, na.rm = T)[2],q3=quantile(log_sl_coef, na.rm = T)[4],iqr=q3-q1,low.fence=q1-1.5*iqr,high.fence=q3+1.5*iqr,fish_id=fish_id) %>%
#ungroup() %>%
filter(log_sl_coef<=high.fence & log_sl_coef>=low.fence & sl_shape_adj>0) %>%
summarise(med=median(sl_shape_adj), mean_shape=mean(sl_shape_adj),sd_shape=sd(sl_shape_adj),mean_scale=mean(sl_scale),
sd_scale=sd(sl_scale),sd_kappa=sd(ta_kappa_adj),
mean_kappa=mean(ta_kappa_adj),mean_mu=mean(ta_mu))
st2_move_means <- movement_distr_dat %>% filter(state==2 & species==spec) %>%
#remove outliers for log sl coef
mutate(q1=quantile(log_sl_coef, na.rm = T)[2],q3=quantile(log_sl_coef, na.rm = T)[4],iqr=q3-q1,low.fence=q1-1.5*iqr,high.fence=q3+1.5*iqr,fish_id=fish_id) %>%
filter(log_sl_coef<=high.fence & log_sl_coef>=low.fence & sl_shape_adj>0)
#accoutn for the single grayling that this is an issue.
rows_st2 <- nrow(st2_move_means)
if(nrow(st2_move_means)>1) {
st2_move_means <-st2_move_means %>%
summarise(mean_shape=mean(sl_shape_adj,na.rm=T),sd_shape=sd(sl_shape_adj),mean_scale=mean(sl_scale),
sd_scale=sd(sl_scale),sd_kappa=sd(ta_kappa_adj),
mean_kappa=mean(ta_kappa_adj),mean_mu=mean(ta_mu))
} else if (nrow(st2_move_means)==1){
st2_move_means <- st2_move_means %>%
summarise(sl_shape=sl_shape_adj,scale=sl_scale,
ta_kappa = ta_kappa_adj) #so using their values
}
```
```{r loop for grayling}
all_agents <- read.csv(paste0("PREDICTIONS - downstream start/all_grayling_up_to_agent_",(id-1),".csv"))
trans_mat <- getTrProbs(gray_hmm)[1:4] # gives transition probabilities for every time step...
disch <- 50 #edit this for diff discharges
for(b in 1:100){ #do 100 agents
#read in real data for info used to define simulations
#eg state sequences, start loc, length
#assing the agent an id
id <- b
#get data for that fish
#extract species info, used to limit population models to that species
spec <- "Grayling"
#number of tracks
no_tracks <- 0
while(no_tracks==0){
no_tracks <- rpois(1,3.769) #to prevent returns of 0
}
unique_track_ids <- 1:no_tracks
#total time of track #need to do per appraoch - in a loop?
#create props state and time df per approach for filtering in loop
for(z in 1:no_tracks){
#geenrate state seq from stuff
init_state <- sample(1:2,1,1)
state_seq <- data.frame(state=init_state,step_id=1) #make blank df
q=2
while(q<180){
state_seq[q,] <- c(NA,q)
if(state_seq$state[q-1]==1){ #using state of prev step
result <- rbinom(1,1,trans_mat[1]) # using probability stays in state 1
if(result==1){
state_seq$state[q] <- 1 #stays same state
} else {
state_seq$state[q] <- 2
}
}
if(state_seq$state[q-1]==2){ #using state of prev step
result <- rbinom(1,1,trans_mat[4]) # using probability stays in state 1
if(result==1){
state_seq$state[q] <- 2 #stays same state
} else {
state_seq$state[q] <- 1
}
}
q = q+1
}
state_seq$discharge <- disch
#identify where states change and discharge changes and time_stamp changes
state_seq$part <- 0
part_id <-1
for(i in 1:length(state_seq$state)){
if(i==1){
state_seq$part[i] <- 1
}else if(state_seq$state[i]==state_seq$state[i-1]){ #to get if state or discharge changes
state_seq$part[i] <- part_id
}else{
part_id <- part_id+1
state_seq$part[i] <- part_id
}
}
props_state1 <- state_seq %>% group_by(part,state) %>% summarise(steps_to_sim=n())
props_state1$approach <- unique_track_ids[z]
ras <- start_locs[grepl(disch,start_locs)] %>% raster()
start_loc1 <- sampleRandom(ras,1,xy=T) %>% as.data.frame() %>% dplyr::select(c(x,y))
start_loc1$approach <- unique_track_ids[z] #associate start loc w the track
if(z==1){
props_state_all <- props_state1 #df with all state seq info
start_loc_all <- start_loc1 #df with start locations
} else{
props_state_all <- rbind(props_state_all,props_state1)
start_loc_all <- rbind(start_loc_all,start_loc1)
}
}
#parameterise the agent
#selection functions
agent_st1_coefs <- mvrnorm(n=1,mu=st1_means$mean_coef,Sigma = st1_cov) %>%
as.data.frame() %>%
tibble::rownames_to_column(var="terms") %>%
rename(val=".") #select value from multivariate distr
agent_st1_coefs$term <- agent_st1_coefs$term%>%
stringr::str_remove_all("_end") #remove "_end" so compatible w function
agent_st1_coefs <- agent_st1_coefs %>% dplyr::select(c(term,val)) #retain only needed data
agent_st2_coefs <- mvrnorm(n=1,mu=st2_means$mean_coef,Sigma = st2_cov) %>%
as.data.frame() %>%
tibble::rownames_to_column(var="terms") %>%
rename(val=".")
agent_st2_coefs$term <- agent_st2_coefs$term%>%
stringr::str_remove_all("_end")
agent_st2_coefs <- agent_st2_coefs %>% dplyr::select(c(term,val))
#movement kernels
agent_st1_move_params <- data.frame(shape=abs(rnorm(1,mean=st1_move_means$mean_shape,sd=st1_move_means$sd_shape)),
scale=abs(rnorm(1,mean=st1_move_means$mean_scale,sd=st1_move_means$sd_scale)),
kappa = abs(rnorm(1,mean=st1_move_means$mean_kappa,sd=st1_move_means$sd_kappa)))
agent_st2_move_params <- data.frame(shape=abs(rnorm(1,mean=st2_move_means$mean_shape,sd=st2_move_means$sd_shape)),
scale=abs(rnorm(1,mean=st2_move_means$mean_scale,sd=st2_move_means$sd_scale)),
kappa = abs(rnorm(1,mean=st2_move_means$mean_kappa,sd=st2_move_means$sd_kappa)))
for(i in 1:no_tracks){ #here extract data for specific track e.g. state sequence
#get number of steps etc to sim
props_state <- props_state_all %>% filter(approach==unique_track_ids[[i]])
#get start loc
start_loc_track <- start_loc_all %>% filter(approach==unique_track_ids[[i]])
start_loc <- c(start_loc_track$x,start_loc_track$y)
for(z in 1:length(unique(props_state$part))){ #run simulation for 1 track
dat_part <- props_state%>% filter(part==unique(props_state$part)[z])
state <- unique(dat_part$state)
if(as.numeric(disch)>80){
disch <-as.character(80)
}
len_track <- dat_part$steps_to_sim
if(state==1){
gamma_shape <- agent_st1_move_params$shape
gamma_scale <- agent_st1_move_params$scale
vonMises_kappa <- agent_st1_move_params$kappa
beta <- agent_st1_coefs$val
names(beta) <- agent_st1_coefs$term
names(beta) <- names(beta) %>% str_replace_all("diff_vel_abs_ang","vel_angle")
names(beta) <- names(beta) %>% str_replace_all("diff_svg_abs_ang","svg_angle")
issf_frm <- iSSF_formula(beta)
}
if(state == 2){
gamma_shape <- agent_st2_move_params$shape
gamma_scale <- agent_st2_move_params$scale
vonMises_kappa <- agent_st2_move_params$kappa
beta <- agent_st2_coefs$val
names(beta) <- agent_st2_coefs$term
names(beta) <- names(beta) %>% str_replace_all("diff_vel_abs_ang","vel_angle")
names(beta) <- names(beta) %>% str_replace_all("diff_svg_abs_ang","svg_angle")
issf_frm <- iSSF_formula(beta)
}
#get raster info for that discharge
ras_info <- rasters_info %>% filter(discharge==disch)
#prep ras list for the function
ras_lst <- iSSF_raster_list(
files = ras_info$file_name,
layer_names = ras_info$ras_name,
directional = ras_info$directional)
#convert to pi
ras_lst[[5]]$values <- ras_lst[[5]]$values / 180 * pi
ras_lst[[3]]$values <- ras_lst[[3]]$values / 180 * pi
#simulate here !
bearing_start = pi/2
sim <- iSSF_simulation(
issf_frm = issf_frm, # Named vector of beta parameter values
ras_lst = ras_lst, # list of covariate raster layers
position_start = start_loc, # starting position
bearing_start = bearing_start, # starting bearing
n_steps = n_steps, # number of samples in random step distribution
len_track = len_track, # number of steps in a track
scale = gamma_scale, # Step len scale
shape = gamma_shape, # Step len shape
kappa = vonMises_kappa ) # Turning angle concentration
trk <- sim$track %>% as.data.frame() %>% rename(x=V1,y=V2)
#add correct env data now
trk$water_velocity <- terra::extract(rast(ras_info$file_name[4]),trk[1:2])[1:nrow(trk),2]
trk$depth <- terra::extract(rast(ras_info$file_name[1]),trk[1:2])[1:nrow(trk),2]
trk$svg <- terra::extract(rast(ras_info$file_name[2]),trk[1:2])[1:nrow(trk),2]
trk$svg_ang <- terra::extract(rast(ras_info$file_name[3]),trk[1:2])[1:nrow(trk),2]
trk$vel_ang <- terra::extract(rast(ras_info$file_name[5]),trk[1:2])[1:nrow(trk),2]
trk$state <- state
trk$discharge <- disch
trk$part <- unique(props_state$part)[z] #to assist matching to right time stamps
#update start location
start_loc <- sim$track[nrow(sim$track),]
#then the saving the final track stuff
if(z==1){
final_track <- trk
} else{
final_track <- rbind(final_track,trk)
}
}
final_track$approach <- unique_track_ids[[i]]
final_track$step_id <- as.numeric(rownames(final_track))
if(i==1){
final_track_all_app <- final_track
} else{
final_track_all_app <- rbind(final_track_all_app,final_track)
}
}
final_track_all_app$fish_id <- id
final_track_all <- final_track_all_app
dat_all_fin <- final_track_all
dat_all_fin$x2 <- lag(dat_all_fin$x)
dat_all_fin$y2 <- lag(dat_all_fin$y)
dat_all_fin$bearing <- -with(data = dat_all_fin, atan2(y2 - y, x2 - x)) + (pi/2)
dat_all_fin$bearing <- dat_all_fin$bearing %>% cos() #in radians
dat_all_fin$diff_svg_abs_ang_end <- cos(dat_all_fin$bearing - (dat_all_fin$svg_ang*pi/180))
dat_all_fin$diff_vel_abs_ang_end <- cos(dat_all_fin$bearing - (dat_all_fin$vel_ang*pi/180))
write.csv(dat_all_fin,paste0("PREDICTIONS - downstream start/grayling/agent_",id,"_disch",disch,".csv"),row.names = F) # so will save output at each stage incase of crashing
if(b==1){
all_agents <- dat_all_fin
} else {
all_agents <- bind_rows(all_agents,dat_all_fin)
}
write.csv(all_agents,paste0("PREDICTIONS - downstream start/all_grayling_up_to_agent_",id,"_disch",disch,".csv"),row.names=F)
}
```