02 - validating SSFs via simulations.Rmd
---
title: "Validating SSFs"
author: "Rachel Mawer"
date: "2023-09-14"
output:
html_document:
toc: true
toc_float: true
theme: cerulean
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = F,message=F)
#main packages #delete/add as needed
library(dplyr)
library(ggplot2)
library(knitr)
library(ggpubr)
library(plotly)
library(reactable)
library(amt)
library(MASS) #for cov
library(diagis)
library(mvtnorm) #for multivariate normal dist
library(tidyr)
Rcpp::sourceCpp("cpp/simulation_function.cpp", showOutput = F, verbose = F) #james simulation function
source("cpp/simulation_function_preprocessing.R")
```
## About
File for validating the HMM-SSFs. The idea is to use mean + SD of a population model made from models for n - 1 individuals to simulate tracks and compare to the tracks of the individual not used to make the model. Simulated tracks will start in the same location as the missing fish's tracks and be simulated for the same amount of time (or steps). Since there are behavioural states, state switches will occur at the same step IDs as in the real track.
Simulations will be evaluated in terms of:
* Habitat usage - is the distribution of the real track(s) usage within the distribution of simulated tracks?
* Spatial usage - do the simulated tracks occupy a similar space to the real track(s)?
* Step length and turning angle distributions - but error to be considered so might not be best
When doing the population models, will remove outliers still. It will then also be worth noting if the test fish was an outlier or not - e.g. if poor at predicting movement of the fish with outlier coefficients, can say it's due to that; if still ok for predicting outlier distributions then no worry.
Since we have two behavioural states and thus effectively two habitat selection functions, the same state sequence for the real track(s) will be used to guide the simulated.
Outputs are saved for plotting in a line-up.
## Loop for all fish
Effectively, this code will loop through every individual and do the cross validation simulations.
It will save:
* all simulated tracks
* simulated data with missing time steps removed, binded with the real fish data
### Load in info
So don't need to scroll up for this
```{r set some variable names and stuff 2}
n_steps <- 20 #number of steps the simulation can select from at each step
data_files <- list.files("data/final ssf + hmm df",full.names = T) #get dataframes
st1_file_names <- data_files[grepl("state1",data_files)] #get state 1 files
st2_file_names <- data_files[grepl("state1",data_files)] #get state 2 files
fish_ids <- list.files("ssf modelling - file per fish")[1:31] #get fish IDs for later stuff
#model_loc <-list.files("ssf modelling 2 electric boogaloo",full.name=T)[1:31] #get final model st
model_loc <-list.files("ssf modelling - file per individual fish",full.name=T)[1:31] #get final model st
barb_all_coef <- read.csv("FINAL RESULTS/barbel_all_coefs_both_states.csv")
gray_all_coef <- read.csv("FINAL RESULTS/grayling_all_coefs_both_states.csv")
#below - UNSTANDARDISED coefs, for all fish and species
all_coefs <- read.csv("FINAL RESULTS/all_coefs_both_states_UNSTANDARDISED.csv")
all_coefs$terms <- all_coefs$params
#remove all terms those where mean == 0 so will be using the formula based off all individuals
#get that
barb_means <- read.csv("FINAL RESULTS/barbel_means_both_states.csv")
barb_st1_terms <- barb_means %>% filter(mean_coef!=0 & sd_coef!=0 &state==1 )
barb_st1_terms <- unique(barb_st1_terms$terms)# %>% str_remove_all("_strdised")
#barb_st1_terms <- lapply(barb_st1_terms,gsub,pattern="_strdised",replacement="")
for(i in 1: length(barb_st1_terms)){
barb_st1_terms[[i]] <- gsub("_strdised","",barb_st1_terms[[i]])
}
barb_st2_terms <- barb_means %>% filter(mean_coef!=0 & sd_coef!=0 &state==2 )
barb_st2_terms <- unique(barb_st2_terms$terms)
for(i in 1: length(barb_st2_terms)){
barb_st2_terms[[i]] <- gsub("_strdised","",barb_st2_terms[[i]])
}
gray_means <- read.csv("FINAL RESULTS/grayling_means_both_states.csv")
gray_st1_terms <- gray_means %>% filter(mean_coef!=0 & sd_coef!=0 &state==1 )
gray_st1_terms <- unique(gray_st1_terms$terms)
for(i in 1: length(gray_st1_terms)){
gray_st1_terms[[i]] <- gsub("_strdised","",gray_st1_terms[[i]])
}
gray_st2_terms <- gray_means %>% filter(mean_coef!=0 & sd_coef!=0 &state==2 & terms!='cos_ta__strdised')
gray_st2_terms <- unique(gray_st2_terms$terms)
for(i in 1: length(gray_st2_terms)){
gray_st2_terms[[i]] <- gsub("_strdised","",gray_st2_terms[[i]])
}
#sl and ta distrs per fish per state
movement_distr_dat <- read.csv("FINAL RESULTS/movement_distr_params.csv")
#standardising info
std_info <- read.csv("data/summary_stats_for_standardising.csv")
std_info <- rename(std_info,term=Variable)
raster_list <- list.files("rasters - cropped",full.names = T)
```
```{r create raster list data fram 2}
#create a dataframe of raster name, raster file, and discharge
rasters_info <- as.data.frame(raster_list)
names(rasters_info) <- "file_name"
rasters_info$ras_name <- case_when(grepl("dep",rasters_info$file_name)==T ~ "depth",
grepl("vel",rasters_info$file_name)==T ~ "water_velocity",
grepl("svg",rasters_info$file_name)==T ~ "svg",
grepl("Vel",rasters_info$file_name)==T ~ "vel_angle",
grepl("SVG",rasters_info$file_name)==T ~ "svg_angle")
rasters_info$discharge <- case_when(grepl("10",rasters_info$file_name)==T ~ 10,
grepl("20",rasters_info$file_name)==T ~ 20,
grepl("30",rasters_info$file_name)==T ~ 30,
grepl("40",rasters_info$file_name)==T ~ 40,
grepl("50",rasters_info$file_name)==T ~ 50,
grepl("60",rasters_info$file_name)==T ~ 60,
grepl("70",rasters_info$file_name)==T ~ 70,
grepl("80",rasters_info$file_name)==T ~ 80,)
rasters_info$directional <- case_when(grepl("Ang",rasters_info$file_name)==T ~ T,
grepl("Ang",rasters_info$file_name)==F ~ F)
```
### The loop
```{r the loop loop}
b <-1
for(b in 1:length(fish_ids)){
#read in real data for info used to define simulations
#eg state sequences, start loc, length
#pick a fish id to use
id <- fish_ids[b]
#get data for that fish
data_for_fish <- data_files[grepl(id,data_files)]
#make large df with both states
for(i in 1:length(data_for_fish)){
temp <- read.csv(data_for_fish[i]) %>% filter(case_==T) #keep only true steps
if(i==1){
dat <- temp
} else{
dat <- rbind(dat,temp)
}
}
#extract species info, used to limit population models to that species
spec <- dat$species %>% unique()
#convert to time stamps
dat$time1 <- as.POSIXct(dat$t1_)
dat$time2 <- as.POSIXct(dat$t2_)
#order by t1
dat2 <- arrange(dat,time1)
#number of tracks
no_tracks <- length(unique(dat2$approach))
unique_track_ids <- unique(dat2$approach)
#total time of track #need to do per approach
#create props state and time df per approach for filtering in loop
for(z in 1:no_tracks){
dat3 <- dat2 %>% filter(approach==unique_track_ids[z]) #filter for one track
track_length <- difftime(max(dat3$time2),min(dat3$time1),units="secs")
#doing per time in state
no_steps <- track_length/20
#make numeric and round up
no_steps <- no_steps %>% as.numeric() %>% round(digits=0)
#get state sequence
state_seq <- cbind(dat3$state, round(dat3$discharge_end,digits=-1),dat3$t1_) %>% as.data.frame()
names(state_seq) <- c("state","discharge","time")
state_seq$time <- as.POSIXct(state_seq$time)
state_seq$difftime <-difftime(state_seq$time,lag(state_seq$time),units="secs") %>% as.numeric
#identify where states change and discharge changes and time_stamp changes
state_seq$part <- 0
part_id <-1
for(i in 1:length(state_seq$state)){
if(i==1){
state_seq$part[i] <- 1
}else if(state_seq$state[i]==state_seq$state[i-1] & state_seq$discharge[i]==state_seq$discharge[i-1] & state_seq$difftime[i]==20){ #to get if state or discharge changes
state_seq$part[i] <- part_id
}else{
part_id <- part_id+1
state_seq$part[i] <- part_id
}
}
st_row <- 0 #starting row
#add in empty rows by splitting into parts and if next Part has diff time >x, fill in gap
for(i in 1:length(unique(state_seq$part))){
pt <- filter(state_seq,part==unique(state_seq$part)[i])
nrows <- nrow(pt) + st_row #get row index for main state_seq df
if(i!=length(unique(state_seq$part))){
if(state_seq$difftime[nrows+1]>20){#if gap >20...
rows_to_add <- state_seq$difftime[nrows+1]/20 #get number of rows to add
#make new df to bind
new_rows <- data.frame("state"=rep(0,rows_to_add),"discharge"=rep(0,rows_to_add),part=rep(0,rows_to_add),difftime=rep(0,rows_to_add))
new_rows$time <- 1:nrow(new_rows)*20+pt$time[nrow(pt)]#add correct time stamps
new_rows$discharge <- unique(pt$discharge)
new_rows$state <- unique(pt$state)
new_rows$part <- unique(pt$part)
pt <- rbind(pt,new_rows)
}
st_row <- nrows
}
if(i==1){
state_seq2 <- pt
} else{
state_seq2 <-rbind(state_seq2,pt)
}
}
props_state1 <- state_seq2 %>% group_by(part,discharge,state) %>% summarise(duration=difftime(max(time),min(time),units="secs"),steps_to_sim=(as.numeric(duration)/20)+1)
props_state1$approach <- unique_track_ids[z]
# save time stamps
real_time_steps1 <- state_seq[c("time","part")]
real_time_steps1$approach <- unique_track_ids[z]
#get start location
start_loc1 <- dat3[1,c("x1_","y1_")]
start_loc1$approach <- unique_track_ids[z]
if(z==1){
props_state_all <- props_state1 #df with all state seq info
real_time_steps_all <- real_time_steps1 #df with real time steps
start_loc_all <- start_loc1 #df with start locations
} else{
props_state_all <- rbind(props_state_all,props_state1)
real_time_steps_all <- rbind(real_time_steps_all,real_time_steps1)
start_loc_all <- rbind(start_loc_all,start_loc1)
}
}
#get coefs for training fish
coefs <- all_coefs %>% filter(fish_id != id & species==spec)
## state 1
if(spec=="Barbel"){
st1_coefs <- coefs %>% filter(state==1 & terms %in% barb_st1_terms)
} else if(spec=="Grayling"){
st1_coefs <- coefs %>% filter(state==1 & terms %in% gray_st1_terms)
}
#remove log sl since using adjusted kernels
st1_coefs <- st1_coefs %>% filter(terms!="log_sl_")
st1_coefs$terms <- st1_coefs$terms %>% str_replace_all("log_sl_","log(sl)")
#remove start stuff
#id terms where starts are
p <- unique(st1_coefs$terms)[grep("_start",unique(st1_coefs$terms))]
t <-strsplit(p,":")
#to get new names stuff - but do the replace after making means.
for(i in 1:length(t)){
#iterate through and id start terms
t2<- t[[i]]
t_st <- t2[grep("_start",t2)]
t_param <- t_st %>% gsub("_start","",.)
t_param <- paste0("previous(",t_param,")")
new_params_names <- cbind(t_st,t_param) %>% as.data.frame()
if(i==1){
st1_start_terms_names <- new_params_names
} else{
st1_start_terms_names <- rbind(st1_start_terms_names,new_params_names)
}
}
#then iterate to replace since not working in one line
for(i in 1:nrow(st1_start_terms_names)){
bb <- st1_start_terms_names[i,] #get a row
st1_coefs$terms <- st1_coefs$terms %>% str_replace_all(.,bb$t_st,bb$t_param)
}
## state 2
#get for species
if(spec=="Barbel"){
st2_coefs <- coefs %>% filter(state==2 & terms %in% barb_st2_terms)
} else if(spec=="Grayling"){
st2_coefs <- coefs %>% filter(state==2 & terms %in% gray_st2_terms)
}
#remove log sl since using adjusted kernels
st2_coefs <- st2_coefs %>% filter(terms!="log_sl_")
st2_coefs$terms <- st2_coefs$terms %>% str_replace_all("log_sl_","log(sl)")
#remove start stuff
#st2_coefs$terms <- st2_coefs[!grepl("start", st2_coefs$terms),]
#id terms where starts are
p <- unique(st2_coefs$terms)[grep("_start",unique(st2_coefs$terms))]
t <-strsplit(p,":")
for(i in 1:length(t)){
#iterate through and id start stuff
t2<- t[[i]]
t_st <- t2[grep("_start",t2)]
t_param <- t_st %>% gsub("_start","",.)
t_param <- paste0("previous(",t_param,")")
new_params_names <- cbind(t_st,t_param) %>% as.data.frame()
if(i==1){
st2_start_terms_names <- new_params_names
} else{
st2_start_terms_names <- rbind(st2_start_terms_names,new_params_names)
}
}
for(i in 1:nrow(st2_start_terms_names)){
bb <- st2_start_terms_names[i,] #get a row
st2_coefs$terms <- st2_coefs$terms %>% str_replace_all(.,bb$t_st,bb$t_param)
}
#calculate mean and sd, minus outliers for both states
st1_means <- st1_coefs %>% group_by(terms) %>%
summarise(coef=coef,q1=quantile(coef, na.rm = T)[2],q3=quantile(coef, na.rm = T)[4],iqr=q3-q1,low.fence=q1-1.5*iqr,high.fence=q3+1.5*iqr) %>%
ungroup() %>% filter(coef<=high.fence & coef>=low.fence) %>% #remove outliers
group_by(terms) %>%
summarise(mean_coef=mean(coef,na.rm=T),sd_coef=sd(coef,na.rm=T)) %>% #calc means
filter(mean_coef !=0 & sd_coef !=0) #remove terms that are 0
#get covariance stuff
#state 1
st1_mat <- st1_coefs %>%
group_by(terms) %>% summarise(coef=coef,q1=quantile(coef, na.rm = T)[2],q3=quantile(coef, na.rm = T)[4],iqr=q3-q1,low.fence=q1-1.5*iqr,high.fence=q3+1.5*iqr,fish_id=fish_id) %>% ungroup() %>%
filter(coef<=high.fence & coef>=low.fence & terms %in% st1_means$terms) %>%
dplyr::select(terms,coef,fish_id)
st1_mat_prep <- spread(data=st1_mat,key=terms,value=coef)#
st1_cov <- stats::cov(st1_mat_prep[2:ncol(st1_mat_prep)],use = "complete.obs")
#state 2
st2_means <- st2_coefs %>% group_by(terms) %>% summarise(coef=coef,q1=quantile(coef, na.rm = T)[2],q3=quantile(coef, na.rm = T)[4],iqr=q3-q1,low.fence=q1-1.5*iqr,high.fence=q3+1.5*iqr) %>% ungroup() %>% filter(coef<=high.fence & coef>=low.fence)%>% #remove outliers
group_by(terms) %>%
summarise(mean_coef=mean(coef,na.rm=T),sd_coef=sd(coef,na.rm=T)) %>% #calc means
filter(mean_coef !=0 & sd_coef !=0) #remove terms that are 0
#get covariance stuff
st2_mat <- st2_coefs %>%
group_by(terms) %>% summarise(coef=coef,q1=quantile(coef, na.rm = T)[2],q3=quantile(coef, na.rm = T)[4],iqr=q3-q1,low.fence=q1-1.5*iqr,high.fence=q3+1.5*iqr,fish_id=fish_id) %>% ungroup() %>%
filter(coef<=high.fence & coef>=low.fence & terms %in% st2_means$terms) %>% #remove terms that have mean == 0 exactly
dplyr::select(terms,coef,fish_id)
st2_mat_prep <- spread(data=st2_mat,key=terms,value=coef)#[c(9:43)]
st2_cov <- stats::cov(st2_mat_prep[2:ncol(st2_mat_prep)],use = "complete.obs")
## movement kernels
st1_move_means <- movement_distr_dat %>% filter(fish_id!=id & state==1 & species==spec) %>%
#remove outliers for log sl coef
mutate(q1=quantile(log_sl_coef, na.rm = T)[2],q3=quantile(log_sl_coef, na.rm = T)[4],iqr=q3-q1,low.fence=q1-1.5*iqr,high.fence=q3+1.5*iqr,fish_id=fish_id) %>%
#ungroup() %>%
filter(log_sl_coef<=high.fence & log_sl_coef>=low.fence & sl_shape_adj>0) %>%
summarise(med=median(sl_shape_adj), mean_shape=mean(sl_shape_adj),sd_shape=sd(sl_shape_adj),mean_scale=mean(sl_scale),
sd_scale=sd(sl_scale),sd_kappa=sd(ta_kappa_adj),
mean_kappa=mean(ta_kappa_adj),mean_mu=mean(ta_mu))
st2_move_means <- movement_distr_dat %>% filter(fish_id!=id & state==2 & species==spec) %>%
#remove outliers for log sl coef
mutate(q1=quantile(log_sl_coef, na.rm = T)[2],q3=quantile(log_sl_coef, na.rm = T)[4],iqr=q3-q1,low.fence=q1-1.5*iqr,high.fence=q3+1.5*iqr,fish_id=fish_id) %>%
filter(log_sl_coef<=high.fence & log_sl_coef>=low.fence & sl_shape_adj>0)
#accoutn for the single grayling that this is an issue.
rows_st2 <- nrow(st2_move_means)
if(nrow(st2_move_means)>1) {
st2_move_means <-st2_move_means %>%
summarise(mean_shape=mean(sl_shape_adj,na.rm=T),sd_shape=sd(sl_shape_adj),mean_scale=mean(sl_scale),
sd_scale=sd(sl_scale),sd_kappa=sd(ta_kappa_adj),
mean_kappa=mean(ta_kappa_adj),mean_mu=mean(ta_mu))
} else if (nrow(st2_move_means)==1){
st2_move_means <- st2_move_means %>%
summarise(sl_shape=sl_shape_adj,scale=sl_scale,
ta_kappa = ta_kappa_adj) #so using their values
}
for(q in 1:19){ #parameterise individual as one of 19 agents
#selection functions
agent_st1_coefs <- mvrnorm(n=1,mu=st1_means$mean_coef,Sigma = st1_cov) %>%
as.data.frame() %>%
tibble::rownames_to_column(var="terms") %>%
rename(val=".") #select value from multivariate distr
agent_st1_coefs$term <- agent_st1_coefs$term%>%
stringr::str_remove_all("_end") #remove "_end" from stuff so compatible w function
agent_st1_coefs <- agent_st1_coefs %>% dplyr::select(c(term,val)) #retain only needed data
agent_st2_coefs <- mvrnorm(n=1,mu=st2_means$mean_coef,Sigma = st2_cov) %>%
as.data.frame() %>%
tibble::rownames_to_column(var="terms") %>%
rename(val=".")
agent_st2_coefs$term <- agent_st2_coefs$term%>%
stringr::str_remove_all("_end")
agent_st2_coefs <- agent_st2_coefs %>% dplyr::select(c(term,val))
#movement kernels #taking absolute mean to approximate the real distribution; with hindsight a log transformation would have been easier
agent_st1_move_params <- data.frame(shape=abs(rnorm(1,mean=st1_move_means$mean_shape,sd=st1_move_means$sd_shape)),
scale=abs(rnorm(1,mean=st1_move_means$mean_scale,sd=st1_move_means$sd_scale)),
kappa = abs(rnorm(1,mean=st1_move_means$mean_kappa,sd=st1_move_means$sd_kappa)))
if(rows_st2>1){
agent_st2_move_params <- data.frame(shape=abs(rnorm(1,mean=st2_move_means$mean_shape,sd=st2_move_means$sd_shape)),
scale=abs(rnorm(1,mean=st2_move_means$mean_scale,sd=st2_move_means$sd_scale)),
kappa = abs(rnorm(1,mean=st2_move_means$mean_kappa,sd=st2_move_means$sd_kappa)))
} else if(rows_st2==1){
agent_st2_move_params <- data.frame(shape=st2_move_means$sl_shape,scale=st2_move_means$scale,kappa=st2_move_means$ta_kappa)
}
for(i in 1:no_tracks){ #here extract data for specific track e.g. state sequence
#get number of steps etc to sim
props_state <- props_state_all %>% filter(approach==unique_track_ids[[i]])
#get start loc
start_loc_track <- start_loc_all %>% filter(approach==unique_track_ids[[i]])
start_loc <- c(start_loc_track$x1_,start_loc_track$y1_)
for(z in 1:length(unique(props_state$part))){ #run simulation for 1 track
#get info
dat_part <- props_state%>% filter(part==unique(props_state$part)[z])
#get state
state <- unique(dat_part$state)
#get discharge
disch <- unique(dat_part$discharge)
if(as.numeric(disch)>80){
disch <-as.character(80)
}
#length of dat_part is the number of steps to sim
len_track <- dat_part$steps_to_sim
#note: need to construct model formula
if(state==1){
gamma_shape <- agent_st1_move_params$shape
gamma_scale <- agent_st1_move_params$scale
vonMises_kappa <- agent_st1_move_params$kappa
beta <- agent_st1_coefs$val
names(beta) <- agent_st1_coefs$term
#replace the diff w just angle
names(beta) <- names(beta) %>% str_replace_all("diff_vel_abs_ang","vel_angle")
names(beta) <- names(beta) %>% str_replace_all("diff_svg_abs_ang","svg_angle")
issf_frm <- iSSF_formula(beta)
}
if(state == 2){
gamma_shape <- agent_st2_move_params$shape
gamma_scale <- agent_st2_move_params$scale
vonMises_kappa <- agent_st2_move_params$kappa
beta <- agent_st2_coefs$val
names(beta) <- agent_st2_coefs$term
#replace the diff w just angl
names(beta) <- names(beta) %>% str_replace_all("diff_vel_abs_ang","vel_angle")
names(beta) <- names(beta) %>% str_replace_all("diff_svg_abs_ang","svg_angle")
issf_frm <- iSSF_formula(beta)
}
#get raster info for that discharge
ras_info <- rasters_info %>% filter(discharge==disch)
#prep ras list for the function
ras_lst <- iSSF_raster_list(
files = ras_info$file_name,
layer_names = ras_info$ras_name,
directional = ras_info$directional)
#convert to pi
ras_lst[[5]]$values <- ras_lst[[5]]$values / 180 * pi
ras_lst[[3]]$values <- ras_lst[[3]]$values / 180 * pi
#simulate here !
bearing_start = pi/2
sim <- iSSF_simulation(
issf_frm = issf_frm, # Named vector of beta parameter values
ras_lst = ras_lst, # list of covariate raster layers
position_start = start_loc, # starting position
bearing_start = bearing_start, # starting bearing
n_steps = n_steps, # number of samples in random step distribution
len_track = len_track, # number of steps in a track
scale = gamma_scale, # Step len scale
shape = gamma_shape, # Step len shape
kappa = vonMises_kappa ) # Turning angle concentration
trk <- sim$track %>% as.data.frame() %>% rename(x=V1,y=V2)
#add correct env data now
trk$water_velocity <- extract(rast(ras_info$file_name[4]),trk[1:2])[1:nrow(trk),2]
trk$depth <- extract(rast(ras_info$file_name[1]),trk[1:2])[1:nrow(trk),2]
trk$svg <- extract(rast(ras_info$file_name[2]),trk[1:2])[1:nrow(trk),2]
trk$svg_ang <- extract(rast(ras_info$file_name[3]),trk[1:2])[1:nrow(trk),2]
trk$vel_ang <- extract(rast(ras_info$file_name[5]),trk[1:2])[1:nrow(trk),2]
#add other parameters of itnerest
trk$state <- state
trk$discharge <- disch
trk$part <- unique(props_state$part)[z] #to assist matching to right time stamps...
#update start location
start_loc <- sim$track[nrow(sim$track),]
#then the saving the final track stuff
if(z==1){
final_track <- trk
} else{
final_track <- rbind(final_track,trk)
}
}
final_track$approach <- unique_track_ids[[i]]
if(i==1){
final_track_all_app <- final_track
} else{
final_track_all_app <- rbind(final_track_all_app,final_track)
}
}
final_track_all_app$agent <- q
final_track_all_app$fish_id <- id
if(q==1){
final_track_all <- final_track_all_app
} else{
final_track_all <- rbind(final_track_all,final_track_all_app)
}
write.csv(final_track_all,paste0("fish agents/agents_test_fish_",id,".csv"),row.names = F) # so will save output at each stage in case of crashing
}
#remove time steps not in the data
real_time_steps_start <- real_time_steps_all %>% group_by(approach,part) %>% summarise(start_time=min(time))
for(q in 1:19){ #per agent
datt <- final_track_all %>% filter(agent==q)
for(i in 1:length(unique(final_track_all$approach))){
app <- unique(final_track_all$approach)[[i]]
dat <- datt %>% filter(approach==app)
start_times <-real_time_steps_start %>% filter(approach==app)
for(z in 1:length(unique(dat$part))){
prt <- unique(dat$part)[[z]]
dat4 <- dat %>% filter(part==prt)
stat_time <- start_times %>% filter(part==prt)
dat4$time <- seq(0,(20*(nrow(dat4)-1)),by=20) + stat_time$start_time
if(z==1){
dat_all_ap <- dat4
} else{
dat_all_ap <- rbind(dat_all_ap,dat4)
}
}
if(i==1){
dat_all <- dat_all_ap
}else{
dat_all <- rbind(dat_all,dat_all_ap)
}
}
if(q==1){
dat_all_fin <- dat_all
} else{
dat_all_fin <- rbind(dat_all_fin,dat_all)
}
}
#calculate diff between fish and thing angle
dat_all_fin$x2 <- lag(dat_all_fin$x)
dat_all_fin$y2 <- lag(dat_all_fin$y)
dat_all_fin$bearing <- -with(data = dat_all_fin, atan2(y2 - y, x2 - x)) + (pi/2)
dat_all_fin$bearing <- dat_all_fin$bearing %>% cos() #in radians
dat_all_fin$diff_svg_abs_ang_end <- cos(dat_all_fin$bearing - (dat_all_fin$svg_ang*pi/180))
dat_all_fin$diff_vel_abs_ang_end <- cos(dat_all_fin$bearing - (dat_all_fin$vel_ang*pi/180))
final_tracks <- dat_all_fin %>% filter(time%in%real_time_steps_all$time)
l <- real_time_steps_all[!(real_time_steps_all$time %in%dat_all_fin$time),]
fish_dat <- dat2 %>% dplyr::select(c(approach,x1_,y1_,water_velocity_end,depth_end,svg_end,diff_svg_abs_ang_end,diff_vel_abs_ang_end,state,discharge_end,fish_id))
fish_dat <- fish_dat %>% rename(water_velocity=water_velocity_end,depth=depth_end,svg=svg_end,x=x1_,y=y1_,discharge=discharge_end)
#for sim tracks, need to calculate diff between fish and thing angles... best to do before removing steps!
fish_dat$agent <- "real"
final_tracks$agent <- final_tracks$agent %>% as.character()
fish_dat$state <- as.character(fish_dat$state)
final_track$state <-as.character(final_track$state)
final_tracks$discharge <- as.numeric(final_tracks$discharge)
fish_dat$discharge <- as.numeric(fish_dat$discharge)
fish_dat$fish_id <- as.character(fish_dat$fish_id)
all <- bind_rows(final_tracks,fish_dat) #here combines with real data
write.csv(all,paste0("agents + real data/test_fish_",id,".csv"),row.names=F)
}
```