Skip to main content
  • Home
  • Development
  • Documentation
  • Donate
  • Operational login
  • Browse the archive

swh logo
SoftwareHeritage
Software
Heritage
Archive
Features
  • Search

  • Downloads

  • Save code now

  • Add forge now

  • Help

  • c867f21
  • /
  • Cross validating codes
  • /
  • 03 - validating SSFs via simulations - NOT HMM.Rmd
Raw File Download

To reference or cite the objects present in the Software Heritage archive, permalinks based on SoftWare Hash IDentifiers (SWHIDs) must be used.
Select below a type of object currently browsed in order to display its associated SWHID and permalink.

  • content
  • directory
content badge
swh:1:cnt:9336fc198eee152c91e558ec4f828db2cf111ee5
directory badge
swh:1:dir:f52c58daf971b90f0ea8392f9f530b88abe7d5d1

This interface enables to generate software citations, provided that the root directory of browsed objects contains a citation.cff or codemeta.json file.
Select below a type of object currently browsed in order to generate citations for them.

  • content
  • directory
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
03 - validating SSFs via simulations - NOT HMM.Rmd
---
title: "Validating SSFs, without HMMs"
author: "Rachel Mawer"
date: "2023-09-14"
output: 
  html_document:
    toc: true
    toc_float: true
    theme: cerulean
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = F,message=F)

#main packages #delete/add as needed
library(dplyr)
library(ggplot2)
library(knitr)
library(ggpubr)
library(plotly)
#library(pracma) #for standard error
library(reactable)
library(amt)
library(MASS) #for cov
library(diagis)
library(mvtnorm) #for multivariate normal dist
library(tidyr)

Rcpp::sourceCpp("cpp/simulation_function.cpp", showOutput = F, verbose = F) #james simulation function
source("simulations stuff/simulation_function_preprocessing.R")


```

## About

File to try to validate the SSFs that did not have HMMs (i.e. behavioural state is not distinguished). This code has been copy-pasted from the version for the HMM-SSFs.

### Load in info


```{r set some variable names}

n_steps <- 20 #number of steps the simulation can select from at each step

data_files <- list.files("data/final ssf + hmm df",full.names = T) #get dataframes

#get fish id
fish_ids <- list.files("ssf modelling - file per fish")[1:31] #get fish IDs for late

barb_all_coef <- read.csv("FINAL RESULTS/barbel_all_coefs_pooled.csv") %>% dplyr::select(-X)
gray_all_coef <- read.csv("FINAL RESULTS/grayling_all_coefs_pooled.csv")%>% dplyr::select(-X)

#below - UNSTANDARDISED coefs, for all fish and species
all_coefs <- read.csv("FINAL RESULTS/all_coefs_pooled_UNSTANDARDISED.csv")
all_coefs$terms <- all_coefs$params

#get terms to keep #using existing means to filter

barb_means <- read.csv("FINAL RESULTS/barbel_means_pooled.csv")

barb_terms <- barb_means %>% filter(mean_coef!=0 & sd_coef!=0) 
barb_terms <- unique(barb_terms$terms)
for(i in 1: length(barb_terms)){
  barb_terms[[i]] <- gsub("_strdised","",barb_terms[[i]])
}

#remove cos

gray_means <- read.csv("FINAL RESULTS/grayling_means_pooled.csv")

gray_terms <- gray_means %>% filter(mean_coef!=0 & sd_coef!=0) 
gray_terms <- unique(gray_terms$terms)

for(i in 1: length(gray_terms)){
  gray_terms[[i]] <- gsub("_strdised","",gray_terms[[i]])
}


#sl and ta distrs per fish per state

movement_distr_dat <- read.csv("FINAL RESULTS/movement_distr_params_POOLED.csv")



raster_list <- list.files("rasters",full.names = T)

```

```{r add 0s to coefs where term not in model,eval=F}

#DOES NOT NEED TO BE RUN SINCE DID IT AND SAVED AS A FILE THATLL GET READ IN ABOVE !!

#code that will
#for all coef file
#split by species and state
#give individuals terms of 0 if not in their own model
#and remove those three barbel with few state 2 steps

z <-1
i <-1


for(i in 1:length(fish_ids)){ 
  id <- fish_ids[[i]]
  dat <- filter(all_coefs,fish_id==id)
  species <- unique(dat$species)
  dat_terms <- unique(dat$params)
  
  spec_terms <- unique(filter(all_coefs,species==species)$params)
  
  new_terms <- spec_terms[!(spec_terms %in% dat_terms)]
  if(length(new_terms) >=1){
    #make new df
    new_dat <- cbind(new_terms,0,NA,NA,NA,NA) %>% as.data.frame() # save as above
    names(new_dat) <- names(dat)[1:6]
    new_dat$fish_id <- id
    new_dat$species <- dat$species[1]
    new_dat$state <- "pooled"
    
    new_dat <- rbind(dat,new_dat)
  } else {
    new_dat <- dat
  }
  if(i==1){
    all_coefs_new <- new_dat
  } else{
    all_coefs_new <- rbind(all_coefs_new,new_dat)
  }
}

all_coefs_new <- all_coefs_new %>% filter(fish_id!="files listing coeffs and params")

write.csv(all_coefs_new,"FINAL RESULTS/all_coefs_pooled_UNSTANDARDISED.csv",row.names = F)

```

```{r parameterise step length distributions from all individuals,warning=F,eval=F}

for(i in 1:length(data_files)){
  dat <- read.csv(data_files[i]) #read in file
  sl_distr <- fit_distr(dat$sl_,"gamma")
  ta_distr <- fit_distr(dat$ta_,"vonmises")
  
  distr_dat <- data.frame(fish_id=unique(dat$fish_id),state="pooled",
                          species=unique(dat$species), #have fish id and state
                          sl_shape=sl_distr$params$shape,sl_scale=sl_distr$params$scale ,#add sl stuff
                          ta_kappa= ta_distr$params$kappa,ta_mu = ta_distr$params$mu) #add ta stuf
  if(i==1){
    movement_distr_dat <- distr_dat
  } else{
    movement_distr_dat <- rbind(movement_distr_dat,distr_dat)
  }
}

write.csv(movement_distr_dat,"FINAL RESULTS/movement_distr_params_POOLED.csv",row.names = F)
```

```{r transform movement distr dat based off log sl ,eval=F}
movement_distr_dat <- read.csv("FINAL RESULTS/movement_distr_params_POOLED.csv") #read in movement distr_dat

#now alter all coefs to just be log sl and cos ta and also reshape
all_coefs2 <- all_coefs %>% filter(terms=="log_sl_"|terms=="cos_ta_")%>% dplyr::select(c("terms","coef","state","fish_id"))
all_coefs3 <- all_coefs2  %>% spread(key=terms,value=coef)
names(all_coefs3)[3:4] <- c("cos_ta_coef","log_sl_coef")

combined <- merge(movement_distr_dat,all_coefs3,by=c("state","fish_id"))

combined$sl_shape_adj <-combined$sl_shape+combined$log_sl_coef
combined$ta_kappa_adj <-combined$ta_kappa + combined$cos_ta_coef

#save
write.csv(combined,"FINAL RESULTS/movement_distr_params_POOLED.csv",row.names = F)

```


```{r create raster list data frame 2}
#create a dataframe of raster name, raster file, and discharge


rasters_info <- as.data.frame(raster_list)

names(rasters_info) <- "file_name"
rasters_info$ras_name <- case_when(grepl("dep",rasters_info$file_name)==T ~ "depth",
                                   grepl("vel",rasters_info$file_name)==T ~ "water_velocity",
                                   grepl("svg",rasters_info$file_name)==T ~ "svg",
                                   grepl("Vel",rasters_info$file_name)==T ~ "vel_angle",
                                   grepl("SVG",rasters_info$file_name)==T ~ "svg_angle")

rasters_info$discharge <- case_when(grepl("10",rasters_info$file_name)==T ~ 10,
                                   grepl("20",rasters_info$file_name)==T ~ 20,
                                   grepl("30",rasters_info$file_name)==T ~ 30,
                                   grepl("40",rasters_info$file_name)==T ~ 40,
                                   grepl("50",rasters_info$file_name)==T ~ 50,
                                   grepl("60",rasters_info$file_name)==T ~ 60,
                                   grepl("70",rasters_info$file_name)==T ~ 70,
                                   grepl("80",rasters_info$file_name)==T ~ 80,)

rasters_info$directional <- case_when(grepl("Ang",rasters_info$file_name)==T ~ T,
                                      grepl("Ang",rasters_info$file_name)==F ~ F)

```

### The loop

Still does in "parts" due to changing discharges 👍

```{r run loop}

b <-1
for(b in 1:length(fish_ids)){
  #pick a fish id to use
  id <- fish_ids[b] 
  
  #get data for that fish
  
  data_for_fish <- data_files[grepl(id,data_files)]
  
  #make large df with both states
  for(i in 1:length(data_for_fish)){
    temp <- read.csv(data_for_fish[i]) %>% filter(case_==T) #keep only true steps
    if(i==1){
      dat <- temp
    } else{
      dat <- rbind(dat,temp)
    }
  }
  
  #extract species info, used to limit population models to that species
  spec <- dat$species %>% unique()
  
  #convert to time stamps
  
  dat$time1 <- as.POSIXct(dat$t1_)
  dat$time2 <- as.POSIXct(dat$t2_)
  
  #order by t1
  
  dat2 <- arrange(dat,time1)
  
  #number of tracks
  
  no_tracks <- length(unique(dat2$approach))
  
  unique_track_ids <- unique(dat2$approach)
  
  #total time of track #need to do per approach
  
  #create props state and time df per approach for filtering in loop
  for(z in 1:no_tracks){
     dat3 <- dat2 %>% filter(approach==unique_track_ids[z]) #filter for one track
      
     #don't think i need this anymore...
      track_length <- difftime(max(dat3$time2),min(dat3$time1),units="secs") %>% as.numeric()
      #redoing as like per time in state
      
      no_steps <-   track_length/20
      
      #make numeric and round up
      no_steps <- no_steps %>% as.numeric() %>% round(digits=0)
      
      #get state sequence
      state_seq <- cbind(round(dat3$discharge_end,digits=-1),dat3$t1_) %>% as.data.frame()
      names(state_seq) <- c("discharge","time")
      
      state_seq$time <- as.POSIXct(state_seq$time)
      state_seq$difftime <-difftime(state_seq$time,lag(state_seq$time),units="secs") %>% as.numeric
      
      #identify where states change and discharge changes and time_stamp changes
      state_seq$part <- 0
      part_id <-1
  
      for(i in 1:length(state_seq$discharge)){
        if(i==1){
          state_seq$part[i] <- 1
        }else if(state_seq$discharge[i]==state_seq$discharge[i-1] & state_seq$difftime[i]==20){ #to get if state or discharge changes
          state_seq$part[i] <- part_id
      
        }else{
          part_id <- part_id+1
          state_seq$part[i] <- part_id
        }
        }
  
  st_row <- 0 #starting row.
  
  #add in empty rows by splitting into parts and if next Part has diff time >x, fill in gap
   
  for(i in 1:length(unique(state_seq$part))){
     pt <- filter(state_seq,part==unique(state_seq$part)[i])
     nrows <- nrow(pt) + st_row #get row index for main state_seq df
     if(i!=length(unique(state_seq$part))){
       if(state_seq$difftime[nrows+1]>20){#if gap >20...
         rows_to_add <- state_seq$difftime[nrows+1]/20 #get number of rows to add
         #make new df to bind
         new_rows <- data.frame("discharge"=rep(0,rows_to_add),part=rep(0,rows_to_add),difftime=rep(0,rows_to_add))
         
         new_rows$time <- 1:nrow(new_rows)*20+pt$time[nrow(pt)]#add correct time stamps
         
         #nah for ease (due to parts) will interpolate for same state and discharge
           new_rows$discharge <- unique(pt$discharge)
           new_rows$state <- unique(pt$state)
         new_rows$part <- unique(pt$part)
         pt <- rbind(pt,new_rows)
       }
       st_row <- nrows
       
  
     }
          if(i==1){
         state_seq2 <- pt
       }  else{
         state_seq2 <-rbind(state_seq2,pt)
       }
   }
   
      
      
      props_state1 <- state_seq2 %>% group_by(part,discharge) %>% summarise(duration=difftime(max(time),min(time),units="secs"),steps_to_sim=(as.numeric(duration)/20)+1)
      
      
      props_state1$approach <- unique_track_ids[z]
      
    # save time stamps
    real_time_steps1 <- state_seq[c("time","part")]
    real_time_steps1$approach  <- unique_track_ids[z]
    
    #get start location
    
    start_loc1 <- dat3[1,c("x1_","y1_")]
    start_loc1$approach <- unique_track_ids[z]
     
    
    if(z==1){
      props_state_all <- props_state1 #df with all state seq info
      real_time_steps_all <- real_time_steps1 #df with real time steps
      start_loc_all <- start_loc1 #df with start locations
    } else{
      props_state_all <- rbind(props_state_all,props_state1)
      real_time_steps_all <- rbind(real_time_steps_all,real_time_steps1)
      start_loc_all <- rbind(start_loc_all,start_loc1)
    }
  
  }
  
  #get coefs for training fish
  coefs1 <- all_coefs %>% filter(fish_id != id & species==spec)
  
  
  if(spec=="Barbel"){
    coefs <- coefs1 %>% filter(terms %in% barb_terms)
  } else if(spec=="Grayling"){
    coefs <- coefs1 %>% filter(terms %in% gray_terms)
  }
  #remove log sl and cos ta since using adjusted kernels
  coefs <- coefs %>% filter(terms!="log_sl_" & terms!="cos_ta_")
  
  coefs$terms <- coefs$terms %>% str_replace_all("log_sl_","log(sl)")
  
  #id terms where starts are
  p <- unique(coefs$terms)[grep("_start",unique(coefs$terms))]
  t <-strsplit(p,":")

  #to get new names stuff - but do the replace after making means.
  for(i in 1:length(t)){
    #iterate through and id start stuff
    t2<- t[[i]]
    t_st <- t2[grep("_start",t2)]
    t_param <- t_st %>% gsub("_start","",.)
    t_param <- paste0("previous(",t_param,")")
    new_params_names <- cbind(t_st,t_param) %>% as.data.frame()
    if(i==1){
      start_terms_names <- new_params_names
    } else{
      start_terms_names <- rbind(start_terms_names,new_params_names)
    }
  }
  
  #then iterate to replace since. cant do in 1 line booo
  
  for(i in 1:nrow(start_terms_names)){
    bb <-  start_terms_names[i,] #get a row
    coefs$terms <-  coefs$terms %>% str_replace_all(.,bb$t_st,bb$t_param)
  }
 
  
 
  
  #calculate mean and sd, minus outliers for both states
  
  coef_means <- coefs %>% group_by(terms) %>% 
    summarise(coef=coef,q1=quantile(coef, na.rm = T)[2],q3=quantile(coef, na.rm = T)[4],iqr=q3-q1,low.fence=q1-1.5*iqr,high.fence=q3+1.5*iqr) %>% 
    ungroup() %>% filter(coef<=high.fence & coef>=low.fence) %>% #remove outliers
    group_by(terms) %>% 
    summarise(mean_coef=mean(coef,na.rm=T),sd_coef=sd(coef,na.rm=T)) %>% #calc means
    filter(mean_coef !=0 & sd_coef !=0) #remove terms that are 0
  
  #get covariance
  
  matr <- coefs %>% 
    group_by(terms) %>% summarise(coef=coef,q1=quantile(coef, na.rm = T)[2],q3=quantile(coef, na.rm = T)[4],iqr=q3-q1,low.fence=q1-1.5*iqr,high.fence=q3+1.5*iqr,fish_id=fish_id) %>% ungroup() %>%
    filter(coef<=high.fence & coef>=low.fence & terms %in% coef_means$terms) %>% 
    dplyr::select(terms,coef,fish_id)
    
  mat_prep <- spread(data=matr,key=terms,value=coef)
  
  coef_cov <- stats::cov(mat_prep[2:ncol(mat_prep)],use = "complete.obs")
  
  
  ## movement kernel means
  
  move_means <- movement_distr_dat %>% filter(fish_id!=id & species==spec) %>% 
    #remove outliers for log sl coef
    mutate(q1=quantile(log_sl_coef, na.rm = T)[2],q3=quantile(log_sl_coef, na.rm = T)[4],iqr=q3-q1,low.fence=q1-1.5*iqr,high.fence=q3+1.5*iqr,fish_id=fish_id) %>% 
    #ungroup() %>%
    filter(log_sl_coef<=high.fence & log_sl_coef>=low.fence&sl_shape_adj>0) %>% #remove shapes less than 0
    summarise(med=median(sl_shape_adj), mean_shape=mean(sl_shape_adj),sd_shape=sd(sl_shape_adj),mean_scale=mean(sl_scale),
              sd_scale=sd(sl_scale),sd_kappa=sd(ta_kappa_adj),
              mean_kappa=mean(ta_kappa_adj),mean_mu=mean(ta_mu)) 
  
  a <-1
  if(b==23){
    a <- 14
    final_track_all <- read.csv(paste0("fish agents - pooled/agents_test_fish_",id,".csv"))
  }

  for(q in a:19){ #parameterise individual as one of 19 agents
    #selection functions
    agent_coefs <- mvrnorm(n=1,mu=coef_means$mean_coef,Sigma = coef_cov) %>%
    as.data.frame() %>%
    tibble::rownames_to_column(var="terms") %>%
    rename(val=".") #select value from multivariate distr
    
    agent_coefs$term <- agent_coefs$term%>% 
      stringr::str_remove_all("_end") #remove "_end" from stuff so compatible w function
    
    agent_coefs <- agent_coefs %>% dplyr::select(c(term,val)) #retain only needed data

    
    #movement kernels
      agent_move_params <- data.frame(shape=abs(rnorm(1,mean=move_means$mean_shape,sd=move_means$sd_shape)),
  scale=abs(rnorm(1,mean=move_means$mean_scale,sd=move_means$sd_scale)),
  kappa = abs(rnorm(1,mean=move_means$mean_kappa,sd=move_means$sd_kappa))) #taking the absolute value to approximate the actual distribution; with hindsight, log transforming before calculating the mean wouldve been easier

    for(i in 1:no_tracks){ #here extract data for specific track e.g. state sequence
  
      #get number of steps etc to sim
      props_state <- props_state_all %>% filter(approach==unique_track_ids[[i]])
      
      #get start loc
      start_loc_track <- start_loc_all %>% filter(approach==unique_track_ids[[i]]) 
      start_loc <- c(start_loc_track$x1_,start_loc_track$y1_)
      for(z in 1:length(unique(props_state$part))){ #run simulation for 1 track
        #get info
        dat_part <-  props_state%>% filter(part==unique(props_state$part)[z])
        #get discharge
        disch <- unique(dat_part$discharge)
        #if discharge >80, set disch to 80 so will use the 80 discharge to sim
        if(as.numeric(disch)>80){
          disch <-as.character(80)
        }

        #in the if statements below, parameterise the appropriate selection function and movement kernel
        
        #length of dat_part is the number of steps to sim
        len_track <- dat_part$steps_to_sim
        
        #note: need to construct model formula. somehow....
        gamma_shape <- agent_move_params$shape
        gamma_scale <- agent_move_params$scale
        vonMises_kappa <- agent_move_params$kappa
        
        beta <- agent_coefs$val
        names(beta) <- agent_coefs$term
        
        
        #replace the diff w just angl
        names(beta) <- names(beta) %>% str_replace_all("diff_vel_abs_ang","vel_angle")
        names(beta) <- names(beta) %>% str_replace_all("diff_svg_abs_ang","svg_angle")
    
        issf_frm <- iSSF_formula(beta)
    
        #get raster info for that discharge
        ras_info <- rasters_info %>% filter(discharge==disch)
        
        #prep ras list for the function
        ras_lst <- iSSF_raster_list(
        files = ras_info$file_name,
        layer_names = ras_info$ras_name, 
        directional = ras_info$directional)
        
        #convert to pi
        ras_lst[[5]]$values <- ras_lst[[5]]$values / 180 * pi
        ras_lst[[3]]$values <- ras_lst[[3]]$values / 180 * pi
      
        
        #simulate here !
        
        bearing_start = pi/2
        
        sim <- iSSF_simulation(
        issf_frm = issf_frm, # Named vector of beta parameter values
        ras_lst = ras_lst, # list of covariate raster layers 
        position_start = start_loc, # starting position
        bearing_start = bearing_start,  # starting bearing
        n_steps = n_steps, # number of samples in random step distribution
        len_track =  len_track, # number of steps in a track
        scale = gamma_scale,     # Step len scale   
        shape = gamma_shape,     # Step len shape
        kappa = vonMises_kappa )    # Turning angle concentration 
        
        
        trk <- sim$track %>% as.data.frame() %>% rename(x=V1,y=V2)
        
        #add correct env data now
        
        trk$water_velocity <- extract(rast(ras_info$file_name[4]),trk[1:2])[1:nrow(trk),2]
        trk$depth <- extract(rast(ras_info$file_name[1]),trk[1:2])[1:nrow(trk),2]
        trk$svg <- extract(rast(ras_info$file_name[2]),trk[1:2])[1:nrow(trk),2]
        trk$svg_ang <- extract(rast(ras_info$file_name[3]),trk[1:2])[1:nrow(trk),2]
        trk$vel_ang <- extract(rast(ras_info$file_name[5]),trk[1:2])[1:nrow(trk),2]
        
        #add other parameters of interest
        trk$discharge <- disch
        trk$part <- unique(props_state$part)[z] #to assist matching to right time stamps...
      
        #update start location
        
        start_loc <- sim$track[nrow(sim$track),]
        
        #then the saving the final track stuff
        if(z==1){
          final_track <- trk
        } else{
          final_track <- rbind(final_track,trk)
        }
      }
      
      final_track$approach <- unique_track_ids[[i]]
      
      if(i==1){
        final_track_all_app <- final_track
      } else{
        final_track_all_app <- rbind(final_track_all_app,final_track)
      }
    }
    
    final_track_all_app$agent <- q
    final_track_all_app$fish_id <- id
    if(q==1){
      final_track_all <- final_track_all_app
    } else{
      final_track_all <- rbind(final_track_all,final_track_all_app)
    }

    write.csv(final_track_all,paste0("fish agents - pooled/agents_test_fish_",id,".csv"),row.names = F) # so will save output at each stage incase of crashing
  }
  
}
```

### Adding real data + dealing with missingness
```{r getting only real time steps and adding real fish data}
  
b <-1
for(b in 1:length(fish_ids)){
  #pick a fish id to use
  id <- fish_ids[b] 
  
  #get data for that fish
  
  data_for_fish <- data_files[grepl(id,data_files)]
  
  #make large df with both states
  for(i in 1:length(data_for_fish)){
    temp <- read.csv(data_for_fish[i]) %>% filter(case_==T) #keep only true steps
    if(i==1){
      dat <- temp
    } else{
      dat <- rbind(dat,temp)
    }
  }
  
  #extract species info, used to limit population models to that species
  spec <- dat$species %>% unique()
  
  #convert to time stamps
  
  dat$time1 <- as.POSIXct(dat$t1_)
  dat$time2 <- as.POSIXct(dat$t2_)
  
  #order by t1
  
  dat2 <- arrange(dat,time1)
  
  #number of tracks
  
  no_tracks <- length(unique(dat2$approach))
  
  unique_track_ids <- unique(dat2$approach)
  
  #total time of track #need to do per appraoch - in a loop?
  
  #create props state and time df per approach for filtering in loop
  for(z in 1:no_tracks){
     dat3 <- dat2 %>% filter(approach==unique_track_ids[z]) #filter for one track
      
     #don't think i need this anymore...
      track_length <- difftime(max(dat3$time2),min(dat3$time1),units="secs") %>% as.numeric()
      #redoing as like per time in state
      
      no_steps <-   track_length/20
      
      #make numeric and round up
      no_steps <- no_steps %>% as.numeric() %>% round(digits=0)
      
      #get time steps + discharges; its called state_seq as a remnant from the hmm-ssf version
      state_seq <- cbind(round(dat3$discharge_end,digits=-1),dat3$t1_) %>% as.data.frame()
      names(state_seq) <- c("discharge","time")
      
      state_seq$time <- as.POSIXct(state_seq$time)
      state_seq$difftime <-difftime(state_seq$time,lag(state_seq$time),units="secs") %>% as.numeric
      
      #identify where states change and discharge changes and time_stamp changes
      state_seq$part <- 0
      part_id <-1
  
      for(i in 1:length(state_seq$discharge)){
        if(i==1){
          state_seq$part[i] <- 1
        }else if(state_seq$discharge[i]==state_seq$discharge[i-1] & state_seq$difftime[i]==20){ #to get if state or discharge changes
          state_seq$part[i] <- part_id
      
        }else{
          part_id <- part_id+1
          state_seq$part[i] <- part_id
        }
        }
  
  st_row <- 0 #starting row
  
  #add in empty rows by splitting into parts and if next Part has diff time >x, fill in gap
   
  for(i in 1:length(unique(state_seq$part))){
     pt <- filter(state_seq,part==unique(state_seq$part)[i])
     nrows <- nrow(pt) + st_row #get row index for main state_seq df
     if(i!=length(unique(state_seq$part))){
       if(state_seq$difftime[nrows+1]>20){#if gap >20...
         rows_to_add <- state_seq$difftime[nrows+1]/20 #get number of rows to add
         #make new df to bind
         new_rows <- data.frame("discharge"=rep(0,rows_to_add),part=rep(0,rows_to_add),difftime=rep(0,rows_to_add))
         
         new_rows$time <- 1:nrow(new_rows)*20+pt$time[nrow(pt)]#add correct time stamps
           new_rows$discharge <- unique(pt$discharge)
           new_rows$state <- unique(pt$state)
         new_rows$part <- unique(pt$part)
         pt <- rbind(pt,new_rows)
       }
       st_row <- nrows
       
  
     }
          if(i==1){
         state_seq2 <- pt
       }  else{
         state_seq2 <-rbind(state_seq2,pt)
       }
   }
   
      
      
      props_state1 <- state_seq2 %>% group_by(part,discharge) %>% summarise(duration=difftime(max(time),min(time),units="secs"),steps_to_sim=(as.numeric(duration)/20)+1)
      
      
      props_state1$approach <- unique_track_ids[z]
      
    # save time stamps
    real_time_steps1 <- state_seq[c("time","part")]
    real_time_steps1$approach  <- unique_track_ids[z]
    
    #get start location
    
    start_loc1 <- dat3[1,c("x1_","y1_")]
    start_loc1$approach <- unique_track_ids[z]
     
    
    if(z==1){
      props_state_all <- props_state1 #df with all state seq info
      real_time_steps_all <- real_time_steps1 #df with real time steps
      start_loc_all <- start_loc1 #df with start locations
    } else{
      props_state_all <- rbind(props_state_all,props_state1)
      real_time_steps_all <- rbind(real_time_steps_all,real_time_steps1)
      start_loc_all <- rbind(start_loc_all,start_loc1)
    }
  
  }
  
  final_track_all <- read.csv(paste0("fish agents - pooled/agents_test_fish_",id,".csv"))
  
  #doing the times here
  q <-1
  
  real_time_steps_start <- real_time_steps_all %>% group_by(approach,part) %>% summarise(start_time=min(time))
  
  for(q in 1:19){ #per agent
    datt <- final_track_all %>% filter(agent==q)
    for(i in 1:length(unique(final_track_all$approach))){
      app <- unique(final_track_all$approach)[[i]]
      dat <- datt %>% filter(approach==app)
      start_times <- real_time_steps_start %>% filter(approach==app)
      for(z in 1:length(unique(dat$part))){
        prt <- unique(dat$part)[[z]]
        dat4 <- dat %>% filter(part==prt)
        stat_time <- start_times %>% filter(part==prt)
        
        dat4$time <- seq(0,(20*(nrow(dat4)-1)),by=20) + stat_time$start_time
        if(z==1){
          dat_all_ap <- dat4
        } else{
          dat_all_ap <- rbind(dat_all_ap,dat4)
        }
      }
      if(i==1){
        dat_all <- dat_all_ap
      }else{
        dat_all <- rbind(dat_all,dat_all_ap)
      }
    }
    if(q==1){
      
      dat_all_fin <- dat_all
    } else{
      dat_all_fin <- rbind(dat_all_fin,dat_all)
    }
  }
  
  #calculate diff between fish and thing angle
  dat_all_fin$x2 <- lag(dat_all_fin$x)
  dat_all_fin$y2 <- lag(dat_all_fin$y)
  
  dat_all_fin$bearing <- -with(data = dat_all_fin, atan2(y2 - y, x2 - x)) + (pi/2)
  dat_all_fin$bearing <- dat_all_fin$bearing %>% cos() #in radians
  
  dat_all_fin$diff_svg_abs_ang_end <- cos(dat_all_fin$bearing - (dat_all_fin$svg_ang*pi/180))
  
  dat_all_fin$diff_vel_abs_ang_end <- cos(dat_all_fin$bearing - (dat_all_fin$vel_ang*pi/180))
  
  final_tracks <- dat_all_fin %>% filter(time%in%real_time_steps_all$time)
  
  l <- real_time_steps_all[!(real_time_steps_all$time %in%dat_all_fin$time),]
  
  fish_dat <- dat2 %>% dplyr::select(c(approach,x1_,y1_,water_velocity_end,depth_end,svg_end,diff_svg_abs_ang_end,diff_vel_abs_ang_end,state,discharge_end,fish_id))
  
  fish_dat <- fish_dat %>% rename(water_velocity=water_velocity_end,depth=depth_end,svg=svg_end,x=x1_,y=y1_,discharge=discharge_end)
  
  
  fish_dat$agent <- "real"
  final_tracks$agent <- final_tracks$agent %>% as.character()
  
  fish_dat$state <- as.character(fish_dat$state) #will just be "Pooled"

  final_tracks$discharge <- as.numeric(final_tracks$discharge)
  fish_dat$discharge <- as.numeric(fish_dat$discharge)
  
  fish_dat$fish_id <- as.character(fish_dat$fish_id)
  final_tracks$fish_id <- as.character(final_tracks$fish_id)
  
  all <- bind_rows(final_tracks,fish_dat)
  
  write.csv(all,paste0("agents + real data - pooled/test_fish_",id,".csv"),row.names=F)
}
```

back to top

Software Heritage — Copyright (C) 2015–2025, The Software Heritage developers. License: GNU AGPLv3+.
The source code of Software Heritage itself is available on our development forge.
The source code files archived by Software Heritage are available under their own copyright and licenses.
Terms of use: Archive access, API— Content policy— Contact— JavaScript license information— Web API