https://doi.org/10.5281/zenodo.14318846
ssf_datamaking_HMMs.R
####set up####
rm(list=ls())
#this file will take tracks with states and 1) add random steps; 2) add covariate
#info; and 3) add fish attributes; then save the file
#
# in 'code when figuring it out section', this is code for when a) figuring out
# how to apply the functions and b) processing to get the resampling rate and
# figure out which entry events have insufficient data
# theres a lot of loops as need to apply the functions within entry events
# and nesting the data wasnt working
#### package ####
library(raster)
library(ggplot2)
library(dplyr)
library(data.table)
library(rgdal)#shapefile
library(sp)
library(rgeos)
library(amt) #for step selection
library(lubridate)
library(tidyr)
library(crawl)
library(sf)
library(mapview)
library(maptools)
library(suncalc)
####read file list####
#ok do file by file
#dont need 2 check sampling rate as interpolated for 5s
files <- list.files("data/steps with states/individual fish",full.names = T)
####other files####
#get raster file names for use later
raster_files <- list.files("rasters") #identify file names
#flow
flow <-read.csv('data/hourly_flow.csv')
flow$time <- as.POSIXct(flow$date, format='%d/%m/%Y %H:%M')
#temperatre
temp <- read.csv('data/median_temperatures.csv')
#info for standardising
stand <- read.csv("data/summary_stats_for_standardising.csv")
variable_list <- stand$Variable #list of variables
#other
data_on_fish <-read.csv('data/Tagged_fish_data.csv')
temp <- read.csv('data/median_temperatures.csv')
####START HERE - pick file to do####
#pick a dataframe
####MEGA LOOP####
q <-1
for (q in 1:length(files)){
starttime <- Sys.time()
data_to_read <- files[q]
data_frame <- read.csv(data_to_read)
#time as posixct
data_frame$time2 <- as.POSIXct(data_frame$t1_)
#getstates
dat_1 <- filter(data_frame,State==1)
dat_2 <- filter(data_frame,State==2)
#need ifs for if only 1 state...
#so if dat_1 >0 and if dat_2>0
#fully remake steps tbh, using x1, y1
fish_id_for_print <- data_frame$fish_id[1]
if(length(dat_1$approach>0)){
print(paste0('started fish id ',fish_id_for_print,'state 1, at time: ',starttime))
####loop all - state 1
#set up blank df to append to - can probs remove this and do a (if i ==1, new df, else...)
#final_df <- data.frame(matrix(ncol = 29, nrow = 0))
#colnames(final_df) <- c("x1_","x2_","y1_","y2_","sl_" ,"direction_p","ta_","t1_","t2_",
# "dt_","step_id_","case_","cos_ta_","log_sl_","substrate_start",
# "substrate_end","time_start","time_end","discharge_start",
# "discharge_end","depth_start","water_velocity_start",
# "water_vel_grad_start","depth_end","water_velocity_end",
# "water_vel_grad_end", "species","fish_id","approach" )
#ok, break this loop into smaller loops
approaches <- unique(dat_1$approach)
num_entries <- length(approaches)
#currently not appending the data?
#resampled_data_total <- data.frame(matrix(ncol=4,nrow=0))
#colnames(resampled_data_total) <-c("x_","y_","t_","burst_")
#data_with_random_steps <- data.frame(matrix(ncol=17,nrow=0))
#colnames(data_with_random_steps) <- c("x1_","x2_","y1_","y2_","sl_",
# "direction_p","ta_","t1_","t2_","dt_",
# "step_id_","case_","cos_ta_","log_sl_",
# "species","approach","fish_id")
#blank list to append entry events which have insufficient turning angles to
#shouldnt need TOO much since these df already did this level of processing - but can check
not_enough_ta_ <-data.frame(matrix(ncol=3,nrow=0))
colnames(not_enough_ta_) <-c('fish_id','approach',"state")
not_enough_ta_blank <- not_enough_ta_
#blank list to append entry events with too few points
not_enough_points <- data.frame(matrix(ncol=3,nrow=0))
colnames(not_enough_points) <-c('fish_id','approach',"state")
not_enough_points_blank <- not_enough_points
#get fish ids as names
fish_id_list <-unique(dat_1$fish_id)
#LOOP
#note to me: tbh since im using bursts I can actually not loop this by approach?
#then again, if sl_ ta_ differs with approach, this way accounts for that at least
#but then again, i could also. lookat the sl and ta distributions for each approach separately and then specify them myself
#REMOVE THE DOUBLES IN THE PREDICTED V OBSERVED STUFF FROM CRAWL
for (i in 1:length(approaches)) {
#convert df to tibble so the code works
apprach <- approaches[i]
data2 <- dat_1 %>% filter(approach==apprach )%>% as_tibble()
#make a track
data_track <- make_track(data2,x,y,time2,crs=32632) #error as na in time stamps? :(
data_track <-arrange(data_track,t_)
#resample rate 20s - to give burst id
resampled_data <- track_resample(data_track,rate = seconds(20),#changed to 30s
tolerance = seconds(0.5))
#keeping tolerance very low since should have positions every 5s
#filter min burst so at least 3 points (2 steps) per burst
resampled_data <- filter_min_n_burst(resampled_data,min_n = 3)
#LOOP TO RESAMPLE APART FROM THOSE WITHOUT ENOUGH TA (AKA PART 1)
if(length(resampled_data$x_)>0){
#anyways, generate the steps from the data
resampled_steps <- steps_by_burst(resampled_data)
#if loop to check if >2 ta_
#dont think i will need this if only >3 points per burst
no.ta <- sum(!is.na(resampled_steps$ta_))
if(no.ta>=2){
#generate random steps and add to the data
random_steps_data <-random_steps(resampled_steps, n_control=100) #100 random steps so can cut this down later <3
#further error making random steps as lack of turning angles
#add time of day NOW
random_steps_data <- time_of_day(random_steps_data,where='both',include.crepuscule = TRUE)
#add cosine of turning angle and log of step length
#as may want to use later
random_steps_data$cos_ta_ <- cos(random_steps_data$ta_)
random_steps_data$log_sl_ <- log(random_steps_data$sl_)
#add other variables of importance from the og dataset!
#add species, done this way so its easier
random_steps_data$species <- unique(data2$species)
random_steps_data$state <- unique(data2$State)
#right will need to recalculate dist from fishway if want to use that
#add entry event
random_steps_data$approach <- data2$approach[1]
random_steps_data$track <- data2$track[1]
#add fish id
random_steps_data$fish_id <- unique(data2$fish_id)
##ok this loop can end here and append to larger df for example
if(i==1){
data_with_random_steps <- random_steps_data
}else{
data_with_random_steps <- rbind(data_with_random_steps,random_steps_data)
}
}
else{
not_enough_ta_ <-append(not_enough_ta_, unique(data2$approach),unique(data2$State))
not_enough_ta_2 <-not_enough_ta_blank
not_enough_ta_2[1,] <- c(data2$fish_id[1],i,unique(data2$State))
not_enough_ta_ <- rbind(not_enough_ta_,not_enough_ta_2)
}
}
else {
not_enough_points <- append(not_enough_points, unique(data2$approach))
not_enough_points2 <-not_enough_points_blank
not_enough_points2[1,] <- c(data2$fish_id[1],i)
not_enough_points <- rbind(not_enough_points,not_enough_points2)
}
}
####add env data by discharge to datas####
#rename data4 just because
data4 <- data_with_random_steps
#data4 <- read.csv('with_random_steps_all_steps_define_distr.csv')
data4_backup <- data4
data4 <- as_tibble(data4)
#get start and end points of the data
data.startpoints <-data4[,c(2,4)]
data.endpoints <- data4[,c(3,5)]
#and diischarges
#need hourly time first
data4$time_start <- round_date(as.POSIXct(data4$t1_,format='%Y-%m-%d %H:%M:%S') ,unit='hour')
data4$time_end <- round_date(as.POSIXct(data4$t2_,format='%Y-%m-%d %H:%M:%S'),unit='hour')
#then can add discharges
data4$discharge_start <- flow[match(data4$time_start,flow$time),2]
data4$discharge_end <- flow[match(data4$time_end,flow$time),2]
#and then df to save the extracted points to
data.startpoints.fin <- data.startpoints
data.endpoints.fin <-data.endpoints
#NEED LOOP FOR IF DISCHARGE >80 TO TREAT AS 80
for(i in 1:length(raster_files)) {
j <-gsub('.asc','', raster_files[i])
data.startpoints.fin[[paste0(j)]] <- raster::extract(raster(paste0("rasters - cropped/",
raster_files[i])),data.startpoints)
data.endpoints.fin[[paste0(j)]] <- raster::extract(raster(paste0("rasters - cropped/",
raster_files[i])),data.endpoints)
}
#ok so the next step will use the 2 seaprate df
#below is confusing bc we make another variable data4 but its just how the code was made initially <3
#ok so for some reason. this might not have worked :(
#just to save a copy
data5 <- data4
#will add correct depth, velcity and gradient values plus angles to above df
#in loop below data4 is the data w random steps and substrate and discharge
#and data_all_env is the MASSIVE file of all random steps plus every single
#value from the rasters like 62 columns
#ok below loop. may or may not work.
#lets do it line by line
#backup data5
data5_2 <- data5
#data5 <-data5_2
approaches <- unique(data5_2$approach)
#use this loop if got covariates via raster::extract
#use the line below if need to run tihs loop over several days (:
for(i in 1:length(data5$step_id_)) {
#FOR START POINTS
#get discharge at i
discharge_start_10s <- round(data5$discharge_start[i],digits=-1)
#need if ... for discharge >80
#ok so below extracts headings for a discharge! and it WORK!
#aka picks the headings containing the discharge value from the all env df
if(discharge_start_10s<80){
headings_start <- names(data.startpoints.fin)[grepl(discharge_start_10s,names(data.startpoints.fin))]
}
if(discharge_start_10s>=80){
headings_start <- names(data.startpoints.fin)[grepl(80,names(data.startpoints.fin))]
}
#dont need line below anymore since these headings are only start
#headings_start <- headings_start[grepl('start',headings_start)]
#individual headings for each variable
#this means can direct r where to TAKE VALUES FROM
depth_head_start <- headings_start[grepl('dep',headings_start)]
vel_head_start <- headings_start[grepl('vel_',headings_start)]
velgrad_head_start <- headings_start[grepl('svg',headings_start)]
vel_angle_head_start <- headings_start[grepl('Vel_Angle',headings_start)]
svg_angle_head_start <- headings_start[grepl('SVG_',headings_start)]
#then add to df
#IT WORKS!!
data5[i, 'depth_start'] <- data.startpoints.fin[i, depth_head_start]
data5[i, 'water_velocity_start'] <- data.startpoints.fin[i, vel_head_start]
data5[i, 'svg_start'] <- data.startpoints.fin[i, velgrad_head_start]
data5[i, 'velocity_angle_start'] <- data.startpoints.fin[i, vel_angle_head_start]
data5[i, 'svg_angle_start'] <- data.startpoints.fin[i, svg_angle_head_start]
#FOR END POINTS
#FOR START POINTS
#get discharge at i
discharge_end_10s <- round(data5$discharge_end[i],digits=-1)
#ok so below extracts headings for a discharge! and it WORK!
#aka picks the headings containing the discharge value from the all env df
# headings_end <- names(data.endpoints.fin)[grepl(discharge_end_10s,names(data.endpoints.fin))]
#dont need line below anymore
#headings_end <- headings_start[grepl('start',headings_start)]
if(discharge_end_10s<80){
headings_end <- names(data.endpoints.fin)[grepl(discharge_start_10s,names(data.startpoints.fin))]
}
if(discharge_end_10s>=80){
headings_end <- names(data.endpoints.fin)[grepl(80,names(data.startpoints.fin))]
}
#individual headings for each variable
#this means can direct r where to TAKE VALUES FROM
depth_head_end <- headings_end[grepl('dep',headings_end)]
vel_head_end <- headings_end[grepl('vel_',headings_end)]
velgrad_head_end <- headings_end[grepl('svg',headings_end)]
vel_angle_head_end <- headings_end[grepl('Vel_Angle',headings_end)]
svg_angle_head_end <- headings_end[grepl('SVG_',headings_end)]
#then add to df
#IT WORKS!!
data5[i, 'depth_end'] <-data.endpoints.fin[i, depth_head_end]
data5[i, 'water_velocity_end'] <- data.endpoints.fin[i, vel_head_end]
data5[i, 'svg_end'] <- data.endpoints.fin[i, velgrad_head_end]
data5[i, 'velocity_angle_end'] <- data.endpoints.fin[i, vel_angle_head_end]
data5[i, 'svg_angle_end'] <- data.endpoints.fin[i, svg_angle_head_end]
}
####make step id sequential for all approaches####
#step id per fish eg not per approach
data.sub <- summarise(group_by(data5,fish_id,approach,step_id_),count=n())
data.sub$step_id_new <- ave(data.sub$step_id_,data.sub$fish_id,
FUN = seq_along)
#ok above works
#merge to get new step_id and then drop additional columns <3
data5.5 <-merge(data5,data.sub,by=c('approach',"step_id_",'fish_id'))
data5.5 <-subset(data5.5,select=c(-count))
#OK HERE FILTER TO REMOVE RANDOM STEPS OUT OF RIVER
#check number of true steps before and after removing nas!!
#data5.5.2 <- na.omit(data5.5)
#THEN REUDCE RANDOM STEPS PER STEP to 10
#test 1
data_true_only <- filter(data5.5,case_==TRUE)
data_false_only <- filter(data5.5,case_==FALSE)
data_false_only2 <- data_false_only[24:35]
#remove NAs
data_false_only <- data_false_only[(complete.cases(data_false_only2)),]
#get 10 null steps per step ID
data6 <- data_false_only %>% group_by(approach,step_id_,case_) %>% slice_sample(n=10) %>% ungroup
#ok well. THEN. match step id of remaining trues to new falses
truestepid <- unique(data_true_only$step_id_new)
data6.2 <- filter(data6, step_id_new %in% truestepid) #ok good
#then rbind
data7 <- rbind(data_true_only,data6.2)
data8 <- arrange(data7,t1_)
#save
#also, calculate ABSOLUTE angle of fish so can do stuff relative to flow angle
#tan of difference in x and diff y
diffx <- data8$x2_ - data8$x1_
diffy <- data8$y2_ - data8$y1_
angle <- atan(diffy/diffx)*180/pi #ok the maths is good
#ok but this only calculates like the lil angle, not realtive to north
angle_stuff<-as.data.frame(cbind(diffx,diffy,angle))
angle_stuff<-mutate(angle_stuff,absolute=case_when(diffx>0 ~ 90-angle,
diffx<0 ~270-angle))
data8$absolute_angle <- angle_stuff$absolute
#cos of the angles
data8$cos_vel_ang_start <- cos(data8$velocity_angle_start*pi/180)
data8$cos_svg_ang_start<- cos(data8$svg_angle_start*pi/180)
data8$cos_vel_ang_end <- cos(data8$velocity_angle_end*pi/180)
data8$cos_svg_ang_end <- cos(data8$svg_angle_end*pi/180)
data8$cos_abs_ang_end <- cos(data8$absolute_angle *pi/180)
##relative angles
#should do as cos? to linearise?
#diff vel and ab angle
#do maths in brackets for regular angles and then cos it
#need to be between 0 and 1, or -1 and 1
data8$diff_vel_abs_ang_start <- cos((data8$velocity_angle_start-data8$absolute_angle)*pi/180)
data8$diff_svg_abs_ang_start <- cos((data8$svg_angle_start-data8$absolute_angle)*pi/180)
data8$diff_vel_abs_ang_end <- cos((data8$velocity_angle_end-data8$absolute_angle)*pi/180)
data8$diff_svg_abs_ang_end <- cos((data8$svg_angle_end-data8$absolute_angle)*pi/180)
#examine this
#data.test <-data8[1:10,]
#data.test$dif_ang_vek_abs <- data.test$velocity_angle_end-data.test$absolute_angle
#data.test$dif_ang_vek_abs[1:2] <- c(45,-45)
#data.test$cos_diff_ang_vel_abs <- cos(data.test$dif_ang_vek_abs*pi/180)
data_final <- data8
####add fish lengths etc####
#ok so want to match tag id on this sheet to tag id on the main data sheet
data_on_fish_i <- filter(data_on_fish,Tag_ID == data_final$fish_id[1])
data_final$fork_length <- data_on_fish_i$Fork_length_mm
data_final$total_length <- data_on_fish_i$Total_length_mm
data_final$weight <- data_on_fish_i$Weight_g
####add temp####
temp$time3 <- as.POSIXct(temp$time2,format='%Y-%m-%d %H:%M:%S')
#sort temperature and remove receiver
data_final$time_start <- round_date(as.POSIXct(data_final$t1_,format='%Y-%m-%d %H:%M:%S') ,'10mins')
data_final$time_end <- round_date(as.POSIXct(data_final$t2_,format='%Y-%m-%d %H:%M:%S'),unit='10mins')
#sort temperature and remove receiver
data_final$temp_start <- temp[match(data_final$time_start,temp$time3),3]
data_final$temp_end <- temp[match(data_final$time_end,temp$time3),3]
data_final <- subset(data_final,select=-c(time_start,time_end))
num_cols <- length(data_final)
if(length(data_final$approach)>0){
for(i in 1:length(variable_list)){
var <- variable_list[i]
r0w <- filter(stand,Variable==var)
col_id <- (num_cols+i)
data_final[col_id] <- 0
data_final[col_id] <- unlist((data_final[var]-r0w$Mean)/r0w$Std..Dev.)
names(data_final)[col_id] <- paste0(var,"_strdised")
} }
#### save final df <3####
#hashed so i dont ever accidentally run it <3
write.csv(data_final,paste0('data/final ssf + hmm df/',data_final$fish_id[1],' ssf_data_20sec_time_gap_60min_track_state1.csv'),row.names=F)
####end of potential loop
endtime <- Sys.time()
print(paste0('Finished fish id ',fish_id_for_print,' state 1 at time: ',endtime))
print(paste0('time taken: ',as.numeric(endtime-starttime,units='mins'), ' minutes'))
} else{
print(paste0("fish id ",fish_id_for_print," state 1 has no datapoints"))
}
###state 2
if(length(dat_2$approach>0)){
starttime <- Sys.time()
print(paste0('started fish id ',fish_id_for_print,'state 2, at time: ',starttime))
####loop all - state 1
#set up blank df to append to - can probs remove this and do a (if i ==1, new df, else...)
#final_df <- data.frame(matrix(ncol = 29, nrow = 0))
#colnames(final_df) <- c("x1_","x2_","y1_","y2_","sl_" ,"direction_p","ta_","t1_","t2_",
# "dt_","step_id_","case_","cos_ta_","log_sl_","substrate_start",
# "substrate_end","time_start","time_end","discharge_start",
# "discharge_end","depth_start","water_velocity_start",
# "water_vel_grad_start","depth_end","water_velocity_end",
# "water_vel_grad_end", "species","fish_id","approach" )
#ok, break this loop into smaller loops
approaches <- unique(dat_2$approach)
num_entries <- length(approaches)
#currently not appending the data?
#resampled_data_total <- data.frame(matrix(ncol=4,nrow=0))
#colnames(resampled_data_total) <-c("x_","y_","t_","burst_")
#data_with_random_steps <- data.frame(matrix(ncol=17,nrow=0))
#colnames(data_with_random_steps) <- c("x1_","x2_","y1_","y2_","sl_",
# "direction_p","ta_","t1_","t2_","dt_",
# "step_id_","case_","cos_ta_","log_sl_",
# "species","approach","fish_id")
#blank list to append entry events which have insufficient turning angles to
#shouldnt need TOO much since these df already did this level of processing - but can check
not_enough_ta_ <-data.frame(matrix(ncol=3,nrow=0))
colnames(not_enough_ta_) <-c('fish_id','approach',"state")
not_enough_ta_blank <- not_enough_ta_
#blank list to append entry events with too few points
not_enough_points <- data.frame(matrix(ncol=3,nrow=0))
colnames(not_enough_points) <-c('fish_id','approach',"state")
not_enough_points_blank <- not_enough_points
#get fish ids as names
fish_id_list <-unique(dat_2$fish_id)
#LOOP
#note to me: tbh since im using bursts I can actually not loop this by approach?
#then again, if sl_ ta_ differs with approach, this way accounts for that at least
#but then again, i could also. lookat the sl and ta distributions for each approach separately and then specify them myself
#REMOVE THE DOUBLES IN THE PREDICTED V OBSERVED STUFF FROM CRAWL
for (i in 1:length(approaches)) {
apprach <- approaches[i]
data2 <- dat_2 %>% filter(approach==apprach )%>% as_tibble()
#make a track
data_track <- make_track(data2,x,y,time2,crs=32632) #error as na in time stamps? :(
data_track <-arrange(data_track,t_)
#resample rate 20s - to give burst id
resampled_data <- track_resample(data_track,rate = seconds(20),#changed to 30s
tolerance = seconds(0.5))
#keeping tolerance very low since should have positions every 5s
#filter min burst so at least 3 points (2 steps) per burst
resampled_data <- filter_min_n_burst(resampled_data,min_n = 3)
#LOOP TO RESAMPLE APART FROM THOSE WITHOUT ENOUGH TA (AKA PART 1)
if(length(resampled_data$x_)>0){
#anyways, generate the steps from the data
resampled_steps <- steps_by_burst(resampled_data)
#if loop to check if >2 ta_
#dont think i will need this if only >3 points per burst
no.ta <- sum(!is.na(resampled_steps$ta_))
if(no.ta>=2){
#generate random steps and add to the data
random_steps_data <-random_steps(resampled_steps, n_control=100) #100 random steps so can cut this down later <3
#further error making random steps as lack of turning angles
#add time of day NOW
random_steps_data <- time_of_day(random_steps_data,where='both',include.crepuscule = TRUE)
#add cosine of turning angle and log of step length
#as may want to use later
random_steps_data$cos_ta_ <- cos(random_steps_data$ta_)
random_steps_data$log_sl_ <- log(random_steps_data$sl_)
#add other variables of importance from the og dataset!
#add species, done this way so its easier
random_steps_data$species <- unique(data2$species)
random_steps_data$state <- unique(data2$State)
#right will need to recalculate dist from fishway if want to use that
#add entry event
random_steps_data$approach <- data2$approach[1]
random_steps_data$track <- data2$track[1]
#add fish id
random_steps_data$fish_id <- unique(data2$fish_id)
##ok this loop can end here and append to larger df for example
if(i==1){ #here! error with adding extra things. but. i SHOULD be 1?
data_with_random_steps <- random_steps_data
}else{
data_with_random_steps <- rbind(data_with_random_steps,random_steps_data)
}
}
else{
not_enough_ta_ <-append(not_enough_ta_, unique(data2$approach),unique(data2$State))
not_enough_ta_2 <-not_enough_ta_blank
not_enough_ta_2[1,] <- c(data2$fish_id[1],i,unique(data2$State))
not_enough_ta_ <- rbind(not_enough_ta_,not_enough_ta_2)
}
}
else {
not_enough_points <- append(not_enough_points, unique(data2$approach))
not_enough_points2 <-not_enough_points_blank
not_enough_points2[1,] <- c(data2$fish_id[1],i)
not_enough_points <- rbind(not_enough_points,not_enough_points2)
}
}
####add env data by discharge to datas####
#rename data4 just because
data4 <- data_with_random_steps
#data4 <- read.csv('with_random_steps_all_steps_define_distr.csv')
data4_backup <- data4
data4 <- as_tibble(data4)
#get start and end points of the data
data.startpoints <-data4[,c(2,4)]
data.endpoints <- data4[,c(3,5)]
#and diischarges
#need hourly time first
data4$time_start <- round_date(as.POSIXct(data4$t1_,format='%Y-%m-%d %H:%M:%S') ,unit='hour')
data4$time_end <- round_date(as.POSIXct(data4$t2_,format='%Y-%m-%d %H:%M:%S'),unit='hour')
#then can add discharges
data4$discharge_start <- flow[match(data4$time_start,flow$time),2]
data4$discharge_end <- flow[match(data4$time_end,flow$time),2]
#and then df to save the extracted points to
data.startpoints.fin <- data.startpoints
data.endpoints.fin <-data.endpoints
#NEED LOOP FOR IF DISCHARGE >80 TO TREAT AS 80
for(i in 1:length(raster_files)) {
j <-gsub('.asc','', raster_files[i])
data.startpoints.fin[[paste0(j)]] <- raster::extract(raster(paste0("rasters - cropped/",
raster_files[i])),data.startpoints)
data.endpoints.fin[[paste0(j)]] <- raster::extract(raster(paste0("rasters - cropped/",
raster_files[i])),data.endpoints)
}
#ok so the next step will use the 2 seaprate df
#below is confusing bc we make another variable data4 but its just how the code was made initially <3
#ok so for some reason. this might not have worked :(
#just to save a copy
data5 <- data4
#will add correct depth, velcity and gradient values plus angles to above df
#in loop below data4 is the data w random steps and substrate and discharge
#and data_all_env is the MASSIVE file of all random steps plus every single
#value from the rasters like 62 columns
#ok below loop. may or may not work.
#lets do it line by line
#backup data5
data5_2 <- data5
#data5 <-data5_2
approaches <- unique(data5_2$approach)
final_df <- data.frame(matrix(ncol = 29, nrow = 0))
colnames(final_df) <- c("x1_","x2_","y1_","y2_","sl_" ,"direction_p","ta_","t1_","t2_",
"dt_","step_id_","case_","cos_ta_","log_sl_","substrate_start",
"substrate_end","time_start","time_end","discharge_start",
"discharge_end","depth_start","water_velocity_start",
"water_vel_grad_start","depth_end","water_velocity_end",
"water_vel_grad_end", "species","fish_id","approach" )
#use this loop if got covariates via raster::extract
#use the line below if need to run tihs loop over several days (:
for(i in 1:length(data5$step_id_)) {
#FOR START POINTS
#get discharge at i
discharge_start_10s <- round(data5$discharge_start[i],digits=-1)
#need if ... for discharge >80
#ok so below extracts headings for a discharge! and it WORK!
#aka picks the headings containing the discharge value from the all env df
if(discharge_start_10s<80){
headings_start <- names(data.startpoints.fin)[grepl(discharge_start_10s,names(data.startpoints.fin))]
}
if(discharge_start_10s>=80){
headings_start <- names(data.startpoints.fin)[grepl(80,names(data.startpoints.fin))]
}
#dont need line below anymore since these headings are only start
#headings_start <- headings_start[grepl('start',headings_start)]
#individual headings for each variable
#this means can direct r where to TAKE VALUES FROM
depth_head_start <- headings_start[grepl('dep',headings_start)]
vel_head_start <- headings_start[grepl('vel_',headings_start)]
velgrad_head_start <- headings_start[grepl('svg',headings_start)]
vel_angle_head_start <- headings_start[grepl('Vel_Angle',headings_start)]
svg_angle_head_start <- headings_start[grepl('SVG_',headings_start)]
#then add to df
#IT WORKS!!
data5[i, 'depth_start'] <- data.startpoints.fin[i, depth_head_start]
data5[i, 'water_velocity_start'] <- data.startpoints.fin[i, vel_head_start]
data5[i, 'svg_start'] <- data.startpoints.fin[i, velgrad_head_start]
data5[i, 'velocity_angle_start'] <- data.startpoints.fin[i, vel_angle_head_start]
data5[i, 'svg_angle_start'] <- data.startpoints.fin[i, svg_angle_head_start]
#FOR END POINTS
#FOR START POINTS
#get discharge at i
discharge_end_10s <- round(data5$discharge_end[i],digits=-1)
#ok so below extracts headings for a discharge! and it WORK!
#aka picks the headings containing the discharge value from the all env df
# headings_end <- names(data.endpoints.fin)[grepl(discharge_end_10s,names(data.endpoints.fin))]
#dont need line below anymore
#headings_end <- headings_start[grepl('start',headings_start)]
if(discharge_end_10s<80){
headings_end <- names(data.endpoints.fin)[grepl(discharge_start_10s,names(data.startpoints.fin))]
}
if(discharge_end_10s>=80){
headings_end <- names(data.endpoints.fin)[grepl(80,names(data.startpoints.fin))]
}
#individual headings for each variable
#this means can direct r where to TAKE VALUES FROM
depth_head_end <- headings_end[grepl('dep',headings_end)]
vel_head_end <- headings_end[grepl('vel_',headings_end)]
velgrad_head_end <- headings_end[grepl('svg',headings_end)]
vel_angle_head_end <- headings_end[grepl('Vel_Angle',headings_end)]
svg_angle_head_end <- headings_end[grepl('SVG_',headings_end)]
#then add to df
#IT WORKS!!
data5[i, 'depth_end'] <-data.endpoints.fin[i, depth_head_end]
data5[i, 'water_velocity_end'] <- data.endpoints.fin[i, vel_head_end]
data5[i, 'svg_end'] <- data.endpoints.fin[i, velgrad_head_end]
data5[i, 'velocity_angle_end'] <- data.endpoints.fin[i, vel_angle_head_end]
data5[i, 'svg_angle_end'] <- data.endpoints.fin[i, svg_angle_head_end]
}
####make step id sequential for all approaches####
#step id per fish eg not per approach
data.sub <- summarise(group_by(data5,fish_id,approach,step_id_),count=n())
data.sub$step_id_new <- ave(data.sub$step_id_,data.sub$fish_id,
FUN = seq_along)
#ok above works
#merge to get new step_id and then drop additional columns <3
data5.5 <-merge(data5,data.sub,by=c('approach',"step_id_",'fish_id'))
data5.5 <-subset(data5.5,select=c(-count))
#OK HERE FILTER TO REMOVE RANDOM STEPS OUT OF RIVER
#check number of true steps before and after removing nas!!
#data5.5.2 <- na.omit(data5.5)
#THEN REUDCE RANDOM STEPS PER STEP to 10
#test 1
data_true_only <- filter(data5.5,case_==TRUE)
data_false_only <- filter(data5.5,case_==FALSE)
data_false_only2 <- data_false_only[24:35]
data_false_only <- data_false_only[(complete.cases(data_false_only2)),]
#get 10 null steps per step ID
data6 <- data_false_only %>% group_by(approach,step_id_,case_) %>% slice_sample(n=10) %>% ungroup
#ok well. THEN. match step id of remaining trues to new falses
truestepid <- unique(data_true_only$step_id_new)
data6.2 <- filter(data6, step_id_new %in% truestepid) #ok good
#then rbind
data7 <- rbind(data_true_only,data6.2)
data8 <- arrange(data7,t1_)
#save
#also, calculate ABSOLUTE angle of fish so can do stuff relative to flow angle
#tan of difference in x and diff y
diffx <- data8$x2_ - data8$x1_
diffy <- data8$y2_ - data8$y1_
angle <- atan(diffy/diffx)*180/pi #ok the maths is good
#ok but this only calculates like the lil angle, not realtive to north
angle_stuff<-as.data.frame(cbind(diffx,diffy,angle))
angle_stuff<-mutate(angle_stuff,absolute=case_when(diffx>0 ~ 90-angle,
diffx<0 ~270-angle))
data8$absolute_angle <- angle_stuff$absolute
#cos of the angles
data8$cos_vel_ang_start <- cos(data8$velocity_angle_start*pi/180)
data8$cos_svg_ang_start<- cos(data8$svg_angle_start*pi/180)
data8$cos_vel_ang_end <- cos(data8$velocity_angle_end*pi/180)
data8$cos_svg_ang_end <- cos(data8$svg_angle_end*pi/180)
data8$cos_abs_ang_end <- cos(data8$absolute_angle *pi/180)
##relative angles
#should do as cos? to linearise?
#diff vel and ab angle
#do maths in brackets for regular angles and then cos it
#need to be between 0 and 1, or -1 and 1
data8$diff_vel_abs_ang_start <- cos((data8$velocity_angle_start-data8$absolute_angle)*pi/180)
data8$diff_svg_abs_ang_start <- cos((data8$svg_angle_start-data8$absolute_angle)*pi/180)
data8$diff_vel_abs_ang_end <- cos((data8$velocity_angle_end-data8$absolute_angle)*pi/180)
data8$diff_svg_abs_ang_end <- cos((data8$svg_angle_end-data8$absolute_angle)*pi/180)
#examine this
#data.test <-data8[1:10,]
#data.test$dif_ang_vek_abs <- data.test$velocity_angle_end-data.test$absolute_angle
#data.test$dif_ang_vek_abs[1:2] <- c(45,-45)
#data.test$cos_diff_ang_vel_abs <- cos(data.test$dif_ang_vek_abs*pi/180)
data_final <- data8
####add fish lengths etc####
#ok so want to match tag id on this sheet to tag id on the main data sheet
data_on_fish_i <- filter(data_on_fish,Tag_ID == data_final$fish_id[1])
data_final$fork_length <- data_on_fish_i$Fork_length_mm
data_final$total_length <- data_on_fish_i$Total_length_mm
data_final$weight <- data_on_fish_i$Weight_g
####add temp####
temp$time3 <- as.POSIXct(temp$time2,format='%Y-%m-%d %H:%M:%S')
#sort temperature and remove receiver
data_final$time_start <- round_date(as.POSIXct(data_final$t1_,format='%Y-%m-%d %H:%M:%S') ,'10mins')
data_final$time_end <- round_date(as.POSIXct(data_final$t2_,format='%Y-%m-%d %H:%M:%S'),unit='10mins')
#sort temperature and remove receiver
data_final$temp_start <- temp[match(data_final$time_start,temp$time3),3]
data_final$temp_end <- temp[match(data_final$time_end,temp$time3),3]
data_final <- subset(data_final,select=-c(time_start,time_end))
#strdise parameters :>
#### save final df <3####
num_cols <- length(data_final)
if(length(data_final$approach)>0){
for(i in 1:length(variable_list)){
var <- variable_list[i]
r0w <- filter(stand,Variable==var)
col_id <- (num_cols+i)
data_final[col_id] <- 0
data_final[col_id] <- unlist((data_final[var]-r0w$Mean)/r0w$Std..Dev.)
names(data_final)[col_id] <- paste0(var,"_strdised")
}
#hashed so i dont ever accidentally run it <3
write.csv(data_final,paste0('data/final ssf + hmm df/',data_final$fish_id[1],' ssf_data_20sec_time_gap_60min_track_state2.csv'),row.names=F)
####end of potential loop
endtime <- Sys.time()
print(paste0('Finished fish id ',fish_id_for_print,' state 2 at time: ',endtime))
print(paste0('time taken: ',as.numeric(endtime-starttime,units='mins'), ' minutes'))
} else{
print(paste0("fish id ",fish_id_for_print," state 2 has no datapoints"))
}
}
}
####FOR THREE FISH WITHOUT STATE 2 FOR SOME REASON!!####
#redo 46838 46844 amd 46840 state 2 only
q <-1
files_short <- files[c(1:3)]
for (q in 1:length(files_short)){
starttime <- Sys.time()
data_to_read <- files_short[q]
data_frame <- read.csv(data_to_read)
#time as posixct
data_frame$time2 <- as.POSIXct(data_frame$t1_)
#getstates
#dat_1 <- filter(data_frame,State==1)
dat_2 <- filter(data_frame,State==2)
#need ifs for if only 1 state...
#so if dat_1 >0 and if dat_2>0
#fully remake steps tbh, using x1, y1
fish_id_for_print <- data_frame$fish_id[1]
###state 2
if(length(dat_2$approach>0)){
starttime <- Sys.time()
print(paste0('started fish id ',fish_id_for_print,'state 2, at time: ',starttime))
####loop all - state 1
#set up blank df to append to - can probs remove this and do a (if i ==1, new df, else...)
#final_df <- data.frame(matrix(ncol = 29, nrow = 0))
#colnames(final_df) <- c("x1_","x2_","y1_","y2_","sl_" ,"direction_p","ta_","t1_","t2_",
# "dt_","step_id_","case_","cos_ta_","log_sl_","substrate_start",
# "substrate_end","time_start","time_end","discharge_start",
# "discharge_end","depth_start","water_velocity_start",
# "water_vel_grad_start","depth_end","water_velocity_end",
# "water_vel_grad_end", "species","fish_id","approach" )
#ok, break this loop into smaller loops
approaches <- unique(dat_2$approach)
num_entries <- length(approaches)
#currently not appending the data?
#resampled_data_total <- data.frame(matrix(ncol=4,nrow=0))
#colnames(resampled_data_total) <-c("x_","y_","t_","burst_")
#data_with_random_steps <- data.frame(matrix(ncol=17,nrow=0))
#colnames(data_with_random_steps) <- c("x1_","x2_","y1_","y2_","sl_",
# "direction_p","ta_","t1_","t2_","dt_",
# "step_id_","case_","cos_ta_","log_sl_",
# "species","approach","fish_id")
#blank list to append entry events which have insufficient turning angles to
#shouldnt need TOO much since these df already did this level of processing - but can check
not_enough_ta_ <-data.frame(matrix(ncol=3,nrow=0))
colnames(not_enough_ta_) <-c('fish_id','approach',"state")
not_enough_ta_blank <- not_enough_ta_
#blank list to append entry events with too few points
not_enough_points <- data.frame(matrix(ncol=3,nrow=0))
colnames(not_enough_points) <-c('fish_id','approach',"state")
not_enough_points_blank <- not_enough_points
#get fish ids as names
fish_id_list <-unique(dat_2$fish_id)
#LOOP
#note to me: tbh since im using bursts I can actually not loop this by approach?
#then again, if sl_ ta_ differs with approach, this way accounts for that at least
#but then again, i could also. lookat the sl and ta distributions for each approach separately and then specify them myself
#REMOVE THE DOUBLES IN THE PREDICTED V OBSERVED STUFF FROM CRAWL
for (i in 1:length(approaches)) {
apprach <- approaches[i]
data2 <- dat_2 %>% filter(approach==apprach )%>% as_tibble()
#make a track
data_track <- make_track(data2,x,y,time2,crs=32632) #error as na in time stamps? :(
data_track <-arrange(data_track,t_)
#resample rate 20s - to give burst id
resampled_data <- track_resample(data_track,rate = seconds(20),#changed to 30s
tolerance = seconds(0.5))
#keeping tolerance very low since should have positions every 5s
#filter min burst so at least 3 points (2 steps) per burst
resampled_data <- filter_min_n_burst(resampled_data,min_n = 3)
#LOOP TO RESAMPLE APART FROM THOSE WITHOUT ENOUGH TA (AKA PART 1)
if(length(resampled_data$x_)>0){
#anyways, generate the steps from the data
resampled_steps <- steps_by_burst(resampled_data)
#if loop to check if >2 ta_
#dont think i will need this if only >3 points per burst
no.ta <- sum(!is.na(resampled_steps$ta_))
if(no.ta>=2){
#generate random steps and add to the data
random_steps_data <-random_steps(resampled_steps, n_control=100) #100 random steps so can cut this down later <3
#further error making random steps as lack of turning angles
#add time of day NOW
random_steps_data <- time_of_day(random_steps_data,where='both',include.crepuscule = TRUE,solar.dep=12)
#add cosine of turning angle and log of step length
#as may want to use later
random_steps_data$cos_ta_ <- cos(random_steps_data$ta_)
random_steps_data$log_sl_ <- log(random_steps_data$sl_)
#add other variables of importance from the og dataset!
#add species, done this way so its easier
random_steps_data$species <- unique(data2$species)
random_steps_data$state <- unique(data2$State)
#right will need to recalculate dist from fishway if want to use that
#add entry event
random_steps_data$approach <- data2$approach[1]
random_steps_data$track <- data2$track[1]
#add fish id
random_steps_data$fish_id <- unique(data2$fish_id)
##ok this loop can end here and append to larger df for example
if(i==1){ #here! error with adding extra things. but. i SHOULD be 1?
data_with_random_steps <- random_steps_data
}else{
data_with_random_steps <- rbind(data_with_random_steps,random_steps_data)
}
}
else{
not_enough_ta_ <-append(not_enough_ta_, unique(data2$approach),unique(data2$State))
not_enough_ta_2 <-not_enough_ta_blank
not_enough_ta_2[1,] <- c(data2$fish_id[1],i,unique(data2$State))
not_enough_ta_ <- rbind(not_enough_ta_,not_enough_ta_2)
}
}
else {
not_enough_points <- append(not_enough_points, unique(data2$approach))
not_enough_points2 <-not_enough_points_blank
not_enough_points2[1,] <- c(data2$fish_id[1],i)
not_enough_points <- rbind(not_enough_points,not_enough_points2)
}
}
####add env data by discharge to datas####
#rename data4 just because
data4 <- data_with_random_steps
#data4 <- read.csv('with_random_steps_all_steps_define_distr.csv')
data4_backup <- data4
data4 <- as_tibble(data4)
#get start and end points of the data
data.startpoints <-data4[,c(2,4)]
data.endpoints <- data4[,c(3,5)]
#and diischarges
#need hourly time first
data4$time_start <- round_date(as.POSIXct(data4$t1_,format='%Y-%m-%d %H:%M:%S') ,unit='hour')
data4$time_end <- round_date(as.POSIXct(data4$t2_,format='%Y-%m-%d %H:%M:%S'),unit='hour')
#then can add discharges
data4$discharge_start <- flow[match(data4$time_start,flow$time),2]
data4$discharge_end <- flow[match(data4$time_end,flow$time),2]
#and then df to save the extracted points to
data.startpoints.fin <- data.startpoints
data.endpoints.fin <-data.endpoints
#NEED LOOP FOR IF DISCHARGE >80 TO TREAT AS 80
for(i in 1:length(raster_files)) {
j <-gsub('.asc','', raster_files[i])
data.startpoints.fin[[paste0(j)]] <- raster::extract(raster(paste0("rasters - cropped/",
raster_files[i])),data.startpoints)
data.endpoints.fin[[paste0(j)]] <- raster::extract(raster(paste0("rasters - cropped/",
raster_files[i])),data.endpoints)
}
#ok so the next step will use the 2 seaprate df
#below is confusing bc we make another variable data4 but its just how the code was made initially <3
#ok so for some reason. this might not have worked :(
#just to save a copy
data5 <- data4
#will add correct depth, velcity and gradient values plus angles to above df
#in loop below data4 is the data w random steps and substrate and discharge
#and data_all_env is the MASSIVE file of all random steps plus every single
#value from the rasters like 62 columns
#ok below loop. may or may not work.
#lets do it line by line
#backup data5
data5_2 <- data5
#data5 <-data5_2
approaches <- unique(data5_2$approach)
final_df <- data.frame(matrix(ncol = 29, nrow = 0))
colnames(final_df) <- c("x1_","x2_","y1_","y2_","sl_" ,"direction_p","ta_","t1_","t2_",
"dt_","step_id_","case_","cos_ta_","log_sl_","substrate_start",
"substrate_end","time_start","time_end","discharge_start",
"discharge_end","depth_start","water_velocity_start",
"water_vel_grad_start","depth_end","water_velocity_end",
"water_vel_grad_end", "species","fish_id","approach" )
#use this loop if got covariates via raster::extract
#use the line below if need to run tihs loop over several days (:
for(i in 1:length(data5$step_id_)) {
#FOR START POINTS
#get discharge at i
discharge_start_10s <- round(data5$discharge_start[i],digits=-1)
#need if ... for discharge >80
#ok so below extracts headings for a discharge! and it WORK!
#aka picks the headings containing the discharge value from the all env df
if(discharge_start_10s<80){
headings_start <- names(data.startpoints.fin)[grepl(discharge_start_10s,names(data.startpoints.fin))]
}
if(discharge_start_10s>=80){
headings_start <- names(data.startpoints.fin)[grepl(80,names(data.startpoints.fin))]
}
#dont need line below anymore since these headings are only start
#headings_start <- headings_start[grepl('start',headings_start)]
#individual headings for each variable
#this means can direct r where to TAKE VALUES FROM
depth_head_start <- headings_start[grepl('dep',headings_start)]
vel_head_start <- headings_start[grepl('vel_',headings_start)]
velgrad_head_start <- headings_start[grepl('svg',headings_start)]
vel_angle_head_start <- headings_start[grepl('Vel_Angle',headings_start)]
svg_angle_head_start <- headings_start[grepl('SVG_',headings_start)]
#then add to df
#IT WORKS!!
data5[i, 'depth_start'] <- data.startpoints.fin[i, depth_head_start]
data5[i, 'water_velocity_start'] <- data.startpoints.fin[i, vel_head_start]
data5[i, 'svg_start'] <- data.startpoints.fin[i, velgrad_head_start]
data5[i, 'velocity_angle_start'] <- data.startpoints.fin[i, vel_angle_head_start]
data5[i, 'svg_angle_start'] <- data.startpoints.fin[i, svg_angle_head_start]
#FOR END POINTS
#FOR START POINTS
#get discharge at i
discharge_end_10s <- round(data5$discharge_end[i],digits=-1)
#ok so below extracts headings for a discharge! and it WORK!
#aka picks the headings containing the discharge value from the all env df
# headings_end <- names(data.endpoints.fin)[grepl(discharge_end_10s,names(data.endpoints.fin))]
#dont need line below anymore
#headings_end <- headings_start[grepl('start',headings_start)]
if(discharge_end_10s<80){
headings_end <- names(data.endpoints.fin)[grepl(discharge_start_10s,names(data.startpoints.fin))]
}
if(discharge_end_10s>=80){
headings_end <- names(data.endpoints.fin)[grepl(80,names(data.startpoints.fin))]
}
#individual headings for each variable
#this means can direct r where to TAKE VALUES FROM
depth_head_end <- headings_end[grepl('dep',headings_end)]
vel_head_end <- headings_end[grepl('vel_',headings_end)]
velgrad_head_end <- headings_end[grepl('svg',headings_end)]
vel_angle_head_end <- headings_end[grepl('Vel_Angle',headings_end)]
svg_angle_head_end <- headings_end[grepl('SVG_',headings_end)]
#then add to df
#IT WORKS!!
data5[i, 'depth_end'] <-data.endpoints.fin[i, depth_head_end]
data5[i, 'water_velocity_end'] <- data.endpoints.fin[i, vel_head_end]
data5[i, 'svg_end'] <- data.endpoints.fin[i, velgrad_head_end]
data5[i, 'velocity_angle_end'] <- data.endpoints.fin[i, vel_angle_head_end]
data5[i, 'svg_angle_end'] <- data.endpoints.fin[i, svg_angle_head_end]
}
####make step id sequential for all approaches####
#step id per fish eg not per approach
data.sub <- summarise(group_by(data5,fish_id,approach,step_id_),count=n())
data.sub$step_id_new <- ave(data.sub$step_id_,data.sub$fish_id,
FUN = seq_along)
#ok above works
#merge to get new step_id and then drop additional columns <3
data5.5 <-merge(data5,data.sub,by=c('approach',"step_id_",'fish_id'))
data5.5 <-subset(data5.5,select=c(-count))
#OK HERE FILTER TO REMOVE RANDOM STEPS OUT OF RIVER
#check number of true steps before and after removing nas!!
#data5.5.2 <- na.omit(data5.5)
#THEN REUDCE RANDOM STEPS PER STEP to 10
#test 1
data_true_only <- filter(data5.5,case_==TRUE)
data_false_only <- filter(data5.5,case_==FALSE)
data_false_only2 <- data_false_only[24:35]
data_false_only <- data_false_only[(complete.cases(data_false_only2)),]
#get 10 null steps per step ID
data6 <- data_false_only %>% group_by(approach,step_id_,case_) %>% slice_sample(n=10) %>% ungroup
#ok well. THEN. match step id of remaining trues to new falses
truestepid <- unique(data_true_only$step_id_new)
data6.2 <- filter(data6, step_id_new %in% truestepid) #ok good
#then rbind
data7 <- rbind(data_true_only,data6.2)
data8 <- arrange(data7,t1_)
#save
#also, calculate ABSOLUTE angle of fish so can do stuff relative to flow angle
#tan of difference in x and diff y
diffx <- data8$x2_ - data8$x1_
diffy <- data8$y2_ - data8$y1_
angle <- atan(diffy/diffx)*180/pi #ok the maths is good
#ok but this only calculates like the lil angle, not realtive to north
angle_stuff<-as.data.frame(cbind(diffx,diffy,angle))
angle_stuff<-mutate(angle_stuff,absolute=case_when(diffx>0 ~ 90-angle,
diffx<0 ~270-angle))
data8$absolute_angle <- angle_stuff$absolute
#cos of the angles
data8$cos_vel_ang_start <- cos(data8$velocity_angle_start*pi/180)
data8$cos_svg_ang_start<- cos(data8$svg_angle_start*pi/180)
data8$cos_vel_ang_end <- cos(data8$velocity_angle_end*pi/180)
data8$cos_svg_ang_end <- cos(data8$svg_angle_end*pi/180)
data8$cos_abs_ang_end <- cos(data8$absolute_angle *pi/180)
##relative angles
#should do as cos? to linearise?
#diff vel and ab angle
#do maths in brackets for regular angles and then cos it
#need to be between 0 and 1, or -1 and 1
data8$diff_vel_abs_ang_start <- cos((data8$velocity_angle_start-data8$absolute_angle)*pi/180)
data8$diff_svg_abs_ang_start <- cos((data8$svg_angle_start-data8$absolute_angle)*pi/180)
data8$diff_vel_abs_ang_end <- cos((data8$velocity_angle_end-data8$absolute_angle)*pi/180)
data8$diff_svg_abs_ang_end <- cos((data8$svg_angle_end-data8$absolute_angle)*pi/180)
#examine this
#data.test <-data8[1:10,]
#data.test$dif_ang_vek_abs <- data.test$velocity_angle_end-data.test$absolute_angle
#data.test$dif_ang_vek_abs[1:2] <- c(45,-45)
#data.test$cos_diff_ang_vel_abs <- cos(data.test$dif_ang_vek_abs*pi/180)
data_final <- data8
####add fish lengths etc####
#ok so want to match tag id on this sheet to tag id on the main data sheet
data_on_fish_i <- filter(data_on_fish,Tag_ID == data_final$fish_id[1])
data_final$fork_length <- data_on_fish_i$Fork_length_mm
data_final$total_length <- data_on_fish_i$Total_length_mm
data_final$weight <- data_on_fish_i$Weight_g
####add temp####
temp$time3 <- as.POSIXct(temp$time2,format='%Y-%m-%d %H:%M:%S')
#sort temperature and remove receiver
data_final$time_start <- round_date(as.POSIXct(data_final$t1_,format='%Y-%m-%d %H:%M:%S') ,'10mins')
data_final$time_end <- round_date(as.POSIXct(data_final$t2_,format='%Y-%m-%d %H:%M:%S'),unit='10mins')
#sort temperature and remove receiver
data_final$temp_start <- temp[match(data_final$time_start,temp$time3),3]
data_final$temp_end <- temp[match(data_final$time_end,temp$time3),3]
data_final <- subset(data_final,select=-c(time_start,time_end))
#strdise parameters :>
#### save final df <3####
num_cols <- length(data_final)
if(length(data_final$approach)>0){
for(i in 1:length(variable_list)){
var <- variable_list[i]
r0w <- filter(stand,Variable==var)
col_id <- (num_cols+i)
data_final[col_id] <- 0
data_final[col_id] <- unlist((data_final[var]-r0w$Mean)/r0w$Std..Dev.)
names(data_final)[col_id] <- paste0(var,"_strdised")
}
#hashed so i dont ever accidentally run it <3
write.csv(data_final,paste0('data/final ssf + hmm df/',data_final$fish_id[1],' ssf_data_20sec_time_gap_60min_track_state2.csv'),row.names=F)
####end of potential loop
endtime <- Sys.time()
print(paste0('Finished fish id ',fish_id_for_print,' state 2 at time: ',endtime))
print(paste0('time taken: ',as.numeric(endtime-starttime,units='mins'), ' minutes'))
} else{
print(paste0("fish id ",fish_id_for_print," state 2 has no datapoints"))
}
}
}