Skip to main content
  • Home
  • Development
  • Documentation
  • Donate
  • Operational login
  • Browse the archive

swh logo
SoftwareHeritage
Software
Heritage
Archive
Features
  • Search

  • Downloads

  • Save code now

  • Add forge now

  • Help

https://doi.org/10.5281/zenodo.14318846
17 December 2024, 12:45:03 UTC
  • Code
  • Branches (0)
  • Releases (1)
  • Visits
    • Branches
    • Releases
      • 1
      • 1
    • c8b2287
    • /
    • combining-hmm-and-ssf-code
    • /
    • scripts
    • /
    • ssf_datamaking_HMMs.R
    Raw File Download

    To reference or cite the objects present in the Software Heritage archive, permalinks based on SoftWare Hash IDentifiers (SWHIDs) must be used.
    Select below a type of object currently browsed in order to display its associated SWHID and permalink.

    • content
    • directory
    • snapshot
    • release
    origin badgecontent badge
    swh:1:cnt:6ca0d82339a35f9ece949f85ee9fc19fefb3a899
    origin badgedirectory badge
    swh:1:dir:30ac199afd3b1d7a0d0bdca02408cfa6e4b4363a
    origin badgesnapshot badge
    swh:1:snp:05a2af42b588522ca08f036c1f785d8457dcf25e
    origin badgerelease badge
    swh:1:rel:aa35d9e39d94cbf3f73362c2c2b5cd04c355e955

    This interface enables to generate software citations, provided that the root directory of browsed objects contains a citation.cff or codemeta.json file.
    Select below a type of object currently browsed in order to generate citations for them.

    • content
    • directory
    • snapshot
    • release
    Generate software citation in BibTex format (requires biblatex-software package)
    Generating citation ...
    Generate software citation in BibTex format (requires biblatex-software package)
    Generating citation ...
    Generate software citation in BibTex format (requires biblatex-software package)
    Generating citation ...
    Generate software citation in BibTex format (requires biblatex-software package)
    Generating citation ...
    ssf_datamaking_HMMs.R
    ####set up####
    
    rm(list=ls())
    
    
    #this file will take tracks with states and 1) add random steps; 2) add covariate
    #info; and 3) add fish attributes; then save the file
    
    #
    # in 'code when figuring it out section', this is code for when a) figuring out 
    # how to apply the functions and b) processing to get the resampling rate and 
    # figure out which entry events have insufficient data
    
    # theres a lot of loops as need to apply the functions within entry events
    # and nesting the data wasnt working
    
    
    #### package ####
    
    library(raster)
    
    library(ggplot2)
    library(dplyr)
    library(data.table)
    library(rgdal)#shapefile
    library(sp)
    library(rgeos)
    library(amt) #for step selection
    library(lubridate) 
    library(tidyr)
    library(crawl)
    library(sf)
    library(mapview)
    library(maptools)
    library(suncalc)
    
    
    ####read file list####
    
    #ok do file by file
    #dont need 2 check sampling rate as interpolated for 5s
    
    
    files <- list.files("data/steps with states/individual fish",full.names = T)
    
    ####other files####
    
    #get raster file names for use later
    raster_files <- list.files("rasters")  #identify file names
    
    #flow
    flow <-read.csv('data/hourly_flow.csv')
    flow$time <- as.POSIXct(flow$date, format='%d/%m/%Y %H:%M')
    
    #temperatre
    temp <- read.csv('data/median_temperatures.csv')
    
    #info for standardising
    stand <- read.csv("data/summary_stats_for_standardising.csv")
    variable_list <- stand$Variable #list of variables
    
    #other
    data_on_fish <-read.csv('data/Tagged_fish_data.csv')
    temp <- read.csv('data/median_temperatures.csv')
    
    ####START HERE - pick file to do####
    
    
    #pick a dataframe
    ####MEGA LOOP####
    
    q <-1
    
    for (q in 1:length(files)){
      starttime <- Sys.time()
      data_to_read <- files[q]
      data_frame <- read.csv(data_to_read)
      #time as posixct
      data_frame$time2 <- as.POSIXct(data_frame$t1_)
    
      #getstates
      dat_1 <- filter(data_frame,State==1)
      dat_2 <- filter(data_frame,State==2)
      
      #need ifs for if only 1 state...
      #so if dat_1 >0 and if dat_2>0
      
      #fully remake steps tbh, using x1, y1
    
    
      fish_id_for_print <- data_frame$fish_id[1]
      
      if(length(dat_1$approach>0)){
        
        print(paste0('started fish id ',fish_id_for_print,'state 1, at time: ',starttime))
        
        ####loop all - state 1
        #set up blank df to append to - can probs remove this and do a (if i ==1, new df, else...)
        #final_df <- data.frame(matrix(ncol = 29, nrow = 0))
        #colnames(final_df) <- c("x1_","x2_","y1_","y2_","sl_" ,"direction_p","ta_","t1_","t2_",                    
         #                    "dt_","step_id_","case_","cos_ta_","log_sl_","substrate_start",        
          #                   "substrate_end","time_start","time_end","discharge_start",
           #                  "discharge_end","depth_start","water_velocity_start",
            #                 "water_vel_grad_start","depth_end","water_velocity_end",     
             #                "water_vel_grad_end", "species","fish_id","approach"  )
        
        #ok, break this loop into smaller loops 
        
        approaches <- unique(dat_1$approach)
        num_entries <- length(approaches)
        
        #currently not appending the data?
        
        #resampled_data_total <- data.frame(matrix(ncol=4,nrow=0))
        #colnames(resampled_data_total) <-c("x_","y_","t_","burst_")
        
        #data_with_random_steps <- data.frame(matrix(ncol=17,nrow=0))
        #colnames(data_with_random_steps) <- c("x1_","x2_","y1_","y2_","sl_",
         #                                     "direction_p","ta_","t1_","t2_","dt_",
          #                                    "step_id_","case_","cos_ta_","log_sl_",
           #                                   "species","approach","fish_id")
        
        #blank list to append entry events which have insufficient turning angles to
        #shouldnt need TOO much since these df already did this level of processing - but can check
        not_enough_ta_ <-data.frame(matrix(ncol=3,nrow=0))
        colnames(not_enough_ta_) <-c('fish_id','approach',"state")
        not_enough_ta_blank <- not_enough_ta_
        
        #blank list to append entry events with too few points
        
        not_enough_points <- data.frame(matrix(ncol=3,nrow=0))
        colnames(not_enough_points) <-c('fish_id','approach',"state")
        not_enough_points_blank <- not_enough_points
        
      
        
        #get fish ids as names
        fish_id_list <-unique(dat_1$fish_id)
        
        
        #LOOP
        
        #note to me: tbh since im using bursts I can actually not loop this by approach?
        #then again, if sl_ ta_ differs with approach, this way accounts for that at least
        #but then again, i could also. lookat the sl and ta distributions for each approach separately and then specify them myself
        #REMOVE THE DOUBLES IN THE PREDICTED V OBSERVED STUFF FROM CRAWL
        
        for (i in 1:length(approaches)) {
          #convert df to tibble so the code works
          apprach <- approaches[i]
          data2 <- dat_1 %>% filter(approach==apprach )%>% as_tibble()
        
          #make a track
          data_track <- make_track(data2,x,y,time2,crs=32632) #error as na in time stamps? :(
          data_track <-arrange(data_track,t_)
        
          #resample rate 20s - to give burst id
          resampled_data <- track_resample(data_track,rate = seconds(20),#changed to 30s
                                             tolerance = seconds(0.5))
          #keeping tolerance very low since should have positions every 5s
          
          #filter min burst so at least 3 points (2 steps) per burst
          resampled_data <- filter_min_n_burst(resampled_data,min_n = 3)
          
          #LOOP TO RESAMPLE APART FROM THOSE WITHOUT ENOUGH TA (AKA PART 1)
          if(length(resampled_data$x_)>0){
            #anyways, generate the steps from the data
            resampled_steps <- steps_by_burst(resampled_data)
            
            #if loop to check if >2 ta_
            #dont think i will need this if only >3 points per burst
            no.ta <- sum(!is.na(resampled_steps$ta_))
            
            
            if(no.ta>=2){
              #generate random steps and add to the data
              random_steps_data <-random_steps(resampled_steps, n_control=100) #100 random steps so can cut this down later <3
              #further error making random steps as lack of turning angles
              
              #add time of day NOW
              random_steps_data <- time_of_day(random_steps_data,where='both',include.crepuscule = TRUE)
              
              #add cosine of turning angle and log of step length
              #as may want to use later
              random_steps_data$cos_ta_ <- cos(random_steps_data$ta_)
              random_steps_data$log_sl_ <- log(random_steps_data$sl_)
              
              
              #add other variables of importance from the og dataset!
              #add species, done this way so its easier 
              random_steps_data$species <- unique(data2$species)
              random_steps_data$state <- unique(data2$State)
              
              #right will need to recalculate dist from fishway if want to use that
              
              #add entry event
              random_steps_data$approach <- data2$approach[1]
              random_steps_data$track <- data2$track[1]
              
              #add fish id
              random_steps_data$fish_id <- unique(data2$fish_id)
        
              
              ##ok this loop can end here and append to larger df for example
              if(i==1){
                data_with_random_steps <- random_steps_data
              }else{
              data_with_random_steps <- rbind(data_with_random_steps,random_steps_data)
              }
            }
            else{
              not_enough_ta_ <-append(not_enough_ta_, unique(data2$approach),unique(data2$State))
              not_enough_ta_2 <-not_enough_ta_blank
              not_enough_ta_2[1,] <- c(data2$fish_id[1],i,unique(data2$State))
              not_enough_ta_ <- rbind(not_enough_ta_,not_enough_ta_2)
              
            }
          }
          else {
            not_enough_points <- append(not_enough_points, unique(data2$approach))
            not_enough_points2 <-not_enough_points_blank
            not_enough_points2[1,] <- c(data2$fish_id[1],i)
            not_enough_points <- rbind(not_enough_points,not_enough_points2)
          }
        
          
        }
        
        
          
      
        ####add env data by discharge to datas####
          
        #rename data4 just because
        data4 <- data_with_random_steps
        #data4 <- read.csv('with_random_steps_all_steps_define_distr.csv')
        
        data4_backup <- data4
        data4 <- as_tibble(data4)
        
        #get start and end points of the data
        data.startpoints <-data4[,c(2,4)]
        data.endpoints <- data4[,c(3,5)]
        
        #and diischarges
        #need hourly time first
        data4$time_start <- round_date(as.POSIXct(data4$t1_,format='%Y-%m-%d %H:%M:%S') ,unit='hour')
        data4$time_end <- round_date(as.POSIXct(data4$t2_,format='%Y-%m-%d %H:%M:%S'),unit='hour')
        #then can add discharges
        data4$discharge_start <- flow[match(data4$time_start,flow$time),2]
        data4$discharge_end <- flow[match(data4$time_end,flow$time),2]
        #and then df to save the extracted points to
        
        data.startpoints.fin <- data.startpoints
        data.endpoints.fin <-data.endpoints
        
        #NEED LOOP FOR IF DISCHARGE >80 TO TREAT AS 80
        
        for(i in 1:length(raster_files)) {   
          j <-gsub('.asc','', raster_files[i]) 
          data.startpoints.fin[[paste0(j)]] <- raster::extract(raster(paste0("rasters - cropped/",
                                                                             raster_files[i])),data.startpoints)
          
          data.endpoints.fin[[paste0(j)]] <- raster::extract(raster(paste0("rasters - cropped/",
                                                                           raster_files[i])),data.endpoints)
          
        }
        
        #ok so the next step will use the 2 seaprate df
        
        #below is confusing bc we make another variable data4 but its just how the code was made initially <3
          #ok so for some reason. this might not have worked :(
          #just to save a copy
          data5 <- data4
          #will add correct depth, velcity and gradient values plus angles to above df
          
      
          #in loop below data4 is the data w random steps and substrate and discharge
          #and data_all_env is the MASSIVE file of all random steps plus every single
          #value from the rasters like 62 columns
          
        
        #ok below loop. may or may not work.
        #lets do it line by line
          
        #backup data5
        data5_2 <- data5
        #data5 <-data5_2
        
        approaches <- unique(data5_2$approach)
        
        
        
        #use this loop if got covariates via raster::extract
        
        #use the line below if need to run tihs loop over several days (:
        for(i in 1:length(data5$step_id_)) {
          #FOR START POINTS
          #get discharge at i
          discharge_start_10s <- round(data5$discharge_start[i],digits=-1)
          #need if ... for discharge >80
          
          
          #ok so below extracts headings for a discharge! and it WORK!
          #aka picks the headings containing the discharge value from the all env df
          
          if(discharge_start_10s<80){
           headings_start <- names(data.startpoints.fin)[grepl(discharge_start_10s,names(data.startpoints.fin))]
          }
          if(discharge_start_10s>=80){
            headings_start <- names(data.startpoints.fin)[grepl(80,names(data.startpoints.fin))]
            
          }
           #dont need line below anymore since these headings are only start
          #headings_start <- headings_start[grepl('start',headings_start)]
          
          #individual headings for each variable
          #this means can direct r where to TAKE VALUES FROM
          depth_head_start <- headings_start[grepl('dep',headings_start)]
          vel_head_start <- headings_start[grepl('vel_',headings_start)]
          velgrad_head_start <- headings_start[grepl('svg',headings_start)]
          vel_angle_head_start <- headings_start[grepl('Vel_Angle',headings_start)]
          svg_angle_head_start <- headings_start[grepl('SVG_',headings_start)]
          
          #then add to df
          #IT WORKS!!
          data5[i, 'depth_start'] <- data.startpoints.fin[i, depth_head_start]
          data5[i, 'water_velocity_start'] <- data.startpoints.fin[i, vel_head_start]
          data5[i, 'svg_start'] <- data.startpoints.fin[i, velgrad_head_start]
          data5[i, 'velocity_angle_start'] <- data.startpoints.fin[i, vel_angle_head_start]
          data5[i, 'svg_angle_start'] <- data.startpoints.fin[i, svg_angle_head_start]
          
          #FOR END POINTS
          #FOR START POINTS
          #get discharge at i
          discharge_end_10s <- round(data5$discharge_end[i],digits=-1)
          
          #ok so below extracts headings for a discharge! and it WORK!
          #aka picks the headings containing the discharge value from the all env df
         # headings_end <- names(data.endpoints.fin)[grepl(discharge_end_10s,names(data.endpoints.fin))]
          #dont need line below anymore
          #headings_end <- headings_start[grepl('start',headings_start)]
          
          if(discharge_end_10s<80){
            headings_end <- names(data.endpoints.fin)[grepl(discharge_start_10s,names(data.startpoints.fin))]
          }
          if(discharge_end_10s>=80){
            headings_end <- names(data.endpoints.fin)[grepl(80,names(data.startpoints.fin))]
            
          }
          
          #individual headings for each variable
          #this means can direct r where to TAKE VALUES FROM
          depth_head_end <- headings_end[grepl('dep',headings_end)]
          vel_head_end <- headings_end[grepl('vel_',headings_end)]
          velgrad_head_end <- headings_end[grepl('svg',headings_end)]
          vel_angle_head_end <- headings_end[grepl('Vel_Angle',headings_end)]
          svg_angle_head_end <- headings_end[grepl('SVG_',headings_end)]
          
          
          #then add to df
          #IT WORKS!!
          data5[i, 'depth_end'] <-data.endpoints.fin[i, depth_head_end]
          data5[i, 'water_velocity_end'] <- data.endpoints.fin[i, vel_head_end]
          data5[i, 'svg_end'] <- data.endpoints.fin[i, velgrad_head_end]
          data5[i, 'velocity_angle_end'] <- data.endpoints.fin[i, vel_angle_head_end]
          data5[i, 'svg_angle_end'] <- data.endpoints.fin[i, svg_angle_head_end]
          
          
        }
        
      
        ####make step id sequential for all approaches####
        
        #step id per fish eg not per approach
        
        data.sub <- summarise(group_by(data5,fish_id,approach,step_id_),count=n())
        data.sub$step_id_new <- ave(data.sub$step_id_,data.sub$fish_id,
                                    FUN = seq_along)
        
        #ok above works
        #merge to get new step_id and then drop additional columns <3
        data5.5 <-merge(data5,data.sub,by=c('approach',"step_id_",'fish_id'))
        data5.5 <-subset(data5.5,select=c(-count))
        
        
        #OK HERE FILTER TO REMOVE RANDOM STEPS OUT OF RIVER
        
        #check number of true steps before and after removing nas!!
        
        #data5.5.2 <- na.omit(data5.5)
      
        #THEN REUDCE RANDOM STEPS PER STEP to 10
        
        #test 1
        data_true_only <- filter(data5.5,case_==TRUE)
        data_false_only <- filter(data5.5,case_==FALSE)
        data_false_only2 <- data_false_only[24:35]
        
        #remove NAs
        data_false_only <- data_false_only[(complete.cases(data_false_only2)),]
        
        
        #get 10 null steps per step ID
        data6 <- data_false_only %>% group_by(approach,step_id_,case_) %>% slice_sample(n=10) %>% ungroup
        
        #ok well. THEN. match step id of remaining trues to new falses
        
        truestepid <- unique(data_true_only$step_id_new)
        data6.2 <- filter(data6, step_id_new %in% truestepid) #ok good
        
        #then rbind
        data7 <- rbind(data_true_only,data6.2)
        
        data8 <- arrange(data7,t1_)
        
        #save
      
        #also, calculate ABSOLUTE angle of fish so can do stuff relative to flow angle
        #tan of difference in x and diff y
        
        diffx <- data8$x2_ - data8$x1_
        diffy <- data8$y2_ - data8$y1_
        
        angle <- atan(diffy/diffx)*180/pi #ok the maths is good
        #ok but this only calculates like the lil angle, not realtive to north
        angle_stuff<-as.data.frame(cbind(diffx,diffy,angle))
        
        angle_stuff<-mutate(angle_stuff,absolute=case_when(diffx>0 ~ 90-angle,
                                              diffx<0 ~270-angle))
        
        
        data8$absolute_angle <- angle_stuff$absolute
        
        #cos of the angles
        data8$cos_vel_ang_start <- cos(data8$velocity_angle_start*pi/180)
        data8$cos_svg_ang_start<- cos(data8$svg_angle_start*pi/180)
        
        data8$cos_vel_ang_end <- cos(data8$velocity_angle_end*pi/180)
        data8$cos_svg_ang_end <- cos(data8$svg_angle_end*pi/180)
        data8$cos_abs_ang_end <- cos(data8$absolute_angle *pi/180)
        
          ##relative angles
        #should do as cos? to linearise?
        
        #diff vel and ab angle
        #do maths in brackets for regular angles and then cos it
        #need to be between 0 and 1, or -1 and 1
        
        data8$diff_vel_abs_ang_start <- cos((data8$velocity_angle_start-data8$absolute_angle)*pi/180)
        data8$diff_svg_abs_ang_start <- cos((data8$svg_angle_start-data8$absolute_angle)*pi/180)
        
        data8$diff_vel_abs_ang_end <- cos((data8$velocity_angle_end-data8$absolute_angle)*pi/180)
        data8$diff_svg_abs_ang_end <- cos((data8$svg_angle_end-data8$absolute_angle)*pi/180)
        
        #examine this
        #data.test <-data8[1:10,]
        #data.test$dif_ang_vek_abs <- data.test$velocity_angle_end-data.test$absolute_angle
        #data.test$dif_ang_vek_abs[1:2] <- c(45,-45)
        #data.test$cos_diff_ang_vel_abs <- cos(data.test$dif_ang_vek_abs*pi/180)
        
        
        data_final <- data8
        
        ####add fish lengths etc####
          
        #ok so want to match tag id on this sheet to tag id on the main data sheet
        
        data_on_fish_i <- filter(data_on_fish,Tag_ID == data_final$fish_id[1])
        
        data_final$fork_length <- data_on_fish_i$Fork_length_mm
        data_final$total_length <- data_on_fish_i$Total_length_mm
        data_final$weight <- data_on_fish_i$Weight_g
        
        
        ####add temp####
        temp$time3 <- as.POSIXct(temp$time2,format='%Y-%m-%d %H:%M:%S')
        #sort temperature and remove receiver
        
        data_final$time_start <- round_date(as.POSIXct(data_final$t1_,format='%Y-%m-%d %H:%M:%S') ,'10mins')
        data_final$time_end <- round_date(as.POSIXct(data_final$t2_,format='%Y-%m-%d %H:%M:%S'),unit='10mins')
        
        #sort temperature and remove receiver
        
        data_final$temp_start <- temp[match(data_final$time_start,temp$time3),3]
        data_final$temp_end <- temp[match(data_final$time_end,temp$time3),3]
        
        data_final <- subset(data_final,select=-c(time_start,time_end))
        
        
        num_cols <- length(data_final)
        if(length(data_final$approach)>0){
          for(i in 1:length(variable_list)){
            var <- variable_list[i]
            r0w <- filter(stand,Variable==var)
            col_id <- (num_cols+i)
            data_final[col_id] <- 0
            data_final[col_id] <- unlist((data_final[var]-r0w$Mean)/r0w$Std..Dev.)
            names(data_final)[col_id] <- paste0(var,"_strdised")
          } }
        
        #### save final df <3####
        
        #hashed so i dont ever accidentally run it <3
        write.csv(data_final,paste0('data/final ssf + hmm df/',data_final$fish_id[1],' ssf_data_20sec_time_gap_60min_track_state1.csv'),row.names=F)
        
        
        ####end of potential loop
        
        endtime <- Sys.time()
        print(paste0('Finished fish id ',fish_id_for_print,' state 1 at time: ',endtime))
        print(paste0('time taken: ',as.numeric(endtime-starttime,units='mins'), ' minutes'))
      } else{
      print(paste0("fish id ",fish_id_for_print," state 1 has no datapoints"))
    }
      
      ###state 2
      
      if(length(dat_2$approach>0)){
        starttime <- Sys.time()
        print(paste0('started fish id ',fish_id_for_print,'state 2, at time: ',starttime))
        
        ####loop all - state 1
        #set up blank df to append to - can probs remove this and do a (if i ==1, new df, else...)
        #final_df <- data.frame(matrix(ncol = 29, nrow = 0))
        #colnames(final_df) <- c("x1_","x2_","y1_","y2_","sl_" ,"direction_p","ta_","t1_","t2_",                    
        #                    "dt_","step_id_","case_","cos_ta_","log_sl_","substrate_start",        
        #                   "substrate_end","time_start","time_end","discharge_start",
        #                  "discharge_end","depth_start","water_velocity_start",
        #                 "water_vel_grad_start","depth_end","water_velocity_end",     
        #                "water_vel_grad_end", "species","fish_id","approach"  )
        
        #ok, break this loop into smaller loops 
        
        approaches <- unique(dat_2$approach)
        num_entries <- length(approaches)
        
        #currently not appending the data?
        
        #resampled_data_total <- data.frame(matrix(ncol=4,nrow=0))
        #colnames(resampled_data_total) <-c("x_","y_","t_","burst_")
        
        #data_with_random_steps <- data.frame(matrix(ncol=17,nrow=0))
        #colnames(data_with_random_steps) <- c("x1_","x2_","y1_","y2_","sl_",
        #                                     "direction_p","ta_","t1_","t2_","dt_",
        #                                    "step_id_","case_","cos_ta_","log_sl_",
        #                                   "species","approach","fish_id")
        
        #blank list to append entry events which have insufficient turning angles to
        #shouldnt need TOO much since these df already did this level of processing - but can check
        not_enough_ta_ <-data.frame(matrix(ncol=3,nrow=0))
        colnames(not_enough_ta_) <-c('fish_id','approach',"state")
        not_enough_ta_blank <- not_enough_ta_
        
        #blank list to append entry events with too few points
        
        not_enough_points <- data.frame(matrix(ncol=3,nrow=0))
        colnames(not_enough_points) <-c('fish_id','approach',"state")
        not_enough_points_blank <- not_enough_points
        
        
        
        #get fish ids as names
        fish_id_list <-unique(dat_2$fish_id)
        
        
        #LOOP
        
        #note to me: tbh since im using bursts I can actually not loop this by approach?
        #then again, if sl_ ta_ differs with approach, this way accounts for that at least
        #but then again, i could also. lookat the sl and ta distributions for each approach separately and then specify them myself
        #REMOVE THE DOUBLES IN THE PREDICTED V OBSERVED STUFF FROM CRAWL
        
        for (i in 1:length(approaches)) {
          apprach <- approaches[i]
          data2 <- dat_2 %>% filter(approach==apprach )%>% as_tibble()
          
          #make a track
          data_track <- make_track(data2,x,y,time2,crs=32632) #error as na in time stamps? :(
          data_track <-arrange(data_track,t_)
          
          #resample rate 20s - to give burst id
          resampled_data <- track_resample(data_track,rate = seconds(20),#changed to 30s
                                           tolerance = seconds(0.5))
          #keeping tolerance very low since should have positions every 5s
          
          #filter min burst so at least 3 points (2 steps) per burst
          resampled_data <- filter_min_n_burst(resampled_data,min_n = 3)
          
          #LOOP TO RESAMPLE APART FROM THOSE WITHOUT ENOUGH TA (AKA PART 1)
          if(length(resampled_data$x_)>0){
            #anyways, generate the steps from the data
            resampled_steps <- steps_by_burst(resampled_data)
            
            #if loop to check if >2 ta_
            #dont think i will need this if only >3 points per burst
            no.ta <- sum(!is.na(resampled_steps$ta_))
            
            
            if(no.ta>=2){
              #generate random steps and add to the data
              random_steps_data <-random_steps(resampled_steps, n_control=100) #100 random steps so can cut this down later <3
              #further error making random steps as lack of turning angles
              
              #add time of day NOW
              random_steps_data <- time_of_day(random_steps_data,where='both',include.crepuscule = TRUE)
              
              #add cosine of turning angle and log of step length
              #as may want to use later
              random_steps_data$cos_ta_ <- cos(random_steps_data$ta_)
              random_steps_data$log_sl_ <- log(random_steps_data$sl_)
              
              
              #add other variables of importance from the og dataset!
              #add species, done this way so its easier 
              random_steps_data$species <- unique(data2$species)
              random_steps_data$state <- unique(data2$State)
              
              #right will need to recalculate dist from fishway if want to use that
              
              #add entry event
              random_steps_data$approach <- data2$approach[1]
              random_steps_data$track <- data2$track[1]
              
              #add fish id
              random_steps_data$fish_id <- unique(data2$fish_id)
              
              
              ##ok this loop can end here and append to larger df for example
              if(i==1){ #here! error with adding extra things. but. i SHOULD be 1?
                data_with_random_steps <- random_steps_data
              }else{
                data_with_random_steps <- rbind(data_with_random_steps,random_steps_data)
              }
            }
            else{
              not_enough_ta_ <-append(not_enough_ta_, unique(data2$approach),unique(data2$State))
              not_enough_ta_2 <-not_enough_ta_blank
              not_enough_ta_2[1,] <- c(data2$fish_id[1],i,unique(data2$State))
              not_enough_ta_ <- rbind(not_enough_ta_,not_enough_ta_2)
              
            }
          }
          else {
            not_enough_points <- append(not_enough_points, unique(data2$approach))
            not_enough_points2 <-not_enough_points_blank
            not_enough_points2[1,] <- c(data2$fish_id[1],i)
            not_enough_points <- rbind(not_enough_points,not_enough_points2)
          }
          
          
        }
        
        
        
        
        ####add env data by discharge to datas####
        
        #rename data4 just because
        data4 <- data_with_random_steps
        #data4 <- read.csv('with_random_steps_all_steps_define_distr.csv')
        
        data4_backup <- data4
        data4 <- as_tibble(data4)
        
        #get start and end points of the data
        data.startpoints <-data4[,c(2,4)]
        data.endpoints <- data4[,c(3,5)]
        
        #and diischarges
        #need hourly time first
        data4$time_start <- round_date(as.POSIXct(data4$t1_,format='%Y-%m-%d %H:%M:%S') ,unit='hour')
        data4$time_end <- round_date(as.POSIXct(data4$t2_,format='%Y-%m-%d %H:%M:%S'),unit='hour')
        #then can add discharges
        data4$discharge_start <- flow[match(data4$time_start,flow$time),2]
        data4$discharge_end <- flow[match(data4$time_end,flow$time),2]
        #and then df to save the extracted points to
        
        data.startpoints.fin <- data.startpoints
        data.endpoints.fin <-data.endpoints
        
        #NEED LOOP FOR IF DISCHARGE >80 TO TREAT AS 80
        
        for(i in 1:length(raster_files)) {   
          j <-gsub('.asc','', raster_files[i]) 
          data.startpoints.fin[[paste0(j)]] <- raster::extract(raster(paste0("rasters - cropped/",
                                                                             raster_files[i])),data.startpoints)
          
          data.endpoints.fin[[paste0(j)]] <- raster::extract(raster(paste0("rasters - cropped/",
                                                                           raster_files[i])),data.endpoints)
          
        }
        
        #ok so the next step will use the 2 seaprate df
        
        #below is confusing bc we make another variable data4 but its just how the code was made initially <3
        #ok so for some reason. this might not have worked :(
        #just to save a copy
        data5 <- data4
        #will add correct depth, velcity and gradient values plus angles to above df
        
        
        #in loop below data4 is the data w random steps and substrate and discharge
        #and data_all_env is the MASSIVE file of all random steps plus every single
        #value from the rasters like 62 columns
        
        
        #ok below loop. may or may not work.
        #lets do it line by line
        
        #backup data5
        data5_2 <- data5
        #data5 <-data5_2
        
        approaches <- unique(data5_2$approach)
        
        final_df <- data.frame(matrix(ncol = 29, nrow = 0))
        colnames(final_df) <- c("x1_","x2_","y1_","y2_","sl_" ,"direction_p","ta_","t1_","t2_",                    
                                "dt_","step_id_","case_","cos_ta_","log_sl_","substrate_start",        
                                "substrate_end","time_start","time_end","discharge_start",
                                "discharge_end","depth_start","water_velocity_start",
                                "water_vel_grad_start","depth_end","water_velocity_end",     
                                "water_vel_grad_end", "species","fish_id","approach"  )
        
        
        
        #use this loop if got covariates via raster::extract
        
        #use the line below if need to run tihs loop over several days (:
        for(i in 1:length(data5$step_id_)) {
          #FOR START POINTS
          #get discharge at i
          discharge_start_10s <- round(data5$discharge_start[i],digits=-1)
          #need if ... for discharge >80
          
          
          #ok so below extracts headings for a discharge! and it WORK!
          #aka picks the headings containing the discharge value from the all env df
          
          if(discharge_start_10s<80){
            headings_start <- names(data.startpoints.fin)[grepl(discharge_start_10s,names(data.startpoints.fin))]
          }
          if(discharge_start_10s>=80){
            headings_start <- names(data.startpoints.fin)[grepl(80,names(data.startpoints.fin))]
            
          }
          #dont need line below anymore since these headings are only start
          #headings_start <- headings_start[grepl('start',headings_start)]
          
          #individual headings for each variable
          #this means can direct r where to TAKE VALUES FROM
          depth_head_start <- headings_start[grepl('dep',headings_start)]
          vel_head_start <- headings_start[grepl('vel_',headings_start)]
          velgrad_head_start <- headings_start[grepl('svg',headings_start)]
          vel_angle_head_start <- headings_start[grepl('Vel_Angle',headings_start)]
          svg_angle_head_start <- headings_start[grepl('SVG_',headings_start)]
          
          #then add to df
          #IT WORKS!!
          data5[i, 'depth_start'] <- data.startpoints.fin[i, depth_head_start]
          data5[i, 'water_velocity_start'] <- data.startpoints.fin[i, vel_head_start]
          data5[i, 'svg_start'] <- data.startpoints.fin[i, velgrad_head_start]
          data5[i, 'velocity_angle_start'] <- data.startpoints.fin[i, vel_angle_head_start]
          data5[i, 'svg_angle_start'] <- data.startpoints.fin[i, svg_angle_head_start]
          
          #FOR END POINTS
          #FOR START POINTS
          #get discharge at i
          discharge_end_10s <- round(data5$discharge_end[i],digits=-1)
          
          #ok so below extracts headings for a discharge! and it WORK!
          #aka picks the headings containing the discharge value from the all env df
          # headings_end <- names(data.endpoints.fin)[grepl(discharge_end_10s,names(data.endpoints.fin))]
          #dont need line below anymore
          #headings_end <- headings_start[grepl('start',headings_start)]
          
          if(discharge_end_10s<80){
            headings_end <- names(data.endpoints.fin)[grepl(discharge_start_10s,names(data.startpoints.fin))]
          }
          if(discharge_end_10s>=80){
            headings_end <- names(data.endpoints.fin)[grepl(80,names(data.startpoints.fin))]
            
          }
          
          #individual headings for each variable
          #this means can direct r where to TAKE VALUES FROM
          depth_head_end <- headings_end[grepl('dep',headings_end)]
          vel_head_end <- headings_end[grepl('vel_',headings_end)]
          velgrad_head_end <- headings_end[grepl('svg',headings_end)]
          vel_angle_head_end <- headings_end[grepl('Vel_Angle',headings_end)]
          svg_angle_head_end <- headings_end[grepl('SVG_',headings_end)]
          
          
          #then add to df
          #IT WORKS!!
          data5[i, 'depth_end'] <-data.endpoints.fin[i, depth_head_end]
          data5[i, 'water_velocity_end'] <- data.endpoints.fin[i, vel_head_end]
          data5[i, 'svg_end'] <- data.endpoints.fin[i, velgrad_head_end]
          data5[i, 'velocity_angle_end'] <- data.endpoints.fin[i, vel_angle_head_end]
          data5[i, 'svg_angle_end'] <- data.endpoints.fin[i, svg_angle_head_end]
          
          
        }
        
        
        ####make step id sequential for all approaches####
        
        #step id per fish eg not per approach
        
        data.sub <- summarise(group_by(data5,fish_id,approach,step_id_),count=n())
        data.sub$step_id_new <- ave(data.sub$step_id_,data.sub$fish_id,
                                    FUN = seq_along)
        
        #ok above works
        #merge to get new step_id and then drop additional columns <3
        data5.5 <-merge(data5,data.sub,by=c('approach',"step_id_",'fish_id'))
        data5.5 <-subset(data5.5,select=c(-count))
        
        
        #OK HERE FILTER TO REMOVE RANDOM STEPS OUT OF RIVER
        
        #check number of true steps before and after removing nas!!
        
        #data5.5.2 <- na.omit(data5.5)
        
        #THEN REUDCE RANDOM STEPS PER STEP to 10
        
        #test 1
        data_true_only <- filter(data5.5,case_==TRUE)
        data_false_only <- filter(data5.5,case_==FALSE)
        
        data_false_only2 <- data_false_only[24:35]
        
        data_false_only <- data_false_only[(complete.cases(data_false_only2)),]
        #get 10 null steps per step ID
        data6 <- data_false_only %>% group_by(approach,step_id_,case_) %>% slice_sample(n=10) %>% ungroup
        
        #ok well. THEN. match step id of remaining trues to new falses
        
        truestepid <- unique(data_true_only$step_id_new)
        data6.2 <- filter(data6, step_id_new %in% truestepid) #ok good
        
        #then rbind
        data7 <- rbind(data_true_only,data6.2)
        
        data8 <- arrange(data7,t1_)
        
        #save
        
        #also, calculate ABSOLUTE angle of fish so can do stuff relative to flow angle
        #tan of difference in x and diff y
        
        diffx <- data8$x2_ - data8$x1_
        diffy <- data8$y2_ - data8$y1_
        
        angle <- atan(diffy/diffx)*180/pi #ok the maths is good
        #ok but this only calculates like the lil angle, not realtive to north
        angle_stuff<-as.data.frame(cbind(diffx,diffy,angle))
        
        angle_stuff<-mutate(angle_stuff,absolute=case_when(diffx>0 ~ 90-angle,
                                                           diffx<0 ~270-angle))
        
        
        data8$absolute_angle <- angle_stuff$absolute
        
        #cos of the angles
        data8$cos_vel_ang_start <- cos(data8$velocity_angle_start*pi/180)
        data8$cos_svg_ang_start<- cos(data8$svg_angle_start*pi/180)
        
        data8$cos_vel_ang_end <- cos(data8$velocity_angle_end*pi/180)
        data8$cos_svg_ang_end <- cos(data8$svg_angle_end*pi/180)
        data8$cos_abs_ang_end <- cos(data8$absolute_angle *pi/180)
        
        ##relative angles
        #should do as cos? to linearise?
        
        #diff vel and ab angle
        #do maths in brackets for regular angles and then cos it
        #need to be between 0 and 1, or -1 and 1
        
        data8$diff_vel_abs_ang_start <- cos((data8$velocity_angle_start-data8$absolute_angle)*pi/180)
        data8$diff_svg_abs_ang_start <- cos((data8$svg_angle_start-data8$absolute_angle)*pi/180)
        
        data8$diff_vel_abs_ang_end <- cos((data8$velocity_angle_end-data8$absolute_angle)*pi/180)
        data8$diff_svg_abs_ang_end <- cos((data8$svg_angle_end-data8$absolute_angle)*pi/180)
        
        #examine this
        #data.test <-data8[1:10,]
        #data.test$dif_ang_vek_abs <- data.test$velocity_angle_end-data.test$absolute_angle
        #data.test$dif_ang_vek_abs[1:2] <- c(45,-45)
        #data.test$cos_diff_ang_vel_abs <- cos(data.test$dif_ang_vek_abs*pi/180)
        
        
        data_final <- data8
        
        ####add fish lengths etc####
        
        #ok so want to match tag id on this sheet to tag id on the main data sheet
        
        data_on_fish_i <- filter(data_on_fish,Tag_ID == data_final$fish_id[1])
        
        data_final$fork_length <- data_on_fish_i$Fork_length_mm
        data_final$total_length <- data_on_fish_i$Total_length_mm
        data_final$weight <- data_on_fish_i$Weight_g
        
        
        ####add temp####
        temp$time3 <- as.POSIXct(temp$time2,format='%Y-%m-%d %H:%M:%S')
        #sort temperature and remove receiver
        
        data_final$time_start <- round_date(as.POSIXct(data_final$t1_,format='%Y-%m-%d %H:%M:%S') ,'10mins')
        data_final$time_end <- round_date(as.POSIXct(data_final$t2_,format='%Y-%m-%d %H:%M:%S'),unit='10mins')
        
        #sort temperature and remove receiver
        
        data_final$temp_start <- temp[match(data_final$time_start,temp$time3),3]
        data_final$temp_end <- temp[match(data_final$time_end,temp$time3),3]
        
        data_final <- subset(data_final,select=-c(time_start,time_end))
        
        #strdise parameters :>
        
        
        #### save final df <3####
        
        num_cols <- length(data_final)
        if(length(data_final$approach)>0){
          for(i in 1:length(variable_list)){
            var <- variable_list[i]
            r0w <- filter(stand,Variable==var)
            col_id <- (num_cols+i)
            data_final[col_id] <- 0
            data_final[col_id] <- unlist((data_final[var]-r0w$Mean)/r0w$Std..Dev.)
            names(data_final)[col_id] <- paste0(var,"_strdised")
          } 
        #hashed so i dont ever accidentally run it <3
        write.csv(data_final,paste0('data/final ssf + hmm df/',data_final$fish_id[1],' ssf_data_20sec_time_gap_60min_track_state2.csv'),row.names=F)
        
    
        ####end of potential loop
        
        endtime <- Sys.time()
        print(paste0('Finished fish id ',fish_id_for_print,' state 2 at time: ',endtime))
        print(paste0('time taken: ',as.numeric(endtime-starttime,units='mins'), ' minutes'))
        
        } else{
          print(paste0("fish id ",fish_id_for_print," state 2 has no datapoints"))
        }
    
      }
    }
    
    
    ####FOR THREE FISH WITHOUT STATE 2 FOR SOME REASON!!####
    
    #redo 46838 46844 amd 46840 state 2 only
    
    q <-1
    
    files_short <- files[c(1:3)]
    for (q in 1:length(files_short)){
      starttime <- Sys.time()
      data_to_read <- files_short[q]
      data_frame <- read.csv(data_to_read)
      #time as posixct
      data_frame$time2 <- as.POSIXct(data_frame$t1_)
      
      #getstates
      #dat_1 <- filter(data_frame,State==1)
      dat_2 <- filter(data_frame,State==2)
      
      #need ifs for if only 1 state...
      #so if dat_1 >0 and if dat_2>0
      
      #fully remake steps tbh, using x1, y1
      
      
      fish_id_for_print <- data_frame$fish_id[1]
      
      
      ###state 2
      
      if(length(dat_2$approach>0)){
        starttime <- Sys.time()
        print(paste0('started fish id ',fish_id_for_print,'state 2, at time: ',starttime))
        
        ####loop all - state 1
        #set up blank df to append to - can probs remove this and do a (if i ==1, new df, else...)
        #final_df <- data.frame(matrix(ncol = 29, nrow = 0))
        #colnames(final_df) <- c("x1_","x2_","y1_","y2_","sl_" ,"direction_p","ta_","t1_","t2_",                    
        #                    "dt_","step_id_","case_","cos_ta_","log_sl_","substrate_start",        
        #                   "substrate_end","time_start","time_end","discharge_start",
        #                  "discharge_end","depth_start","water_velocity_start",
        #                 "water_vel_grad_start","depth_end","water_velocity_end",     
        #                "water_vel_grad_end", "species","fish_id","approach"  )
        
        #ok, break this loop into smaller loops 
        
        approaches <- unique(dat_2$approach)
        num_entries <- length(approaches)
        
        #currently not appending the data?
        
        #resampled_data_total <- data.frame(matrix(ncol=4,nrow=0))
        #colnames(resampled_data_total) <-c("x_","y_","t_","burst_")
        
        #data_with_random_steps <- data.frame(matrix(ncol=17,nrow=0))
        #colnames(data_with_random_steps) <- c("x1_","x2_","y1_","y2_","sl_",
        #                                     "direction_p","ta_","t1_","t2_","dt_",
        #                                    "step_id_","case_","cos_ta_","log_sl_",
        #                                   "species","approach","fish_id")
        
        #blank list to append entry events which have insufficient turning angles to
        #shouldnt need TOO much since these df already did this level of processing - but can check
        not_enough_ta_ <-data.frame(matrix(ncol=3,nrow=0))
        colnames(not_enough_ta_) <-c('fish_id','approach',"state")
        not_enough_ta_blank <- not_enough_ta_
        
        #blank list to append entry events with too few points
        
        not_enough_points <- data.frame(matrix(ncol=3,nrow=0))
        colnames(not_enough_points) <-c('fish_id','approach',"state")
        not_enough_points_blank <- not_enough_points
        
        
        
        #get fish ids as names
        fish_id_list <-unique(dat_2$fish_id)
        
        
        #LOOP
        
        #note to me: tbh since im using bursts I can actually not loop this by approach?
        #then again, if sl_ ta_ differs with approach, this way accounts for that at least
        #but then again, i could also. lookat the sl and ta distributions for each approach separately and then specify them myself
        #REMOVE THE DOUBLES IN THE PREDICTED V OBSERVED STUFF FROM CRAWL
        
        for (i in 1:length(approaches)) {
          apprach <- approaches[i]
          data2 <- dat_2 %>% filter(approach==apprach )%>% as_tibble()
          
          #make a track
          data_track <- make_track(data2,x,y,time2,crs=32632) #error as na in time stamps? :(
          data_track <-arrange(data_track,t_)
          
          #resample rate 20s - to give burst id
          resampled_data <- track_resample(data_track,rate = seconds(20),#changed to 30s
                                           tolerance = seconds(0.5))
          #keeping tolerance very low since should have positions every 5s
          
          #filter min burst so at least 3 points (2 steps) per burst
          resampled_data <- filter_min_n_burst(resampled_data,min_n = 3)
          
          #LOOP TO RESAMPLE APART FROM THOSE WITHOUT ENOUGH TA (AKA PART 1)
          if(length(resampled_data$x_)>0){
            #anyways, generate the steps from the data
            resampled_steps <- steps_by_burst(resampled_data)
            
            #if loop to check if >2 ta_
            #dont think i will need this if only >3 points per burst
            no.ta <- sum(!is.na(resampled_steps$ta_))
            
            
            if(no.ta>=2){
              #generate random steps and add to the data
              random_steps_data <-random_steps(resampled_steps, n_control=100) #100 random steps so can cut this down later <3
              #further error making random steps as lack of turning angles
              
              #add time of day NOW
              random_steps_data <- time_of_day(random_steps_data,where='both',include.crepuscule = TRUE,solar.dep=12)
              
              #add cosine of turning angle and log of step length
              #as may want to use later
              random_steps_data$cos_ta_ <- cos(random_steps_data$ta_)
              random_steps_data$log_sl_ <- log(random_steps_data$sl_)
              
              
              #add other variables of importance from the og dataset!
              #add species, done this way so its easier 
              random_steps_data$species <- unique(data2$species)
              random_steps_data$state <- unique(data2$State)
              
              #right will need to recalculate dist from fishway if want to use that
              
              #add entry event
              random_steps_data$approach <- data2$approach[1]
              random_steps_data$track <- data2$track[1]
              
              #add fish id
              random_steps_data$fish_id <- unique(data2$fish_id)
              
              
              ##ok this loop can end here and append to larger df for example
              if(i==1){ #here! error with adding extra things. but. i SHOULD be 1?
                data_with_random_steps <- random_steps_data
              }else{
                data_with_random_steps <- rbind(data_with_random_steps,random_steps_data)
              }
            }
            else{
              not_enough_ta_ <-append(not_enough_ta_, unique(data2$approach),unique(data2$State))
              not_enough_ta_2 <-not_enough_ta_blank
              not_enough_ta_2[1,] <- c(data2$fish_id[1],i,unique(data2$State))
              not_enough_ta_ <- rbind(not_enough_ta_,not_enough_ta_2)
              
            }
          }
          else {
            not_enough_points <- append(not_enough_points, unique(data2$approach))
            not_enough_points2 <-not_enough_points_blank
            not_enough_points2[1,] <- c(data2$fish_id[1],i)
            not_enough_points <- rbind(not_enough_points,not_enough_points2)
          }
          
          
        }
        
        
        
        
        ####add env data by discharge to datas####
        
        #rename data4 just because
        data4 <- data_with_random_steps
        #data4 <- read.csv('with_random_steps_all_steps_define_distr.csv')
        
        data4_backup <- data4
        data4 <- as_tibble(data4)
        
        #get start and end points of the data
        data.startpoints <-data4[,c(2,4)]
        data.endpoints <- data4[,c(3,5)]
        
        #and diischarges
        #need hourly time first
        data4$time_start <- round_date(as.POSIXct(data4$t1_,format='%Y-%m-%d %H:%M:%S') ,unit='hour')
        data4$time_end <- round_date(as.POSIXct(data4$t2_,format='%Y-%m-%d %H:%M:%S'),unit='hour')
        #then can add discharges
        data4$discharge_start <- flow[match(data4$time_start,flow$time),2]
        data4$discharge_end <- flow[match(data4$time_end,flow$time),2]
        #and then df to save the extracted points to
        
        data.startpoints.fin <- data.startpoints
        data.endpoints.fin <-data.endpoints
        
        #NEED LOOP FOR IF DISCHARGE >80 TO TREAT AS 80
        
        for(i in 1:length(raster_files)) {   
          j <-gsub('.asc','', raster_files[i]) 
          data.startpoints.fin[[paste0(j)]] <- raster::extract(raster(paste0("rasters - cropped/",
                                                                             raster_files[i])),data.startpoints)
          
          data.endpoints.fin[[paste0(j)]] <- raster::extract(raster(paste0("rasters - cropped/",
                                                                           raster_files[i])),data.endpoints)
          
        }
        
        #ok so the next step will use the 2 seaprate df
        
        #below is confusing bc we make another variable data4 but its just how the code was made initially <3
        #ok so for some reason. this might not have worked :(
        #just to save a copy
        data5 <- data4
        #will add correct depth, velcity and gradient values plus angles to above df
        
        
        #in loop below data4 is the data w random steps and substrate and discharge
        #and data_all_env is the MASSIVE file of all random steps plus every single
        #value from the rasters like 62 columns
        
        
        #ok below loop. may or may not work.
        #lets do it line by line
        
        #backup data5
        data5_2 <- data5
        #data5 <-data5_2
        
        approaches <- unique(data5_2$approach)
        
        final_df <- data.frame(matrix(ncol = 29, nrow = 0))
        colnames(final_df) <- c("x1_","x2_","y1_","y2_","sl_" ,"direction_p","ta_","t1_","t2_",                    
                                "dt_","step_id_","case_","cos_ta_","log_sl_","substrate_start",        
                                "substrate_end","time_start","time_end","discharge_start",
                                "discharge_end","depth_start","water_velocity_start",
                                "water_vel_grad_start","depth_end","water_velocity_end",     
                                "water_vel_grad_end", "species","fish_id","approach"  )
        
        
        
        #use this loop if got covariates via raster::extract
        
        #use the line below if need to run tihs loop over several days (:
        for(i in 1:length(data5$step_id_)) {
          #FOR START POINTS
          #get discharge at i
          discharge_start_10s <- round(data5$discharge_start[i],digits=-1)
          #need if ... for discharge >80
          
          
          #ok so below extracts headings for a discharge! and it WORK!
          #aka picks the headings containing the discharge value from the all env df
          
          if(discharge_start_10s<80){
            headings_start <- names(data.startpoints.fin)[grepl(discharge_start_10s,names(data.startpoints.fin))]
          }
          if(discharge_start_10s>=80){
            headings_start <- names(data.startpoints.fin)[grepl(80,names(data.startpoints.fin))]
            
          }
          #dont need line below anymore since these headings are only start
          #headings_start <- headings_start[grepl('start',headings_start)]
          
          #individual headings for each variable
          #this means can direct r where to TAKE VALUES FROM
          depth_head_start <- headings_start[grepl('dep',headings_start)]
          vel_head_start <- headings_start[grepl('vel_',headings_start)]
          velgrad_head_start <- headings_start[grepl('svg',headings_start)]
          vel_angle_head_start <- headings_start[grepl('Vel_Angle',headings_start)]
          svg_angle_head_start <- headings_start[grepl('SVG_',headings_start)]
          
          #then add to df
          #IT WORKS!!
          data5[i, 'depth_start'] <- data.startpoints.fin[i, depth_head_start]
          data5[i, 'water_velocity_start'] <- data.startpoints.fin[i, vel_head_start]
          data5[i, 'svg_start'] <- data.startpoints.fin[i, velgrad_head_start]
          data5[i, 'velocity_angle_start'] <- data.startpoints.fin[i, vel_angle_head_start]
          data5[i, 'svg_angle_start'] <- data.startpoints.fin[i, svg_angle_head_start]
          
          #FOR END POINTS
          #FOR START POINTS
          #get discharge at i
          discharge_end_10s <- round(data5$discharge_end[i],digits=-1)
          
          #ok so below extracts headings for a discharge! and it WORK!
          #aka picks the headings containing the discharge value from the all env df
          # headings_end <- names(data.endpoints.fin)[grepl(discharge_end_10s,names(data.endpoints.fin))]
          #dont need line below anymore
          #headings_end <- headings_start[grepl('start',headings_start)]
          
          if(discharge_end_10s<80){
            headings_end <- names(data.endpoints.fin)[grepl(discharge_start_10s,names(data.startpoints.fin))]
          }
          if(discharge_end_10s>=80){
            headings_end <- names(data.endpoints.fin)[grepl(80,names(data.startpoints.fin))]
            
          }
          
          #individual headings for each variable
          #this means can direct r where to TAKE VALUES FROM
          depth_head_end <- headings_end[grepl('dep',headings_end)]
          vel_head_end <- headings_end[grepl('vel_',headings_end)]
          velgrad_head_end <- headings_end[grepl('svg',headings_end)]
          vel_angle_head_end <- headings_end[grepl('Vel_Angle',headings_end)]
          svg_angle_head_end <- headings_end[grepl('SVG_',headings_end)]
          
          
          #then add to df
          #IT WORKS!!
          data5[i, 'depth_end'] <-data.endpoints.fin[i, depth_head_end]
          data5[i, 'water_velocity_end'] <- data.endpoints.fin[i, vel_head_end]
          data5[i, 'svg_end'] <- data.endpoints.fin[i, velgrad_head_end]
          data5[i, 'velocity_angle_end'] <- data.endpoints.fin[i, vel_angle_head_end]
          data5[i, 'svg_angle_end'] <- data.endpoints.fin[i, svg_angle_head_end]
          
          
        }
        
        
        ####make step id sequential for all approaches####
        
        #step id per fish eg not per approach
        
        data.sub <- summarise(group_by(data5,fish_id,approach,step_id_),count=n())
        data.sub$step_id_new <- ave(data.sub$step_id_,data.sub$fish_id,
                                    FUN = seq_along)
        
        #ok above works
        #merge to get new step_id and then drop additional columns <3
        data5.5 <-merge(data5,data.sub,by=c('approach',"step_id_",'fish_id'))
        data5.5 <-subset(data5.5,select=c(-count))
        
        
        #OK HERE FILTER TO REMOVE RANDOM STEPS OUT OF RIVER
        
        #check number of true steps before and after removing nas!!
        
        #data5.5.2 <- na.omit(data5.5)
        
        #THEN REUDCE RANDOM STEPS PER STEP to 10
        
        #test 1
        data_true_only <- filter(data5.5,case_==TRUE)
        data_false_only <- filter(data5.5,case_==FALSE)
        
        data_false_only2 <- data_false_only[24:35]
        
        data_false_only <- data_false_only[(complete.cases(data_false_only2)),]
        #get 10 null steps per step ID
        data6 <- data_false_only %>% group_by(approach,step_id_,case_) %>% slice_sample(n=10) %>% ungroup
        
        #ok well. THEN. match step id of remaining trues to new falses
        
        truestepid <- unique(data_true_only$step_id_new)
        data6.2 <- filter(data6, step_id_new %in% truestepid) #ok good
        
        #then rbind
        data7 <- rbind(data_true_only,data6.2)
        
        data8 <- arrange(data7,t1_)
        
        #save
        
        #also, calculate ABSOLUTE angle of fish so can do stuff relative to flow angle
        #tan of difference in x and diff y
        
        diffx <- data8$x2_ - data8$x1_
        diffy <- data8$y2_ - data8$y1_
        
        angle <- atan(diffy/diffx)*180/pi #ok the maths is good
        #ok but this only calculates like the lil angle, not realtive to north
        angle_stuff<-as.data.frame(cbind(diffx,diffy,angle))
        
        angle_stuff<-mutate(angle_stuff,absolute=case_when(diffx>0 ~ 90-angle,
                                                           diffx<0 ~270-angle))
        
        
        data8$absolute_angle <- angle_stuff$absolute
        
        #cos of the angles
        data8$cos_vel_ang_start <- cos(data8$velocity_angle_start*pi/180)
        data8$cos_svg_ang_start<- cos(data8$svg_angle_start*pi/180)
        
        data8$cos_vel_ang_end <- cos(data8$velocity_angle_end*pi/180)
        data8$cos_svg_ang_end <- cos(data8$svg_angle_end*pi/180)
        data8$cos_abs_ang_end <- cos(data8$absolute_angle *pi/180)
        
        ##relative angles
        #should do as cos? to linearise?
        
        #diff vel and ab angle
        #do maths in brackets for regular angles and then cos it
        #need to be between 0 and 1, or -1 and 1
        
        data8$diff_vel_abs_ang_start <- cos((data8$velocity_angle_start-data8$absolute_angle)*pi/180)
        data8$diff_svg_abs_ang_start <- cos((data8$svg_angle_start-data8$absolute_angle)*pi/180)
        
        data8$diff_vel_abs_ang_end <- cos((data8$velocity_angle_end-data8$absolute_angle)*pi/180)
        data8$diff_svg_abs_ang_end <- cos((data8$svg_angle_end-data8$absolute_angle)*pi/180)
        
        #examine this
        #data.test <-data8[1:10,]
        #data.test$dif_ang_vek_abs <- data.test$velocity_angle_end-data.test$absolute_angle
        #data.test$dif_ang_vek_abs[1:2] <- c(45,-45)
        #data.test$cos_diff_ang_vel_abs <- cos(data.test$dif_ang_vek_abs*pi/180)
        
        
        data_final <- data8
        
        ####add fish lengths etc####
        
        #ok so want to match tag id on this sheet to tag id on the main data sheet
        
        data_on_fish_i <- filter(data_on_fish,Tag_ID == data_final$fish_id[1])
        
        data_final$fork_length <- data_on_fish_i$Fork_length_mm
        data_final$total_length <- data_on_fish_i$Total_length_mm
        data_final$weight <- data_on_fish_i$Weight_g
        
        
        ####add temp####
        temp$time3 <- as.POSIXct(temp$time2,format='%Y-%m-%d %H:%M:%S')
        #sort temperature and remove receiver
        
        data_final$time_start <- round_date(as.POSIXct(data_final$t1_,format='%Y-%m-%d %H:%M:%S') ,'10mins')
        data_final$time_end <- round_date(as.POSIXct(data_final$t2_,format='%Y-%m-%d %H:%M:%S'),unit='10mins')
        
        #sort temperature and remove receiver
        
        data_final$temp_start <- temp[match(data_final$time_start,temp$time3),3]
        data_final$temp_end <- temp[match(data_final$time_end,temp$time3),3]
        
        data_final <- subset(data_final,select=-c(time_start,time_end))
        
        #strdise parameters :>
        
        
        #### save final df <3####
        
        num_cols <- length(data_final)
        if(length(data_final$approach)>0){
          for(i in 1:length(variable_list)){
            var <- variable_list[i]
            r0w <- filter(stand,Variable==var)
            col_id <- (num_cols+i)
            data_final[col_id] <- 0
            data_final[col_id] <- unlist((data_final[var]-r0w$Mean)/r0w$Std..Dev.)
            names(data_final)[col_id] <- paste0(var,"_strdised")
          } 
          #hashed so i dont ever accidentally run it <3
          write.csv(data_final,paste0('data/final ssf + hmm df/',data_final$fish_id[1],' ssf_data_20sec_time_gap_60min_track_state2.csv'),row.names=F)
          
          
          ####end of potential loop
          
          endtime <- Sys.time()
          print(paste0('Finished fish id ',fish_id_for_print,' state 2 at time: ',endtime))
          print(paste0('time taken: ',as.numeric(endtime-starttime,units='mins'), ' minutes'))
          
        } else{
          print(paste0("fish id ",fish_id_for_print," state 2 has no datapoints"))
        }
        
      }
    }
    

    back to top

    Software Heritage — Copyright (C) 2015–2025, The Software Heritage developers. License: GNU AGPLv3+.
    The source code of Software Heritage itself is available on our development forge.
    The source code files archived by Software Heritage are available under their own copyright and licenses.
    Terms of use: Archive access, API— Content policy— Contact— JavaScript license information— Web API