Skip to main content
  • Home
  • Development
  • Documentation
  • Donate
  • Operational login
  • Browse the archive

swh logo
SoftwareHeritage
Software
Heritage
Archive
Features
  • Search

  • Downloads

  • Save code now

  • Add forge now

  • Help

https://doi.org/10.5281/zenodo.14318846
17 December 2024, 12:45:03 UTC
  • Code
  • Branches (0)
  • Releases (1)
  • Visits
    • Branches
    • Releases
      • 1
      • 1
    • c8b2287
    • /
    • combining-hmm-and-ssf-code
    • /
    • scripts
    • /
    • ssf_datamaking_noHMM.R
    Raw File Download

    To reference or cite the objects present in the Software Heritage archive, permalinks based on SoftWare Hash IDentifiers (SWHIDs) must be used.
    Select below a type of object currently browsed in order to display its associated SWHID and permalink.

    • content
    • directory
    • snapshot
    • release
    origin badgecontent badge
    swh:1:cnt:d41d407f000a80df0a5b7b6aa5f6fcc8a1f7211b
    origin badgedirectory badge
    swh:1:dir:30ac199afd3b1d7a0d0bdca02408cfa6e4b4363a
    origin badgesnapshot badge
    swh:1:snp:05a2af42b588522ca08f036c1f785d8457dcf25e
    origin badgerelease badge
    swh:1:rel:aa35d9e39d94cbf3f73362c2c2b5cd04c355e955

    This interface enables to generate software citations, provided that the root directory of browsed objects contains a citation.cff or codemeta.json file.
    Select below a type of object currently browsed in order to generate citations for them.

    • content
    • directory
    • snapshot
    • release
    Generate software citation in BibTex format (requires biblatex-software package)
    Generating citation ...
    Generate software citation in BibTex format (requires biblatex-software package)
    Generating citation ...
    Generate software citation in BibTex format (requires biblatex-software package)
    Generating citation ...
    Generate software citation in BibTex format (requires biblatex-software package)
    Generating citation ...
    ssf_datamaking_noHMM.R
    ####set up####
    
    #ctrl alt t to run sections
    
    rm(list=ls())
    
    
    #THIS IS THE DATAMAKING FILE FOR THE SSF
    # theres a lot of loops as need to apply the functions within entry events
    # and nesting the data wasnt working
    
    
    #### packages ####
    
    library(raster)
    
    library(ggplot2)
    library(dplyr)
    library(data.table)
    library(rgdal)#shapefile
    library(sp)
    library(rgeos)
    library(amt) #for step selection
    library(lubridate) #for step selection
    #library(purrr) #for nesting? #or not
    library(tidyr)
    library(crawl)
    library(sf)
    library(mapview)
    library(maptools)
    
    
    ####read file list####
    
    files <- list.files("data/find ladder/data near ladder CRAWLED")
    
    ####other files####
    
    #get raster file names for use later
    raster_files <- list.files("rasters")  #identify file names
    
    #get substrate data
    substrate_poly  <-readOGR('substrate/Materials_hydraulicModel.shp')
    
    #flow
    flow <-read.csv('data/hourly_flow.csv')
    flow$time <- as.POSIXct(flow$date, format='%d/%m/%Y %H:%M')
    
    #temperatre
    temp <- read.csv('data/median_temperatures.csv')
    
    
    ####START HERE - pick file to do####
    
    data_on_fish <-read.csv('data/Tagged_fish_data.csv')
    temp <- read.csv('data/median_temperatures.csv')
    
    #pick a dataframe
    ####LOOP####
    
    for (q in 1:length(files)){
      starttime <- Sys.time()
      data_to_read <- files[q]
      data_frame <- read.csv(paste0('data/find ladder/data near ladder CRAWLED/',data_to_read))
      
      #get time as posixct
      data_frame$time2 <- as.POSIXct(data_frame$time2)
      
      #rename data_Frame
      data <- data_frame
    
      fish_id_for_print <- data$fish_id[1]
      print(paste0('started fish id ',fish_id_for_print,' at time: ',starttime))
      
      ####loop all
      #set up blank df to append to
      final_df <- data.frame(matrix(ncol = 29, nrow = 0))
      colnames(final_df) <- c("x1_","x2_","y1_","y2_","sl_" ,"direction_p","ta_","t1_","t2_",                    
                           "dt_","step_id_","case_","cos_ta_","log_sl_","substrate_start",        
                           "substrate_end","time_start","time_end","discharge_start",
                           "discharge_end","depth_start","water_velocity_start",
                           "water_vel_grad_start","depth_end","water_velocity_end",     
                           "water_vel_grad_end", "species","fish_id","approach"  )
      
      #break this loop into smaller loops 
      
      approaches <- unique(data$approach)
      num_entries <- length(approaches)
      
      resampled_data_total <- data.frame(matrix(ncol=4,nrow=0))
      colnames(resampled_data_total) <-c("x_","y_","t_","burst_")
      
      data_with_random_steps <- data.frame(matrix(ncol=17,nrow=0))
      colnames(data_with_random_steps) <- c("x1_","x2_","y1_","y2_","sl_",
                                            "direction_p","ta_","t1_","t2_","dt_",
                                            "step_id_","case_","cos_ta_","log_sl_",
                                            "species","approach","fish_id")
      
      #blank list to append entry events which have insufficient turning angles to
      
      not_enough_ta_ <-data.frame(matrix(ncol=2,nrow=0))
      colnames(not_enough_ta_) <-c('fish_id','approach')
      not_enough_ta_blank <- not_enough_ta_
      
      #blank list to append entry events with too few points
      
      not_enough_points <- data.frame(matrix(ncol=2,nrow=0))
      colnames(not_enough_points) <-c('fish_id','approach')
      not_enough_points_blank <- not_enough_points
      
    
      
      #get fish ids as names
      fish_id_list <-unique(data$fish_id)
      
      
      #LOOP
        
      for (i in approaches) {
        #select approach and keep only p locations from crawl (e.g. every 20 s)
        data2 <- filter(data, approach==i&locType=="p")
      
        #convert df to tibble so the code works
        data2 <- as_tibble(data2)
      
        #make a track
        data_track <- make_track(data2,mu.x,mu.y,time2,crs=32632) 
        data_track <-arrange(data_track,t_)
      
        #resample rate 20s
        resampled_data <- track_resample(data_track,rate = seconds(20),
                                           tolerance = seconds(0.5))
        #keeping tolerance very low since should have positions every 5s
        
        #filter min burst so at least 3 points (2 steps) per burst
        resampled_data <- filter_min_n_burst(resampled_data,min_n = 3)
        
        #LOOP TO RESAMPLE APART FROM THOSE WITHOUT ENOUGH TA (AKA PART 1)
        if(length(resampled_data$x_)>0){
          #generate the steps from the data
          resampled_steps <- steps_by_burst(resampled_data)
          
          #if loop to check if >2 ta_
          #dont think i will need this if only >3 points per burst
          no.ta <- sum(!is.na(resampled_steps$ta_))
          
          
          if(no.ta>=2){
            #generate random steps and add to the data
            random_steps_data <-random_steps(resampled_steps, n_control=100) #100 random steps so can cut this down later
            
            #add time of day
            random_steps_data <- time_of_day(random_steps_data,where='both',include.crepuscule = FALSE)
            
            #add cosine of turning angle and log of step length
            random_steps_data$cos_ta_ <- cos(random_steps_data$ta_)
            random_steps_data$log_sl_ <- log(random_steps_data$sl_)
            
            
            #add other variables of importance from the og dataset!
            #add species, done this way so its easier 
            random_steps_data$species <- unique(data2$species)
            
            #add entry event
            random_steps_data$approach <- data2$approach[1]
            
            #add fish id
            random_steps_data$fish_id <- unique(data2$id)
      
            
            ##ok this loop can end here and append to larger df for example
            
            data_with_random_steps <- rbind(data_with_random_steps,random_steps_data)
          }
          else{
            not_enough_ta_ <-append(not_enough_ta_, unique(data2$approach))
            not_enough_ta_2 <-not_enough_ta_blank
            not_enough_ta_2[1,] <- c(data2$fish_id[1],i)
            not_enough_ta_ <- rbind(not_enough_ta_,not_enough_ta_2)
            
          }
        }
        else {
          not_enough_points <- append(not_enough_points, unique(data2$approach))
          not_enough_points2 <-not_enough_points_blank
          not_enough_points2[1,] <- c(data2$fish_id[1],i)
          not_enough_points <- rbind(not_enough_points,not_enough_points2)
        }
      
        
      }
      
      ##substrate
        
      data3 <- data_with_random_steps
        
        #get points for extraction
        points_start <- data.frame(data3$x1_,data3$y1_)
        names(points_start) <-c('x','y')
        points_end <- data.frame(data3$x2_,data3$y2_)
        names(points_end) <-c('x','y')
        
        substrate_extract_start <- raster::extract(substrate_poly,points_start)
        substrate_start <- substrate_extract_start$MATNAME
        
        substrate_extract_end <- raster::extract(substrate_poly,points_end)
        substrate_end <- substrate_extract_end$MATNAME
        
        #duplicated data to be safe and check its ok
        data_with_substrate<- cbind(data3, substrate_start,substrate_end)
        
    
      ####add env data by discharge to datas####
        
      #rename data4 just because
      data4 <- data_with_random_steps
      #data4 <- read.csv('with_random_steps_all_steps_define_distr.csv')
      
      data4_backup <- data4
      data4 <- as_tibble(data4)
      
      #get start and end points of the data
      data.startpoints <-data4[,c(2,4)]
      data.endpoints <- data4[,c(3,5)]
      
      #and diischarges
      #need hourly time first
      data4$time_start <- round_date(as.POSIXct(data4$t1_,format='%Y-%m-%d %H:%M:%S') ,unit='hour')
      data4$time_end <- round_date(as.POSIXct(data4$t2_,format='%Y-%m-%d %H:%M:%S'),unit='hour')
      #then can add discharges
      data4$discharge_start <- flow[match(data4$time_start,flow$time),2]
      data4$discharge_end <- flow[match(data4$time_end,flow$time),2]
      #and then df to save the extracted points to
      
      data.startpoints.fin <- data.startpoints
      data.endpoints.fin <-data.endpoints
      
      #NEED LOOP FOR IF DISCHARGE >80 TO TREAT AS 80
      
      for(i in 1:length(raster_files)) {   
        j <-gsub('.tif','', raster_files[i]) 
        data.startpoints.fin[[paste0(j)]] <- raster::extract(raster(paste0("rasters/",
                                                                           raster_files[i])),data.startpoints)
        
        data.endpoints.fin[[paste0(j)]] <- raster::extract(raster(paste0("rasters/",
                                                                         raster_files[i])),data.endpoints)
        
      }
      
      #ok so the next step will use the 2 separate df
      
      #below is confusing bc we make another variable data4 but its just how the code was made initially
        #just to save a copy
        data5 <- data_with_substrate
    
        #will add correct depth, velcity and gradient values plus angles to above df
        
    
        #in loop below data4 is the data with random steps and substrate and discharge
        #and data_all_env is the MASSIVE file of all random steps plus every single
        #value from the rasters like 62 columns
        
      
      #ok below loop    
      #backup data5
      data5_2 <- data5
          
      data5$time_start <- round_date(as.POSIXct(data5$t1_,format='%Y-%m-%d %H:%M:%S') ,unit='hour')
      data5$time_end <- round_date(as.POSIXct(data5$t2_,format='%Y-%m-%d %H:%M:%S'),unit='hour')
    
      #then can add discharges
      data5$discharge_start <- flow[match(data5$time_start,flow$time),2]
      data5$discharge_end <- flow[match(data5$time_end,flow$time),2]
      
      #drop new time columns
      data5 <-subset(data5,select=-c(time_start,time_end))
      
      
      approaches <- unique(data5_2$approach)
      
      final_df <- data.frame(matrix(ncol = 29, nrow = 0))
      colnames(final_df) <- c("x1_","x2_","y1_","y2_","sl_" ,"direction_p","ta_","t1_","t2_",                    
                              "dt_","step_id_","case_","cos_ta_","log_sl_","substrate_start",        
                              "substrate_end","time_start","time_end","discharge_start",
                              "discharge_end","depth_start","water_velocity_start",
                              "water_vel_grad_start","depth_end","water_velocity_end",     
                              "water_vel_grad_end", "species","fish_id","approach"  )
      
      
      
      #use this loop if got covariates via raster::extract
      
      #use the line below if need to run this loop over several sessions
    
      for(i in 1:length(data5$step_id_)) {
        #FOR START POINTS
        #get discharge at i
        discharge_start_10s <- round(data5$discharge_start[i],digits=-1)
        #need if ... for discharge >80
        
        
        #below extracts headings for a discharge
        #aka picks the headings containing the discharge value from the all env df
        
        if(discharge_start_10s<80){
         headings_start <- names(data.startpoints.fin)[grepl(discharge_start_10s,names(data.startpoints.fin))]
        }
        if(discharge_start_10s>=80){
          headings_start <- names(data.startpoints.fin)[grepl(80,names(data.startpoints.fin))]
          
        }
         #dont need line below anymore since these headings are only start
        #headings_start <- headings_start[grepl('start',headings_start)]
        
        #individual headings for each variable
        #this means can direct r where to TAKE VALUES FROM
        depth_head_start <- headings_start[grepl('dep',headings_start)]
        vel_head_start <- headings_start[grepl('vel_',headings_start)]
        velgrad_head_start <- headings_start[grepl('svg',headings_start)]
        vel_angle_head_start <- headings_start[grepl('Vel_Angle',headings_start)]
        svg_angle_head_start <- headings_start[grepl('SVG_',headings_start)]
        
        #then add to df
        data5[i, 'depth_start'] <- data.startpoints.fin[i, depth_head_start]
        data5[i, 'water_velocity_start'] <- data.startpoints.fin[i, vel_head_start]
        data5[i, 'svg_start'] <- data.startpoints.fin[i, velgrad_head_start]
        data5[i, 'velocity_angle_start'] <- data.startpoints.fin[i, vel_angle_head_start]
        data5[i, 'svg_angle_start'] <- data.startpoints.fin[i, svg_angle_head_start]
        
        #FOR END POINTS
    
        #get discharge at i
        discharge_end_10s <- round(data5$discharge_end[i],digits=-1)
        
        
        if(discharge_end_10s<80){
          headings_end <- names(data.endpoints.fin)[grepl(discharge_start_10s,names(data.startpoints.fin))]
        }
        if(discharge_end_10s>=80){
          headings_end <- names(data.endpoints.fin)[grepl(80,names(data.startpoints.fin))]
          
        }
        
        #individual headings for each variable
        #this means can direct r where to TAKE VALUES FROM
    
        depth_head_end <- headings_end[grepl('dep',headings_end)]
        vel_head_end <- headings_end[grepl('vel_',headings_end)]
        velgrad_head_end <- headings_end[grepl('svg',headings_end)]
        vel_angle_head_end <- headings_end[grepl('Vel_Angle',headings_end)]
        svg_angle_head_end <- headings_end[grepl('SVG_',headings_end)]
        
        
        #then add to df
        #IT WORKS!!
        data5[i, 'depth_end'] <-data.endpoints.fin[i, depth_head_end]
        data5[i, 'water_velocity_end'] <- data.endpoints.fin[i, vel_head_end]
        data5[i, 'svg_end'] <- data.endpoints.fin[i, velgrad_head_end]
        data5[i, 'velocity_angle_end'] <- data.endpoints.fin[i, vel_angle_head_end]
        data5[i, 'svg_angle_end'] <- data.endpoints.fin[i, svg_angle_head_end]
        
        
      }
      
    
      ####make step id sequential for all approaches####
      
      #step id per fish eg not per approach
      
      data.sub <- summarise(group_by(data5,fish_id,approach,step_id_),count=n())
      data.sub$step_id_new <- ave(data.sub$step_id_,data.sub$fish_id,
                                  FUN = seq_along)
      
      #ok above works
      #merge to get new step_id and then drop additional columns
    
      data5.5 <-merge(data5,data.sub,by=c('approach',"step_id_",'fish_id'))
      data5.5 <-subset(data5.5,select=c(-count))
      
      
      #FILTER TO REMOVE RANDOM STEPS OUT OF RIVER
      
      #check number of true steps before and after removing nas!!
      
      data5.5.2 <- na.omit(data5.5) #since if out of river, env data will be NA
    
      #THEN REUDCE RANDOM STEPS PER STEP to 10
      
      #test 1
      data_true_only <- filter(data5.5.2,case_==TRUE)
      data_false_only <- filter(data5.5.2,case_==FALSE)
      #get 10 null steps per step ID
      data6 <- data_false_only %>% group_by(approach,step_id_,case_) %>% slice_sample(n=10) %>% ungroup
      
      #ok well. THEN. match step id of remaining trues to new falses
      
      truestepid <- unique(data_true_only$step_id_new)
      data6.2 <- filter(data6, step_id_new %in% truestepid) #ok good
      
      #then rbind
      data7 <- rbind(data_true_only,data6.2)
      
      data8 <- arrange(data7,t1_)
      
      #save
    
      #also, calculate ABSOLUTE angle of fish so can do stuff relative to flow angle
      #tan of difference in x and diff y
      
      diffx <- data8$x2_ - data8$x1_
      diffy <- data8$y2_ - data8$y1_
      
      angle <- atan(diffy/diffx)*180/pi
    
      angle_stuff<-as.data.frame(cbind(diffx,diffy,angle))
      
      angle_stuff<-mutate(angle_stuff,absolute=case_when(diffx>0 ~ 90-angle,
                                            diffx<0 ~270-angle))
      
      
      data8$absolute_angle <- angle_stuff$absolute
      
      #cos of the angles
      data8$cos_vel_ang_start <- cos(data8$velocity_angle_start*pi/180)
      data8$cos_svg_ang_start<- cos(data8$svg_angle_start*pi/180)
      
      data8$cos_vel_ang_end <- cos(data8$velocity_angle_end*pi/180)
      data8$cos_svg_ang_end <- cos(data8$svg_angle_end*pi/180)
      data8$cos_abs_ang_end <- cos(data8$absolute_angle *pi/180)
      
      #relative angles
      
      #diff vel and ab angle
      #do maths in brackets for regular angles and then cos it
      #need to be between 0 and 1, or -1 and 1
      
      data8$diff_vel_abs_ang_start <- cos((data8$velocity_angle_start-data8$absolute_angle)*pi/180)
      data8$diff_svg_abs_ang_start <- cos((data8$svg_angle_start-data8$absolute_angle)*pi/180)
      
      data8$diff_vel_abs_ang_end <- cos((data8$velocity_angle_end-data8$absolute_angle)*pi/180)
      data8$diff_svg_abs_ang_end <- cos((data8$svg_angle_end-data8$absolute_angle)*pi/180)
      
      
      data_final <- data8
      
      ####add fish lengths etc####
        
      #ok so want to match tag id on this sheet to tag id on the main data sheet
      
      data_on_fish_i <- filter(data_on_fish,Tag_ID == data_final$fish_id[1])
      
      data_final$fork_length <- data_on_fish_i$Fork_length_mm
      data_final$total_length <- data_on_fish_i$Total_length_mm
      data_final$weight <- data_on_fish_i$Weight_g
      
      
      ####add temp####
      temp$time3 <- as.POSIXct(temp$time2,format='%Y-%m-%d %H:%M:%S')
      #sort temperature and remove receiver
      
      data_final$time_start <- round_date(as.POSIXct(data_final$t1_,format='%Y-%m-%d %H:%M:%S') ,'10mins')
      data_final$time_end <- round_date(as.POSIXct(data_final$t2_,format='%Y-%m-%d %H:%M:%S'),unit='10mins')
      
      #sort temperature and remove receiver
      
      data_final$temp_start <- temp[match(data_final$time_start,temp$time3),3]
      data_final$temp_end <- temp[match(data_final$time_end,temp$time3),3]
      
      data_final <- subset(data_final,select=-c(time_start,time_end))
      
    
      
    
      #### save final df <3####
      
      #hashed so i dont ever accidentally run it
      write.csv(data_final,paste0('data/find ladder/final ssf dataframes/',data_final$fish_id[1],' ssf_data_20sec_time_gap_60min_track.csv'),row.names=F)
      
      
      ####end of potential loop
      
      endtime <- Sys.time()
      print(paste0('Finished fish id ',fish_id_for_print,' at time: ',endtime))
      print(paste0('time taken: ',as.numeric(endtime-starttime,units='mins'), ' minutes'))
    
    
    
    }
    
    ####end loop####
    
    

    back to top

    Software Heritage — Copyright (C) 2015–2025, The Software Heritage developers. License: GNU AGPLv3+.
    The source code of Software Heritage itself is available on our development forge.
    The source code files archived by Software Heritage are available under their own copyright and licenses.
    Terms of use: Archive access, API— Content policy— Contact— JavaScript license information— Web API