Skip to main content
  • Home
  • Development
  • Documentation
  • Donate
  • Operational login
  • Browse the archive

swh logo
SoftwareHeritage
Software
Heritage
Archive
Features
  • Search

  • Downloads

  • Save code now

  • Add forge now

  • Help

https://doi.org/10.5281/zenodo.14318846
17 December 2024, 12:45:03 UTC
  • Code
  • Branches (0)
  • Releases (1)
  • Visits
    • Branches
    • Releases
      • 1
      • 1
    • c8b2287
    • /
    • combining-hmm-and-ssf-code
    • /
    • scripts
    • /
    • model selection loop no HMM.R
    Raw File Download

    To reference or cite the objects present in the Software Heritage archive, permalinks based on SoftWare Hash IDentifiers (SWHIDs) must be used.
    Select below a type of object currently browsed in order to display its associated SWHID and permalink.

    • content
    • directory
    • snapshot
    • release
    origin badgecontent badge
    swh:1:cnt:d0c951bb549d6b534fcbf21ad86a77a58844acbb
    origin badgedirectory badge
    swh:1:dir:30ac199afd3b1d7a0d0bdca02408cfa6e4b4363a
    origin badgesnapshot badge
    swh:1:snp:05a2af42b588522ca08f036c1f785d8457dcf25e
    origin badgerelease badge
    swh:1:rel:aa35d9e39d94cbf3f73362c2c2b5cd04c355e955

    This interface enables to generate software citations, provided that the root directory of browsed objects contains a citation.cff or codemeta.json file.
    Select below a type of object currently browsed in order to generate citations for them.

    • content
    • directory
    • snapshot
    • release
    Generate software citation in BibTex format (requires biblatex-software package)
    Generating citation ...
    Generate software citation in BibTex format (requires biblatex-software package)
    Generating citation ...
    Generate software citation in BibTex format (requires biblatex-software package)
    Generating citation ...
    Generate software citation in BibTex format (requires biblatex-software package)
    Generating citation ...
    model selection loop no HMM.R
    
    ####clear variables####
    rm(list=ls())
    
    ####packages####
    library(dplyr)
    library(data.table)
    library(amt) #for step selection
    library(fastDummies)
    library(MASS)
    library(survival)
    library(gridExtra)
    library(ggpubr)#better than gridextra?
    
    
    
    ####read in file names ####
    files <- list.files('data/find ladder/FINAL final SSF dataframes - standardised and filtered - decision tree and distances',full.names = T)
    
    
    #files <- list.files('data/find ladder/FINAL final SSF dataframes - standardised and filtered',full.names = F)
    
    
    for(z in 1:length(files)){
    ####read data####
    #select files #change number per fish
    data_to_read <- files[z]
    
    #read in file
    data_ssf <- read.csv(data_to_read)
    #get fish id
    fish_id <- data_ssf$fish_id[[1]]
    if(fish_id==46838){##this was for removing fish 46838's bad track
    
    data_ssf <- data_ssf %>% filter(track !=2) 
    }
    
    
    data_ssf$t1_ <- as.POSIXct(data_ssf$t1_)
    data_ssf$t2_ <- as.POSIXct(data_ssf$t2_)
    
    
    
    #check folder exists for fish, make if not
    folder_nam <- paste0("ssf modelling - file per fish/",fish_id)
    
    
    if(dir.exists(folder_nam)==FALSE){
      dir.create(folder_nam)
    }
    
    #### Model selection####
    
    #### Saturated model####
    
    if(length(unique(data_ssf$tod_end_NEW))>1 &length(unique(data_ssf$tod_start_NEW))>1){
      sttime <- Sys.time()
      print(paste0("started sat model run at ",sttime))
      satmod <- fit_issf(data=data_ssf,case_~water_velocity_end_strdised+depth_end_strdised+ 
                           svg_end_strdised+diff_vel_abs_ang_end_strdised+ 
                           diff_svg_abs_ang_end_strdised+ log_sl__strdised+
                           water_velocity_end_strdised:(depth_end_strdised+ 
                                                          svg_end_strdised+
                                                          diff_vel_abs_ang_end_strdised+ 
                                                          diff_svg_abs_ang_end_strdised)+
                           depth_end_strdised:(svg_end_strdised+ #depth interact
                                                 diff_vel_abs_ang_end_strdised+ 
                                                 diff_svg_abs_ang_end_strdised)+
                           svg_end_strdised:(diff_vel_abs_ang_end_strdised+ #svg interact
                                               diff_svg_abs_ang_end_strdised)+
                           tod_end_NEW:(water_velocity_end_strdised+depth_end_strdised+ #tod
                                          svg_end_strdised+diff_vel_abs_ang_end_strdised+ 
                                          diff_svg_abs_ang_end_strdised)+
                           #now start and end
                           water_velocity_end_strdised:water_velocity_start_strdised+
                           depth_end_strdised:depth_start_strdised+
                           diff_svg_abs_ang_end_strdised:diff_svg_abs_ang_start_strdised+
                           diff_vel_abs_ang_end_strdised:diff_vel_abs_ang_start_strdised+
                           svg_start_strdised:svg_end_strdised+
                           cos_ta__strdised+
                           log_sl__strdised:(water_velocity_start_strdised+depth_start_strdised+
                                      svg_start_strdised+diff_vel_abs_ang_start_strdised+ 
                                      diff_svg_abs_ang_start_strdised+
                                      temp_start_strdised+tod_start_NEW)+
                           strata(step_id_new),model=T
      )
      endtime <- Sys.time()
      print(paste0("ended at ",endtime))
      duration <-endtime-sttime
      print(paste0("sat model took ",duration,units(duration)," to run"))
    } else {
      sttime <- Sys.time()
      print(paste0("started sat model run at ",sttime, ", no tod parameters"))
      satmod <- fit_issf(data=data_ssf,case_~water_velocity_end_strdised+depth_end_strdised+ 
                           svg_end_strdised+diff_vel_abs_ang_end_strdised+ 
                           diff_svg_abs_ang_end_strdised+ log_sl__strdised+
                           water_velocity_end_strdised:(depth_end_strdised+ 
                                                          svg_end_strdised+
                                                          diff_vel_abs_ang_end_strdised+ 
                                                          diff_svg_abs_ang_end_strdised)+
                           depth_end_strdised:(svg_end_strdised+ #depth interact
                                                 diff_vel_abs_ang_end_strdised+ 
                                                 diff_svg_abs_ang_end_strdised)+
                           svg_end_strdised:(diff_vel_abs_ang_end_strdised+ #svg interact
                                               diff_svg_abs_ang_end_strdised)+
                           #now start and end
                           water_velocity_end_strdised:water_velocity_start_strdised+
                           depth_end_strdised:depth_start_strdised+
                           diff_svg_abs_ang_end_strdised:diff_svg_abs_ang_start_strdised+
                           diff_vel_abs_ang_end_strdised:diff_vel_abs_ang_start_strdised+
                           svg_start_strdised:svg_end_strdised+
                           cos_ta__strdised+
                           log_sl__strdised:(water_velocity_start_strdised+depth_start_strdised+
                                      svg_start_strdised+diff_vel_abs_ang_start_strdised+ 
                                      diff_svg_abs_ang_start_strdised+
                                      temp_start_strdised)+
                           strata(step_id_new)
      )
      
      endtime <- Sys.time()
      print(paste0("ended at ",endtime))
      duration <-endtime-sttime
      print(paste0("sat model took ",duration,units(duration)," to run"))
      }
    
    
    #save sat model
    saveRDS(satmod,paste0("ssf modelling - file per fish/",fish_id,"/saturated_model1_",fish_id,".rds"))
    
    
    
    ####model sel loop####
    
    ####loop####
      startloop <-Sys.time()
      print(paste0("Model selection fish",fish_id, " - begin at ",startloop))
      
      best_row <-0 #set up a value for "best_row" - the row id for the best model at each stage
      
      #specify the data
      data <- data_ssf
      
      #mod_start <- model #name the starting position for model selection
      mod_start <- satmod$model
      tab_list <- list() #list for saving final tables for each stage
      q=1 #this is just a variable for indexing where in tab list to save the table at each step to. qill be increased by 1 in each loop
      
      
      #also add in condition re length model scope, so when >1 can continue, otherwise...
      #since i dont thinki can run a model with just strata.. gives error.
      
      length_scope = 5 #set a parameter, just any value over 2. just for running the while.
      
      while(best_row!=1&length_scope>2){ #need another condition for when reach end of model selection
        start_time <- Sys.time()
        print(paste0("Model selection step ",q," started at ",start_time))
        #need model terms
        trms <- mod_start$terms
        #list the terms
        list_terms <- attr(trms, "factors") 
        
        #below, remove strata from a list of terms # code taken from stepAIC source code
        if(!is.null(sp <- attr(trms, "specials")) &&
           !is.null(st <- sp$strata)) list_terms <- list_terms[-st,] 
        
        #have now hashed out the above as for some reason the strata was moving about in the formula and erroneously being removed due to not being last. so will just condition on it in the loop ;)
        
        fdrop <- numeric()
        fadd <- attr(trms, "factors")
        #create as list
        scope <- factor.scope(list_terms, list(add = fadd, drop = fdrop)) # provides terms as a list, can use for iterating?
        #ok above moves strata around like >:( leave her last!!!
        
        scope2 <- attr(trms,"term.labels") #here lists all the terms in order of the formula<3 for comparing to scope$drop[i] on where to remove
        #scope$drop # contains the terms to be dropped #and doesnt have single effects where theres an interaction
        
        #get AICs for starting model 
        sAIC <- extractAIC(mod_start)
        s_df <- sAIC[1]
        s_AIC <- sAIC[2]
        
        #create table
        tabl <- as.data.frame(matrix(ncol=6,nrow=(length(scope$drop)+1)))
        names(tabl) <- c("ID","Formula","Dropped term","df","AIC","diffAIC")
        
        #have formula in table as "case_~etc" so easier at end of loop
        start_frm1 <-  as.character(mod_start$formula)[3]
        start_frm <- paste0("case_~",start_frm1) #need failsafe here to make sure strata(blah blah blah) is always the last term so won't get dropped.
        
        #fill first entry of tabl
        tabl$ID[1] <- "start"
        tabl$Formula[1] <- start_frm
        tabl$`Dropped term`[1] <- "-"
        tabl$AIC[1] <- s_AIC
        tabl$df[1] <- s_df
        tabl$diffAIC[1] <- 0
        
        mod_list_temp <- list()
        
        for(i in 1:length(scope$drop)){ #possibly save models to a list so don't need to run again?
          #here, can update formula, fit model, extract AICs? and save to a table
          if(scope$drop[i]!="strata(step_id_new)"){ #here, so doesnt do anything where scopedrop is strata
            term_loc <- match(scope$drop[i],scope2)
            frm_temp <- drop.terms(trms,term_loc) #new formula #error here since drops strata- keep strata last.     
            #still drops strata even if. its not the right number
            mod_new <- fit_issf(data=data,formula=as.formula(paste0("case_~",as.character(frm_temp)[2])),
                                model=T)
            mod_new_mod <- mod_new$model
            eAIC <-extractAIC(mod_new_mod)
            e_AIC <- eAIC[2]
            e_df <- eAIC[1]
            
            #save to table, row i+1 (as first row starting pos)
            tabl$ID[(i+1)] <- i
            tabl$Formula[(i+1)] <- paste0("case_",as.character(frm_temp)[2])
            tabl$`Dropped term`[(i+1)] <- scope$drop[i]
            tabl$AIC[(i+1)] <- e_AIC
            tabl$df[(i+1)] <- e_df
            tabl$diffAIC[(i+1)] <- e_AIC - s_AIC
            
            #save to model list so can retrieve later
            mod_list_temp[[(i+1)]] <- mod_new #save as full issf product
          }
        }
        
        #get row where AIC is min
        best_row <- which.min(tabl$AIC)
        if(best_row==1&2>min(tabl$diffAIC[2:length(tabl$diffAIC)],na.rm=T)){ #if min row is the starting point e.g. more paramters and not 2 units smaller than other model
          best_row <- which.min(tabl$diffAIC[2:length(tabl$diffAIC)]) + 1 #plus one cos since removed start pos from the data, need to add 1
        }
        
        best_frm <- tabl$Formula[best_row]
        
        
        #print that this step is over
        end_time <- Sys.time()
        print(paste0("Model selection step ",q," ended at ",end_time))
        duration <- end_time-start_time
        unit_dur <- units(duration)
        duration <- as.numeric(duration) %>% round(digits=2)
        print(paste0("Step ",q," lasted ",duration," ",unit_dur))
        
        
        
        # to get a positiion to save the table in the list
        tab_list[[q]] <- tabl
        q=q+1
        
        length_scope <- length(scope$drop) #save length of temrs to drop in this level
        #if scope$drop length is 2, then next round can only drop 1 more variable and thus cannot run as cant work with just strata
        
        #ok i want it to print what the best model is + why model selection was stopped
        
        if(best_row==1){
          print(paste0("Model selection ",fish_id, " ended due to no further removal of terms improving AIC"))
          print(paste0("Final model formula: ",best_frm))
          mod_final <- mod_start
        } else if(length_scope==2){
          print(paste0("Model selection ",fish_id, " ended due to being at the smallest possible model"))
          print(paste0("Final model formula: ",best_frm))
          mod_final <- mod_start
        } else {
          #so if loop is continuing, can get mod start now
          mod_start <- mod_list_temp[[best_row]]
          mod_start <- mod_start$model     }
        
      }
      
      #works!
      endloop <- Sys.time()
      duration <- endloop - startloop
      print(paste0("Model selection took ",duration," ",units(duration)))
      
      #save tables and models
      saveRDS(tab_list,paste0("ssf modelling - file per fish/",fish_id,"/mod_selection_tables_",fish_id,".rds"))
      saveRDS(mod_final,paste0("ssf modelling - file per fish/",fish_id,"/final_model_",fish_id,".rds"))
      
      #and coefficient stuff
      sum <- summary(mod_final)
      coefs <- sum$coefficients
      coefs <-as.data.frame(coefs)
      
      coefs <- tibble::rownames_to_column(coefs,"params")
      coefs$fish_id <- fish_id
      coefs$species <- data_ssf$species[1]
      
      
      
      write.csv(coefs,paste0("ssf modelling - file per fish/files listing coeffs and params/",fish_id,"_coeffs_params.csv"))
      
    }
    
    
    
    
    

    back to top

    Software Heritage — Copyright (C) 2015–2025, The Software Heritage developers. License: GNU AGPLv3+.
    The source code of Software Heritage itself is available on our development forge.
    The source code files archived by Software Heritage are available under their own copyright and licenses.
    Terms of use: Archive access, API— Content policy— Contact— JavaScript license information— Web API