https://doi.org/10.5281/zenodo.14318846
model selection loop no HMM.R
####clear variables####
rm(list=ls())
####packages####
library(dplyr)
library(data.table)
library(amt) #for step selection
library(fastDummies)
library(MASS)
library(survival)
library(gridExtra)
library(ggpubr)#better than gridextra?
####read in file names ####
files <- list.files('data/find ladder/FINAL final SSF dataframes - standardised and filtered - decision tree and distances',full.names = T)
#files <- list.files('data/find ladder/FINAL final SSF dataframes - standardised and filtered',full.names = F)
for(z in 1:length(files)){
####read data####
#select files #change number per fish
data_to_read <- files[z]
#read in file
data_ssf <- read.csv(data_to_read)
#get fish id
fish_id <- data_ssf$fish_id[[1]]
if(fish_id==46838){##this was for removing fish 46838's bad track
data_ssf <- data_ssf %>% filter(track !=2)
}
data_ssf$t1_ <- as.POSIXct(data_ssf$t1_)
data_ssf$t2_ <- as.POSIXct(data_ssf$t2_)
#check folder exists for fish, make if not
folder_nam <- paste0("ssf modelling - file per fish/",fish_id)
if(dir.exists(folder_nam)==FALSE){
dir.create(folder_nam)
}
#### Model selection####
#### Saturated model####
if(length(unique(data_ssf$tod_end_NEW))>1 &length(unique(data_ssf$tod_start_NEW))>1){
sttime <- Sys.time()
print(paste0("started sat model run at ",sttime))
satmod <- fit_issf(data=data_ssf,case_~water_velocity_end_strdised+depth_end_strdised+
svg_end_strdised+diff_vel_abs_ang_end_strdised+
diff_svg_abs_ang_end_strdised+ log_sl__strdised+
water_velocity_end_strdised:(depth_end_strdised+
svg_end_strdised+
diff_vel_abs_ang_end_strdised+
diff_svg_abs_ang_end_strdised)+
depth_end_strdised:(svg_end_strdised+ #depth interact
diff_vel_abs_ang_end_strdised+
diff_svg_abs_ang_end_strdised)+
svg_end_strdised:(diff_vel_abs_ang_end_strdised+ #svg interact
diff_svg_abs_ang_end_strdised)+
tod_end_NEW:(water_velocity_end_strdised+depth_end_strdised+ #tod
svg_end_strdised+diff_vel_abs_ang_end_strdised+
diff_svg_abs_ang_end_strdised)+
#now start and end
water_velocity_end_strdised:water_velocity_start_strdised+
depth_end_strdised:depth_start_strdised+
diff_svg_abs_ang_end_strdised:diff_svg_abs_ang_start_strdised+
diff_vel_abs_ang_end_strdised:diff_vel_abs_ang_start_strdised+
svg_start_strdised:svg_end_strdised+
cos_ta__strdised+
log_sl__strdised:(water_velocity_start_strdised+depth_start_strdised+
svg_start_strdised+diff_vel_abs_ang_start_strdised+
diff_svg_abs_ang_start_strdised+
temp_start_strdised+tod_start_NEW)+
strata(step_id_new),model=T
)
endtime <- Sys.time()
print(paste0("ended at ",endtime))
duration <-endtime-sttime
print(paste0("sat model took ",duration,units(duration)," to run"))
} else {
sttime <- Sys.time()
print(paste0("started sat model run at ",sttime, ", no tod parameters"))
satmod <- fit_issf(data=data_ssf,case_~water_velocity_end_strdised+depth_end_strdised+
svg_end_strdised+diff_vel_abs_ang_end_strdised+
diff_svg_abs_ang_end_strdised+ log_sl__strdised+
water_velocity_end_strdised:(depth_end_strdised+
svg_end_strdised+
diff_vel_abs_ang_end_strdised+
diff_svg_abs_ang_end_strdised)+
depth_end_strdised:(svg_end_strdised+ #depth interact
diff_vel_abs_ang_end_strdised+
diff_svg_abs_ang_end_strdised)+
svg_end_strdised:(diff_vel_abs_ang_end_strdised+ #svg interact
diff_svg_abs_ang_end_strdised)+
#now start and end
water_velocity_end_strdised:water_velocity_start_strdised+
depth_end_strdised:depth_start_strdised+
diff_svg_abs_ang_end_strdised:diff_svg_abs_ang_start_strdised+
diff_vel_abs_ang_end_strdised:diff_vel_abs_ang_start_strdised+
svg_start_strdised:svg_end_strdised+
cos_ta__strdised+
log_sl__strdised:(water_velocity_start_strdised+depth_start_strdised+
svg_start_strdised+diff_vel_abs_ang_start_strdised+
diff_svg_abs_ang_start_strdised+
temp_start_strdised)+
strata(step_id_new)
)
endtime <- Sys.time()
print(paste0("ended at ",endtime))
duration <-endtime-sttime
print(paste0("sat model took ",duration,units(duration)," to run"))
}
#save sat model
saveRDS(satmod,paste0("ssf modelling - file per fish/",fish_id,"/saturated_model1_",fish_id,".rds"))
####model sel loop####
####loop####
startloop <-Sys.time()
print(paste0("Model selection fish",fish_id, " - begin at ",startloop))
best_row <-0 #set up a value for "best_row" - the row id for the best model at each stage
#specify the data
data <- data_ssf
#mod_start <- model #name the starting position for model selection
mod_start <- satmod$model
tab_list <- list() #list for saving final tables for each stage
q=1 #this is just a variable for indexing where in tab list to save the table at each step to. qill be increased by 1 in each loop
#also add in condition re length model scope, so when >1 can continue, otherwise...
#since i dont thinki can run a model with just strata.. gives error.
length_scope = 5 #set a parameter, just any value over 2. just for running the while.
while(best_row!=1&length_scope>2){ #need another condition for when reach end of model selection
start_time <- Sys.time()
print(paste0("Model selection step ",q," started at ",start_time))
#need model terms
trms <- mod_start$terms
#list the terms
list_terms <- attr(trms, "factors")
#below, remove strata from a list of terms # code taken from stepAIC source code
if(!is.null(sp <- attr(trms, "specials")) &&
!is.null(st <- sp$strata)) list_terms <- list_terms[-st,]
#have now hashed out the above as for some reason the strata was moving about in the formula and erroneously being removed due to not being last. so will just condition on it in the loop ;)
fdrop <- numeric()
fadd <- attr(trms, "factors")
#create as list
scope <- factor.scope(list_terms, list(add = fadd, drop = fdrop)) # provides terms as a list, can use for iterating?
#ok above moves strata around like >:( leave her last!!!
scope2 <- attr(trms,"term.labels") #here lists all the terms in order of the formula<3 for comparing to scope$drop[i] on where to remove
#scope$drop # contains the terms to be dropped #and doesnt have single effects where theres an interaction
#get AICs for starting model
sAIC <- extractAIC(mod_start)
s_df <- sAIC[1]
s_AIC <- sAIC[2]
#create table
tabl <- as.data.frame(matrix(ncol=6,nrow=(length(scope$drop)+1)))
names(tabl) <- c("ID","Formula","Dropped term","df","AIC","diffAIC")
#have formula in table as "case_~etc" so easier at end of loop
start_frm1 <- as.character(mod_start$formula)[3]
start_frm <- paste0("case_~",start_frm1) #need failsafe here to make sure strata(blah blah blah) is always the last term so won't get dropped.
#fill first entry of tabl
tabl$ID[1] <- "start"
tabl$Formula[1] <- start_frm
tabl$`Dropped term`[1] <- "-"
tabl$AIC[1] <- s_AIC
tabl$df[1] <- s_df
tabl$diffAIC[1] <- 0
mod_list_temp <- list()
for(i in 1:length(scope$drop)){ #possibly save models to a list so don't need to run again?
#here, can update formula, fit model, extract AICs? and save to a table
if(scope$drop[i]!="strata(step_id_new)"){ #here, so doesnt do anything where scopedrop is strata
term_loc <- match(scope$drop[i],scope2)
frm_temp <- drop.terms(trms,term_loc) #new formula #error here since drops strata- keep strata last.
#still drops strata even if. its not the right number
mod_new <- fit_issf(data=data,formula=as.formula(paste0("case_~",as.character(frm_temp)[2])),
model=T)
mod_new_mod <- mod_new$model
eAIC <-extractAIC(mod_new_mod)
e_AIC <- eAIC[2]
e_df <- eAIC[1]
#save to table, row i+1 (as first row starting pos)
tabl$ID[(i+1)] <- i
tabl$Formula[(i+1)] <- paste0("case_",as.character(frm_temp)[2])
tabl$`Dropped term`[(i+1)] <- scope$drop[i]
tabl$AIC[(i+1)] <- e_AIC
tabl$df[(i+1)] <- e_df
tabl$diffAIC[(i+1)] <- e_AIC - s_AIC
#save to model list so can retrieve later
mod_list_temp[[(i+1)]] <- mod_new #save as full issf product
}
}
#get row where AIC is min
best_row <- which.min(tabl$AIC)
if(best_row==1&2>min(tabl$diffAIC[2:length(tabl$diffAIC)],na.rm=T)){ #if min row is the starting point e.g. more paramters and not 2 units smaller than other model
best_row <- which.min(tabl$diffAIC[2:length(tabl$diffAIC)]) + 1 #plus one cos since removed start pos from the data, need to add 1
}
best_frm <- tabl$Formula[best_row]
#print that this step is over
end_time <- Sys.time()
print(paste0("Model selection step ",q," ended at ",end_time))
duration <- end_time-start_time
unit_dur <- units(duration)
duration <- as.numeric(duration) %>% round(digits=2)
print(paste0("Step ",q," lasted ",duration," ",unit_dur))
# to get a positiion to save the table in the list
tab_list[[q]] <- tabl
q=q+1
length_scope <- length(scope$drop) #save length of temrs to drop in this level
#if scope$drop length is 2, then next round can only drop 1 more variable and thus cannot run as cant work with just strata
#ok i want it to print what the best model is + why model selection was stopped
if(best_row==1){
print(paste0("Model selection ",fish_id, " ended due to no further removal of terms improving AIC"))
print(paste0("Final model formula: ",best_frm))
mod_final <- mod_start
} else if(length_scope==2){
print(paste0("Model selection ",fish_id, " ended due to being at the smallest possible model"))
print(paste0("Final model formula: ",best_frm))
mod_final <- mod_start
} else {
#so if loop is continuing, can get mod start now
mod_start <- mod_list_temp[[best_row]]
mod_start <- mod_start$model }
}
#works!
endloop <- Sys.time()
duration <- endloop - startloop
print(paste0("Model selection took ",duration," ",units(duration)))
#save tables and models
saveRDS(tab_list,paste0("ssf modelling - file per fish/",fish_id,"/mod_selection_tables_",fish_id,".rds"))
saveRDS(mod_final,paste0("ssf modelling - file per fish/",fish_id,"/final_model_",fish_id,".rds"))
#and coefficient stuff
sum <- summary(mod_final)
coefs <- sum$coefficients
coefs <-as.data.frame(coefs)
coefs <- tibble::rownames_to_column(coefs,"params")
coefs$fish_id <- fish_id
coefs$species <- data_ssf$species[1]
write.csv(coefs,paste0("ssf modelling - file per fish/files listing coeffs and params/",fish_id,"_coeffs_params.csv"))
}