--- title: "Applying HMM to tracks for SSFs" author: "Rachel Mawer" date: "2023-06-23" output: html_document: toc: true toc_float: true theme: cerulean --- ```{r setup, include=FALSE} knitr::opts_chunk$set(echo = F,message=F) #main packages #delete/add as needed library(dplyr) library(ggplot2) library(knitr) library(ggpubr) library(plotly) library(pracma) #for standard error library(reactable) library(momentuHMM) library(amt) library(sf) ``` ## About This file is to apply HMMs to the tracks used in previous SSF analysis. HMMs will be made for all fish of a species, then used to assign behavioural states to tracks. In this file: * calculating straightness indexes for steps * fitting HMMs to the data, and find the best model * assigning states to fish tracks * saving resulting files for SSF modelling ```{r read in data, calculate straightness indexes + nsd} # will read in all data, and form 2 dataframes - barbel and grayling files <- list.files("data/SSF dataframes",full.names = T) #loop, read in, calculate SI, save resulting file and then add to a barbel or grayling df #for the latter, can do a "if df exists, else" thing #for testing l <-1 d <-1 #windows for SI, 1min, 5min and 10min windows <- list(60/20,300/20,600/20) for(l in 1:length(files)){ file_to_read <- files[l] dat_a <- read.csv(file_to_read) %>% filter(case_==TRUE) dat <- dat_a dat$time <- as.POSIXct(dat$t1_) for(i in 1:length(windows)){ window <- windows[[i]] # Get n to find SI from middle-point of window. n <- floor(window/2) if(i==1){ dat$SI <- NA # Initiate SI-value in df dat$nsd <- NA dat$displ <- NA X <- split(dat, list(dat$fish_id,dat$track,dat$burst_),drop=T) }else{ dat_with_SI$SI <- NA # Initiate SI-value in df IF adding multiple to the tracks dat_with_SI$nsd <- NA dat_with_SI$displ <- NA X <- split(dat_with_SI, list(dat_with_SI$fish_id,dat_with_SI$track,dat_with_SI$burst_),drop=T) } for (d in 1:length(X)) { # track <- data.frame(X[d]) # Call individual track data if(i==1){ colnames(track) <- colnames(dat) # Rename columns for continuity }else{ colnames(track) <- colnames(dat_with_SI) } #doing it this way instead of via SL so things are in the right order track$step <- sqrt((track$x1_-lag(track$x1_))^2+(track$y1_-lag(track$y1_))^2) #dont need below if repeating if(i==1){ track <- track %>% st_as_sf(coords = c('x1_', 'y1_'), crs = 'EPSG:32632', remove=F) # Convert to sf-object # using start coords } print(paste('Track', d, '/', length(X))) pb <- txtProgressBar(0, nrow(track), style=3) # Loop for even number of detections in window if ((window %% 2) == 0) { for (c in n+1:(nrow(track)-n)) { if(nrow(track)>=(2*n+1)){ #if track is smaller than n+8 e.g. max mid point, do nothing, otherwise yeah setTxtProgressBar(pb, c) TDist <- sum(track$step[(c-(n)):(c+n-1)],na.rm=T) EDist <- as.numeric(st_distance(track$geometry[c-(n)], track$geometry[c+n-1])) # Calculate effective distance track$SI[c] <- EDist / TDist # Calculate SI track$nsd[c] <- EDist^2 track$displ[c] <-EDist }else{ track$SI <- NA } } } # Loop for uneven number of detections in window if ((window %% 2) == 1) { for (c in n+1:(nrow(track)-n)){ if(nrow(track)>=(2*n+1)){ #if track is smaller than n+8 e.g. max mid point, do nothing, otherwise yeah setTxtProgressBar(pb, c) TDist <- sum(track$step[(c-n):(c+(n))],na.rm=T) EDist <- st_distance(track$geometry[c-n], track$geometry[(c+(n))]) track$SI[c] <- EDist / TDist track$nsd[c] <- EDist^2 track$displ[c] <-EDist } else{ track$SI <- NA track$nsd <- NA track$displ <- NA } } } close(pb) # Loop to combine individual tracks into 1 df if (d == 1) { dat_with_SI <- track } else { dat_with_SI <- rbind(dat_with_SI, track) } } if(i==1){ dat_with_SI <- rename(dat_with_SI,SI_1min=SI,displ_1min=displ,nsd_1min = nsd) } if(i==2){ dat_with_SI <- rename(dat_with_SI,SI_5min=SI,displ_5min=displ,nsd_5min = nsd) } if(i==3){ dat_with_SI <- rename(dat_with_SI,SI_10min=SI,displ_10min=displ,nsd_10min = nsd) } } #save csv to df for later individual state assignment fish_id <- unique(dat$fish_id) #make sep dat_with_SI without geometry dat_to_save <- dat_with_SI %>% select(-geometry) write.csv(dat_to_save,paste0("data/SSF dataframes with SI etc/",fish_id,".csv"),row.names = F) species <- unique(dat$species) if(species == "Barbel"){ if(exists("barb_all")==FALSE){ barb_all <- dat_to_save } else{ barb_all <- rbind(barb_all,dat_to_save) } } if(species == "Grayling"){ if(exists("gray_all")==FALSE){ gray_all <- dat_to_save } else{ gray_all <- rbind(gray_all,dat_to_save) } } } write.csv(barb_all,"data/large df with SI/barbel.csv",row.names=F) write.csv(gray_all,"data/large df with SI/grayling.csv",row.names=F) ``` ## Fit HMM to each species ### Grayling ```{r fit hmm to grayling with SI 10,warning=F} ### prepare data gray_dat_to_prep <- read.csv("data/large df with SI/grayling.csv") %>% select(-step) gray_dat_to_prep <- gray_dat_to_prep %>% mutate(ID = paste(fish_id, approach, sep = '-')) %>% dplyr::arrange(ID, t1_) #needs the arrange in order to prep gr_data <- prepData(gray_dat_to_prep, coordNames = c('x1_', 'y1_'), type = 'UTM') %>% # Prepping data for HMM mutate(fish_id = as.factor(fish_id)) dist <- list(SI_1min = 'beta', SI_5min = 'beta', SI_10min = 'beta', disp_10min = 'gamma', disp_5min = 'gamma') niter <- 50 gr_list_1min <- vector('list', length = niter) gr_list_5min <- vector('list', length = niter) gr_list_10min <- vector('list', length = niter) for (n in 1:niter) { SI1_mean <- runif(n=2, min=c(0.001, 0.5), max=c(0.5, 0.999)) # randomly picking mean for si for the 2 states SI1_sd <- runif(n=2, min=c(0.001, 0.5), max=c(0.5, 0.999)) #randomly picking sd for si for the 2 states Par0 <- list(SI_1min = c(SI1_mean, SI1_sd)) #list parameters gr_list_1min[[n]] <- fitHMM(gr_data, nbStates = 2, dist[c(1)], Par0) } for (n in 1:niter) { SI5_mean <- runif(n=2, min=c(0.001, 0.5), max=c(0.5, 0.999)) SI5_sd <- runif(n=2, min=c(0.001, 0.5), max=c(0.5, 0.999)) Par0 <- list(SI_5min = c(SI5_mean, SI5_sd)) gr_list_5min[[n]] <- fitHMM(gr_data, nbStates = 2, dist[c(2)], Par0) } for (n in 1:niter) { SI10_mean <- runif(n=2, min=c(0.001, 0.5), max=c(0.5, 0.999)) SI10_sd <- runif(n=2, min=c(0.001, 0.5), max=c(0.5, 0.999)) Par0 <- list(SI_10min = c(SI10_mean, SI10_sd)) gr_list_10min[[n]] <- fitHMM(gr_data, nbStates = 2, dist[c(3)], Par0) } gr_SI1LL <- unlist(lapply(gr_list_1min, function(m) m$mod$minimum)) gr_SI5LL <- unlist(lapply(gr_list_5min, function(m) m$mod$minimum)) gr_SI10LL <- unlist(lapply(gr_list_10min, function(m) m$mod$minimum)) gr_SI1 <- gr_list_1min[[which.min(gr_SI1LL)]] gr_SI5 <- gr_list_5min[[which.min(gr_SI5LL)]] gr_SI10 <- gr_list_10min[[which.min(gr_SI10LL)]] saveRDS(gr_SI10,"HMMs/gr_SI10.rds") saveRDS(gr_SI1,"HMMs/gr_SI1.rds") saveRDS(gr_SI5,"HMMs/gr_SI5.rds") ``` ```{r compare AIC of grayling models} gr_SI10 <-readRDS("HMMs/gr_SI10.rds") gr_SI1 <- readRDS("HMMs/gr_SI1.rds") gr_SI5 <- readRDS("HMMs/gr_SI5.rds") AIC(gr_SI1,gr_SI5,gr_SI10) ``` Taking SI_10 model ```{r assign states} #to edit this code, from Jelger gr_SI10 <-readRDS("HMMs/gr_SI10.rds") gr_data$State <- as.factor(viterbi(gr_SI10)) write.csv(gr_data,"data/steps with states/grayling_all.csv") ``` ```{r split gr data into individual fish} fish_ids <- unique(gr_data$fish_id) for(i in 1:length(fish_ids)){ id <- fish_ids[i] dat <- gr_data %>% filter(fish_id==id) write.csv(dat,paste0("data/steps with states/individual fish/",id,".csv")) } ``` ### Barbel ```{r fit hmm to barbel with SI 10,warning=F} ### prepare data barb_dat_to_prep <- read.csv("data/large df with SI/barbel.csv") %>% select(-step) barb_dat_to_prep <- barb_dat_to_prep %>% mutate(ID = paste(fish_id, approach, sep = '-')) %>% dplyr::arrange(ID, t1_) #needs the arrange in order to prep barb_data <- prepData(barb_dat_to_prep, coordNames = c('x1_', 'y1_'), type = 'UTM') %>% # Prepping data for HMM mutate(fish_id = as.factor(fish_id)) dist <- list(SI_1min = 'beta', SI_5min = 'beta', SI_10min = 'beta', disp_10min = 'gamma', disp_5min = 'gamma') niter <- 50 barb_list_1min <- vector('list', length = niter) barb_list_5min <- vector('list', length = niter) barb_list_10min <- vector('list', length = niter) for (n in 1:niter) { SI1_mean <- runif(n=2, min=c(0.001, 0.5), max=c(0.5, 0.999)) # randomly picking mean for si for the 2 states SI1_sd <- runif(n=2, min=c(0.001, 0.5), max=c(0.5, 0.999)) #randomly picking sd for si for the 2 states Par0 <- list(SI_1min = c(SI1_mean, SI1_sd)) #list parameters barb_list_1min[[n]] <- fitHMM(barb_data, nbStates = 2, dist[c(1)], Par0) } for (n in 1:niter) { SI5_mean <- runif(n=2, min=c(0.001, 0.5), max=c(0.5, 0.999)) SI5_sd <- runif(n=2, min=c(0.001, 0.5), max=c(0.5, 0.999)) Par0 <- list(SI_5min = c(SI5_mean, SI5_sd)) barb_list_5min[[n]] <- fitHMM(barb_data, nbStates = 2, dist[c(2)], Par0) } for (n in 1:niter) { SI10_mean <- runif(n=2, min=c(0.001, 0.5), max=c(0.5, 0.999)) SI10_sd <- runif(n=2, min=c(0.001, 0.5), max=c(0.5, 0.999)) Par0 <- list(SI_10min = c(SI10_mean, SI10_sd)) barb_list_10min[[n]] <- fitHMM(barb_data, nbStates = 2, dist[c(3)], Par0) } barb_SI1LL <- unlist(lapply(barb_list_1min, function(m) m$mod$minimum)) barb_SI5LL <- unlist(lapply(barb_list_5min, function(m) m$mod$minimum)) barb_SI10LL <- unlist(lapply(barb_list_10min, function(m) m$mod$minimum)) barb_SI1 <- barb_list_1min[[which.min(barb_SI1LL)]] barb_SI5 <- barb_list_5min[[which.min(barb_SI5LL)]] barb_SI10 <- barb_list_10min[[which.min(barb_SI10LL)]] saveRDS(barb_SI10,"HMMs/barb_SI10.rds") saveRDS(barb_SI1,"HMMs/barb_SI1.rds") saveRDS(barb_SI5,"HMMs/barb_SI5.rds") ``` ```{r compare AIC of barbling models} barb_SI10 <-readRDS("HMMs/barb_SI10.rds") barb_SI1 <- readRDS("HMMs/barb_SI1.rds") barb_SI5 <- readRDS("HMMs/barb_SI5.rds") AIC(barb_SI1,barb_SI5,barb_SI10) ``` Taking the SI_10 model. ```{r assign states} #to edit this code, from Jelger barb_SI10 <-readRDS("HMMs/barb_SI10.rds") barb_data$State <- as.factor(viterbi(barb_SI10)) write.csv(barb_data,"data/steps with states/barbel_all.csv") ``` ```{r split barb data into individual fish} barb_data<- read.csv("data/steps with states/barbel_all.csv") fish_ids <- unique(barb_data$fish_id) for(i in 1:length(fish_ids)){ id <- fish_ids[i] dat <- barb_data %>% filter(fish_id==id) write.csv(dat,paste0("data/steps with states/individual fish/",id,".csv")) } ```