Skip to main content
  • Home
  • Development
  • Documentation
  • Donate
  • Operational login
  • Browse the archive

swh logo
SoftwareHeritage
Software
Heritage
Archive
Features
  • Search

  • Downloads

  • Save code now

  • Add forge now

  • Help

Raw File Download

To reference or cite the objects present in the Software Heritage archive, permalinks based on SoftWare Hash IDentifiers (SWHIDs) must be used.
Select below a type of object currently browsed in order to display its associated SWHID and permalink.

  • content
content badge
swh:1:cnt:1d2e6ecafd2df719868120c2524d478f40f1cf6b

This interface enables to generate software citations, provided that the root directory of browsed objects contains a citation.cff or codemeta.json file.
Select below a type of object currently browsed in order to generate citations for them.

  • content
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
---
title: "Applying HMM to tracks for SSFs"
author: "Rachel Mawer"
date: "2023-06-23"
output: 
  html_document:
    toc: true
    toc_float: true
    theme: cerulean
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = F,message=F)

#main packages #delete/add as needed
library(dplyr)
library(ggplot2)
library(knitr)
library(ggpubr)
library(plotly)
library(pracma) #for standard error
library(reactable)
library(momentuHMM)
library(amt)
library(sf)
```

## About

This file is to apply HMMs to the tracks used in previous SSF analysis. HMMs will be made for all fish of a species, then used to assign behavioural states to tracks.

In this file:

* calculating straightness indexes for steps 
* fitting HMMs to the data, and find the best model
* assigning states to fish tracks
* saving resulting files for SSF modelling

```{r read in data, calculate straightness indexes + nsd}
# will read in all data, and form 2 dataframes - barbel and grayling
files <- list.files("data/SSF dataframes",full.names = T)


#loop, read in, calculate SI, save resulting file and then add to a barbel or grayling df
#for the latter, can do a "if df exists, else" thing


#for testing
l <-1
d <-1

#windows for SI, 1min, 5min and 10min
windows <- list(60/20,300/20,600/20)


for(l in 1:length(files)){
  file_to_read <- files[l]
  dat_a <- read.csv(file_to_read) %>% filter(case_==TRUE)
  
  dat <- dat_a
  
  dat$time <- as.POSIXct(dat$t1_)


  for(i in 1:length(windows)){
     window <- windows[[i]]
  
  # Get n to find SI from middle-point of window. 
  n <- floor(window/2)
  
  if(i==1){
    dat$SI <- NA               # Initiate SI-value in df
    dat$nsd <- NA
    dat$displ <- NA
     X <- split(dat, list(dat$fish_id,dat$track,dat$burst_),drop=T)
  }else{
    dat_with_SI$SI <- NA              # Initiate SI-value in df IF adding multiple    to the tracks
    dat_with_SI$nsd <- NA
    dat_with_SI$displ <- NA
    X <- split(dat_with_SI, list(dat_with_SI$fish_id,dat_with_SI$track,dat_with_SI$burst_),drop=T)
  }
  

  for (d in 1:length(X)) { #
    track <- data.frame(X[d])                  # Call individual track data
    if(i==1){
      colnames(track) <- colnames(dat)    # Rename columns for continuity
    }else{
      colnames(track) <- colnames(dat_with_SI)
    }

    #doing it this way instead of via SL so things are in the right order
    track$step <- sqrt((track$x1_-lag(track$x1_))^2+(track$y1_-lag(track$y1_))^2)
  
    #dont need below if repeating
    if(i==1){
    track <- track %>% st_as_sf(coords = c('x1_', 'y1_'), crs = 'EPSG:32632', remove=F) # Convert to sf-object # using start coords
    }
    
    print(paste('Track', d, '/', length(X)))
    pb <- txtProgressBar(0, nrow(track), style=3)
    # Loop for even number of detections in window
    if ((window %% 2) == 0) {
      for (c in n+1:(nrow(track)-n)) {
        if(nrow(track)>=(2*n+1)){ #if track is smaller than n+8 e.g. max mid point, do nothing, otherwise yeah
          setTxtProgressBar(pb, c)
          TDist <- sum(track$step[(c-(n)):(c+n-1)],na.rm=T)                       
          EDist <- as.numeric(st_distance(track$geometry[c-(n)], track$geometry[c+n-1])) # Calculate effective distance
          track$SI[c] <- EDist / TDist # Calculate SI
          track$nsd[c] <- EDist^2
          track$displ[c] <-EDist
        }else{
          track$SI <- NA
        }
      }
    }
    # Loop for uneven number of detections in window
    if ((window %% 2) == 1) {
      for (c in n+1:(nrow(track)-n)){
        if(nrow(track)>=(2*n+1)){ #if track is smaller than n+8 e.g. max mid point, do nothing, otherwise yeah
          setTxtProgressBar(pb, c)
          TDist <- sum(track$step[(c-n):(c+(n))],na.rm=T)
          EDist <- st_distance(track$geometry[c-n], track$geometry[(c+(n))])
          track$SI[c] <- EDist / TDist
          track$nsd[c] <- EDist^2
          track$displ[c] <-EDist
        } else{
          track$SI <- NA
          track$nsd <- NA
          track$displ <- NA
        }
      }
    }
    close(pb)
    
    # Loop to combine individual tracks into 1 df
    if (d == 1) {
      dat_with_SI <- track
    }
    else {
      dat_with_SI <- rbind(dat_with_SI, track)
    }
  }
  
  if(i==1){
    dat_with_SI <- rename(dat_with_SI,SI_1min=SI,displ_1min=displ,nsd_1min = nsd)
  }
  if(i==2){
    dat_with_SI <- rename(dat_with_SI,SI_5min=SI,displ_5min=displ,nsd_5min = nsd)
  }
  if(i==3){
    dat_with_SI <- rename(dat_with_SI,SI_10min=SI,displ_10min=displ,nsd_10min = nsd)

  }
  
}

  #save csv to df for later individual state assignment
  fish_id <- unique(dat$fish_id)
  #make sep dat_with_SI without geometry
  dat_to_save <- dat_with_SI %>% select(-geometry)
  
  write.csv(dat_to_save,paste0("data/SSF dataframes with SI etc/",fish_id,".csv"),row.names = F)
  
  species <- unique(dat$species)

  if(species == "Barbel"){
    if(exists("barb_all")==FALSE){
    barb_all <- dat_to_save
    } else{
      barb_all <- rbind(barb_all,dat_to_save)
    }
  }
  
  if(species == "Grayling"){
  if(exists("gray_all")==FALSE){
  gray_all <- dat_to_save
  } else{
    gray_all <- rbind(gray_all,dat_to_save)
  }
}

  
}

write.csv(barb_all,"data/large df with SI/barbel.csv",row.names=F)
write.csv(gray_all,"data/large df with SI/grayling.csv",row.names=F)

```

## Fit HMM to each species


### Grayling

```{r fit hmm to grayling with SI 10,warning=F}

### prepare data 
gray_dat_to_prep <- read.csv("data/large df with SI/grayling.csv") %>% select(-step)


gray_dat_to_prep <- gray_dat_to_prep %>% mutate(ID = paste(fish_id, approach, sep = '-')) %>% dplyr::arrange(ID, t1_) #needs the arrange in order to prep

gr_data <- prepData(gray_dat_to_prep, coordNames = c('x1_', 'y1_'), type = 'UTM') %>% # Prepping data for HMM
  mutate(fish_id = as.factor(fish_id)) 


dist <- list(SI_1min  = 'beta',
             SI_5min  = 'beta',
             SI_10min = 'beta',
             disp_10min = 'gamma',
             disp_5min = 'gamma')

niter <- 50
gr_list_1min  <- vector('list', length = niter)
gr_list_5min  <- vector('list', length = niter)
gr_list_10min <- vector('list', length = niter)


for (n in 1:niter) {
  SI1_mean <- runif(n=2, min=c(0.001, 0.5), max=c(0.5, 0.999)) # randomly picking mean for si for the 2 states
  SI1_sd <- runif(n=2, min=c(0.001, 0.5), max=c(0.5, 0.999)) #randomly picking sd for si for the 2 states
  
  Par0 <- list(SI_1min = c(SI1_mean, SI1_sd)) #list parameters
  
  gr_list_1min[[n]] <- fitHMM(gr_data, nbStates = 2,
                              dist[c(1)], Par0)
}

for (n in 1:niter) {
  SI5_mean <- runif(n=2, min=c(0.001, 0.5), max=c(0.5, 0.999))
  SI5_sd <- runif(n=2, min=c(0.001, 0.5), max=c(0.5, 0.999))
  
  Par0 <- list(SI_5min = c(SI5_mean, SI5_sd))
  
  gr_list_5min[[n]] <- fitHMM(gr_data, nbStates = 2,
                              dist[c(2)], Par0)
}

for (n in 1:niter) {
  SI10_mean <- runif(n=2, min=c(0.001, 0.5), max=c(0.5, 0.999))
  SI10_sd <- runif(n=2, min=c(0.001, 0.5), max=c(0.5, 0.999))
  
  Par0 <- list(SI_10min = c(SI10_mean, SI10_sd))
  
  gr_list_10min[[n]] <- fitHMM(gr_data, nbStates = 2,
                               dist[c(3)], Par0)
}


gr_SI1LL  <- unlist(lapply(gr_list_1min, function(m) m$mod$minimum))
gr_SI5LL  <- unlist(lapply(gr_list_5min, function(m) m$mod$minimum))
gr_SI10LL <- unlist(lapply(gr_list_10min, function(m) m$mod$minimum))

gr_SI1 <- gr_list_1min[[which.min(gr_SI1LL)]]
gr_SI5 <- gr_list_5min[[which.min(gr_SI5LL)]]
gr_SI10 <- gr_list_10min[[which.min(gr_SI10LL)]]

saveRDS(gr_SI10,"HMMs/gr_SI10.rds")
saveRDS(gr_SI1,"HMMs/gr_SI1.rds")
saveRDS(gr_SI5,"HMMs/gr_SI5.rds")

```

```{r compare AIC of grayling models}
gr_SI10 <-readRDS("HMMs/gr_SI10.rds")
gr_SI1 <- readRDS("HMMs/gr_SI1.rds")
gr_SI5 <- readRDS("HMMs/gr_SI5.rds")
AIC(gr_SI1,gr_SI5,gr_SI10)


```

Taking SI_10 model

```{r assign states}
#to edit this code, from Jelger
gr_SI10 <-readRDS("HMMs/gr_SI10.rds")

gr_data$State <- as.factor(viterbi(gr_SI10))


write.csv(gr_data,"data/steps with states/grayling_all.csv")

```

```{r split gr data into individual fish}

fish_ids <- unique(gr_data$fish_id)

for(i in 1:length(fish_ids)){
  id <- fish_ids[i]
  dat <- gr_data %>% filter(fish_id==id)
  write.csv(dat,paste0("data/steps with states/individual fish/",id,".csv"))


}
```

### Barbel

```{r fit hmm to barbel with SI 10,warning=F}

### prepare data 
barb_dat_to_prep <- read.csv("data/large df with SI/barbel.csv") %>% select(-step)


barb_dat_to_prep <- barb_dat_to_prep %>% mutate(ID = paste(fish_id, approach, sep = '-')) %>% dplyr::arrange(ID, t1_) #needs the arrange in order to prep

barb_data <- prepData(barb_dat_to_prep, coordNames = c('x1_', 'y1_'), type = 'UTM') %>% # Prepping data for HMM
  mutate(fish_id = as.factor(fish_id)) 


dist <- list(SI_1min  = 'beta',
             SI_5min  = 'beta',
             SI_10min = 'beta',
             disp_10min = 'gamma',
             disp_5min = 'gamma')

niter <- 50
barb_list_1min  <- vector('list', length = niter)
barb_list_5min  <- vector('list', length = niter)
barb_list_10min <- vector('list', length = niter)


for (n in 1:niter) {
  SI1_mean <- runif(n=2, min=c(0.001, 0.5), max=c(0.5, 0.999)) # randomly picking mean for si for the 2 states
  SI1_sd <- runif(n=2, min=c(0.001, 0.5), max=c(0.5, 0.999)) #randomly picking sd for si for the 2 states
  
  Par0 <- list(SI_1min = c(SI1_mean, SI1_sd)) #list parameters
  
  barb_list_1min[[n]] <- fitHMM(barb_data, nbStates = 2,
                              dist[c(1)], Par0)
}

for (n in 1:niter) {
  SI5_mean <- runif(n=2, min=c(0.001, 0.5), max=c(0.5, 0.999))
  SI5_sd <- runif(n=2, min=c(0.001, 0.5), max=c(0.5, 0.999))
  
  Par0 <- list(SI_5min = c(SI5_mean, SI5_sd))
  
  barb_list_5min[[n]] <- fitHMM(barb_data, nbStates = 2,
                              dist[c(2)], Par0)
}

for (n in 1:niter) {
  SI10_mean <- runif(n=2, min=c(0.001, 0.5), max=c(0.5, 0.999))
  SI10_sd <- runif(n=2, min=c(0.001, 0.5), max=c(0.5, 0.999))
  
  Par0 <- list(SI_10min = c(SI10_mean, SI10_sd))
  
  barb_list_10min[[n]] <- fitHMM(barb_data, nbStates = 2,
                               dist[c(3)], Par0)
}


barb_SI1LL  <- unlist(lapply(barb_list_1min, function(m) m$mod$minimum))
barb_SI5LL  <- unlist(lapply(barb_list_5min, function(m) m$mod$minimum))
barb_SI10LL <- unlist(lapply(barb_list_10min, function(m) m$mod$minimum))

barb_SI1 <- barb_list_1min[[which.min(barb_SI1LL)]]
barb_SI5 <- barb_list_5min[[which.min(barb_SI5LL)]]
barb_SI10 <- barb_list_10min[[which.min(barb_SI10LL)]]

saveRDS(barb_SI10,"HMMs/barb_SI10.rds")
saveRDS(barb_SI1,"HMMs/barb_SI1.rds")
saveRDS(barb_SI5,"HMMs/barb_SI5.rds")

```

```{r compare AIC of barbling models}
barb_SI10 <-readRDS("HMMs/barb_SI10.rds")
barb_SI1 <- readRDS("HMMs/barb_SI1.rds")
barb_SI5 <- readRDS("HMMs/barb_SI5.rds")
AIC(barb_SI1,barb_SI5,barb_SI10)


```

Taking the SI_10 model.

```{r assign states}
#to edit this code, from Jelger
barb_SI10 <-readRDS("HMMs/barb_SI10.rds")

barb_data$State <- as.factor(viterbi(barb_SI10))


write.csv(barb_data,"data/steps with states/barbel_all.csv")

```

```{r split barb data into individual fish}
barb_data<- read.csv("data/steps with states/barbel_all.csv")

fish_ids <- unique(barb_data$fish_id)



for(i in 1:length(fish_ids)){
  id <- fish_ids[i]
  
  
  dat <- barb_data %>% filter(fish_id==id)
  write.csv(dat,paste0("data/steps with states/individual fish/",id,".csv"))


}
```


back to top

Software Heritage — Copyright (C) 2015–2026, The Software Heritage developers. License: GNU AGPLv3+.
The source code of Software Heritage itself is available on our development forge.
The source code files archived by Software Heritage are available under their own copyright and licenses.
Terms of use: Archive access, API— Content policy— Contact— JavaScript license information— Web API