https://doi.org/10.5281/zenodo.14318846
01 - crawl tracks.R
##
# script for interpolating tracks using crawl prior to SSF resampling
####packages####
rm(list=ls())
library(crawl)
library(dplyr)
library(sf)
library(ggplot2)
library(rgdal)
library(raster)
library(mapview)
library(tidyr)
###get data####
# Detection data for fish approaching the fish ladder (one large file of
# 60-minute approaches).
data_frame <- read.csv('data/find ladder/all_approaches_60mins.csv')
# Quick interactive sanity checks on the loaded table.
names(data_frame)
head(data_frame)
# Duplicate the transmitter id into fish_id, the name used downstream.
data_frame$fish_id <- data_frame$id
# Discharge columns came in duplicated with merge suffixes (_x/_y); keep the _x
# copy. Uncomment the next line if the plain `discharge` column is also absent:
#data_frame$discharge <- data_frame$discharge_x
data_frame$discharge_nearest_10 <- data_frame$discharge_nearest_10_x
# Drop the duplicated discharge columns.
# NOTE(review): dropping columns shifts column positions — the crawl loop below
# selects the x/y coordinates positionally as columns 14 and 15, so this drop
# must stay exactly as-is (confirm positions if the input schema ever changes).
data_frame <- subset(data_frame,select=-c(discharge_x,discharge_y,
discharge_nearest_10_x,discharge_nearest_10_y))
summary(data_frame)
str(data_frame)
# Parse detection time stamps as POSIXct (uses the session's local time zone —
# TODO confirm that matches the time zone the receivers logged in).
data_frame$time2 <- as.POSIXct(data_frame$Time)
# Hydraulic-model raster layers (one file per variable x discharge band), used
# later to extract depth/velocity and to check points fall inside the river.
raster_files <- list.files("rasters") #identify file names
# Substrate polygons from the hydraulic model (also serves as an in-river check;
# more data than strictly needed, but kept).
substrate_poly <-readOGR('substrate/Materials_hydraulicModel.shp')
####LOOP####
# The per-fish loop below runs fast enough that no parallelisation is needed.
####prepare for crawl####
data <- data_frame
fish_id_list <-unique(data$fish_id)
# River outline for plotting; fortify() flattens the sp polygons into a plain
# data frame that ggplot2::geom_polygon can draw.
river_shape <-readOGR('rivershp/new_river_shapefile.shp')
river_shape2 <- fortify(river_shape)
for (q in seq_along(fish_id_list)) {
  ## Per-fish accumulator of crawl-predicted positions; one CSV is written per fish.
  data_crawled_fish <- data.frame(matrix(ncol = 30, nrow = 0))
  colnames(data_crawled_fish) <- c("TimeNum","locType","Time","id", "tag",
                                   "lat","lon", "error_x","error_y" ,"error_xy",
                                   "nobs", "signal_strength" ,
                                   "noise_rec" ,"quality_score","species",
                                   "dist.from.fishway" ,"near_entrance" , "approach" ,
                                   "if.within.15m.of.ladder",
                                   "most_track_near_ladder","fish_id" ,
                                   "discharge" ,"discharge_nearest_10", "time2",
                                   "x", "y", "mu.x","nu.x", "mu.y", "nu.y" )
  ## Log of approaches/track parts skipped by the filters below.
  errors_small <- data.frame(matrix(ncol = 4, nrow = 0))
  colnames(errors_small) <- c('fish_id','approach','track','reason')
  errors_small_blank <- errors_small
  ####crawl for new positions prior to ssf####
  ## Current fish id and its detections.
  j <- fish_id_list[q]
  data_j <- filter(data, fish_id == j)
  ## Restrict to the species' migration period at Altusried.
  if (data_j$species[1] == "Barbel") {
    data_j <- filter(data_j, time2 < "2018-06-30" & time2 > "2018-04-16")
  }
  if (data_j$species[1] == "Grayling") {
    data_j <- filter(data_j, time2 < "2018-05-15" & time2 > "2018-03-01")
  }
  approaches <- unique(data_j$approach)
  starttime <- Sys.time()
  print(paste0('started fish id ',j,' at time: ',starttime))
  for (i in approaches) {
    start_approachtime <- Sys.time()
    data2 <- filter(data_j, approach == i)
    ## Seconds since the previous detection (NA on the approach's first row).
    data2$time_diff <- as.numeric(data2$time2 - lag(data2$time2), units = 'secs')
    ## Drop detections closer than 1.1 s apart (below the burst interval).
    ## NOTE(review): the first row's NA time_diff also fails this filter, so the
    ## first detection of every approach is removed — confirm this is intended.
    data2 <- filter(data2, time_diff >= 1.1)
    print(paste0('Started approach ',i,' for fish ',j,' at time ',start_approachtime))
    if (length(data2$Time) > 1) {
      if (data2$most_track_near_ladder[1] == 'No') {
        ## Split the approach into track parts wherever >60 s separates two
        ## detections, so crawl does not interpolate across the gap.
        data2$track_part <- 1
        for (p in 2:length(data2$id)) {
          data2$track_part[p] <- ifelse(data2$time_diff[p] > 60,
                                        data2$track_part[p-1] + 1,
                                        data2$track_part[p-1])
        }
        start_time <- min(data2$time2) # overall approach start
        end_time <- max(data2$time2)   # overall approach end
        track_time <- as.numeric(end_time - start_time, units = 'secs')
        ## Only process approaches lasting longer than 15 min (900 s).
        if (track_time > 900) {
          num_tracks <- unique(data2$track_part)
          for (z in num_tracks) {
            data3 <- filter(data2, track_part == z)
            if (length(data3$Time) > 10) { # need >10 detections in a part
              ## x/y coordinates, selected positionally (columns 14 and 15 of
              ## the prepared input data frame).
              data.points <- data3[, c(14, 15)]
              data.points.fin <- data.points
              ## Sample every hydraulic raster at the detection positions; each
              ## new column is named after the raster file minus its extension.
              for (x in seq_along(raster_files)) {
                # fixed = TRUE: strip the literal ".tif" suffix (the original
                # unescaped regex dot matched any character before "tif").
                rast_name <- gsub('.tif', '', raster_files[x], fixed = TRUE)
                data.points.fin[[rast_name]] <- raster::extract(raster(paste0("rasters/",
                                                                              raster_files[x])), data.points)
              }
              for (y in seq_along(data3$fish_id)) {
                ## Discharge band for this detection; hydraulic rasters only
                ## exist up to the 80 m3/s band, so cap there.
                discharge_start_10s <- data3$discharge_nearest_10[y]
                if (discharge_start_10s > 80) {
                  discharge_start_10s <- 80
                }
                ## Columns belonging to this discharge band, then the specific
                ## column for each hydraulic variable.
                headings <- names(data.points.fin)[grepl(discharge_start_10s, names(data.points.fin))]
                depth_head <- headings[grepl('depth', headings)]
                vel_head <- headings[grepl('vel_', headings)]
                velgrad_head <- headings[grepl('svg', headings)]
                data3[y, 'depth'] <- data.points.fin[y, depth_head]
                data3[y, 'water_velocity'] <- data.points.fin[y, vel_head]
                data3[y, 'svg'] <- data.points.fin[y, velgrad_head]
              }
              ## Substrate class at each position from the hydraulic-model polygons.
              substrate_extract <- raster::extract(substrate_poly, data.points)
              substrate_type <- substrate_extract$MATNAME
              data_with_substrate <- cbind(data3, substrate_type)
              ## Drop positions with no hydraulic/substrate value (outside the
              ## modelled river area).
              data_frame_filtered <- drop_na(data_with_substrate, c(svg, depth,
                                                                    water_velocity, substrate_type))
              ## Fit the continuous-time correlated random walk (crawl) model and
              ## predict positions on a regular 20 s grid over the track part.
              sf_data <- st_as_sf(data_frame_filtered, coords = c('x','y')) %>% st_set_crs(32632)
              crawl_model <- crwMLE(data = sf_data, Time.name = 'time2')
              start_time <- min(sf_data$time2)
              end_time <- max(sf_data$time2)
              times <- seq.POSIXt(from = start_time, to = end_time, by = '20 sec')
              crawled_data <- crwPredict(crawl_model, times)
              ## NOTE(review): rbind requires crwPredict's output columns to match
              ## the 30-column template above — verify if crawl's output changes.
              data_crawled_fish <- rbind(data_crawled_fish, crawled_data)
            } else {
              print(paste0('ERROR, fish id ',j,' approach ',i,' track number ',z, ' has less than 10 data points'))
              errors_small2 <- errors_small_blank
              errors_small2[1,] <- c(j,i,z,'<10 points')
              errors_small <- rbind(errors_small, errors_small2)
            }
          }
        } else {
          print(paste0('ERROR, fish id ',j,' approach ',i,' is less than 15mins'))
          errors_small2 <- errors_small_blank
          errors_small2[1,] <- c(j,i,'NA','<15mins')
          errors_small <- rbind(errors_small, errors_small2)
        }
      } else {
        print(paste0('ERROR, fish id ',j,' approach ',i,' is mostly in the area near ladder'))
        errors_small2 <- errors_small_blank
        errors_small2[1,] <- c(j,i,'NA','most points within 15m of ladder')
        errors_small <- rbind(errors_small, errors_small2)
      }
    } else {
      print(paste0('ERROR, fish id ',j,' approach ',i,' has 0 data points after filtering on burst interval'))
      errors_small2 <- errors_small_blank
      errors_small2[1,] <- c(j,i,'NA','0 detections after filter on burst interval')
      errors_small <- rbind(errors_small, errors_small2)
    }
  }
  endtime <- Sys.time()
  print(paste0('Finished fish id ',j,' at time: ',endtime))
  print(paste0('time taken: ',as.numeric(endtime-starttime,units='mins'), ' minutes'))
  ####save csv####
  ## One crawled-track CSV and one skip-log CSV per fish.
  ## BUG FIX: file names now use the loop id `j`; the previous
  ## data_crawled_fish$fish_id[1] was NA when every approach was rejected,
  ## producing an "NA_..." file name (and overwriting it across such fish).
  write.csv(data_crawled_fish,paste0('data/find ladder/data near ladder CRAWLED/',j,'_60min_track_crawl_20s.csv'),row.names = FALSE)
  write.csv(errors_small,paste0('data/find ladder/ERRORS when crawl - if too small/',j,'_60min_small_approaches.csv'),row.names = FALSE)
  ####actual plot####
  ## Visual check: crawl-predicted path for each retained approach over the
  ## river outline, coloured by point order.
  fin_approaches <- unique(data_crawled_fish$approach)
  #fin_approaches <-fin_approaches[fin_approaches<20]
  for (i in fin_approaches) {
    data_i <- subset(data_crawled_fish, approach == i)
    data_i <- as_tibble(data_i)
    data_i <- mutate(data_i, num = row_number())
    plot <- ggplot(data_i, aes(x = mu.x, y = mu.y)) +
      geom_point(aes(col = num)) +
      geom_path(aes(col = num)) +
      geom_polygon(data = river_shape2, aes(long, lat), col = 'black', fill = NA) +
      #coord_cartesian(xlim=c(591660,591690),ylim=c(5296870,5296895))+
      ## Red point marks the fish-ladder entrance.
      geom_point(aes(x = 591683.2, y = 5296867.5), colour = 'red') +
      labs(x = 'x', y = 'y', title = paste0('approach number ',i,' for fish id',data_i$fish_id[1]))
    print(plot)
  }
}