## Raw File
## gps_pbp_collator.R
###############################################################################
## To be run in /rds/general/project/bacterial_evo_genomics/live/gps_annotations_4_2_2020/gps_pbp_runs

require(dplyr)

## Collate the per-cluster "<cluster>__missing_isolates.txt" lists into one table.
##
## collate_missing_isolates(dir):
##   dir     directory to scan (defaults to the working directory).
##   returns a data frame with one row per unique line of each file:
##             id            the line with the literal ".velvet.gff" removed
##             cluster_name  the file name with "cluster_" replaced by "gpsc."
##                           and the "__missing_isolates.txt" suffix dropped
##           or NULL when no matching file exists.
collate_missing_isolates <- function(dir = ".") {
  # Anchored and escaped so that e.g. "x__missing_isolates.txt.bak" is skipped.
  suffix <- "__missing_isolates\\.txt$"
  files <- list.files(dir, pattern = suffix)

  per_file <- lapply(files, function(f) {
    # fixed = TRUE: match ".velvet.gff" literally; as a regex the dots would
    # match any character and could strip unrelated parts of an ID.
    ids <- unique(sub(".velvet.gff", "", readLines(file.path(dir, f)), fixed = TRUE))
    cluster <- sub(suffix, "", sub("cluster_", "gpsc.", f, fixed = TRUE))

    # Size the cluster column from the IDs so an empty file gives a zero-row
    # frame instead of a "differing number of rows" error.
    data.frame(
      id = ids,
      cluster_name = rep(cluster, length(ids)),
      stringsAsFactors = FALSE
    )
  })

  # Bind once at the end rather than growing the result on every iteration.
  do.call(rbind, per_file)
}

missing_iso <- collate_missing_isolates()


## Collate the per-cluster "<cluster>__pbp_predictions.csv" files into one table.
##
## collate_pbp_predictions(dir):
##   dir     directory to scan (defaults to the working directory).
##   returns a data frame with the `id` and `penicillin_cat` columns of every
##           file plus `cluster_name` (file name with "cluster_" replaced by
##           "gpsc." and the "__pbp_predictions.csv" suffix dropped),
##           or NULL when no matching file exists.
##   A file lacking either column stops with an "undefined columns" error.
collate_pbp_predictions <- function(dir = ".") {
  # Anchored and escaped so that e.g. "x__pbp_predictions.csv.old" is skipped.
  suffix <- "__pbp_predictions\\.csv$"
  files <- list.files(dir, pattern = suffix)

  per_file <- lapply(files, function(f) {
    hits <- read.csv(file.path(dir, f), stringsAsFactors = FALSE)
    hits <- hits[, c("id", "penicillin_cat"), drop = FALSE]

    cluster <- sub(suffix, "", sub("cluster_", "gpsc.", f, fixed = TRUE))
    # rep(..., nrow(hits)) keeps a header-only file valid; assigning a
    # length-one value to a zero-row data frame is an error.
    hits$cluster_name <- rep(cluster, nrow(hits))
    hits
  })

  # Bind once at the end rather than growing the result on every iteration.
  do.call(rbind, per_file)
}

gps_hits <- collate_pbp_predictions()


## Write both collated tables to CSV in the working directory, without the
## row-name column.
write.csv(x = missing_iso,
          file = "./gps_missing_pbp_isolates.csv",
          row.names = FALSE)
write.csv(x = gps_hits,
          file = "./gps_pbp_profiles.csv",
          row.names = FALSE)



## back to top