#!/usr/bin/env Rscript # This script adapted from Peter Carbonetto fit_poisson_nmf_purified_pbmc.R # which fit a Poisson non-negative factorization to the purified PBMC # single-cell RNA-seq data of Zheng et al (2017). # # This version runs the analysis on input data prepared previously # from a seurat object # # This script is intended to be run from the command-line shell, with # options that are processed with the optparse package. For example, # to fit a rank-4 Poisson non-negative matrix factorization by running # 500 SCD updates with extrapolation, 2 threads, with estimates # initialized from fit.rds, and with results saved to test.rds, run # this command: # # ./fit_poisson_nmf_purified_pbmc.R -k 4 --method scd --nc 2 \ # --extrapolate --numiter 500 --prefit fit.rds -o test.rds # # Poisson non-negative matrix factorization by running 1,000 EM # updates without extrapolation, without multithreading, and with # results saved to out.rds. The estimates of the factors and loadings # are initialized from ../output/prefit-k=3.rds by default when k = 3. # # Load a few packages. library(Matrix) library(optparse) library(readr) library(fastTopics) # Process the command-line arguments. parser <- OptionParser() parser <- add_option(parser,c("--out","-o"),type="character",default="out.rds") parser <- add_option(parser,c("--k","-k"),type = "integer",default = 3) parser <- add_option(parser,"--method",type = "character",default = "em") parser <- add_option(parser,"--nc",type = "integer",default = 1) parser <- add_option(parser,"--extrapolate",action = "store_true") parser <- add_option(parser,"--numiter",type = "integer",default = 1000) parser <- add_option(parser,"--prefit",type = "character",default = "auto") out <- parse_args(parser) outfile <- out$out k <- out$k method <- out$method nc <- out$nc #numiter <- out$numiter #hard setting numiter to 500 here, can revert to the optionparser input using the line above numiter <- 500 prefit.file <- out$prefit extrapolate <- !is.null(out$extrapolate) rm(parser,out) # Initialize the sequence of pseudorandom numbers. set.seed(1) # LOAD DATA # --------- # Load the previously prepared data. cat("Loading data.\n") load("/project2/gilad/katie/Pilot_HumanEBs/fastTopics/prepared_data_YorubaOnly_genesExpressedInMoreThan10Cells.RData") cat(sprintf("Loaded %d x %d counts matrix.\n",nrow(counts),ncol(counts))) # LOAD PRE-FITTED MODEL # --------------------- if (prefit.file == "auto") prefit.file <- sprintf("prefit-k=%d.rds",k) prefit.file <- file.path(".",prefit.file) cat(sprintf("Loading pre-fitted model from %s\n",prefit.file)) fit0 <- readRDS(prefit.file)$fit # FIT POISSON NON-NEGATIVE MATRIX FACTORIZATION # --------------------------------------------- # Now we are ready to perform the main model-fitting step. cat("Fitting Poisson NMF to count data.\n") control <- list(extrapolate = extrapolate,nc = nc) if (method != "ccd") control$numiter <- 4 timing <- system.time({ fit <- fit_poisson_nmf(counts,fit0 = fit0,numiter = numiter, method = method,control = control) }) cat(sprintf("Computation took %0.2f seconds.\n",timing["elapsed"])) # SAVE RESULTS # ------------ cat("Saving results.\n") saveRDS(list(k = k,method = method,control = control,fit = fit), file = outfile) # SESSION INFO # ------------ print(sessionInfo())