context("Load GEO and its utils") library(jsonlite) library(Biobase) library(data.table) test_that("loadGEO finishes with result", { options(phantasusMirrorPath = "https://genome.ifmo.ru/files/software/phantasus", phantasusCacheDir = tempdir()) cacheDir <- getOption("phantasusCacheDir") x <- loadGEO("GSE27112") expect_is(x, "json") binPath <- file.path(cacheDir, fromJSON(x)) ess <- protolite::unserialize_pb(readBin(binPath, what="raw", n=100000000)) expect_equal(length(ess), 2) x <- loadGEO("GSE27112-GPL6885") expect_is(x, "json") binPath <- file.path(cacheDir, fromJSON(x)) ess <- protolite::unserialize_pb(readBin(binPath, what="raw", n=100000000)) expect_equal(length(ess), 1) expect_is(loadGEO("GSE14308"), "json") expect_is(loadGEO("GDS4885"), "json") expect_error(loadGEO("WRONGNAME")) options(phantasusMirrorPath = NULL, phantasusCacheDir = NULL) }) test_that("getGDS adds id field for GDS datasets", { a <- getGDS("GDS4885")[[1]] expect_true("id" %in% tolower(fvarLabels(a))) }) test_that("filterPhenoAnnotations saves colnames", { cacheDir <- tempdir() es <- getES("GSE53986", destdir = cacheDir)[[1]] expect_true(all(colnames(es) == colnames(exprs(es)))) }) test_that("reparseCachedGSEs works", { cacheDir <- tempdir() getES("GSE14308", destdir = cacheDir) expect_true("GSE14308" %in% reparseCachedESs(destdir = cacheDir)) }) test_that("checkGPLs counts gpls correctly", { options(phantasusMirrorPath = "https://genome.ifmo.ru/files/software/phantasus") expect_equal(fromJSON(checkGPLs("GSE14308")), c("GSE14308")) expect_equal(fromJSON(checkGPLs("GDS4885")), c("GDS4885")) expect_length(fromJSON(checkGPLs("GSE27112")), 2) expect_length(fromJSON(checkGPLs("GSE10000")), 2) expect_warning(checkGPLs("GSE101")) expect_warning(checkGPLs("GSE201")) options(phantasusMirrorPath = NULL) }) test_that("checkGPLs works with fully specified name", { options(phantasusMirrorPath = "https://genome.ifmo.ru/files/software/phantasus") expect_equal(fromJSON(checkGPLs("GSE27112-GPL6885")), c("GSE27112-GPL6885")) options(phantasusMirrorPath = NULL) }) # TODO: adapt to new checkGPLs #test_that("checkGPLs counts existing files correctly without connection", { #options(phantasusMirrorPath = "https://genome.ifmo.ru/files/software/phantasus") #options(phantasusCacheDir = tempfile()) #expect_length(fromJSON(checkGPLs("GSE27112")), 2) #options(phantasusMirrorPath = "https://notworkingdomain") #expect_message(checkGPLs("GSE14308"), regexp = "Problems establishing connection") #expect_length(fromJSON(checkGPLs("GSE27112")), 2) #options(phantasusCacheDir = NULL, #phantasusMirrorPath = NULL) #}) test_that("getGSE works with ARCHS4", { ess <- getGSE("GSE99709", destdir=system.file("testdata", package="phantasus")) expect_gt(nrow(ess[[1]]), 0) expect_gt(ncol(ess[[1]]), 0) }) test_that("InferConditionImpl works correctly", { tests <- fread(system.file("testdata/dts.tsv", package="phantasus")) test_ds <- data.table(title=tests$Title, series=tests$Series, accession=tests$Accession, rep=tests$Replicate, inferCondition=tests$InferCondition) cond <- split(test_ds$title, test_ds$series) inf_cond_test <- split(test_ds$inferCondition, test_ds$series) rep_test <- split(test_ds$rep, test_ds$series) new_cond <- lapply(cond, inferConditionImpl) expect_equal(new_cond$GSE100221, list()) # text in all titles is unique expect_equal(new_cond$GSE10380, list()) # long dataset expect_equal(new_cond$GSE10382, list()) # number-only titles expect_equal(new_cond$GSE10383, list()) # two-color datasets expect_equal(new_cond$GSE10385, list()) # the same text and replicate number in all titles expect_equal(new_cond$GSE10039, list()) # ambiguous replicate number "High_Mo_seg_pool_Ler_col_F2" "Low_Mo_seg_pool_Ler_col_F2" "Col-0 3" expect_equal(new_cond$GSE101508$condition, inf_cond_test$GSE101508) #"IFNγ+LPS rep2" -> "IFNγ+LPS" + "2" expect_equal(new_cond$GSE101508$replicate, as.character(rep_test$GSE101508)) expect_equal(new_cond$GSE10392$condition, inf_cond_test$GSE10392) # "MPA 1" - > "MPA" + "1" expect_equal(new_cond$GSE10392$replicate, as.character(rep_test$GSE10392)) expect_equal(new_cond$GSE10123$condition, inf_cond_test$GSE10123) # "WT-GFP-lamin A Induction: Day 0 Replicate A" -> "WT-GFP-lamin A Induction: Day 0" + "A" expect_equal(new_cond$GSE10123$replicate, as.character(rep_test$GSE10123)) }) test_that("getGPLAnnotation works with errorneous empty annotation files", { destdir <- file.path(tempdir(), "cache_bad") dir.create(destdir) GPL <- "GPL17021" stub = gsub('\\d{1,3}$','nnn',GPL,perl=TRUE) GPLDirPath <- '%s/geo/platforms/%s/%s/annot' fullGPLDirPath <- file.path(sprintf(GPLDirPath, destdir, stub, GPL)) dir.create(fullGPLDirPath, showWarnings = FALSE, recursive = TRUE) file.create(file.path(fullGPLDirPath, paste0(GPL, ".annot.gz"))) gpl <- getGPLAnnotation(GPL, destdir) expect_true(!is.null(gpl)) }) test_that("getGSEType works", { expect_true(checkGSEType('GSE53986', tempdir())) expect_true(checkGSEType('GSE99709', tempdir())) expect_false(checkGSEType('GSE33356', tempdir())) expect_false(checkGSEType('GSE33356-GPL6801', tempdir())) })