Revision 7b0ba684ca388613b1b679578c14eeb6f6e2d304 authored by Joost van Griethuysen on 20 May 2023, 11:43:39 UTC, committed by Joost van Griethuysen on 20 May 2023, 11:43:39 UTC
1 parent d6ec90d
DatasetHierarchyReader.py
# -*- coding: utf-8 -*-
from __future__ import print_function
import collections
import glob
import os
import six
class DatasetHierarchyReader(object):
    """
    Index a dataset stored on disk as ``<dataset>/<patient>/<study>/<category>/<files>``.

    For every study directory, up to three category sub-directories are recognised by a
    case-insensitive substring match on their name: ``reconstructions`` (images),
    ``segmentations`` (masks/labels) and ``resources`` (any other files).
    """

    def __init__(self, inputDatasetDirectory, filetype='.nrrd'):
        """
        inputDatasetDirectory: root directory that contains one sub-directory per patient.
        filetype: text that must occur in the file name of an image or mask for it to be
        listed (substring match, so '.nii' also matches 'scan.nii.gz').
        """
        self.inputDatasetDirectory = inputDatasetDirectory
        self.filetype = filetype
        # patient directory -> study directory -> category name -> list of file paths
        self.DatabaseHierarchyDict = collections.OrderedDict()

    def setInputDatasetDirectory(self, inputDatasetDirectory):
        """Change the dataset root used by the next call to ReadDatasetHierarchy."""
        self.inputDatasetDirectory = inputDatasetDirectory

    def setFiletype(self, filetype):
        """Change the file name filter applied to reconstructions and segmentations."""
        self.filetype = filetype

    def ReadDatasetHierarchy(self, create=False):
        """
        Walk the dataset directory and return the hierarchy as nested ordered dicts:
        ``result[patientDirectory][studyDirectory][category] -> list of file paths``,
        where category is "reconstructions", "resources" or "segmentations".

        Only directories are treated as patients and studies; plain files at those levels
        are ignored (they used to show up as empty entries and made ``create=True`` fail,
        because a directory cannot be created beneath a regular file). Entries are visited
        in sorted order so the result does not depend on the file system's listing order.

        create: when True, a missing "Reconstructions", "Resources" or "Segmentations"
        directory is created inside each study directory.

        The returned object is ``self.DatabaseHierarchyDict``. It is not emptied between
        calls: patients read by an earlier call stay in it unless they are read again.
        """
        hierarchy = self.DatabaseHierarchyDict
        for patientDirectory in self._listDirectories(self.inputDatasetDirectory):
            studies = collections.OrderedDict()
            hierarchy[patientDirectory] = studies
            for studyDirectory in self._listDirectories(patientDirectory):
                subfolders = self._listDirectories(studyDirectory)
                entry = collections.OrderedDict()
                studies[studyDirectory] = entry

                # Same processing order as before, so directories are created and
                # "Created:" messages are printed in the same sequence.
                _, entry["reconstructions"] = self.readReconstructionsDirectory(
                    studyDirectory, subfolders, create=create)
                _, entry["resources"] = self.readResourcesDirectory(
                    studyDirectory, subfolders, create=create)
                _, entry["segmentations"] = self.readSegmentationsDirectory(
                    studyDirectory, subfolders, create=create)
        return hierarchy

    def readReconstructionsDirectory(self, studyDirectory, subfolders, create=False):
        """
        Locate the reconstructions directory among ``subfolders`` and list its images.

        Returns ``(directory, images)``. ``images`` holds the paths whose file name
        contains ``self.filetype``. When no sub-directory matches, ``images`` is empty and
        ``directory`` is the string "NONE", or, if ``create`` is True, the path
        ``<studyDirectory>/Reconstructions``, which is created when it does not exist.
        """
        return self._readSubdirectory(studyDirectory, subfolders, 'reconstructions',
                                      "Reconstructions", create, filterByFiletype=True)

    def readSegmentationsDirectory(self, studyDirectory, subfolders, create=False):
        """
        Locate the segmentations directory among ``subfolders`` and list its label files.

        Returns ``(directory, labels)``. ``labels`` holds the paths whose file name
        contains ``self.filetype``. When no sub-directory matches, ``labels`` is empty and
        ``directory`` is the string "NONE", or, if ``create`` is True, the path
        ``<studyDirectory>/Segmentations``, which is created when it does not exist.
        """
        return self._readSubdirectory(studyDirectory, subfolders, 'segmentations',
                                      "Segmentations", create, filterByFiletype=True)

    def readResourcesDirectory(self, studyDirectory, subfolders, create=False):
        """
        Locate the resources directory among ``subfolders`` and list everything in it.

        Returns ``(directory, resources)``. Unlike images and labels, resources are not
        filtered by ``self.filetype``. When no sub-directory matches, ``resources`` is
        empty and ``directory`` is the string "NONE", or, if ``create`` is True, the path
        ``<studyDirectory>/Resources``, which is created when it does not exist.
        """
        return self._readSubdirectory(studyDirectory, subfolders, 'resources',
                                      "Resources", create, filterByFiletype=False)

    def findImageAndLabelPair(self, imageFilepaths, maskFilepaths, keywordSettings):
        """
        Accepts a list of image filepaths, a list of mask/label filepaths, and a
        dict of keyword settings in the form:

            keywordSettings['image'] = ""
            keywordSettings['imageExclusion'] = ""
            keywordSettings['mask'] = ""
            keywordSettings['maskExclusion'] = ""

        where each field is a string of words separated by commas (case does not matter and
        spaces around a word are ignored). A field that is missing or None is treated as an
        empty string, i.e. it imposes no condition.

        Returns the ``(imageFilepath, maskFilepath)`` pair that satisfies the keyword
        conditions. If the conditions do not select exactly one image and exactly one
        mask, an "ERROR: ..." message is printed for each problem and ``(None, None)``
        is returned.
        """
        imageKeywords = self._splitKeywords(keywordSettings.get('image'))
        imageExclusions = self._splitKeywords(keywordSettings.get('imageExclusion'))
        maskKeywords = self._splitKeywords(keywordSettings.get('mask'))
        maskExclusions = self._splitKeywords(keywordSettings.get('maskExclusion'))

        # Keywords are tested against the file name only, never the directory part.
        matchedImages = [path for path in imageFilepaths
                         if self.testString(os.path.basename(path), imageKeywords, imageExclusions)]
        matchedMasks = [path for path in maskFilepaths
                        if self.testString(os.path.basename(path), maskKeywords, maskExclusions)]

        if len(matchedImages) < 1:
            print("ERROR: No Images Matched")
        elif len(matchedImages) > 1:
            print("ERROR: Multiple Images Matched")

        if len(matchedMasks) < 1:
            print("ERROR: No Masks Matched")
        elif len(matchedMasks) > 1:
            print("ERROR: Multiple Masks Matched")

        if len(matchedImages) == 1 and len(matchedMasks) == 1:
            return matchedImages[0], matchedMasks[0]
        return None, None

    def testString(self, fileName, inclusionKeywords, exclusionKeywords):
        """
        Return True when ``fileName`` contains every inclusion keyword and none of the
        exclusion keywords. The comparison is case-insensitive and empty keywords are
        ignored; with no keywords left on either side the result is True.
        """
        upperName = fileName.upper()
        required = [keyword.upper() for keyword in inclusionKeywords if keyword != '']
        forbidden = [keyword.upper() for keyword in exclusionKeywords if keyword != '']
        # all() of an empty list is True and any() of an empty list is False, so this one
        # expression covers every combination of empty and non-empty keyword lists.
        return (all(keyword in upperName for keyword in required) and
                not any(keyword in upperName for keyword in forbidden))

    def _listDirectories(self, parentDirectory):
        """Return the sorted paths of the directories directly inside ``parentDirectory``."""
        return sorted(path for path in glob.glob(os.path.join(parentDirectory, '*'))
                      if os.path.isdir(path))

    def _readSubdirectory(self, studyDirectory, subfolders, keyword, defaultName, create,
                          filterByFiletype=True):
        """
        Shared implementation of the three read*Directory methods.

        Picks the first entry of ``subfolders`` whose base name contains ``keyword``
        (case-insensitive) and returns ``(directory, sorted file paths)``, optionally
        keeping only paths whose file name contains ``self.filetype``. Without a match the
        file list is empty and the directory is "NONE", unless ``create`` is True, in which
        case ``<studyDirectory>/<defaultName>`` is returned and created if absent.
        """
        candidates = [item for item in subfolders if keyword in os.path.basename(item).lower()]
        if candidates:
            directory = candidates[0]
            contents = sorted(glob.glob(os.path.join(directory, "*")))
            if filterByFiletype:
                contents = [item for item in contents if self.filetype in os.path.basename(item)]
            return directory, contents

        if not create:
            # Sentinel string kept for callers that compare against it.
            return "NONE", []

        directory = os.path.join(studyDirectory, defaultName)
        if not os.path.exists(directory):
            os.mkdir(directory)
            print("\tCreated:", directory)
        return directory, []

    @staticmethod
    def _splitKeywords(value):
        """Split a comma separated keyword string into a list of stripped keywords."""
        return [keyword.strip() for keyword in (value or '').split(',')]
Computing file changes ...