https://github.com/kit-parco/networkit
Raw File
Tip revision: d8e952f1e4d5e2758e4744e7c6ea7429a59c7cdf authored by Fabian Brandt on 29 May 2020, 15:04:07 UTC
Merge pull request #558 from fabratu/fix-ctd
Tip revision: d8e952f
graphio.py
# extension imports
from _NetworKit import (METISGraphReader, METISGraphWriter, DotGraphWriter, EdgeListWriter, \
						GMLGraphWriter, LineFileReader, SNAPGraphWriter, DGSWriter, GraphToolBinaryWriter, \
						GraphToolBinaryReader, DGSStreamParser, GraphUpdater, SNAPEdgeListPartitionReader, \
						SNAPGraphReader, EdgeListReader, CoverReader, CoverWriter, EdgeListCoverReader, \
						KONECTGraphReader, GMLGraphReader, MultipleEdgesHandling, ThrillGraphBinaryReader, \
						ThrillGraphBinaryWriter, NetworkitBinaryReader, NetworkitBinaryWriter)
from _NetworKit import Graph as __Graph
# local imports
from .GraphMLIO import GraphMLReader, GraphMLWriter
from .GEXFIO import GEXFReader, GEXFWriter
from . import algebraic
from .support import MissingDependencyError

# external imports
import os
import logging
import numpy
import scipy.io
import fnmatch

from enum import Enum

class __AutoNumber(Enum):
	def __new__(cls):
		value = len(cls.__members__) + 1
		obj = object.__new__(cls)
		obj._value_ = value
		return obj


class Format(__AutoNumber):
	""" Simple enumeration class to list supported file types. Currently supported
	file types: SNAP, EdgeListSpaceZero, EdgeListSpaceOne, EdgeListTabZero, EdgeListTabOne,
	METIS, GraphML, GEXF, GML, EdgeListCommaOne, GraphViz, DOT, EdgeList, LFR, KONEC, GraphToolBinary,
			NetworkitBinary"""
	SNAP = ()
	EdgeListSpaceZero = ()
	EdgeListSpaceOne = ()
	EdgeListTabZero = ()
	EdgeListTabOne = ()
	METIS = ()
	GraphML = ()
	GEXF = ()
	GML = ()
	EdgeListCommaOne = ()
	GraphViz = ()
	DOT = ()
	EdgeList = ()
	LFR = ()
	KONECT = ()
	GraphToolBinary = ()
	MAT = ()
	ThrillBinary = ()
	NetworkitBinary = ()

# reading

def getReader(fileformat, *kargs, **kwargs):
	#define your [edgelist] reader here:
	readers = {
		Format.METIS:			METISGraphReader(),
		Format.GraphML:			GraphMLReader(),
		Format.GEXF:			GEXFReader(),
		Format.SNAP:			SNAPGraphReader(),
		Format.EdgeListCommaOne:	EdgeListReader(',',1,),
		Format.EdgeListSpaceOne:	EdgeListReader(' ',1),
		Format.EdgeListSpaceZero:	EdgeListReader(' ',0),
		Format.EdgeListTabOne:		EdgeListReader('\t',1),
		Format.EdgeListTabZero:		EdgeListReader('\t',0),
		Format.LFR:			EdgeListReader('\t',1),
		Format.KONECT:			KONECTGraphReader(),
		Format.GML:			GMLGraphReader(),
		Format.GraphToolBinary:		GraphToolBinaryReader(),
		Format.MAT:			MatReader(),
		Format.ThrillBinary:		ThrillGraphBinaryReader(),
		Format.NetworkitBinary:         NetworkitBinaryReader()
	}

	# special case for custom Edge Lists
	if fileformat == Format.EdgeList:
		if "continuous" in kwargs and kwargs["continuous"] == False:
			kwargs["firstNode"] = 0
		reader = EdgeListReader(*kargs, **kwargs)

	else:
		if fileformat not in readers:
			raise Exception("unrecognized format/format not supported as input: {0}".format(fileformat))
		reader = readers[fileformat]#(**kwargs)

	return reader


def readGraph(path, fileformat, *kargs, **kwargs):
	""" Read graph file in various formats and return a NetworKit::Graph
	    Parameters:
		- fileformat: An element of the Format enumeration. Currently supported file types:
		SNAP, EdgeListSpaceZero, EdgeListSpaceOne, EdgeListTabZero, EdgeListTabOne, METIS,
		GraphML, GEXF, GML, EdgeListCommaOne, GraphViz, DOT, EdgeList, LFR, KONECT, GraphToolBinary, ThrillBinary
		- **kwargs: in case of a custom edge list, pass the genereic Fromat.EdgeList accompanied by
			the defining paramaters as follows:
			"separator=CHAR, firstNode=NODE, commentPrefix=STRING, continuous=BOOL, directed=BOOL"
			commentPrefix='#', continuous=True and directed=False are optional because of their default values;
			firstNode is not needed when continuous=True.
	"""
	reader = getReader(fileformat, *kargs, **kwargs)

	if ("~" in path):
		path = os.path.expanduser(path)
		print("path expanded to: {0}".format(path))
	if not os.path.isfile(path):
		raise IOError("{0} is not a file".format(path))
	else:
		with open(path, "r") as file:    # catch a wrong path before it crashes the interpreter
			try:
				G = reader.read(path)
				G.setName(os.path.basename(path).split(".")[0])	# set name of graph to name of file
				return G
			except Exception as e:
				raise IOError("{0} is not a valid {1} file: {2}".format(path,fileformat,e))
	return None

def readGraphs(dirPath, pattern, fileformat, some=None, exclude=None, **kwargs):
	"""
	Read all graph files contained in a directory whose filename contains the pattern, return a dictionary of name to Graph object.
    Parameters:
	- pattern: unix-style string pattern
	- fileformat: An element of the Format enumeration
	- some: restrict number of graphs to be read
	- **kwargs: in case of a custom edge list, provide the defining paramaters as follows:
		"separator=CHAR, firstNode=NODE, commentPrefix=STRING, continuous=BOOL"
		commentPrefix and continuous are optional
	"""
	graphs = {}
	for root, dirs, files in os.walk(dirPath):
		for file in files:
			if fnmatch.fnmatch(file, pattern):
				if (exclude is None) or (not fnmatch.fnmatch(file, exclude)):
					G = readGraph(os.path.join(root, file), fileformat, **kwargs)
					graphs[G.getName()] = G
					if some:
						if len(graphs) == some:
							return graphs
	return graphs


class MatReader:
	def __init__(self, key = "G"):
		self.key = key

	def read(self, path):
		return readMat(path, self.key)

def readMat(path, key="G"):
	""" Reads a Graph from a matlab object file containing an adjacency matrix and returns a NetworKit::Graph
		Parameters:
		- key: The key of the adjacency matrix in the matlab object file (default: A)"""
	matlabObject = scipy.io.loadmat(path)
	# result is a dictionary of variable names and objects, representing the matlab object
	if key in matlabObject:
		A = matlabObject[key]
	else:
		raise Exception("Key {0} not found in the matlab object file".format(key))
	(n, n2) = A.shape
	if n != n2:
		raise Exception("this ({0}x{1}) matrix is not square".format(n, n2))
#	if not numpy.array_equal(A, A.transpose): # FIXME this is slow and doesn't work as expected, seems to be False for valid inputs
#		logging.warning("the adjacency matrix is not symmetric")
	G = __Graph(n)
	nz = A.nonzero()
	for (u,v) in zip(nz[0], nz[1]):
		if not G.hasEdge(u, v):
			G.addEdge(u, v)
	return G

class MatWriter:
	def __init__(self, key="G"):
		self.key = key

	def write(self, G, path, key="G"):
		writeMat(G, path, key)

def writeMat(G, path, key="G"):
	""" Writes a NetworKit::Graph to a Matlab object file.
		Parameters:
		- G: The graph
		- path: Path of the matlab file
		- key: Dictionary Key
	"""
	matrix = algebraic.adjacencyMatrix(G, matrixType='sparse')
	scipy.io.savemat(path, {key : matrix})


# writing
def getWriter(fileformat, *kargs, **kwargs):
	writers =	{
		Format.METIS:			METISGraphWriter(),
		Format.GraphML:			GraphMLWriter(),
		Format.GEXF:			GEXFWriter(),
		Format.SNAP:			SNAPGraphWriter(),
		Format.EdgeListCommaOne:	EdgeListWriter(',',1,),
		Format.EdgeListSpaceOne:	EdgeListWriter(' ',1),
		Format.EdgeListSpaceZero:	EdgeListWriter(' ',0),
		Format.EdgeListTabOne:		EdgeListWriter('\t',1),
		Format.EdgeListTabZero:		EdgeListWriter('\t',0),
		Format.GraphViz:		DotGraphWriter(),
		Format.DOT:			DotGraphWriter(),
		Format.GML:			GMLGraphWriter(),
		Format.LFR:			EdgeListWriter('\t',1),
		Format.GraphToolBinary:         GraphToolBinaryWriter(),
		Format.MAT:			MatWriter(),
		Format.ThrillBinary:		ThrillGraphBinaryWriter(),
		Format.NetworkitBinary:         NetworkitBinaryWriter()
	}

	# special case for custom Edge Lists
	if fileformat == Format.EdgeList:
		return EdgeListWriter(*kargs, **kwargs)

	else:
		if fileformat not in writers:
			raise Exception("format {0} currently not supported".format(fileformat))

		return writers[fileformat]#(**kwargs)

def writeGraph(G, path, fileformat, *kargs, **kwargs):
	""" Write graph to various output formats.

	Paramaters:
	- G:			a graph
	- path: 		output path
	- fileformat: 	an element of the Format enumeration

	"""

	dirname = os.path.dirname(os.path.realpath(path))
	# the given file path does not exist yet
	if not os.path.isfile(path):
		# check write permissions on the directory
		if not os.access(dirname, os.W_OK):
			# we may not write on this directory, raise Error
			raise IOError("No permission to write")
		# else everthing is alright
	else:
		# the given path points to a file
		if not os.access(path, os.W_OK):
			raise IOError("No permission to write")
		else:
			logging.warning("overriding given file")
	writer = getWriter(fileformat, *kargs, **kwargs)
	writer.write(G, path)
	logging.info("wrote graph {0} to file {1}".format(G, path))


class GraphConverter:

	def __init__(self, reader, writer):
		self.reader = reader
		self.writer = writer

	def convert(self, inPath, outPath):
		G = self.reader.read(inPath)
		self.writer.write(G, outPath)

	def __str__(self):
		return "GraphConverter: {0} => {0}".format(self.reader, self.writer)

def getConverter(fromFormat, toFormat):
	reader = getReader(fromFormat)
	writer = getWriter(toFormat)
	return GraphConverter(reader, writer)


def convertGraph(fromFormat, toFormat, fromPath, toPath=None):
	converter = getConverter(fromFormat, toFormat)
	if toPath is None:
		toPath = "{0}.{1}.graph".format(fromPath.split(".")[0], toFormat)
	converter.convert(fromPath, toPath)
	print("converted {0} to {1}".format(fromPath, toPath))



# dynamic

def readStream(path, mapped=True, baseIndex=0):
	"""
		Read a graph event stream from a file.
	"""
	return DGSStreamParser(path, mapped, baseIndex).getStream()

def writeStream(stream, path):
	"""
		Write a graph event stream to a file.
	"""
	DGSWriter().write(stream, path)
back to top