https://github.com/kit-parco/networkit
Raw File
Tip revision: 5fdf33f9eba8915607b3f043210b5a2735a2706a authored by Kolja Esders on 13 December 2016, 20:47:18 UTC
Removed tag 4.2
Tip revision: 5fdf33f
GEXFIO.py
import queue
import xml.etree.cElementTree as ET
from xml.dom import minidom
from _NetworKit import Graph, GraphEvent

# GEXF Reader
class GEXFReader:
	def __init__(self):
		""" Initializes the GEXFReader class """
		self.mapping = dict()
		self.g = Graph(0)
		self.weighted = False
		self.directed = False
		self.dynamic = False
		self.hasDynamicWeights = False
		self.q = queue.Queue()
		self.eventStream = []
		self.nInitialNodes = 0
		self.timeFormat = ""

	def read(self, fpath):
		""" Reads and returns the graph object defined in fpath """
		#0. Reset internal vars and parse the xml
		self.__init__()
		doc = minidom.parse(fpath)

		#1. Determine if graph is dynamic, directed and has dynamically changing weights
		graph = doc.getElementsByTagName("graph")[0]
		if (graph.getAttribute("defaultedgetype") == "directed"):
			self.directed = True
		if (graph.getAttribute("mode") == "dynamic"):
			self.dynamic = True
		if self.dynamic:
			self.timeFormat = graph.getAttribute("timeformat")
		attributes = graph.getElementsByTagName("attribute")
		for att in attributes:
			if att.getAttribute("id") == "weight":
				self.hasDynamicWeights = True
				self.weighted = True

		#2. Read nodes and map them to IDs defined in GEXF file
		nodes = doc.getElementsByTagName("node")
		for n in nodes:
			u = n.getAttribute("id")
			if self.dynamic:
				"""
				A GEXF ID can be a string. However, this version of parser accepts ids
				in only 2 formats:
				1. id = "0,1,2," etc.
				2. id = "n0, n1, n2" etc.
				So either an integer or an integer that has n prefix.
				Gephi generates its random graphs in 2nd format for example.
				"""
				_id = ""
				try:
					_id = int(u)
				except:
					_id = int(u[1:])
				# 2-way mapping to refer nodes back in mapDynamicNodes() method
				self.mapping[u] = _id
				self.mapping[_id] = u
				controlList = {'elementAdded': False, 'elementDeleted': False}
				spells = n.getElementsByTagName("spell")
				if len(spells) > 0:
					for s in spells:
						self.parseDynamics(s, "n", controlList, u)
				else:
					self.parseDynamics(n, "n", controlList, u)
			else:
				self.mapping[u] = self.nInitialNodes
				self.nInitialNodes +=1
		if self.dynamic:
			self.mapDynamicNodes()

		#3. Read edges and determine if graph is weighted
		edges = doc.getElementsByTagName("edge")
		for e in edges:
			u = e.getAttribute("source")
			v = e.getAttribute("target")
			w = "1.0"
			if e.hasAttribute("weight"):
				self.weighted = True
				w = e.getAttribute("weight")
			if self.dynamic:
				controlList = {'elementAdded': False, 'elementDeleted': False}
				spells = e.getElementsByTagName("spell")
				if len(spells) > 0:
					for s in spells:
						self.parseDynamics(s, "e", controlList, u, v, w)
				else:
					self.parseDynamics(e, "e", controlList, u, v, w)
			else:
				self.q.put((u, v, w))

		#4. Create graph object
		self.g = Graph(self.nInitialNodes, self.weighted, self.directed)

		#5. Add initial edges to the graph and sort the eventStream by time
		#5.1 Adding initial edges
		while not self.q.empty():
			edge = self.q.get()
			(u, v, w) = (edge[0], edge[1], float(edge[2]))
			self.g.addEdge(self.mapping[u], self.mapping[v], w)

		#5.2 Sorting the eventStream by time and adding timeStep between events that happen in different times
		self.eventStream.sort(key=lambda x:x[1])
		for i in range(1, len(self.eventStream)):
			if self.eventStream[i][1] != self.eventStream[i-1][1]:
				self.eventStream.append((GraphEvent(GraphEvent.TIME_STEP, 0, 0, 0), self.eventStream[i-1][1]))
		self.eventStream.sort(key=lambda x:x[1])
		self.eventStream = [event[0] for event in self.eventStream]
		return (self.g, self.eventStream)



	def parseDynamics(self, element, elementType, controlList,  u,  v = "0", w = "0"):
		"""
			Determine the operations as follows:
			1.Element has start and not deleted before: Create add event
			2.Element has start and deleted before: Create restore event
			3.Element has end:Create del event
			4.If an element has end before start(or no start at all), add it to the initial graph
			5.For dynamic edges, simply go over the attvalues and create
			weight update events

			* A dynamic element must be defined either using only spells
			or inline attributes. These 2 shouldn't be mixed.
			(For example, Gephi will treat them differently. It'll ignore the inline declaration
			if the same element also contains spells)

		"""
		startTime = element.getAttribute("start")
		if startTime == "":
			startTime = element.getAttribute("startopen")
		endTime	= element.getAttribute("end")
		if endTime == "":
			endTime	= element.getAttribute("endopen")
		if self.timeFormat != "date":
			try:
				startTime = float(startTime)
			except:
				pass
			try:
				endTime = float(endTime)
			except:
				pass
				
		if startTime != "" and endTime != "":
			if startTime < endTime and not controlList['elementDeleted']:
				self.createEvent(startTime, "a"+elementType, u, v, w)
				controlList['elementAdded'] = True
			else:
				self.createEvent(startTime, "r"+elementType, u, v, w)
			self.createEvent(endTime, "d"+elementType, u, v, w)
			controlList['elementDeleted'] = True

		if startTime != "" and endTime == "":
			if controlList['elementDeleted']:
				self.createEvent(startTime, "r"+elementType, u, v, w)
			else:
				self.createEvent(startTime, "a"+elementType, u, v, w)
				controlList['elementAdded'] = True

	 	# Handle dynamic edge weights here
		if elementType == "e" and self.hasDynamicWeights:
			attvalues = element.getElementsByTagName("attvalue")
			# If a spell is traversed, attvalues are siblings
			if len(attvalues) == 0:
				attvalues = element.parentNode.parentNode.getElementsByTagName("attvalue")
			for att in attvalues:
				if att.getAttribute("for") == "weight":
					w = att.getAttribute("value")
					startTime = att.getAttribute("start")
					if startTime == "":
						startTime = att.getAttribute("startopen")
					if self.timeFormat != "date":
						startTime = float(startTime)
					# If this edge is not added, first weight update indicates edge addition
					if not controlList['elementAdded']:
						self.createEvent(startTime, "a"+elementType, u, v, w)
						controlList['elementAdded'] = True
					else:
						self.createEvent(startTime, "c"+elementType, u, v, w)

		if startTime == "":
			if not controlList['elementAdded']:
				if elementType == "n":
					self.mapping[u] = self.nInitialNodes
					self.nInitialNodes += 1
				else:
					self.q.put((u,v,w))
				controlList['elementAdded'] = True
			if endTime != "":
				self.createEvent(endTime, "d"+elementType, u, v, w)
				controlList['elementDeleted'] = True

	def createEvent(self, eventTime, eventType, u, v, w):
		"""
			 Creates a NetworKit::GraphEvent from the supplied parameters
			 and passes it to eventStream
		"""
		event, u = None, self.mapping[u]
		if eventType[1] == "e":
			v, w = self.mapping[v], float(w)
		if eventType == "an":
			event = GraphEvent(GraphEvent.NODE_ADDITION, u, 0, 0)
		elif eventType == "dn":
			event = GraphEvent(GraphEvent.NODE_REMOVAL, u, 0, 0)
		elif eventType == "rn":
			event = GraphEvent(GraphEvent.NODE_RESTORATION, u, 0, 0)
		elif eventType == "ae" or eventType == "re":
			event = GraphEvent(GraphEvent.EDGE_ADDITION, u, v, w)
		elif eventType == "de":
			event = GraphEvent(GraphEvent.EDGE_REMOVAL, u, v, w)
		elif eventType == "ce":
			event = GraphEvent(GraphEvent.EDGE_WEIGHT_UPDATE, u, v, w)
		self.eventStream.append((event, eventTime))

	def mapDynamicNodes(self):
		"""
			Node ID of a dynamic node must be determined before it's mapped to its GEXF ID.
			This requires processing the sorted eventStream and figuring out the addition order of the nodes.
			After that, node addition/deletion/restoration operations of this node must be readded to eventStream
			with correct mapping.

			!Note: New mapping of a node can be equal to old mapping of a node. In order to prevent collisions,
			isMapped array must be maintained and controlled.
		"""
		nNodes = self.nInitialNodes
		nEvent = len(self.eventStream)
		isMapped = [False] * nEvent
		self.eventStream.sort(key=lambda x:x[1])
		for i in range(0, nEvent):
			event = self.eventStream[i]
			# Only the nodes with addition event will get remapped.
			if not isMapped[i] and event[0].type == GraphEvent.NODE_ADDITION:
				u = event[0].u
				self.mapping[self.mapping[u]] = nNodes
				# All the other events of that node comes after it's addition event
				for j in range(i, len(self.eventStream)):
					event = self.eventStream[j]
					if not isMapped[j] and event[0].u == u:
						mappedEvent = GraphEvent(event[0].type, self.mapping[self.mapping[u]], 0, 0)
						self.eventStream[j] = (mappedEvent, event[1])
						isMapped[j] = True
				nNodes +=1
				isMapped[i] = True

	def getNodeMap(self):
		""" Returns GEXF ID -> NetworKit::Graph node ID mapping. """
		forwardMap = dict()
		for key in self.mapping:
			if type(key) == str:
				forwardMap[key] = self.mapping[key]
		return forwardMap


# GEXFWriter
class GEXFWriter:
	""" This class provides a function to write a NetworKit graph to a file in the
		GEXF format. """

	def __init__(self):
		""" Initializes the class. """
		self.edgeIdctr = 0
		self.q = queue.Queue()
		self.hasDynamicWeight = False

	def write(self, graph, fname, eventStream = [], mapping = []):
		"""
			Writes a NetworKit::Graph to the specified file fname.
			Parameters:
				- graph: a NetworKit::Graph python object
				- fname: the desired file path and name to be written to
				- eventStream: stream of events
				- mapping: random node mapping
		"""
		#0. Reset internal vars
		self.__init__()

		#1. Start with the root element and the right header information
		root = ET.Element('gexf')
		root.set("xmlns:xsi","http://www.w3.org/2001/XMLSchema-instance")
		root.set("xsi:schemaLocation","http://www.gexf.net/1.2draft http://www.gexf.net/1.2draft/gexf.xsd")
		root.set('version', '1.2')

		#2. Create graph element with appropriate information
		graphElement = ET.SubElement(root,"graph")
		if graph.isDirected():
			graphElement.set('defaultedgetype', 'directed')
		else:
			graphElement.set('defaultedgetype', 'undirected')
		if len(eventStream) > 0:
			graphElement.set('mode', 'dynamic')
			graphElement.set('timeformat', 'double')
			for event in eventStream:
				if event.type == GraphEvent.EDGE_WEIGHT_UPDATE:
					dynamicAtt = ET.SubElement(graphElement, "attributes")
					dynamicAtt.set('class', 'edge')
					dynamicAtt.set('mode', 'dynamic')
					dynamicWeight = ET.SubElement(dynamicAtt, "attribute")
					dynamicWeight.set('id', 'weight')
					dynamicWeight.set('title', 'Weight')
					dynamicWeight.set('type', 'float')
					self.hasDynamicWeight = True
					break
		else:
			graphElement.set('mode', 'static')

		#3. Add nodes
		nodesElement = ET.SubElement(graphElement, "nodes")
		nNodes, idArray = 0, []
		#3.1 Count the # of nodes (inital + dynamic nodes)
		for event in eventStream:
			if event.type == GraphEvent.NODE_ADDITION:
				nNodes +=1
		nNodes += len(graph.nodes())
		for i in range(0, nNodes):
			idArray.append(i)
		# Optional:Map nodes to a random mapping if user provided one
		if (len(mapping) > 0):
			if(nNodes != len(mapping)):
				 raise Exception('Size of nodes and mapping must match')
			else:
				for i in range(0, nNodes):
					idArray[i] = mapping[i]

		#3.2 Write nodes to the gexf file
		for n in range(nNodes):
			nodeElement = ET.SubElement(nodesElement,'node')
			nodeElement.set('id', str(idArray[n]))
			self.writeEvent(nodeElement, eventStream, n)

		#4. Add edges
		edgesElement = ET.SubElement(graphElement, "edges")
		#4.1 Put all edges into a queue(inital + dynamic edges)
		for e in graph.edges():
			self.q.put((e[0], e[1], graph.weight(e[0], e[1])))
		for event in eventStream:
			if event.type == GraphEvent.EDGE_ADDITION:
				self.q.put((event.u, event.v, event.w))
		#4.2 Write edges to the gexf file
		while not self.q.empty():
			edgeElement = ET.SubElement(edgesElement,'edge')
			e = self.q.get()
			edgeElement.set('source', str(idArray[e[0]]))
			edgeElement.set('target', str(idArray[e[1]]))
			edgeElement.set('id', "{0}".format(self.edgeIdctr))
			self.edgeIdctr += 1
			if graph.isWeighted():
				edgeElement.set('weight', str(e[2]))
			self.writeEvent(edgeElement, eventStream, e)

		#5. Write the generated tree to the file
		tree = ET.ElementTree(root)
		tree.write(fname,"utf-8",True)

	def writeEvent(self, xmlElement, eventStream, graphElement):
		# A var that indicates if the event belongs the graph element we traverse on
		matched = False
		startEvents = [GraphEvent.NODE_ADDITION, GraphEvent.EDGE_ADDITION, GraphEvent.NODE_RESTORATION]
		endEvents = [GraphEvent.NODE_REMOVAL, GraphEvent.EDGE_REMOVAL]
		nodeEvents = [GraphEvent.NODE_ADDITION, GraphEvent.NODE_REMOVAL, GraphEvent.NODE_RESTORATION]
		edgeEvents = [GraphEvent.EDGE_ADDITION, GraphEvent.EDGE_REMOVAL, GraphEvent.EDGE_WEIGHT_UPDATE]
		spellTag, weightTag, operation = False, False, ""
		timeStep = 0
		spellsElement, attValuesElement = None, None

		for event in eventStream:
			if event.type == GraphEvent.TIME_STEP:
				timeStep += 1
			if type(graphElement) == type(0): #a node is an integer
				matched = (event.type in nodeEvents and event.u == graphElement)
			else:
				matched = (event.type in edgeEvents and (event.u == graphElement[0] and event.v == graphElement[1]))
			if matched:
				# Handle weight update seperately
				if event.type == GraphEvent.EDGE_WEIGHT_UPDATE:
					if not weightTag:
						attvaluesElement = ET.SubElement(xmlElement, "attvalues")
						weightTag = True
					attvalue = ET.SubElement(attvaluesElement, "attvalue")
					attvalue.set('for', 'weight')
					attvalue.set('value', str(event.w))
					attvalue.set('start', str(timeStep))
					attvalue.set('endopen', str(timeStep + 1))
				else:
					if event.type in startEvents:
						operation = "start"
					else:
						operation = "end"
					if not spellTag:
						spellsElement = ET.SubElement(xmlElement, "spells")
						spellTag = True
					spellElement = ET.SubElement(spellsElement, "spell")
					spellElement.set(operation, str(timeStep))
back to top