https://github.com/kit-parco/networkit
Raw File
Tip revision: f5585caf898df1a7c49826420c2146ceeb909268 authored by petholza on 19 January 2024, 11:49:27 UTC
cpp: improves RmatGenerator and tests
Tip revision: f5585ca
linkprediction.pyx
# distutils: language=c++

from libcpp.vector cimport vector
from libcpp.utility cimport pair

from .graph cimport _Graph, Graph
from .support import MissingDependencyError
from .structures cimport count, index, node

import numpy as np
import warnings
try:
	import sklearn
except ImportError:
	have_sklearn = False
else:
	have_sklearn = True

cdef extern from "<algorithm>" namespace "std":
	vector[pair[pair[node, node], double]] move(vector[pair[pair[node, node], double]]) nogil
	vector[pair[node, node]] move(vector[pair[node, node]]) nogil

cdef extern from "<networkit/linkprediction/LinkPredictor.hpp>":

	cdef cppclass _LinkPredictor "NetworKit::LinkPredictor":
		_LinkPredictor(const _Graph& G) except +
		double run(node u, node v) except +
		vector[pair[pair[node, node], double]] runAll() except +
		vector[pair[pair[node, node], double]] runOn(vector[pair[node, node]] nodePairs) except +
		void setGraph(const _Graph& newGraph) except +

cdef class LinkPredictor:
	""" 
	LinkPredictor(G=None)
	
	Abstract base class for link predictors.

	Parameters
	----------
	G : networkit.Graph, optional
		The graph to work on. Default: None.
	"""
	cdef _LinkPredictor* _this

	def __cinit__(self, *args):
		# The construction is handled by the subclasses
		return

	def __dealloc__(self):
		if self._this is not NULL:
			del self._this
			self._this = NULL

	def setGraph(self, Graph newGraph):
		""" 
		setGraph(newGraph)

		Sets the graph to work on.

		Parameters
		----------
		newGraph : networkit.Graph
			The graph to work on.
   		"""
		self._this.setGraph(newGraph._this)

	def run(self, node u, node v):
		""" 
		run(u, v)
		
		Returns a score indicating the likelihood of a future link between the given nodes.

		Prior to calling this method a graph should be provided through the constructor or
		by calling setGraph. Note that only undirected graphs are accepted.
		There is also no lower or upper bound for scores and the actual range of values depends
		on the specific link predictor implementation. In case u == v a 0 is returned.
		If suitable this method might make use of parallelization to enhance performance.

		Parameters
		----------
		u : int
			First node in graph.
		v : int
			Second node in graph.

		Returns
		-------
		float
			A prediction-score indicating the likelihood of a future link between the given nodes.
		"""
		return self._this.run(u, v)

	def runAll(self):
		""" 
		runAll()
		
		Runs the link predictor on all currently unconnected node-pairs.

		Possible self-loops are also excluded. The method makes use of parallelisation.

		Returns
		-------
		list(tuple(int, int))
			A list of pairs containing all currently unconnected node-pairs as the first elements 
			and the corresponding scores as the second elements. The list is sorted ascendingly by node-pair.
		"""
		return move(self._this.runAll())

	def runOn(self, vector[pair[node, node]] nodePairs):
		""" 
		runOn(nodePairs)
		
		Executes the run-method on al given node-pairs and returns a list of predictions.

		The result is a list of pairs where the first element is the node-pair and it's second
		element the corresponding score generated by the run-method. The method makes use of
		parallelisation.

		Parameters
		----------
		nodePairs : list(tuple(int, int))
			Node-pairs to run the predictor on.

		Returns
		-------
		list(tuple(int, float))
			A list of pairs containing the given node-pair as the first element and it's corresponding score 
			as the second element. The list is sorted ascendingly by node-pair.
		"""
		return move(self._this.runOn(nodePairs))

cdef extern from "<networkit/linkprediction/KatzIndex.hpp>":

	cdef cppclass _KatzIndex "NetworKit::KatzIndex"(_LinkPredictor):
		_KatzIndex(count maxPathLength, double dampingValue) except +
		_KatzIndex(const _Graph& G, count maxPathLength, double dampingValue) except +

cdef class KatzIndex(LinkPredictor):
	""" 
	KatzIndex(G = None, maxPathLength = 5, dampingValue = 0.005)
	
	Implementation of the Katz index.

	Katz index assigns a pair of nodes a similarity score
	that is based on the sum of the weighted number of paths of length l
	where l is smaller than a given limit.

	Parameters
	----------
	G : networkit.Graph, optional
		The graph to operate on. Default: None
	maxPathLength : int, optional
		Maximal length of the paths to consider. Default: 5
	dampingValue : float, optional
		Used to exponentially damp every addend of the sum. Should be in (0, 1]. Default: 0.005
	"""

	def __cinit__(self, Graph G = None, count maxPathLength = 5, double dampingValue = 0.005):
		if G is None:
			self._this = new _KatzIndex(maxPathLength, dampingValue)
		else:
			self._this = new _KatzIndex(G._this, maxPathLength, dampingValue)

cdef extern from "<networkit/linkprediction/CommonNeighborsIndex.hpp>":

	cdef cppclass _CommonNeighborsIndex "NetworKit::CommonNeighborsIndex"(_LinkPredictor):
		_CommonNeighborsIndex() except +
		_CommonNeighborsIndex(const _Graph& G) except +

cdef class CommonNeighborsIndex(LinkPredictor):
	""" 
	CommonNeighborsIndex(G=None)
	
	The CommonNeighborsIndex calculates the number of common neighbors of a node-pair in a given graph.

	Parameters
	----------
	G : networkit.Graph, optional
		The graph to work on. Default: None
	"""

	def __cinit__(self, Graph G = None):
		if G is None:
			self._this = new _CommonNeighborsIndex()
		else:
			self._this = new _CommonNeighborsIndex(G._this)

cdef extern from "<networkit/linkprediction/PreferentialAttachmentIndex.hpp>":

	cdef cppclass _PreferentialAttachmentIndex "NetworKit::PreferentialAttachmentIndex"(_LinkPredictor):
		_PreferentialAttachmentIndex() except +
		_PreferentialAttachmentIndex(const _Graph& G) except +

cdef class PreferentialAttachmentIndex(LinkPredictor):
	""" 
	PreferentialAttachmentIndex(G=None)

	Implementation of the Preferential Attachment Index.

	The run-method simply calculates the product of the number of nodes in the neighborhoods
	regarding the given nodes.

	Parameters
	----------
	G : networkit.Graph, optional
		The graph to work on. Default: None
	"""

	def __cinit__(self, Graph G = None):
		if G is None:
			self._this = new _PreferentialAttachmentIndex()
		else:
			self._this = new _PreferentialAttachmentIndex(G._this)

cdef extern from "<networkit/linkprediction/JaccardIndex.hpp>":

	cdef cppclass _JaccardIndex "NetworKit::JaccardIndex"(_LinkPredictor):
		_JaccardIndex() except +
		_JaccardIndex(const _Graph& G) except +

cdef class JaccardIndex(LinkPredictor):
	""" 
	JaccardIndex(G=None)
	
	Implementation of the Jaccard index which normalizes the Common Neighbors Index.

	This is done through dividing the number of common neighbors by the number of nodes
	in the neighboorhood-union.

	Parameters
	----------
	G : networkit.Graph, optional
		The graph to work on. Default: None
	"""
	def __cinit__(self, Graph G = None):
		if G is None:
			self._this = new _JaccardIndex()
		else:
			self._this = new _JaccardIndex(G._this)

cdef extern from "<networkit/linkprediction/AdamicAdarIndex.hpp>":

	cdef cppclass _AdamicAdarIndex "NetworKit::AdamicAdarIndex"(_LinkPredictor):
		_AdamicAdarIndex() except +
		_AdamicAdarIndex(const _Graph& G) except +

cdef class AdamicAdarIndex(LinkPredictor):
	""" 
	AdamicAdarIndex(G=None)
	
	Implementation of the Adamic/Adar Index.

	The index sums up the reciprocals of the logarithm of the degree of all
	common neighbors of u and v.

	Parameters
	----------
	G : networkit.Graph, optional
		The graph to work on. Default: None
	"""

	def __cinit__(self, Graph G = None):
		if G is None:
			self._this = new _AdamicAdarIndex()
		else:
			self._this = new _AdamicAdarIndex(G._this)

cdef extern from "<networkit/linkprediction/UDegreeIndex.hpp>":

	cdef cppclass _UDegreeIndex "NetworKit::UDegreeIndex"(_LinkPredictor):
		_UDegreeIndex() except +
		_UDegreeIndex(const _Graph& G) except +

cdef class UDegreeIndex(LinkPredictor):
	""" 
	UDegreeIndex(G=None)
	
	Index that simply returns the degree of the first given node.

	Parameters
	----------
	G : networkit.Graph, optional
		The graph to work on. Default: None
	"""

	def __cinit__(self, Graph G = None):
		if G is None:
			self._this = new _UDegreeIndex()
		else:
			self._this = new _UDegreeIndex(G._this)

cdef extern from "<networkit/linkprediction/VDegreeIndex.hpp>":

	cdef cppclass _VDegreeIndex "NetworKit::VDegreeIndex"(_LinkPredictor):
		_VDegreeIndex() except +
		_VDegreeIndex(const _Graph& G) except +

cdef class VDegreeIndex(LinkPredictor):
	""" 
	VDegreeIndex(G=None)
	
	Index that simply returns the degree of the second given node.

	Parameters
	----------
	G : networkit.Graph, optional
		The graph to work on. Default: None
	"""

	def __cinit__(self, Graph G = None):
		if G is None:
			self._this = new _VDegreeIndex()
		else:
			self._this = new _VDegreeIndex(G._this)

cdef extern from "<networkit/linkprediction/AlgebraicDistanceIndex.hpp>":

	cdef cppclass _AlgebraicDistanceIndex "NetworKit::AlgebraicDistanceIndex"(_LinkPredictor):
		_AlgebraicDistanceIndex(count numberSystems, count numberIterations, double omega, index norm) except +
		_AlgebraicDistanceIndex(const _Graph& G, count numberSystems, count numberIterations, double omega, index norm) except +
		void preprocess() except +

cdef class AlgebraicDistanceIndex(LinkPredictor):
	""" 
	AlgebraicDistanceIndex(G, numberSystems, numberIterations, omega=0.5, norm=2)
	
	Algebraic distance assigns a distance value to pairs of nodes according to their structural closeness in the graph.

	Parameters
	----------
	G : networkit.Graph
		The graph to work on. Can be set to None and default is None.
	numberSystems : int
		Number of vectors/systems used for algebraic iteration.
	numberIterations : int
		Number of iterations in each system.
	omega : float, optional
		Overrelaxation parameter. Default: 0.5
	norm : int, optional
		The norm factor of the extended algebraic distance. Maximum norm is realized by setting the norm to 0. Default: 2
	"""

	def __cinit__(self, Graph G, count numberSystems, count numberIterations, double omega = 0.5, index norm = 2):
		if G is None:
			self._this = new _AlgebraicDistanceIndex(numberSystems, numberIterations, omega, norm)
		else:
			self._this = new _AlgebraicDistanceIndex(G._this, numberSystems, numberIterations, omega, norm)

	def preprocess(self):
		""" 
		preprocess()
		
		Executes necessary initializations.

		Starting with random initialization, compute for all numberSystems
		"diffusion" systems the situation after numberIterations iterations
		of overrelaxation with overrelaxation parameter omega.

		Notes
		-----
		Needs to be called before algdist delivers meaningful results!
		"""
		(<_AlgebraicDistanceIndex *>self._this).preprocess()

cdef extern from "<networkit/linkprediction/NeighborhoodDistanceIndex.hpp>":

	cdef cppclass _NeighborhoodDistanceIndex "NetworKit::NeighborhoodDistanceIndex"(_LinkPredictor):
		_NeighborhoodDistanceIndex() except +
		_NeighborhoodDistanceIndex(const _Graph& G) except +

cdef class NeighborhoodDistanceIndex(LinkPredictor):
	""" 
	NeighborhoodDistanceIndex(G=None)
	
	Assigns a distance value to pairs of nodes according to the overlap of their neighborhoods.

	Parameters
	----------
	G : networkit.Graph, optional
		The graph to work on. Default: None
	"""
	def __cinit__(self, Graph G = None):
		if G is None:
			self._this = new _NeighborhoodDistanceIndex()
		else:
			self._this = new _NeighborhoodDistanceIndex(G._this)

cdef extern from "<networkit/linkprediction/TotalNeighborsIndex.hpp>":

	cdef cppclass _TotalNeighborsIndex "NetworKit::TotalNeighborsIndex"(_LinkPredictor):
		_TotalNeighborsIndex() except +
		_TotalNeighborsIndex(const _Graph& G) except +

cdef class TotalNeighborsIndex(LinkPredictor):
	"""
	TotalNeighborsIndex(G=None)
	
	Implementation of the Total Neighbors Index.

	This index is also known as Total Friends Index and returns
	the number of nodes in the neighborhood-union of u and v.

	Parameters
	----------
	G : networkit.Graph, optional
		The graph to work on. Default: None
	"""

	def __cinit__(self, Graph G = None):
		if G is None:
			self._this = new _TotalNeighborsIndex()
		else:
			self._this = new _TotalNeighborsIndex(G._this)

cdef extern from "<networkit/linkprediction/NeighborsMeasureIndex.hpp>":

	cdef cppclass _NeighborsMeasureIndex "NetworKit::NeighborsMeasureIndex"(_LinkPredictor):
		_NeighborsMeasureIndex() except +
		_NeighborsMeasureIndex(const _Graph& G) except +

cdef class NeighborsMeasureIndex(LinkPredictor):
	""" 
	NeighborsMeasureIndex(G=None)
	
	Implementation of the Neighbors Measure Index.

	This index is also known as Friends Measure and simply returns
	the number of connections between neighbors of the given nodes u and v.

	Parameters
	----------
	G : networkit.Graph, optional
		The graph to work on. Default: None
	"""

	def __cinit__(self, Graph G = None):
		if G is None:
			self._this = new _NeighborsMeasureIndex()
		else:
			self._this = new _NeighborsMeasureIndex(G._this)

cdef extern from "<networkit/linkprediction/SameCommunityIndex.hpp>":

	cdef cppclass _SameCommunityIndex "NetworKit::SameCommunityIndex"(_LinkPredictor):
		_SameCommunityIndex() except +
		_SameCommunityIndex(const _Graph& G) except +

cdef class SameCommunityIndex(LinkPredictor):
	""" 
	SameCommunityIndex(G=None)
	
	Index to determine whether two nodes are in the same community.

	Parameters
	----------
	G : networkit.Graph, optional
		The graph to work on. Default: None
	"""

	def __cinit__(self, Graph G = None):
		if G is None:
			self._this = new _SameCommunityIndex()
		else:
			self._this = new _SameCommunityIndex(G._this)

cdef extern from "<networkit/linkprediction/AdjustedRandIndex.hpp>":

	cdef cppclass _AdjustedRandIndex "NetworKit::AdjustedRandIndex"(_LinkPredictor):
		_AdjustedRandIndex() except +
		_AdjustedRandIndex(const _Graph& G) except +

cdef class AdjustedRandIndex(LinkPredictor):
	""" 
	AdjustedRandIndex(G=None)
	
	AdjustedRandIndex proposed by Hoffman et al. with natural threshold of 0.

	Parameters
	----------
	G : networkit.Graph, optional
		The graph to work on. Default: None
	"""

	def __cinit__(self, Graph G = None):
		if G is None:
			self._this = new _AdjustedRandIndex()
		else:
			self._this = new _AdjustedRandIndex(G._this)

cdef extern from "<networkit/linkprediction/ResourceAllocationIndex.hpp>":

	cdef cppclass _ResourceAllocationIndex "NetworKit::ResourceAllocationIndex"(_LinkPredictor):
		_ResourceAllocationIndex() except +
		_ResourceAllocationIndex(const _Graph& G) except +

cdef class ResourceAllocationIndex(LinkPredictor):
	""" 
	ResourceAllocationIndex(G=None)
	
	Implementation of the ResourceAllocationIndex.

	The index is similar to Adamic/Adar and sums up the reciprocals of
	the degree of all common neighbors of u and v.

	Parameters
	----------
	G : networkit.Graph, optional
		The graph to work on. Default: None
	"""

	def __cinit__(self, Graph G = None):
		if G is None:
			self._this = new _ResourceAllocationIndex()
		else:
			self._this = new _ResourceAllocationIndex(G._this)

cdef extern from "<networkit/linkprediction/RandomLinkSampler.hpp>" namespace "NetworKit::RandomLinkSampler":

	_Graph byPercentage(_Graph G, double percentage) except +
	_Graph byCount(_Graph G, count numLinks) except +

cdef class RandomLinkSampler:
	""" Provides methods to randomly sample a number of edges from a given graph. """

	@staticmethod
	def byPercentage(Graph G, double percentage):
		""" 
		byPercentage(G, percentage)
		
		Returns a graph that contains percentage percent of links form the given graph G.

		The links are randomly selected from G until the given percentage is reached.

		Parameters
		----------
		G : networkit.Graph
			The graph to construct the training graph from.
		percentage : float
			Percentage of links regarding the number of links in the given graph that should
			be in the returned graph.

		Returns
		-------
		networkit.Graph
			A graph that contains the given percentage of links from G.
		"""
		return Graph().setThis(byPercentage(G._this, percentage))

	@staticmethod
	def byCount(Graph G, count numLinks):
		""" 
		byCount(G, numLinks)
		
		Returns a graph that contains numLinks links from the given graph G.

		The links are randomly selected from G until the given count is reached.

		Parameters
		----------
		G : networkit.Graph
			The graph to construct the training graph from.
		numLinks : int
			Number of links the returned graph should consist of.

		Returns
		-------
		networkit.Graph
			A graph that contains the given number of links from G.
		"""
		return Graph().setThis(byCount(G._this, numLinks))

cdef extern from "<networkit/linkprediction/EvaluationMetric.hpp>":

	cdef cppclass _EvaluationMetric "NetworKit::EvaluationMetric":
		_EvaluationMetric() except +
		_EvaluationMetric(const _Graph& testGraph) except +
		void setTestGraph(const _Graph& newTestGraph) except +
		pair[vector[double], vector[double]] getCurve(vector[pair[pair[node, node], double]] predictions, count numThresholds) except +
		double getAreaUnderCurve() except +
		double getAreaUnderCurve(pair[vector[double], vector[double]] curve) except +

cdef class EvaluationMetric:
	"""
	EvaluationMetric(testGraph)

	Abstract base class for evaluation curves.

	The evualation curves are generated based on the predictions calculated
	by the link predictor and a testGraph to compare against.

	Parameters
	----------
	testGraph : networkit.Graph
		Graph containing the links to use for evaluation. Can be set to None and default is None.
	"""
	cdef _EvaluationMetric *_this

	def __cinit__(self, *args):
		# The construction is handled by the subclasses
		return

	def __dealloc__(self):
		if self._this is not NULL:
			del self._this
			self._this = NULL

	def setTestGraph(self, Graph newTestGraph):
		""" 
		setTestGraph(newTestGraph)
		
		Sets a new graph to use as ground truth for evaluation.

		Note that this won't reset the most recently calculated curve and as a consequence
		getAreaUnderCurve() will still behave as expected by returning the AUC of the most recent curve.

		Parameters
		----------
		newTestGraph : networkit.Graph
			New graph to use as ground truth.
		"""
		self._this.setTestGraph(newTestGraph._this)

	def getCurve(self, vector[pair[pair[node, node], double]] predictions, count numThresholds = 1000):
		""" 
		getCurve(predictions, numThresholds=1000)
		
		Returns a pair of X- and Y-lists describing the evaluation curve generated from the given predictions.

		The latest y-value will be used as a tie-breaker in case there are multiple y-values for one x-value.
		Note that the given number of thresholds is an upper bound for the number of points returned. 
		This is due to the fact that multiple y-values can map to one x-value in which case the tie-breaking 
		behaviour described above will intervene.

		Parameters
		----------
		predictions : list(tuple(tuple(int, int), float))
			Predictions to evaluate.
		numThresholds : int, optional
			The number of thresholds to use the metric on. Default: 1000

		Returns
		-------
		tuple(list(float), list(float))
			A pair of lists where the first list contains all x-values and the second one contains the corresponding y-value.
		"""
		return self._this.getCurve(predictions, numThresholds)

	def getAreaUnderCurve(self, pair[vector[double], vector[double]] curve = pair[vector[double], vector[double]]()):
		""" 
		getAreaUnderCurve(curve = ([], []))
		
		Returns the area under the most recently calculated or optionally the given curve by using the trapezoidal rule.

		Note that if there is no curve specified or the lists of the given curves are empty than
		the area under the most recently calculated curve will be returned.

		Parameters
		----------
		curve : tuple(list(float), list(float)), optional
			Curve whose AUC to determine. Default: Empty lists.

		Returns
		-------
		float
			The area under the given curve.
		"""
		if len(curve.first) == 0:
			return self._this.getAreaUnderCurve()
		return self._this.getAreaUnderCurve(curve)

cdef extern from "<networkit/linkprediction/ROCMetric.hpp>":

	cdef cppclass _ROCMetric "NetworKit::ROCMetric"(_EvaluationMetric):
		_ROCMetric() except +
		_ROCMetric(const _Graph& testGraph) except +

cdef class ROCMetric(EvaluationMetric):
	""" 
	ROCMetric(testGraph=None)
	
	Provides points that define the Receiver Operating Characteristic curve for a given set of predictions.

	Based on the generated points the area under the curve can be calculated with the trapzoidal rule.

	Parameters
	----------
	testGraph : networkit.Graph, optional
		Graph containing the links to use for evaluation. Default: None
	"""

	def __cinit__(self, Graph testGraph = None):
		if testGraph is None:
			self._this = new _ROCMetric()
		else:
			self._this = new _ROCMetric(testGraph._this)

cdef extern from "<networkit/linkprediction/PrecisionRecallMetric.hpp>":

	cdef cppclass _PrecisionRecallMetric "NetworKit::PrecisionRecallMetric"(_EvaluationMetric):
		_PrecisionRecallMetric() except +
		_PrecisionRecallMetric(const _Graph& testGraph) except +

cdef class PrecisionRecallMetric(EvaluationMetric):
	""" 
	PrecisionRecallMetric(testGraph=None)
	
	Provides points that define the Precision-Recall curve for a given set of predictions.

	Based on the generated points the area under the curve can be calculated with the trapzoidal rule.

	Parameter
	-----------
	testGraph : networkit.Graph, optional
		Graph containing the links to use for evaluation. Default: None
	"""

	def __cinit__(self, Graph testGraph = None):
		if testGraph is None:
			self._this = new _PrecisionRecallMetric()
		else:
			self._this = new _PrecisionRecallMetric(testGraph._this)

cdef extern from "<networkit/linkprediction/MissingLinksFinder.hpp>":

	cdef cppclass _MissingLinksFinder "NetworKit::MissingLinksFinder":
		_MissingLinksFinder(const _Graph& G) except +
		vector[pair[node, node]] findAtDistance(count k) except +
		vector[pair[node, node]] findFromNode(node u, count k) except +

cdef class MissingLinksFinder:
	""" 
	MissingLinksFinder(G)
	
	Allows the user to find missing links in the given graph.

	The absent links to find are narrowed down by providing a distance
	that the nodes of the missing links should have.
	For example in case of distance 2 only node-pairs that would close
	a triangle in the given graph get returned.

	Parameters
	----------
	G : networkit.Graph
		The graph to find missing links in.
	"""
	cdef _MissingLinksFinder* _this

	def __cinit__(self, Graph G):
		self._this = new _MissingLinksFinder(G._this)

	def __dealloc__(self):
		del self._this

	def findAtDistance(self, count k):
		""" 
		findAtDistance(k)
		
		Returns all missing links in the graph that have distance k.

		Note that a distance of k actually means that there are k different links
		on the path of the two nodes that are connected through that path.

		Parameters
		----------
		k : int
			Distance of the absent links.

		Returns
		-------
		list(tuple(int, int))
			An ascendingly sorted list of node-pairs where there is a missing link of distance k between the two nodes.
		"""
		return move(self._this.findAtDistance(k))

	def findFromNode(self, node u, count k):
		""" 
		findFromNode(u, k)
		
		Returns all missing links in the graph that have distance k and are connected to u.

		Note that a distance of k actually means that there are k different links
		on the path of the two nodes that are connected through that path.

		Parameters
		----------
		u : int
			Node to find missing links from.
		k : int
			Distance of the absent links.

		Returns
		-------
		list(tuple(int, int))
			A list of node-pairs where there is a missing link of distance k
			between the given node u and another node in the graph.
		"""
		return move(self._this.findFromNode(u, k))

cdef extern from "<networkit/linkprediction/NeighborhoodUtility.hpp>" namespace "NetworKit::NeighborhoodUtility":

	vector[node] getNeighborsUnion(const _Graph& G, node u, node v) except +
	vector[node] getCommonNeighbors(const _Graph& G, node u, node v) except +

cdef class NeighborhoodUtility:
	""" 
	NeighborhoodUtility()
	
	Provides basic operations on neighborhoods in a given graph.
	"""

	@staticmethod
	def getNeighborsUnion(Graph G, node u, node v):
		""" 
		getNeighborsUnion(G, u, v)
		
		Returns the union of the neighboorhoods of u and v.

		Parameters
		----------
		G : networkit.Graph
			Graph to obtain neighbors-union from.
		u : int
			The first node.
		v : int
			The second node.

		Returns
		-------
		list(int)
			A list containing all the nodes in the neighboorhood-union of u and v.
		"""
		return getNeighborsUnion(G._this, u, v)

	@staticmethod
	def getCommonNeighbors(Graph G, node u, node v):
		""" 
		getCommonNeighbors(G, u, v)
		
		Returns a list containing the node-ids of all common neighbors of u and v.

		Parameters
		----------
		G : networkit.Graph
			Graph to obtain common neighbors from.
		u : int
			First node.
		v : int
			Second node.

		Returns
		-------
		list(int)
			A list containing the node-ids of all common neighbors of u and v.
		"""
		return getCommonNeighbors(G._this, u, v)

cdef extern from "<networkit/linkprediction/LinkThresholder.hpp>" namespace "NetworKit::LinkThresholder":

	vector[pair[node, node]] byScore(vector[pair[pair[node, node], double]] predictions, double minScore)
	vector[pair[node, node]] byCount(vector[pair[pair[node, node], double]] predictions, count numLinks)
	vector[pair[node, node]] byPercentage(vector[pair[pair[node, node], double]] predictions, double percentageLinks)

cdef class LinkThresholder:
	""" 
	LinkThresholder()
	
	Filters given predictions based on some criterion and returns a list of node-pairs that fulfill the given criterion.

	This can be used to determine which node-pairs should actually be interpreted
	as future links and which shouldn't.
	"""

	@staticmethod
	def byScore(vector[pair[pair[node, node], double]] predictions, double minScore):
		""" 
		byScore(predictions,minScore)
		
		Returns the node-pairs whose scores are at least equal to the given minScore.

		Parameters
		----------
		predictions : list(tuple(tuple(int, int), float))
			Predictions to filter.
		minScore : float
			Minimal score that the returned node-pairs should have.

		Returns
		-------
		list(tuple(int, int))
			A list of node-pairs whose scores are at least equal to the given minScore.
		"""
		return byScore(predictions, minScore)

	@staticmethod
	def byCount(vector[pair[pair[node, node], double]] predictions, count numLinks):
		""" 
		byCount(predictions, numLinks)
		
		Returns the first numLinks highest scored node-pairs.

		Parameters
		----------
		predictions : list(tuple(tuple(int, int), float))
			Predictions to filter.
		numLinks : int
			Number of top-scored node-pairs to return.

		Returns
		-------
		list(tuple(int, int))
			The first numLinks highest scored node-pairs.
		"""
		return byCount(predictions, numLinks)

	@staticmethod
	def byPercentage(vector[pair[pair[node, node], double]] predictions, double percentageLinks):
		""" 
		byPercentage(predictions, percentageLinks)
		
		Returns the first percentageLinks percent of the highest scores node-pairs.

		Parameters
		----------
		predictions : list(tuple(tuple(int, int), float))
			Predictions to filter.
		percentageLinks : float
			Percentage of highest scored node-pairs to return.

		Returns
		-------
		list(tuple(int, int))
			The first percentageLinks percent of the highest scores node-pairs.
		"""
		return byPercentage(predictions, percentageLinks)

cdef extern from "<networkit/linkprediction/PredictionsSorter.hpp>" namespace "NetworKit::PredictionsSorter":

	void sortByScore (vector[pair[pair[node, node], double]]& predictions) except +
	void sortByNodePair (vector[pair[pair[node, node], double]]& predictions) except +

cdef class PredictionsSorter:
	""" 
	PredictionsSorter()
	
	Allows the sorting of predictions by score or node-pair.
	"""

	@staticmethod
	def sortByScore(list predictions):
		""" 
		sortByScore(predictions)

		Sorts the given predictions descendingly by score.

		In case there is a tie the node-pairs are used as a tie-breaker by sorting them
		ascendingly on the first node and on a tie ascendingly by the second node.

		Parameters
		----------
		predictions : list(tuple(tuple(int, int), float))
			The predictions to sort.
		"""
		cdef vector[pair[pair[node, node], double]] predCopy = predictions
		sortByScore(predCopy)
		predictions[:] = predCopy

	@staticmethod
	def sortByNodePair(list predictions):
		""" 
		sortByNodePair(predictions)
		
		Sorts the predictions ascendingly by node-pair.

		This means for example (0, 0) < (0, 1) and (1, 1) < (1, 0).

		Parameters
		----------
		predictions : list(tuple(tuple(int, int), float))
			The predictions to sort.
		"""
		cdef vector[pair[pair[node, node], double]] predCopy = predictions
		sortByNodePair(predCopy)
		predictions[:] = predCopy

def trainClassifier(trainingSet, trainingGraph, classifier, *linkPredictors):
	""" 
	trainClassifier(trainingSet, trainingGraph, classifier, *linkPredictors)
	
	Trains the given classifier with the feature-vectors generated by the given linkPredictors.

	Notes
	-----
	Needs Scikit-learn Python package to work. For details concerning installation see here: https://scikit-learn.org/stable/install.html

	Parameters
	----------
	trainingSet : list(tuple(int, int))
		Vector of node-pairs to generate features for.
	trainingGraph : networkit.Graph
		Training graph containing all edges from the training set.
	classifier: sklearn classifier
		Scikit-learn classifier to train. See here: https://scikit-learn.org/stable/auto_examples/classification/plot_classifier_comparison.html
	`*linkPredictors`: tuple()
		Predictors used for the generation of feature-vectors.
	"""
	# Make sure the set is sorted because the samples will be sorted by node-pairs (asc.)
	# and the labels would be sorted by the initial order. That would lead to an incorrect
	# matching between labels and samples.
	if not have_sklearn:
		raise MissingDependencyError("sklearn")
	trainingSet.sort()
	trainingLabels = getLabels(trainingSet, trainingGraph)
	trainingFeatures = getFeatures(trainingSet, *linkPredictors)
	classifier.fit(trainingFeatures, trainingLabels)

def getFeatures(nodePairs, *linkPredictors):
	""" 
	getFeatures(nodePairs, *linkPredictors)
	
	Returns a numpy-array containing the generated scores from the predictors for the given node-pairs.

	Parameters
	----------
	nodePairs : list(tuple(int, int))
		Node-pairs to get the samples for.
	`*linkPredictors` : tuple()
		List of link predictors to use for sample-generation.

	Returns
	-------
	numpy.array
		A numpy-array of shape (#nodePairs, #linkPredictors) containing the generated scores from the predictors for the given node-pairs.
	"""
	return np.column_stack(([list(zip(*p.runOn(nodePairs)))[1] for p in linkPredictors]))

def getLabels(nodePairs, G):
	""" 
	getLabels(nodePairs, G)
	
	Returns a numpy-array containing the labels of the given node-pairs.

	The labels are defined as follows: 1 = link, 0 = absent link.

	Parameters
	----------
	nodePairs : list(tuple(int, int))
		Node-pairs to get the labels for.
	G : networkit.Graph
		Graph which provides ground truth for the labels.

	Returns
	-------
	numpy.array
		A numpy-array containing the labels of the given node-pairs.
	"""
	return np.array(list(map(lambda p: 1 if G.hasEdge(p[0], p[1]) else 0, nodePairs)))
back to top