https://github.com/kit-parco/networkit
Raw File
Tip revision: 66548e5fc80394bfdfed7efdcf8bea770d268a6d authored by maxv on 10 November 2015, 19:30:55 UTC
version bump for automated generation of documentation
Tip revision: 66548e5
linkprediction.py

from _NetworKit import KatzIndex, CommonNeighborsIndex, JaccardIndex, PreferentialAttachmentIndex, AdamicAdarIndex, UDegreeIndex, VDegreeIndex, AlgebraicDistanceIndex, NeighborhoodDistanceIndex, TotalNeighborsIndex, NeighborsMeasureIndex, SameCommunityIndex, AdjustedRandIndex, ResourceAllocationIndex, RandomLinkSampler, ROCMetric, PrecisionRecallMetric, MissingLinksFinder, LinkThresholder, PredictionsSorter

from .graph import Graph

import numpy as np

# scikit-learn is treated as an optional dependency: if it cannot be imported,
# a warning is printed and loading of this module continues instead of failing.
try:
	import sklearn
except ImportError:
	print(""" WARNING: module 'sklearn' not found, supervised link prediction won't be available """)

def trainClassifier(trainingSet, trainingGraph, classifier, *linkPredictors):
	""" Fits the given classifier on feature-vectors produced by the given linkPredictors.

	Note that trainingSet is sorted in place (ascending) before labels and
	features are generated from it.

	Parameters
	----------
	trainingSet : vector[pair[node, node]]
		Vector of node-pairs to generate features for.
	trainingGraph : Graph
		Training graph containing all edges from the training set.
	classifier:
		Scikit-learn classifier to train; its fit(features, labels) method is called.
	linkPredictors:
		Predictors used for the generation of feature-vectors.
	"""
	# The samples come back ordered by node-pair (ascending), whereas the labels
	# follow the order of the node-pairs passed in. Sorting up front keeps every
	# label aligned with its sample.
	trainingSet.sort()
	labels = getLabels(trainingSet, trainingGraph)
	features = getFeatures(trainingSet, *linkPredictors)
	classifier.fit(features, labels)
	
def getFeatures(nodePairs, *linkPredictors):
	""" Returns a numpy-array containing the generated scores from the predictors for the given node-pairs.

	Every predictor contributes one column. The column is built from the second
	element (index 1) of each entry returned by the predictor's runOn(nodePairs).

	Parameters
	----------
	nodePairs : vector[pair[node, node]]
		Node-pairs to get the samples for. May be empty, in which case an
		array with zero rows is returned.
	*linkPredictors
		List of link predictors to use for sample-generation. At least one
		predictor is required, since np.column_stack cannot stack an empty sequence.

	Returns
	-------
	A numpy-array of shape (#nodePairs, #linkPredictors) containing the generated scores
	from the predictors for the given node-pairs.
	"""
	# Pick the score out of every entry directly instead of transposing with
	# zip(*...) and indexing the result: the transposed form has nothing to index
	# when there are no node-pairs and would raise an IndexError.
	scoreColumns = [
		[prediction[1] for prediction in predictor.runOn(nodePairs)]
		for predictor in linkPredictors
	]
	return np.column_stack(scoreColumns)

def getLabels(nodePairs, G):
	""" Builds the label vector for the given node-pairs.

	A node-pair is labelled 1 if G.hasEdge reports an edge between its two
	nodes, and 0 otherwise (1 = link, 0 = absent link).

	Parameters
	----------
	nodePairs : vector[pair[node, node]]
		Node-pairs to get the labels for.
	G : Graph
		Graph which provides ground truth for the labels.

	Returns
	-------
	A numpy-array containing the labels of the given node-pairs, in the
	same order as nodePairs.
	"""
	labels = [1 if G.hasEdge(pair[0], pair[1]) else 0 for pair in nodePairs]
	return np.array(labels)
back to top