# https://github.com/wintericie/visual-analysis-class-boundary
# Tip revision: 6161210037eedf38304dc60b8ed6c545faff1f06 authored by Yuxin Ma on 08 September 2020, 18:36:18 UTC
# readme updated
# readme updated
# Tip revision: 6161210
# path.py
import numpy as np
from flask import request
from flask_restful import Resource
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.manifold import Isomap
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import normalize
import networkx as nx
from itertools import chain
from .utils import knn_by_label
def intersection_distance_creator(k):
    """Build a set-overlap distance function scaled by ``k``.

    The returned callable takes two iterables, counts the distinct elements
    they have in common and returns ``1 - shared / k``.  When both inputs
    hold ``k`` distinct items the result lies in ``[0, 1]``: ``0`` for
    identical sets and ``1`` for disjoint ones.

    :param k: divisor applied to the number of shared elements.
    :return: a two-argument distance function.
    """
    def _intersection_distance(l1, l2):
        shared = set(l1) & set(l2)
        return 1 - len(shared) / k

    return _intersection_distance
class Path(Resource):
    """REST resource that builds chains of local linear SVMs between models.

    For every request item the handler finds the shortest route, through a
    k-nearest-neighbour graph of all seed points, between the seeds of a
    source local model and the seeds of a target local model.  Each interior
    point of that route gets a training neighbourhood; neighbourhoods that
    overlap and remain linearly separable are merged, and one linear SVM is
    fitted per merged group.
    """

    # Regularisation constant shared by every linear SVC fitted here.
    LOCAL_SVM_C = 1e3
    # Binary class ids used when training a one-vs-rest local SVM.
    TARGET_SVM_TARGET_LABEL = 100
    TARGET_SVM_NON_TARGET_LABEL = 200
    # When true, two path nodes are merged only if they carry the same label.
    ONLY_MERGE_SAME_CLASS = True
    # Minimum training accuracy a trial merge must reach to be accepted.
    MERGE_MIN_TRAIN_ACC = 0.9
    # Neighbours per seed in the seed graph (clamped for tiny seed sets).
    SEED_GRAPH_NEIGHBORS = 5
    # Number of points sampled on each decision plane, and the cap on
    # rejection-sampling attempts so a request can never spin forever.
    PLANE_SAMPLE_COUNT = 3
    MAX_PLANE_SAMPLE_ATTEMPTS = 10000

    def __init__(self, json_data, Gs,
                 correct_predict_labels_,
                 correct_predict_idx_,
                 smoothed_knn):
        """Store the shared data the request handler works on.

        :param json_data: mapping read for the keys ``'localModels'``,
            ``'dataVectors'``, ``'label'`` and ``'labelItems'``.
        :param Gs: stored as-is; not used by this resource's handler.
        :param correct_predict_labels_: forwarded unchanged to
            ``knn_by_label``.
        :param correct_predict_idx_: lookup used to translate the neighbour
            positions returned by ``knn_by_label`` into indices of
            ``json_data['dataVectors']``.
        :param smoothed_knn: forwarded unchanged to ``knn_by_label``
            (presumably a fitted neighbour index -- confirm against
            ``.utils``).
        """
        self.json_data = json_data
        self.Gs = Gs
        self.correct_predict_labels_ = correct_predict_labels_
        self.correct_predict_idx_ = correct_predict_idx_
        self.smoothed_knn = smoothed_knn

    def post(self):
        """Compute the local-SVM path for every requested model pair.

        The request body is parsed as JSON and iterated; each item must
        provide ``'sourceLocalSVM'`` and ``'targetLocalSVM'``, both indices
        into ``json_data['localModels']``.

        :return: one list per request item, each produced by
            ``dictify_localsvm``.  The list is empty when the route has no
            interior points or when no route connects the two models.
        """
        req_all = request.get_json(force=True)
        json_data = self.json_data
        local_models = json_data['localModels']

        # Request-independent data: convert once instead of once per item.
        vectors = np.array(json_data['dataVectors'])
        labels = np.array(json_data['label'])
        label_count = len(json_data['labelItems'])

        total_seeds = sorted(set(
            chain.from_iterable(model['target'] for model in local_models)
        ))
        seed_graph = self._build_seed_graph(vectors, total_seeds)

        paths = []
        for req in req_all:
            source_seeds = local_models[req['sourceLocalSVM']]['target']
            target_seeds = local_models[req['targetLocalSVM']]['target']

            seed_path = self._shortest_seed_path(
                seed_graph, source_seeds, target_seeds)
            # Only the interior of the route is modelled; the end points
            # already belong to the source/target local models.
            interior = [] if seed_path is None else seed_path[1:-1]

            # Only the neighbourhood is recorded per node; the classifier
            # is fitted once per merged group further down.
            path_nodes = [
                self._collect_neighbourhood(node, vectors, labels, label_count)
                for node in interior
            ]
            if not path_nodes:
                paths.append(dictify_localsvm([]))
                continue

            merge_groups = self._group_mergeable_nodes(
                path_nodes, vectors, labels)
            merged_models = [
                self._fit_group_local_svm(group, path_nodes, vectors, labels)
                for group in merge_groups
            ]
            paths.append(dictify_localsvm(merged_models))

        return paths

    def _build_seed_graph(self, vectors, total_seeds):
        """Return the distance-weighted k-NN graph over the seed points.

        Nodes are labelled with the seed's index into ``vectors``.
        """
        seed_graph = nx.Graph()
        seed_graph.add_nodes_from(total_seeds)
        if len(total_seeds) < 2:
            # Nothing to connect; keep the bare nodes so lookups still work.
            return seed_graph

        # kneighbors_graph() without a query excludes each point itself, so
        # it needs strictly fewer neighbours than there are samples.
        n_neighbors = min(self.SEED_GRAPH_NEIGHBORS, len(total_seeds) - 1)
        seed_knn = NearestNeighbors(n_neighbors=n_neighbors)
        seed_knn.fit(vectors[total_seeds])
        adjacency = seed_knn.kneighbors_graph(mode='distance')

        # networkx 3.0 removed from_scipy_sparse_matrix in favour of
        # from_scipy_sparse_array; use whichever this installation offers.
        from_sparse = (getattr(nx, 'from_scipy_sparse_array', None)
                       or nx.from_scipy_sparse_matrix)
        positional_graph = from_sparse(adjacency, edge_attribute='weight')
        return nx.relabel_nodes(
            positional_graph, dict(enumerate(total_seeds)))

    @staticmethod
    def _shortest_seed_path(seed_graph, sources, targets):
        """Return the shortest weighted path from any source to any target.

        Ties keep the first pair met in iteration order.  ``None`` is
        returned when no source can reach any target.
        """
        best_path = None
        best_length = float('inf')
        for s in sources:
            for t in targets:
                try:
                    # One Dijkstra run yields both the length and the path.
                    length, candidate = nx.single_source_dijkstra(
                        seed_graph, source=s, target=t, weight='weight')
                except nx.NetworkXNoPath:
                    continue
                if length < best_length:
                    best_length = length
                    best_path = candidate
        return best_path

    def _collect_neighbourhood(self, target_i, vectors, labels, label_count):
        """Describe one path node: its index, label and training neighbours."""
        per_class = (label_count - 1) * 10
        train_nums = {
            labels[target_i]: per_class,
            'others': per_class
        }
        _dists, knns, _new_labels = knn_by_label(
            self.smoothed_knn,
            self.correct_predict_labels_,
            vectors[target_i],
            train_nums
        )
        return {
            'target': int(target_i),
            'target_label': labels[target_i],
            # Map positions returned by the helper to dataset indices.
            'knns': [self.correct_predict_idx_[i] for i in knns[0]],
        }

    def _group_mergeable_nodes(self, path_nodes, vectors, labels):
        """Greedily group path nodes whose neighbourhoods can share one SVM.

        A node joins a group when its neighbourhood overlaps the group
        leader's (intersection distance below 1), the labels match (if
        ``ONLY_MERGE_SAME_CLASS``), and a linear SVC trained on the pooled
        neighbourhoods reaches ``MERGE_MIN_TRAIN_ACC`` training accuracy.

        :return: list of groups, each a list of positions in ``path_nodes``.
        """
        knn_lists = [node['knns'] for node in path_nodes]
        metric = intersection_distance_creator(len(knn_lists[0]))
        # A single-node path would otherwise request zero neighbours,
        # which scikit-learn rejects.
        overlap_index = NearestNeighbors(
            n_neighbors=max(1, len(path_nodes) - 1), radius=1.0,
            metric=metric)
        overlap_index.fit(knn_lists)

        groups = []
        already_merged = [False] * len(path_nodes)
        for node_i, node in enumerate(path_nodes):
            if already_merged[node_i]:
                continue

            dists, nearby = overlap_index.kneighbors([node['knns']])
            dists, nearby = dists[0], nearby[0]
            # Results are distance-sorted: cut at the first node that
            # shares no neighbour with this one.
            cutoff = next(
                (j for j, d in enumerate(dists) if d >= 1.0), len(dists))

            group = [node_i]
            for other_i in nearby[:cutoff]:
                if already_merged[other_i] or other_i == node_i:
                    continue
                other = path_nodes[other_i]
                if (self.ONLY_MERGE_SAME_CLASS
                        and other['target_label'] != node['target_label']):
                    continue

                # Trial: can one linear model still separate the original
                # classes on the pooled neighbourhoods?
                pooled = list(chain.from_iterable(
                    path_nodes[i]['knns'] for i in group))
                pooled += other['knns']
                pooled_vectors = vectors[pooled]
                pooled_labels = labels[pooled]

                trial_svm = SVC(C=self.LOCAL_SVM_C, kernel='linear')
                trial_svm.fit(pooled_vectors, pooled_labels)
                trial_score = trial_svm.score(pooled_vectors, pooled_labels)
                if trial_score >= self.MERGE_MIN_TRAIN_ACC:
                    group.append(other_i)

            groups.append(group)
            for i in group:
                already_merged[i] = True
        return groups

    def _fit_group_local_svm(self, group, path_nodes, vectors, labels):
        """Fit the one-vs-rest linear SVM for one merged group of nodes.

        :return: dict with the keys consumed by ``dictify_localsvm``.
        """
        targets = [path_nodes[i]['target'] for i in group]
        knns = list(chain.from_iterable(
            path_nodes[i]['knns'] for i in group))
        knn_vectors = vectors[knns]

        # The first node's label defines the positive class of the group.
        group_label = labels[targets[0]]
        binarized_labels = [
            self.TARGET_SVM_TARGET_LABEL if labels[k] == group_label
            else self.TARGET_SVM_NON_TARGET_LABEL
            for k in knns]

        local_svm = SVC(C=self.LOCAL_SVM_C, kernel='linear')
        local_svm.fit(knn_vectors, binarized_labels)

        # Orthonormal basis spanned by the plane normal and the first
        # principal direction of the neighbourhood.
        pca = PCA(n_components=1).fit(knn_vectors)
        side_matrix, _ = np.linalg.qr(
            np.vstack((local_svm.coef_[0], pca.components_[0])).T)

        return {
            'target': targets,
            'target_label': labels[targets],
            'knns': knns,
            'local_svm': local_svm,
            'train_acc': local_svm.score(knn_vectors, binarized_labels),
            'initSideMatrix': side_matrix,
            'planeSamples': self._sample_decision_plane(
                local_svm, knn_vectors)
        }

    def _sample_decision_plane(self, local_svm, knn_vectors):
        """Draw random points on the SVM plane inside the data bounding box.

        All coordinates but the last are drawn uniformly from the bounding
        box of ``knn_vectors``; the last is solved from ``w . x + b = 0``
        and the point is kept only if it also falls strictly inside the
        box.  Up to ``PLANE_SAMPLE_COUNT`` points are returned; fewer (or
        none) when the plane cannot be solved for the last coordinate or
        the attempt budget runs out.
        """
        w = local_svm.coef_[0]
        b = local_svm.intercept_[0]
        range_min = np.min(knn_vectors, axis=0)
        range_max = np.max(knn_vectors, axis=0)

        # No sample can ever be accepted in these cases; bail out instead
        # of looping.
        if w[-1] == 0 or not range_min[-1] < range_max[-1]:
            return []

        samples = []
        for _ in range(self.MAX_PLANE_SAMPLE_ATTEMPTS):
            sample = np.random.uniform(range_min, range_max)
            x_m = (-b - np.dot(sample[:-1], w[:-1])) / w[-1]
            if range_min[-1] < x_m < range_max[-1]:
                sample[-1] = x_m
                samples.append(sample.tolist())
                if len(samples) == self.PLANE_SAMPLE_COUNT:
                    break
        return samples
def dictify_localsvm(path_local_svms):
    """Turn local-SVM records into plain, list-based dictionaries.

    Each input record must provide ``'target'``, ``'target_label'``,
    ``'knns'``, ``'local_svm'``, ``'train_acc'``, ``'initSideMatrix'`` and
    ``'planeSamples'``.  Labels and neighbour indices are cast to ``int``,
    array attributes are converted with ``tolist()`` and the SVM
    coefficients are passed through ``normalize`` before conversion.

    :param path_local_svms: iterable of local-SVM record dicts.
    :return: list with one converted dict per input record, in order.
    """
    converted = []
    for record in path_local_svms:
        converted.append({
            'target': record['target'],
            'targetLabel': list(map(int, record['target_label'])),
            'knns': list(map(int, record['knns'])),
            'localSVM': {
                'C': record['local_svm'].C,
                'support_': record['local_svm'].support_.tolist(),
                'coef': normalize(record['local_svm'].coef_).tolist(),
                'intercept': record['local_svm'].intercept_.tolist()
            },
            'train_acc': record['train_acc'],
            'initSideMatrix': record['initSideMatrix'].tolist(),
            'planeSamples': record['planeSamples']
        })
    return converted