Raw File
pairwise_d.py
import sys
import time
from input_functions import *
import networkx as nx
import random
import math

def check_stretch(spanner_sp, graph_sp, param):
  if spanner_sp<=(graph_sp+param):
    return True
  return False

def verify_spanner_with_checker(G_S, G, all_pairs, check_stretch, param):
        for i in range(len(all_pairs)):
                if not (all_pairs[i][0] in G_S.nodes() and all_pairs[i][1] in G_S.nodes()):
                        return False
                if not nx.has_path(G_S, all_pairs[i][0], all_pairs[i][1]):
                        return False
                sp = nx.shortest_path_length(G, all_pairs[i][0], all_pairs[i][1], 'weight')
                #if not check_stretch(nx.dijkstra_path_length(G_S, all_pairs[i][0], all_pairs[i][1]), sp, param):
                if not check_stretch(nx.shortest_path_length(G_S, all_pairs[i][0], all_pairs[i][1], 'weight'), sp, param):
                        return False
        return True

def d_light_initialization(G, d):
  H = nx.Graph()
  for u in G.nodes():
    edges = []
    for v in G.neighbors(u):
      edges.append([u, v, G.get_edge_data(u, v, 'weight')])
    #print(edges)
    edges = sorted(edges, key = lambda e:e[2]['weight'])
    for i in range(min(d,len(edges))):
      H.add_weighted_edges_from([(edges[i][0], edges[i][1], edges[i][2]['weight'])])
  return H

def all_pairs_from_subset(s):
        s = list(s)
        all_pairs = []
        for i in range(len(s)):
                for j in range(i+1, len(s)):
                        p = []
                        p.append(s[i])
                        p.append(s[j])
                        all_pairs.append(p)
        return all_pairs

def proper_edge_order(a, b):
  if a>b:
    t = a
    a = b
    b = t
  return a, b

def get_missing_edges(pth, H, l):
  cur_missing_edges = set()
  for i in range(1, len(pth)):
    u = pth[i-1]
    v = pth[i]
    u, v = proper_edge_order(u, v)
    if (u, v) not in H.edges():
      cur_missing_edges.add((u, v))
    if len(cur_missing_edges)==l:
      break
  return cur_missing_edges

def get_last_missing_edges(pth, H, l):
  cur_missing_edges = set()
  i = len(pth) - 2
  while True:
    u = pth[i]
    v = pth[i+1]
    u, v = proper_edge_order(u, v)
    if (u, v) not in H.edges():
      cur_missing_edges.add((u, v))
    i = i - 1
    if i<0:
      break
    if len(cur_missing_edges)==l:
      break
  return cur_missing_edges

def shortest_path_phase(H, P, path, l):
  missing_edges_type1 = set()
  missing_edges_type2 = set()
  missing_edges_type3 = set()
  missing_edges_type4 = set()
  for s, t in P:
    pth = path[s][t]
    cur_missing_edges = get_missing_edges(pth, H, len(pth))
    if len(cur_missing_edges)<=l:
      missing_edges_type1 = missing_edges_type1.union(cur_missing_edges)
    else:
      missing_edges_type2 = missing_edges_type2.union(cur_missing_edges)
      cur_missing_edges = get_missing_edges(pth, H, l)
      missing_edges_type3 = missing_edges_type3.union(cur_missing_edges)
      cur_missing_edges = get_last_missing_edges(pth, H, l)
      missing_edges_type4 = missing_edges_type4.union(cur_missing_edges)
  return missing_edges_type1, missing_edges_type2, missing_edges_type3, missing_edges_type4

def add_missing_edges(H, missing_edges, G):
  for u, v in missing_edges:
    H.add_weighted_edges_from([(u, v, G[u][v]["weight"])])

def pairwise2W(G, S, d_frac):
  P = all_pairs_from_subset(S)
  d = math.ceil(math.pow(len(P), 1/3)/d_frac)
  valid_spanner = False
  H = d_light_initialization(G, d)
  path = dict(nx.all_pairs_dijkstra_path(G))
  n = len(list(G.nodes()))
  if len(P)>0:
    l = math.ceil(n/math.pow(len(P), 2/3))
    prob = 1/(d*l)
    missing_edges_type1, missing_edges_type2, _, _ = shortest_path_phase(H, P, path, l)
    total_missing_edges = missing_edges_type1.union(missing_edges_type2)
    while len(total_missing_edges)>(n*d):
      if verify_spanner_with_checker(Gs, G, all_pairs_from_subset(st), check_stretch, 2*find_max_weight(G)):
        valid_spanner = True
        break
      add_missing_edges(H, missing_edges_type1, G)
      for r in G.nodes():
        if random.uniform(0, 1)<=prob:
          for v in G.nodes():
            pth = path[r][v]
            cur_missing_edges = get_missing_edges(pth, H, len(pth))
            for p, q in cur_missing_edges:
              H.add_weighted_edges_from([(p, q, G[p][q]["weight"])])
      missing_edges_type1, missing_edges_type2, _, _ = shortest_path_phase(H, P, path, l)
      total_missing_edges = missing_edges_type1.union(missing_edges_type2)
    if not valid_spanner:
      add_missing_edges(H, total_missing_edges, G)
  total_cost = 0
  for u, v in H.edges():
    total_cost = total_cost + H[u][v]["weight"]
  return H, total_cost

def pairwise4W(G, S, d_frac):
  P = all_pairs_from_subset(S)
  d = math.ceil(math.pow(len(P), 2/7)/d_frac)
  valid_spanner = False
  H = d_light_initialization(G, d)
  path = dict(nx.all_pairs_dijkstra_path(G))
  n = len(list(G.nodes()))
  if len(P)>0:
    l = math.ceil(n/math.pow(len(P), 5/7))
    prob1 = d/n
    prob2 = 1/(d*l)
    missing_edges_type1, missing_edges_type2, missing_edges_type3, missing_edges_type4 = shortest_path_phase(H, P, path, l)
    total_missing_edges = missing_edges_type1.union(missing_edges_type2)
    while len(total_missing_edges)>(n*d):
      if verify_spanner_with_checker(Gs, G, all_pairs_from_subset(st), check_stretch, 4*find_max_weight(G)):
        valid_spanner = True
        break
      add_missing_edges(H, missing_edges_type1, G)
      add_missing_edges(H, missing_edges_type3, G)
      add_missing_edges(H, missing_edges_type4, G)
      for r in G.nodes():
        if random.uniform(0, 1)<=prob1:
          for v in G.nodes():
            pth = path[r][v]
            cur_missing_edges = get_missing_edges(pth, H, len(pth))
            for p, q in cur_missing_edges:
              H.add_weighted_edges_from([(p, q, G[p][q]["weight"])])
      # intermediate edges
      sample_nodes = []
      for r in G.nodes():
        if random.uniform(0, 1)<=prob2:
          sample_nodes.append(r)
      sample_pairs = all_pairs_from_subset(sample_nodes)
      for r1, r2 in sample_pairs:
        cur_missing_edges = get_missing_edges(path[r1][r2], H, len(path[r1][r2]))
        if len(cur_missing_edges)<=(n/(d*d)):
          add_missing_edges(H, cur_missing_edges, G)
      missing_edges_type1, missing_edges_type2, missing_edges_type3, missing_edges_type4 = shortest_path_phase(H, P, path, l)
      total_missing_edges = missing_edges_type1.union(missing_edges_type2)
    if not valid_spanner:
      add_missing_edges(H, missing_edges_type1, G)
      add_missing_edges(H, missing_edges_type3, G)
      add_missing_edges(H, missing_edges_type4, G)
  total_cost = 0
  for u, v in H.edges():
    total_cost = total_cost + H[u][v]["weight"]
  return H, total_cost

def clustering(G, cluster_size):
  U = list(G.nodes())
  cluster_arr = []
  Gc = nx.Graph()
  for v in G.nodes():
    unclustered_nghbors = []
    for v2 in G.neighbors(v):
      if v2 in U:
        unclustered_nghbors.append(v2)
    if len(unclustered_nghbors)>=cluster_size:
      unclustered_nghbors = unclustered_nghbors[:cluster_size]
      cluster = []
      for v2 in unclustered_nghbors:
        U.remove(v2)
        cluster.append(v2)
        Gc.add_edge(v, v2, weight=G[v][v2]["weight"])
        for v3 in unclustered_nghbors:
          if not v2==v3:
            if G.has_edge(v2, v3):
              Gc.add_edge(v2, v3, weight=G[v2][v3]["weight"])
      cluster_arr.append(cluster)
  for v in U:
    for v2 in G.neighbors(v):
      Gc.add_edge(v, v2, weight=G[v][v2]["weight"])
  return Gc, cluster_arr

def find_max_weight(G):
  mx = 0
  for e in G.edges():
    u, v = e
    wgt = G.get_edge_data(u, v, 'weight')['weight']
    if mx<wgt:
      mx = wgt
  return mx

def subsetwise(G, S):
  #W = 10
  W = find_max_weight(G)
  p = math.sqrt(len(S)*W)
  Gs, cluster_arr = clustering(G, math.ceil(p))
  #print(Gs.edges(), cluster_arr)
  for u in S:
    for v in S:
      if u!=v:
        pth = nx.shortest_path(G, source=u, target=v, weight="weight")
        cst = 0
        for i, w in enumerate(pth):
          if i>0:
            if not Gs.has_edge(pth[i-1], w):
              cst = cst + 1
        val = 0
        for cluster in cluster_arr:
          if len(set(cluster).intersection(set(pth)))>0:
            if has_improvement(Gs, pth, cluster):
              val = val + 1
            else:
              pth.reverse()
              if has_improvement(Gs, pth, cluster):
                val = val + 1
        if cst <= (2*W + 1)*val:
          Gs.add_weighted_edges_from([(x, y, G[x][y]["weight"]) for x, y in zip(pth[:len(pth)-1], pth[1:])])
  #print(Gs.edges())
  subset_cost = 0
  for u, v in Gs.edges():
    #subset_cost = subset_cost + Gs[u][v]["weight"]
    subset_cost = subset_cost + 1
  return Gs, subset_cost

def pairwise6W(G, S, d_frac):
  P = all_pairs_from_subset(S)
  d = math.ceil(math.pow(len(P), 1/4)/d_frac)
  valid_spanner = False
  H = d_light_initialization(G, d)
  path = dict(nx.all_pairs_dijkstra_path(G))
  n = len(list(G.nodes()))
  if len(P)>0:
    l = math.ceil(n/math.pow(len(P), 3/4))
    prob = 1/(d*l)
    missing_edges_type1, missing_edges_type2, missing_edges_type3, missing_edges_type4 = shortest_path_phase(H, P, path, l)
    total_missing_edges = missing_edges_type1.union(missing_edges_type2)
    while len(total_missing_edges)>(n*d):
      if verify_spanner_with_checker(Gs, G, all_pairs_from_subset(st), check_stretch, 6*find_max_weight(G)):
        valid_spanner = True
        break
      add_missing_edges(H, missing_edges_type1, G)
      add_missing_edges(H, missing_edges_type3, G)
      add_missing_edges(H, missing_edges_type4, G)
      sample_nodes = []
      for r in G.nodes():
        if random.uniform(0, 1)<=prob:
          sample_nodes.append(r)
      G_subset, _ = subsetwise(G, sample_nodes)
      add_missing_edges(H, G_subset.edges(), G)
      missing_edges_type1, missing_edges_type2, missing_edges_type3, missing_edges_type4 = shortest_path_phase(H, P, path, l)
      total_missing_edges = missing_edges_type1.union(missing_edges_type2)
    if not valid_spanner:
      add_missing_edges(H, missing_edges_type1, G)
      add_missing_edges(H, missing_edges_type3, G)
      add_missing_edges(H, missing_edges_type4, G)
  total_cost = 0
  for u, v in H.edges():
    total_cost = total_cost + H[u][v]["weight"]
  return H, total_cost

def is_near_connected(G, H, u, v, G_sp):
  H_sp = dict(nx.shortest_path_length(H, weight="weight"))
  for x in H.neighbors(u):
    if G_sp[v][x]==H_sp[v][x]:
      return True
  for x in H.neighbors(v):
    if G_sp[u][x]==H_sp[u][x]:
      return True
  return False

def compute_subsetwise_spanner(G, S):
  d = math.ceil(math.sqrt(len(S)))
  H = d_light_initialization(G, d)
  G_sp = dict(nx.shortest_path_length(G, weight="weight"))
  for s in S:
    for t in S:
      if s==t:
        continue
      distance_preserved = False
      if is_near_connected(G, H, s, t, G_sp):
        distance_preserved = True
      else:
        sp = nx.shortest_path(G, s, t)
        for i in range(1, len(sp)-1):
          x = sp[i]
          if is_near_connected(G, H, s, x, G_sp) and is_near_connected(G, H, x, t, G_sp):
            distance_preserved = True
            break
      if not distance_preserved:
        for i in range(0, len(sp)-1):
          H.add_weighted_edges_from([(sp[i], sp[i+1], G.get_edge_data(sp[i], sp[i+1], 'weight'))])
  total_cost = 0
  for u, v in H.edges():
    total_cost = total_cost + H[u][v]["weight"]
  return H, total_cost

def pairwise8W(G, S, d_frac):
  P = all_pairs_from_subset(S)
  d = math.ceil(math.pow(len(P), 1/4)/d_frac)
  valid_spanner = False
  H = d_light_initialization(G, d)
  path = dict(nx.all_pairs_dijkstra_path(G))
  n = len(list(G.nodes()))
  l = math.ceil(n/math.pow(len(P), 3/4))
  prob = 1/(d*l)
  missing_edges_type1, missing_edges_type2, missing_edges_type3, missing_edges_type4 = shortest_path_phase(H, P, path, l)
  total_missing_edges = missing_edges_type1.union(missing_edges_type2)
  while len(total_missing_edges)>(n*d):
    if verify_spanner_with_checker(Gs, G, all_pairs_from_subset(st), check_stretch, 8*find_max_weight(G)):
      valid_spanner = True
      break
    add_missing_edges(H, missing_edges_type1, G)
    add_missing_edges(H, missing_edges_type3, G)
    add_missing_edges(H, missing_edges_type4, G)
    sample_nodes = []
    for r in G.nodes():
      if random.uniform(0, 1)<=prob:
        sample_nodes.append(r)
    G_subset, _ = compute_subsetwise_spanner(G, sample_nodes)
    add_missing_edges(H, G_subset.edges(), G)
    missing_edges_type1, missing_edges_type2, missing_edges_type3, missing_edges_type4 = shortest_path_phase(H, P, path, l)
    total_missing_edges = missing_edges_type1.union(missing_edges_type2)
  if not valid_spanner:
    add_missing_edges(H, missing_edges_type1, G)
    add_missing_edges(H, missing_edges_type3, G)
    add_missing_edges(H, missing_edges_type4, G)
  total_cost = 0
  for u, v in H.edges():
    total_cost = total_cost + H[u][v]["weight"]
  return H, total_cost


def multi_level_spanner(G, subset_arr, stretch, d_frac):
  total_cost = 0
  l = len(subset_arr)
  E = set()
  for i, T in enumerate(subset_arr):
    if stretch==2:
      Gs, cost = pairwise2W(G, T, d_frac)
    elif stretch==4:
      Gs, cost = pairwise4W(G, T, d_frac)
    elif stretch==6:
      Gs, cost = pairwise6W(G, T, d_frac)
    elif stretch==8:
      Gs, cost = pairwise8W(G, T, d_frac)
    subset_cost = 0 
    for u, v in Gs.edges():
      if ((u, v) not in E) and ((v, u) not in E):
        subset_cost = subset_cost + 1
        E.add((u, v))
    #print(T, subset_cost)
    total_cost = total_cost + l*subset_cost
    l = l - 1
  return total_cost

def check_pairwise(folder_name, file_name_without_ext, stretch, d_frac):
  filename = folder_name + '/' + file_name_without_ext +'.txt'
  G, subset_arr = build_networkx_graph(filename)
  #subset_arr.reverse()
  start_time = time.time()
  #Gs, subset_cost = subsetwise(G, subset_arr[0])
  subset_cost = multi_level_spanner(G, subset_arr, stretch, d_frac)
  total_time = time.time() - start_time
  # append the outputs to a csv file
  #f = open(folder_name + '/' + output_file, 'a')
  #f.write(folder_name + ';' + file_name_without_ext + ';' + str(kruskal_cost) + ';\n')
  #f.close()
  #print(folder_name + ';' + file_name_without_ext + ';' + str(subset_cost) + ';' + str(total_time) + ';\n')
  #st = set(subset_arr[0])
  #print(folder_name + ';' + file_name_without_ext + ';' + str(verify_spanner_with_checker(Gs, G, all_pairs_from_subset(st), check_stretch, 2*find_max_weight(G))) + ';' + str(total_time) + ';\n')
  print(folder_name + ';' + file_name_without_ext + ';' + str(subset_cost) + ';' + str(total_time) + ';\n')

if __name__ == '__main__':
  # 1. Folder name
  # 2. File name
  # 3. Output File name
  check_pairwise(sys.argv[1], sys.argv[2], int(sys.argv[3]), int(sys.argv[4]))

back to top