/** * The threege tool is used to embed a graph into the hyperbolic plane * by first embedding it into the 3-dimensional hyperbolic space and * forcing all vertices onto the plane afterwards. */ #include #include #include #include #include #include #include #include #include #include #include "NLEHelper.h" #include "constantforce.h" #include "coordinate.h" #include "embedding.h" #include "gnuplot_i.h" #include "graph.h" #include "surfaceforce.h" // #include "force.hpp" #include "hyperbolicspace.h" #define FOR(i, n) for (int(i) = 0; (i) < (n); (i)++) /** * Flags */ DEFINE_string(graph, "", "The input file containing the graph to embed."); DEFINE_bool(info, true, "Print information about the graph."); DEFINE_int32(seed, 0, "The seed used to generate random numbers."); DEFINE_string(embed, "", "If set, embeds the graph and writes the resulting embedding to " "the specified file."); DEFINE_string(print, "", "Writes the graph to the specified file."); DEFINE_string(draw, "", "Saves the embedding as SVG file to the specified path."); DEFINE_string(embedding, "", "Reads an embedding from the passed path."); DEFINE_string(compare, "", "Compare the calculated embedding to the one saved in the passed " "file path. (phi-phi / r-r)"); DEFINE_string(plot, "", "Saves the plots representing the quality of the embedding to " "the passed filepath."); DEFINE_int32(degree, 5, "Only vertices whose degree is larger than this value will have " "node label in the drawn SVG file."); DEFINE_string(names, "", "Path to a file that maps the node indices to strings. When " "drawing the graph as SVG these names will be used instead of " "the indices."); DEFINE_string(edgeHisto, "", "File where the edge length histogram should be plotted to."); DEFINE_string( phase1Out, "", "Where the embedding after the first phase should be written to."); // Flags that represent parameters to tweak the embedding process. DEFINE_double(stable, 0.1, "Determines the threshold for when an embedding counts as " "stable or not."); DEFINE_double(repulsiveForceCoefficient, 0.03, "Determines how repulsive forces should to be scaled. Usually, " "the should be much weaker than the attractive forces, so a " "small value of 0.03 was set as default."); DEFINE_double( repulsiveForceCoefficientSecondPhase, 0.03, "Determines how repulsive forces should to be scaled in the " "second phase. Usually, the should be much weaker than the " "attractive forces, so a small value of 0.03 was set as default."); DEFINE_double(attractiveExponent, 1.0, "Determines the exponent of the attractive force."); DEFINE_double(repulsiveExponent, 0.5, "Determines the exponent of the repulsive force."); DEFINE_double(velocity, 0.5, "The velocity of the vertices which determines how much of the " "previous forces are again part of the current force."); DEFINE_double(initialTemperature, 0.5, "The initial temperature of the system."); DEFINE_double(temperatureDecreaseFactor, 0.975, "The factor that the temperature will be multiplied with, after " "an iteration. E.g. a factor of 0.5 will halve the temperature " "in each iteration."); void print_edge_stats_of_embedding(const Embedding& embedding) { std::cout << "Determining edge stats..." << "\n"; double number_of_edges = 0.0; double number_of_non_edges = 0.0; double long_edges = 0.0; double short_non_edges = 0.0; double R = embedding.max_radius(); for (int i = 0; i < embedding.G.n - 1; i++) { Coordinate i_coord = embedding.points[i]; for (int j = i + 1; j < embedding.G.n; j++) { Coordinate j_coord = embedding.points[j]; double edge_length = Force::hyperbolic_distance_between_coordinates(i_coord, j_coord); double difference_to_desired = R - edge_length; if (embedding.G.adjacent(i, j)) { if (difference_to_desired < 0.0) { long_edges += difference_to_desired * difference_to_desired; } number_of_edges += 1.0; } else { if (difference_to_desired > 0.0) { short_non_edges += difference_to_desired * difference_to_desired; } number_of_non_edges += 1.0; } } } std::cout << "Edges too long: " << long_edges / number_of_edges << "\n"; std::cout << "Non-Edges too short: " << short_non_edges / number_of_non_edges << "\n"; } void plot_edge_histogram_of_embedding_to_file(Embedding& embedding, string file) { std::cout << "Plotting edge length histogram to: " << file << "\n"; vector> histogram; embedding.edge_length_histogram(histogram); vector indices(histogram.size()); vector edge_buckets(histogram.size()); vector non_edge_buckets(histogram.size()); double number_of_edges = 0.0; double number_of_non_edges = 0.0; for (int i = 0; i < (int)histogram.size(); i++) { std::pair bucket = histogram[i]; double number_of_edges_in_bucket = (double)bucket.first; number_of_edges += number_of_edges_in_bucket; edge_buckets[i] = number_of_edges_in_bucket; double number_of_non_edges_in_bucket = (double)bucket.second; number_of_non_edges += number_of_non_edges_in_bucket; non_edge_buckets[i] = number_of_non_edges_in_bucket; indices[i] = i; } for (int i = 0; i < (int)histogram.size(); i++) { edge_buckets[i] /= number_of_edges; non_edge_buckets[i] /= number_of_non_edges; } Gnuplot gp("Edge Length Histogram"); gp.savetops(file); // set range gp.set_xrange(0, indices.size()); // gp.set_yrange(0, 1.0); gp.set_style("lines lw 5 lc rgb \"green\"") .plot_xy(indices, edge_buckets, "Edges"); gp.set_style("lines lw 5 lc rgb \"red\"") .plot_xy(indices, non_edge_buckets, "Non Edges"); /** * Now we determine the error we made using this embedding. * * This works as follows. For each bucket we check which of * the two (edges or non-edges) is smaller and add the bucket * value to the corresponding error term. */ double edges_error = 0.0; double non_edges_error = 0.0; for (int i = 0; i < (int)histogram.size(); i++) { if (edge_buckets[i] > non_edge_buckets[i]) { non_edges_error += non_edge_buckets[i]; } else { edges_error += edge_buckets[i]; } } std::cout << "Histogram Error Edges: " << edges_error << "\n"; std::cout << "Histogram Error Non Edges: " << non_edges_error << "\n"; } /** * Main procedure */ int main(int argc, char* argv[]) { /** * Evaluating the flags. */ google::ParseCommandLineFlags(&argc, &argv, true); google::InitGoogleLogging(argv[0]); FLAGS_log_dir = "/tmp/"; std::ios_base::sync_with_stdio(false); std::srand(FLAGS_seed); if (FLAGS_graph.empty()) { LOG(WARNING) << "No input graph specified!"; return EXIT_FAILURE; } unordered_map node_labels; vector new_permutation; auto G = Graph::fromFile(FLAGS_graph, &node_labels); G.sortByDegrees(&new_permutation); for (auto label_pair : node_labels) { string label = label_pair.first; int old_index = label_pair.second; node_labels[label] = new_permutation[old_index]; } if (FLAGS_info) { std::cout << "# Graph: " << FLAGS_graph << "\n"; std::cout << "# n = " << G.n << "\n"; std::cout << "# m = " << G.numEdges() << "\n"; std::cout << "# avg. deg. = " << G.averageDegree() << "\n"; std::cout << "# beta = " << G.powerLawExponent() << "\n"; std::cout << "# clustering = " << G.clusterCoeff() << "\n"; double T, n_orig, m_orig, alpha, R; NLEHelper::estimateHyperbolicParameters(G, &T, &n_orig, &m_orig, &alpha, &R, nullptr); std::cout << "# Hyperbolic Parameters: n = " << n_orig << ", m = " << m_orig << ", R = " << R << ", T = " << T << ", alpha = " << alpha << "\n"; } int n = G.n; int total_iterations = 0; double T, n_orig, m_orig, alpha, R; vector estimated_radii(n); NLEHelper::estimateHyperbolicParameters(G, &T, &n_orig, &m_orig, &alpha, &R, &estimated_radii); if (FLAGS_print.length() > 0) { std::cout << "Writing graph to file: " << FLAGS_print << "\n"; G.printToFile(FLAGS_print.c_str(), nullptr, false); } if (FLAGS_embed.length() > 0) { std::chrono::high_resolution_clock::time_point embedding_start = std::chrono::high_resolution_clock::now(); double max_radius = 0.0; vector points(n); FOR(i, n) { int seed = rand(); Coordinate rand_coord = Coordinate::random_point_on_surface_of_unit_ball(seed); std::uniform_real_distribution<> distribution(0, R); int seed2 = rand(); std::default_random_engine generator(seed2); double radius = distribution(generator); if (radius > max_radius) { max_radius = radius; } rand_coord *= radius; points[i] = rand_coord; } Embedding embedding = Embedding(G, points); embedding.velocity = FLAGS_velocity; /** * Before we start we determine the edge length histogram. This * will be overwritten later but at least we will have the error * terms in the console output. */ if (FLAGS_edgeHisto.length() > 0) { plot_edge_histogram_of_embedding_to_file(embedding, FLAGS_edgeHisto); } /** * Phase 1: General Forces... */ std::cout << "Phase 1: Radial + General forces..." << "\n"; /** * This force is responsible for moving the nodes on the sphere surface. */ SurfaceForce force = SurfaceForce(R, M_PI_4 / 2.0); force.temperature = FLAGS_initialTemperature; force.temperature_decrease = FLAGS_temperatureDecreaseFactor; force.repulsive_force_coefficient = FLAGS_repulsiveForceCoefficient; force.attractive_exponent = FLAGS_attractiveExponent; force.repulsive_exponent = FLAGS_repulsiveExponent; /** * This force is responsible for determining the radii. */ SurfaceForce radial_force = SurfaceForce(max_radius, M_PI_4 / 2.0); radial_force.temperature = FLAGS_initialTemperature; radial_force.temperature_decrease = FLAGS_temperatureDecreaseFactor; radial_force.repulsive_force_coefficient = FLAGS_repulsiveForceCoefficient; /** * Start with 10 iterations of radial forces. */ int initial_radial_iterations = 10; std::cout << initial_radial_iterations << " iterations of radial forces..." << "\n"; FOR(i, initial_radial_iterations) { embedding.apply_radial_force_to_vertices(radial_force); /** * After applying the radial forces, we determine the new * maximum radius, which will be our new disk radius. */ double new_max_radius = embedding.max_radius(); force.disk_radius = new_max_radius; } std::cout << initial_radial_iterations << " iterations of radial forces... Done." << "\n"; int current_iteration = 0; int max_iteration = 500; int radial_forces_interval = 20; int radial_forces_intermediate_iterations = 10; std::cout << "Performing at most " << max_iteration << " iterations of general forces and applying " << radial_forces_intermediate_iterations << " iterations of radial forces every " << radial_forces_interval << " iterations." << "\n"; bool embedding_is_stable = false; // double average_percent_of_processed_cells = 0.0; while (!embedding_is_stable && current_iteration < max_iteration) { /** * Every "radial_forces_interval" iterations we apply radial forces. */ if (current_iteration % radial_forces_interval == 0) { FOR(i, radial_forces_intermediate_iterations) { embedding.apply_radial_force_to_vertices(radial_force); /** * After applying the radial forces, we determine the new * maximum radius, which will be our new disk radius. */ double new_max_radius = embedding.max_radius(); force.disk_radius = new_max_radius; } } current_iteration++; total_iterations++; embedding.apply_force_to_vertices(force); /** * Check if the embedding is stable now. */ embedding_is_stable = embedding.is_stable_with_threshold(FLAGS_stable); if (force.temperature * force.temperature_decrease > 0.015) { force.temperature *= force.temperature_decrease; } else { force.temperature = 0.015; } /** * We also decrease the temperature of the radial_force. */ if (radial_force.temperature * radial_force.temperature_decrease > 0.015) { radial_force.temperature *= radial_force.temperature_decrease; } else { radial_force.temperature = 0.015; } } /** * Stop iterating if the maximum number of iterations is reached. */ if (current_iteration < max_iteration) { std::cout << "Stopped early (" << current_iteration << " of " << max_iteration << " iterations) since the embedding was already stable." << "\n"; } /** * Save the 3D embedding after the first phase. */ if (!FLAGS_phase1Out.empty()) { std::cout << "Saving embedding after first phase. File: " << FLAGS_phase1Out << "\n"; std::ofstream out_file; out_file.open(FLAGS_phase1Out); out_file << embedding.to_string(true); out_file.close(); } /** * Phase 1 End */ /** * Phase 2: Plane Forces... */ std::cout << "Phase 2: Plane forces..." << "\n"; /** * Determine the plane that the vertices should be pulled * towards. */ Coordinate plane_normal = embedding.fitting_plane(); /** * The force that pulls the vertices towards the plane. */ ConstantForce plane_force = ConstantForce(max_radius, M_PI / 3.0); plane_force.temperature = force.temperature; /** * The force that we used in the first phase will still by * applied in the second phase. However now we make the repulsive * forces larger, as it was too weak (on purpose) in the first * phase. */ force.repulsive_force_coefficient = FLAGS_repulsiveForceCoefficientSecondPhase; current_iteration = 0; max_iteration = 500; std::cout << "Performing at most " << max_iteration << " iterations." << "\n"; /** * We stop when the average distances of the vertices to the * plane is small enough. */ double average_distance_to_plane = embedding.average_distance_of_embedding_to_plane_with_normal( plane_normal); while (average_distance_to_plane > 0.5 && current_iteration < max_iteration) { current_iteration++; total_iterations++; // embedding // .apply_approximate_force_to_vertices_and_flatten_towards_plane_with_normal( // force, plane_force, plane_normal, FLAGS_pooling * R); embedding.apply_force_to_vertices_and_flatten_towards_plane_with_normal( force, plane_force, plane_normal); average_distance_to_plane = embedding.average_distance_of_embedding_to_plane_with_normal( plane_normal); /** * Update the temperature of the force */ if (force.temperature * force.temperature_decrease > 0.015) { force.temperature *= force.temperature_decrease; } else { force.temperature = 0.015; } plane_force.temperature = force.temperature; } /** * Stop if we have already reached the maximum number of iterations. */ if (current_iteration < max_iteration) { std::cout << "Stopped early (" << current_iteration << " of " << max_iteration << " iterations) since the average distance to the plane " "was already small enough." << "\n"; } /** * Phase 2 End */ /** * Phase 3: Reduce to two dimensions. */ std::cout << "Phase 3: Reduce to two dimensions." << "\n"; embedding.convert_to_2D_embedding(); /** * Phase 3 End */ std::chrono::high_resolution_clock::time_point embedding_end = std::chrono::high_resolution_clock::now(); auto embedding_duration = std::chrono::duration_cast(embedding_end - embedding_start) .count(); std::cout << "The embedding process took: " << embedding_duration / 1000.0 << " seconds." << "\n"; std::cout << "Writing embedding to file: " << FLAGS_embed << "\n"; std::ofstream file; file.open(FLAGS_embed); file << embedding.to_string_2D(true) << "\n"; file.close(); if (FLAGS_draw.length() > 0) { std::cout << "Drawing embedding to file: " << FLAGS_draw << "\n"; vector node_color(embedding.G.n, 1); if (FLAGS_names.length() > 0) { std::cout << "Reading node names from file: " << FLAGS_names << "\n"; std::ifstream file; file.open(FLAGS_names.c_str()); CHECK(file.good()) << "Maybe the file " << FLAGS_names << " doesn't exist?"; for (std::string line; getline(file, line);) { std::vector string_components; std::stringstream ss(line); std::string component; while (ss >> component) { string_components.push_back(component); if (ss.peek() == ' ') { ss.ignore(); } } string node_label = string_components[0]; string node_name = ""; for (int i = 1; i < string_components.size(); i++) { node_name += " " + string_components[i]; } int node_index = node_labels[node_label]; embedding.G.labels[node_index] = node_name; } } embedding.draw(FLAGS_draw.c_str(), node_color, FLAGS_degree, "", 1000, 1000); std::cout << "Embedding Log-Likelihood: " << embedding.log_likelihood() << "\n"; } if (FLAGS_compare.length() > 0 && FLAGS_plot.length() > 0) { std::cout << "Comparing with embedding: " << FLAGS_compare << "\n"; try { // create a new gnuplot file Gnuplot gp("Embedding Quality"); gp.savetops(FLAGS_plot); // set range gp.set_xrange(0.0, 2.0 * M_PI); gp.set_yrange(0.0, 2.0 * M_PI); std::cout << "Reading embedding from: " << FLAGS_compare << "\n"; vector other_points(embedding.G.n, Coordinate()); Embedding other_embedding = Embedding(embedding.G, other_points); other_embedding.read_embedding_from_file(FLAGS_compare.c_str(), node_labels); std::cout << "Other Embedding Log-Likelihood: " << other_embedding.log_likelihood() << "\n"; std::cout << "Plotting data to: " << FLAGS_plot << "\n"; vector other_phi_values(embedding.G.n); other_embedding.get_phi_values(other_phi_values); vector this_phi_values(embedding.G.n); embedding.get_phi_values(this_phi_values); gp.set_style("points pointtype 5 ps 0.75 lc rgb \"blue\"") .plot_xy(other_phi_values, this_phi_values, "Phi-Phi Plot"); // start a new picture gp.reset_plot(); double R = embedding.max_radius(); gp.set_xrange(0.0, 1.5 * R); gp.set_yrange(0.0, 1.5 * R); vector other_r_values(embedding.G.n); other_embedding.get_r_values(other_r_values); vector this_r_values(embedding.G.n); embedding.get_r_values(this_r_values); gp.set_style("points pointtype 5 ps 0.75 lc rgb \"blue\"") .plot_xy(other_r_values, this_r_values, "R-R Plot"); } catch (GnuplotException ge) { cout << ge.what() << endl; } } if (FLAGS_edgeHisto.length() > 0) { plot_edge_histogram_of_embedding_to_file(embedding, FLAGS_edgeHisto); } } if (FLAGS_embedding.length() > 0) { std::cout << "Reading embedding from file: " << FLAGS_embedding << "\n"; vector points(G.n, Coordinate()); Embedding embedding = Embedding(G, points); embedding.read_embedding_from_file(FLAGS_embedding.c_str(), node_labels); if (FLAGS_draw.length() > 0) { std::cout << "Drawing embedding to file: " << FLAGS_draw << "\n"; vector node_color(embedding.G.n, 1); embedding.draw(FLAGS_draw.c_str(), node_color, FLAGS_degree, "", 1000, 1000); // if (FLAGS_show) { // string open_drawing_command = "open " + FLAGS_draw; // system(open_drawing_command.c_str()); // } } if (FLAGS_compare.length() > 0 && FLAGS_plot.length() > 0) { std::cout << "Comparing with embedding: " << FLAGS_compare << "\n"; try { // create a new gnuplot file Gnuplot gp("Embedding Quality"); gp.savetops(FLAGS_plot); // set range gp.set_xrange(0.0, 2.0 * M_PI); gp.set_yrange(0.0, 2.0 * M_PI); std::cout << "Reading embedding from: " << FLAGS_compare << "\n"; vector other_points(embedding.G.n, Coordinate()); Embedding other_embedding = Embedding(embedding.G, other_points); other_embedding.read_embedding_from_file(FLAGS_compare.c_str(), node_labels); std::cout << "Other Embedding Log-Likelihood: " << other_embedding.log_likelihood() << "\n"; // if (FLAGS_edgeStats) { // print_edge_stats_of_embedding(other_embedding); // } std::cout << "Plotting data to: " << FLAGS_plot << "\n"; vector other_phi_values(embedding.G.n); other_embedding.get_phi_values(other_phi_values); vector this_phi_values(embedding.G.n); embedding.get_phi_values(this_phi_values); gp.set_style("points pointtype 5 ps 0.75 lc rgb \"blue\"") .plot_xy(other_phi_values, this_phi_values, "Phi-Phi Plot"); // start a new picture gp.reset_plot(); double R = embedding.max_radius(); gp.set_xrange(0.0, 1.5 * R); gp.set_yrange(0.0, 1.5 * R); vector other_r_values(embedding.G.n); other_embedding.get_r_values(other_r_values); vector this_r_values(embedding.G.n); embedding.get_r_values(this_r_values); gp.set_style("points pointtype 5 ps 0.75 lc rgb \"blue\"") .plot_xy(other_r_values, this_r_values, "R-R Plot"); // if (FLAGS_show) { // string open_plot_command = "open " + FLAGS_plot; // system(open_plot_command.c_str()); // } } catch (GnuplotException ge) { cout << ge.what() << endl; } } if (FLAGS_edgeHisto.length() > 0) { plot_edge_histogram_of_embedding_to_file(embedding, FLAGS_edgeHisto); } } return 0; }