Raw File
main.cc
/**
 * The threege tool is used to embed a graph into the hyperbolic plane
 * by first embedding it into the 3-dimensional hyperbolic space and
 * forcing all vertices onto the plane afterwards.
 */

#include <gflags/gflags.h>
#include <glog/logging.h>
#include <math.h>
#include <stdlib.h>
#include <time.h>

#include <chrono>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <random>

#include "NLEHelper.h"
#include "constantforce.h"
#include "coordinate.h"
#include "embedding.h"
#include "gnuplot_i.h"
#include "graph.h"
#include "surfaceforce.h"
// #include "force.hpp"
#include "hyperbolicspace.h"

#define FOR(i, n) for (int(i) = 0; (i) < (n); (i)++)

/**
 * Flags
 */

DEFINE_string(graph, "", "The input file containing the graph to embed.");
DEFINE_bool(info, true, "Print information about the graph.");
DEFINE_int32(seed, 0, "The seed used to generate random numbers.");
DEFINE_string(embed, "",
              "If set, embeds the graph and writes the resulting embedding to "
              "the specified file.");
DEFINE_string(print, "", "Writes the graph to the specified file.");
DEFINE_string(draw, "",
              "Saves the embedding as SVG file to the specified path.");
DEFINE_string(embedding, "", "Reads an embedding from the passed path.");
DEFINE_string(compare, "",
              "Compare the calculated embedding to the one saved in the passed "
              "file path. (phi-phi / r-r)");
DEFINE_string(plot, "",
              "Saves the plots representing the quality of the embedding to "
              "the passed filepath.");

DEFINE_int32(degree, 5,
             "Only vertices whose degree is larger than this value will have "
             "node label in the drawn SVG file.");

DEFINE_string(names, "",
              "Path to a file that maps the node indices to strings. When "
              "drawing the graph as SVG these names will be used instead of "
              "the indices.");

DEFINE_string(edgeHisto, "",
              "File where the edge length histogram should be plotted to.");

DEFINE_string(
    phase1Out, "",
    "Where the embedding after the first phase should be written to.");

// Flags that represent parameters to tweak the embedding process.

DEFINE_double(stable, 0.1,
              "Determines the threshold for when an embedding counts as "
              "stable or not.");

DEFINE_double(repulsiveForceCoefficient, 0.03,
              "Determines how repulsive forces should to be scaled. Usually, "
              "the should be much weaker than the attractive forces, so a "
              "small value of 0.03 was set as default.");

DEFINE_double(
    repulsiveForceCoefficientSecondPhase, 0.03,
    "Determines how repulsive forces should to be scaled in the "
    "second phase. Usually, the should be much weaker than the "
    "attractive forces, so a small value of 0.03 was set as default.");

DEFINE_double(attractiveExponent, 1.0,
              "Determines the exponent of the attractive force.");

DEFINE_double(repulsiveExponent, 0.5,
              "Determines the exponent of the repulsive force.");

DEFINE_double(velocity, 0.5,
              "The velocity of the vertices which determines how much of the "
              "previous forces are again part of the current force.");

DEFINE_double(initialTemperature, 0.5,
              "The initial temperature of the system.");

DEFINE_double(temperatureDecreaseFactor, 0.975,
              "The factor that the temperature will be multiplied with, after "
              "an iteration. E.g. a factor of 0.5 will halve the temperature "
              "in each iteration.");


void print_edge_stats_of_embedding(const Embedding& embedding) {
  std::cout << "Determining edge stats..."
            << "\n";
  double number_of_edges = 0.0;
  double number_of_non_edges = 0.0;
  double long_edges = 0.0;
  double short_non_edges = 0.0;
  double R = embedding.max_radius();
  for (int i = 0; i < embedding.G.n - 1; i++) {
    Coordinate i_coord = embedding.points[i];
    for (int j = i + 1; j < embedding.G.n; j++) {
      Coordinate j_coord = embedding.points[j];
      double edge_length =
          Force::hyperbolic_distance_between_coordinates(i_coord, j_coord);

      double difference_to_desired = R - edge_length;

      if (embedding.G.adjacent(i, j)) {
        if (difference_to_desired < 0.0) {
          long_edges += difference_to_desired * difference_to_desired;
        }
        number_of_edges += 1.0;
      } else {
        if (difference_to_desired > 0.0) {
          short_non_edges += difference_to_desired * difference_to_desired;
        }
        number_of_non_edges += 1.0;
      }
    }
  }

  std::cout << "Edges too long: " << long_edges / number_of_edges << "\n";
  std::cout << "Non-Edges too short: " << short_non_edges / number_of_non_edges
            << "\n";
}

void plot_edge_histogram_of_embedding_to_file(Embedding& embedding,
                                              string file) {
  std::cout << "Plotting edge length histogram to: " << file << "\n";
  vector<std::pair<int, int>> histogram;
  embedding.edge_length_histogram(histogram);

  vector<int> indices(histogram.size());
  vector<double> edge_buckets(histogram.size());
  vector<double> non_edge_buckets(histogram.size());

  double number_of_edges = 0.0;
  double number_of_non_edges = 0.0;

  for (int i = 0; i < (int)histogram.size(); i++) {
    std::pair<int, int> bucket = histogram[i];

    double number_of_edges_in_bucket = (double)bucket.first;
    number_of_edges += number_of_edges_in_bucket;
    edge_buckets[i] = number_of_edges_in_bucket;

    double number_of_non_edges_in_bucket = (double)bucket.second;
    number_of_non_edges += number_of_non_edges_in_bucket;
    non_edge_buckets[i] = number_of_non_edges_in_bucket;

    indices[i] = i;
  }

  for (int i = 0; i < (int)histogram.size(); i++) {
    edge_buckets[i] /= number_of_edges;
    non_edge_buckets[i] /= number_of_non_edges;
  }

  Gnuplot gp("Edge Length Histogram");
  gp.savetops(file);

  // set range
  gp.set_xrange(0, indices.size());
  // gp.set_yrange(0, 1.0);

  gp.set_style("lines lw 5 lc rgb \"green\"")
      .plot_xy(indices, edge_buckets, "Edges");

  gp.set_style("lines lw 5 lc rgb \"red\"")
      .plot_xy(indices, non_edge_buckets, "Non Edges");

  /**
   * Now we determine the error we made using this embedding.
   *
   * This works as follows. For each bucket we check which of
   * the two (edges or non-edges) is smaller and add the bucket
   * value to the corresponding error term.
   */

  double edges_error = 0.0;
  double non_edges_error = 0.0;

  for (int i = 0; i < (int)histogram.size(); i++) {
    if (edge_buckets[i] > non_edge_buckets[i]) {
      non_edges_error += non_edge_buckets[i];
    } else {
      edges_error += edge_buckets[i];
    }
  }

  std::cout << "Histogram Error Edges: " << edges_error << "\n";
  std::cout << "Histogram Error Non Edges: " << non_edges_error << "\n";
}

/**
 * Main procedure
 */
int main(int argc, char* argv[]) {
  /**
   * Evaluating the flags.
   */
  google::ParseCommandLineFlags(&argc, &argv, true);
  google::InitGoogleLogging(argv[0]);

  FLAGS_log_dir = "/tmp/";

  std::ios_base::sync_with_stdio(false);

  std::srand(FLAGS_seed);

  if (FLAGS_graph.empty()) {
    LOG(WARNING) << "No input graph specified!";
    return EXIT_FAILURE;
  }

  unordered_map<std::string, int> node_labels;
  vector<int> new_permutation;
  auto G = Graph::fromFile(FLAGS_graph, &node_labels);
  G.sortByDegrees(&new_permutation);

  for (auto label_pair : node_labels) {
    string label = label_pair.first;
    int old_index = label_pair.second;

    node_labels[label] = new_permutation[old_index];
  }

  if (FLAGS_info) {
    std::cout << "# Graph: " << FLAGS_graph << "\n";
    std::cout << "# n = " << G.n << "\n";
    std::cout << "# m = " << G.numEdges() << "\n";
    std::cout << "# avg. deg. = " << G.averageDegree() << "\n";
    std::cout << "# beta = " << G.powerLawExponent() << "\n";
    std::cout << "# clustering = " << G.clusterCoeff() << "\n";

    double T, n_orig, m_orig, alpha, R;
    NLEHelper::estimateHyperbolicParameters(G, &T, &n_orig, &m_orig, &alpha, &R,
                                            nullptr);

    std::cout << "# Hyperbolic Parameters: n = " << n_orig << ", m = " << m_orig
              << ", R = " << R << ", T = " << T << ", alpha = " << alpha
              << "\n";
  }

  int n = G.n;
  int total_iterations = 0;
  double T, n_orig, m_orig, alpha, R;
  vector<double> estimated_radii(n);
  NLEHelper::estimateHyperbolicParameters(G, &T, &n_orig, &m_orig, &alpha, &R,
                                          &estimated_radii);

  if (FLAGS_print.length() > 0) {
    std::cout << "Writing graph to file: " << FLAGS_print << "\n";
    G.printToFile(FLAGS_print.c_str(), nullptr, false);
  }

  if (FLAGS_embed.length() > 0) {
    std::chrono::high_resolution_clock::time_point embedding_start =
        std::chrono::high_resolution_clock::now();
    double max_radius = 0.0;
    vector<Coordinate> points(n);
    FOR(i, n) {
      int seed = rand();
      Coordinate rand_coord =
          Coordinate::random_point_on_surface_of_unit_ball(seed);
      std::uniform_real_distribution<> distribution(0, R);
      int seed2 = rand();
      std::default_random_engine generator(seed2);
      double radius = distribution(generator);

      if (radius > max_radius) {
        max_radius = radius;
      }

      rand_coord *= radius;
      points[i] = rand_coord;
    }

    Embedding embedding = Embedding(G, points);

    embedding.velocity = FLAGS_velocity;

    /**
     * Before we start we determine the edge length histogram. This
     * will be overwritten later but at least we will have the error
     * terms in the console output.
     */
    if (FLAGS_edgeHisto.length() > 0) {
      plot_edge_histogram_of_embedding_to_file(embedding, FLAGS_edgeHisto);
    }

    /**
     * Phase 1: General Forces...
     */
    std::cout << "Phase 1: Radial + General forces..."
              << "\n";

    /**
     * This force is responsible for moving the nodes on the sphere surface.
     */
    SurfaceForce force = SurfaceForce(R, M_PI_4 / 2.0);
    force.temperature = FLAGS_initialTemperature;
    force.temperature_decrease = FLAGS_temperatureDecreaseFactor;
    force.repulsive_force_coefficient = FLAGS_repulsiveForceCoefficient;
    force.attractive_exponent = FLAGS_attractiveExponent;
    force.repulsive_exponent = FLAGS_repulsiveExponent;

    /**
     * This force is responsible for determining the radii.
     */
    SurfaceForce radial_force = SurfaceForce(max_radius, M_PI_4 / 2.0);
    radial_force.temperature = FLAGS_initialTemperature;
    radial_force.temperature_decrease = FLAGS_temperatureDecreaseFactor;
    radial_force.repulsive_force_coefficient = FLAGS_repulsiveForceCoefficient;

    /**
     * Start with 10 iterations of radial forces.
     */
    int initial_radial_iterations = 10;
    std::cout << initial_radial_iterations << " iterations of radial forces..."
              << "\n";

    FOR(i, initial_radial_iterations) {
      embedding.apply_radial_force_to_vertices(radial_force);

      /**
       * After applying the radial forces, we determine the new
       * maximum radius, which will be our new disk radius.
       */
      double new_max_radius = embedding.max_radius();
      force.disk_radius = new_max_radius;
    }

    std::cout << initial_radial_iterations
              << " iterations of radial forces... Done."
              << "\n";

    int current_iteration = 0;
    int max_iteration = 500;

    int radial_forces_interval = 20;
    int radial_forces_intermediate_iterations = 10;

    std::cout << "Performing at most " << max_iteration
              << " iterations of general forces and applying "
              << radial_forces_intermediate_iterations
              << " iterations of radial forces every " << radial_forces_interval
              << " iterations."
              << "\n";

    bool embedding_is_stable = false;
    // double average_percent_of_processed_cells = 0.0;
    while (!embedding_is_stable && current_iteration < max_iteration) {
      /**
       * Every "radial_forces_interval" iterations we apply radial forces.
       */
      if (current_iteration % radial_forces_interval == 0) {
        FOR(i, radial_forces_intermediate_iterations) {
          embedding.apply_radial_force_to_vertices(radial_force);

          /**
           * After applying the radial forces, we determine the new
           * maximum radius, which will be our new disk radius.
           */
          double new_max_radius = embedding.max_radius();
          force.disk_radius = new_max_radius;
        }
      }

      current_iteration++;
      total_iterations++;

      embedding.apply_force_to_vertices(force);

      /**
       * Check if the embedding is stable now.
       */
      embedding_is_stable = embedding.is_stable_with_threshold(FLAGS_stable);

      if (force.temperature * force.temperature_decrease > 0.015) {
        force.temperature *= force.temperature_decrease;
      } else {
        force.temperature = 0.015;
      }

      /**
       * We also decrease the temperature of the radial_force.
       */
      if (radial_force.temperature * radial_force.temperature_decrease >
          0.015) {
        radial_force.temperature *= radial_force.temperature_decrease;
      } else {
        radial_force.temperature = 0.015;
      }
    }

    /**
     * Stop iterating if the maximum number of iterations is reached.
     */
    if (current_iteration < max_iteration) {
      std::cout << "Stopped early (" << current_iteration << " of "
                << max_iteration
                << " iterations) since the embedding was already stable."
                << "\n";
    }

    /**
     * Save the 3D embedding after the first phase.
     */
    if (!FLAGS_phase1Out.empty()) {
      std::cout << "Saving embedding after first phase. File: "
                << FLAGS_phase1Out << "\n";
      std::ofstream out_file;
      out_file.open(FLAGS_phase1Out);
      out_file << embedding.to_string(true);
      out_file.close();
    }

    /**
     * Phase 1 End
     */

    /**
     * Phase 2: Plane Forces...
     */
    std::cout << "Phase 2: Plane forces..."
              << "\n";

    /**
     * Determine the plane that the vertices should be pulled
     * towards.
     */
    Coordinate plane_normal = embedding.fitting_plane();

    /**
     * The force that pulls the vertices towards the plane.
     */
    ConstantForce plane_force = ConstantForce(max_radius, M_PI / 3.0);
    plane_force.temperature = force.temperature;

    /**
     * The force that we used in the first phase will still by
     * applied in the second phase. However now we make the repulsive
     * forces larger, as it was too weak (on purpose) in the first
     * phase.
     */
    force.repulsive_force_coefficient =
        FLAGS_repulsiveForceCoefficientSecondPhase;

    current_iteration = 0;
    max_iteration = 500;

    std::cout << "Performing at most " << max_iteration << " iterations."
              << "\n";

    /**
     * We stop when the average distances of the vertices to the
     * plane is small enough.
     */
    double average_distance_to_plane =
        embedding.average_distance_of_embedding_to_plane_with_normal(
            plane_normal);

    while (average_distance_to_plane > 0.5 &&
           current_iteration < max_iteration) {
      current_iteration++;
      total_iterations++;

      // embedding
      //     .apply_approximate_force_to_vertices_and_flatten_towards_plane_with_normal(
      //         force, plane_force, plane_normal, FLAGS_pooling * R);
      embedding.apply_force_to_vertices_and_flatten_towards_plane_with_normal(
          force, plane_force, plane_normal);

      average_distance_to_plane =
          embedding.average_distance_of_embedding_to_plane_with_normal(
              plane_normal);

      /**
       * Update the temperature of the force
       */
      if (force.temperature * force.temperature_decrease > 0.015) {
        force.temperature *= force.temperature_decrease;
      } else {
        force.temperature = 0.015;
      }

      plane_force.temperature = force.temperature;
    }

    /**
     * Stop if we have already reached the maximum number of iterations.
     */
    if (current_iteration < max_iteration) {
      std::cout << "Stopped early (" << current_iteration << " of "
                << max_iteration
                << " iterations) since the average distance to the plane "
                   "was already small enough."
                << "\n";
    }

    /**
     * Phase 2 End
     */

    /**
     * Phase 3: Reduce to two dimensions.
     */
    std::cout << "Phase 3: Reduce to two dimensions."
              << "\n";
    embedding.convert_to_2D_embedding();

    /**
     * Phase 3 End
     */

    std::chrono::high_resolution_clock::time_point embedding_end =
        std::chrono::high_resolution_clock::now();
    auto embedding_duration =
        std::chrono::duration_cast<std::chrono::milliseconds>(embedding_end -
                                                              embedding_start)
            .count();
    std::cout << "The embedding process took: " << embedding_duration / 1000.0
              << " seconds."
              << "\n";

    std::cout << "Writing embedding to file: " << FLAGS_embed << "\n";
    std::ofstream file;
    file.open(FLAGS_embed);
    file << embedding.to_string_2D(true) << "\n";
    file.close();

    if (FLAGS_draw.length() > 0) {
      std::cout << "Drawing embedding to file: " << FLAGS_draw << "\n";
      vector<int> node_color(embedding.G.n, 1);

      if (FLAGS_names.length() > 0) {
        std::cout << "Reading node names from file: " << FLAGS_names << "\n";
        std::ifstream file;
        file.open(FLAGS_names.c_str());
        CHECK(file.good()) << "Maybe the file " << FLAGS_names
                           << " doesn't exist?";

        for (std::string line; getline(file, line);) {
          std::vector<string> string_components;
          std::stringstream ss(line);
          std::string component;

          while (ss >> component) {
            string_components.push_back(component);
            if (ss.peek() == ' ') {
              ss.ignore();
            }
          }

          string node_label = string_components[0];
          string node_name = "";

          for (int i = 1; i < string_components.size(); i++) {
            node_name += " " + string_components[i];
          }

          int node_index = node_labels[node_label];

          embedding.G.labels[node_index] = node_name;
        }
      }

      embedding.draw(FLAGS_draw.c_str(), node_color, FLAGS_degree, "", 1000,
                     1000);

      std::cout << "Embedding Log-Likelihood: " << embedding.log_likelihood()
                << "\n";
    }

    if (FLAGS_compare.length() > 0 && FLAGS_plot.length() > 0) {
      std::cout << "Comparing with embedding: " << FLAGS_compare << "\n";
      try {
        // create a new gnuplot file
        Gnuplot gp("Embedding Quality");
        gp.savetops(FLAGS_plot);

        // set range
        gp.set_xrange(0.0, 2.0 * M_PI);
        gp.set_yrange(0.0, 2.0 * M_PI);

        std::cout << "Reading embedding from: " << FLAGS_compare << "\n";
        vector<Coordinate> other_points(embedding.G.n, Coordinate());
        Embedding other_embedding = Embedding(embedding.G, other_points);
        other_embedding.read_embedding_from_file(FLAGS_compare.c_str(),
                                                 node_labels);

        std::cout << "Other Embedding Log-Likelihood: "
                  << other_embedding.log_likelihood() << "\n";

        std::cout << "Plotting data to: " << FLAGS_plot << "\n";
        vector<double> other_phi_values(embedding.G.n);
        other_embedding.get_phi_values(other_phi_values);

        vector<double> this_phi_values(embedding.G.n);
        embedding.get_phi_values(this_phi_values);

        gp.set_style("points pointtype 5 ps 0.75 lc rgb \"blue\"")
            .plot_xy(other_phi_values, this_phi_values, "Phi-Phi Plot");

        // start a new picture
        gp.reset_plot();

        double R = embedding.max_radius();

        gp.set_xrange(0.0, 1.5 * R);
        gp.set_yrange(0.0, 1.5 * R);

        vector<double> other_r_values(embedding.G.n);
        other_embedding.get_r_values(other_r_values);

        vector<double> this_r_values(embedding.G.n);
        embedding.get_r_values(this_r_values);

        gp.set_style("points pointtype 5 ps 0.75 lc rgb \"blue\"")
            .plot_xy(other_r_values, this_r_values, "R-R Plot");

      } catch (GnuplotException ge) {
        cout << ge.what() << endl;
      }
    }

    if (FLAGS_edgeHisto.length() > 0) {
      plot_edge_histogram_of_embedding_to_file(embedding, FLAGS_edgeHisto);
    }
  }

  if (FLAGS_embedding.length() > 0) {
    std::cout << "Reading embedding from file: " << FLAGS_embedding << "\n";
    vector<Coordinate> points(G.n, Coordinate());
    Embedding embedding = Embedding(G, points);
    embedding.read_embedding_from_file(FLAGS_embedding.c_str(), node_labels);

    if (FLAGS_draw.length() > 0) {
      std::cout << "Drawing embedding to file: " << FLAGS_draw << "\n";
      vector<int> node_color(embedding.G.n, 1);
      embedding.draw(FLAGS_draw.c_str(), node_color, FLAGS_degree, "", 1000,
                     1000);

      // if (FLAGS_show) {
      //   string open_drawing_command = "open " + FLAGS_draw;
      //   system(open_drawing_command.c_str());
      // }
    }

    if (FLAGS_compare.length() > 0 && FLAGS_plot.length() > 0) {
      std::cout << "Comparing with embedding: " << FLAGS_compare << "\n";
      try {
        // create a new gnuplot file
        Gnuplot gp("Embedding Quality");
        gp.savetops(FLAGS_plot);

        // set range
        gp.set_xrange(0.0, 2.0 * M_PI);
        gp.set_yrange(0.0, 2.0 * M_PI);

        std::cout << "Reading embedding from: " << FLAGS_compare << "\n";
        vector<Coordinate> other_points(embedding.G.n, Coordinate());
        Embedding other_embedding = Embedding(embedding.G, other_points);
        other_embedding.read_embedding_from_file(FLAGS_compare.c_str(),
                                                 node_labels);

        std::cout << "Other Embedding Log-Likelihood: "
                  << other_embedding.log_likelihood() << "\n";

        // if (FLAGS_edgeStats) {
        //   print_edge_stats_of_embedding(other_embedding);
        // }

        std::cout << "Plotting data to: " << FLAGS_plot << "\n";
        vector<double> other_phi_values(embedding.G.n);
        other_embedding.get_phi_values(other_phi_values);

        vector<double> this_phi_values(embedding.G.n);
        embedding.get_phi_values(this_phi_values);

        gp.set_style("points pointtype 5 ps 0.75 lc rgb \"blue\"")
            .plot_xy(other_phi_values, this_phi_values, "Phi-Phi Plot");

        // start a new picture
        gp.reset_plot();

        double R = embedding.max_radius();

        gp.set_xrange(0.0, 1.5 * R);
        gp.set_yrange(0.0, 1.5 * R);

        vector<double> other_r_values(embedding.G.n);
        other_embedding.get_r_values(other_r_values);

        vector<double> this_r_values(embedding.G.n);
        embedding.get_r_values(this_r_values);

        gp.set_style("points pointtype 5 ps 0.75 lc rgb \"blue\"")
            .plot_xy(other_r_values, this_r_values, "R-R Plot");

        // if (FLAGS_show) {
        //   string open_plot_command = "open " + FLAGS_plot;
        //   system(open_plot_command.c_str());
        // }

      } catch (GnuplotException ge) {
        cout << ge.what() << endl;
      }
    }

    if (FLAGS_edgeHisto.length() > 0) {
      plot_edge_histogram_of_embedding_to_file(embedding, FLAGS_edgeHisto);
    }
  }

  return 0;
}
back to top