https://github.com/jonas-ellert/linear-time-runs
Tip revision: 065dfae01cccc8fa7fa8b116cfa91f272c0b22ba authored by Jonas Ellert on 23 June 2021, 12:11:55 UTC
add AUTHORS for software heritage
add AUTHORS for software heritage
Tip revision: 065dfae
runs.hpp
// MIT License
//
// Copyright (c) 2020 Jonas Ellert, jonas.ellert@tu-dortmund.de
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <deque>
#include <vector>
namespace linear_time_runs {
template<typename index_type>
struct run_type {
index_type start;
index_type period;
index_type length;
};
namespace internal {
template<bool increasing, typename value_type, typename index_type>
void unidirectional_runs(value_type const *const text, size_t const n,
std::vector <run_type<index_type>> &result) {
auto compare = [&text](index_type const &i, index_type const &j,
index_type const &lce) {
if constexpr (increasing) {
return j > 0 && text[i + lce] > text[j + lce];
} else {
return text[i + lce] < text[j + lce];
}
};
auto get_rlce = [&](index_type const &i, index_type const &j,
index_type lce = 0) {
while (text[i + lce] == text[j + lce])
++lce;
return lce;
};
auto get_llce = [&](index_type const &i, index_type const &j,
index_type llce = 0) {
while (text[i - llce] == text[j - llce])
++llce;
return llce;
};
struct distance_edge_type {
index_type distance, lce;
distance_edge_type(index_type const d, index_type const l)
: distance(d), lce(l) {}
};
struct target_edge_type {
index_type target, lce;
target_edge_type(index_type const t, index_type const l)
: target(t), lce(l) {}
};
std::deque <target_edge_type> q;
auto top_j = [&q]() { return q.back().target; };
auto top_lce = [&q]() { return q.back().lce; };
auto pop = [&q]() { q.pop_back(); };
auto push = [&q](index_type const j, index_type const lce) {
q.emplace_back(j, lce);
};
push(0, 0);
push(1, 0);
std::vector <index_type> first_edge_of_node(n + 1);
std::vector <distance_edge_type> edges;
edges.reserve(2 * n);
first_edge_of_node[1] = 0;
edges.emplace_back(1, 0);
index_type distance = 1;
index_type rhs = 1;
for (index_type i = 2; i < n - 1; ++i) {
first_edge_of_node[i] = edges.size();
index_type const copy_from = i - distance;
index_type const stop_edge = first_edge_of_node[copy_from + 1];
index_type e = first_edge_of_node[copy_from];
for (; e < stop_edge; ++e) {
if (i + edges[e].lce < rhs) {
edges.emplace_back(edges[e]);
} else
break;
}
if (e == stop_edge) {
index_type const target = i - edges.back().distance;
while (top_j() > target)
pop();
push(i, edges.back().lce);
continue;
}
index_type j = i - edges[e].distance;
index_type lce = get_rlce(i, j, (rhs > i) ? (rhs - i) : (index_type) 0);
rhs = i + lce;
distance = i - j;
edges.emplace_back(distance, lce);
while (top_j() > j)
pop();
while (compare(i, j, lce)) {
if (top_lce() < lce) {
lce = top_lce();
pop();
edges.emplace_back(i - top_j(), lce);
break;
}
pop();
j = top_j();
lce = get_rlce(i, j, lce);
rhs = i + lce;
distance = i - j;
edges.emplace_back(distance, lce);
}
push(i, lce);
}
first_edge_of_node[n - 1] = edges.size();
while (top_j() > 0) {
edges.emplace_back(n - 1 - top_j(), 0);
pop();
}
first_edge_of_node[n] = edges.size() + 1;
struct nss_edge_type {
index_type distance, llce, rlce;
};
std::vector <nss_edge_type> nss_edges(n);
for (index_type i = 1; i < n; ++i) {
index_type e = first_edge_of_node[i];
index_type end_edge = first_edge_of_node[i + 1] - 1; // last edge = pss
for (; e < end_edge; ++e) {
nss_edges[i - edges[e].distance] =
nss_edge_type{edges[e].distance, 0, edges[e].lce};
}
}
nss_edges[n - 2].llce = 0;
index_type lhs = n - 2;
distance = 1;
for (index_type i = n - 3; i > 0; --i) {
if (i > lhs + nss_edges[i + distance].llce) {
nss_edges[i].llce = nss_edges[i + distance].llce;
continue;
}
nss_edges[i].llce = get_llce(i, i + nss_edges[i].distance,
(lhs < i) ? (i - lhs) : (index_type) 0);
lhs = i - nss_edges[i].llce;
distance = nss_edges[i].distance;
}
std::vector <index_type> count_runs_at_idx(n);
for (index_type i = 1; i < n - 1; ++i) {
if (nss_edges[i].distance < nss_edges[i].llce + nss_edges[i].rlce) {
++count_runs_at_idx[i - nss_edges[i].llce + 1];
}
}
index_type runs_with_duplicates = 0;
for (index_type i = 1; i < n - 1; ++i) {
index_type const gsize = count_runs_at_idx[i];
count_runs_at_idx[i] = runs_with_duplicates;
runs_with_duplicates += gsize;
}
if (runs_with_duplicates > 0) {
auto prev_res_size = result.size();
result.resize(prev_res_size + runs_with_duplicates);
auto runs = &result[prev_res_size];
for (index_type i = 1; i < n - 1; ++i) {
if (nss_edges[i].distance < nss_edges[i].llce + nss_edges[i].rlce) {
auto &border = count_runs_at_idx[i - nss_edges[i].llce + 1];
runs[border++] = run_type<index_type>{
i - nss_edges[i].llce + 1, nss_edges[i].distance,
nss_edges[i].distance + nss_edges[i].llce + nss_edges[i].rlce - 1};
}
}
index_type runs_without_duplicates = 1;
for (index_type i = 1; i < runs_with_duplicates; ++i) {
if ((runs[i].length != runs[i - 1].length) ||
(runs[i].period != runs[i - 1].period) ||
(runs[i].start != runs[i - 1].start)) {
runs[runs_without_duplicates++] = runs[i];
}
}
result.resize(prev_res_size + runs_without_duplicates);
}
}
} // namespace internal
template<typename value_type, typename index_type = uint32_t>
auto compute_all_runs(value_type const *const text, size_t const n) {
using run_t = run_type<index_type>;
std::vector <run_t> result;
internal::unidirectional_runs<true>(text, n, result);
auto increasing = result.size();
internal::unidirectional_runs<false>(text, n, result);
std::inplace_merge(result.begin(), result.begin() + increasing, result.end(),
[](run_t const &a, run_t const &b) {
return a.start < b.start ||
(a.start == b.start && a.period < b.period);
});
result.shrink_to_fit();
return result;
}
} // namespace linear_time_runs