666 lines
26 KiB
C++
666 lines
26 KiB
C++
#include <libregexis024fa/fa_make_deterministic.h>
|
|
#include <libregexis024fa/misc_fa_funcs.h>
|
|
#include <libregexis024vm/utils.h> /* to get exitf */
|
|
#include <assert.h>
|
|
#include <libregexis024fa/tracking_fa_nodes.h>
|
|
#include <vector>
|
|
#include <map>
|
|
#include <algorithm>
|
|
#include <set>
|
|
#include <libregexis024fa/colored_codeset.h>
|
|
|
|
#if defined(LIBREGEXIS024_DEBUG) && defined(LIBREGEXIS024_ALLOW_LOUD)
|
|
#include <debugging_regexis024/prettyprint/prettyprint_util.h>
|
|
#include <string>
|
|
#include <functional>
|
|
#include <stdio.h>
|
|
#define PR_DEB
|
|
#endif
|
|
|
|
/* debug nonsense */
|
|
void input_fa_assert(const FA_Container& fa){
|
|
assert(fa.start);
|
|
for (FA_Node* node: fa.all){
|
|
if (node->type == one_char_read){
|
|
assert(!dynamic_cast<FA_NodeOfOneCharRead*>(node)->second_ns);
|
|
} else if (node->type == look_one_ahead ||
|
|
node->type == det_char_crossroads){
|
|
exitf("not allowed at this stage\n");
|
|
}
|
|
}
|
|
}
|
|
|
|
struct OperHistoryNodeTransition {
|
|
TrackingOperationInFa op;
|
|
size_t u;
|
|
|
|
OperHistoryNodeTransition(const TrackingOperationInFa &op, size_t u): op(op), u(u) {}
|
|
};
|
|
|
|
struct OperHistoryNode {
|
|
std::vector<OperHistoryNodeTransition> next;
|
|
/* When it is part of clean history, this */
|
|
std::vector<uint64_t> compressed_selarr;
|
|
std::vector<uint64_t> raisin;
|
|
|
|
OperHistoryNode() = default;
|
|
};
|
|
|
|
/* Describes a superstate: the sorted list of stop nodes (raisin) together with the
 * selarr values in the SN3 layout of SelarrCompressionScheme.
 * The object can also describe an empty superstate (needed for clean history nodes
 * without raisin): if sorted_raisin is empty, interpret it as an empty superstate. */
struct SuperState {
    std::vector<uint64_t> sorted_raisin;
    std::vector<uint64_t> double_compressed_selarr;

    /* True when no stop node belongs to this superstate */
    bool empty() const {
        return sorted_raisin.size() == 0;
    }

#ifdef PR_DEB
    /* Debug only: both arrays rendered as comma separated decimal lists */
    std::string toString() const {
        auto join = [](const std::vector<uint64_t>& values) -> std::string {
            std::string joined;
            for (size_t i = 0; i < values.size(); i++) {
                if (!joined.empty())
                    joined += ", ";
                joined += std::to_string(values[i]);
            }
            return joined;
        };
        const std::string f1_raisin = join(sorted_raisin);
        const std::string f2_selarr = join(double_compressed_selarr);
        return "sorted_raisin: {" + f1_raisin + "}, double_comp_selarr: {" + f2_selarr + "}";
    }
#endif
};
|
|
|
|
struct CleanOperHistoryNode {
|
|
std::vector<OperHistoryNodeTransition> next;
|
|
SuperState exit;
|
|
};
|
|
|
|
struct SelarrCompressionScheme {
|
|
size_t SN1, SN2 = 0, SN3 = 0;
|
|
std::vector<int32_t> S1_to_S2;
|
|
std::vector<regex_tai_t> S2_to_sifter;
|
|
std::vector<regex_tai_t> S3_to_sifter;
|
|
const RegexPriorityTable& sifter;
|
|
|
|
SelarrCompressionScheme(size_t sn1, const RegexPriorityTable &sifter) : SN1(sn1), sifter(sifter) {
|
|
assert(sifter.size() <= UINT32_MAX);
|
|
S1_to_S2.assign(SN1, -1);
|
|
for (regex_tai_t i = 0; i < sifter.size(); i++) {
|
|
auto& act = sifter[i].pos;
|
|
regex_tai_t first_on_s2 = S2_to_sifter.size();
|
|
S2_to_sifter.push_back(i);
|
|
S1_to_S2[act.first] = first_on_s2;
|
|
if (act.type != tracking_var_types::dot_cur_pos) {
|
|
S3_to_sifter.push_back(i);
|
|
}
|
|
if (act.type == tracking_var_types::range) {
|
|
regex_tai_t second_on_s2 = S2_to_sifter.size();
|
|
S2_to_sifter.push_back(i);
|
|
S1_to_S2[act.second] = second_on_s2;
|
|
}
|
|
}
|
|
SN2 = S2_to_sifter.size();
|
|
SN3 = S3_to_sifter.size();
|
|
assert(SN3 <= SN2 && SN2 <= SN1 && SN1 <= UINT16_MAX);
|
|
|
|
}
|
|
};
|
|
|
|
std::vector<uint64_t> compress_compressed_selarr(const std::vector<uint64_t>& S2,
|
|
const SelarrCompressionScheme& cmp) {
|
|
std::vector<uint64_t> S3(cmp.SN3);
|
|
for (size_t i = 0; i < cmp.SN3; i++) {
|
|
const RegexPriorityTableAction_Pos& act = cmp.sifter[cmp.S3_to_sifter[i]].pos;
|
|
if (act.type == tracking_var_types::dot_immediate) {
|
|
S3[i] = S2[cmp.S1_to_S2[act.first]];
|
|
} else {
|
|
assert(act.type == tracking_var_types::range); // It must be range type
|
|
uint64_t onFirstBorder = S2[cmp.S1_to_S2[act.first]];
|
|
uint64_t onSecondBorder = S2[cmp.S1_to_S2[act.second]];
|
|
S3[i] = (onFirstBorder > onSecondBorder) ? 1 : 0;
|
|
}
|
|
}
|
|
return S3;
|
|
}
|
|
|
|
bool compressed_selarr_A_outranks_B(const std::vector<uint64_t>& A, const std::vector<uint64_t>& B,
|
|
const SelarrCompressionScheme& cmp) {
|
|
for (const RegexPriorityTableAction& act: cmp.sifter) {
|
|
uint64_t valA = A[cmp.S1_to_S2[act.pos.first]];
|
|
uint64_t valB = B[cmp.S1_to_S2[act.pos.first]];
|
|
if (act.pos.type == tracking_var_types::range) {
|
|
uint64_t valAsecond = A[cmp.S1_to_S2[act.pos.second]];
|
|
uint64_t valBsecond = A[cmp.S1_to_S2[act.pos.second]];
|
|
valA = valAsecond > valA ? valAsecond - valA : 0;
|
|
valB = valBsecond > valB ? valBsecond - valB : 0;
|
|
}
|
|
if (valA == valB)
|
|
continue;
|
|
return (valA < valB) == act.minimize;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/* Beacuse of the way wash_history_bush builds this structure, root is te last node.
|
|
* rankdir is from left to right (guaranteed). Can be empty if original history contained no raisin */
|
|
struct RaisinBush {
|
|
std::vector<CleanOperHistoryNode> clean_history;
|
|
ssize_t start = -1;
|
|
|
|
bool empty() const {
|
|
return start < 0;
|
|
}
|
|
|
|
#ifdef PR_DEB
|
|
void print() {
|
|
lines text;
|
|
text.push_back("Raisin bush");
|
|
if (start >= 0) {
|
|
size_t n = clean_history.size();
|
|
std::vector<bool> m(n, false);
|
|
TreeWithStringsNode e{""};
|
|
std::function<void(TreeWithStringsNode&, size_t)> dfs = [&]
|
|
(TreeWithStringsNode& fill, size_t nodeId)
|
|
{
|
|
if (m[nodeId]) {
|
|
fill.val = "PARADOX";
|
|
return;
|
|
}
|
|
m[nodeId] = true;
|
|
const CleanOperHistoryNode& node = clean_history[nodeId];
|
|
fill.val = "[" + std::to_string(nodeId) + "]";
|
|
if (!node.exit.empty())
|
|
fill.val += (" EXIT: " + node.exit.toString());
|
|
size_t CN = node.next.size();
|
|
fill.childeren.resize(CN);
|
|
for (size_t i = 0; i < CN; i++) {
|
|
fill.childeren[i].val = node.next[i].op.toString();
|
|
fill.childeren[i].childeren = {{}};
|
|
dfs(fill.childeren[i].childeren[0], node.next[i].u);
|
|
}
|
|
};
|
|
dfs(e, start);
|
|
size_t am = 0;
|
|
for (bool el: m)
|
|
am += static_cast<size_t>(el);
|
|
if (am < n)
|
|
text[0] += ": " + std::to_string(n - am) + " nodes are unreachable by detour";
|
|
e.toLines(text);
|
|
} else {
|
|
if (clean_history.empty())
|
|
text[0] = "Empty Raisin Bush";
|
|
else
|
|
text [0] = "Raisin bush with no root and " + std::to_string(clean_history.size()) = " nodes missed";
|
|
}
|
|
printLines(wrapWithBox(text));
|
|
}
|
|
#endif
|
|
};
|
|
|
|
void wash_history_bush(const std::vector<OperHistoryNode>& history, RaisinBush& answer,
|
|
const SelarrCompressionScheme& cmp) {
|
|
assert(!history.empty());
|
|
std::vector<bool> has_raisin(history.size());
|
|
std::vector<ssize_t> dirty_to_clean(history.size(), -1);
|
|
std::vector<std::pair<size_t, size_t> > callStack = {{0, 0}};
|
|
|
|
auto hist_clean_detour_init_clean = [&](uint64_t v) -> uint64_t {
|
|
if (!has_raisin[v]) {
|
|
has_raisin[v] = true;
|
|
dirty_to_clean[v] = answer.clean_history.size();
|
|
answer.clean_history.emplace_back();
|
|
}
|
|
return dirty_to_clean[v];
|
|
};
|
|
|
|
while (!callStack.empty()) {
|
|
size_t v = callStack.back().first;
|
|
size_t od = callStack.back().second;
|
|
if (od == 0) {
|
|
if (!history[v].raisin.empty()) {
|
|
size_t cleanVId = hist_clean_detour_init_clean(v);
|
|
std::vector<uint64_t>& sr = answer.clean_history[cleanVId].exit.sorted_raisin;
|
|
sr = history[v].raisin;
|
|
std::sort(sr.begin(), sr.end());
|
|
answer.clean_history[cleanVId].exit.double_compressed_selarr = compress_compressed_selarr(history[v].compressed_selarr, cmp);
|
|
}
|
|
} else {
|
|
const OperHistoryNodeTransition& old_hist_tr = history[v].next[od - 1];
|
|
uint64_t ou = old_hist_tr.u;
|
|
if (has_raisin[ou]) {
|
|
size_t cleanVId = hist_clean_detour_init_clean(v);
|
|
answer.clean_history[cleanVId].next.emplace_back(old_hist_tr.op, dirty_to_clean[ou]);
|
|
}
|
|
}
|
|
|
|
if (od == history[v].next.size()) {
|
|
callStack.pop_back();
|
|
} else {
|
|
callStack.back().second++;
|
|
callStack.emplace_back(history[v].next[od].u, 0);
|
|
}
|
|
}
|
|
|
|
if (has_raisin[0]) {
|
|
assert(dirty_to_clean[0] >= 0);
|
|
answer.start = dirty_to_clean[0];
|
|
}
|
|
|
|
}
|
|
|
|
/* If is_it_after_read is false, unknown selarr range variable border and cur pos are evaluated to 0.
|
|
* Otherwise, cur pos considered to be greater than previous values of selarr ange variable boundaries */
|
|
void building_detour(const SelarrCompressionScheme& cmp,
|
|
const std::vector<uint64_t>& outer_selarr, const std::vector<FA_Node*>& zeroeps, const codeset_t& I,
|
|
RaisinBush& answer, bool is_it_after_read)
|
|
{
|
|
#ifdef PR_DEB
|
|
printf("Det Debug: build_detour started with zeroeps:{");
|
|
for (FA_Node* node: zeroeps)
|
|
printf("%lu,", node->nodeId);
|
|
printf("}, I: {%s}\n", stringifyCodesetBase10(I).c_str());
|
|
#endif
|
|
assert(cmp.SN3 == outer_selarr.size());
|
|
if (!is_it_after_read)
|
|
for (uint64_t val: outer_selarr)
|
|
assert(val == 0);
|
|
|
|
struct SearchMark {
|
|
FA_Node* domain_node;
|
|
uint64_t epsilon_refs = 0;
|
|
uint64_t detour_sat = 0;
|
|
/* id of corresponding history node */
|
|
size_t Hv = 0;
|
|
|
|
explicit SearchMark(FA_Node *domain_node) : domain_node(domain_node) {}
|
|
};
|
|
|
|
/* Default values are good for me */
|
|
std::vector<SearchMark> marks;
|
|
for (size_t i = 0; i < zeroeps.size(); i++) {
|
|
marks.emplace_back(zeroeps[i]);
|
|
zeroeps[i]->search_mark = i;
|
|
}
|
|
|
|
auto lob_allows_to_pass = [&](FA_NodeOfLookOneBehind* lob) -> bool {
|
|
if (!intersect_sets(lob->filter, I).empty()) {
|
|
assert(merge_sets(lob->filter, I) == lob->filter);
|
|
return true;
|
|
}
|
|
return false;
|
|
};
|
|
|
|
{ /* First i need to know exacly how many of MINE epsilon transitions are referencing each NODE */
|
|
std::vector<FA_Node*> domain_detour = zeroeps;
|
|
while (!domain_detour.empty()) {
|
|
FA_Node* v = domain_detour.back(); domain_detour.pop_back();
|
|
if (v->type == look_one_behind && !lob_allows_to_pass(dynamic_cast<FA_NodeOfLookOneBehind*>(v)))
|
|
continue;
|
|
for (FA_Node** uPtr: v->get_all_empty_valid_transitions()) {
|
|
assert(*uPtr);
|
|
int64_t &rds = (**uPtr).search_mark;
|
|
if (rds == -1) {
|
|
rds = marks.size();
|
|
domain_detour.push_back(*uPtr);
|
|
marks.emplace_back(*uPtr);
|
|
}
|
|
marks[rds].epsilon_refs++;
|
|
}
|
|
}
|
|
}
|
|
std::vector<OperHistoryNode> history = {OperHistoryNode()};
|
|
history[0].compressed_selarr.assign(cmp.SN2, 0);
|
|
for (size_t i = 0; i < cmp.SN3; i++) {
|
|
const RegexPriorityTableAction_Pos& act = cmp.sifter[cmp.S3_to_sifter[i]].pos;
|
|
if (act.type == tracking_var_types::range) {
|
|
if (outer_selarr[i]) {
|
|
history[0].compressed_selarr[cmp.S1_to_S2[act.second]] = 1;
|
|
}
|
|
} else {
|
|
assert(act.type == tracking_var_types::dot_immediate);
|
|
history[0].compressed_selarr[cmp.S1_to_S2[act.first]] = outer_selarr[i];
|
|
}
|
|
}
|
|
/* As a result, dot_cur_pos variables will be initialized as zero (always) */
|
|
|
|
/* In my second detour, I will pass each vertex here only one time: after hitting the total epsilon refcount */
|
|
std::vector<FA_Node*> can_process = zeroeps;
|
|
/*
|
|
auto increase_sat_refcount = [&](SearchMark& mark) {
|
|
mark.detour_sat++;
|
|
if (mark.detour_sat == mark.epsilon_refs && mark.ever_walked_in) {
|
|
can_process.push_back(mark.domain_node);
|
|
}
|
|
};
|
|
*/
|
|
|
|
auto add_history_update = [&](TrackingOperationInFa how, uint64_t where, uint64_t from_where) {
|
|
history[from_where].next.emplace_back(how, where);
|
|
};
|
|
|
|
while (!can_process.empty()) {
|
|
FA_Node* v = can_process.back(); can_process.pop_back();
|
|
SearchMark& Vmark = marks[v->search_mark];
|
|
assert(Vmark.detour_sat == Vmark.epsilon_refs);
|
|
uint64_t Hv = Vmark.Hv;
|
|
uint64_t Hop = Hv;
|
|
if (v->type == look_one_behind) {
|
|
FA_NodeOfLookOneBehind* tv = dynamic_cast<FA_NodeOfLookOneBehind*>(v);
|
|
if (!lob_allows_to_pass(tv))
|
|
continue;
|
|
} else if (isTrackingFaNode(v)) {
|
|
Hop = history.size();
|
|
history.emplace_back();
|
|
std::vector<uint64_t>& val2 = history.back().compressed_selarr;
|
|
val2 = history[Hv].compressed_selarr;
|
|
if (v->type == track_array_mov_imm) {
|
|
FA_NodeOfTrackArrayMovImm* tv = dynamic_cast<FA_NodeOfTrackArrayMovImm*>(v);
|
|
if (isSelarrOpcode(tv->operation)) {
|
|
int key_s2 = cmp.S1_to_S2[tv->key];
|
|
if (key_s2 >= 0){
|
|
assert(cmp.sifter[cmp.S2_to_sifter[key_s2]].pos.type == tracking_var_types::dot_immediate);
|
|
val2[key_s2] = tv->imm_value;
|
|
}
|
|
}
|
|
add_history_update(TrackingOperationInFa(tv->operation, tv->key, tv->imm_value), Hop, Hv);
|
|
} else if (v->type == track_array_mov_halfinvariant) {
|
|
FA_NodeOfTrackArrayMovHalfinvariant* tv = dynamic_cast<FA_NodeOfTrackArrayMovHalfinvariant*>(v);
|
|
if (isSelarrOpcode(tv->operation)) {
|
|
int key_s2 = cmp.S1_to_S2[tv->key];
|
|
if (key_s2 >= 0){
|
|
const RegexPriorityTableAction_Pos& act = cmp.sifter[cmp.S2_to_sifter[key_s2]].pos;
|
|
assert(act.type != tracking_var_types::dot_immediate);
|
|
if (act.type == tracking_var_types::dot_cur_pos) {
|
|
val2[key_s2] = is_it_after_read ? 1 : 0;
|
|
} else {
|
|
val2[key_s2] = is_it_after_read ? 2 : 0;
|
|
}
|
|
}
|
|
}
|
|
add_history_update(TrackingOperationInFa(tv->operation, tv->key), Hop, Hv);
|
|
}
|
|
} else if (v->type == match || v->type == one_char_read) {
|
|
// Determinization stop
|
|
history[Hv].raisin.push_back(v->nodeId);
|
|
}
|
|
for (FA_Node** uPtr: v->get_all_empty_valid_transitions()) {
|
|
assert(*uPtr);
|
|
SearchMark& Umark = marks[(**uPtr).search_mark];
|
|
/* Here I use Hop to determine Hv value of u */
|
|
if (Umark.detour_sat == 0) {
|
|
Umark.Hv = Hop;
|
|
} else if (Umark.Hv != Hop) {
|
|
if (compressed_selarr_A_outranks_B(
|
|
history[Hop].compressed_selarr, history[Umark.Hv].compressed_selarr, cmp)){
|
|
Umark.Hv = Hop;
|
|
}
|
|
}
|
|
/* Collision calculation finished */
|
|
Umark.detour_sat++;
|
|
if (Umark.detour_sat == Umark.epsilon_refs) {
|
|
can_process.push_back(Umark.domain_node);
|
|
}
|
|
}
|
|
}
|
|
/* Cleaning this mess */
|
|
for (auto& m: marks) {
|
|
m.domain_node->search_mark = -1;
|
|
}
|
|
/* Packaging the answer (we do a little bit of dfs here) */
|
|
wash_history_bush(history, answer, cmp);
|
|
}
|
|
|
|
void update_had_to_fork_status(const RaisinBush& bush, int& had_to_fork) {
|
|
for (const CleanOperHistoryNode& node: bush.clean_history) {
|
|
if (node.next.size() > 1 || (!node.next.empty() && !node.exit.empty())) {
|
|
had_to_fork = 1;
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
|
|
typedef size_t superstate_id_t;
|
|
|
|
typedef std::vector<std::pair<FA_Node**, superstate_id_t>> homework_t;
|
|
|
|
struct LessSuperState {
|
|
bool operator()(const SuperState& A, const SuperState& B) const {
|
|
std::less<std::vector<uint64_t>> f1L;
|
|
if (f1L(A.sorted_raisin, B.sorted_raisin))
|
|
return true;
|
|
if (f1L(B.sorted_raisin, A.sorted_raisin))
|
|
return false;
|
|
return f1L(A.double_compressed_selarr, B.double_compressed_selarr);
|
|
}
|
|
};
|
|
|
|
struct GlobalDetourProgress {
|
|
std::map<SuperState, superstate_id_t, LessSuperState> superstates;
|
|
/* Each element is a root of some megabush in resFa */
|
|
std::vector<FA_Node*> superstate_megabush_constructed;
|
|
std::vector<SuperState> todo_superstaes;
|
|
};
|
|
|
|
/* If x was not previously achieved, it will also add it to t o d o list of global detour */
|
|
superstate_id_t convertSuperstateToId(const SuperState& x, GlobalDetourProgress& gdp) {
|
|
if (gdp.superstates.count(x)) {
|
|
return gdp.superstates[x];
|
|
}
|
|
size_t n = gdp.superstates.size();
|
|
gdp.superstates.insert({x, n});
|
|
gdp.todo_superstaes.push_back(x);
|
|
gdp.superstate_megabush_constructed.push_back(NULL);
|
|
return n;
|
|
}
|
|
|
|
/* Makes a node to be put where nothing can follow: a forking node straight from
 * resFa.makeForking(), to which this function attaches no options. */
FA_Node* build_dead_end(FA_Container& resFa) {
    FA_Node* fork_without_options = resFa.makeForking();
    return fork_without_options;
}
|
|
|
|
void build_bush(const RaisinBush& alpha, FA_Node** sowing_location, FA_Container& resFa,
|
|
homework_t& homework, GlobalDetourProgress& gdp) {
|
|
size_t n = alpha.clean_history.size();
|
|
if (n == 0) {
|
|
FA_Node* dead_end = build_dead_end(resFa);
|
|
reattach_fa_node_edge(sowing_location, dead_end);
|
|
return;
|
|
}
|
|
std::vector<std::pair<FA_Node**, size_t>> todo = {{sowing_location, alpha.start}};
|
|
|
|
while (!todo.empty()) {
|
|
FA_Node** sl = todo.back().first;
|
|
const CleanOperHistoryNode& hnode = alpha.clean_history[todo.back().second];
|
|
todo.pop_back();
|
|
auto history_transition = [&](size_t i, FA_Node** of_sl) {
|
|
FA_NodePathPart* pn = convert_to_node(hnode.next[i].op, resFa);
|
|
reattach_fa_node_edge(of_sl, pn);
|
|
todo.emplace_back(&(pn->nxt_node), hnode.next[i].u);
|
|
};
|
|
|
|
if (hnode.next.empty()) {
|
|
assert(!hnode.exit.empty());
|
|
superstate_id_t w = convertSuperstateToId(hnode.exit, gdp);
|
|
homework.emplace_back(sl, w);
|
|
} else if (hnode.next.size() == 1 && hnode.exit.empty()) {
|
|
history_transition(0, sl);
|
|
} else {
|
|
FA_NodeOfForking* forker = resFa.makeForking();
|
|
bool raisin = !hnode.exit.empty();
|
|
size_t k = hnode.next.size();
|
|
forker->nxt_options.assign(k + static_cast<size_t>(raisin), NULL);
|
|
for (size_t i = 0; i < k; i++) {
|
|
history_transition(i, &(forker->nxt_options[i]));
|
|
}
|
|
if (raisin) {
|
|
superstate_id_t w = convertSuperstateToId(hnode.exit, gdp);
|
|
homework.emplace_back(&(forker->nxt_options[k]), w);
|
|
}
|
|
reattach_fa_node_edge(sl, forker);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Builds a ColoredCodeset that is already divided by the filter of every look_one_behind
 * node of sourceFa. Equal filters are applied once; the number of distinct filters is
 * what the ColoredCodeset is constructed with. */
ColoredCodeset get_pretreated_cc(FA_Container& sourceFa) {
    std::set<codeset_t> distinct_filters;
    for (FA_Node* node: sourceFa.all) {
        if (node->type != look_one_behind)
            continue;
        distinct_filters.insert(static_cast<FA_NodeOfLookOneBehind*>(node)->filter);
    }

    ColoredCodeset divided(distinct_filters.size());
    for (const codeset_t& filter: distinct_filters) {
        divided.apply_divisor(filter);
    }
    return divided;
}
|
|
|
|
// todo add a check on size of dfa
|
|
void try_determinize_fa(FA_Container &sourceFa, const RegexPriorityTable &sifter, regex_tai_t selarr_sz,
|
|
const REGEX_IS024_FA_FirstStageFixInfo &info1, FA_Container &resFa, int &error, int& had_to_fork)
|
|
{
|
|
/* During execuion, i will create pointers to field res.start and store them (inside the scope of this function)
|
|
* Luckily res argument is already immovable in this scope. */
|
|
error = 0;
|
|
had_to_fork = 0;
|
|
assert(resFa.start == NULL && resFa.all.empty());
|
|
input_fa_assert(sourceFa);
|
|
SelarrCompressionScheme cmp(selarr_sz, sifter);
|
|
|
|
GlobalDetourProgress gdp;
|
|
homework_t homework;
|
|
|
|
ColoredCodeset pretreated_cc = get_pretreated_cc(sourceFa);
|
|
|
|
FA_Node** res_start_ptr = &(resFa.start);
|
|
if (info1.fed_chars_extend_one_left) {
|
|
ColoredCodeset inp_distinction = pretreated_cc;
|
|
inp_distinction.apply_divisor(codeset_of_all);
|
|
std::vector<codeset_t> starting_Is;
|
|
std::vector<std::vector<size_t>> starting_Cids; /* Filler variable */
|
|
inp_distinction.get_splits_of_non_dummy(starting_Is, starting_Cids);
|
|
size_t R = starting_Is.size();
|
|
for (auto& rdh: starting_Cids) {
|
|
assert(rdh.size() == 1 && rdh[0] == 0);
|
|
}
|
|
FA_NodeOfDetCharCrossroads* very_first_cr = resFa.makeDetCharCrossroads();
|
|
very_first_cr->second_ns = true;
|
|
reattach_fa_node_edge(res_start_ptr, very_first_cr);
|
|
very_first_cr->crossroads.resize(R); /* After that, nobody has right to resize crossroads array */
|
|
for (size_t i = 0; i < R; i++) {
|
|
very_first_cr->crossroads[i].input = starting_Is[i];
|
|
FA_Node** sowing_place = &(very_first_cr->crossroads[i].nxt_node);
|
|
RaisinBush alpha;
|
|
building_detour(cmp, std::vector<uint64_t>(cmp.SN3, 0), {sourceFa.start}, starting_Is[i], alpha, false);
|
|
#ifdef PR_DEB
|
|
printf("Initialization hard %ld/%ld\n", i + 1, R);
|
|
alpha.print();
|
|
#endif
|
|
update_had_to_fork_status(alpha, had_to_fork);
|
|
build_bush(alpha, sowing_place, resFa, homework, gdp);
|
|
}
|
|
} else {
|
|
RaisinBush alpha;
|
|
building_detour(cmp, std::vector<uint64_t>(cmp.SN3, 0), {sourceFa.start}, codeset_of_all, alpha, false);
|
|
#ifdef PR_DEB
|
|
printf("Initialization easy\n");
|
|
alpha.print();
|
|
#endif
|
|
update_had_to_fork_status(alpha, had_to_fork);
|
|
build_bush(alpha, res_start_ptr, resFa, homework, gdp);
|
|
}
|
|
/* Now we start the actual detour. */
|
|
while (!gdp.todo_superstaes.empty()) {
|
|
SuperState SS = gdp.todo_superstaes.back(); gdp.todo_superstaes.pop_back();
|
|
// printf("Global detour turn: %s\n", SS.toString().c_str());
|
|
std::vector<FA_NodeOfOneCharRead*> reading_stops;
|
|
codeset_t how_can_i_finish = {};
|
|
for (size_t v: SS.sorted_raisin) {
|
|
FA_Node* node = sourceFa.all[v];
|
|
if (node->type == one_char_read) {
|
|
reading_stops.push_back(static_cast<FA_NodeOfOneCharRead*>(node));
|
|
} else if (node->type == match) {
|
|
auto fn = static_cast<FA_NodeOfMatch*>(node);
|
|
assert(!fn->ext_filter_added || info1.fed_chars_extend_one_right);
|
|
if (fn->ext_filter_added) {
|
|
how_can_i_finish = merge_sets(how_can_i_finish, fn->pending_filter);
|
|
} else {
|
|
how_can_i_finish = codeset_of_all;
|
|
}
|
|
} else
|
|
assert(false);
|
|
}
|
|
// Determinization stop: one char read (input)
|
|
ColoredCodeset inp_distinction = pretreated_cc;
|
|
size_t pr = reading_stops.size();
|
|
for (size_t i = 0; i < pr; i++) {
|
|
inp_distinction.apply_divisor(reading_stops[i]->filter);
|
|
}
|
|
std::vector<codeset_t> Is;
|
|
std::vector<std::vector<size_t>> Cids;
|
|
inp_distinction.get_splits_of_non_dummy(Is, Cids);
|
|
size_t R = Is.size();
|
|
FA_NodeOfDetCharCrossroads* my_cr = NULL;
|
|
if (R > 0) {
|
|
my_cr = resFa.makeDetCharCrossroads();
|
|
if (!info1.fed_chars_extend_one_right && !how_can_i_finish.empty()) {
|
|
assert(how_can_i_finish == codeset_of_all);
|
|
my_cr->matching = true;
|
|
}
|
|
my_cr->crossroads.resize(R);
|
|
}
|
|
for (size_t i = 0; i < R; i++) {
|
|
my_cr->crossroads[i].input = Is[i];
|
|
my_cr->crossroads[i].nxt_node = NULL;
|
|
std::vector<FA_Node*> fl_passed_filters;
|
|
for (size_t j: Cids[i]) {
|
|
fl_passed_filters.push_back(reading_stops[j]->nxt_node);
|
|
}
|
|
// todo: make a function out of next 6 lines of code
|
|
RaisinBush alpha;
|
|
building_detour(cmp, SS.double_compressed_selarr, fl_passed_filters, Is[i], alpha, true);
|
|
#ifdef PR_DEB
|
|
printf("That same turn, subbush %ld/%ld\n", i + 1, R);
|
|
alpha.print();
|
|
#endif
|
|
update_had_to_fork_status(alpha, had_to_fork);
|
|
build_bush(alpha, &(my_cr->crossroads[i].nxt_node), resFa, homework, gdp);
|
|
}
|
|
// Determinization stop: match (finish)
|
|
FA_Node* finish_route = NULL;
|
|
if (!how_can_i_finish.empty() && (info1.fed_chars_extend_one_right || R == 0)) {
|
|
FA_NodeOfMatch* matcher = resFa.makeMatch();
|
|
finish_route = matcher;
|
|
if (info1.fed_chars_extend_one_right) {
|
|
FA_NodeOfOneCharRead* right_ext_read = resFa.makeOneCharRead(how_can_i_finish, true);
|
|
reattach_nxt_node(right_ext_read, matcher);
|
|
finish_route = right_ext_read;
|
|
}
|
|
}
|
|
// Combining these two cases
|
|
assert(finish_route || my_cr);
|
|
FA_Node*& endsUp = gdp.superstate_megabush_constructed[gdp.superstates[SS]];
|
|
if (!finish_route) {
|
|
endsUp = my_cr;
|
|
} else if (!my_cr) {
|
|
endsUp = finish_route;
|
|
} else {
|
|
FA_NodeOfForking* F = resFa.makeForking();
|
|
F->nxt_options = {NULL, NULL};
|
|
reattach_fa_node_edge(&(F->nxt_options[0]), my_cr);
|
|
reattach_fa_node_edge(&(F->nxt_options[1]), finish_route);
|
|
endsUp = F;
|
|
}
|
|
}
|
|
/* Now it's time to do the homework: link all megabushes */
|
|
for (auto& p: homework) {
|
|
reattach_fa_node_edge(p.first, gdp.superstate_megabush_constructed[p.second]);
|
|
}
|
|
}
|
|
|