// File: libregexis024/src/libregexis024fa/graph_to_bytecode/core.cpp
// (source-viewer metadata: 111 lines, 5.2 KiB, C++)

#include <libregexis024fa/graph_to_bytecode/core.h>
#include <assert.h>
#include <libregexis024fa/graph_to_bytecode/writing_commands.h>
#include <libregexis024fa/graph_to_bytecode/filter.h>
namespace regexis024 {
/* Non-aborting check for void functions that have an `int error` variable in scope:
 * when `expr` is false, `error` is set to -1 and the enclosing function returns.
 * The do/while(0) wrapper makes `nonthrowing_assert(x);` exactly one statement, so the macro
 * can be used as the body of an unbraced `if`/`else` without breaking or capturing the `else`. */
#define nonthrowing_assert(expr) do { if (!(expr)) { error = -1; return; } } while (0)
/**
 * Appends to `result` the bytecode for every node reachable from `fa.start`.
 *
 * A range of `fa.all.size()` bookmarks is reserved up front; node N uses bookmark
 * (range start + N->nodeId). A node's bookmark is landed right before the node's code is written,
 * and every reference to a node (JUMP, FORK, filter branch) goes through its bookmark.
 * Nodes are taken from a LIFO work list. After a node is written, generation continues directly
 * with one of its successors where possible, so that the successor's code immediately follows.
 * If that successor has already been written, a JUMP to its bookmark is written instead.
 *
 * @param result            byte buffer the commands are appended to
 * @param fa                automaton to compile; `fa.start` must not be null (asserted)
 * @param bookmark_manager  supplies the bookmark range and records where bookmarks land
 * @param read_ss_ns        reset to 0, then incremented once per READ command written
 * @param fork_ss_ns        reset to 0, then incremented once per forking node with >= 2 options
 * @param error             set to -1 on failure (counter limit reached, node whose dynamic type does
 *                          not match its `type` tag, unknown node type); not written on success,
 *                          so the caller is expected to initialize it
 */
void compilation_core(std::vector<uint8_t>& result, FA_Container& fa, explicit_bookmarks& bookmark_manager,
                      size_t& read_ss_ns, size_t& fork_ss_ns, int& error)
{
    bookmark_id_t node_start_bm_offset = bookmark_manager.new_range_of_bookmarks(fa.all.size());
    read_ss_ns = 0;
    fork_ss_ns = 0;
    assert(fa.start);
    std::vector<FA_Node*> todo = {fa.start};

    /* Bookmark that marks the beginning of the given node's code */
    auto nodesBookmark = [&](FA_Node* node) -> bookmark_id_t {
        assert(node);
        return node_start_bm_offset + node->nodeId;
    };
    /* Remember a node that is referenced by bookmark and therefore has to be written eventually */
    auto addBranching = [&](FA_Node* node) {
        todo.push_back(node);
    };

    while (!todo.empty()) {
        FA_Node* node = todo.back(); todo.pop_back();
        if (bookmark_manager.has_landed(nodesBookmark(node))) {
            /* Already written while following some other chain */
            continue;
        }
        /* Write a chain of nodes, each one placed right after its predecessor */
        while (true) {
            if (bookmark_manager.has_landed(nodesBookmark(node))) {
                /* The chain ran into a node that already has code: connect to it and stop */
                cmd_JUMP(result, bookmark_manager, nodesBookmark(node));
                break;
            }
            bookmark_manager.land_bookmark(result, nodesBookmark(node));
            if (node->type == match) {
                cmd_MATCH(result);
                cmd_DIE(result);
                break;
            } else if (node->type == one_char_read) {
                FA_NodeOfOneCharRead* ocr = dynamic_cast<FA_NodeOfOneCharRead*>(node);
                nonthrowing_assert(ocr != nullptr);
                /* Every READ gets its own index; presumably the index is encoded in 32 bits - TODO confirm */
                nonthrowing_assert(read_ss_ns < UINT32_MAX);
                cmd_READ(result, read_ss_ns++);
                addBranching(ocr->nxt_node);
                /* The result of write_filter decides whether the successor may be written right here */
                bool can_spill = write_filter(result, bookmark_manager, {ocr->filter},{nodesBookmark(ocr->nxt_node)});
                if (!can_spill)
                    break;
                node = ocr->nxt_node;
            } else if (node->type == look_one_behind) {
                FA_NodeOfLookOneBehind* lob = dynamic_cast<FA_NodeOfLookOneBehind*>(node);
                nonthrowing_assert(lob != nullptr);
                /* Same as one_char_read, but no READ command is written */
                addBranching(lob->nxt_node);
                bool can_spill = write_filter(result, bookmark_manager, {lob->filter}, {nodesBookmark(lob->nxt_node)});
                if (!can_spill)
                    break;
                node = lob->nxt_node;
            } else if (node->type == forking) {
                FA_NodeOfForking* fn = dynamic_cast<FA_NodeOfForking*>(node);
                nonthrowing_assert(fn != nullptr);
                const std::vector<FA_Node*>& nxt_options = fn->nxt_options;
                if (nxt_options.empty()) {
                    cmd_DIE(result);
                    break;
                }
                if (nxt_options.size() >= 2) {
                    nonthrowing_assert(fork_ss_ns < UINT32_MAX);
                    /* One slot is shared by all FORK commands of this node */
                    sslot_id_t sslot = fork_ss_ns++;
                    /* All options except the last are reached through FORK */
                    for (size_t i = 0; i + 1 < nxt_options.size(); i++) {
                        cmd_FORK(result, bookmark_manager, sslot, nodesBookmark(nxt_options[i]));
                        addBranching(nxt_options[i]);
                    }
                }
                /* The last option is written right after the FORK commands */
                node = nxt_options.back();
            } else if (node->type == track_array_mov_imm) {
                FA_NodeOfTrackArrayMovImm* tami = dynamic_cast<FA_NodeOfTrackArrayMovImm*>(node);
                nonthrowing_assert(tami != nullptr);
                write_byte(result, tami->operation);
                write_tai(result, tami->key);
                write_quadword(result, tami->imm_value);
                node = tami->nxt_node;
            } else if (node->type == track_array_mov_halfinvariant) {
                FA_NodeOfTrackArrayMovHalfinvariant* tamh = dynamic_cast<FA_NodeOfTrackArrayMovHalfinvariant *>(node);
                nonthrowing_assert(tamh != nullptr);
                write_byte(result, tamh->operation);
                write_tai(result, tamh->key);
                node = tamh->nxt_node;
            } else if (node->type == det_char_crossroads) {
                FA_NodeOfDetCharCrossroads* dcc = dynamic_cast<FA_NodeOfDetCharCrossroads*>(node);
                nonthrowing_assert(dcc != nullptr);
                nonthrowing_assert(read_ss_ns < UINT32_MAX);
                if (dcc->matching)
                    cmd_MATCH(result);
                cmd_READ(result, read_ss_ns++);
                /* One filter with a branch per outgoing path; every target is queued */
                std::vector<codeset_t> codesets;
                std::vector<bookmark_id_t> branches;
                for (const DFA_CrossroadPath& p: dcc->crossroads) {
                    codesets.push_back(p.input);
                    branches.push_back(nodesBookmark(p.nxt_node));
                    addBranching(p.nxt_node);
                }
                bool can_spill = write_filter(result, bookmark_manager, codesets, branches);
                if (!can_spill)
                    break;
                if (dcc->crossroads.empty())
                    break;
                /* The target of the first path is the one written right after the filter */
                node = dcc->crossroads[0].nxt_node;
            } else {
                /* Unknown node type. assert() is compiled out under NDEBUG; without the explicit
                 * error the loop would see this node as landed, write a JUMP to the node's own
                 * bookmark and finish without reporting anything. */
                assert(false);
                error = -1;
                return;
            }
        }
    }
}
}