Fixed VM bugs, refactored the entire project to use the regexis024 namespace, removed some junk, added tests

This commit is contained in:
Андреев Григорий 2024-07-30 01:02:07 +03:00
parent b11afa72ea
commit 76f3742521
79 changed files with 4988 additions and 4875 deletions

View File

@ -72,7 +72,6 @@ struct Libregexis024BuildSystem {
"libregexis024fa/graph_to_bytecode/core.cpp", "libregexis024fa/graph_to_bytecode/core.cpp",
"libregexis024sol/common_codesets.cpp", "libregexis024sol/common_codesets.cpp",
"libregexis024sol/part_of_expr_that_tracks.cpp",
"libregexis024sol/expr_compiler.cpp", "libregexis024sol/expr_compiler.cpp",
"libregexis024sol/square_bracket_expression.cpp", "libregexis024sol/square_bracket_expression.cpp",
"libregexis024sol/sol_misc_base.cpp", "libregexis024sol/sol_misc_base.cpp",

View File

@ -10,6 +10,7 @@
#include <libregexis024vm/vm_opcodes.h> #include <libregexis024vm/vm_opcodes.h>
#include <libregexis024fa/tracking_fa_nodes.h> #include <libregexis024fa/tracking_fa_nodes.h>
namespace regexis024 {
const char* one_char_read_color = "black"; const char* one_char_read_color = "black";
const char* forking_color = "darkorchid1"; const char* forking_color = "darkorchid1";
const char* look_one_behind_color = "darkslateblue"; const char* look_one_behind_color = "darkslateblue";
@ -184,7 +185,7 @@ void print_fa(const FA_Container& fa, FILE* fd, const KnownTrackingTools& ktr,
/* Two Infoboxes */ /* Two Infoboxes */
auto stringifyTrackingVarType = [](tracking_var_type type) -> std::string { auto stringifyTrackingVarType = [](tracking_var_type_t type) -> std::string {
switch (type) { switch (type) {
case tracking_var_types::range: case tracking_var_types::range:
return "range"; return "range";
@ -199,7 +200,7 @@ void print_fa(const FA_Container& fa, FILE* fd, const KnownTrackingTools& ktr,
for (auto& p: ktr.track_names){ for (auto& p: ktr.track_names){
const SubtrackingNameInfo& tu = ktr.retrieval_info[p.second]; const SubtrackingNameInfo& tu = ktr.retrieval_info[p.second];
auto getRole = [](bool presence, tracking_var_type type, int first, int second, auto getRole = [](bool presence, tracking_var_type_t type, int first, int second,
const std::string& ARR_NAME) -> std::string { const std::string& ARR_NAME) -> std::string {
if (!presence) { if (!presence) {
assert(first == -1 && second == -1); assert(first == -1 && second == -1);
@ -264,7 +265,7 @@ void print_fa(const FA_Container& fa, FILE* fd, const KnownTrackingTools& ktr,
if (!isImmMovOpcode(cn->operation)) if (!isImmMovOpcode(cn->operation))
fprintf(stderr, "bad operation in node %lu\n", node->nodeId); fprintf(stderr, "bad operation in node %lu\n", node->nodeId);
snprintf(buf, 1024, "%s %hu %lu", snprintf(buf, 1024, "%s %hu %lu",
regex024_opcode_tostr(cn->operation), cn->key, cn->imm_value); opcode_to_str(cn->operation), cn->key, cn->imm_value);
print_edge(node, cn->nxt_node,std::string(buf), fd, bd); print_edge(node, cn->nxt_node,std::string(buf), fd, bd);
} else if (node->type == track_array_mov_halfinvariant){ } else if (node->type == track_array_mov_halfinvariant){
FA_NodeOfTrackArrayMovHalfinvariant* cn = dynamic_cast<FA_NodeOfTrackArrayMovHalfinvariant *>(node); FA_NodeOfTrackArrayMovHalfinvariant* cn = dynamic_cast<FA_NodeOfTrackArrayMovHalfinvariant *>(node);
@ -272,7 +273,7 @@ void print_fa(const FA_Container& fa, FILE* fd, const KnownTrackingTools& ktr,
if (!isCurPosMovOpcode(cn->operation)) if (!isCurPosMovOpcode(cn->operation))
fprintf(stderr, "bad operation in node %lu\n", node->nodeId); fprintf(stderr, "bad operation in node %lu\n", node->nodeId);
snprintf(buf, 1024, "%s %hu", snprintf(buf, 1024, "%s %hu",
regex024_opcode_tostr(cn->operation), cn->key); opcode_to_str(cn->operation), cn->key);
print_edge(node, cn->nxt_node,std::string(buf), fd, bd); print_edge(node, cn->nxt_node,std::string(buf), fd, bd);
} else if (node->type == det_char_crossroads){ } else if (node->type == det_char_crossroads){
FA_NodeOfDetCharCrossroads* cn = dynamic_cast<FA_NodeOfDetCharCrossroads *>(node); FA_NodeOfDetCharCrossroads* cn = dynamic_cast<FA_NodeOfDetCharCrossroads *>(node);
@ -323,3 +324,4 @@ void show_fa_with_sxiv_after_dot(const FA_Container& fa, const KnownTrackingTool
unlink(temp_gv); unlink(temp_gv);
unlink(temp_png); unlink(temp_png);
} }
}

View File

@ -5,8 +5,10 @@
#include <libregexis024sol/part_of_expr_that_tracks.h> #include <libregexis024sol/part_of_expr_that_tracks.h>
#include <libregexis024fa/selarr_priority_table.h> #include <libregexis024fa/selarr_priority_table.h>
namespace regexis024 {
/* Uses temporary file FAGraph.gv,png, dot command and sxiv */ /* Uses temporary file FAGraph.gv,png, dot command and sxiv */
void show_fa_with_sxiv_after_dot(const FA_Container& fa, const KnownTrackingTools& ktr, void show_fa_with_sxiv_after_dot(const FA_Container& fa, const KnownTrackingTools& ktr,
const RegexPriorityTable& priority_table); const RegexPriorityTable& priority_table);
}
#endif #endif

View File

@ -2,9 +2,7 @@
#include <functional> #include <functional>
#include <libregexis024vm/utils.h> #include <libregexis024vm/utils.h>
TreeWithStringsNode::TreeWithStringsNode(const std::string &val): val(val) { namespace regexis024 {
}
static const char* ch_empty = " "; static const char* ch_empty = " ";
static const char* ch_passing_by = "\u2502 "; static const char* ch_passing_by = "\u2502 ";
static const char* ch_connect_right_and_forward = "\u251c\u2500\u2500\u2500"; static const char* ch_connect_right_and_forward = "\u251c\u2500\u2500\u2500";
@ -25,7 +23,7 @@ size_t length_of_line(const std::string& str) {
while (pos < str.size()) { while (pos < str.size()) {
int32_t code; int32_t code;
size_t adj; size_t adj;
utf8_string_iterat(code, adj, pos, reinterpret_cast<const uint8_t*>(str.data()), str.size()); utf8_string_iterat(code, adj, pos, str.data(), str.size());
if (code < 0) if (code < 0)
return ch; return ch;
ch++; ch++;
@ -86,4 +84,4 @@ void printLines(const lines &in) {
for (auto& l: in) for (auto& l: in)
printf("%s\n", l.c_str()); printf("%s\n", l.c_str());
} }
}

View File

@ -6,20 +6,19 @@
#include <vector> #include <vector>
#include <string> #include <string>
namespace regexis024 {
typedef std::vector<std::string> lines; typedef std::vector<std::string> lines;
struct TreeWithStringsNode { struct TreeWithStringsNode {
std::string val; std::string val;
std::vector<TreeWithStringsNode> childeren; std::vector<TreeWithStringsNode> childeren;
explicit TreeWithStringsNode(const std::string &val);
TreeWithStringsNode() = default;
void toLines(lines& out) const; void toLines(lines& out) const;
}; };
lines wrapWithBox(const lines& in); lines wrapWithBox(const lines& in);
void printLines(const lines& in); void printLines(const lines& in);
}
#endif #endif

View File

@ -2,7 +2,8 @@
#include <stdio.h> #include <stdio.h>
#include <string> #include <string>
std::string thread_to_str(const REGEX_IS024_Thread& thread){ namespace regexis024 {
std::string thread_to_str(const Thread& thread){
if (!(thread.slot_occupation_status & SLOT_OCCUPIED)) if (!(thread.slot_occupation_status & SLOT_OCCUPIED))
return "{ unoccupied }"; return "{ unoccupied }";
char buf[1024]; char buf[1024];
@ -10,7 +11,7 @@ std::string thread_to_str(const REGEX_IS024_Thread& thread){
return buf; return buf;
} }
std::string stack_to_str(const REGEX_IS024_Stack& stack){ std::string stack_to_str(const SSID_Stack& stack){
std::string res = "{ "; std::string res = "{ ";
for (uint32_t i = 0; i < stack.sz; i++){ for (uint32_t i = 0; i < stack.sz; i++){
if (i != 0) if (i != 0)
@ -21,7 +22,7 @@ std::string stack_to_str(const REGEX_IS024_Stack& stack){
return res; return res;
} }
std::string slots_to_str(const REGEX_IS024_CONTEXT& ctx){ std::string slots_to_str(const VMContext& ctx){
if (!ctx.initialized) if (!ctx.initialized)
return "uninitialized"; return "uninitialized";
std::string READ_slots; std::string READ_slots;
@ -43,7 +44,7 @@ std::string slots_to_str(const REGEX_IS024_CONTEXT& ctx){
return buf; return buf;
} }
void debug_print_context(const REGEX_IS024_CONTEXT& ctx, const char* place) { void debug_print_context(const VMContext& ctx, const char* place) {
printf("== DEBUG `%s` ==\n", place); printf("== DEBUG `%s` ==\n", place);
printf("Active thread: %s, sifting_with: %s, match: %s\n%s\n", printf("Active thread: %s, sifting_with: %s, match: %s\n%s\n",
@ -52,7 +53,8 @@ void debug_print_context(const REGEX_IS024_CONTEXT& ctx, const char* place) {
slots_to_str(ctx).c_str()); slots_to_str(ctx).c_str());
} }
void debug_print_thread(const REGEX_IS024_Thread& thr, const char *place) { void debug_print_thread(const Thread& thr, const char *place) {
printf("== DEBUG `%s` ==\n", place); printf("== DEBUG `%s` ==\n", place);
printf("This thread: %s\n", thread_to_str(thr).c_str()); printf("This thread: %s\n", thread_to_str(thr).c_str());
} }
}

View File

@ -4,8 +4,10 @@
#include <libregexis024vm/libregexis024vm.h> #include <libregexis024vm/libregexis024vm.h>
#include <libregexis024vm/instruction_implementation.h> #include <libregexis024vm/instruction_implementation.h>
void debug_print_context(const REGEX_IS024_CONTEXT& ctx, const char* place); namespace regexis024 {
void debug_print_context(const VMContext& ctx, const char* place);
void debug_print_thread(const REGEX_IS024_Thread& thr, const char *place); void debug_print_thread(const Thread& thr, const char *place);
}
#endif #endif

View File

@ -1,6 +1,7 @@
#include <libregexis024fa/codeset.h> #include <libregexis024fa/codeset.h>
#include <assert.h> #include <assert.h>
namespace regexis024 {
codeset_t invert_set(const codeset_t &X) { codeset_t invert_set(const codeset_t &X) {
if (X.empty()) if (X.empty())
return {{0, UINT32_MAX}}; return {{0, UINT32_MAX}};
@ -118,3 +119,4 @@ std::string stringifyCodesetBase10(const codeset_t& CS) {
} }
return cs; return cs;
} }
}

View File

@ -6,6 +6,7 @@
#include <stdint.h> #include <stdint.h>
#include <string> #include <string>
namespace regexis024 {
typedef std::vector<std::pair<uint32_t, uint32_t>> codeset_t; typedef std::vector<std::pair<uint32_t, uint32_t>> codeset_t;
codeset_t invert_set(const codeset_t& X); codeset_t invert_set(const codeset_t& X);
@ -23,5 +24,6 @@ codeset_t codeset_of_one_char(uint32_t ch);
#define codeset_of_all codeset_t({{0, UINT32_MAX}}) #define codeset_of_all codeset_t({{0, UINT32_MAX}})
std::string stringifyCodesetBase10(const codeset_t& CS); std::string stringifyCodesetBase10(const codeset_t& CS);
}
#endif //LIBREGEXIS024_CODESET_H #endif //LIBREGEXIS024_CODESET_H

View File

@ -2,6 +2,7 @@
#include <assert.h> #include <assert.h>
namespace regexis024 {
ColoredCodesetSegment::ColoredCodesetSegment(uint32_t color, uint32_t right_code): color(color), right_code(right_code) {} ColoredCodesetSegment::ColoredCodesetSegment(uint32_t color, uint32_t right_code): color(color), right_code(right_code) {}
ColoredCodesetSegmentList::ColoredCodesetSegmentList() { ColoredCodesetSegmentList::ColoredCodesetSegmentList() {
@ -181,3 +182,4 @@ void ColoredCodeset::get_splits_of_non_dummy(std::vector<codeset_t> &res_input,
cur = cur->next; cur = cur->next;
} }
} }
}

View File

@ -7,8 +7,8 @@
#include <libregexis024fa/codeset.h> #include <libregexis024fa/codeset.h>
namespace regexis024 {
/* Used for determinizer. Nowhere else */ /* Used for determinizer. Nowhere else */
struct ColoredCodesetSegment { struct ColoredCodesetSegment {
uint32_t color; uint32_t color;
uint32_t right_code; uint32_t right_code;
@ -61,6 +61,6 @@ public:
void get_splits_of_non_dummy(std::vector<codeset_t>& res_input, void get_splits_of_non_dummy(std::vector<codeset_t>& res_input,
std::vector<std::vector<size_t>>& res_color_to_requests); std::vector<std::vector<size_t>>& res_color_to_requests);
}; };
}
#endif #endif

View File

@ -7,6 +7,7 @@
// #include <debugging_regexis024/debug_through_graphviz.h> // #include <debugging_regexis024/debug_through_graphviz.h>
// #endif // #endif
namespace regexis024 {
REGEX_IS024_FA_FirstStageFixInfo first_stage_fix_fa(FA_Container& sourceFa, FA_Container& resultFa) { REGEX_IS024_FA_FirstStageFixInfo first_stage_fix_fa(FA_Container& sourceFa, FA_Container& resultFa) {
assert(sourceFa.start); assert(sourceFa.start);
REGEX_IS024_FA_FirstStageFixInfo info; REGEX_IS024_FA_FirstStageFixInfo info;
@ -193,3 +194,4 @@ void regular_second_stage_fix(const FA_Container& sourceFa, FA_Container& result
yay_new_start(resultFa, ns); yay_new_start(resultFa, ns);
} }
} }
}

View File

@ -3,6 +3,7 @@
#include "finite_automaton.h" #include "finite_automaton.h"
namespace regexis024 {
struct REGEX_IS024_FA_FirstStageFixInfo{ struct REGEX_IS024_FA_FirstStageFixInfo{
bool fed_chars_extend_one_left = false; bool fed_chars_extend_one_left = false;
bool fed_chars_extend_one_right = false; bool fed_chars_extend_one_right = false;
@ -14,5 +15,6 @@ REGEX_IS024_FA_FirstStageFixInfo first_stage_fix_fa(FA_Container& sourceFa, FA_C
void regular_second_stage_fix(const FA_Container& sourceFa, FA_Container& resultFa, void regular_second_stage_fix(const FA_Container& sourceFa, FA_Container& resultFa,
const REGEX_IS024_FA_FirstStageFixInfo &info1); const REGEX_IS024_FA_FirstStageFixInfo &info1);
}
#endif //LIBREGEXIS024_FA_FIRST_STAGE_FIX_H #endif //LIBREGEXIS024_FA_FIRST_STAGE_FIX_H

View File

@ -1,6 +1,5 @@
#include <libregexis024fa/fa_make_deterministic.h> #include <libregexis024fa/fa_make_deterministic.h>
#include <libregexis024fa/misc_fa_funcs.h> #include <libregexis024fa/misc_fa_funcs.h>
#include <libregexis024vm/utils.h> /* to get exitf */
#include <assert.h> #include <assert.h>
#include <libregexis024fa/tracking_fa_nodes.h> #include <libregexis024fa/tracking_fa_nodes.h>
#include <vector> #include <vector>
@ -17,15 +16,15 @@
#define PR_DEB #define PR_DEB
#endif #endif
namespace regexis024 {
/* debug nonsence */ /* debug nonsence */
void input_fa_assert(const FA_Container& fa){ void input_fa_assert(const FA_Container& fa){
assert(fa.start); assert(fa.start);
for (FA_Node* node: fa.all){ for (FA_Node* node: fa.all){
if (node->type == one_char_read){ if (node->type == one_char_read){
assert(!dynamic_cast<FA_NodeOfOneCharRead*>(node)->second_ns); assert(!dynamic_cast<FA_NodeOfOneCharRead*>(node)->second_ns);
} else if (node->type == look_one_ahead || } else if (node->type == look_one_ahead || node->type == det_char_crossroads) {
node->type == det_char_crossroads){ assert(false);
exitf("not allowed at this stage\n");
} }
} }
} }
@ -84,23 +83,23 @@ struct CleanOperHistoryNode {
struct SelarrCompressionScheme { struct SelarrCompressionScheme {
size_t SN1, SN2 = 0, SN3 = 0; size_t SN1, SN2 = 0, SN3 = 0;
std::vector<int32_t> S1_to_S2; std::vector<int32_t> S1_to_S2;
std::vector<regex_tai_t> S2_to_sifter; std::vector<tai_t> S2_to_sifter;
std::vector<regex_tai_t> S3_to_sifter; std::vector<tai_t> S3_to_sifter;
const RegexPriorityTable& sifter; const RegexPriorityTable& sifter;
SelarrCompressionScheme(size_t sn1, const RegexPriorityTable &sifter) : SN1(sn1), sifter(sifter) { SelarrCompressionScheme(size_t sn1, const RegexPriorityTable &sifter) : SN1(sn1), sifter(sifter) {
assert(sifter.size() <= UINT32_MAX); assert(sifter.size() <= UINT32_MAX);
S1_to_S2.assign(SN1, -1); S1_to_S2.assign(SN1, -1);
for (regex_tai_t i = 0; i < sifter.size(); i++) { for (tai_t i = 0; i < sifter.size(); i++) {
auto& act = sifter[i].pos; auto& act = sifter[i].pos;
regex_tai_t first_on_s2 = S2_to_sifter.size(); tai_t first_on_s2 = S2_to_sifter.size();
S2_to_sifter.push_back(i); S2_to_sifter.push_back(i);
S1_to_S2[act.first] = first_on_s2; S1_to_S2[act.first] = first_on_s2;
if (act.type != tracking_var_types::dot_cur_pos) { if (act.type != tracking_var_types::dot_cur_pos) {
S3_to_sifter.push_back(i); S3_to_sifter.push_back(i);
} }
if (act.type == tracking_var_types::range) { if (act.type == tracking_var_types::range) {
regex_tai_t second_on_s2 = S2_to_sifter.size(); tai_t second_on_s2 = S2_to_sifter.size();
S2_to_sifter.push_back(i); S2_to_sifter.push_back(i);
S1_to_S2[act.second] = second_on_s2; S1_to_S2[act.second] = second_on_s2;
} }
@ -368,7 +367,7 @@ void building_detour(const SelarrCompressionScheme& cmp,
val2[key_s2] = tv->imm_value; val2[key_s2] = tv->imm_value;
} }
} }
add_history_update(TrackingOperationInFa(tv->operation, tv->key, tv->imm_value), Hop, Hv); add_history_update(TrackingOperationInFa{tv->operation, tv->key, tv->imm_value}, Hop, Hv);
} else if (v->type == track_array_mov_halfinvariant) { } else if (v->type == track_array_mov_halfinvariant) {
FA_NodeOfTrackArrayMovHalfinvariant* tv = dynamic_cast<FA_NodeOfTrackArrayMovHalfinvariant*>(v); FA_NodeOfTrackArrayMovHalfinvariant* tv = dynamic_cast<FA_NodeOfTrackArrayMovHalfinvariant*>(v);
if (isSelarrOpcode(tv->operation)) { if (isSelarrOpcode(tv->operation)) {
@ -383,7 +382,7 @@ void building_detour(const SelarrCompressionScheme& cmp,
} }
} }
} }
add_history_update(TrackingOperationInFa(tv->operation, tv->key), Hop, Hv); add_history_update(TrackingOperationInFa{tv->operation, tv->key}, Hop, Hv);
} }
} else if (v->type == match || v->type == one_char_read) { } else if (v->type == match || v->type == one_char_read) {
// Determinization stop // Determinization stop
@ -521,7 +520,7 @@ ColoredCodeset get_pretreated_cc(FA_Container& sourceFa) {
} }
// todo add a check on size of dfa // todo add a check on size of dfa
void try_determinize_fa(FA_Container &sourceFa, const RegexPriorityTable &sifter, regex_tai_t selarr_sz, void try_determinize_fa(FA_Container &sourceFa, const RegexPriorityTable &sifter, tai_t selarr_sz,
const REGEX_IS024_FA_FirstStageFixInfo &info1, FA_Container &resFa, int &error, int& had_to_fork) const REGEX_IS024_FA_FirstStageFixInfo &info1, FA_Container &resFa, int &error, int& had_to_fork)
{ {
/* During execuion, i will create pointers to field res.start and store them (inside the scope of this function) /* During execuion, i will create pointers to field res.start and store them (inside the scope of this function)
@ -662,4 +661,4 @@ void try_determinize_fa(FA_Container &sourceFa, const RegexPriorityTable &sifter
reattach_fa_node_edge(p.first, gdp.superstate_megabush_constructed[p.second]); reattach_fa_node_edge(p.first, gdp.superstate_megabush_constructed[p.second]);
} }
} }
}

View File

@ -4,7 +4,9 @@
#include <libregexis024fa/fa_first_stage_fix.h> #include <libregexis024fa/fa_first_stage_fix.h>
#include <libregexis024fa/selarr_priority_table.h> #include <libregexis024fa/selarr_priority_table.h>
void try_determinize_fa(FA_Container &sourceFa, const RegexPriorityTable &sifter, regex_tai_t selarr_sz, namespace regexis024 {
void try_determinize_fa(FA_Container &sourceFa, const RegexPriorityTable &sifter, tai_t selarr_sz,
const REGEX_IS024_FA_FirstStageFixInfo &info1, FA_Container &resFa, int &error, int& had_to_fork); const REGEX_IS024_FA_FirstStageFixInfo &info1, FA_Container &resFa, int &error, int& had_to_fork);
}
#endif //LIBREGEXIS024_FA_MAKE_DETERMINISTIC_H #endif //LIBREGEXIS024_FA_MAKE_DETERMINISTIC_H

View File

@ -2,6 +2,7 @@
#include <libregexis024vm/utils.h> #include <libregexis024vm/utils.h>
#include <assert.h> #include <assert.h>
namespace regexis024 {
bool FA_Node::empty() { bool FA_Node::empty() {
return type != one_char_read && type != det_char_crossroads; return type != one_char_read && type != det_char_crossroads;
} }
@ -74,16 +75,16 @@ FA_NodeOfLookOneAhead::FA_NodeOfLookOneAhead(const codeset_t &restriction) : res
type = look_one_ahead; type = look_one_ahead;
} }
FA_NodeOfTrackArrayMovImm::FA_NodeOfTrackArrayMovImm(regex024_opcode operation, uint16_t key, uint64_t immValue) : FA_NodeOfTrackArrayMovImm::FA_NodeOfTrackArrayMovImm(opcode_t operation, uint16_t key, uint64_t immValue) :
operation(operation), key(key), imm_value(immValue) {type = track_array_mov_imm;} operation(operation), key(key), imm_value(immValue) {type = track_array_mov_imm;}
// //
FA_NodeOfTrackArrayMovHalfinvariant::FA_NodeOfTrackArrayMovHalfinvariant(regex024_opcode operation, uint16_t key): FA_NodeOfTrackArrayMovHalfinvariant::FA_NodeOfTrackArrayMovHalfinvariant(opcode_t operation, uint16_t key):
operation(operation), key(key){type = track_array_mov_halfinvariant;} operation(operation), key(key){type = track_array_mov_halfinvariant;}
// //
void FA_NodeOfDetCharCrossroads::apply_lookahead_restriction(const codeset_t &restriction) { void FA_NodeOfDetCharCrossroads::apply_lookahead_restriction(const codeset_t &restriction) {
exitf("What?? Oh, no, no. I am NOT doing it"); assert(false);
} }
FA_NodeOfDetCharCrossroads::FA_NodeOfDetCharCrossroads(const std::vector<DFA_CrossroadPath> &crossroads) FA_NodeOfDetCharCrossroads::FA_NodeOfDetCharCrossroads(const std::vector<DFA_CrossroadPath> &crossroads)
@ -108,7 +109,7 @@ void FA_Container::registerNew(FA_Node *node) {
try { try {
node->nodeId = (int64_t)all.size(); node->nodeId = (int64_t)all.size();
all.push_back(node); all.push_back(node);
} catch (const std::bad_alloc& ba) { } catch (const std::exception& ba) {
delete node; delete node;
throw; throw;
} }
@ -135,7 +136,8 @@ bs(OneCharRead, const codeset_t& filter COMMA bool second_namespace, filter COMM
bs(Forking, , ) bs(Forking, , )
bs(LookOneBehind, const codeset_t& filter, filter) bs(LookOneBehind, const codeset_t& filter, filter)
bs(LookOneAhead, const codeset_t& filter, filter) bs(LookOneAhead, const codeset_t& filter, filter)
bs(TrackArrayMovImm, regex024_opcode operation COMMA uint16_t key COMMA uint64_t immValue, bs(TrackArrayMovImm, opcode_t operation COMMA uint16_t key COMMA uint64_t immValue,
operation COMMA key COMMA immValue) operation COMMA key COMMA immValue)
bs(TrackArrayMovHalfinvariant, regex024_opcode operation COMMA uint16_t key, operation COMMA key) bs(TrackArrayMovHalfinvariant, opcode_t operation COMMA uint16_t key, operation COMMA key)
bs(DetCharCrossroads, ,{}) bs(DetCharCrossroads, ,{})
}

View File

@ -6,6 +6,7 @@
#include <libregexis024fa/codeset.h> #include <libregexis024fa/codeset.h>
#include <libregexis024vm/vm_opcodes.h> #include <libregexis024vm/vm_opcodes.h>
namespace regexis024 {
enum FA_Node_type: uint8_t { enum FA_Node_type: uint8_t {
match, match,
one_char_read, one_char_read,
@ -89,19 +90,19 @@ struct FA_NodeOfLookOneAhead: public FA_NodePathPart{
/* .type == track_array_mov_imm */ /* .type == track_array_mov_imm */
struct FA_NodeOfTrackArrayMovImm: public FA_NodePathPart{ struct FA_NodeOfTrackArrayMovImm: public FA_NodePathPart{
regex024_opcode operation; opcode_t operation;
uint16_t key; uint16_t key;
uint64_t imm_value; uint64_t imm_value;
FA_NodeOfTrackArrayMovImm(regex024_opcode operation, uint16_t key, uint64_t immValue); FA_NodeOfTrackArrayMovImm(opcode_t operation, uint16_t key, uint64_t immValue);
}; };
/* .type == track_array_mov_halfinvariant */ /* .type == track_array_mov_halfinvariant */
struct FA_NodeOfTrackArrayMovHalfinvariant: public FA_NodePathPart{ struct FA_NodeOfTrackArrayMovHalfinvariant: public FA_NodePathPart{
regex024_opcode operation; opcode_t operation;
uint16_t key; uint16_t key;
FA_NodeOfTrackArrayMovHalfinvariant(regex024_opcode operation, uint16_t key); FA_NodeOfTrackArrayMovHalfinvariant(opcode_t operation, uint16_t key);
}; };
struct DFA_CrossroadPath{ struct DFA_CrossroadPath{
@ -139,11 +140,12 @@ struct FA_Container{
FA_NodeOfForking* makeForking(); FA_NodeOfForking* makeForking();
FA_NodeOfLookOneBehind* makeLookOneBehind(const codeset_t& filter); FA_NodeOfLookOneBehind* makeLookOneBehind(const codeset_t& filter);
FA_NodeOfLookOneAhead* makeLookOneAhead(const codeset_t& filter); FA_NodeOfLookOneAhead* makeLookOneAhead(const codeset_t& filter);
FA_NodeOfTrackArrayMovImm* makeTrackArrayMovImm(regex024_opcode operation, uint16_t key, uint64_t immValue); FA_NodeOfTrackArrayMovImm* makeTrackArrayMovImm(opcode_t operation, uint16_t key, uint64_t immValue);
FA_NodeOfTrackArrayMovHalfinvariant* makeTrackArrayMovHalfinvariant(regex024_opcode operation, uint16_t key); FA_NodeOfTrackArrayMovHalfinvariant* makeTrackArrayMovHalfinvariant(opcode_t operation, uint16_t key);
FA_NodeOfDetCharCrossroads* makeDetCharCrossroads(); FA_NodeOfDetCharCrossroads* makeDetCharCrossroads();
~FA_Container(); ~FA_Container();
}; };
}
#endif //LIBREGEXIS024_FINITE_AUTOMATON_H #endif //LIBREGEXIS024_FINITE_AUTOMATON_H

View File

@ -5,8 +5,8 @@
#include <libregexis024fa/graph_to_bytecode/filter.h> #include <libregexis024fa/graph_to_bytecode/filter.h>
namespace regexis024 {
#define nonthrowing_assert(expr) if (!(expr)) {error = -1; return; } #define nonthrowing_assert(expr) if (!(expr)) {error = -1; return; }
void compilation_core(std::vector<uint8_t>& result, FA_Container& fa, explicit_bookmarks& bookmark_manager, void compilation_core(std::vector<uint8_t>& result, FA_Container& fa, explicit_bookmarks& bookmark_manager,
size_t& first_read_ns, size_t& second_read_ns, size_t& fork_ss_ns, int& error) size_t& first_read_ns, size_t& second_read_ns, size_t& fork_ss_ns, int& error)
{ {
@ -72,7 +72,7 @@ void compilation_core(std::vector<uint8_t>& result, FA_Container& fa, explicit_b
} }
if (nxt_options.size() >= 2) { if (nxt_options.size() >= 2) {
nonthrowing_assert(fork_ss_ns < UINT32_MAX); nonthrowing_assert(fork_ss_ns < UINT32_MAX);
regex_sslot_id_t sslot = fork_ss_ns++; sslot_id_t sslot = fork_ss_ns++;
for (size_t i = 0; i + 1 < nxt_options.size(); i++) { for (size_t i = 0; i + 1 < nxt_options.size(); i++) {
cmd_FORK(result, bookmark_manager, sslot, nodesBookmark(nxt_options[i])); cmd_FORK(result, bookmark_manager, sslot, nodesBookmark(nxt_options[i]));
addBranching(nxt_options[i]); addBranching(nxt_options[i]);
@ -115,3 +115,4 @@ void compilation_core(std::vector<uint8_t>& result, FA_Container& fa, explicit_b
belated_sslot_id(result, not_yet_dedicated_second_read_ns_ssids[j], j + first_read_ns); belated_sslot_id(result, not_yet_dedicated_second_read_ns_ssids[j], j + first_read_ns);
} }
} }
}

View File

@ -4,7 +4,9 @@
#include <libregexis024fa/finite_automaton.h> #include <libregexis024fa/finite_automaton.h>
#include <libregexis024fa/graph_to_bytecode/natural_compiler_utils.h> #include <libregexis024fa/graph_to_bytecode/natural_compiler_utils.h>
namespace regexis024 {
void compilation_core(std::vector<uint8_t>& result, FA_Container& fa, explicit_bookmarks& bookmark_manager, void compilation_core(std::vector<uint8_t>& result, FA_Container& fa, explicit_bookmarks& bookmark_manager,
size_t& first_read_ns, size_t& second_read_ns, size_t& fork_ss_ns, int& error); size_t& first_read_ns, size_t& second_read_ns, size_t& fork_ss_ns, int& error);
}
#endif #endif

View File

@ -7,21 +7,22 @@
#include <libregexis024fa/graph_to_bytecode/core.h> #include <libregexis024fa/graph_to_bytecode/core.h>
namespace regexis024 {
void write_priority_table_actions(std::vector<uint8_t>& result, RegexPriorityTable &priority_table) { void write_priority_table_actions(std::vector<uint8_t>& result, RegexPriorityTable &priority_table) {
for (RegexPriorityTableAction& act: priority_table) { for (RegexPriorityTableAction& act: priority_table) {
if (act.pos.isForRange()) { if (act.pos.isForRange()) {
write_byte(result, regex024_opcodes::DDIST_RABX_SELARR); write_byte(result, opcodes::DDIST_RABX_SELARR);
write_tai(result, act.pos.first); write_tai(result, act.pos.first);
write_tai(result, act.pos.second); write_tai(result, act.pos.second);
} else { } else {
write_byte(result, regex024_opcodes::DMOV_RABX_SELARR); write_byte(result, opcodes::DMOV_RABX_SELARR);
write_tai(result, act.pos.first); write_tai(result, act.pos.first);
} }
write_byte(result, act.minimize ? write_byte(result, act.minimize ?
regex024_opcodes::SIFTPRIOR_MIN_RABX : opcodes::SIFTPRIOR_MIN_RABX :
regex024_opcodes::SIFTPRIOR_MAX_RABX); opcodes::SIFTPRIOR_MAX_RABX);
} }
write_byte(result, regex024_opcodes::SIFT_DONE); write_byte(result, opcodes::SIFT_DONE);
} }
struct belate_initialization_parameters { struct belate_initialization_parameters {
@ -30,7 +31,7 @@ struct belate_initialization_parameters {
size_t todo_pos_second_ns_size; size_t todo_pos_second_ns_size;
void complete_it(std::vector<uint8_t>& result, void complete_it(std::vector<uint8_t>& result,
regex_sslot_id_t first_read_ns, regex_sslot_id_t second_read_ns, regex_sslot_id_t fork_ss_ns) sslot_id_t first_read_ns, sslot_id_t second_read_ns, sslot_id_t fork_ss_ns)
{ {
assert((uint64_t)first_read_ns + (uint64_t)second_read_ns <= UINT32_MAX); assert((uint64_t)first_read_ns + (uint64_t)second_read_ns <= UINT32_MAX);
belated_sslot_id(result, todo_pos_read_ss_n , first_read_ns + second_read_ns); belated_sslot_id(result, todo_pos_read_ss_n , first_read_ns + second_read_ns);
@ -47,27 +48,27 @@ belate_initialization_parameters write_some_normal_initialization(std::vector<ui
{ {
belate_initialization_parameters todo; belate_initialization_parameters todo;
write_byte(result, regex024_opcodes::PARAM_READ_SS_NUMBER); write_byte(result, opcodes::PARAM_READ_SS_NUMBER);
todo.todo_pos_read_ss_n = result.size(); todo.todo_pos_read_ss_n = result.size();
write_sslot_id(result, 0); // Belate write_sslot_id(result, 0); // Belate
write_byte(result, regex024_opcodes::PARAM_FORK_SS_NUMBER); write_byte(result, opcodes::PARAM_FORK_SS_NUMBER);
todo.todo_pos_fork_ss_n = result.size(); todo.todo_pos_fork_ss_n = result.size();
write_sslot_id(result, 0); // Belate write_sslot_id(result, 0); // Belate
write_byte(result, regex024_opcodes::PARAM_SELARR_LEN); write_byte(result, opcodes::PARAM_SELARR_LEN);
write_tai(result, selarr_size); write_tai(result, selarr_size);
write_byte(result, regex024_opcodes::MSG_MULTISTART_ALLOWED); write_byte(result, opcodes::MSG_MULTISTART_ALLOWED);
write_byte(result, 1); write_byte(result, 1);
write_byte(result, regex024_opcodes::MSG_FED_INPUT_EXTENDED); write_byte(result, opcodes::MSG_FED_INPUT_EXTENDED);
write_byte(result, info1.fed_chars_extend_one_left ? 1 : 0); write_byte(result, info1.fed_chars_extend_one_left ? 1 : 0);
write_byte(result, info1.fed_chars_extend_one_right ? 1 : 0); write_byte(result, info1.fed_chars_extend_one_right ? 1 : 0);
todo.todo_pos_second_ns_size = result.size(); todo.todo_pos_second_ns_size = result.size();
write_sslot_id(result, 0); // Belate write_sslot_id(result, 0); // Belate
write_byte(result, regex024_opcodes::INIT); write_byte(result, opcodes::INIT);
return todo; return todo;
} }
@ -87,7 +88,7 @@ void compile_fa_to_regexis024_bytecode(std::vector<uint8_t>& result,
write_priority_table_actions(result, priority_table); write_priority_table_actions(result, priority_table);
bookmark_manager.land_bookmark(result, BM_after_sift); bookmark_manager.land_bookmark(result, BM_after_sift);
write_byte(result, regex024_opcodes::PARAM_COLSIFTFUNC_SET); write_byte(result, opcodes::PARAM_COLSIFTFUNC_SET);
bookmark_manager.write_unresolved_reference(result, BM_sift_function); bookmark_manager.write_unresolved_reference(result, BM_sift_function);
} }
@ -100,3 +101,4 @@ void compile_fa_to_regexis024_bytecode(std::vector<uint8_t>& result,
init_param_todo.complete_it(result, first_read_ns, second_read_ns, fork_ss_ns); init_param_todo.complete_it(result, first_read_ns, second_read_ns, fork_ss_ns);
bookmark_manager.finish(result); bookmark_manager.finish(result);
} }
}

View File

@ -7,8 +7,10 @@
#include <libregexis024fa/selarr_priority_table.h> #include <libregexis024fa/selarr_priority_table.h>
#include <libregexis024fa/fa_first_stage_fix.h> #include <libregexis024fa/fa_first_stage_fix.h>
namespace regexis024 {
void compile_fa_to_regexis024_bytecode(std::vector<uint8_t>& result, FA_Container& fa, RegexPriorityTable& priority_table, void compile_fa_to_regexis024_bytecode(std::vector<uint8_t>& result, FA_Container& fa, RegexPriorityTable& priority_table,
size_t selarr_size, const REGEX_IS024_FA_FirstStageFixInfo& info1, int& error); size_t selarr_size, const REGEX_IS024_FA_FirstStageFixInfo& info1, int& error);
}
#endif #endif

View File

@ -4,32 +4,33 @@
#include <algorithm> #include <algorithm>
#include <libregexis024fa/graph_to_bytecode/writing_commands.h> #include <libregexis024fa/graph_to_bytecode/writing_commands.h>
namespace regexis024 {
std::vector<FilterSegment> convert_to_compSeg(const std::vector<codeset_t>& crossroad_codesets) std::vector<FilterSegment> convert_to_compSeg(const std::vector<codeset_t>& crossroad_codesets)
{ {
std::vector<FilterSegment> compSeg; std::vector<FilterSegment> compSeg;
std::vector<FilterSegment> seg; std::vector<FilterSegment> seg;
for (size_t i = 0; i < crossroad_codesets.size(); i++) { for (size_t i = 0; i < crossroad_codesets.size(); i++) {
for (auto& p: crossroad_codesets[i]) { for (auto& p: crossroad_codesets[i]) {
seg.emplace_back(i, p.first, p.second); seg.push_back({(ssize_t)i, p.first, p.second});
} }
} }
std::sort(seg.begin(), seg.end(), std::sort(seg.begin(), seg.end(),
[](const FilterSegment& a, const FilterSegment& b)->bool{return a.L < b.L;}); [](const FilterSegment& a, const FilterSegment& b)->bool{return a.L < b.L;});
if (seg.empty()) { if (seg.empty()) {
compSeg.emplace_back(-1, 0, UINT32_MAX); compSeg.push_back({-1, 0, UINT32_MAX});
} else { } else {
if (seg[0].L > 0) if (seg[0].L > 0)
compSeg.emplace_back(-1, 0, seg[0].L - 1); compSeg.push_back({-1, 0, seg[0].L - 1});
size_t N = seg.size(); size_t N = seg.size();
for (size_t i = 0; i + 1 < N; i++) { for (size_t i = 0; i + 1 < N; i++) {
compSeg.push_back(seg[i]); compSeg.push_back(seg[i]);
assert(seg[i].R < seg[i + 1].L); assert(seg[i].R < seg[i + 1].L);
if (seg[i].R + 1 < seg[i + 1].L) if (seg[i].R + 1 < seg[i + 1].L)
compSeg.emplace_back(-1, seg[i].R + 1, seg[i + 1].L - 1); compSeg.push_back({-1, seg[i].R + 1, seg[i + 1].L - 1});
} }
compSeg.push_back(seg.back()); compSeg.push_back(seg.back());
if (seg.back().R < UINT32_MAX) if (seg.back().R < UINT32_MAX)
compSeg.emplace_back(-1, seg[N - 1].R + 1, UINT32_MAX); compSeg.push_back({-1, seg[N - 1].R + 1, UINT32_MAX});
} }
assert(!compSeg.empty()); assert(!compSeg.empty());
return compSeg; return compSeg;
@ -115,6 +116,4 @@ bool write_filter(std::vector<uint8_t>& result, explicit_bookmarks& bookmark_man
} }
return relies_on_proper_ending; return relies_on_proper_ending;
} }
}
FilterSegment::FilterSegment(ssize_t color, uint32_t l, uint32_t r): color(color), L(l), R(r) {}
//

View File

@ -6,16 +6,17 @@
#include <libregexis024fa/codeset.h> #include <libregexis024fa/codeset.h>
#include <libregexis024fa/graph_to_bytecode/natural_compiler_utils.h> #include <libregexis024fa/graph_to_bytecode/natural_compiler_utils.h>
namespace regexis024 {
struct FilterSegment { struct FilterSegment {
ssize_t color; ssize_t color;
uint32_t L, R; uint32_t L;
uint32_t R;
FilterSegment(ssize_t color, uint32_t l, uint32_t r);
}; };
/* Return whether user of function must place [0]'th option after the filter /* Return whether user of function must place [0]'th option after the filter
* The filter can end up being written in such a way that the end will never be reached */ * The filter can end up being written in such a way that the end will never be reached */
bool write_filter(std::vector<uint8_t>& result, explicit_bookmarks& bookmark_manager, bool write_filter(std::vector<uint8_t>& result, explicit_bookmarks& bookmark_manager,
const std::vector<codeset_t>& crossroad_codesets, const std::vector<bookmark_id_t>& crossroad_marks); const std::vector<codeset_t>& crossroad_codesets, const std::vector<bookmark_id_t>& crossroad_marks);
}
#endif #endif

View File

@ -2,8 +2,8 @@
#include <assert.h> #include <assert.h>
#include <libregexis024vm/vm_opcodes.h> #include <libregexis024vm/vm_opcodes.h>
namespace regexis024 {
#define push_to_res_least_signif result.push_back(x & 0xffLU); x >>= 8 #define push_to_res_least_signif result.push_back(x & 0xffLU); x >>= 8
void write_byte(std::vector<uint8_t>& result, uint8_t x) { void write_byte(std::vector<uint8_t>& result, uint8_t x) {
result.push_back(x); result.push_back(x);
} }
@ -47,27 +47,27 @@ void belated_quadword(std::vector<uint8_t>& result, size_t pos, uint64_t value)
} }
#undef put_belated_to_res #undef put_belated_to_res
void write_sslot_id(std::vector<uint8_t>& result, regex_sslot_id_t x) { void write_sslot_id(std::vector<uint8_t>& result, sslot_id_t x) {
write_doubleword(result, x); write_doubleword(result, x);
} }
void write_tai(std::vector<uint8_t>& result, regex_tai_t x) { void write_tai(std::vector<uint8_t>& result, tai_t x) {
write_word(result, x); write_word(result, x);
} }
void write_near_ptr(std::vector<uint8_t>& result, regex_near_ptr_t x) { void write_near_ptr(std::vector<uint8_t>& result, near_ptr_t x) {
write_quadword(result, x); write_quadword(result, x);
} }
void belated_sslot_id(std::vector<uint8_t>& result, size_t pos, regex_sslot_id_t value) { void belated_sslot_id(std::vector<uint8_t>& result, size_t pos, sslot_id_t value) {
belated_doubleword(result, pos, value); belated_doubleword(result, pos, value);
} }
void belated_tai(std::vector<uint8_t>& result, size_t pos, regex_tai_t value) { void belated_tai(std::vector<uint8_t>& result, size_t pos, tai_t value) {
belated_word(result, pos, value); belated_word(result, pos, value);
} }
void belated_near_ptr(std::vector<uint8_t>& result, size_t pos, regex_near_ptr_t value) { void belated_near_ptr(std::vector<uint8_t>& result, size_t pos, near_ptr_t value) {
belated_quadword(result, pos, value); belated_quadword(result, pos, value);
} }
@ -111,5 +111,6 @@ bookmark_id_t explicit_bookmarks::new_range_of_bookmarks(size_t n) {
bool explicit_bookmarks::has_landed(bookmark_id_t bm) { bool explicit_bookmarks::has_landed(bookmark_id_t bm) {
return pile[bm].placed_somewhere; return pile[bm].placed_somewhere;
} }
}
#undef put_belated_to_res #undef put_belated_to_res

View File

@ -4,7 +4,7 @@
#include <stdint.h> #include <stdint.h>
#include <libregexis024vm/vm_opcodes_types.h> #include <libregexis024vm/vm_opcodes_types.h>
#include <vector> #include <vector>
namespace regexis024 {
void write_byte(std::vector<uint8_t>& result, uint8_t x); void write_byte(std::vector<uint8_t>& result, uint8_t x);
void write_word(std::vector<uint8_t>& result, uint16_t x); void write_word(std::vector<uint8_t>& result, uint16_t x);
void write_doubleword(std::vector<uint8_t>& result, uint32_t x); void write_doubleword(std::vector<uint8_t>& result, uint32_t x);
@ -16,13 +16,13 @@ void belated_doubleword(std::vector<uint8_t>& result, size_t pos, uint32_t value
void belated_quadword(std::vector<uint8_t>& result, size_t pos, uint64_t value); void belated_quadword(std::vector<uint8_t>& result, size_t pos, uint64_t value);
void write_sslot_id(std::vector<uint8_t>& result, regex_sslot_id_t x); void write_sslot_id(std::vector<uint8_t>& result, sslot_id_t x);
void write_tai(std::vector<uint8_t>& result, regex_tai_t x); void write_tai(std::vector<uint8_t>& result, tai_t x);
void write_near_ptr(std::vector<uint8_t>& result, regex_near_ptr_t x); void write_near_ptr(std::vector<uint8_t>& result, near_ptr_t x);
void belated_sslot_id(std::vector<uint8_t>& result, size_t pos, regex_sslot_id_t value); void belated_sslot_id(std::vector<uint8_t>& result, size_t pos, sslot_id_t value);
void belated_tai(std::vector<uint8_t>& result, size_t pos, regex_tai_t value); void belated_tai(std::vector<uint8_t>& result, size_t pos, tai_t value);
void belated_near_ptr(std::vector<uint8_t>& result, size_t pos, regex_near_ptr_t value); void belated_near_ptr(std::vector<uint8_t>& result, size_t pos, near_ptr_t value);
// constexpr uint64_t INSTRUCTION_SZ = REGEX024_BYTECODE_INSTRUCTION_SZ; // constexpr uint64_t INSTRUCTION_SZ = REGEX024_BYTECODE_INSTRUCTION_SZ;
// constexpr uint64_t SSLOT_ID_SZ = REGEX024_BYTECODE_SSLOT_ID_SZ; // constexpr uint64_t SSLOT_ID_SZ = REGEX024_BYTECODE_SSLOT_ID_SZ;
@ -58,6 +58,6 @@ struct explicit_bookmarks {
bool has_landed(bookmark_id_t bm); bool has_landed(bookmark_id_t bm);
}; };
}
#endif #endif

View File

@ -2,20 +2,21 @@
#include <libregexis024vm/vm_opcodes.h> #include <libregexis024vm/vm_opcodes.h>
#include <assert.h> #include <assert.h>
namespace regexis024 {
void cmd_JUMP(std::vector<uint8_t>& result, explicit_bookmarks& bookmark_manager, bookmark_id_t dest) { void cmd_JUMP(std::vector<uint8_t>& result, explicit_bookmarks& bookmark_manager, bookmark_id_t dest) {
write_byte(result, regex024_opcodes::JUMP); write_byte(result, opcodes::JUMP);
bookmark_manager.write_unresolved_reference(result, dest); bookmark_manager.write_unresolved_reference(result, dest);
} }
constexpr regex024_opcode cmp_EQUAL[4] = {regex024_opcodes::JCEQUAL_B, regex024_opcodes::JCEQUAL_W, constexpr opcode_t cmp_EQUAL[4] = {opcodes::JCEQUAL_B, opcodes::JCEQUAL_W,
regex024_opcodes::JCEQUAL_DW, regex024_opcodes::JCEQUAL_QW}; opcodes::JCEQUAL_DW, opcodes::JCEQUAL_QW};
constexpr regex024_opcode cmp_LESS[4] = {regex024_opcodes::JCLESS_B, regex024_opcodes::JCLESS_W, constexpr opcode_t cmp_LESS[4] = {opcodes::JCLESS_B, opcodes::JCLESS_W,
regex024_opcodes::JCLESS_DW, regex024_opcodes::JCLESS_QW}; opcodes::JCLESS_DW, opcodes::JCLESS_QW};
constexpr regex024_opcode cmp_GRTR[4] = {regex024_opcodes::JCGRTR_B, regex024_opcodes::JCGRTR_W, constexpr opcode_t cmp_GRTR[4] = {opcodes::JCGRTR_B, opcodes::JCGRTR_W,
regex024_opcodes::JCGRTR_DW, regex024_opcodes::JCGRTR_QW}; opcodes::JCGRTR_DW, opcodes::JCGRTR_QW};
void cmd_JC(const regex024_opcode cmpT[4], void cmd_JC(const opcode_t cmpT[4],
std::vector<uint8_t>& result, explicit_bookmarks& bookmark_manager, uint64_t val, bookmark_id_t dest) std::vector<uint8_t>& result, explicit_bookmarks& bookmark_manager, uint64_t val, bookmark_id_t dest)
{ {
if (val <= UINT8_MAX) { if (val <= UINT8_MAX) {
@ -48,28 +49,29 @@ void cmd_JCGRTR(std::vector<uint8_t>& result, explicit_bookmarks& bookmark_manag
} }
void cmd_DIE(std::vector<uint8_t> &result) { void cmd_DIE(std::vector<uint8_t> &result) {
write_byte(result, regex024_opcodes::DIE); write_byte(result, opcodes::DIE);
} }
void cmd_MATCH(std::vector<uint8_t> &result) { void cmd_MATCH(std::vector<uint8_t> &result) {
write_byte(result, regex024_opcodes::MATCH); write_byte(result, opcodes::MATCH);
} }
void cmd_READ_first_ns(std::vector<uint8_t>& result, size_t slot) { void cmd_READ_first_ns(std::vector<uint8_t>& result, size_t slot) {
assert(slot <= UINT32_MAX); assert(slot <= UINT32_MAX);
write_byte(result, regex024_opcodes::READ); write_byte(result, opcodes::READ);
write_sslot_id(result, slot); write_sslot_id(result, slot);
} }
void cmd_FORK(std::vector<uint8_t> &result, explicit_bookmarks& bookmark_manager, size_t slot, bookmark_id_t dest) { void cmd_FORK(std::vector<uint8_t> &result, explicit_bookmarks& bookmark_manager, size_t slot, bookmark_id_t dest) {
assert(slot <= UINT32_MAX); assert(slot <= UINT32_MAX);
write_byte(result, regex024_opcodes::FORK); write_byte(result, opcodes::FORK);
write_sslot_id(result, slot); write_sslot_id(result, slot);
bookmark_manager.write_unresolved_reference(result, dest); bookmark_manager.write_unresolved_reference(result, dest);
} }
void cmd_READ_second_ns(std::vector<uint8_t>& result, std::vector<size_t>& belate_second_read_ns_slot_args) { void cmd_READ_second_ns(std::vector<uint8_t>& result, std::vector<size_t>& belate_second_read_ns_slot_args) {
write_byte(result, regex024_opcodes::READ); write_byte(result, opcodes::READ);
belate_second_read_ns_slot_args.push_back(result.size()); belate_second_read_ns_slot_args.push_back(result.size());
write_sslot_id(result, 0); write_sslot_id(result, 0);
} }
}

View File

@ -4,6 +4,7 @@
#include <libregexis024fa/graph_to_bytecode/natural_compiler_utils.h> #include <libregexis024fa/graph_to_bytecode/natural_compiler_utils.h>
#include <libregexis024vm/vm_opcodes.h> #include <libregexis024vm/vm_opcodes.h>
namespace regexis024 {
void cmd_JUMP(std::vector<uint8_t>& result, explicit_bookmarks& bookmark_manager, bookmark_id_t dest); void cmd_JUMP(std::vector<uint8_t>& result, explicit_bookmarks& bookmark_manager, bookmark_id_t dest);
void cmd_JCEQUAL(std::vector<uint8_t>& result, explicit_bookmarks& bookmark_manager, uint64_t val, bookmark_id_t dest); void cmd_JCEQUAL(std::vector<uint8_t>& result, explicit_bookmarks& bookmark_manager, uint64_t val, bookmark_id_t dest);
@ -16,5 +17,6 @@ void cmd_MATCH(std::vector<uint8_t>& result);
void cmd_READ_first_ns(std::vector<uint8_t>& result, size_t slot); void cmd_READ_first_ns(std::vector<uint8_t>& result, size_t slot);
void cmd_READ_second_ns(std::vector<uint8_t>& result, std::vector<size_t>& belate_second_read_ns_slot_args); void cmd_READ_second_ns(std::vector<uint8_t>& result, std::vector<size_t>& belate_second_read_ns_slot_args);
void cmd_FORK(std::vector<uint8_t> &result, explicit_bookmarks& bookmark_manager, size_t slot, bookmark_id_t dest); void cmd_FORK(std::vector<uint8_t> &result, explicit_bookmarks& bookmark_manager, size_t slot, bookmark_id_t dest);
}
#endif #endif

View File

@ -3,6 +3,7 @@
#include <assert.h> #include <assert.h>
#include <libregexis024vm/utils.h> #include <libregexis024vm/utils.h>
namespace regexis024 {
void reattach_fa_node_edge(FA_Node **old_node_ptr, FA_Node *new_node) { void reattach_fa_node_edge(FA_Node **old_node_ptr, FA_Node *new_node) {
assert(old_node_ptr); assert(old_node_ptr);
if (*old_node_ptr){ if (*old_node_ptr){
@ -32,7 +33,6 @@ void reattach_nxt_node(FA_NodePathPart *node, FA_Node *dest) {
reattach_fa_node_edge(&(node->nxt_node), dest); reattach_fa_node_edge(&(node->nxt_node), dest);
} }
// todo: get rid of exitf in the whole project
FA_Node* copy_node_no_container_adjustments(FA_Node& node){ FA_Node* copy_node_no_container_adjustments(FA_Node& node){
FA_Node* res; FA_Node* res;
/* Using implicitly defined copy constructors */ /* Using implicitly defined copy constructors */
@ -69,3 +69,4 @@ FA_Node *copy_fa_node_to_another_fa(FA_Node& node, FA_Container &resultFa) {
resultFa.registerNew(res); resultFa.registerNew(res);
return res; return res;
} }
}

View File

@ -4,6 +4,7 @@
#include "finite_automaton.h" #include "finite_automaton.h"
#include "fa_first_stage_fix.h" #include "fa_first_stage_fix.h"
namespace regexis024 {
FA_Node* copy_fa_node(FA_Node& node, FA_Container& fa); FA_Node* copy_fa_node(FA_Node& node, FA_Container& fa);
void yay_new_start(FA_Container& fa, FA_NodePathPart* node); void yay_new_start(FA_Container& fa, FA_NodePathPart* node);
void reattach_fa_node_edge(FA_Node** old_node_ptr, FA_Node* new_node); void reattach_fa_node_edge(FA_Node** old_node_ptr, FA_Node* new_node);
@ -13,5 +14,6 @@ void reattach_nxt_node(FA_NodePathPart* node, FA_Node* dest);
/* This is a one weird operation. New node in resultFa will still point to nodes in sourceFa, /* This is a one weird operation. New node in resultFa will still point to nodes in sourceFa,
* without increasing refcount of those nodes. YOU HAVE TO FIX IT ASAP */ * without increasing refcount of those nodes. YOU HAVE TO FIX IT ASAP */
FA_Node* copy_fa_node_to_another_fa(FA_Node& node, FA_Container& resultFa); FA_Node* copy_fa_node_to_another_fa(FA_Node& node, FA_Container& resultFa);
}
#endif //LIBREGEXIS024_MISC_FA_FUNCS_H #endif //LIBREGEXIS024_MISC_FA_FUNCS_H

View File

@ -1,15 +1,16 @@
#include <libregexis024fa/selarr_priority_table.h> #include <libregexis024fa/selarr_priority_table.h>
#include <assert.h> #include <assert.h>
namespace regexis024 {
bool RegexPriorityTableAction_Pos::isForRange() const { bool RegexPriorityTableAction_Pos::isForRange() const {
return second >= 0; return second >= 0;
} }
RegexPriorityTableAction_Pos::RegexPriorityTableAction_Pos(int first, int second, tracking_var_type type): RegexPriorityTableAction_Pos::RegexPriorityTableAction_Pos(int first, int second, tracking_var_type_t type):
first(first),second(second), type(type) {} first(first),second(second), type(type) {}
// //
RegexPriorityTableAction::RegexPriorityTableAction(bool minimize, int first, int second, tracking_var_type type): RegexPriorityTableAction::RegexPriorityTableAction(bool minimize, int first, int second, tracking_var_type_t type):
minimize(minimize), pos(first, second, type) {} minimize(minimize), pos(first, second, type) {}
// //
}

View File

@ -5,22 +5,24 @@
#include <vector> #include <vector>
#include <libregexis024fa/tracking_variables.h> #include <libregexis024fa/tracking_variables.h>
namespace regexis024 {
struct RegexPriorityTableAction_Pos{ struct RegexPriorityTableAction_Pos{
/* first and second are indexes in selarr (but second can be -1 if it is unused) */ /* first and second are indexes in selarr (but second can be -1 if it is unused) */
int first; int first;
int second; int second;
tracking_var_type type; tracking_var_type_t type;
bool isForRange() const; bool isForRange() const;
RegexPriorityTableAction_Pos(int first, int second, tracking_var_type type); RegexPriorityTableAction_Pos(int first, int second, tracking_var_type_t type);
}; };
struct RegexPriorityTableAction{ struct RegexPriorityTableAction{
bool minimize; bool minimize;
RegexPriorityTableAction_Pos pos; RegexPriorityTableAction_Pos pos;
RegexPriorityTableAction(bool minimize, int first, int second, tracking_var_type type); RegexPriorityTableAction(bool minimize, int first, int second, tracking_var_type_t type);
}; };
typedef std::vector<RegexPriorityTableAction> RegexPriorityTable; typedef std::vector<RegexPriorityTableAction> RegexPriorityTable;
}
#endif //LIBREGEXIS024_SRC_LIBREGEXIS024FA_SELARR_PRIORITY_TABLE_H #endif //LIBREGEXIS024_SRC_LIBREGEXIS024FA_SELARR_PRIORITY_TABLE_H

View File

@ -1,41 +1,36 @@
#include <libregexis024fa/tracking_fa_nodes.h> #include <libregexis024fa/tracking_fa_nodes.h>
#include <assert.h> #include <assert.h>
bool isImmMovOpcode(regex024_opcode inst) { namespace regexis024 {
return inst == regex024_opcodes::MOV_COLARR_IMM || inst == regex024_opcodes::MOV_SELARR_IMM; bool isImmMovOpcode(opcode_t inst) {
return inst == opcodes::MOV_COLARR_IMM || inst == opcodes::MOV_SELARR_IMM;
} }
bool isCurPosMovOpcode(regex024_opcode inst) { bool isCurPosMovOpcode(opcode_t inst) {
return inst == regex024_opcodes::MOV_COLARR_BTPOS || inst == regex024_opcodes::MOV_SELARR_CHPOS; return inst == opcodes::MOV_COLARR_BTPOS || inst == opcodes::MOV_SELARR_CHPOS;
} }
bool isColarrOpcode(regex024_opcode inst) { bool isColarrOpcode(opcode_t inst) {
return inst == regex024_opcodes::MOV_COLARR_IMM || inst == regex024_opcodes::MOV_COLARR_BTPOS; return inst == opcodes::MOV_COLARR_IMM || inst == opcodes::MOV_COLARR_BTPOS;
} }
bool isSelarrOpcode(regex024_opcode inst) { bool isSelarrOpcode(opcode_t inst) {
return inst == regex024_opcodes::MOV_SELARR_IMM || inst == regex024_opcodes::MOV_SELARR_CHPOS; return inst == opcodes::MOV_SELARR_IMM || inst == opcodes::MOV_SELARR_CHPOS;
} }
bool isTrackingFaNode(const FA_Node *n) { bool isTrackingFaNode(const FA_Node *n) {
return n->type == track_array_mov_imm || n->type == track_array_mov_halfinvariant; return n->type == track_array_mov_imm || n->type == track_array_mov_halfinvariant;
} }
TrackingOperationInFa::TrackingOperationInFa(regex024_opcode opcode, regex_tai_t key, uint64_t imm_value)
: opcode(opcode), key(key), immValue(imm_value) {}
TrackingOperationInFa::TrackingOperationInFa(regex024_opcode opcode, regex_tai_t key)
: opcode(opcode), key(key) {}
std::string TrackingOperationInFa::toString() const { std::string TrackingOperationInFa::toString() const {
switch (opcode){ switch (opcode){
case regex024_opcodes::MOV_COLARR_IMM: case opcodes::MOV_COLARR_IMM:
return "colarr[" + std::to_string(key) + "] := " + std::to_string(immValue); return "colarr[" + std::to_string(key) + "] := " + std::to_string(immValue);
case regex024_opcodes::MOV_SELARR_IMM: case opcodes::MOV_SELARR_IMM:
return "selarr[" + std::to_string(key) + "] := " + std::to_string(immValue); return "selarr[" + std::to_string(key) + "] := " + std::to_string(immValue);
case regex024_opcodes::MOV_COLARR_BTPOS: case opcodes::MOV_COLARR_BTPOS:
return "colarr[" + std::to_string(key) + "] := cur byte position"; return "colarr[" + std::to_string(key) + "] := cur byte position";
case regex024_opcodes::MOV_SELARR_CHPOS: case opcodes::MOV_SELARR_CHPOS:
return "selarr[" + std::to_string(key) + "] := cur char position"; return "selarr[" + std::to_string(key) + "] := cur char position";
default: default:
return "wrong collection operation"; return "wrong collection operation";
@ -50,4 +45,4 @@ FA_NodePathPart* convert_to_node(const TrackingOperationInFa& op, FA_Container&
return fa.makeTrackArrayMovHalfinvariant(op.opcode, op.key); return fa.makeTrackArrayMovHalfinvariant(op.opcode, op.key);
} }
}

View File

@ -5,27 +5,24 @@
#include <libregexis024fa/finite_automaton.h> #include <libregexis024fa/finite_automaton.h>
#include <string> #include <string>
bool isImmMovOpcode(regex024_opcode inst); namespace regexis024 {
bool isCurPosMovOpcode(regex024_opcode inst); bool isImmMovOpcode(opcode_t inst);
bool isColarrOpcode(regex024_opcode inst); bool isCurPosMovOpcode(opcode_t inst);
bool isSelarrOpcode(regex024_opcode inst); bool isColarrOpcode(opcode_t inst);
bool isSelarrOpcode(opcode_t inst);
bool isTrackingFaNode(const FA_Node* n); bool isTrackingFaNode(const FA_Node* n);
struct TrackingOperationInFa { struct TrackingOperationInFa {
regex024_opcode opcode; opcode_t opcode;
regex_tai_t key; tai_t key;
/* Not needed for halfinvariant operations */ /* Not needed for halfinvariant operations */
uint64_t immValue; uint64_t immValue;
TrackingOperationInFa(regex024_opcode opcode, regex_tai_t key, uint64_t imm_value);
TrackingOperationInFa(regex024_opcode opcode, regex_tai_t key);
std::string toString() const; std::string toString() const;
}; };
FA_NodePathPart* convert_to_node(const TrackingOperationInFa& op, FA_Container& fa); FA_NodePathPart* convert_to_node(const TrackingOperationInFa& op, FA_Container& fa);
}
#endif #endif

View File

@ -1,6 +1,7 @@
#ifndef LIBREGEXIS024_SRC_LIBREGEXIS024FA_TRACKING_VARIABLES_H #ifndef LIBREGEXIS024_SRC_LIBREGEXIS024FA_TRACKING_VARIABLES_H
#define LIBREGEXIS024_SRC_LIBREGEXIS024FA_TRACKING_VARIABLES_H #define LIBREGEXIS024_SRC_LIBREGEXIS024FA_TRACKING_VARIABLES_H
namespace regexis024 {
namespace tracking_var_types { namespace tracking_var_types {
enum tracking_var_type_I { enum tracking_var_type_I {
range, range,
@ -9,6 +10,7 @@ namespace tracking_var_types {
}; };
} }
typedef tracking_var_types::tracking_var_type_I tracking_var_type; typedef tracking_var_types::tracking_var_type_I tracking_var_type_t;
}
#endif #endif

View File

@ -2,6 +2,7 @@
#include <libregexis024sol/sol_misc_base.h> #include <libregexis024sol/sol_misc_base.h>
#include <assert.h> #include <assert.h>
namespace regexis024 {
uint32_t read_hex(REGEX_IS024_MeaningContext& ctx, int sz){ uint32_t read_hex(REGEX_IS024_MeaningContext& ctx, int sz){
uint32_t res = 0; uint32_t res = 0;
for (int i = 0; i < sz; i++){ for (int i = 0; i < sz; i++){
@ -60,3 +61,4 @@ backslash_expression_parsing_try_regular(REGEX_IS024_MeaningContext &ctx, const
} }
} }
} }
}

View File

@ -5,6 +5,7 @@
#include <assert.h> #include <assert.h>
#include <memory> #include <memory>
namespace regexis024 {
struct ParseCall{ struct ParseCall{
virtual ~ParseCall() = default; virtual ~ParseCall() = default;
virtual std::unique_ptr<ParseCall> afterReceive(REGEX_IS024_MeaningContext& ctx) { assert(false); } virtual std::unique_ptr<ParseCall> afterReceive(REGEX_IS024_MeaningContext& ctx) { assert(false); }
@ -141,3 +142,4 @@ void interpret_command_as_charset_giving(const CommonCodesets& cc, const Command
else else
assert(false); assert(false);
} }
}

View File

@ -1,5 +1,6 @@
#include <libregexis024sol/common_codesets.h> #include <libregexis024sol/common_codesets.h>
namespace regexis024 {
CommonCodesets::CommonCodesets() { CommonCodesets::CommonCodesets() {
spaces = set_add_char(spaces, U'\n'); spaces = set_add_char(spaces, U'\n');
spaces = set_add_char(spaces, U' '); spaces = set_add_char(spaces, U' ');
@ -11,3 +12,4 @@ CommonCodesets::CommonCodesets() {
variable_constituents = set_add_char(word_constituents, U'-'); variable_constituents = set_add_char(word_constituents, U'-');
variable_constituents = merge_sets(variable_constituents, digits); variable_constituents = merge_sets(variable_constituents, digits);
} }
}

View File

@ -3,6 +3,7 @@
#include <libregexis024fa/codeset.h> #include <libregexis024fa/codeset.h>
namespace regexis024 {
struct CommonCodesets { struct CommonCodesets {
codeset_t spaces; codeset_t spaces;
codeset_t word_constituents; codeset_t word_constituents;
@ -10,5 +11,6 @@ struct CommonCodesets {
codeset_t variable_constituents; codeset_t variable_constituents;
CommonCodesets(); CommonCodesets();
}; };
}
#endif #endif

View File

@ -23,6 +23,7 @@
#define aux_ERROR_CHECK do { if (ctx.error) { return; } } while (0) #define aux_ERROR_CHECK do { if (ctx.error) { return; } } while (0)
#define aux_THROW(str) do { report(ctx, "regex: " str); return; } while (0) #define aux_THROW(str) do { report(ctx, "regex: " str); return; } while (0)
namespace regexis024 {
/* ****************************** Top */ /* ****************************** Top */
const char* dfa_arg_aliases_condone[] = {"forgive", "condone", "okay", "optional", "nonimportant", "ifpossible", NULL}; const char* dfa_arg_aliases_condone[] = {"forgive", "condone", "okay", "optional", "nonimportant", "ifpossible", NULL};
@ -161,17 +162,17 @@ chekushka BracketLvl_ParseCall::afterReceive(REGEX_IS024_MeaningContext& ctx, Pa
assert(tai_slots.colarr_first >= 0 && tai_slots.colarr_first < UINT16_MAX); assert(tai_slots.colarr_first >= 0 && tai_slots.colarr_first < UINT16_MAX);
assert(tai_slots.colarr_second >= 0 && tai_slots.colarr_second < UINT16_MAX); assert(tai_slots.colarr_second >= 0 && tai_slots.colarr_second < UINT16_MAX);
result = join(subexpression_from_path(fa.makeTrackArrayMovHalfinvariant( result = join(subexpression_from_path(fa.makeTrackArrayMovHalfinvariant(
regex024_opcodes::MOV_COLARR_BTPOS, tai_slots.colarr_first)), result); opcodes::MOV_COLARR_BTPOS, tai_slots.colarr_first)), result);
result = join(result, subexpression_from_path(fa.makeTrackArrayMovHalfinvariant( result = join(result, subexpression_from_path(fa.makeTrackArrayMovHalfinvariant(
regex024_opcodes::MOV_COLARR_BTPOS, tai_slots.colarr_second))); opcodes::MOV_COLARR_BTPOS, tai_slots.colarr_second)));
} }
if (tai_slots.stored_in_sa){ if (tai_slots.stored_in_sa){
assert(tai_slots.selarr_first >= 0 && tai_slots.selarr_first < UINT16_MAX); assert(tai_slots.selarr_first >= 0 && tai_slots.selarr_first < UINT16_MAX);
assert(tai_slots.selarr_second >= 0 && tai_slots.selarr_second < UINT16_MAX); assert(tai_slots.selarr_second >= 0 && tai_slots.selarr_second < UINT16_MAX);
result = join(subexpression_from_path(fa.makeTrackArrayMovHalfinvariant( result = join(subexpression_from_path(fa.makeTrackArrayMovHalfinvariant(
regex024_opcodes::MOV_SELARR_CHPOS, tai_slots.selarr_first)), result); opcodes::MOV_SELARR_CHPOS, tai_slots.selarr_first)), result);
result = join(result, subexpression_from_path(fa.makeTrackArrayMovHalfinvariant( result = join(result, subexpression_from_path(fa.makeTrackArrayMovHalfinvariant(
regex024_opcodes::MOV_SELARR_CHPOS, tai_slots.selarr_second))); opcodes::MOV_SELARR_CHPOS, tai_slots.selarr_second)));
pctx.is_inside_of_these_sa_subexpressions[namedSubexpressionId] = false; pctx.is_inside_of_these_sa_subexpressions[namedSubexpressionId] = false;
} }
} }
@ -225,7 +226,7 @@ void parseBody(REGEX_IS024_MeaningContext& ctx, FA_Container& fa, SubExprCompile
} }
REGEX_IS024_MeaningContext::REGEX_IS024_MeaningContext(size_t inputSize, const char *input) : input_size(inputSize), REGEX_IS024_MeaningContext::REGEX_IS024_MeaningContext(size_t inputSize, const char *input) : input_size(inputSize),
input(reinterpret_cast<const uint8_t *>(input)) { input(input) {
CommonCodesets codeset_collection; CommonCodesets codeset_collection;
FA_Container fa; FA_Container fa;
FA_Container fa_1f; FA_Container fa_1f;
@ -278,3 +279,4 @@ REGEX_IS024_MeaningContext::REGEX_IS024_MeaningContext(size_t inputSize, const c
return; return;
} }
} }
}

View File

@ -5,14 +5,11 @@
#include <vector> #include <vector>
#include <stdint.h> #include <stdint.h>
// todo: SUPER HIGHT PRIORITY: MOVE all this spaces digits variable_constituents junk out of this class
// todo: also PLEEEASE, write static before literally nearly every single one little stupid function in this library
#include <libregexis024sol/part_of_expr_that_tracks.h> #include <libregexis024sol/part_of_expr_that_tracks.h>
namespace regexis024 {
struct REGEX_IS024_MeaningContext{ struct REGEX_IS024_MeaningContext{
size_t input_size; size_t input_size;
const uint8_t* input; const char* input;
bool error = false; bool error = false;
std::string error_msg; std::string error_msg;
@ -30,5 +27,5 @@ struct REGEX_IS024_MeaningContext{
REGEX_IS024_MeaningContext(size_t inputSize, const char *input); REGEX_IS024_MeaningContext(size_t inputSize, const char *input);
}; };
}
#endif //LIBREGEXIS024_EXPR_COMPILER_H #endif //LIBREGEXIS024_EXPR_COMPILER_H

View File

@ -6,6 +6,7 @@
#define aux_ERROR_CHECK do { if (ctx.error) { return; } } while (0) #define aux_ERROR_CHECK do { if (ctx.error) { return; } } while (0)
#define aux_THROW(str) do { report(ctx, "regex: " str); return; } while (0) #define aux_THROW(str) do { report(ctx, "regex: " str); return; } while (0)
namespace regexis024 {
const char* header_command_dfa_names[] = {"dfa", "determinize", NULL}; const char* header_command_dfa_names[] = {"dfa", "determinize", NULL};
const char* header_command_select_names[] = {"s", "select", "selarr", "selectional", NULL}; const char* header_command_select_names[] = {"s", "select", "selarr", "selectional", NULL};
@ -32,3 +33,4 @@ void int_parse_with_limit_concern(const std::string &str, REGEX_IS024_MeaningCon
aux_THROW("integer is too big"); aux_THROW("integer is too big");
} }
} }
}

View File

@ -4,10 +4,11 @@
#include <libregexis024sol/special_terminals.h> #include <libregexis024sol/special_terminals.h>
namespace regexis024 {
bool is_header_cmd(const Command& cmd); bool is_header_cmd(const Command& cmd);
bool is_header_dfa_cmd(const Command& cmd); bool is_header_dfa_cmd(const Command& cmd);
bool is_header_select_cmd(const Command& cmd); bool is_header_select_cmd(const Command& cmd);
void int_parse_with_limit_concern(const std::string& str, REGEX_IS024_MeaningContext &ctx, size_t& res, int lim); void int_parse_with_limit_concern(const std::string& str, REGEX_IS024_MeaningContext &ctx, size_t& res, int lim);
}
#endif //LIBREGEXIS024_SRC_LIBREGEXIS024SOL_EXPR_PARSE_FUNCTIONS_COMMAND_RECOGNITION_H #endif //LIBREGEXIS024_SRC_LIBREGEXIS024SOL_EXPR_PARSE_FUNCTIONS_COMMAND_RECOGNITION_H

View File

@ -14,6 +14,7 @@
#define aux_ERROR_CHECK do { if (ctx.error) { return; } } while (0) #define aux_ERROR_CHECK do { if (ctx.error) { return; } } while (0)
#define aux_THROW(str) do { report(ctx, "regex: " str); return; } while (0) #define aux_THROW(str) do { report(ctx, "regex: " str); return; } while (0)
namespace regexis024 {
/* **************************** Sequence */ /* **************************** Sequence */
void in_case_of_backslash(REGEX_IS024_MeaningContext &ctx, const CommonCodesets& cc, FA_Container &fa, SubExprCompiled& backPart) { void in_case_of_backslash(REGEX_IS024_MeaningContext &ctx, const CommonCodesets& cc, FA_Container &fa, SubExprCompiled& backPart) {
@ -179,22 +180,22 @@ chekushka Sequence_ParseCall::firstTime(REGEX_IS024_MeaningContext &ctx, Parsing
readChar(ctx); readChar(ctx);
if (ctx.ktr.retrieval_info[id].stored_in_sa) if (ctx.ktr.retrieval_info[id].stored_in_sa)
parts.emplace_back(subexpression_from_path( parts.emplace_back(subexpression_from_path(
fa.makeTrackArrayMovImm(regex024_opcodes::MOV_SELARR_IMM, fa.makeTrackArrayMovImm(opcodes::MOV_SELARR_IMM,
ctx.ktr.retrieval_info[id].selarr_first, value))); ctx.ktr.retrieval_info[id].selarr_first, value)));
if (ctx.ktr.retrieval_info[id].stored_in_ca) if (ctx.ktr.retrieval_info[id].stored_in_ca)
parts.emplace_back(subexpression_from_path( parts.emplace_back(subexpression_from_path(
fa.makeTrackArrayMovImm(regex024_opcodes::MOV_COLARR_IMM, fa.makeTrackArrayMovImm(opcodes::MOV_COLARR_IMM,
ctx.ktr.retrieval_info[id].colarr_first, value))); ctx.ktr.retrieval_info[id].colarr_first, value)));
} else if (typeDet == U';'){ } else if (typeDet == U';'){
ensure_space_for_track_unit(ctx, name, tracking_var_types::dot_cur_pos); call_ERROR_CHECK; ensure_space_for_track_unit(ctx, name, tracking_var_types::dot_cur_pos); call_ERROR_CHECK;
readChar(ctx); readChar(ctx);
if (ctx.ktr.retrieval_info[id].stored_in_sa) if (ctx.ktr.retrieval_info[id].stored_in_sa)
parts.emplace_back(subexpression_from_path( parts.emplace_back(subexpression_from_path(
fa.makeTrackArrayMovHalfinvariant(regex024_opcodes::MOV_SELARR_CHPOS, fa.makeTrackArrayMovHalfinvariant(opcodes::MOV_SELARR_CHPOS,
ctx.ktr.retrieval_info[id].selarr_first))); ctx.ktr.retrieval_info[id].selarr_first)));
if (ctx.ktr.retrieval_info[id].stored_in_ca) if (ctx.ktr.retrieval_info[id].stored_in_ca)
parts.emplace_back(subexpression_from_path( parts.emplace_back(subexpression_from_path(
fa.makeTrackArrayMovHalfinvariant(regex024_opcodes::MOV_COLARR_BTPOS, fa.makeTrackArrayMovHalfinvariant(opcodes::MOV_COLARR_BTPOS,
ctx.ktr.retrieval_info[id].colarr_first))); ctx.ktr.retrieval_info[id].colarr_first)));
} else } else
call_THROW("Missing ; or ( in the beginning of tracking unit"); call_THROW("Missing ; or ( in the beginning of tracking unit");
@ -220,3 +221,4 @@ chekushka Sequence_ParseCall::afterReceive(REGEX_IS024_MeaningContext &ctx, Pars
// This is possible only if I received a bracket expression // This is possible only if I received a bracket expression
return firstTime(ctx, pctx, fa); return firstTime(ctx, pctx, fa);
} }
}

View File

@ -10,6 +10,7 @@
#include <assert.h> #include <assert.h>
#include <libregexis024fa/selarr_priority_table.h> #include <libregexis024fa/selarr_priority_table.h>
namespace regexis024 {
struct ParsingContext{ struct ParsingContext{
/* Those subexpressions, that are tracket by s`a are forbidden from nesting inside themselves */ /* Those subexpressions, that are tracket by s`a are forbidden from nesting inside themselves */
std::vector<bool> is_inside_of_these_sa_subexpressions; std::vector<bool> is_inside_of_these_sa_subexpressions;
@ -67,8 +68,6 @@ struct Sequence_ParseCall: public ParseCall{
chekushka afterReceive(REGEX_IS024_MeaningContext& ctx, ParsingContext& pctx, FA_Container& fa); chekushka afterReceive(REGEX_IS024_MeaningContext& ctx, ParsingContext& pctx, FA_Container& fa);
chekushka firstTime(REGEX_IS024_MeaningContext& ctx, ParsingContext& pctx, FA_Container& fa); chekushka firstTime(REGEX_IS024_MeaningContext& ctx, ParsingContext& pctx, FA_Container& fa);
}; };
}
/* Some auxilary functions */
#endif //LIBREGEXIS024_SRC_LIBREGEXIS024SOL_EXPR_PARSE_FUNCTIONS_EPF_H #endif //LIBREGEXIS024_SRC_LIBREGEXIS024SOL_EXPR_PARSE_FUNCTIONS_EPF_H

View File

@ -4,9 +4,9 @@
#define aux_THROW(str) do { report(ctx, "regex: " str); return; } while (0) #define aux_THROW(str) do { report(ctx, "regex: " str); return; } while (0)
#define aux_ERROR_CHECK do { if (ctx.error) { return; } } while (0) #define aux_ERROR_CHECK do { if (ctx.error) { return; } } while (0)
namespace regexis024 {
void for_one_type(REGEX_IS024_MeaningContext &ctx, uint16_t& free_ARR_tai, int& ARR_first, int& ARR_second, void for_one_type(REGEX_IS024_MeaningContext &ctx, uint16_t& free_ARR_tai, int& ARR_first, int& ARR_second,
const std::string& ARR_NAME, tracking_var_type type){ const std::string& ARR_NAME, tracking_var_type_t type){
#define check_is_available() if (free_ARR_tai == UINT16_MAX) { \ #define check_is_available() if (free_ARR_tai == UINT16_MAX) { \
report(ctx, ("regex: " + ARR_NAME + ": key namespace overflow").c_str()); return;} report(ctx, ("regex: " + ARR_NAME + ": key namespace overflow").c_str()); return;}
check_is_available() check_is_available()
@ -17,7 +17,7 @@ void for_one_type(REGEX_IS024_MeaningContext &ctx, uint16_t& free_ARR_tai, int&
} }
} }
void ensure_space_for_track_unit(REGEX_IS024_MeaningContext &ctx, const std::string& name, tracking_var_type type) { void ensure_space_for_track_unit(REGEX_IS024_MeaningContext &ctx, const std::string& name, tracking_var_type_t type) {
size_t id = ctx.ktr.track_names[name]; size_t id = ctx.ktr.track_names[name];
/* Size of this verctor won't be changed. THis is a safe reference */ /* Size of this verctor won't be changed. THis is a safe reference */
SubtrackingNameInfo& info = ctx.ktr.retrieval_info[id]; SubtrackingNameInfo& info = ctx.ktr.retrieval_info[id];
@ -36,3 +36,4 @@ void ensure_space_for_track_unit(REGEX_IS024_MeaningContext &ctx, const std::str
aux_THROW("tracking tool unit type mismatch"); aux_THROW("tracking tool unit type mismatch");
} }
} }
}

View File

@ -4,7 +4,8 @@
#include <libregexis024sol/expr_compiler.h> #include <libregexis024sol/expr_compiler.h>
void ensure_space_for_track_unit(REGEX_IS024_MeaningContext &ctx, const std::string& name, tracking_var_type type); namespace regexis024 {
void ensure_space_for_track_unit(REGEX_IS024_MeaningContext &ctx, const std::string& name, tracking_var_type_t type);
}
#endif //LIBREGEXIS024_SRC_LIBREGEXIS024SOL_TRACKING_UNITS_H #endif //LIBREGEXIS024_SRC_LIBREGEXIS024SOL_TRACKING_UNITS_H

View File

@ -1,2 +0,0 @@
// #include <libregexis024sol/part_of_expr_that_tracks.h>

View File

@ -6,12 +6,13 @@
#include <string> #include <string>
#include <libregexis024fa/tracking_variables.h> #include <libregexis024fa/tracking_variables.h>
namespace regexis024 {
struct SubtrackingNameInfo{ struct SubtrackingNameInfo{
bool stored_in_ca = true; bool stored_in_ca = true;
bool stored_in_sa = false; bool stored_in_sa = false;
bool discovered = false; bool discovered = false;
tracking_var_type type; tracking_var_type_t type;
/* These fields will be -1 if unused */ /* These fields will be -1 if unused */
int colarr_first = -1; int colarr_first = -1;
int colarr_second = -1; int colarr_second = -1;
@ -26,6 +27,6 @@ struct KnownTrackingTools {
std::map<std::string, int64_t> track_names; std::map<std::string, int64_t> track_names;
std::vector<SubtrackingNameInfo> retrieval_info; std::vector<SubtrackingNameInfo> retrieval_info;
}; };
}
#endif //PART_OF_EXPR_THAT_TRACKS_H #endif //PART_OF_EXPR_THAT_TRACKS_H

View File

@ -1,6 +1,7 @@
#include <libregexis024sol/sol_misc_base.h> #include <libregexis024sol/sol_misc_base.h>
#include <libregexis024vm/utils.h> #include <libregexis024vm/utils.h>
namespace regexis024 {
void report(REGEX_IS024_MeaningContext &ctx, const char *error) { void report(REGEX_IS024_MeaningContext &ctx, const char *error) {
if (!ctx.error){ if (!ctx.error){
ctx.error = true; ctx.error = true;
@ -51,5 +52,4 @@ std::string tryRead_REGEX024_name(REGEX_IS024_MeaningContext &ctx) {
} }
return res; return res;
} }
}

View File

@ -5,6 +5,7 @@
#include <libregexis024sol/expr_compiler.h> #include <libregexis024sol/expr_compiler.h>
#include <string> #include <string>
namespace regexis024 {
void report(REGEX_IS024_MeaningContext& ctx, const char* error); void report(REGEX_IS024_MeaningContext& ctx, const char* error);
bool isEnd(REGEX_IS024_MeaningContext& ctx); bool isEnd(REGEX_IS024_MeaningContext& ctx);
@ -16,5 +17,5 @@ bool is_REGEX024_nameConstituent(int32_t ch);
/* Name in my library consists of [0-9a-zA-Z]. If the first peeped letter is not name constituent, /* Name in my library consists of [0-9a-zA-Z]. If the first peeped letter is not name constituent,
* empty string is returned */ * empty string is returned */
std::string tryRead_REGEX024_name(REGEX_IS024_MeaningContext& ctx); std::string tryRead_REGEX024_name(REGEX_IS024_MeaningContext& ctx);
}
#endif //LIBREGEXIS024_SRC_LIBREGEXIS024SOL_SOL_MISC_BASE_H #endif //LIBREGEXIS024_SRC_LIBREGEXIS024SOL_SOL_MISC_BASE_H

View File

@ -5,6 +5,7 @@
#include <libregexis024sol/expr_compiler.h> #include <libregexis024sol/expr_compiler.h>
#include <libregexis024sol/common_codesets.h> #include <libregexis024sol/common_codesets.h>
namespace regexis024 {
/* This option of backslash usage should be checked last. /* This option of backslash usage should be checked last.
* Function can generate error. Always check the error first */ * Function can generate error. Always check the error first */
void void
@ -32,5 +33,6 @@ struct Command: CommandEntity{
Command command_expr_parse(REGEX_IS024_MeaningContext& ctx); Command command_expr_parse(REGEX_IS024_MeaningContext& ctx);
bool is_command_for_charset(const Command& cmd); bool is_command_for_charset(const Command& cmd);
void interpret_command_as_charset_giving(const CommonCodesets& cc, const Command& cmd, codeset_t& ret); void interpret_command_as_charset_giving(const CommonCodesets& cc, const Command& cmd, codeset_t& ret);
}
#endif //LIBREGEXIS024_SRC_LIBREGEXIS024SOL_SPECIAL_TERMINALS_H #endif //LIBREGEXIS024_SRC_LIBREGEXIS024SOL_SPECIAL_TERMINALS_H

View File

@ -6,6 +6,7 @@
#include <memory> #include <memory>
#include <assert.h> #include <assert.h>
namespace regexis024 {
/* Can allow backslash (later should check that backslash expression is not multicharar or empty */ /* Can allow backslash (later should check that backslash expression is not multicharar or empty */
bool soundsLikeCharOrRangeStart(int32_t peeped) { bool soundsLikeCharOrRangeStart(int32_t peeped) {
return peeped >= 0 && (peeped != U'[' && peeped != U']' && peeped != U'!' && \ return peeped >= 0 && (peeped != U'[' && peeped != U']' && peeped != U'!' && \
@ -187,3 +188,4 @@ codeset_t sq_bracket_expr_parse(REGEX_IS024_MeaningContext &ctx, const CommonCod
} }
return res; return res;
} }
}

View File

@ -5,6 +5,7 @@
#include <libregexis024sol/expr_compiler.h> #include <libregexis024sol/expr_compiler.h>
#include <libregexis024sol/common_codesets.h> #include <libregexis024sol/common_codesets.h>
namespace regexis024 {
codeset_t sq_bracket_expr_parse(REGEX_IS024_MeaningContext& ctx, const CommonCodesets& cc); codeset_t sq_bracket_expr_parse(REGEX_IS024_MeaningContext& ctx, const CommonCodesets& cc);
}
#endif //LIBREGEXIS024_SRC_LIBREGEXIS024SOL_SQUARE_BRACKET_EXPRESSION_H #endif //LIBREGEXIS024_SRC_LIBREGEXIS024SOL_SQUARE_BRACKET_EXPRESSION_H

View File

@ -3,6 +3,7 @@
#include <assert.h> #include <assert.h>
#include <stdio.h> #include <stdio.h>
namespace regexis024 {
SubExprCompiled subexpr_charset_reading_filter(const codeset_t &codeset, FA_Container &fa) { SubExprCompiled subexpr_charset_reading_filter(const codeset_t &codeset, FA_Container &fa) {
return subexpression_from_path(fa.makeOneCharRead(codeset, false)); return subexpression_from_path(fa.makeOneCharRead(codeset, false));
} }
@ -182,3 +183,4 @@ SubExprCompiled forkify(const std::vector<SubExprCompiled> &options, FA_Containe
void SubExprCompiled::assertDefault() { void SubExprCompiled::assertDefault() {
assert(!start && ends.empty() && can_be_empty); assert(!start && ends.empty() && can_be_empty);
} }
}

View File

@ -3,6 +3,7 @@
#include <libregexis024fa/finite_automaton.h> #include <libregexis024fa/finite_automaton.h>
namespace regexis024 {
struct SubExprCompiled{ struct SubExprCompiled{
FA_Node* start = NULL; FA_Node* start = NULL;
/* After putting there values from neighbour vectors in nodes, these vectors must not change size */ /* After putting there values from neighbour vectors in nodes, these vectors must not change size */
@ -28,5 +29,5 @@ SubExprCompiled RobertAngier(const SubExprCompiled& source, FA_Container& fa);
/* pass REGEXIS024_MAX_REPEAT + 1 as max_allowed to allow infinite repeat */ /* pass REGEXIS024_MAX_REPEAT + 1 as max_allowed to allow infinite repeat */
void apply_repeat_to_subexpression(SubExprCompiled& patient, FA_Container& fa, size_t min_allowed, size_t max_allowed); void apply_repeat_to_subexpression(SubExprCompiled& patient, FA_Container& fa, size_t min_allowed, size_t max_allowed);
}
#endif //LIBREGEXIS024_SRC_LIBREGEXIS024SOL_SUBEXPR_FA_TRANSFORMED_H #endif //LIBREGEXIS024_SRC_LIBREGEXIS024SOL_SUBEXPR_FA_TRANSFORMED_H

View File

@ -11,15 +11,17 @@
#include <map> #include <map>
#include <stdio.h> #include <stdio.h>
using namespace regexis024;
struct assembler_context_bookmark{ struct assembler_context_bookmark{
regex_near_ptr_t pos_in_r024program; near_ptr_t pos_in_r024program;
int LINE; int LINE;
}; };
struct pending_bookmark{ struct pending_bookmark{
/* Must fill this byte with pos of pos_in_r024program in assembler_context_bookmark /* Must fill this byte with pos of pos_in_r024program in assembler_context_bookmark
* In a sense, this is a pointer to a NULL pointer that is yet to become normal kinda pointer */ * In a sense, this is a pointer to a NULL pointer that is yet to become normal kinda pointer */
regex_near_ptr_t pos_in_r024program; near_ptr_t pos_in_r024program;
const char* name; const char* name;
/* LINE of the reference is needed in case of error */ /* LINE of the reference is needed in case of error */
int LINE; int LINE;
@ -46,7 +48,7 @@ struct assembler_context{
} }
/* pending bookmerk requests should be added only with beg_for_bookmark method, /* pending bookmerk requests should be added only with beg_for_bookmark method,
* or else SEGFAULT will be your frequent guest */ * or else SEGFAULT will be your frequent guest */
*reinterpret_cast<regex_near_ptr_t *>(&result[br.pos_in_r024program]) = bookmarks[br.name].pos_in_r024program; *reinterpret_cast<near_ptr_t *>(&result[br.pos_in_r024program]) = bookmarks[br.name].pos_in_r024program;
} }
} }

View File

@ -11,8 +11,9 @@
#include <map> #include <map>
#include <stdio.h> #include <stdio.h>
#include <inttypes.h> #include <inttypes.h>
#include <stdexcept>
// TODO: apply here my new change in near pointer size using namespace regexis024;
struct landing_place_resolvance{ struct landing_place_resolvance{
size_t name_id; size_t name_id;
@ -34,12 +35,14 @@ void print_disassembly(size_t prgSize, uint8_t* prg){
}; };
uint64_t used_names = 0; uint64_t used_names = 0;
/* From program position -> to names[ind] & */ /* From program position -> to names[ind] & */
std::map<regex_near_ptr_t, landing_place_resolvance> bookmarks; std::map<near_ptr_t, landing_place_resolvance> bookmarks;
regex_near_ptr_t IP = 0; near_ptr_t IP = 0;
auto check_inboundness = [&](int region){ auto check_inboundness = [&](int region){
if (!vmprog_check_inboundness(prgSize, IP, region)) if (!vmprog_check_inboundness(prgSize, IP, region)) {
exitf("This program can't be decomposed into commands in a trivial way"); fprintf(stderr, "This program can't be decomposed into commands in a trivial way");
std::terminate();
}
}; };
auto extract_b = [&]() -> uint8_t{ auto extract_b = [&]() -> uint8_t{
check_inboundness(1); check_inboundness(1);
@ -60,19 +63,19 @@ void print_disassembly(size_t prgSize, uint8_t* prg){
auto extract_instruction = [&]() -> uint8_t{ auto extract_instruction = [&]() -> uint8_t{
return extract_b(); return extract_b();
}; };
auto extract_sslot_id = [&]() -> regex_sslot_id_t{ auto extract_sslot_id = [&]() -> sslot_id_t{
return extract_dw(); return extract_dw();
}; };
auto extract_near_pointer = [&]() -> regex_near_ptr_t{ auto extract_near_pointer = [&]() -> near_ptr_t{
return extract_qw(); return extract_qw();
}; };
auto extract_track_array_index = [&]() -> regex_tai_t{ auto extract_track_array_index = [&]() -> tai_t{
return extract_w(); return extract_w();
}; };
bool second_phase = false; bool second_phase = false;
auto fph_register_landing = [&](regex_near_ptr_t pos){ auto fph_register_landing = [&](near_ptr_t pos){
if (!second_phase){ if (!second_phase){
if (bookmarks.count(pos) == 0){ if (bookmarks.count(pos) == 0){
if (used_names == names.size()) if (used_names == names.size())
@ -83,15 +86,17 @@ void print_disassembly(size_t prgSize, uint8_t* prg){
} }
}; };
auto get_bookmark_in_2phase = [&](regex_near_ptr_t pos) -> std::string { auto get_bookmark_in_2phase = [&](near_ptr_t pos) -> std::string {
if (bookmarks.count(pos) == 0) if (bookmarks.count(pos) == 0) {
exitf("bruh"); fprintf(stderr, "Bruh\n");
std::terminate();
}
return names[bookmarks[pos].name_id]; return names[bookmarks[pos].name_id];
}; };
auto one_reading = [&](){ auto one_reading = [&](){
while (IP < prgSize) { while (IP < prgSize) {
regex_near_ptr_t start_pos = IP; near_ptr_t start_pos = IP;
if (second_phase){ if (second_phase){
if (bookmarks.count(IP) != 0){ if (bookmarks.count(IP) != 0){
printf("%s:\n", get_bookmark_in_2phase(IP).c_str()); printf("%s:\n", get_bookmark_in_2phase(IP).c_str());
@ -102,11 +107,11 @@ void print_disassembly(size_t prgSize, uint8_t* prg){
switch (opcode) { switch (opcode) {
#define secPrint(fmt, ...) if (second_phase) {printf("% 3lu) " fmt, start_pos, __VA_ARGS__);} } break; #define secPrint(fmt, ...) if (second_phase) {printf("% 3lu) " fmt, start_pos, __VA_ARGS__);} } break;
#define secPrintNoArg(str) if (second_phase) {printf("% 3lu) " str, start_pos);} } break; #define secPrintNoArg(str) if (second_phase) {printf("% 3lu) " str, start_pos);} } break;
#define instCase(oper_code) case regex024_opcodes::oper_code: { #define instCase(oper_code) case opcodes::oper_code: {
#define jcMess(cond, sz_uppercase, x_t, extract_method, printf_sign) \ #define jcMess(cond, sz_uppercase, x_t, extract_method, printf_sign) \
instCase(JC ## cond ## _ ## sz_uppercase) \ instCase(JC ## cond ## _ ## sz_uppercase) \
x_t x = extract_method(); \ x_t x = extract_method(); \
regex_near_ptr_t dest = extract_near_pointer(); \ near_ptr_t dest = extract_near_pointer(); \
fph_register_landing(dest); \ fph_register_landing(dest); \
secPrint("JC" #cond "_" #sz_uppercase " %" printf_sign " $%s\n", x, get_bookmark_in_2phase(dest).c_str()) secPrint("JC" #cond "_" #sz_uppercase " %" printf_sign " $%s\n", x, get_bookmark_in_2phase(dest).c_str())
#define jcCacaphony(cond) \ #define jcCacaphony(cond) \
@ -131,22 +136,22 @@ void print_disassembly(size_t prgSize, uint8_t* prg){
instCase(FORK) instCase(FORK)
uint32_t ssid = extract_sslot_id(); uint32_t ssid = extract_sslot_id();
regex_near_ptr_t dest = extract_near_pointer(); near_ptr_t dest = extract_near_pointer();
fph_register_landing(dest); fph_register_landing(dest);
secPrint("FORK %u $%s\n", ssid, get_bookmark_in_2phase(dest).c_str()) secPrint("FORK %u $%s\n", ssid, get_bookmark_in_2phase(dest).c_str())
simpleDimple(MATCH) simpleDimple(MATCH)
simpleDimple(DIE) simpleDimple(DIE)
instCase(PARAM_READ_SS_NUMBER) instCase(PARAM_READ_SS_NUMBER)
regex_sslot_id_t ssid_max_plus_one = extract_sslot_id(); sslot_id_t ssid_max_plus_one = extract_sslot_id();
secPrint("PARAM_READ_SS_NUMBER %u\n", ssid_max_plus_one) secPrint("PARAM_READ_SS_NUMBER %u\n", ssid_max_plus_one)
instCase(PARAM_FORK_SS_NUMBER) instCase(PARAM_FORK_SS_NUMBER)
regex_sslot_id_t ssid_max_plus_one = extract_sslot_id(); sslot_id_t ssid_max_plus_one = extract_sslot_id();
secPrint("PARAM_FORK_SS_NUMBER %u\n", ssid_max_plus_one) secPrint("PARAM_FORK_SS_NUMBER %u\n", ssid_max_plus_one)
instCase(PARAM_SELARR_LEN) instCase(PARAM_SELARR_LEN)
regex_tai_t tai_max_plus_one = extract_track_array_index(); tai_t tai_max_plus_one = extract_track_array_index();
secPrint("PARAM_SELARR_LEN %hu\n", tai_max_plus_one) secPrint("PARAM_SELARR_LEN %hu\n", tai_max_plus_one)
instCase(PARAM_COLSIFTFUNC_SET) instCase(PARAM_COLSIFTFUNC_SET)
regex_near_ptr_t entry = extract_near_pointer(); near_ptr_t entry = extract_near_pointer();
fph_register_landing(entry); fph_register_landing(entry);
secPrint("PARAM_COLSIFTFUNC_SET $%s\n", get_bookmark_in_2phase(entry).c_str()) secPrint("PARAM_COLSIFTFUNC_SET $%s\n", get_bookmark_in_2phase(entry).c_str())
simpleDimple(PARAM_COLSIFTFUNC_WIPE) simpleDimple(PARAM_COLSIFTFUNC_WIPE)
@ -156,36 +161,37 @@ void print_disassembly(size_t prgSize, uint8_t* prg){
instCase(MSG_FED_INPUT_EXTENDED) instCase(MSG_FED_INPUT_EXTENDED)
uint8_t left = extract_b(); uint8_t left = extract_b();
uint8_t right = extract_b(); uint8_t right = extract_b();
regex_sslot_id_t part = extract_sslot_id(); sslot_id_t part = extract_sslot_id();
secPrint("MSG_FED_INPUT_EXTENDED %hhu %hhu %u\n", left, right, part) secPrint("MSG_FED_INPUT_EXTENDED %hhu %hhu %u\n", left, right, part)
instCase(DMOV_RABX_SELARR) instCase(DMOV_RABX_SELARR)
regex_tai_t i = extract_track_array_index(); tai_t i = extract_track_array_index();
secPrint("DMOV_RABX_SELARR %hu\n", i) secPrint("DMOV_RABX_SELARR %hu\n", i)
instCase(DDIST_RABX_SELARR) instCase(DDIST_RABX_SELARR)
regex_tai_t s = extract_track_array_index(); tai_t s = extract_track_array_index();
regex_tai_t e = extract_track_array_index(); tai_t e = extract_track_array_index();
secPrint("DDIST_RABX_SELARR %hu %hu\n", s, e); secPrint("DDIST_RABX_SELARR %hu %hu\n", s, e);
simpleDimple(SIFTPRIOR_MIN_RABX) simpleDimple(SIFTPRIOR_MIN_RABX)
simpleDimple(SIFTPRIOR_MAX_RABX) simpleDimple(SIFTPRIOR_MAX_RABX)
simpleDimple(SIFT_DONE) simpleDimple(SIFT_DONE)
instCase(MOV_COLARR_IMM) instCase(MOV_COLARR_IMM)
regex_tai_t tai = extract_track_array_index(); tai_t tai = extract_track_array_index();
uint64_t imm = extract_qw(); uint64_t imm = extract_qw();
secPrint("MOV_COLARR_IMM %hu %lu\n", tai, imm); secPrint("MOV_COLARR_IMM %hu %lu\n", tai, imm);
instCase(MOV_COLARR_BTPOS) instCase(MOV_COLARR_BTPOS)
regex_tai_t tai = extract_track_array_index(); tai_t tai = extract_track_array_index();
secPrint("MOV_COLARR_BTPOS %hu\n", tai); secPrint("MOV_COLARR_BTPOS %hu\n", tai);
instCase(MOV_SELARR_IMM) instCase(MOV_SELARR_IMM)
regex_tai_t tai = extract_track_array_index(); tai_t tai = extract_track_array_index();
uint64_t imm = extract_qw(); uint64_t imm = extract_qw();
secPrint("MOV_SELARR_IMM %hu %lu\n", tai, imm); secPrint("MOV_SELARR_IMM %hu %lu\n", tai, imm);
instCase(MOV_SELARR_CHPOS) instCase(MOV_SELARR_CHPOS)
regex_tai_t tai = extract_track_array_index(); tai_t tai = extract_track_array_index();
secPrint("MOV_SELARR_CHPOS %hu\n", tai); secPrint("MOV_SELARR_CHPOS %hu\n", tai);
simpleDimple(INIT) simpleDimple(INIT)
simpleDimple(THROW) simpleDimple(THROW)
default: default:
exitf("Bad opcode\n"); fprintf(stderr, "Bad opcode\n");
std::terminate();
#undef secPrint #undef secPrint
#undef secPrintNoArg #undef secPrintNoArg
#undef instCase #undef instCase

View File

@ -2,12 +2,16 @@
#include <libregexis024vm/utils.h> #include <libregexis024vm/utils.h>
#include <stdio.h> #include <stdio.h>
using namespace regexis024;
void test_ccs_fnc(const codeset_t &got, const codeset_t &expected){ void test_ccs_fnc(const codeset_t &got, const codeset_t &expected){
static int id = 1; static int id = 1;
if (got == expected) if (got == expected) {
printf("Test %d passed\n", id++); printf("Test %d passed\n", id++);
else } else {
exitf("Test %d failed\n", id); printf("Test %d failed\n", id);
std::terminate();
}
} }
void invert_test(const codeset_t& A, const codeset_t& C){ void invert_test(const codeset_t& A, const codeset_t& C){

View File

@ -10,8 +10,8 @@ static int test_id = 0;
void do_test(const std::vector<uint8_t>& prg, const std::string& str, const std::vector<bool>& prefix_matching){ void do_test(const std::vector<uint8_t>& prg, const std::string& str, const std::vector<bool>& prefix_matching){
assert(str.size() + 1 == prefix_matching.size()); assert(str.size() + 1 == prefix_matching.size());
REGEX_IS024_CONTEXT ctx{prg.size(), prg.data(), 0, 0, 1000, 1000, 1000000}; VMContext ctx{prg.size(), prg.data(), 0, 0, 1000, 1000, 1000000};
regex024_error_code ret; error_code_t ret;
// todo // todo
printf("TEST %d passed\n", test_id); printf("TEST %d passed\n", test_id);
test_id++; test_id++;

View File

@ -1,6 +1,8 @@
#include <libregexis024sol/expr_compiler.h> #include <libregexis024sol/expr_compiler.h>
#include <libregexis024test/byte_code_disassembler.h> #include <libregexis024test/byte_code_disassembler.h>
using namespace regexis024;
int main(){ int main(){
std::string regular_expression = "\\>1*"; std::string regular_expression = "\\>1*";
REGEX_IS024_MeaningContext regex(regular_expression.size(), regular_expression.c_str()); REGEX_IS024_MeaningContext regex(regular_expression.size(), regular_expression.c_str());

View File

@ -7,6 +7,8 @@
#include <stdexcept> #include <stdexcept>
#include <random> #include <random>
using namespace regexis024;
struct test_id_t { struct test_id_t {
int test_id; int test_id;
int subtest_id; int subtest_id;

View File

@ -21,19 +21,54 @@ void test(const string& input, const string& pattern, const MatchInfo& right_ans
} }
int main() { int main() {
test("11aa", "^!A;\\B!A;\\b!any;\\B!any;$", MatchInfo({}, {}));
test("aa11", "^!A;\\B!A;\\b!any;\\B!any;$", MatchInfo({}, {}));
test("a111", "^!A;\\B!A;\\b!any;\\B!any;$", MatchInfo());
test("aa11", "^!A;\\B!A;\\B!any;\\B!any;$", MatchInfo());
test("1a11", "^!A;\\B!A;\\B!any;\\B!any;$", MatchInfo());
test("11aa", "!dfa;^!A;\\B!A;\\b!any;\\B!any;$", MatchInfo({}, {}));
test("aa11", "!dfa;^!A;\\B!A;\\b!any;\\B!any;$", MatchInfo({}, {}));
test("a111", "!dfa;^!A;\\B!A;\\b!any;\\B!any;$", MatchInfo());
test("aa11", "!dfa;^!A;\\B!A;\\B!any;\\B!any;$", MatchInfo());
test("1a11", "!dfa;^!A;\\B!A;\\B!any;\\B!any;$", MatchInfo());
test("LINE\r\nFirst:Second\r\nThird:12\r\n\r\n",
"!dfa;!select{fieldname{ca}fieldbody{ca}}^^^LINE\r\n(#fieldname([\\u0021-\\u007E&^:]+):#fieldbody([\\u0000-\\u007F&^\r\n]*)\r\n)*\r\n$$$",
MatchInfo({{0, 6}, {1, 11}, {2, 12}, {3, 18}, {0, 20}, {1, 25}, {2, 26}, {3, 28}}, {20, 25, 26, 28}));
test("LINE\r\nFirst:Second\r\nThird:12\r\n\r\n",
"!dfa;!select{fieldname{ca}fieldbody{ca}}^LINE\r\n(#fieldname([\\u0021-\\u007E&^:]+\\>):#fieldbody([\\u0000-\\u007F&^\r\n]*)\r\n)*\r\n",
MatchInfo({{0, 6}, {1, 11}, {2, 12}, {3, 18}, {0, 20}, {1, 25}, {2, 26}, {3, 28}}, {20, 25, 26, 28}));
test("LINE\r\nFirst:Second\r\nThird:12\r\n\r\n",
"!dfa;!select{fieldname{ca}fieldbody{ca}}^LINE\r\n(#fieldname([\\u0021-\\u007E&^:]+):#fieldbody([\\u0000-\\u007F&^\r\n]*)\r\n)*\r\n",
MatchInfo({{0, 6}, {1, 11}, {2, 12}, {3, 18}, {0, 20}, {1, 25}, {2, 26}, {3, 28}}, {20, 25, 26, 28}));
test("LINE\r\nFirst:Second\r\n\r\n",
"!select{fieldname{ca}}LINE\r\n(#fieldname([\\u0021-\\u007E&^:]+):#fieldbody([\\u0000-\\u007F&^\r\n]*)\r\n)*\r\n",
MatchInfo({{0, 6}, {1, 11}, {2, 12}, {3, 18}}, {6, 11}));
test("LINE\r\nFirst:Second\r\n\r\n",
"!select{fieldname}LINE\r\n(#fieldname([\\u0021-\\u007E&^:]+):#fieldbody([\\u0000-\\u007F&^\r\n]*)\r\n)*\r\n",
MatchInfo({{0, 12}, {1, 18}}, {6, 11}));
test("LINE\r\nFirst:Second\r\nThird:12\r\n\r\n",
"!select{fieldname{ca}fieldbody{ca}}LINE\r\n(#fieldname([\\u0021-\\u007E&^:]+):#fieldbody([\\u0000-\\u007F&^\r\n]*)\r\n)*\r\n",
MatchInfo({{0, 6}, {1, 11}, {2, 12}, {3, 18}, {0, 20}, {1, 25}, {2, 26}, {3, 28}}, {20, 25, 26, 28}));
test("абвгд", "абвгд", MatchInfo({}, {}));
test("абвввввввгд", "абв*г", MatchInfo({}, {}));
test("абвввввввд", "абв*г", MatchInfo());
test("LINE\r\nFirst:Second\r\nThird:12\r\n\r\n",
"!dfa;^LINE\r\n(#fieldname([\\u0021-\\u007E&^:]+):#fieldbody([\\u0000-\\u007F&^\r\n]*)\r\n)*\r\n",
MatchInfo({{0, 6}, {1, 11}, {2, 12}, {3, 18}, {0, 20}, {1, 25}, {2, 26}, {3, 28}}, {}));
test("LINE\r\nFirst:Second\r\n\r\n",
"LINE\r\n(#fieldname([\\u0021-\\u007E&^:]+):#fieldbody([\\u0000-\\u007F&^\r\n]*)\r\n)*\r\n",
MatchInfo({{0, 6}, {1, 11}, {2, 12}, {3, 18}}, {}));
test("C111111111111", "C\\>1*", MatchInfo({}, {})); test("C111111111111", "C\\>1*", MatchInfo({}, {}));
// return 0; test("GET / HTTP/1.1\r\nHost: example.com\r\nAAAAA: a\rfaafafdf\r\n\r\n",
test("GET / HTTP/1.1\r\nHost: bibura sosat\r\nLos-es-raus: a\rfaafafdf\r\n\r\n",
"!dfa;(GET|POST) / HTTP/(1.1|1.0|0.9)\r\n([\\u0021-\\u007E&^:]+:([\\u0000-\\u007F&^\r\n])*\r\n)*\r\n", "!dfa;(GET|POST) / HTTP/(1.1|1.0|0.9)\r\n([\\u0021-\\u007E&^:]+:([\\u0000-\\u007F&^\r\n])*\r\n)*\r\n",
MatchInfo()); MatchInfo());
test("\r24234\r\n", "[\\u0000-\\u007F&^\r\n]*\r\n", MatchInfo()); test("\r24234\r\n", "[\\u0000-\\u007F&^\r\n]*\r\n", MatchInfo());
test("\n3432\r\n", "[\\u0000-\\u007F&^\r\n]*\r\n", MatchInfo()); test("\n3432\r\n", "[\\u0000-\\u007F&^\r\n]*\r\n", MatchInfo());
test("3:::;;432\r\n", "[\\u0000-\\u007F&^\r\n]*\r\n", MatchInfo({}, {})); test("3:::;;432\r\n", "[\\u0000-\\u007F&^\r\n]*\r\n", MatchInfo({}, {}));
test("3:::;;432 \r\n", "[\\u0000-\\u007F&^\r\n]*\r\n", MatchInfo({}, {})); test("3:::;;432 \r\n", "[\\u0000-\\u007F&^\r\n]*\r\n", MatchInfo({}, {}));
test("GET / HTTP/0.9\r\nHost: bibura sosat\r\nLos-es-raus: afaafafdf\r\n\r\n", test("GET / HTTP/0.9\r\nHost: bibur at\r\nContent-type: html\r\n\r\n",
"^(GET|POST\\>) / HTTP/(1.1|1.0|0.9)\r\n([\\u0021-\\u007E&^:]+:([\\u0000-\\u007F&^\r\n])*\r\n)*\r\n", "^(GET|POST\\>) / HTTP/(1.1|1.0|0.9)\r\n([\\u0021-\\u007E&^:]+:([\\u0000-\\u007F&^\r\n])*\r\n)*\r\n",
MatchInfo({}, {})); MatchInfo({}, {}));
// return 0;
test("b", "#boba(b)", MatchInfo({{0, 0}, {1, 1}}, {})); test("b", "#boba(b)", MatchInfo({{0, 0}, {1, 1}}, {}));
test("abc", "!selarr{boba{ca}}^a#boba(b)c$", MatchInfo({{0, 1}, {1, 2}}, {1, 2})); test("abc", "!selarr{boba{ca}}^a#boba(b)c$", MatchInfo({{0, 1}, {1, 2}}, {1, 2}));
for (int i = 0; i < 64; i++) { for (int i = 0; i < 64; i++) {

View File

@ -7,7 +7,8 @@
// using namespace regexis024; // using namespace regexis024;
void convert(regexis024::TrackingVariableInfo& to, const SubtrackingNameInfo& from) { namespace regexis024 {
void convert(TrackingVariableInfo& to, const SubtrackingNameInfo& from) {
#define plagiat(field) to.field = from.field; #define plagiat(field) to.field = from.field;
plagiat(type); plagiat(type);
plagiat(colarr_first); plagiat(colarr_first);
@ -19,7 +20,7 @@ void convert(regexis024::TrackingVariableInfo& to, const SubtrackingNameInfo& fr
#undef plagiat #undef plagiat
} }
int regexis024::matchStrToRegexp(const std::string& input, const std::string& pattern, int matchStrToRegexp(const std::string& input, const std::string& pattern,
MatchInfo& retMatchInfo, track_var_list& retTrackVarList, std::string& retStatus) MatchInfo& retMatchInfo, track_var_list& retTrackVarList, std::string& retStatus)
{ {
retTrackVarList = {}; retTrackVarList = {};
@ -34,14 +35,14 @@ int regexis024::matchStrToRegexp(const std::string& input, const std::string& pa
for (auto& iip: regexp.ktr.track_names) { for (auto& iip: regexp.ktr.track_names) {
convert(retTrackVarList[iip.first], regexp.ktr.retrieval_info[iip.second]); convert(retTrackVarList[iip.first], regexp.ktr.retrieval_info[iip.second]);
} }
REGEX_IS024_VirtualMachine vm(regexp.compiled_program.size(), regexp.compiled_program.data(), VirtualMachine vm(regexp.compiled_program.size(), regexp.compiled_program.data(),
UINT64_MAX, UINT16_MAX, UINT64_MAX, UINT16_MAX,
UINT32_MAX, UINT32_MAX, UINT64_MAX); UINT32_MAX, UINT32_MAX, UINT64_MAX);
auto getVMErrString = [&]() -> std::string { auto getVMErrString = [&]() -> std::string {
return std::string(regex024_error_code_tostr(vm.getErrno())); return std::string(error_code_to_str(vm.getErrno()));
}; };
if (vm.initialize() != regex024_error_codes::stable) { if (vm.initialize() != error_codes::stable) {
retStatus = "Virtual machine initialization. " + getVMErrString(); retStatus = "Virtual machine initialization. " + getVMErrString();
return -1; return -1;
} }
@ -51,11 +52,11 @@ int regexis024::matchStrToRegexp(const std::string& input, const std::string& pa
retStatus = "Unnatural extended input request."; retStatus = "Unnatural extended input request.";
return -1; return -1;
} }
if (vm.addNewMatchingThread() != regex024_error_codes::stable) { if (vm.addNewMatchingThread() != error_codes::stable) {
retStatus = "Virtual machine first kick. " + getVMErrString(); retStatus = "Virtual machine first kick. " + getVMErrString();
} }
if (left_ext_feed) { if (left_ext_feed) {
if (vm.extendedFeedCharacter('\n') != regex024_error_codes::stable) { if (vm.extendedFeedCharacter('\n') != error_codes::stable) {
retStatus = "VM left extended input. " + getVMErrString(); retStatus = "VM left extended input. " + getVMErrString();
return -1; return -1;
} }
@ -63,19 +64,19 @@ int regexis024::matchStrToRegexp(const std::string& input, const std::string& pa
for (size_t cur_text_pos = 0;cur_text_pos < input.size();) { for (size_t cur_text_pos = 0;cur_text_pos < input.size();) {
int32_t inp_code; int32_t inp_code;
size_t adj; size_t adj;
utf8_string_iterat(inp_code, adj, cur_text_pos, reinterpret_cast<const uint8_t*>(input.data()), input.size()); utf8_string_iterat(inp_code, adj, cur_text_pos, input.data(), input.size());
if (inp_code < 0) { if (inp_code < 0) {
retStatus = "Input string encoding error."; retStatus = "Input string encoding error.";
return -1; return -1;
} }
if (vm.feedCharacter(static_cast<uint64_t>(inp_code), adj) != regex024_error_codes::stable) { if (vm.feedCharacter(static_cast<uint64_t>(inp_code), adj) != error_codes::stable) {
retStatus = "VM input. " + getVMErrString(); retStatus = "VM input. " + getVMErrString();
return -1; return -1;
} }
cur_text_pos += adj; cur_text_pos += adj;
} }
if (right_ext_feed) { if (right_ext_feed) {
if (vm.extendedFeedCharacter('\n') != regex024_error_codes::stable) { if (vm.extendedFeedCharacter('\n') != error_codes::stable) {
retStatus = "VM right extended input. " + getVMErrString(); retStatus = "VM right extended input. " + getVMErrString();
return -1; return -1;
} }
@ -93,16 +94,17 @@ int regexis024::matchStrToRegexp(const std::string& input, const std::string& pa
return 0; return 0;
} }
bool regexis024::MatchInfo::operator==(const MatchInfo &other) const { bool MatchInfo::operator==(const MatchInfo &other) const {
if (!have_match && !other.have_match) if (!have_match && !other.have_match)
return true; return true;
return (have_match == other.have_match) && (sa == other.sa) && (ca_history == other.ca_history); return (have_match == other.have_match) && (sa == other.sa) && (ca_history == other.ca_history);
} }
bool regexis024::MatchInfo::operator!=(const MatchInfo &other) const { bool MatchInfo::operator!=(const MatchInfo &other) const {
return !(*this == other); return !(*this == other);
} }
regexis024::MatchInfo::MatchInfo(const std::vector<REGEX_IS024_CAEvent> &ca_history, const std::vector<uint64_t> &sa): MatchInfo::MatchInfo(const std::vector<CAEvent> &ca_history, const std::vector<uint64_t> &sa):
ca_history(ca_history), sa(sa), have_match(true) { ca_history(ca_history), sa(sa), have_match(true) {
} }
}

View File

@ -11,7 +11,7 @@ namespace regexis024 {
bool stored_in_ca = true; bool stored_in_ca = true;
bool stored_in_sa = false; bool stored_in_sa = false;
tracking_var_type type; tracking_var_type_t type;
/* These fields will be -1 if unused */ /* These fields will be -1 if unused */
int colarr_first = -1; int colarr_first = -1;
int colarr_second = -1; int colarr_second = -1;
@ -24,7 +24,7 @@ namespace regexis024 {
struct MatchInfo { struct MatchInfo {
bool have_match = false; bool have_match = false;
std::vector<REGEX_IS024_CAEvent> ca_history; std::vector<CAEvent> ca_history;
std::vector<uint64_t> sa; std::vector<uint64_t> sa;
bool operator==(const MatchInfo& other) const ; bool operator==(const MatchInfo& other) const ;
@ -32,7 +32,7 @@ namespace regexis024 {
MatchInfo() = default; MatchInfo() = default;
MatchInfo(const std::vector<REGEX_IS024_CAEvent> &ca_history, const std::vector<uint64_t> &sa); MatchInfo(const std::vector<CAEvent> &ca_history, const std::vector<uint64_t> &sa);
}; };
int matchStrToRegexp(const std::string& input, const std::string& pattern, int matchStrToRegexp(const std::string& input, const std::string& pattern,

View File

@ -1,21 +1,22 @@
#include <libregexis024vm/instruction_implementation.h> #include <libregexis024vm/instruction_implementation.h>
#include <stdexcept> #include <stdexcept>
void swap_old_settled_and_new_active(REGEX_IS024_CONTEXT &ctx, REGEX_IS024_Thread& old_settled){ namespace regexis024 {
void swap_old_settled_and_new_active(VMContext &ctx, Thread& old_settled){
ctx_print_debug(ctx); ctx_print_debug(ctx);
assert(old_settled.slot_occupation_status == SLOT_OCCUPIED_val); assert(old_settled.slot_occupation_status == SLOT_OCCUPIED_val);
REGEX_IS024_Thread temp = old_settled; Thread temp = old_settled;
old_settled = ctx.active_thread; old_settled = ctx.active_thread;
old_settled.slot_occupation_status = SLOT_NEW_val; old_settled.slot_occupation_status = SLOT_NEW_val;
ctx.active_thread = temp; ctx.active_thread = temp;
// slot_occupation_status & SLOT_OCCUPIED of actie thread is true, because it was retrieved from old_settled // slot_occupation_status & SLOT_OCCUPIED of active thread is true, because it was retrieved from old_settled
} }
void start_noncloning_conflict(REGEX_IS024_CONTEXT& ctx, REGEX_IS024_Thread& other){ void start_noncloning_conflict(VMContext& ctx, Thread& other){
ctx_print_debug(ctx); ctx_print_debug(ctx);
if (ctx.have_sift_function){ if (ctx.have_sift_function){
ctx.sifting_with = &other; ctx.sifting_with = &other;
ctx.who_started_sift = regex024_opcode::READ; ctx.who_started_sift = opcode_t::READ;
ctx.intruder_IP = ctx.active_thread.IP; ctx.intruder_IP = ctx.active_thread.IP;
ctx.active_thread.IP = ctx.sift_function; ctx.active_thread.IP = ctx.sift_function;
ctx.RAX = ctx.RBX = 0; ctx.RAX = ctx.RBX = 0;
@ -26,11 +27,11 @@ void start_noncloning_conflict(REGEX_IS024_CONTEXT& ctx, REGEX_IS024_Thread& oth
} }
/* The one that drops as an intruder here is current active.thread.IP */ /* The one that drops as an intruder here is current active.thread.IP */
void start_cloning_conflict(REGEX_IS024_CONTEXT& ctx, REGEX_IS024_Thread& other, regex_near_ptr_t clone_IP){ void start_cloning_conflict(VMContext& ctx, Thread& other, near_ptr_t clone_IP){
ctx_print_debug(ctx); ctx_print_debug(ctx);
if (ctx.have_sift_function){ if (ctx.have_sift_function){
ctx.sifting_with = &other; ctx.sifting_with = &other;
ctx.who_started_sift = regex024_opcode::FORK; ctx.who_started_sift = opcode_t::FORK;
ctx.intruder_IP = ctx.active_thread.IP; ctx.intruder_IP = ctx.active_thread.IP;
ctx.child_ret_IP = clone_IP; ctx.child_ret_IP = clone_IP;
ctx.active_thread.IP = ctx.sift_function; ctx.active_thread.IP = ctx.sift_function;
@ -41,15 +42,15 @@ void start_cloning_conflict(REGEX_IS024_CONTEXT& ctx, REGEX_IS024_Thread& other,
} }
#define initialization_phase_check() if (ctx.initialized){ \ #define initialization_phase_check() if (ctx.initialized){ \
ctx.error = regex024_error_codes::too_late; return; } ctx.error = error_codes::too_late; return; }
#define general_matching_mode_check() if (!ctx.initialized){ \ #define general_matching_mode_check() if (!ctx.initialized){ \
ctx.error = regex024_error_codes::too_early; return; } if(ctx.sifting_with){ \ ctx.error = error_codes::too_early; return; } if(ctx.sifting_with){ \
ctx.error = regex024_error_codes::instruction_not_for_collision_thread; return; } ctx.error = error_codes::instruction_not_for_collision_thread; return; }
#define sift_mode_check() if (!ctx.sifting_with){ \ #define sift_mode_check() if (!ctx.sifting_with){ \
ctx.error = regex024_error_codes::instruction_not_for_collision_thread; return; } ctx.error = error_codes::instruction_not_for_collision_thread; return; }
/* Can append to both read_halted+new stacks of context */ /* Can append to both read_halted+new stacks of context */
void read_halted_new_type_stacks_append(REGEX_IS024_CONTEXT &ctx, regex_sslot_id_t ssid){ void read_halted_new_type_stacks_append(VMContext &ctx, sslot_id_t ssid){
ctx_print_debug(ctx); ctx_print_debug(ctx);
if (ssid < ctx.portion_of_FIRST_read_halt_ns){ if (ssid < ctx.portion_of_FIRST_read_halt_ns){
ctx.READ_halted_stack_new_first.append(ssid); ctx.READ_halted_stack_new_first.append(ssid);
@ -58,12 +59,12 @@ void read_halted_new_type_stacks_append(REGEX_IS024_CONTEXT &ctx, regex_sslot_id
} }
} }
void do_i_read(REGEX_IS024_CONTEXT &ctx, regex_sslot_id_t ssid) { void do_i_read(VMContext &ctx, sslot_id_t ssid) {
ctx_print_debug(ctx); ctx_print_debug(ctx);
general_matching_mode_check() general_matching_mode_check()
if (ssid >= ctx.read_slots_number) if (ssid >= ctx.read_slots_number)
smitsya(read_sslot_out_of_range); smitsya(read_sslot_out_of_range);
REGEX_IS024_Thread& other = ctx.READ_halted_slots[ssid]; Thread& other = ctx.READ_halted_slots[ssid];
if (other.slot_occupation_status & SLOT_OCCUPIED){ if (other.slot_occupation_status & SLOT_OCCUPIED){
if (other.slot_occupation_status & SLOT_NEW){ if (other.slot_occupation_status & SLOT_NEW){
start_noncloning_conflict(ctx, other); start_noncloning_conflict(ctx, other);
@ -81,31 +82,31 @@ void do_i_read(REGEX_IS024_CONTEXT &ctx, regex_sslot_id_t ssid) {
} }
} }
void i_READ(REGEX_IS024_CONTEXT &ctx) { void i_READ(VMContext &ctx) {
ctx_print_debug(ctx); ctx_print_debug(ctx);
check_available_prg(REGEX024_BYTECODE_SSLOT_ID_SZ) check_available_prg(BYTECODE_SSLOT_ID_SZ)
regex_sslot_id_t ssid = ctx.extract_sslot_id(); sslot_id_t ssid = ctx.extract_sslot_id();
do_i_read(ctx, ssid); do_i_read(ctx, ssid);
} }
void i_READZ(REGEX_IS024_CONTEXT &ctx) { void i_READZ(VMContext &ctx) {
ctx_print_debug(ctx); ctx_print_debug(ctx);
do_i_read(ctx, 0); do_i_read(ctx, 0);
} }
void i_JUMP(REGEX_IS024_CONTEXT& ctx){ void i_JUMP(VMContext& ctx){
ctx_print_debug(ctx); ctx_print_debug(ctx);
check_available_prg(REGEX024_BYTECODE_NEAR_POINTER_SZ) check_available_prg(BYTECODE_NEAR_POINTER_SZ)
ctx.active_thread.IP = ctx.extract_near_pointer(); ctx.active_thread.IP = ctx.extract_near_pointer();
} }
template<typename conditionT, typename immArgSzT> template<typename conditionT, typename immArgSzT>
void i_JC(REGEX_IS024_CONTEXT& ctx) void i_JC(VMContext& ctx)
{ {
ctx_print_debug(ctx); ctx_print_debug(ctx);
check_available_prg(immArgSzT::byte_sz + REGEX024_BYTECODE_NEAR_POINTER_SZ); check_available_prg(immArgSzT::byte_sz + BYTECODE_NEAR_POINTER_SZ);
uint64_t imm_val_B = immArgSzT::extract(ctx); uint64_t imm_val_B = immArgSzT::extract(ctx);
regex_near_ptr_t dest = ctx.extract_near_pointer(); near_ptr_t dest = ctx.extract_near_pointer();
uint64_t imm_val_A = ctx.INP; uint64_t imm_val_A = ctx.INP;
if (conditionT::call(imm_val_A, imm_val_B)) if (conditionT::call(imm_val_A, imm_val_B))
ctx.active_thread.IP = dest; ctx.active_thread.IP = dest;
@ -117,22 +118,22 @@ struct condGrtr{static bool call(uint64_t A, uint64_t B){return A > B;}};
struct immArgByte{ struct immArgByte{
static constexpr int byte_sz = 1; static constexpr int byte_sz = 1;
static uint64_t extract(REGEX_IS024_CONTEXT& ctx){return ctx.extract_b();} static uint64_t extract(VMContext& ctx){return ctx.extract_b();}
}; };
struct immArgWord{ struct immArgWord{
static constexpr int byte_sz = 2; static constexpr int byte_sz = 2;
static uint64_t extract(REGEX_IS024_CONTEXT& ctx){return ctx.extract_w();} static uint64_t extract(VMContext& ctx){return ctx.extract_w();}
}; };
struct immArgDoubleWord{ struct immArgDoubleWord{
static constexpr int byte_sz = 4; static constexpr int byte_sz = 4;
static uint64_t extract(REGEX_IS024_CONTEXT& ctx){return ctx.extract_dw();} static uint64_t extract(VMContext& ctx){return ctx.extract_dw();}
}; };
struct immArgQuadWord{ struct immArgQuadWord{
static constexpr int byte_sz = 8; static constexpr int byte_sz = 8;
static uint64_t extract(REGEX_IS024_CONTEXT& ctx){return ctx.extract_qw();} static uint64_t extract(VMContext& ctx){return ctx.extract_qw();}
}; };
void clone_thread_into_slot(REGEX_IS024_Thread& source, REGEX_IS024_Thread& vessel){ void clone_thread_into_slot(Thread& source, Thread& vessel){
thread_print_debug(source); thread_print_debug(source);
my_assert(!(vessel.slot_occupation_status & SLOT_OCCUPIED)); my_assert(!(vessel.slot_occupation_status & SLOT_OCCUPIED));
my_assert((source.slot_occupation_status & SLOT_OCCUPIED)); my_assert((source.slot_occupation_status & SLOT_OCCUPIED));
@ -146,15 +147,15 @@ void clone_thread_into_slot(REGEX_IS024_Thread& source, REGEX_IS024_Thread& vess
} }
/* One FORK-slot governs the one single unique position in program: the next one after the fork */ /* One FORK-slot governs the one single unique position in program: the next one after the fork */
void i_FORK(REGEX_IS024_CONTEXT& ctx){ void i_FORK(VMContext& ctx){
ctx_print_debug(ctx); ctx_print_debug(ctx);
general_matching_mode_check() general_matching_mode_check()
check_available_prg(REGEX024_BYTECODE_SSLOT_ID_SZ + REGEX024_BYTECODE_NEAR_POINTER_SZ); check_available_prg(BYTECODE_SSLOT_ID_SZ + BYTECODE_NEAR_POINTER_SZ);
regex_sslot_id_t ssid = ctx.extract_sslot_id(); sslot_id_t ssid = ctx.extract_sslot_id();
regex_near_ptr_t dest = ctx.extract_near_pointer(); near_ptr_t dest = ctx.extract_near_pointer();
if (ssid >= ctx.fork_slots_number) if (ssid >= ctx.fork_slots_number)
smitsya(fork_sslot_out_of_range); smitsya(fork_sslot_out_of_range);
REGEX_IS024_Thread& other = ctx.FORK_halted_slots[ssid]; Thread& other = ctx.FORK_halted_slots[ssid];
if (other.slot_occupation_status & SLOT_OCCUPIED){ if (other.slot_occupation_status & SLOT_OCCUPIED){
start_cloning_conflict(ctx, other, dest); start_cloning_conflict(ctx, other, dest);
} else { } else {
@ -164,7 +165,7 @@ void i_FORK(REGEX_IS024_CONTEXT& ctx){
} }
} }
void i_MATCH(REGEX_IS024_CONTEXT& ctx){ void i_MATCH(VMContext& ctx){
ctx_print_debug(ctx); ctx_print_debug(ctx);
general_matching_mode_check() general_matching_mode_check()
if (ctx.matched_thread.slot_occupation_status & SLOT_OCCUPIED){ if (ctx.matched_thread.slot_occupation_status & SLOT_OCCUPIED){
@ -174,77 +175,77 @@ void i_MATCH(REGEX_IS024_CONTEXT& ctx){
} }
} }
void i_DIE(REGEX_IS024_CONTEXT& ctx){ void i_DIE(VMContext& ctx){
ctx_print_debug(ctx); ctx_print_debug(ctx);
general_matching_mode_check() general_matching_mode_check()
ctx.active_thread.delete_thread(); ctx.active_thread.delete_thread();
ctx.try_to_continue_scheduled(); ctx.try_to_continue_scheduled();
} }
void i_PARAM_READ_SS_NUMBER(REGEX_IS024_CONTEXT& ctx){ void i_PARAM_READ_SS_NUMBER(VMContext& ctx){
ctx_print_debug(ctx); ctx_print_debug(ctx);
initialization_phase_check() initialization_phase_check()
check_available_prg(REGEX024_BYTECODE_SSLOT_ID_SZ) check_available_prg(BYTECODE_SSLOT_ID_SZ)
regex_sslot_id_t read_slots_number = ctx.extract_sslot_id(); sslot_id_t read_slots_number = ctx.extract_sslot_id();
ctx.read_slots_number = read_slots_number; ctx.read_slots_number = read_slots_number;
} }
void i_PARAM_FORK_SS_NUMBER(REGEX_IS024_CONTEXT& ctx){ void i_PARAM_FORK_SS_NUMBER(VMContext& ctx){
ctx_print_debug(ctx); ctx_print_debug(ctx);
initialization_phase_check() initialization_phase_check()
check_available_prg(REGEX024_BYTECODE_SSLOT_ID_SZ) check_available_prg(BYTECODE_SSLOT_ID_SZ)
regex_sslot_id_t fork_slots_number = ctx.extract_sslot_id(); sslot_id_t fork_slots_number = ctx.extract_sslot_id();
ctx.fork_slots_number = fork_slots_number; ctx.fork_slots_number = fork_slots_number;
} }
void i_PARAM_SELARR_LEN(REGEX_IS024_CONTEXT& ctx){ void i_PARAM_SELARR_LEN(VMContext& ctx){
ctx_print_debug(ctx); ctx_print_debug(ctx);
initialization_phase_check() initialization_phase_check()
check_available_prg(REGEX024_BYTECODE_TRACK_ARRAY_INDEX_ID_SZ) check_available_prg(BYTECODE_TRACK_ARRAY_INDEX_ID_SZ)
regex_tai_t selection_array_len = ctx.extract_track_array_index(); tai_t selection_array_len = ctx.extract_track_array_index();
ctx.selection_array_len = selection_array_len; ctx.selection_array_len = selection_array_len;
} }
void i_PARAM_COLSIFTFUNC_SET(REGEX_IS024_CONTEXT& ctx){ void i_PARAM_COLSIFTFUNC_SET(VMContext& ctx){
ctx_print_debug(ctx); ctx_print_debug(ctx);
initialization_phase_check() initialization_phase_check()
check_available_prg(REGEX024_BYTECODE_NEAR_POINTER_SZ) check_available_prg(BYTECODE_NEAR_POINTER_SZ)
regex_near_ptr_t sift_function = ctx.extract_near_pointer(); near_ptr_t sift_function = ctx.extract_near_pointer();
ctx.have_sift_function = true; ctx.have_sift_function = true;
ctx.sift_function = sift_function; ctx.sift_function = sift_function;
} }
void i_PARAM_COLSIFTFUNC_WIPE(REGEX_IS024_CONTEXT& ctx){ void i_PARAM_COLSIFTFUNC_WIPE(VMContext& ctx){
ctx_print_debug(ctx); ctx_print_debug(ctx);
initialization_phase_check() initialization_phase_check()
ctx.have_sift_function = false; ctx.have_sift_function = false;
} }
void i_MSG_MULTISTART_ALLOWED(REGEX_IS024_CONTEXT& ctx){ void i_MSG_MULTISTART_ALLOWED(VMContext& ctx){
ctx_print_debug(ctx); ctx_print_debug(ctx);
initialization_phase_check() initialization_phase_check()
check_available_prg(1) check_available_prg(1)
ctx.allows_multistart = (bool)ctx.extract_b(); ctx.allows_multistart = (bool)ctx.extract_b();
} }
void i_MSG_FED_INPUT_EXTENDED(REGEX_IS024_CONTEXT& ctx){ void i_MSG_FED_INPUT_EXTENDED(VMContext& ctx){
ctx_print_debug(ctx); ctx_print_debug(ctx);
initialization_phase_check() initialization_phase_check()
check_available_prg(1 + 1 + REGEX024_BYTECODE_SSLOT_ID_SZ) check_available_prg(1 + 1 + BYTECODE_SSLOT_ID_SZ)
ctx.fed_input_extends_left = ctx.extract_b(); ctx.fed_input_extends_left = ctx.extract_b();
ctx.fed_input_extends_right = ctx.extract_b(); ctx.fed_input_extends_right = ctx.extract_b();
ctx.portion_of_second_read_halt_ns = ctx.extract_sslot_id(); ctx.portion_of_second_read_halt_ns = ctx.extract_sslot_id();
} }
uint64_t get_el_from_selarr(uint64_t* sa, regex_near_ptr_t ind){ uint64_t get_el_from_selarr(uint64_t* sa, near_ptr_t ind){
return sa ? sa[1UL + ind] : 0; return sa ? sa[1UL + ind] : 0;
} }
void i_DMOV_RABX_SELARR(REGEX_IS024_CONTEXT& ctx){ void i_DMOV_RABX_SELARR(VMContext& ctx){
ctx_print_debug(ctx); ctx_print_debug(ctx);
sift_mode_check() sift_mode_check()
check_available_prg(REGEX024_BYTECODE_TRACK_ARRAY_INDEX_ID_SZ) check_available_prg(BYTECODE_TRACK_ARRAY_INDEX_ID_SZ)
regex_tai_t i1 = ctx.extract_track_array_index(); tai_t i1 = ctx.extract_track_array_index();
if (i1 >= ctx.selection_array_len) if (i1 >= ctx.selection_array_len)
smitsya(selection_arr_out_of_range); smitsya(selection_arr_out_of_range);
ctx.RAX = get_el_from_selarr(ctx.active_thread.SAptr, i1); ctx.RAX = get_el_from_selarr(ctx.active_thread.SAptr, i1);
@ -257,23 +258,23 @@ uint64_t get_selarr_el_dist(uint64_t* sa, uint16_t start, uint16_t end){
return v_end > v_start ? v_end - v_start : 0; return v_end > v_start ? v_end - v_start : 0;
} }
void i_DDIST_RABX_SELARR(REGEX_IS024_CONTEXT& ctx){ void i_DDIST_RABX_SELARR(VMContext& ctx){
ctx_print_debug(ctx); ctx_print_debug(ctx);
sift_mode_check() sift_mode_check()
check_available_prg(REGEX024_BYTECODE_TRACK_ARRAY_INDEX_ID_SZ * 2) check_available_prg(BYTECODE_TRACK_ARRAY_INDEX_ID_SZ * 2)
regex_tai_t i_start = ctx.extract_track_array_index(); tai_t i_start = ctx.extract_track_array_index();
if (i_start >= ctx.selection_array_len) if (i_start >= ctx.selection_array_len)
smitsya(selection_arr_out_of_range); smitsya(selection_arr_out_of_range);
regex_tai_t i_end = ctx.extract_track_array_index(); tai_t i_end = ctx.extract_track_array_index();
if (i_end >= ctx.selection_array_len) if (i_end >= ctx.selection_array_len)
smitsya(selection_arr_out_of_range); smitsya(selection_arr_out_of_range);
ctx.RAX = get_selarr_el_dist(ctx.active_thread.SAptr, i_start, i_end); ctx.RAX = get_selarr_el_dist(ctx.active_thread.SAptr, i_start, i_end);
ctx.RBX = get_selarr_el_dist(ctx.sifting_with->SAptr, i_start, i_end); ctx.RBX = get_selarr_el_dist(ctx.sifting_with->SAptr, i_start, i_end);
} }
void finish_conflict_homesteader_wins(REGEX_IS024_CONTEXT& ctx){ void finish_conflict_homesteader_wins(VMContext& ctx){
ctx_print_debug(ctx); ctx_print_debug(ctx);
if (ctx.who_started_sift == regex024_opcodes::READ){ if (ctx.who_started_sift == opcodes::READ){
ctx.active_thread.delete_thread(); ctx.active_thread.delete_thread();
ctx.try_to_continue_scheduled(); ctx.try_to_continue_scheduled();
} else { } else {
@ -284,11 +285,11 @@ void finish_conflict_homesteader_wins(REGEX_IS024_CONTEXT& ctx){
ctx.sifting_with = NULL; ctx.sifting_with = NULL;
} }
void finish_conflict_intruder_wins(REGEX_IS024_CONTEXT& ctx){ void finish_conflict_intruder_wins(VMContext& ctx){
ctx_print_debug(ctx); ctx_print_debug(ctx);
ctx.sifting_with->delete_thread(); ctx.sifting_with->delete_thread();
ctx.active_thread.IP = ctx.intruder_IP; ctx.active_thread.IP = ctx.intruder_IP;
if (ctx.who_started_sift == regex024_opcodes::READ){ if (ctx.who_started_sift == opcodes::READ){
/* noncloning conflict won by intruder+ */ /* noncloning conflict won by intruder+ */
*ctx.sifting_with = ctx.active_thread; *ctx.sifting_with = ctx.active_thread;
ctx.active_thread.slot_occupation_status = SLOT_EMPTY_val; ctx.active_thread.slot_occupation_status = SLOT_EMPTY_val;
@ -301,7 +302,7 @@ void finish_conflict_intruder_wins(REGEX_IS024_CONTEXT& ctx){
ctx.sifting_with = NULL; ctx.sifting_with = NULL;
} }
void i_SIFTPRIOR_MIN_RABX(REGEX_IS024_CONTEXT& ctx){ void i_SIFTPRIOR_MIN_RABX(VMContext& ctx){
ctx_print_debug(ctx); ctx_print_debug(ctx);
sift_mode_check() sift_mode_check()
if (ctx.RAX < ctx.RBX){ if (ctx.RAX < ctx.RBX){
@ -311,7 +312,7 @@ void i_SIFTPRIOR_MIN_RABX(REGEX_IS024_CONTEXT& ctx){
} }
} }
void i_SIFTPRIOR_MAX_RABX(REGEX_IS024_CONTEXT& ctx){ void i_SIFTPRIOR_MAX_RABX(VMContext& ctx){
ctx_print_debug(ctx); ctx_print_debug(ctx);
sift_mode_check() sift_mode_check()
if (ctx.RAX > ctx.RBX){ if (ctx.RAX > ctx.RBX){
@ -321,43 +322,43 @@ void i_SIFTPRIOR_MAX_RABX(REGEX_IS024_CONTEXT& ctx){
} }
} }
void i_SIFT_DONE(REGEX_IS024_CONTEXT& ctx){ void i_SIFT_DONE(VMContext& ctx){
ctx_print_debug(ctx); ctx_print_debug(ctx);
sift_mode_check() sift_mode_check()
finish_conflict_homesteader_wins(ctx); finish_conflict_homesteader_wins(ctx);
} }
/* Can give errors */ /* Can give errors */
void ca_branch_new_node(REGEX_IS024_CONTEXT& ctx, regex_tai_t key, uint64_t val){ void ca_branch_new_node(VMContext& ctx, tai_t key, uint64_t val){
ctx_print_debug(ctx); ctx_print_debug(ctx);
if (ctx.CAN_total >= ctx.CA_TREE_LIMIT) if (ctx.CAN_total >= ctx.CA_TREE_LIMIT)
smitsya(ca_tree_limit_violation); smitsya(ca_tree_limit_violation);
REGEX024_CollectionArrayNode* node = new REGEX024_CollectionArrayNode{key, val, ctx.active_thread.CAHptr, 1}; CollectionArrayNode* node = new CollectionArrayNode{key, val, ctx.active_thread.CAHptr, 1};
// if (ctx.active_thread.CAHptr) // if (ctx.active_thread.CAHptr)
// (ctx.active_thread.CAHptr->refs)++; // (ctx.active_thread.CAHptr->refs)++;
ctx.active_thread.CAHptr = node; ctx.active_thread.CAHptr = node;
ctx.CAN_total++; ctx.CAN_total++;
} }
void i_MOV_COLARR_IMM(REGEX_IS024_CONTEXT& ctx){ void i_MOV_COLARR_IMM(VMContext& ctx){
ctx_print_debug(ctx); ctx_print_debug(ctx);
general_matching_mode_check() general_matching_mode_check()
check_available_prg(REGEX024_BYTECODE_TRACK_ARRAY_INDEX_ID_SZ + 8) check_available_prg(BYTECODE_TRACK_ARRAY_INDEX_ID_SZ + 8)
regex_tai_t ca_ind = ctx.extract_track_array_index(); tai_t ca_ind = ctx.extract_track_array_index();
uint64_t imm = ctx.extract_qw(); uint64_t imm = ctx.extract_qw();
ca_branch_new_node(ctx, ca_ind, imm); ca_branch_new_node(ctx, ca_ind, imm);
} }
void i_MOV_COLARR_BTPOS(REGEX_IS024_CONTEXT& ctx){ void i_MOV_COLARR_BTPOS(VMContext& ctx){
ctx_print_debug(ctx); ctx_print_debug(ctx);
general_matching_mode_check() general_matching_mode_check()
check_available_prg(REGEX024_BYTECODE_TRACK_ARRAY_INDEX_ID_SZ) check_available_prg(BYTECODE_TRACK_ARRAY_INDEX_ID_SZ)
regex_tai_t ca_ind = ctx.extract_track_array_index(); tai_t ca_ind = ctx.extract_track_array_index();
ca_branch_new_node(ctx, ca_ind, ctx.passed_bytes); ca_branch_new_node(ctx, ca_ind, ctx.passed_bytes);
} }
/* Can throw error, should be placed at the end. Call ONLY in general matching mode */ /* Can throw error, should be placed at the end. Call ONLY in general matching mode */
void edit_selection_array(REGEX_IS024_CONTEXT& ctx, uint64_t key, uint64_t val){ void edit_selection_array(VMContext& ctx, uint64_t key, uint64_t val){
ctx_print_debug(ctx); ctx_print_debug(ctx);
uint64_t N = ctx.selection_array_len; uint64_t N = ctx.selection_array_len;
if (key >= N) if (key >= N)
@ -384,39 +385,40 @@ void edit_selection_array(REGEX_IS024_CONTEXT& ctx, uint64_t key, uint64_t val){
} }
} }
void i_MOV_SELARR_IMM(REGEX_IS024_CONTEXT& ctx){ void i_MOV_SELARR_IMM(VMContext& ctx){
ctx_print_debug(ctx); ctx_print_debug(ctx);
general_matching_mode_check() general_matching_mode_check()
check_available_prg(REGEX024_BYTECODE_TRACK_ARRAY_INDEX_ID_SZ + 8) check_available_prg(BYTECODE_TRACK_ARRAY_INDEX_ID_SZ + 8)
regex_tai_t sa_ind = ctx.extract_track_array_index(); tai_t sa_ind = ctx.extract_track_array_index();
uint64_t imm = ctx.extract_qw(); uint64_t imm = ctx.extract_qw();
edit_selection_array(ctx, sa_ind, imm); edit_selection_array(ctx, sa_ind, imm);
} }
void i_MOV_SELARR_CHPOS(REGEX_IS024_CONTEXT& ctx){ void i_MOV_SELARR_CHPOS(VMContext& ctx){
ctx_print_debug(ctx); ctx_print_debug(ctx);
general_matching_mode_check() general_matching_mode_check()
check_available_prg(REGEX024_BYTECODE_TRACK_ARRAY_INDEX_ID_SZ) check_available_prg(BYTECODE_TRACK_ARRAY_INDEX_ID_SZ)
regex_tai_t sa_ind = ctx.extract_track_array_index(); tai_t sa_ind = ctx.extract_track_array_index();
edit_selection_array(ctx, sa_ind, ctx.passed_chars); edit_selection_array(ctx, sa_ind, ctx.passed_chars);
} }
void calloc_stack_slots(REGEX_IS024_Stack& stack, regex_sslot_id_t nmemb) { void calloc_stack_slots(SSID_Stack& stack, sslot_id_t nmemb) {
assert(stack.sz == 0 && !stack.slots); assert(stack.max_size == 0 && stack.sz == 0 && !stack.slots);
regex_sslot_id_t* storage = static_cast<regex_sslot_id_t *>(calloc(nmemb, sizeof(regex_sslot_id_t))); sslot_id_t* storage = static_cast<sslot_id_t *>(calloc(nmemb, sizeof(sslot_id_t)));
if (!storage) if (!storage)
throw std::bad_alloc(); throw std::bad_alloc();
stack.slots = storage; stack.slots = storage;
stack.max_size = nmemb;
} }
REGEX_IS024_Thread* calloc_slots_array(regex_sslot_id_t nmemb) { Thread* calloc_slots_array(sslot_id_t nmemb) {
REGEX_IS024_Thread* ptr = static_cast<REGEX_IS024_Thread *>(calloc(nmemb, sizeof(REGEX_IS024_Thread))); Thread* ptr = static_cast<Thread *>(calloc(nmemb, sizeof(Thread)));
if (!ptr) if (!ptr)
throw std::bad_alloc(); throw std::bad_alloc();
return ptr; return ptr;
} }
void i_INIT(REGEX_IS024_CONTEXT& ctx){ void i_INIT(VMContext& ctx){
ctx_print_debug(ctx); ctx_print_debug(ctx);
initialization_phase_check() initialization_phase_check()
if (ctx.selection_array_len > ctx.SA_LEN_LIMIT) if (ctx.selection_array_len > ctx.SA_LEN_LIMIT)
@ -442,19 +444,19 @@ void i_INIT(REGEX_IS024_CONTEXT& ctx){
ctx.active_thread.delete_thread(); ctx.active_thread.delete_thread();
} }
void i_THROW(REGEX_IS024_CONTEXT& ctx){ void i_THROW(VMContext& ctx){
ctx.error = regex024_error_codes::program_throw; ctx.error = error_codes::program_throw;
} }
void instruction_table(REGEX_IS024_CONTEXT &ctx) { void instruction_table(VMContext &ctx) {
ctx_print_debug(ctx); ctx_print_debug(ctx);
uint8_t opcode = ctx.extract_instruction(); uint8_t opcode = ctx.extract_instruction();
#define rcase(inst) case regex024_opcodes::inst: return i_ ## inst (ctx); #define rcase(inst) case opcodes::inst: return i_ ## inst (ctx);
#define jumpC(UN, st) case regex024_opcodes::JC ## UN ## _B: return i_JC<st, immArgByte>(ctx); \ #define jumpC(UN, st) case opcodes::JC ## UN ## _B: return i_JC<st, immArgByte>(ctx); \
case regex024_opcodes::JC ## UN ## _W: return i_JC<st, immArgWord>(ctx); \ case opcodes::JC ## UN ## _W: return i_JC<st, immArgWord>(ctx); \
case regex024_opcodes::JC ## UN ## _DW: return i_JC<st, immArgDoubleWord>(ctx); \ case opcodes::JC ## UN ## _DW: return i_JC<st, immArgDoubleWord>(ctx); \
case regex024_opcodes::JC ## UN ## _QW: return i_JC<st, immArgQuadWord>(ctx); case opcodes::JC ## UN ## _QW: return i_JC<st, immArgQuadWord>(ctx);
switch (opcode) { switch (opcode) {
rcase(READ) rcase(READ)
rcase(READZ) rcase(READZ)
@ -486,6 +488,7 @@ void instruction_table(REGEX_IS024_CONTEXT &ctx) {
rcase(INIT) rcase(INIT)
rcase(THROW) rcase(THROW)
default: default:
ctx.error = regex024_error_codes::invalid_opcode; ctx.error = error_codes::invalid_opcode;
}
} }
} }

View File

@ -7,7 +7,7 @@
#include <libregexis024vm/vm_opcodes.h> #include <libregexis024vm/vm_opcodes.h>
#include <assert.h> #include <assert.h>
#define smitsya(error_type) do {ctx.error = regex024_error_codes::error_type; return; } while (0) #define smitsya(error_type) do {ctx.error = error_codes::error_type; return; } while (0)
#define SLOT_EMPTY_val 0 #define SLOT_EMPTY_val 0
#define SLOT_OCCUPIED 1 #define SLOT_OCCUPIED 1
@ -16,7 +16,7 @@
#define SLOT_NEW_val (SLOT_OCCUPIED | SLOT_NEW) #define SLOT_NEW_val (SLOT_OCCUPIED | SLOT_NEW)
#define check_available_prg(regionSz) if (!ctx.check_inboundness(regionSz)){ \ #define check_available_prg(regionSz) if (!ctx.check_inboundness(regionSz)){ \
ctx.error = regex024_error_codes::improper_finish; return; } ctx.error = error_codes::improper_finish; return; }
#if defined(LIBREGEXIS024_DEBUG) && defined(LIBREGEXIS024_ALLOW_LOUD) #if defined(LIBREGEXIS024_DEBUG) && defined(LIBREGEXIS024_ALLOW_LOUD)
@ -30,6 +30,8 @@
#define thread_print_debug(thread) #define thread_print_debug(thread)
#endif #endif
void instruction_table(REGEX_IS024_CONTEXT& ctx); namespace regexis024 {
void instruction_table(VMContext& ctx);
}
#endif //LIBREGEXIS024_INSTRUCTION_IMPLEMENTATION_H #endif //LIBREGEXIS024_INSTRUCTION_IMPLEMENTATION_H

View File

@ -1,10 +1,10 @@
#include <libregexis024vm/vm_opcodes.h> #include <libregexis024vm/vm_opcodes.h>
#include <libregexis024vm/utils.h> #include <libregexis024vm/utils.h>
#define rcase(name) case regex024_opcodes::name: return #name; namespace regexis024 {
const char *opcode_to_str(opcode_t x) {
const char *regex024_opcode_tostr(regex024_opcode x) {
switch (x) { switch (x) {
#define rcase(name) case opcodes::name: return #name;
rcase(READ) rcase(READ)
rcase(READZ) rcase(READZ)
rcase(JUMP) rcase(JUMP)
@ -45,3 +45,4 @@ const char *regex024_opcode_tostr(regex024_opcode x) {
return "Invalid opcode"; return "Invalid opcode";
} }
} }
}

View File

@ -16,37 +16,38 @@
#include <stdlib.h> #include <stdlib.h>
#include <stdint.h> #include <stdint.h>
struct REGEX_IS024_Stack{ namespace regexis024 {
regex_sslot_id_t* slots = NULL; struct SSID_Stack{
regex_sslot_id_t sz = 0; sslot_id_t* slots = NULL;
sslot_id_t max_size = 0;
sslot_id_t sz = 0;
regex_sslot_id_t pop(); sslot_id_t pop();
void append(regex_sslot_id_t x); void append(sslot_id_t x);
bool empty() const; bool empty() const;
bool non_empty() const;
REGEX_IS024_Stack(const REGEX_IS024_Stack&) = delete; SSID_Stack(const SSID_Stack&) = delete;
REGEX_IS024_Stack& operator=(const REGEX_IS024_Stack&) = delete; SSID_Stack& operator=(const SSID_Stack&) = delete;
REGEX_IS024_Stack() = default; SSID_Stack() = default;
~REGEX_IS024_Stack(); ~SSID_Stack();
}; };
struct REGEX024_CollectionArrayNode{ struct CollectionArrayNode{
/* Key is small for historical reasons I do not rememeber. Who cares anyway */ /* Key is small for historical reasons I do not rememeber. Who cares anyway */
regex_tai_t key; tai_t key;
uint64_t value; uint64_t value;
/* NULL at the beginning */ /* NULL at the beginning */
REGEX024_CollectionArrayNode* prev; CollectionArrayNode* prev;
/* Reference counting */ /* Reference counting */
uint64_t refs = 0; uint64_t refs = 0;
}; };
struct REGEX_IS024_Thread{ struct Thread{
/* First byte field is used only when thread is located in slot */ /* First byte field is used only when thread is located in slot */
uint8_t slot_occupation_status = 0; uint8_t slot_occupation_status = 0;
regex_near_ptr_t IP = 0; near_ptr_t IP = 0;
REGEX024_CollectionArrayNode* CAHptr = NULL; CollectionArrayNode* CAHptr = NULL;
/* Pointer to the seletion array. SA's are reference counted. Because of that every SA /* Pointer to the seletion array. SA's are reference counted. Because of that every SA
* is elongated by one meta element in the beginning - reference counter. So the actual elements * is elongated by one meta element in the beginning - reference counter. So the actual elements
* are enumerated starting from one. */ * are enumerated starting from one. */
@ -56,18 +57,18 @@ struct REGEX_IS024_Thread{
void debug_print(const char* place); void debug_print(const char* place);
}; };
struct REGEX_IS024_CONTEXT{ struct VMContext{
REGEX_IS024_CONTEXT(size_t programSize, const uint8_t *data, uint64_t caTreeLimit, regex_tai_t saLenLimit, VMContext(size_t programSize, const uint8_t *data, uint64_t caTreeLimit, tai_t saLenLimit,
regex_sslot_id_t readSsLimit, regex_sslot_id_t forkSsLimit, uint64_t timeTickLimit); sslot_id_t readSsLimit, sslot_id_t forkSsLimit, uint64_t timeTickLimit);
regex024_error_code feedSOF(); error_code_t feedSOF();
/* You can safely pile up calls to this command, nothing bad will happen */ /* You can safely pile up calls to this command, nothing bad will happen */
regex024_error_code startThread(); error_code_t startThread();
regex024_error_code extendedFeedCharacter(uint64_t input); error_code_t extendedFeedCharacter(uint64_t input);
regex024_error_code feedCharacter(uint64_t INP, uint64_t corresponding_byte_amount); error_code_t feedCharacter(uint64_t INP, uint64_t corresponding_byte_amount);
~REGEX_IS024_CONTEXT(); ~VMContext();
/* Program size larger than 2^62 is forbidden */ /* Program size larger than 2^62 is forbidden */
size_t program_size = 0; size_t program_size = 0;
@ -78,9 +79,9 @@ struct REGEX_IS024_CONTEXT{
/* CA = Collecton array. */ /* CA = Collecton array. */
uint64_t CA_TREE_LIMIT; uint64_t CA_TREE_LIMIT;
/* SA = Selection array */ /* SA = Selection array */
regex_tai_t SA_LEN_LIMIT; tai_t SA_LEN_LIMIT;
regex_sslot_id_t READ_SS_LIMIT; sslot_id_t READ_SS_LIMIT;
regex_sslot_id_t FORK_SS_LIMIT; sslot_id_t FORK_SS_LIMIT;
/* If time_tick_limit is non-zero, regex virtual machine will stop with error /* If time_tick_limit is non-zero, regex virtual machine will stop with error
* after this many ticks. This parameter set's the timeout.*/ * after this many ticks. This parameter set's the timeout.*/
@ -93,38 +94,38 @@ struct REGEX_IS024_CONTEXT{
uint64_t CAN_total = 0; uint64_t CAN_total = 0;
/* Program selects it */ /* Program selects it */
regex_tai_t selection_array_len = 0; tai_t selection_array_len = 0;
regex_sslot_id_t read_slots_number = 0; sslot_id_t read_slots_number = 0;
regex_sslot_id_t fork_slots_number = 0; sslot_id_t fork_slots_number = 0;
bool have_sift_function = false; bool have_sift_function = false;
regex_near_ptr_t sift_function; near_ptr_t sift_function;
bool allows_multistart = false; bool allows_multistart = false;
uint8_t fed_input_extends_left = 0, fed_input_extends_right = 0; uint8_t fed_input_extends_left = 0, fed_input_extends_right = 0;
regex_sslot_id_t portion_of_second_read_halt_ns = 0, portion_of_FIRST_read_halt_ns = 0; sslot_id_t portion_of_second_read_halt_ns = 0, portion_of_FIRST_read_halt_ns = 0;
bool initialized = false; bool initialized = false;
regex_near_ptr_t unnatural_started_thread_IP = 1337; near_ptr_t unnatural_started_thread_IP = 1337;
regex024_error_code error = regex024_error_codes::stable; error_code_t error = error_codes::stable;
REGEX_IS024_Thread* READ_halted_slots; Thread* READ_halted_slots;
REGEX_IS024_Stack READ_halted_stack_old; SSID_Stack READ_halted_stack_old;
REGEX_IS024_Stack READ_halted_stack_new_first; SSID_Stack READ_halted_stack_new_first;
REGEX_IS024_Stack READ_halted_stack_new_second; SSID_Stack READ_halted_stack_new_second;
REGEX_IS024_Thread* FORK_halted_slots; Thread* FORK_halted_slots;
REGEX_IS024_Stack FORK_halted_stack; SSID_Stack FORK_halted_stack;
REGEX_IS024_Thread active_thread; Thread active_thread;
/* Environment for sifting stuff */ /* Environment for sifting stuff */
REGEX_IS024_Thread* sifting_with = NULL; Thread* sifting_with = NULL;
/* specifies the type of operation vm should do after shift (there are only two distinct options) */ /* specifies the type of operation vm should do after shift (there are only two distinct options) */
uint8_t who_started_sift; uint8_t who_started_sift;
/* Sifting process uses IP field of active thread. Other data of thread is not modified or used during collision /* Sifting process uses IP field of active thread. Other data of thread is not modified or used during collision
* procudure. Old IP is stored there, if needed */ * procudure. Old IP is stored there, if needed */
regex_near_ptr_t child_ret_IP; near_ptr_t child_ret_IP;
regex_near_ptr_t intruder_IP; near_ptr_t intruder_IP;
/* RAX corresponds to intruder. Its data is stored in active thread field*/ /* RAX corresponds to intruder. Its data is stored in active thread field*/
uint64_t RAX; uint64_t RAX;
/* RBX corresponds to homesteader. Its data is accessible by `REGEX_IS024_Thread* sifting_with` pointer*/ /* RBX corresponds to homesteader. Its data is accessible by `REGEX_IS024_Thread* sifting_with` pointer*/
@ -132,7 +133,7 @@ struct REGEX_IS024_CONTEXT{
/* Will be unoccupied if no threads matched. After each feed of character this field will be wiped /* Will be unoccupied if no threads matched. After each feed of character this field will be wiped
* User should take care of intermediate success himself */ * User should take care of intermediate success himself */
REGEX_IS024_Thread matched_thread; Thread matched_thread;
uint64_t INP = 0; uint64_t INP = 0;
uint64_t passed_chars = 0; uint64_t passed_chars = 0;
@ -148,11 +149,11 @@ struct REGEX_IS024_CONTEXT{
uint64_t extract_qw(); uint64_t extract_qw();
uint8_t extract_instruction(); uint8_t extract_instruction();
regex_sslot_id_t extract_sslot_id(); sslot_id_t extract_sslot_id();
regex_near_ptr_t extract_near_pointer(); near_ptr_t extract_near_pointer();
regex_tai_t extract_track_array_index(); tai_t extract_track_array_index();
void debug_print(const char* place); void debug_print(const char* place);
}; };
}
#endif //LIBREGEXIS024_LIBREGEXIS024VM_H #endif //LIBREGEXIS024_LIBREGEXIS024VM_H

View File

@ -1,54 +1,54 @@
#include <stdexcept>
#include <libregexis024vm/libregexis024vm.h> #include <libregexis024vm/libregexis024vm.h>
#include <libregexis024vm/instruction_implementation.h> #include <libregexis024vm/instruction_implementation.h>
#include <utility> #include <utility>
regex_sslot_id_t REGEX_IS024_Stack::pop() { namespace regexis024 {
sslot_id_t SSID_Stack::pop() {
assert(sz != 0); assert(sz != 0);
return slots[--sz]; return slots[--sz];
} }
void REGEX_IS024_Stack::append(regex_sslot_id_t x) { void SSID_Stack::append(sslot_id_t x) {
assert(max_size > 0);
assert(slots); assert(slots);
assert(sz < max_size);
slots[sz] = x; slots[sz] = x;
sz++; sz++;
} }
bool REGEX_IS024_Stack::empty() const { bool SSID_Stack::empty() const {
return !non_empty(); return sz == 0;
} }
bool REGEX_IS024_Stack::non_empty() const { SSID_Stack::~SSID_Stack() {
return sz;
}
REGEX_IS024_Stack::~REGEX_IS024_Stack() {
assert(empty()); assert(empty());
free(slots); free(slots);
} }
REGEX_IS024_CONTEXT::REGEX_IS024_CONTEXT(size_t programSize, const uint8_t *data, VMContext::VMContext(size_t programSize, const uint8_t *data,
uint64_t caTreeLimit, regex_tai_t saLenLimit, uint64_t caTreeLimit, tai_t saLenLimit,
regex_sslot_id_t readSsLimit, regex_sslot_id_t forkSsLimit, sslot_id_t readSsLimit, sslot_id_t forkSsLimit,
uint64_t timeTickLimit) : uint64_t timeTickLimit) :
program_size(programSize), prg(data), CA_TREE_LIMIT(caTreeLimit), SA_LEN_LIMIT(saLenLimit), program_size(programSize), prg(data), CA_TREE_LIMIT(caTreeLimit), SA_LEN_LIMIT(saLenLimit),
READ_SS_LIMIT(readSsLimit), FORK_SS_LIMIT(forkSsLimit), time_tick_limit(timeTickLimit) READ_SS_LIMIT(readSsLimit), FORK_SS_LIMIT(forkSsLimit), time_tick_limit(timeTickLimit)
{ {
if (program_size > (1UL << 62)) if (program_size > (1UL << 62))
exitf("Program is too huge\n"); throw std::runtime_error("Program is too big");
active_thread.slot_occupation_status = SLOT_OCCUPIED; active_thread.slot_occupation_status = SLOT_OCCUPIED;
} }
/* No only will it launch a wave of deallocation in CA tree, but as a nice bonus it's /* No only will it launch a wave of deallocation in CA tree, but as a nice bonus it's
* gonna deoccupy slot_occupation_status*/ * gonna deoccupy slot_occupation_status*/
void REGEX_IS024_Thread::delete_thread() noexcept { void Thread::delete_thread() noexcept {
thread_print_debug(*this); thread_print_debug(*this);
my_assert(slot_occupation_status & SLOT_OCCUPIED); my_assert(slot_occupation_status & SLOT_OCCUPIED);
slot_occupation_status = SLOT_EMPTY_val; slot_occupation_status = SLOT_EMPTY_val;
REGEX024_CollectionArrayNode* cur_CAptr = CAHptr; CollectionArrayNode* cur_CAptr = CAHptr;
while (cur_CAptr){ while (cur_CAptr){
assert(cur_CAptr->refs > 0); assert(cur_CAptr->refs > 0);
if (--(cur_CAptr->refs) == 0){ if (--(cur_CAptr->refs) == 0){
REGEX024_CollectionArrayNode* next_CAptr = cur_CAptr->prev; CollectionArrayNode* next_CAptr = cur_CAptr->prev;
delete cur_CAptr; delete cur_CAptr;
cur_CAptr = next_CAptr; cur_CAptr = next_CAptr;
} else } else
@ -60,9 +60,9 @@ void REGEX_IS024_Thread::delete_thread() noexcept {
} }
} }
void emptify_one_of_new_read_halted_stacks(REGEX_IS024_CONTEXT& ctx, REGEX_IS024_Stack& type_new_stack){ void emptify_one_of_new_read_halted_stacks(VMContext& ctx, SSID_Stack& type_new_stack){
while (type_new_stack.non_empty()){ while (!type_new_stack.empty()){
REGEX_IS024_Thread& thread = ctx.READ_halted_slots[type_new_stack.pop()]; Thread& thread = ctx.READ_halted_slots[type_new_stack.pop()];
assert(thread.slot_occupation_status & SLOT_OCCUPIED); assert(thread.slot_occupation_status & SLOT_OCCUPIED);
thread.delete_thread(); thread.delete_thread();
} }
@ -74,17 +74,17 @@ void emptify_one_of_new_read_halted_stacks(REGEX_IS024_CONTEXT& ctx, REGEX_IS024
* active slot will be occupied with it. * active slot will be occupied with it.
* *
* try_to_continue_scheduled() assumes that active thread is unoccupied.*/ * try_to_continue_scheduled() assumes that active thread is unoccupied.*/
void REGEX_IS024_CONTEXT::try_to_continue_scheduled(){ void VMContext::try_to_continue_scheduled(){
ctx_print_debug(*this); ctx_print_debug(*this);
my_assert(!(active_thread.slot_occupation_status & SLOT_OCCUPIED)); my_assert(!(active_thread.slot_occupation_status & SLOT_OCCUPIED));
if (FORK_halted_stack.sz){ if (FORK_halted_stack.sz){
regex_sslot_id_t ssid = FORK_halted_stack.pop(); sslot_id_t ssid = FORK_halted_stack.pop();
active_thread = FORK_halted_slots[ssid]; active_thread = FORK_halted_slots[ssid];
FORK_halted_slots[ssid].slot_occupation_status = SLOT_EMPTY_val; FORK_halted_slots[ssid].slot_occupation_status = SLOT_EMPTY_val;
return; return;
} }
while (READ_halted_stack_old.sz){ while (READ_halted_stack_old.sz){
regex_sslot_id_t ssid = READ_halted_stack_old.pop(); sslot_id_t ssid = READ_halted_stack_old.pop();
if (READ_halted_slots[ssid].slot_occupation_status & SLOT_NEW){ if (READ_halted_slots[ssid].slot_occupation_status & SLOT_NEW){
/* This is the case when old thread was silently replaced by settled new thread */ /* This is the case when old thread was silently replaced by settled new thread */
continue; continue;
@ -96,28 +96,28 @@ void REGEX_IS024_CONTEXT::try_to_continue_scheduled(){
/* Failure here will be detected. We started with unoccupied active thread. iterator inside kick will see it */ /* Failure here will be detected. We started with unoccupied active thread. iterator inside kick will see it */
} }
void kick(REGEX_IS024_CONTEXT& ctx) { void kick(VMContext& ctx) {
ctx_print_debug(ctx); ctx_print_debug(ctx);
while ((ctx.active_thread.slot_occupation_status & SLOT_OCCUPIED) while ((ctx.active_thread.slot_occupation_status & SLOT_OCCUPIED)
&& ctx.error == regex024_error_codes::stable){ && ctx.error == error_codes::stable){
if (ctx.timer >= ctx.time_tick_limit) if (ctx.timer >= ctx.time_tick_limit)
smitsya(timeout); smitsya(timeout);
ctx.timer++; ctx.timer++;
check_available_prg(REGEX024_BYTECODE_INSTRUCTION_SZ) // May return from kick(ctx) check_available_prg(BYTECODE_INSTRUCTION_SZ) // May return from kick(ctx)
// smivanie from those instructions will be immediately detected. Everything is OK // smivanie from those instructions will be immediately detected. Everything is OK
instruction_table(ctx); instruction_table(ctx);
} }
} }
regex024_error_code REGEX_IS024_CONTEXT::feedSOF() { error_code_t VMContext::feedSOF() {
ctx_print_debug(*this); ctx_print_debug(*this);
kick(*this); kick(*this);
return error; return error;
} }
regex024_error_code REGEX_IS024_CONTEXT::startThread() { error_code_t VMContext::startThread() {
ctx_print_debug(*this); ctx_print_debug(*this);
active_thread.slot_occupation_status = SLOT_OCCUPIED; active_thread.slot_occupation_status = SLOT_OCCUPIED;
active_thread.IP = unnatural_started_thread_IP; active_thread.IP = unnatural_started_thread_IP;
@ -127,29 +127,20 @@ regex024_error_code REGEX_IS024_CONTEXT::startThread() {
return error; return error;
} }
/* I hate C++ (aka antichrist), won't use move sementic (aka drink cornsyrup) */ void fill_empty_old_read_halted_stack(VMContext& ctx, SSID_Stack& read_halted_stack_new){
void swap_stacks(REGEX_IS024_Stack& A, REGEX_IS024_Stack& B) {
std::swap(A.sz, B.sz);
std::swap(A.slots, B.slots);
}
void fill_empty_old_read_halted_stack(REGEX_IS024_CONTEXT& ctx, REGEX_IS024_Stack& read_halted_stack_new){
ctx_print_debug(ctx); ctx_print_debug(ctx);
my_assert(!ctx.READ_halted_stack_old.non_empty());
// Actually, READ_halted_stack_old is always empty in this case // Actually, READ_halted_stack_old is always empty in this case
assert(ctx.READ_halted_stack_old.empty()); assert(ctx.READ_halted_stack_old.empty());
swap_stacks(ctx.READ_halted_stack_old, read_halted_stack_new); while (!read_halted_stack_new.empty()) {
for (uint32_t i = 0; i < ctx.READ_halted_stack_old.sz; i++){ sslot_id_t sr = read_halted_stack_new.pop();
REGEX_IS024_Thread& slot = ctx.READ_halted_slots[ctx.READ_halted_stack_old.slots[i]]; Thread& slot = ctx.READ_halted_slots[sr];
/* Should get rid of 'NEW' qualifier */ assert(slot.slot_occupation_status & SLOT_NEW_val);
assert(slot.slot_occupation_status & SLOT_OCCUPIED); slot.slot_occupation_status = SLOT_OCCUPIED_val;
if (slot.slot_occupation_status & SLOT_OCCUPIED) ctx.READ_halted_stack_old.append(sr);
slot.slot_occupation_status = SLOT_OCCUPIED;
} }
} }
regex024_error_code REGEX_IS024_CONTEXT::feedCharacter(uint64_t input, uint64_t corresponding_byte_amount) { error_code_t VMContext::feedCharacter(uint64_t input, uint64_t corresponding_byte_amount) {
ctx_print_debug(*this); ctx_print_debug(*this);
if (matched_thread.slot_occupation_status & SLOT_OCCUPIED) if (matched_thread.slot_occupation_status & SLOT_OCCUPIED)
matched_thread.delete_thread(); matched_thread.delete_thread();
@ -163,7 +154,7 @@ regex024_error_code REGEX_IS024_CONTEXT::feedCharacter(uint64_t input, uint64_t
return error; return error;
} }
regex024_error_code REGEX_IS024_CONTEXT::extendedFeedCharacter(uint64_t input) { error_code_t VMContext::extendedFeedCharacter(uint64_t input) {
ctx_print_debug(*this); ctx_print_debug(*this);
if (matched_thread.slot_occupation_status & SLOT_OCCUPIED) if (matched_thread.slot_occupation_status & SLOT_OCCUPIED)
matched_thread.delete_thread(); matched_thread.delete_thread();
@ -174,19 +165,19 @@ regex024_error_code REGEX_IS024_CONTEXT::extendedFeedCharacter(uint64_t input) {
return error; return error;
} }
REGEX_IS024_CONTEXT::~REGEX_IS024_CONTEXT() { VMContext::~VMContext() {
ctx_print_debug(*this); ctx_print_debug(*this);
if (initialized){ if (initialized){
emptify_one_of_new_read_halted_stacks(*this, READ_halted_stack_new_first); emptify_one_of_new_read_halted_stacks(*this, READ_halted_stack_new_first);
emptify_one_of_new_read_halted_stacks(*this, READ_halted_stack_new_second); emptify_one_of_new_read_halted_stacks(*this, READ_halted_stack_new_second);
while (READ_halted_stack_old.non_empty()){ while (!READ_halted_stack_old.empty()){
REGEX_IS024_Thread& thread = READ_halted_slots[READ_halted_stack_old.pop()]; Thread& thread = READ_halted_slots[READ_halted_stack_old.pop()];
assert(thread.slot_occupation_status & SLOT_OCCUPIED); assert(thread.slot_occupation_status & SLOT_OCCUPIED);
if (!(thread.slot_occupation_status & SLOT_NEW)) if (!(thread.slot_occupation_status & SLOT_NEW))
thread.delete_thread(); thread.delete_thread();
} }
free(READ_halted_slots); free(READ_halted_slots);
while (FORK_halted_stack.non_empty()) while (!FORK_halted_stack.empty())
FORK_halted_slots[FORK_halted_stack.pop()].delete_thread(); FORK_halted_slots[FORK_halted_stack.pop()].delete_thread();
free(FORK_halted_slots); free(FORK_halted_slots);
@ -195,3 +186,4 @@ REGEX_IS024_CONTEXT::~REGEX_IS024_CONTEXT() {
} }
} }
} }
}

View File

@ -1,38 +1,40 @@
#include <libregexis024vm/libregexis024vm.h> #include <libregexis024vm/libregexis024vm.h>
#include <libregexis024vm/vm_opcodes.h> #include <libregexis024vm/vm_opcodes.h>
bool REGEX_IS024_CONTEXT::check_inboundness(int region){ namespace regexis024 {
bool VMContext::check_inboundness(int region){
return vmprog_check_inboundness(program_size, active_thread.IP, region); return vmprog_check_inboundness(program_size, active_thread.IP, region);
} }
uint8_t REGEX_IS024_CONTEXT::extract_b() { uint8_t VMContext::extract_b() {
return vmprog_extract_b(&active_thread.IP, prg); return vmprog_extract_b(&active_thread.IP, prg);
} }
uint16_t REGEX_IS024_CONTEXT::extract_w() { uint16_t VMContext::extract_w() {
return vmprog_extract_w(&active_thread.IP, prg); return vmprog_extract_w(&active_thread.IP, prg);
} }
uint32_t REGEX_IS024_CONTEXT::extract_dw() { uint32_t VMContext::extract_dw() {
return vmprog_extract_dw(&active_thread.IP, prg); return vmprog_extract_dw(&active_thread.IP, prg);
} }
uint64_t REGEX_IS024_CONTEXT::extract_qw() { uint64_t VMContext::extract_qw() {
return vmprog_extract_qw(&active_thread.IP, prg); return vmprog_extract_qw(&active_thread.IP, prg);
} }
uint8_t REGEX_IS024_CONTEXT::extract_instruction() { uint8_t VMContext::extract_instruction() {
return extract_b(); return extract_b();
} }
regex_sslot_id_t REGEX_IS024_CONTEXT::extract_sslot_id() { sslot_id_t VMContext::extract_sslot_id() {
return extract_dw(); return extract_dw();
} }
regex_near_ptr_t REGEX_IS024_CONTEXT::extract_near_pointer() { near_ptr_t VMContext::extract_near_pointer() {
return extract_qw(); return extract_qw();
} }
regex_tai_t REGEX_IS024_CONTEXT::extract_track_array_index() { tai_t VMContext::extract_track_array_index() {
return extract_w(); return extract_w();
} }
}

View File

@ -1,74 +1,76 @@
#include <stdexcept>
#include <libregexis024vm/libregexis024vm_interface.h> #include <libregexis024vm/libregexis024vm_interface.h>
#include <libregexis024vm/libregexis024vm.h> #include <libregexis024vm/libregexis024vm.h>
#include <libregexis024vm/instruction_implementation.h> #include <libregexis024vm/instruction_implementation.h>
bool REGEX_IS024_CAEvent::operator==(const REGEX_IS024_CAEvent &other) const { namespace regexis024 {
bool CAEvent::operator==(const CAEvent &other) const {
return (key == other.key) && (value == other.value); return (key == other.key) && (value == other.value);
} }
#define reveal ((REGEX_IS024_CONTEXT*)opaque) #define reveal ((VMContext*)opaque)
REGEX_IS024_VirtualMachine::REGEX_IS024_VirtualMachine(size_t programSize, const uint8_t *data, VirtualMachine::VirtualMachine(size_t programSize, const uint8_t *data,
uint64_t caTreeLimit, regex_tai_t saLenLimit, uint64_t caTreeLimit, tai_t saLenLimit,
regex_sslot_id_t readSsLimit, regex_sslot_id_t forkSsLimit, sslot_id_t readSsLimit, sslot_id_t forkSsLimit,
uint64_t timeTickLimit) { uint64_t timeTickLimit) {
opaque = new REGEX_IS024_CONTEXT(programSize, data, caTreeLimit, saLenLimit, opaque = new VMContext(programSize, data, caTreeLimit, saLenLimit,
readSsLimit, forkSsLimit, timeTickLimit); readSsLimit, forkSsLimit, timeTickLimit);
} }
regex024_error_code REGEX_IS024_VirtualMachine::initialize() { error_code_t VirtualMachine::initialize() {
if (gave_SOF) if (gave_SOF)
exitf("double feedSOF\n"); throw std::runtime_error("double feedSOF\n");
gave_SOF = true; gave_SOF = true;
return reveal->feedSOF(); return reveal->feedSOF();
} }
bool REGEX_IS024_VirtualMachine::isInitialized() { bool VirtualMachine::isInitialized() {
return reveal->initialized; return reveal->initialized;
} }
bool REGEX_IS024_VirtualMachine::isUsable() { bool VirtualMachine::isUsable() {
return isInitialized() && reveal->error == regex024_error_codes::stable; return isInitialized() && reveal->error == error_codes::stable;
} }
REGEX_IS024_VirtualMachine::~REGEX_IS024_VirtualMachine() { VirtualMachine::~VirtualMachine() {
delete reveal; delete reveal;
} }
regex_tai_t REGEX_IS024_VirtualMachine::getSelectionArrayLength() { tai_t VirtualMachine::getSelectionArrayLength() {
return isUsable() ? reveal->selection_array_len : 0; return isUsable() ? reveal->selection_array_len : 0;
} }
bool REGEX_IS024_VirtualMachine::isAllowMultistart() { bool VirtualMachine::isAllowMultistart() {
return isUsable() ? reveal->allows_multistart : false; return isUsable() ? reveal->allows_multistart : false;
} }
uint8_t REGEX_IS024_VirtualMachine::getInputLeftExtensionSize() { uint8_t VirtualMachine::getInputLeftExtensionSize() {
return isUsable() ? reveal->fed_input_extends_left : 0; return isUsable() ? reveal->fed_input_extends_left : 0;
} }
uint8_t REGEX_IS024_VirtualMachine::getInputRightExtensionSize() { uint8_t VirtualMachine::getInputRightExtensionSize() {
return isUsable() ? reveal->fed_input_extends_right : 0; return isUsable() ? reveal->fed_input_extends_right : 0;
} }
regex024_error_code REGEX_IS024_VirtualMachine::getErrno() { error_code_t VirtualMachine::getErrno() {
return reveal->error; return reveal->error;
} }
/* Stupid kinda function. Checks if somebody is ready to continue reading the actual string */ /* Stupid kinda function. Checks if somebody is ready to continue reading the actual string or extended l-r input */
bool REGEX_IS024_VirtualMachine::haveSurvivors() { bool VirtualMachine::haveSurvivors() {
return isUsable() && (reveal->READ_halted_stack_new_first.non_empty()); return isUsable() && (!reveal->READ_halted_stack_new_first.empty() || !reveal->READ_halted_stack_new_second.empty());
} }
bool REGEX_IS024_VirtualMachine::isMatched() { bool VirtualMachine::isMatched() {
return isUsable() && static_cast<bool>((reveal->matched_thread.slot_occupation_status & SLOT_OCCUPIED)); return isUsable() && static_cast<bool>((reveal->matched_thread.slot_occupation_status & SLOT_OCCUPIED));
} }
std::vector<REGEX_IS024_CAEvent> REGEX_IS024_VirtualMachine::getMatchedThreadCABranchReverse() { std::vector<CAEvent> VirtualMachine::getMatchedThreadCABranchReverse() {
if (!isMatched()) if (!isMatched())
return {}; return {};
std::vector<REGEX_IS024_CAEvent> res; std::vector<CAEvent> res;
REGEX024_CollectionArrayNode* cur = reveal->matched_thread.CAHptr; CollectionArrayNode* cur = reveal->matched_thread.CAHptr;
while (cur != NULL){ while (cur != NULL){
res.push_back({cur->key, cur->value}); res.push_back({cur->key, cur->value});
cur = cur->prev; cur = cur->prev;
@ -76,7 +78,7 @@ std::vector<REGEX_IS024_CAEvent> REGEX_IS024_VirtualMachine::getMatchedThreadCAB
return res; return res;
} }
uint64_t REGEX_IS024_VirtualMachine::getMatchedThreadSAValue(uint16_t key) { uint64_t VirtualMachine::getMatchedThreadSAValue(uint16_t key) {
if (key >= getSelectionArrayLength()) if (key >= getSelectionArrayLength())
return 0; return 0;
if (!isMatched()) if (!isMatched())
@ -84,22 +86,21 @@ uint64_t REGEX_IS024_VirtualMachine::getMatchedThreadSAValue(uint16_t key) {
return reveal->matched_thread.SAptr ? reveal->matched_thread.SAptr[key + 1] : 0; return reveal->matched_thread.SAptr ? reveal->matched_thread.SAptr[key + 1] : 0;
} }
regex024_error_code REGEX_IS024_VirtualMachine::addNewMatchingThread() { error_code_t VirtualMachine::addNewMatchingThread() {
if (!isUsable()) if (!isUsable())
exitf("unusable\n"); throw std::runtime_error("unusable");
// if (started_first_thread && !isAllowMultistart())
// exitf("Multistart is forbidden, bad usage of program\n");
return reveal->startThread(); return reveal->startThread();
} }
regex024_error_code REGEX_IS024_VirtualMachine::extendedFeedCharacter(uint64_t input) { error_code_t VirtualMachine::extendedFeedCharacter(uint64_t input) {
if (!isUsable()) if (!isUsable())
exitf("unusable\n"); throw std::runtime_error("unusable\n");
return reveal->extendedFeedCharacter(input); return reveal->extendedFeedCharacter(input);
} }
regex024_error_code REGEX_IS024_VirtualMachine::feedCharacter(uint64_t input, uint64_t bytesResembled) { error_code_t VirtualMachine::feedCharacter(uint64_t input, uint64_t bytesResembled) {
if (!isUsable()) if (!isUsable())
exitf("unusable\n"); throw std::runtime_error("unusable\n");
return reveal->feedCharacter(input, bytesResembled); return reveal->feedCharacter(input, bytesResembled);
} }
}

View File

@ -6,41 +6,42 @@
#include <libregexis024vm/vm_errno.h> #include <libregexis024vm/vm_errno.h>
#include <libregexis024vm/vm_opcodes_types.h> #include <libregexis024vm/vm_opcodes_types.h>
struct REGEX_IS024_CAEvent{ namespace regexis024 {
regex_tai_t key; struct CAEvent{
tai_t key;
uint64_t value; uint64_t value;
bool operator==(const REGEX_IS024_CAEvent& other) const; bool operator==(const CAEvent& other) const;
}; };
class REGEX_IS024_VirtualMachine{ struct VirtualMachine{
public: VirtualMachine(size_t programSize, const uint8_t *data, uint64_t caTreeLimit, uint16_t saLenLimit,
REGEX_IS024_VirtualMachine(size_t programSize, const uint8_t *data, uint64_t caTreeLimit, uint16_t saLenLimit,
uint32_t readSsLimit, uint32_t forkSsLimit, uint64_t timeTickLimit); uint32_t readSsLimit, uint32_t forkSsLimit, uint64_t timeTickLimit);
REGEX_IS024_VirtualMachine(const REGEX_IS024_VirtualMachine& ) = delete; VirtualMachine(const VirtualMachine& ) = delete;
REGEX_IS024_VirtualMachine& operator=(const REGEX_IS024_VirtualMachine&) = delete; VirtualMachine& operator=(const VirtualMachine&) = delete;
regex024_error_code initialize(); error_code_t initialize();
bool isInitialized(); bool isInitialized();
bool isUsable(); bool isUsable();
virtual ~REGEX_IS024_VirtualMachine(); virtual ~VirtualMachine();
regex_tai_t getSelectionArrayLength(); tai_t getSelectionArrayLength();
bool isAllowMultistart(); bool isAllowMultistart();
uint8_t getInputLeftExtensionSize(); uint8_t getInputLeftExtensionSize();
uint8_t getInputRightExtensionSize(); uint8_t getInputRightExtensionSize();
regex024_error_code getErrno(); error_code_t getErrno();
bool haveSurvivors(); bool haveSurvivors();
bool isMatched(); bool isMatched();
std::vector<REGEX_IS024_CAEvent> getMatchedThreadCABranchReverse(); std::vector<CAEvent> getMatchedThreadCABranchReverse();
uint64_t getMatchedThreadSAValue(uint16_t key); uint64_t getMatchedThreadSAValue(uint16_t key);
regex024_error_code addNewMatchingThread(); error_code_t addNewMatchingThread();
regex024_error_code extendedFeedCharacter(uint64_t input); error_code_t extendedFeedCharacter(uint64_t input);
regex024_error_code feedCharacter(uint64_t input, uint64_t bytesResembled); error_code_t feedCharacter(uint64_t input, uint64_t bytesResembled);
private: private:
bool gave_SOF = false; bool gave_SOF = false;
void* opaque; void* opaque;
}; };
}
#endif //LIBREGEXIS024_LIBREGEXIS024VM_INTERFACE_H #endif //LIBREGEXIS024_LIBREGEXIS024VM_INTERFACE_H

View File

@ -10,21 +10,14 @@
#error "Big endian is currently unsupported" #error "Big endian is currently unsupported"
#endif #endif
void exitf(const char *fmt, ...) { namespace regexis024 {
va_list va; int utf8_retrieve_size(char firstByte) {
va_start(va, fmt); if (!((uint8_t)firstByte & 0b10000000))
vfprintf(stderr, fmt, va);
va_end(va);
exit(1);
}
int utf8_retrieve_size(uint8_t firstByte) {
if (!(firstByte & 0b10000000))
return 1; return 1;
uint8_t a = 0b11000000; uint8_t a = 0b11000000;
uint8_t b = 0b00100000; uint8_t b = 0b00100000;
for (int i = 2; i <= 4; i++){ for (int i = 2; i <= 4; i++){
if ((firstByte & (a | b)) == a) if (((uint8_t)firstByte & (a | b)) == a)
return i; return i;
a |= b; a |= b;
b >>= 1; b >>= 1;
@ -32,13 +25,13 @@ int utf8_retrieve_size(uint8_t firstByte) {
return -1; return -1;
} }
int32_t utf8_retrieve_character(int sz, size_t pos, const uint8_t *string) { int32_t utf8_retrieve_character(int sz, size_t pos, const char *string) {
if (sz == 1) if (sz == 1)
return string[pos]; return (uint8_t)string[pos];
uint32_t v = string[pos] & (0b01111111 >> sz); uint32_t v = (uint8_t)string[pos] & (0b01111111 >> sz);
pos++; pos++;
for (int i = 1; i < sz; i++){ for (int i = 1; i < sz; i++){
uint32_t th = string[pos]; uint32_t th = (uint8_t)string[pos];
if ((th & 0b11000000) != 0b10000000) if ((th & 0b11000000) != 0b10000000)
return -1; return -1;
v <<= 6; v <<= 6;
@ -49,13 +42,11 @@ int32_t utf8_retrieve_character(int sz, size_t pos, const uint8_t *string) {
return static_cast<int32_t>(v); return static_cast<int32_t>(v);
} }
#define AAAAAA {cp = -1; return;} void utf8_string_iterat(int32_t &cp, size_t &adj, size_t pos, const char *string, size_t string_size) {
if (pos >= string_size) {cp = -1; return;}
void utf8_string_iterat(int32_t &cp, size_t &adj, size_t pos, const uint8_t *string, size_t string_size) {
if (pos >= string_size) AAAAAA
adj = utf8_retrieve_size(string[pos]); adj = utf8_retrieve_size(string[pos]);
if (adj < 0 || pos + adj > string_size) AAAAAA if (adj < 0 || pos + adj > string_size) {cp = -1; return;}
if ((cp = utf8_retrieve_character(adj, pos, string)) < 0) AAAAAA if ((cp = utf8_retrieve_character(adj, pos, string)) < 0) {cp = -1;}
} }
bool is_string_in_stringset(const char *strSample, const char **strSet) { bool is_string_in_stringset(const char *strSample, const char **strSet) {
@ -67,3 +58,4 @@ bool is_string_in_stringset(const char *strSample, const char **strSet) {
} }
return false; return false;
} }
}

View File

@ -4,18 +4,19 @@
#include <stdint.h> #include <stdint.h>
#include <stdlib.h> #include <stdlib.h>
void exitf(const char* fmt, ...); // todo: move this file out from my eyes.
namespace regexis024 {
/* 1, 2, 3, 4 on success; -1 on error */ /* 1, 2, 3, 4 on success; -1 on error */
int utf8_retrieve_size(uint8_t firstByte); int utf8_retrieve_size(char firstByte);
/* sz is a positive value returned by utf8_retrieve_size. Returns negative on error */ /* sz is a positive value returned by utf8_retrieve_size. Returns negative on error */
int32_t utf8_retrieve_character(int sz, size_t pos, const uint8_t* string); int32_t utf8_retrieve_character(int sz, size_t pos, const char* string);
/* cp is negative on error. adj is the size of letter in bytes. Can be used to adjust pos. /* cp is negative on error. adj is the size of letter in bytes. Can be used to adjust pos.
* All safety checks will be performed */ * All safety checks will be performed */
void utf8_string_iterat(int32_t& cp, size_t& adj, size_t pos, const uint8_t* string, size_t string_size); void utf8_string_iterat(int32_t& cp, size_t& adj, size_t pos, const char* string, size_t string_size);
bool is_string_in_stringset(const char* strSample, const char* strSet[]); bool is_string_in_stringset(const char* strSample, const char* strSet[]);
}
#endif //LIBREGEXIS024_UTILS_H #endif //LIBREGEXIS024_UTILS_H

View File

@ -1,7 +1,8 @@
#include <libregexis024vm/vm_errno.h> #include <libregexis024vm/vm_errno.h>
const char *regex024_error_code_tostr(regex024_error_code x) { namespace regexis024 {
#define rcase(name) case regex024_error_codes::name: return #name; const char *error_code_to_str(error_code_t x) {
#define rcase(name) case error_codes::name: return #name;
switch (x) { switch (x) {
rcase(stable) rcase(stable)
rcase(ca_tree_limit_violation) rcase(ca_tree_limit_violation)
@ -24,3 +25,4 @@ const char *regex024_error_code_tostr(regex024_error_code x) {
return "unknown_error_code"; return "unknown_error_code";
} }
} }
}

View File

@ -3,7 +3,8 @@
#include <stdint.h> #include <stdint.h>
namespace regex024_error_codes { namespace regexis024 {
namespace error_codes {
enum regex024_error_code_I: int { enum regex024_error_code_I: int {
stable = 0, stable = 0,
ca_tree_limit_violation = -1, ca_tree_limit_violation = -1,
@ -38,8 +39,9 @@ namespace regex024_error_codes {
}; };
} }
typedef regex024_error_codes::regex024_error_code_I regex024_error_code; typedef error_codes::regex024_error_code_I error_code_t;
const char* regex024_error_code_tostr(regex024_error_code x); const char* error_code_to_str(error_code_t x);
}
#endif //LIBREGEXIS024_VM_ERRNO_H #endif //LIBREGEXIS024_VM_ERRNO_H

View File

@ -3,7 +3,8 @@
#include <libregexis024vm/vm_opcodes_types.h> #include <libregexis024vm/vm_opcodes_types.h>
namespace regex024_opcodes { namespace regexis024 {
namespace opcodes {
enum regex024_opcode_I: uint8_t{ enum regex024_opcode_I: uint8_t{
/* READ <Settlement ID> */ /* READ <Settlement ID> */
READ = 0, READ = 0,
@ -74,26 +75,25 @@ namespace regex024_opcodes {
}; };
} }
typedef regex024_opcodes::regex024_opcode_I regex024_opcode; typedef opcodes::regex024_opcode_I opcode_t;
const char* regex024_opcode_tostr(regex024_opcode x); const char* opcode_to_str(opcode_t x);
constexpr uint64_t BYTECODE_INSTRUCTION_SZ = 1;
constexpr uint64_t BYTECODE_SSLOT_ID_SZ = 4;
constexpr uint64_t BYTECODE_TRACK_ARRAY_INDEX_ID_SZ = 2;
constexpr uint64_t BYTECODE_NEAR_POINTER_SZ = 8;
constexpr uint64_t REGEX024_BYTECODE_INSTRUCTION_SZ = 1; bool vmprog_check_inboundness(near_ptr_t prgSize, near_ptr_t IP, near_ptr_t region);
constexpr uint64_t REGEX024_BYTECODE_SSLOT_ID_SZ = 4;
constexpr uint64_t REGEX024_BYTECODE_TRACK_ARRAY_INDEX_ID_SZ = 2;
constexpr uint64_t REGEX024_BYTECODE_NEAR_POINTER_SZ = 8;
bool vmprog_check_inboundness(regex_near_ptr_t prgSize, regex_near_ptr_t IP, regex_near_ptr_t region); uint8_t vmprog_extract_b(near_ptr_t* IPptr, const uint8_t* prg);
uint16_t vmprog_extract_w(near_ptr_t* IPptr, const uint8_t* prg);
uint8_t vmprog_extract_b(regex_near_ptr_t* IPptr, const uint8_t* prg); uint32_t vmprog_extract_dw(near_ptr_t* IPptr, const uint8_t* prg);
uint16_t vmprog_extract_w(regex_near_ptr_t* IPptr, const uint8_t* prg); uint64_t vmprog_extract_qw(near_ptr_t* IPptr, const uint8_t* prg);
uint32_t vmprog_extract_dw(regex_near_ptr_t* IPptr, const uint8_t* prg);
uint64_t vmprog_extract_qw(regex_near_ptr_t* IPptr, const uint8_t* prg);
uint8_t vmprog_extract_instruction(regex_near_ptr_t* IPptr, const uint8_t* prg);
regex_sslot_id_t vmprog_extract_sslot_id(regex_near_ptr_t* IPptr, const uint8_t* prg);
regex_near_ptr_t vmprog_extract_near_pointer(regex_near_ptr_t* IPptr, const uint8_t* prg);
regex_tai_t vmprog_extrack_track_array_index(regex_near_ptr_t* IPptr, const uint8_t* prg);
uint8_t vmprog_extract_instruction(near_ptr_t* IPptr, const uint8_t* prg);
sslot_id_t vmprog_extract_sslot_id(near_ptr_t* IPptr, const uint8_t* prg);
near_ptr_t vmprog_extract_near_pointer(near_ptr_t* IPptr, const uint8_t* prg);
tai_t vmprog_extrack_track_array_index(near_ptr_t* IPptr, const uint8_t* prg);
}
#endif //LIBREGEXIS024_VM_OPCODES_H #endif //LIBREGEXIS024_VM_OPCODES_H

View File

@ -1,47 +1,54 @@
#include <libregexis024vm/vm_opcodes.h> #include <libregexis024vm/vm_opcodes.h>
#ifndef __ORDER_LITTLE_ENDIAN__ namespace regexis024 {
#error "Big endian is currently unsupported" bool vmprog_check_inboundness(near_ptr_t prgSz, near_ptr_t IP, near_ptr_t region) {
#endif
bool vmprog_check_inboundness(regex_near_ptr_t prgSz, regex_near_ptr_t IP, regex_near_ptr_t region) {
return IP + region <= prgSz; return IP + region <= prgSz;
} }
uint8_t vmprog_extract_b(regex_near_ptr_t *IPptr, const uint8_t *prg) { uint8_t vmprog_extract_b(near_ptr_t *IPptr, const uint8_t *prg) {
return prg[(*IPptr)++]; return prg[(*IPptr)++];
} }
uint16_t vmprog_extract_w(regex_near_ptr_t *IPptr, const uint8_t *prg) { uint16_t vmprog_extract_w(near_ptr_t *IPptr, const uint8_t *prg) {
uint16_t answer = *(uint16_t*)(&prg[*IPptr]); uint16_t answer = 0;
*IPptr += 2; (*IPptr) += 2;
for (int i = 1; i < 3; i++) {
answer <<= 8; answer |= prg[(*IPptr) - i];
}
return answer; return answer;
} }
uint32_t vmprog_extract_dw(regex_near_ptr_t *IPptr, const uint8_t *prg) { uint32_t vmprog_extract_dw(near_ptr_t *IPptr, const uint8_t *prg) {
uint32_t answer = *(uint32_t *)(&prg[*IPptr]); uint32_t answer = 0;
*IPptr += 4; (*IPptr) += 4;
for (int i = 1; i < 5; i++) {
answer <<= 8; answer |= prg[(*IPptr) - i];
}
return answer; return answer;
} }
uint64_t vmprog_extract_qw(regex_near_ptr_t *IPptr, const uint8_t *prg) { uint64_t vmprog_extract_qw(near_ptr_t *IPptr, const uint8_t *prg) {
uint64_t answer = *(uint64_t *)(&prg[*IPptr]); uint64_t answer = 0;
*IPptr += 8; (*IPptr) += 8;
for (int i = 1; i < 9; i++) {
answer <<= 8; answer |= prg[(*IPptr) - i];
}
return answer; return answer;
} }
uint8_t vmprog_extract_instruction(regex_near_ptr_t *IPptr, const uint8_t *prg) { uint8_t vmprog_extract_instruction(near_ptr_t *IPptr, const uint8_t *prg) {
return vmprog_extract_b(IPptr, prg); return vmprog_extract_b(IPptr, prg);
} }
regex_sslot_id_t vmprog_extract_sslot_id(regex_near_ptr_t *IPptr, const uint8_t *prg) { sslot_id_t vmprog_extract_sslot_id(near_ptr_t *IPptr, const uint8_t *prg) {
return vmprog_extract_dw(IPptr, prg); return vmprog_extract_dw(IPptr, prg);
} }
regex_near_ptr_t vmprog_extract_near_pointer(regex_near_ptr_t *IPptr, const uint8_t *prg) { near_ptr_t vmprog_extract_near_pointer(near_ptr_t *IPptr, const uint8_t *prg) {
return vmprog_extract_qw(IPptr, prg); return vmprog_extract_qw(IPptr, prg);
} }
regex_tai_t vmprog_extrack_track_array_index(regex_near_ptr_t *IPptr, const uint8_t *prg) { tai_t vmprog_extrack_track_array_index(near_ptr_t *IPptr, const uint8_t *prg) {
return vmprog_extract_w(IPptr, prg); return vmprog_extract_w(IPptr, prg);
} }
}

View File

@ -3,9 +3,10 @@
#include <stdint.h> #include <stdint.h>
typedef uint32_t regex_sslot_id_t; namespace regexis024 {
typedef uint64_t regex_near_ptr_t; typedef uint32_t sslot_id_t;
typedef uint16_t regex_tai_t; typedef uint64_t near_ptr_t;
typedef uint16_t tai_t;
}
#endif //VM_OPCODES_TYPES_H #endif //VM_OPCODES_TYPES_H