Fixed fa_first_stage_fix.cpp

This commit is contained in:
Андреев Григорий 2024-07-29 15:30:38 +03:00
parent b4381e9238
commit b11afa72ea
4 changed files with 29 additions and 10 deletions

View File

@ -30,7 +30,7 @@ REGEX_IS024_FA_FirstStageFixInfo first_stage_fix_fa(FA_Container& sourceFa, FA_C
std::vector<Marked> searched; std::vector<Marked> searched;
searched.emplace_back(loa.nxt_node); searched.emplace_back(loa.nxt_node);
beg->search_mark = 0; loa.nxt_node->search_mark = 0;
for (size_t done = 0; done < searched.size(); done++){ for (size_t done = 0; done < searched.size(); done++){
FA_Node& cur = *searched[done].node; FA_Node& cur = *searched[done].node;
@ -77,6 +77,11 @@ REGEX_IS024_FA_FirstStageFixInfo first_stage_fix_fa(FA_Container& sourceFa, FA_C
if (my->type == match) if (my->type == match)
info.fed_chars_extend_one_right = true; info.fed_chars_extend_one_right = true;
} }
{
Marked& loa_nxt_aux = searched[loa.nxt_node->search_mark];
if (loa_nxt_aux.making_copy)
reattach_nxt_node(&loa, loa_nxt_aux.copy);
}
for (auto& v_sete: searched) for (auto& v_sete: searched)
v_sete.node->search_mark = -1; v_sete.node->search_mark = -1;
@ -151,7 +156,6 @@ void regular_second_stage_fix(const FA_Container& sourceFa, FA_Container& result
assert(resultFa.all.empty() && !resultFa.start); assert(resultFa.all.empty() && !resultFa.start);
if (!sourceFa.start) if (!sourceFa.start)
return; return;
// todo: rewrite first stage using that cool technique I just invented
resultFa.start = sourceFa.start; resultFa.start = sourceFa.start;
// A vector of pointers in resultFa to nodes that belong to sourceFa. They should undergo a little bit of copying. // A vector of pointers in resultFa to nodes that belong to sourceFa. They should undergo a little bit of copying.
std::vector<FA_Node**> homework = {&(resultFa.start)}; std::vector<FA_Node**> homework = {&(resultFa.start)};
@ -165,9 +169,9 @@ void regular_second_stage_fix(const FA_Container& sourceFa, FA_Container& result
if (sourceV->type == match) { if (sourceV->type == match) {
FA_NodeOfMatch& mn = dynamic_cast<FA_NodeOfMatch&>(*sourceV); FA_NodeOfMatch& mn = dynamic_cast<FA_NodeOfMatch&>(*sourceV);
FA_NodeOfMatch* res_mn = resultFa.makeMatch(); FA_NodeOfMatch* res_mn = resultFa.makeMatch();
if (mn.ext_filter_added && mn.pending_filter != codeset_of_all) { if (info1.fed_chars_extend_one_right) {
assert(info1.fed_chars_extend_one_right); FA_NodeOfOneCharRead* res_ocr2n = resultFa.makeOneCharRead(
FA_NodeOfOneCharRead* res_ocr2n = resultFa.makeOneCharRead(mn.pending_filter, true); mn.ext_filter_added ? mn.pending_filter : codeset_of_all, true);
reattach_nxt_node(res_ocr2n, res_mn); reattach_nxt_node(res_ocr2n, res_mn);
sourceIdToResNode[sourceVId] = res_ocr2n; sourceIdToResNode[sourceVId] = res_ocr2n;
} else { } else {

View File

@ -2,7 +2,7 @@
#include <libregexis024test/byte_code_disassembler.h> #include <libregexis024test/byte_code_disassembler.h>
int main(){ int main(){
std::string regular_expression = "!selarr{boba{ca}}^a#boba(b)c$"; std::string regular_expression = "\\>1*";
REGEX_IS024_MeaningContext regex(regular_expression.size(), regular_expression.c_str()); REGEX_IS024_MeaningContext regex(regular_expression.size(), regular_expression.c_str());
if (regex.error) if (regex.error)
fprintf(stderr, "%s\n", regex.error_msg.c_str()); fprintf(stderr, "%s\n", regex.error_msg.c_str());

View File

@ -10,14 +10,30 @@ void test(const string& input, const string& pattern, const MatchInfo& right_ans
MatchInfo given_answer; MatchInfo given_answer;
track_var_list retTrackVarList; track_var_list retTrackVarList;
string retStatus; string retStatus;
matchStrToRegexp(input, pattern, given_answer, retTrackVarList, retStatus); int ret = matchStrToRegexp(input, pattern, given_answer, retTrackVarList, retStatus);
if (ret < 0) {
throw runtime_error("Test failed. matching. " + retStatus);
}
if (given_answer != right_answer) { if (given_answer != right_answer) {
throw runtime_error("Test failed"); throw runtime_error("Test failed.");
} }
printf("Test passed\n"); printf("Test passed\n");
} }
int main() { int main() {
test("C111111111111", "C\\>1*", MatchInfo({}, {}));
// return 0;
test("GET / HTTP/1.1\r\nHost: bibura sosat\r\nLos-es-raus: a\rfaafafdf\r\n\r\n",
"!dfa;(GET|POST) / HTTP/(1.1|1.0|0.9)\r\n([\\u0021-\\u007E&^:]+:([\\u0000-\\u007F&^\r\n])*\r\n)*\r\n",
MatchInfo());
test("\r24234\r\n", "[\\u0000-\\u007F&^\r\n]*\r\n", MatchInfo());
test("\n3432\r\n", "[\\u0000-\\u007F&^\r\n]*\r\n", MatchInfo());
test("3:::;;432\r\n", "[\\u0000-\\u007F&^\r\n]*\r\n", MatchInfo({}, {}));
test("3:::;;432 \r\n", "[\\u0000-\\u007F&^\r\n]*\r\n", MatchInfo({}, {}));
test("GET / HTTP/0.9\r\nHost: bibura sosat\r\nLos-es-raus: afaafafdf\r\n\r\n",
"^(GET|POST\\>) / HTTP/(1.1|1.0|0.9)\r\n([\\u0021-\\u007E&^:]+:([\\u0000-\\u007F&^\r\n])*\r\n)*\r\n",
MatchInfo({}, {}));
// return 0;
test("b", "#boba(b)", MatchInfo({{0, 0}, {1, 1}}, {})); test("b", "#boba(b)", MatchInfo({{0, 0}, {1, 1}}, {}));
test("abc", "!selarr{boba{ca}}^a#boba(b)c$", MatchInfo({{0, 1}, {1, 2}}, {1, 2})); test("abc", "!selarr{boba{ca}}^a#boba(b)c$", MatchInfo({{0, 1}, {1, 2}}, {1, 2}));
for (int i = 0; i < 64; i++) { for (int i = 0; i < 64; i++) {

View File

@ -89,9 +89,8 @@ int regexis024::matchStrToRegexp(const std::string& input, const std::string& pa
retMatchInfo.sa[i] = vm.getMatchedThreadSAValue(i); retMatchInfo.sa[i] = vm.getMatchedThreadSAValue(i);
retMatchInfo.ca_history = vm.getMatchedThreadCABranchReverse(); retMatchInfo.ca_history = vm.getMatchedThreadCABranchReverse();
std::reverse(retMatchInfo.ca_history.begin(), retMatchInfo.ca_history.end()); std::reverse(retMatchInfo.ca_history.begin(), retMatchInfo.ca_history.end());
return 0;
} }
return -1; return 0;
} }
bool regexis024::MatchInfo::operator==(const MatchInfo &other) const { bool regexis024::MatchInfo::operator==(const MatchInfo &other) const {