110 lines
4.2 KiB
C++
110 lines
4.2 KiB
C++
#include <algorithm>
|
|
#include <libregexis024tools/stringmatching.h>
|
|
#include <libregexis024sol/expr_compiler.h>
|
|
#include <libregexis024vm/libregexis024vm_interface.h>
|
|
#include <libregexis024vm/utils.h>
|
|
#include <assert.h>
|
|
|
|
// using namespace regexis024;
|
|
|
|
namespace regexis024 {
|
|
/**
 * Copies the tracking-variable description from `from` into `to`.
 *
 * Exactly seven fields are transferred, each by plain assignment:
 * type, colarr_first, colarr_second, stored_in_ca,
 * selarr_first, selarr_second, stored_in_sa.
 * Any other member of `to` is left untouched.
 */
void convert(TrackingVariableInfo& to, const SubtrackingNameInfo& from) {
    to.type = from.type;

    // "colarr" group
    to.colarr_first = from.colarr_first;
    to.colarr_second = from.colarr_second;
    to.stored_in_ca = from.stored_in_ca;

    // "selarr" group
    to.selarr_first = from.selarr_first;
    to.selarr_second = from.selarr_second;
    to.stored_in_sa = from.stored_in_sa;
}
|
|
|
|
int matchStrToRegexp(const std::string& input, const std::string& pattern,
|
|
MatchInfo& retMatchInfo, track_var_list& retTrackVarList, std::string& retStatus)
|
|
{
|
|
retTrackVarList = {};
|
|
retMatchInfo = MatchInfo();
|
|
retStatus = "";
|
|
REGEX_IS024_MeaningContext regexp(pattern.size(), pattern.data());
|
|
if (regexp.error) {
|
|
retStatus = "Pattern compilation. " + regexp.error_msg;
|
|
return -1;
|
|
}
|
|
retTrackVarList = {};
|
|
for (auto& iip: regexp.ktr.track_names) {
|
|
convert(retTrackVarList[iip.first], regexp.ktr.retrieval_info[iip.second]);
|
|
}
|
|
VirtualMachine vm(regexp.compiled_program.size(), regexp.compiled_program.data(),
|
|
UINT64_MAX, UINT16_MAX,
|
|
UINT32_MAX, UINT32_MAX, UINT64_MAX);
|
|
auto getVMErrString = [&]() -> std::string {
|
|
return std::string(error_code_to_str(vm.getErrno()));
|
|
};
|
|
|
|
if (vm.initialize() != error_codes::stable) {
|
|
retStatus = "Virtual machine initialization. " + getVMErrString();
|
|
return -1;
|
|
}
|
|
int left_ext_feed = vm.getInputLeftExtensionSize();
|
|
int right_ext_feed = vm.getInputRightExtensionSize();
|
|
if (left_ext_feed > 1 || right_ext_feed > 1) {
|
|
retStatus = "Unnatural extended input request.";
|
|
return -1;
|
|
}
|
|
if (vm.addNewMatchingThread() != error_codes::stable) {
|
|
retStatus = "Virtual machine first kick. " + getVMErrString();
|
|
}
|
|
if (left_ext_feed) {
|
|
if (vm.extendedFeedCharacter('\n') != error_codes::stable) {
|
|
retStatus = "VM left extended input. " + getVMErrString();
|
|
return -1;
|
|
}
|
|
}
|
|
for (size_t cur_text_pos = 0;cur_text_pos < input.size();) {
|
|
int32_t inp_code;
|
|
size_t adj;
|
|
utf8_string_iterat(inp_code, adj, cur_text_pos, input.data(), input.size());
|
|
if (inp_code < 0) {
|
|
retStatus = "Input string encoding error.";
|
|
return -1;
|
|
}
|
|
if (vm.feedCharacter(static_cast<uint64_t>(inp_code), adj) != error_codes::stable) {
|
|
retStatus = "VM input. " + getVMErrString();
|
|
return -1;
|
|
}
|
|
cur_text_pos += adj;
|
|
}
|
|
if (right_ext_feed) {
|
|
if (vm.extendedFeedCharacter('\n') != error_codes::stable) {
|
|
retStatus = "VM right extended input. " + getVMErrString();
|
|
return -1;
|
|
}
|
|
}
|
|
assert(vm.isUsable());
|
|
if (vm.isMatched()) {
|
|
retMatchInfo.have_match = true;
|
|
size_t SN1 = vm.getSelectionArrayLength();
|
|
retMatchInfo.sa.assign(SN1, 0);
|
|
for (size_t i = 0; i < SN1; i++)
|
|
retMatchInfo.sa[i] = vm.getMatchedThreadSAValue(i);
|
|
retMatchInfo.ca_history = vm.getMatchedThreadCABranchReverse();
|
|
std::reverse(retMatchInfo.ca_history.begin(), retMatchInfo.ca_history.end());
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/**
 * Equality of two match results.
 *
 * Two "no match" results are always equal, whatever their `sa` and
 * `ca_history` hold. A match never equals a non-match. Two matches are equal
 * when both `sa` and `ca_history` compare equal.
 */
bool MatchInfo::operator==(const MatchInfo &other) const {
    // One matched and the other did not: never equal.
    if (have_match != other.have_match)
        return false;
    // Neither matched: payload is irrelevant.
    if (!have_match)
        return true;
    // Both matched: compare the recorded data.
    return sa == other.sa && ca_history == other.ca_history;
}
|
|
|
|
/** Inequality: the exact negation of MatchInfo::operator==. */
bool MatchInfo::operator!=(const MatchInfo &other) const {
    const bool same = operator==(other);
    return !same;
}
|
|
|
|
/**
 * Builds a MatchInfo that represents a successful match.
 *
 * @param ca_history  Copied into the `ca_history` member.
 * @param sa          Copied into the `sa` member.
 *
 * `have_match` is always set to true by this constructor.
 */
MatchInfo::MatchInfo(const std::vector<CAEvent> &ca_history, const std::vector<uint64_t> &sa)
    : ca_history(ca_history),
      sa(sa),
      have_match(true)
{}
|
|
} |