libregexis024/src/libregexis024sol/command_expression.cpp
2024-07-31 22:30:29 +03:00

146 lines
5.4 KiB
C++

#include <libregexis024sol/special_terminals.h>
#include <libregexis024sol/utils.h>
#include <libregexis024sol/sol_misc_base.h>
#include <assert.h>
#include <memory>
namespace regexis024 {
/* One frame of the explicit parser call stack driven by command_expr_parse.
 * A frame is stepped through one of two entry points:
 *   - firstTime:    the step performed right after the frame has been pushed;
 *   - afterReceive: the step performed right after the frame pushed on top of
 *                   this one has been popped.
 * Either step returns a new frame to push on top of this one, or a null pointer
 * to signal that this frame is finished and has to be popped.
 *
 * The default implementations mark entry points that a concrete frame is not
 * expected to receive. They assert in debug builds; the explicit return keeps
 * the behaviour defined when asserts are compiled out (NDEBUG), in which case
 * the frame simply finishes. */
struct ParseCall{
    virtual ~ParseCall() = default;

    virtual std::unique_ptr<ParseCall> afterReceive(REGEX_IS024_MeaningContext& ctx) {
        (void)ctx;
        assert(false);
        /* Flowing off the end of a non-void function is undefined behaviour */
        return nullptr;
    }

    virtual std::unique_ptr<ParseCall> firstTime(REGEX_IS024_MeaningContext& ctx) {
        (void)ctx;
        assert(false);
        /* Flowing off the end of a non-void function is undefined behaviour */
        return nullptr;
    }
};
/* Outermost parse frame: fills the Command it was constructed with.
 * Only a reference is stored, so the Command has to outlive the frame. */
struct Top_ParseCall : public ParseCall {
    /* Destination of the parsed command (name, tilda flag, arguments) */
    Command& res;

    explicit Top_ParseCall(Command& target) : res(target) {}

    std::unique_ptr<ParseCall> afterReceive(REGEX_IS024_MeaningContext& ctx) override;
    std::unique_ptr<ParseCall> firstTime(REGEX_IS024_MeaningContext& ctx) override;
};
/* Parse frame for one '{ ... }' argument list. Parsed arguments are appended to
 * the vector the frame was constructed with; only a reference is stored, so the
 * vector has to outlive the frame. */
struct Bracker_ParseCall : public ParseCall {
    /* Destination list the arguments of this brace level are appended to */
    std::vector<CommandArgument>& res;
    /* Set by afterReceive (i.e. right after a nested '{...}' has been parsed) and
     * cleared whenever a ';' is consumed. While set, a closing '}' does not
     * append a trailing empty argument. */
    bool closingBraceEnded = false;

    explicit Bracker_ParseCall(std::vector<CommandArgument>& target) : res(target) {}

    /* Shared argument-reading loop used by both entry points below */
    std::unique_ptr<ParseCall> argReadProc(REGEX_IS024_MeaningContext& ctx);

    std::unique_ptr<ParseCall> afterReceive(REGEX_IS024_MeaningContext& ctx) override;
    std::unique_ptr<ParseCall> firstTime(REGEX_IS024_MeaningContext& ctx) override;
};
/* Both helpers expect a variable named `ctx` in the enclosing scope and may only
 * be used inside functions whose return type can be built from nullptr. */

/* Leaves the enclosing function with a null result if ctx.error is set. */
#define call_ERROR_CHECK do { if (ctx.error) { return nullptr; } } while (0)
/* Passes the message (a string literal, prefixed with "command expression: ")
 * to report() and leaves the enclosing function with a null result. */
#define call_THROW(str) do { report(ctx, "command expression: " str); return nullptr; } while (0)
/* Parses the head of a command expression: '!' followed either by '~' or by a
 * command name that is terminated with ';' or continued with a '{...}' list.
 *
 * Returns a Bracker_ParseCall frame for res.arguments when a '{' follows the
 * name; returns a null pointer when the command is finished or when an error
 * has been reported through ctx. */
std::unique_ptr<ParseCall> Top_ParseCall::firstTime(REGEX_IS024_MeaningContext &ctx) {
    /* The read must not live inside assert(): with NDEBUG the whole expression
     * is removed and the '!' would never be consumed. */
    const auto opening = readChar(ctx);
    assert(opening == U'!');
    (void)opening;

    int32_t ch = peep(ctx); call_ERROR_CHECK;
    if (ch == U'~'){
        /* I assume during construction I received reference to newly initialized struct */
        /* Note: the '~' is only inspected with peep(); readChar() is not called for it here */
        res.tilda = true;
        return nullptr;
    }

    res.name = tryRead_REGEX024_name(ctx); call_ERROR_CHECK;
    if (res.name.empty())
        call_THROW("top lvl: no command name specified");

    ch = peep(ctx); call_ERROR_CHECK;
    if (ch == U';'){
        readChar(ctx);
        return nullptr;
    }
    if (ch == U'{'){
        /* The '{' itself is consumed by the new frame's firstTime */
        return std::make_unique<Bracker_ParseCall>(res.arguments);
    }
    call_THROW("top lvl: command call should be ended with ';' or '{...}'");
}
std::unique_ptr<ParseCall> Top_ParseCall::afterReceive(REGEX_IS024_MeaningContext &ctx) {
return NULL;
}
/* Consumes the opening '{' of the argument list and starts reading arguments.
 * Returns whatever argReadProc returns: a nested frame to push, or a null
 * pointer when this brace level is finished or an error has been reported. */
std::unique_ptr<ParseCall> Bracker_ParseCall::firstTime(REGEX_IS024_MeaningContext &ctx) {
    /* The read must not live inside assert(): with NDEBUG the whole expression
     * is removed and the '{' would never be consumed. */
    const auto opening = readChar(ctx);
    assert(opening == U'{');
    (void)opening;
    return argReadProc(ctx);
}
/* Resumes this brace level after a nested '{...}' frame has been popped.
 * The flag records that the previous argument was closed by a brace, which
 * argReadProc consults when it meets the closing '}' of this level. */
std::unique_ptr<ParseCall> Bracker_ParseCall::afterReceive(REGEX_IS024_MeaningContext &ctx) {
    this->closingBraceEnded = true;
    return this->argReadProc(ctx);
}
/* Reads arguments of the current brace level until it either has to descend
 * into a nested '{...}' (returns the new frame) or the level is closed / an
 * error is reported (returns a null pointer).
 *
 * Per iteration:
 *   ';'   -> appends an empty argument and goes on;
 *   '}'   -> closes the level; an empty argument is appended unless the level
 *            was just resumed after a nested '{...}';
 *   name  -> appends a named argument, which must then be followed by
 *            ';' (go on), '{' (descend) or '}' (close the level).
 * NOTE(review): a ';' met right after a nested '{...}' also appends an empty
 * argument - confirm this is the intended grammar. */
std::unique_ptr<ParseCall> Bracker_ParseCall::argReadProc(REGEX_IS024_MeaningContext &ctx) {
    for (;;) {
        int32_t head = peep(ctx); call_ERROR_CHECK;

        if (head == U';') {
            res.emplace_back();
            readChar(ctx);
            closingBraceEnded = false;
            continue;
        }

        if (head == U'}') {
            readChar(ctx);
            if (!closingBraceEnded)
                res.emplace_back();
            return nullptr;
        }

        if (!is_REGEX024_nameConstituent(head)) {
            call_THROW("brace lvl: argument starts with ';' or it's name");
        }

        /* Named argument; the reference stays valid until the next emplace_back */
        res.emplace_back();
        CommandArgument& arg = res.back();
        arg.is_empty = false;
        arg.name = tryRead_REGEX024_name(ctx);

        int32_t tail = peep(ctx); call_ERROR_CHECK;
        if (tail == U';') {
            readChar(ctx);
            closingBraceEnded = false;
            continue;
        }
        if (tail == U'{') {
            /* The '{' itself is consumed by the nested frame's firstTime */
            return std::make_unique<Bracker_ParseCall>(arg.arguments);
        }
        if (tail == U'}') {
            readChar(ctx);
            return nullptr;
        }
        call_THROW("brace lvl: argument ends with ';' or {...}");
    }
}
/* Parses one command expression from ctx and returns it.
 *
 * Recursion over nested '{...}' lists is replaced by an explicit stack of
 * ParseCall frames: a frame that returns a new frame gets it pushed on top
 * (and the new frame is stepped with firstTime); a frame that returns a null
 * pointer is popped (and the frame below is stepped with afterReceive).
 *
 * If ctx.error is set - on entry or by any parsing step, including the very
 * last one - a default-constructed Command is returned. */
Command command_expr_parse(REGEX_IS024_MeaningContext &ctx) {
    /* Declared before the stack so that it outlives the frames referring to it */
    Command res;
    std::vector<std::unique_ptr<ParseCall>> callStack;
    callStack.push_back(std::make_unique<Top_ParseCall>(res));
    bool first_time = true;
    while (!callStack.empty()){
        if (ctx.error)
            return {};
        ParseCall& top = *callStack.back();
        auto nxt = first_time ? top.firstTime(ctx) : top.afterReceive(ctx);
        if (nxt){
            callStack.push_back(std::move(nxt));
            first_time = true;
        } else {
            callStack.pop_back();
            first_time = false;
        }
    }
    /* The step that emptied the stack may itself have failed; without this check
     * such an error would hand back a partially filled Command, unlike errors
     * raised by any earlier step. */
    if (ctx.error)
        return {};
    return res;
}
/* NULL-terminated list of command names that is_command_for_charset accepts.
 * Each entry has a matching branch in interpret_command_as_charset_giving. */
const char* commands_for_codesets[] = {"word", "space", "digit", "variable", "any", "A", NULL};
/* Tells whether cmd is one of the charset-giving commands: it must not be the
 * tilda command, must carry no arguments, and its name must be listed in
 * commands_for_codesets. */
bool is_command_for_charset(const Command &cmd) {
    if (cmd.tilda)
        return false;
    if (!cmd.arguments.empty())
        return false;
    return is_string_in_stringset(cmd.name.c_str(), commands_for_codesets);
}
/* Stores into ret the codeset selected by cmd.name:
 *   "word"       -> cc.word_constituents
 *   "space"      -> cc.spaces
 *   "digit"      -> cc.digits
 *   "variable"   -> cc.variable_constituents
 *   "any" or "A" -> codeset_of_all
 * Any other name trips an assertion; with asserts compiled out ret is left
 * untouched. */
void interpret_command_as_charset_giving(const CommonCodesets& cc, const Command &cmd, codeset_t& ret)
{
    const auto& name = cmd.name;
    if (name == "word") {
        ret = cc.word_constituents;
        return;
    }
    if (name == "space") {
        ret = cc.spaces;
        return;
    }
    if (name == "digit") {
        ret = cc.digits;
        return;
    }
    if (name == "variable") {
        ret = cc.variable_constituents;
        return;
    }
    if (name == "any" || name == "A") {
        ret = codeset_of_all;
        return;
    }
    assert(false);
}
}