diff --git a/src/bin/mtgott_cli.rs b/src/bin/mtgott_cli.rs
new file mode 100644
index 0000000..2a778e4
--- /dev/null
+++ b/src/bin/mtgott_cli.rs
@@ -0,0 +1,3 @@
+fn main() {
+
+}
diff --git a/src/mtgott/charclasses.rs b/src/mtgott/charclasses.rs
index eeb9557..1ddac41 100644
--- a/src/mtgott/charclasses.rs
+++ b/src/mtgott/charclasses.rs
@@ -1,7 +1,13 @@
 use std::ops::RangeBounds;
 
+/* True for whitespace that stays on the current line: tab, space or carriage return */
+pub fn is_lnspace(ch: char) -> bool {
+    ch == '\t' || ch == ' ' || ch == '\r'
+}
+
+/* True for any whitespace: everything is_lnspace accepts, plus line feed */
 pub fn is_whitespace(ch: char) -> bool {
-    ch == '\t' && ch == '\n' && ch == ' ' && ch == '\r'
+    is_lnspace(ch) || ch == '\n'
 }
 
 pub fn is_digit(ch: char) -> bool {
@@ -22,6 +28,12 @@ pub fn escape_for_html(s: &str) -> String {
         .replace("'", "'").replace("\"", """)
 }
 
-pub fn is_illegal_name(s: &str) -> bool {
-    s != "_" && s != "if" && s != "else" && s != "for" && s != "let" && s != "self" && s != "super"
+/* True for names rejected by is_illegal_name, and additionally for "_" */
+pub fn is_bad_name(s: &str) -> bool {
+    is_illegal_name(s) || s == "_"
+}
+
+/* True for the reserved names "root", "self" and "super" */
+pub fn is_illegal_name(s: &str) -> bool {
+    s == "root" || s == "self" || s == "super"
 }
diff --git a/src/mtgott/mod.rs b/src/mtgott/mod.rs
index 047206c..01a55b1 100644
--- a/src/mtgott/mod.rs
+++ b/src/mtgott/mod.rs
@@ -1,386 +1,2 @@
 mod charclasses;
-
-use serde_json;
-use std::collections::HashMap;
-use charclasses::*;
-
-struct CallExpression {
-    callee: Option>,
-    arguments: Vec>,
-}
-
-enum Expression {
-    Root(),
-    Argument(u64),
-    Get(Box, Box),
-    Attribute(Box, String),
-    Call(Box, Box),
-    Int(u64),
-}
-
-struct IfSubElement {
-    branches: Vec,
-    conditions: Vec
-}
-
-struct ForSubElement {
-    iterable: Expression,
-    hold_key: bool,
-    hold_value: bool,
-    core: Element,
-    /* Either "\n", " " or "" */
-    join: String,
-}
-
-enum SubElement{
-    Static(String),
-    /* ======== Other are dynamic ======== */
-    If(IfSubElement),
-    /* Both for {{}} and {[]} */
-    InsertExpr(Expression),
-    For(ForSubElement),
-    Let(Expression, Element),
-}
-
-struct Element {
-    argc: usize,
-    sub_elements: Vec
-}
-
-enum Plemege {
-    Element(Element),
-
Package(Box>), -} - -pub enum FileParsingErrorKind { - expected_pack_opening_or_element_opening_or_pack_ending, - expected_pack_opening_or_element_opening_or_eof, - unmatched_pack_ending_tag, - expected_pack_name, - illegal_pack_name, - pack_member_name_already_occupied, - expected_pack_opening_tag_end, - expected_element_name, - illegal_element_name, - expected_argument_name_or_eldef_opening_tag_end, - illegal_argument_name, - repeated_argument_name, - expected_command_name, - incorrect_block_ending_tag_expected_normal, - expected_write_tag_end_after_expression, - expected_roughinsert_tag_end_after_expression, - illegal_command_name, - expected_cmd_tag_end, -} - -use FileParsingErrorKind::*; - -pub struct FileParsingError { - kind: FileParsingErrorKind, - p1: usize, - p2: usize, -} - -impl FileParsingError { - fn new(kind: FileParsingErrorKind, p1: usize, p2: usize) -> Self { - Self{kind, p1, p2} - } -} - -struct Parser<'a> { - text: &'a str, - p: usize -} - -impl Parser { - fn here(&self)->Option { - self.text[self.p..].chars().next() - } - - fn is_ahead(&self, substr: &[u8])->bool { - self.text[self.p..].starts_with(substr) - } - - fn advance(&mut self) { - self.p += self.text[self.p..].char_indices().next().unwrap().0; - } - - fn skip_whitespace(&mut self) { - loop { - match self.here() { - Some(ch ) => if !is_whitespace(ch) { - break - } else { self.advance(); } - None => break - } - } - } - - fn skip_normal_word(&mut self){ - loop { - match self.here() { - Some(ch ) => if !is_normal_word_constituent(ch) { - break - } else { self.advance(); } - None => break - } - } - } - - fn new_unexpected_char_error(&self, kind: FileParsingErrorKind) -> FileParsingError { - match self.text[self.p..].char_indices().next() { - Some((off, _)) => FileParsingError::new(kind, self.p, self.p + off), - None => FileParsingError::new(kind, self.p, self.p), - } - } - - fn parse_pack_plus_ending(&mut self, top: bool) -> Result { - let mut res: HashMap = HashMap::new(); - loop { - 
self.skip_whitespace(); - if self.p == self.text.len() { - return if top { - Ok(Plemege::Package(Box::new(res))) - } else { - Err(self.new_unexpected_char_error(expected_pack_opening_or_element_opening_or_pack_ending)) - } - } - if self.is_ahead(&[b'{', b'$', b'}']) { - if top { - return Err(FileParsingError::new(unmatched_pack_ending_tag, self.p, self.p + 3)) - } else { - self.p += 3; - return Ok(res); - } - } else if self.is_ahead(&[b'{', b'$']) { - self.p += 2; - self.skip_whitespace(); - let p1 = self.p; - self.skip_normal_word(); - if self.p == p1 { - return Err(self.new_error(expected_pack_name)) - } - let child_name: &str = &self.text[p1..self.p]; - if !is_illegal_name(child_name) { - return Err(FileParsingError::new(illegal_pack_name, p1, self.p)) - } - if let Some(_) = res.get(child_name) { - return Err(FileParsingError::new(pack_member_name_already_occupied, p1, self.p)) - } - self.skip_normal_word(); - if !self.is_ahead(&[b'$', b'}']) { - return Err(self.new_unexpected_char_error(expected_pack_opening_tag_end)) - } - self.p += 2; - res.insert(String::from(child_name), self.parse_pack_plus_ending(false)); - } else if self.is_ahead(&[b'{', b'@']) { - self.p += 2; - self.skip_whitespace(); - let p1 = self.p; - self.skip_normal_word(); - if p1 == self.p { - return Err(FileParsingError::new(expected_element_name, p1, self.p)) - } - let child_name = &self.text[p1..self.p]; - if is_illegal_name(child_name) { - return Err(FileParsingError::new(illegal_element_name, p1, self.p)) - } - if let Some(_) = res.get(child_name) { - return Err(FileParsingError::new(pack_member_name_already_occupied, p1, self.p)) - } - let mut arg_names: Vec<&str> = Vec::new(); - loop { - self.skip_whitespace(); - if self.is_ahead(&[b'@', b'}']) { - self.p += 2; - break - } - let p1 = self.p; - self.skip_normal_word(); - if p1 == self.p { - return Err(FileParsingError::new(expected_argument_name_or_eldef_opening_tag_end, p1, self.p)) - } - let arg_name: &str = &self.text[p1..self.p]; - if 
is_illegal_name(arg_name) { - return Err(FileParsingError::new(illegal_argument_name, p1, self.p)) - } - if arg_names.iter().any(|b: &str| b == arg_name) { - return Err(FileParsingError::new(repeated_argument_name, p1, self.p)) - } - arg_names.push(arg_name); - } - let (child_el, end_cmd): (Element, ReasonOfElementEnd) = self.parse_element_plus_ending(arg_names)?; - if end_cmd.cmd != BlockEndingCmdTag::NORMAL { - return Err(FileParsingError::new(incorrect_block_ending_tag_expected_normal, end_cmd.p1, self.p)) - } - res.insert(child_name, child_el); - } else { - self.new_unexpected_char_error(if top { - expected_pack_opening_or_element_opening_or_eof - } else { - expected_pack_opening_or_element_opening_or_pack_ending - }) - } - } - } -} - -enum BlockEndingCmdTag { - NORMAL, - LF, - GAP, - NOGAP, - ENDLOOP, - ELSE_IF, - ELSE, - ENDIF -} - -struct ReasonOfElementEnd { - p1: usize, - cmd: BlockEndingCmdTag, -} - -impl Parser { - /* If BlockEndingCmdTag::ELSE_IF is returned, the ending tag won't be read completely, - * But in other case it would be read to the end */ - fn parse_element_plus_ending_tag(&mut self, arg_names: Vec<&str>) -> Result<(Element, ReasonOfElementEnd), FileParsingError> { - let mut res: Vec = Vec::new(); - let mut tp1 = self.p; - - let fin_static = || { - if tp1 < self.p { - res.push(SubElement::Static(String::from(&self.text[tp1..self.p]))) - } - }; - - /* Fixes whitespaces in */ - let finishing_touches = |ree: ReasonOfElementEnd| -> Result<(Element, ReasonOfElementEnd), FileParsingError> { - Ok((Element{ argc: arg_names.count(), sub_elements: res }, ree)) - }; - - loop { - if self.is_ahead(&[b'{', b'{']) { - fin_static(); - self.p += 2; - let (expr, tt) = self.parse_expression()?; - if tt != ExpressionEndingTagEnd::Write { - return Err(FileParsingError::new(expected_write_tag_end_after_expression, self.p - 2, self.p)) - } - res.push(SubElement::InsertExpr( - Expression::Call( - Box::new(Expression::Attribute( - Box::new(Expression::Root()), 
"sanitize" - )), - expr) - )); - tp1 = self.p; - } else if self.is_ahead(&[b'{', b'[']) { - fin_static(); - self.p += 2; - let (expr, tt) = self.parse_expression()?; - if tt != ExpressionEndingTagEnd::RoughInsert { - return Err(FileParsingError::new(expected_roughinsert_tag_end_after_expression, self.p - 2, self.p)) - } - res.push(SubElement::InsertExpr(expr)); - } else if self.is_ahead(&[b'{', b'%', b'}']) { - fin_static(); - self.p += 3; - return finishing_touches(ReasonOfElementEnd{p1: self.p - 3, cmd: BlockEndingCmdTag::NORMAL}); - } else if self.is_ahead(&[b'{', b'%']) { - fin_static(); - /* Might be needed if this is the ENDING cmd tag */ - let p1 = self.p; - self.p += 2; - self.skip_whitespace(); - let pb = self.p; - self.skip_normal_word(); - if pb == self.p { - return Err(self.new_unexpected_char_error(expected_command_name)) - } - let cmd = &self.text[pb..self.p]; - - /* Read space + expect %} and do finishing_touches */ - let just_one_thing = |cmd: BlockEndingCmdTag| -> Result<(Element, ReasonOfElementEnd), FileParsingError> { - self.skip_whitespace(); - if !self.is_ahead(&[b'%', b'}']) { - return self.new_unexpected_char_error(expected_cmd_tag_end); - } - self.p += 2; - finishing_touches(ReasonOfElementEnd{p1, cmd}) - }; - - match cmd { - "lf" => return just_one_thing(BlockEndingCmdTag::LF), - "gap" => return just_one_thing(BlockEndingCmdTag::GAP), - "nogap" => return just_one_thing(BlockEndingCmdTag::NOGAP), - "else" => { - self.skip_whitespace(); - let ps = self.p; - self.skip_normal_word(); - if ps == self.p { - return just_one_thing(BlockEndingCmdTag::ELSE) - } else if self.text[ps..self.p] != "if" { - return Err(FileParsingError::new(illegal_command_name, pb, self.p)) - } - return finishing_touches(ReasonOfElementEnd{p1, cmd: BlockEndingCmdTag::ELSE_IF}) - } - "endif" => return just_one_thing(BlockEndingCmdTag::ENDIF), - "endloop" => return just_one_thing(BlockEndingCmdTag::ENDLOOP), - "for" => res.push(self.parse_let(&arg_names)?), - "if" => 
res.push(self.parse_if(&arg_names)?), - "let" => res.push(self.parse_let(&arg_names)?), - _ => return Err(FileParsingError::new(illegal_command_name, pb, self.p)), - } - } else { - self.advance(); - } - } - } - - /* It turned out to be so complex I put it in a separate function. - * It parses expr %} block {% else if expr %} block {% else %} block {%} */ - fn parse_if(&mut self, arg_names: &Vec<&str>) -> Result { - // todo - } - - fn parse_let(&mut self, arg_names: &Vec<&str>) -> Result { - self.skip_whitespace(); - let p1 = self.p; - self.skip_normal_word(); - if p1 == self.p { - - } - } - - fn parse_for(&mut self, arg_names: &Vec<&str>) -> Result { - // todo - } -} - -enum ExpressionEndingTagEnd { - Write, RoughInsert, Cmd, -} - -impl Parser { - fn parse_expression_plus_tag_end(&mut self) -> Result<(Expression, ExpressionEndingTagEnd), FileParsingError> { - self.skip_whitespace(); - return Err(self.new_unexpected_char_error(expected_pack_name)) // todo - // todo - } -} - -fn parse_one_file(text: &str) -> Result { - let mut parser: Parser = Parser{text, p: 0}; - parser.parse_pack_plus_ending(true) -} - -#[cfg(test)] -mod tests{ - use super::*; - - #[test] - fn t1 () { - - } -} \ No newline at end of file +pub mod parser; diff --git a/src/mtgott/parser.rs b/src/mtgott/parser.rs new file mode 100644 index 0000000..5b3f4e3 --- /dev/null +++ b/src/mtgott/parser.rs @@ -0,0 +1,691 @@ +use std::collections::HashMap; +use crate::mtgott::charclasses::*; + +pub enum Expression { + Root, + Argument(u64), + Get(Box, Box), + Attribute(Box, String), + Call(Box, Vec), + Int(u64), + None, +} + +pub struct IfSubElement { + branches: Vec, + conditions: Vec +} + +pub struct ForSubElement { + iterable: Expression, + core: Element, + /* Either "\n", " " or "" */ + join: String, +} + +pub enum SubElement{ + Static(String), + /* ======== Other are dynamic ======== */ + If(IfSubElement), + /* Both for {{}} and {[]} */ + InsertExpr(Expression), + For(ForSubElement), + Let(Expression, 
Element), +} + +pub struct Element { + argc: usize, + sub_elements: Vec +} + +pub enum Plemege { + Element(Element), + Package(Box>), +} + +pub enum FileParsingErrorKind { + expected_pack_opening_or_element_opening_or_pack_ending, + expected_pack_opening_or_element_opening_or_eof, + unmatched_pack_ending_tag, + expected_pack_name, + illegal_pack_name, + pack_member_name_already_occupied, + expected_pack_opening_tag_end, + expected_element_name, + illegal_element_name, + expected_argument_name_or_eldef_opening_tag_end, + illegal_argument_name, + repeated_argument_name, + expected_command_name, + incorrect_block_ending_tag_expected_normal, + expected_write_tag_end_after_expression, + expected_roughinsert_tag_end_after_expression, + illegal_command_name, + expected_cmd_tag_end, + expected_variable_name, + illegal_variable_name, + expected_assignment_operator, + expected_cmd_tag_end_after_expression, + expected_comma_or_colon, + expected_colon, + forloop_variable_cant_take_occupied_name, + incorrect_block_ending_tag_expected_normal_or_lf_gap_nogap_or_forloop, + incorrect_block_ending_tag_expected_normal_or_endif_or_else_or_else_if, + incorrect_block_ending_tag_expected_normal_or_endif, + expected_nonempty_expression, + expected_closing_round_bracket, + cant_start_word_immediately_after_digit, + integer_parsing_error, + illegal_object_name, + expected_attribute_name_after_dot, + illegal_attribute_name, + expected_closing_square_bracket, + empty_expression_inside_round_brackets, + empty_expression_inside_square_brackets, +} + +use FileParsingErrorKind::*; +use crate::mtgott::charclasses::{is_bad_name, is_digit, is_illegal_name, is_lnspace, is_normal_word_constituent, is_whitespace}; + +pub struct FileParsingError { + kind: FileParsingErrorKind, + p1: usize, + p2: usize, +} + +impl FileParsingError { + fn new(kind: FileParsingErrorKind, p1: usize, p2: usize) -> Self { + Self{kind, p1, p2} + } +} + +struct Parser<'a> { + text: &'a str, + p: usize +} + +impl<'a> Parser<'a> 
{
+    /* Character under the cursor, or None at end of input */
+    fn here(&self) -> Option<char> {
+        self.text[self.p..].chars().next()
+    }
+
+    /* True if the text at the cursor starts with substr */
+    fn is_ahead(&self, substr: &str)->bool {
+        self.text[self.p..].starts_with(substr)
+    }
+
+    /* True if the character under the cursor is exactly ch */
+    fn is_char_ahead(&self, ch: char) -> bool {
+        match self.here() {
+            Some(cha) => cha == ch,
+            None => false,
+        }
+    }
+
+    /* True if the character under the cursor satisfies is_digit */
+    fn is_digit_ahead(&self) -> bool {
+        match self.here() {
+            Some(ch) => is_digit(ch),
+            None => false,
+        }
+    }
+
+    /* True if the character under the cursor satisfies is_normal_word_constituent */
+    fn is_word_ahead(&self) -> bool {
+        match self.here() {
+            Some(ch) => is_normal_word_constituent(ch),
+            None => false,
+        }
+    }
+
+    /* Moves the cursor past the character under it.
+     * Panics at end of input, so callers must check here() first. */
+    fn advance(&mut self) {
+        /* char_indices() reports the first character at offset 0, so adding that offset
+         * never moved the cursor. Step by the encoded width of the character instead. */
+        let ch = self.here().expect("advance() called at end of input");
+        self.p += ch.len_utf8();
+    }
+
+    /* Moves the cursor to the first non-whitespace character (or to end of input) */
+    fn skip_whitespace(&mut self) {
+        loop {
+            match self.here() {
+                Some(ch ) => if !is_whitespace(ch) {
+                    break
+                } else { self.advance(); }
+                None => break
+            }
+        }
+    }
+
+    /* Moves the cursor past a run of word-constituent characters */
+    fn skip_normal_word(&mut self){
+        loop {
+            match self.here() {
+                Some(ch ) => if !is_normal_word_constituent(ch) {
+                    break
+                } else { self.advance(); }
+                None => break
+            }
+        }
+    }
+
+    /* Builds an error whose span covers the character under the cursor.
+     * At end of input the span is empty. */
+    fn new_unexpected_char_error(&self, kind: FileParsingErrorKind) -> FileParsingError {
+        match self.here() {
+            /* The end of the span is the start plus the width of the offending character */
+            Some(ch) => FileParsingError::new(kind, self.p, self.p + ch.len_utf8()),
+            None => FileParsingError::new(kind, self.p, self.p),
+        }
+    }
+
+    /* Parses pack members (nested packs and elements) into a Plemege::Package.
+     * With top == true the pack ends at end of input and a "{$}" tag is an error;
+     * with top == false the pack ends at "{$}" and end of input is an error. */
+    fn parse_pack_plus_ending(&mut self, top: bool) -> Result<Plemege, FileParsingError> {
+        let mut res: HashMap<String, Plemege> = HashMap::new();
+        loop {
+            self.skip_whitespace();
+            if self.p == self.text.len() {
+                return if top {
+                    Ok(Plemege::Package(Box::new(res)))
+                } else {
+                    Err(self.new_unexpected_char_error(expected_pack_opening_or_element_opening_or_pack_ending))
+                }
+            }
+            if self.is_ahead("{$}") {
+                return if top {
+                    Err(FileParsingError::new(unmatched_pack_ending_tag, self.p, self.p + 3))
+                } else {
+                    self.p += 3;
+                    Ok(Plemege::Package(Box::new(res)))
+                };
+            } else if self.is_ahead("{$") {
+                self.p += 2;
+                self.skip_whitespace();
+                let p1 = self.p;
+                self.skip_normal_word();
+                if self.p == p1 {
+                    return Err(self.new_unexpected_char_error(expected_pack_name))
+                }
+                let child_name: &str = &self.text[p1..self.p];
+                /* A bad name is the error case, exactly as in the element branch below
+                 * (the condition used to be negated, rejecting every legal pack name) */
+                if is_bad_name(child_name) {
+                    return Err(FileParsingError::new(illegal_pack_name, p1, self.p))
+                }
+                if res.contains_key(child_name) {
+                    return Err(FileParsingError::new(pack_member_name_already_occupied, p1, self.p))
+                }
+                /* The name has already been consumed; allow whitespace before the closing "$}" */
+                self.skip_whitespace();
+                if !self.is_ahead("$}") {
+                    return Err(self.new_unexpected_char_error(expected_pack_opening_tag_end))
+                }
+                self.p += 2;
+                res.insert(String::from(child_name), self.parse_pack_plus_ending(false)?);
+            } else if self.is_ahead("{@") {
+                /* Element definition: {@ name arg1 arg2 ... @} body {%} */
+                self.p += 2;
+                self.skip_whitespace();
+                let p1 = self.p;
+                self.skip_normal_word();
+                if p1 == self.p {
+                    return Err(FileParsingError::new(expected_element_name, p1, self.p))
+                }
+                let child_name = &self.text[p1..self.p];
+                if is_bad_name(child_name) {
+                    return Err(FileParsingError::new(illegal_element_name, p1, self.p))
+                }
+                if res.contains_key(child_name) {
+                    return Err(FileParsingError::new(pack_member_name_already_occupied, p1, self.p))
+                }
+                /* Collect argument names until the "@}" that closes the opening tag */
+                let mut arg_names: Vec<&str> = Vec::new();
+                loop {
+                    self.skip_whitespace();
+                    if self.is_ahead("@}") {
+                        self.p += 2;
+                        break
+                    }
+                    let p1 = self.p;
+                    self.skip_normal_word();
+                    if p1 == self.p {
+                        return Err(FileParsingError::new(expected_argument_name_or_eldef_opening_tag_end, p1, self.p))
+                    }
+                    let arg_name: &str = &self.text[p1..self.p];
+                    if is_bad_name(arg_name) {
+                        return Err(FileParsingError::new(illegal_argument_name, p1, self.p))
+                    }
+                    if arg_names.iter().any(|b: &&str| *b == arg_name) {
+                        return Err(FileParsingError::new(repeated_argument_name, p1, self.p))
+                    }
+                    arg_names.push(arg_name);
+                }
+                let (child_el, end_cmd): (Element, ReasonOfElementEnd) = self.parse_element_plus_ending_tag(&arg_names)?;
+                /* A top-level element body may only be closed by the plain "{%}" tag */
+                if !matches!(end_cmd.cmd, BlockEndingCmdTag::NORMAL) {
+                    return Err(FileParsingError::new(incorrect_block_ending_tag_expected_normal, end_cmd.p1, self.p))
+                }
+                res.insert(String::from(child_name), Plemege::Element(child_el));
+            } else {
+                return
Err(self.new_unexpected_char_error(if top { + expected_pack_opening_or_element_opening_or_eof + } else { + expected_pack_opening_or_element_opening_or_pack_ending + })); + } + } + } +} + +enum BlockEndingCmdTag { + NORMAL, + LF, + GAP, + NOGAP, + ENDLOOP, + ELSE_IF, + ELSE, + ENDIF +} + +struct ReasonOfElementEnd { + p1: usize, + cmd: BlockEndingCmdTag, +} + +fn fix_whitespaces_in_element(subels: &mut Vec) { + let n = subels.len(); + if n > 0 { + match &mut subels[0] { + SubElement::Static(org) => { + let mut ta = 0; + for p in 0..org.len(){ + if !is_whitespace(org.as_bytes()[p] as char) { + ta = p; break + } + if org.as_bytes()[p] == b'\n' { + ta = p + 1; + } + } + *org = String::from(&org[ta..]); + }, + _ => {}, + } + match &mut subels[n - 1] { + SubElement::Static(org) => { + while let Some(ch) = org.chars().last() { + if is_whitespace(ch) { org.pop(); } else { break } + } + }, + _ => {}, + } + } + let mut min_offset = usize::MAX; + for i in 0..subels.len() { + match &mut subels[i] { + SubElement::Static(org) => { + let mut seen_online = i > 0; + let mut line_bg: usize = 0; + for p in 0..org.len() { + let ch = org.as_bytes()[p] as char; + if !is_whitespace(ch) { + if !seen_online { + seen_online = true; + min_offset = std::cmp::min(min_offset, p - line_bg) + } + } else if ch == '\n' { + line_bg = p + 1; + seen_online = false; + } + } + /* This won't cause issues on the .last() because we previously rstripped the last part */ + if !seen_online{ + min_offset = std::cmp::min(min_offset, org.len() - line_bg) + } + }, + _ => {}, + } + } + for i in 0..subels.len() { + match &mut subels[i] { + SubElement::Static(org) => { + let mut res: Vec = Vec::new(); + let mut should_ignore_gap = i > 0; + let mut line_bg: usize = 0; + for p in 0..org.len() { + let ch = org.as_bytes()[p]; + if ch == b'\n' { + line_bg = p + 1; + should_ignore_gap = false; + /* We handle trailing whitespaces case here */ + while let Some(&ch) = res.last() { + if is_lnspace(ch as char) { res. 
pop(); } else { break }
+                        }
+                    } else if !should_ignore_gap && p - line_bg < min_offset {
+                        /* Strip the common indentation only from lines that begin inside this
+                         * static part. should_ignore_gap is set while we are still on a line that
+                         * was started by an earlier sub-element; its bytes are real content. */
+                        continue
+                    }
+                    res.push(ch);
+                }
+                *org = String::from_utf8(res).unwrap();
+            },
+            _ => {},
+        }
+    }
+}
+
+impl<'a> Parser<'a> {
+    /* Parses an element body into sub-elements until a block-ending command tag is met,
+     * and returns the element together with the tag that ended it.
+     * If BlockEndingCmdTag::ELSE_IF is returned, the ending tag won't be read completely,
+     * But in other case it would be read to the end */
+    fn parse_element_plus_ending_tag(&mut self, arg_names: &Vec<&str>) -> Result<(Element, ReasonOfElementEnd), FileParsingError> {
+        let mut res: Vec<SubElement> = Vec::new();
+        /* Start of the static text that has not been flushed into res yet */
+        let mut tp1 = self.p;
+
+        /* Flushes the pending static text [tp1, cursor) into res. The parser is passed in
+         * explicitly (and the text is read through it) so that the closure does not hold
+         * a borrow of self across the mutations below. */
+        let fin_static = |p: &Parser, tp1: usize, res: &mut Vec<SubElement>| {
+            if tp1 < p.p {
+                res.push(SubElement::Static(String::from(&p.text[tp1..p.p])))
+            }
+        };
+
+        /* Fixes whitespaces in static sub-elements */
+        let finishing_touches = |ree: ReasonOfElementEnd, mut res: Vec<SubElement>| -> Result<(Element, ReasonOfElementEnd), FileParsingError> {
+            fix_whitespaces_in_element(&mut res);
+            Ok((Element{ argc: arg_names.len(), sub_elements: res }, ree))
+        };
+
+        loop {
+            if self.is_ahead("{{") {
+                /* {{ expr }} inserts the expression wrapped in a call to root.sanitize */
+                fin_static(self, tp1, &mut res);
+                self.p += 2;
+                let expr: Expression = self.parse_expression(arg_names)?;
+                if !self.is_ahead("}}") {
+                    return Err(FileParsingError::new(expected_write_tag_end_after_expression, self.p - 2, self.p));
+                }
+                self.p += 2;
+                if !matches!(expr, Expression::None){
+                    res.push(SubElement::InsertExpr(
+                        Expression::Call(
+                            Box::new(Expression::Attribute(
+                                Box::new(Expression::Root), String::from("sanitize")
+                            )),
+                            vec![expr])
+                    ));
+                }
+                tp1 = self.p;
+            } else if self.is_ahead("{[") {
+                /* {[ expr ]} inserts the expression as is */
+                fin_static(self, tp1, &mut res);
+                self.p += 2;
+                let expr: Expression = self.parse_expression(arg_names)?;
+                if !self.is_ahead("]}") {
+                    return Err(FileParsingError::new(expected_roughinsert_tag_end_after_expression, self.p - 2, self.p))
+                }
+                self.p += 2;
+                if !matches!(expr, Expression::None){
+                    res.push(SubElement::InsertExpr(expr));
+                }
+                tp1 = self.p;
+            } else if self.is_ahead("{%}") {
+                fin_static(self, tp1, &mut res);
+                self.p += 3;
+                return
finishing_touches(ReasonOfElementEnd{p1: self.p - 3, cmd: BlockEndingCmdTag::NORMAL}, res);
+            } else if self.is_ahead("{%") {
+                fin_static(self, tp1, &mut res);
+                /* Might be needed if this is the ENDING cmd tag */
+                let p1 = self.p;
+                self.p += 2;
+                self.skip_whitespace();
+                let pb = self.p;
+                self.skip_normal_word();
+                if pb == self.p {
+                    return Err(self.new_unexpected_char_error(expected_command_name))
+                }
+                let cmd = &self.text[pb..self.p];
+
+                /* Read space + expect %} and do finishing_touches */
+                let just_one_thing = |pelf: &mut Parser, cmd: BlockEndingCmdTag, res: Vec<SubElement>| -> Result<(Element, ReasonOfElementEnd), FileParsingError> {
+                    pelf.skip_whitespace();
+                    if !pelf.is_ahead("%}") {
+                        return Err(pelf.new_unexpected_char_error(expected_cmd_tag_end));
+                    }
+                    pelf.p += 2;
+                    finishing_touches(ReasonOfElementEnd{p1, cmd}, res)
+                };
+
+                match cmd {
+                    /* Block-ending commands: they finish this element and report why */
+                    "lf" => return just_one_thing(self, BlockEndingCmdTag::LF, res),
+                    "gap" => return just_one_thing(self, BlockEndingCmdTag::GAP, res),
+                    "nogap" => return just_one_thing(self, BlockEndingCmdTag::NOGAP, res),
+                    "else" => {
+                        self.skip_whitespace();
+                        let ps = self.p;
+                        self.skip_normal_word();
+                        if ps == self.p {
+                            return just_one_thing(self, BlockEndingCmdTag::ELSE, res)
+                        } else if &self.text[ps..self.p] != "if" {
+                            return Err(FileParsingError::new(illegal_command_name, pb, self.p))
+                        }
+                        /* "else if": the condition and the closing %} are left for parse_if */
+                        return finishing_touches(ReasonOfElementEnd{p1, cmd: BlockEndingCmdTag::ELSE_IF}, res);
+                    }
+                    "endif" => return just_one_thing(self, BlockEndingCmdTag::ENDIF, res),
+                    "endloop" => return just_one_thing(self, BlockEndingCmdTag::ENDLOOP, res),
+                    /* Block-opening commands: each parses its own nested block.
+                     * "for" used to be routed to parse_let, which left parse_for unreachable. */
+                    "for" => res.push(self.parse_for(arg_names)?),
+                    "if" => res.push(self.parse_if(arg_names)?),
+                    "let" => res.push(self.parse_let(arg_names)?),
+                    _ => return Err(FileParsingError::new(illegal_command_name, pb, self.p)),
+                }
+                tp1 = self.p;
+            } else {
+                /* NOTE(review): advance() unwraps the next character, so an element body that
+                 * reaches end of input without an ending tag panics here instead of returning
+                 * an error. Fixing that needs a dedicated error kind. */
+                self.advance();
+            }
+        }
+    }
+
+    /* Parses a non-empty expression that must be followed by the "%}" tag end */
+    fn parse_expression_at_cmd_tag_end(&mut self, arg_names: &Vec<&str>) -> Result<Expression, FileParsingError> {
+        let p1 = self.p;
+        let expr:
Expression = self.parse_expression(arg_names)?;
+        if matches!(expr, Expression::None) {
+            return Err(FileParsingError::new(expected_nonempty_expression, p1, self.p))
+        }
+        if !self.is_ahead("%}"){
+            return Err(self.new_unexpected_char_error(expected_cmd_tag_end_after_expression));
+        }
+        /* Consume the "%}" as the {{ }} and {[ ]} branches do for their tag ends. Every caller
+         * goes straight on to parse a block, which would otherwise swallow "%}" as static text. */
+        self.p += 2;
+        Ok(expr)
+    }
+
+    /* It turned out to be so complex I put it in a separate function.
+     * It parses expr %} block {% else if expr %} block {% else %} block {%}
+     * and returns SubElement::If: one condition per "if" / "else if" branch, plus a trailing
+     * branch without a condition when an "else" block is present. */
+    fn parse_if(&mut self, arg_names: &Vec<&str>) -> Result<SubElement, FileParsingError> {
+        let mut conditions: Vec<Expression> = Vec::new();
+        let mut blocks: Vec<Element> = Vec::new();
+        loop {
+            let expr = self.parse_expression_at_cmd_tag_end(arg_names)?;
+            let (inner_block, ending_tag) = self.parse_element_plus_ending_tag(arg_names)?;
+            conditions.push(expr);
+            match ending_tag.cmd {
+                BlockEndingCmdTag::ELSE | BlockEndingCmdTag::NORMAL | BlockEndingCmdTag::ENDIF |
+                BlockEndingCmdTag::ELSE_IF => blocks.push(inner_block),
+                _ => return Err(FileParsingError::new(
+                    incorrect_block_ending_tag_expected_normal_or_endif_or_else_or_else_if, ending_tag.p1, self.p)),
+            }
+            if matches!(ending_tag.cmd, BlockEndingCmdTag::ELSE) {
+                /* The else block is the last one and must be closed by {%} or {% endif %} */
+                let (else_block, the_end) = self.parse_element_plus_ending_tag(arg_names)?;
+                if !matches!(the_end.cmd, BlockEndingCmdTag::NORMAL | BlockEndingCmdTag::ENDIF){
+                    return Err(FileParsingError::new(incorrect_block_ending_tag_expected_normal_or_endif, the_end.p1, self.p));
+                }
+                blocks.push(else_block);
+                break
+            } else if matches!(ending_tag.cmd, BlockEndingCmdTag::NORMAL | BlockEndingCmdTag::ENDIF) {
+                break
+            }
+            /* Otherwise the tag was ELSE_IF: loop to read the next condition and block */
+        }
+        Ok(SubElement::If(IfSubElement{branches: blocks, conditions}))
+    }
+
+    /* Parses "name = expr %} block {%}" into SubElement::Let; inside the block the new
+     * name is visible as one extra argument appended after arg_names */
+    fn parse_let(&mut self, arg_names: &Vec<&str>) -> Result<SubElement, FileParsingError> {
+        self.skip_whitespace();
+        let p1 = self.p;
+        self.skip_normal_word();
+        if p1 == self.p {
+            // Ironically, these symbols are actually constants
+            return Err(FileParsingError::new(expected_variable_name, p1, self.p));
+        }
+        let new_variable_name = &self.text[p1..self.p];
+        if is_bad_name(new_variable_name){
+
return Err(FileParsingError::new(illegal_variable_name, p1, self.p)); + } + self.skip_whitespace(); + if !self.is_char_ahead('=') { + return Err(self.new_unexpected_char_error(expected_assignment_operator)); + } + self.p += 1; + let expr = self.parse_expression_at_cmd_tag_end(arg_names)?; + let mut arg_names_extended = arg_names.clone(); + arg_names_extended.push(new_variable_name); + let (inner_block, ending) = self.parse_element_plus_ending_tag(&arg_names_extended)?; + if !matches!(ending.cmd, BlockEndingCmdTag::NORMAL) { + return Err(FileParsingError::new(incorrect_block_ending_tag_expected_normal, ending.p1, self.p)); + } + Ok(SubElement::Let(expr, inner_block)) + } + + fn parse_for_new_variable(&mut self, arg_names_extended: &Vec<&str>) -> Result<&'a str, FileParsingError> { + self.skip_whitespace(); + let t1 = self.p; + self.skip_normal_word(); + if t1 == self.p { + return Err(self.new_unexpected_char_error(expected_variable_name)); + } + let name = &self.text[t1..self.p]; + if is_illegal_name(name) { + return Err(FileParsingError::new(illegal_variable_name, t1, self.p)); + } + if name != "_" && arg_names_extended.iter().find(|&&b| b == name).is_some() { + return Err(FileParsingError::new(forloop_variable_cant_take_occupied_name, t1, self.p)); + } + Ok(name) + } + + fn parse_for(&mut self, arg_names: &Vec<&str>) -> Result { + let mut arg_names_extended = arg_names.clone(); + + let name1 = self.parse_for_new_variable(&arg_names_extended)?; + arg_names_extended.push(name1); + + let mut name2 = ""; + self.skip_whitespace(); + if self.is_char_ahead(',') { + self.p += 1; + name2 = self.parse_for_new_variable(&arg_names_extended)?; + } + arg_names_extended.push(name2); + + if !self.is_char_ahead(':'){ + return Err(self.new_unexpected_char_error( + if name2.len() > 0 { expected_colon } else { expected_comma_or_colon } + )); + } + self.p += 1; + + let expr = self.parse_expression_at_cmd_tag_end(arg_names)?; + let (inner_block, ending) = 
self.parse_element_plus_ending_tag(&arg_names_extended)?; + let separator: String = String::from(match ending.cmd { + BlockEndingCmdTag::NOGAP => "", + BlockEndingCmdTag::GAP => " ", + BlockEndingCmdTag::NORMAL | BlockEndingCmdTag::LF | BlockEndingCmdTag::ENDLOOP => "\n", + _ => return Err(FileParsingError::new( + incorrect_block_ending_tag_expected_normal_or_lf_gap_nogap_or_forloop, ending.p1, self.p)), + }); + Ok(SubElement::For(ForSubElement{iterable: expr, core: inner_block, join: separator})) + } + + /* Checks for ]} }} and %}. May actually return NoneOfThose */ + fn is_tag_end_ahead(&self) -> bool { + self.is_ahead("]}") || self.is_ahead("}}") || self.is_ahead("%}") + } + + /* l1 expression = l2 space l2 space ... space l2 */ + fn parse_expression_l2(&mut self, arg_names: &Vec<&str>) -> Result { + self.skip_whitespace(); + if self.is_char_ahead('(') { + let p1 = self.p; + self.p += 1; + let expr = self.parse_expression(arg_names)?; + self.skip_whitespace(); + if !self.is_char_ahead(')') { + return Err(self.new_unexpected_char_error(expected_closing_round_bracket)) + } + self.p += 1; + if matches!(expr, Expression::None){ + return Err(FileParsingError::new(empty_expression_inside_round_brackets, p1, self.p)) + } + return Ok(expr); + } + if self.is_digit_ahead() { + let p1 = self.p; + loop { + if self.is_digit_ahead() { + self.p += 1; + } else if self.is_word_ahead() { + return Err(self.new_unexpected_char_error(cant_start_word_immediately_after_digit)) + } else { + return match self.text[p1..self.p].parse::() { + Ok(v) => Ok(Expression::Int(v)), + Err(_) => Err(FileParsingError::new(integer_parsing_error, p1, self.p)), + }; + } + } + } else if self.is_word_ahead() { + let p1 = self.p; + self.skip_normal_word(); + let toplevel_name = &self.text[p1..self.p]; + if is_bad_name(toplevel_name) { + return Err(FileParsingError::new(illegal_object_name, p1, self.p)); + } + let mut bg: Expression = match arg_names.iter().rposition(|&n| n == toplevel_name) { + Some(i) => 
Expression::Argument(i as u64), + None => Expression::Attribute(Box::new(Expression::Root), String::from(toplevel_name)) + }; + loop { + self.skip_whitespace(); + bg = if self.is_char_ahead('.') { + self.p += 1; + self.skip_whitespace(); + let attrp1 = self.p; + self.skip_normal_word(); + if attrp1 == self.p { + return Err(self.new_unexpected_char_error(expected_attribute_name_after_dot)); + } + let attr_name = &self.text[attrp1..self.p]; + if is_bad_name(attr_name) { + return Err(FileParsingError::new(illegal_attribute_name, attrp1, self.p)); + } + Expression::Attribute(Box::new(bg), String::from(attr_name)) + } else if self.is_char_ahead('[') { + let sqbrp1 = self.p; + self.p += 1; + let sub_expr = self.parse_expression(arg_names)?; + self.skip_whitespace(); + if !self.is_char_ahead(']') { + return Err(self.new_unexpected_char_error(expected_closing_square_bracket)) + } + self.p += 1; + if matches!(sub_expr, Expression::None) { + return Err(FileParsingError::new(empty_expression_inside_square_brackets, sqbrp1, self.p)) + } + Expression::Get(Box::new(bg), Box::new(sub_expr)) + } else { + break + } + } + return Ok(bg) + } else { + return Ok(Expression::None) + } + } + + fn parse_expression(&mut self, arg_names: &Vec<&str>) -> Result { + let e1: Expression = self.parse_expression_l2(arg_names)?; + let mut call_args: Vec = Vec::new(); + /* It is okay to enter call_args reading loop even when e1 is None. + If parse_expression_l2 returned None, subsequent call to parse_expression_l2 + is guaranteed to return None. 
Arg list will be empty and the final `if` will choose + the second branch, which will return Expression::None */ + loop { + let arg_expr: Expression = self.parse_expression_l2(arg_names)?; + if matches!(arg_expr, Expression::None) { + break + } + call_args.push(arg_expr) + } + Ok(if call_args.len() > 0 { + Expression::Call(Box::new(e1), call_args) + } else { + e1 + }) + } +} + +pub fn parse_one_file(text: &str) -> Result { + let mut parser: Parser = Parser{text, p: 0}; + parser.parse_pack_plus_ending(true) +} diff --git a/tests/parsing_test.rs b/tests/parsing_test.rs new file mode 100644 index 0000000..ab23ca9 --- /dev/null +++ b/tests/parsing_test.rs @@ -0,0 +1,34 @@ +use yyyi_ru::mtgott::parser::*; + +fn generate_strings( + prefix: &mut String, + target_length: usize, + alphabet: &[char], + f: &mut impl Fn(String), +) { + if prefix.len() == target_length { + f(prefix.clone()); + } else { + for &c in alphabet { + prefix.push(c); + generate_strings(prefix, target_length, alphabet, f); + prefix.pop(); + } + } +} + +#[test] +fn test_parse_file_with_all_combinations() { + let alphabet = [' ', '{', '%', '}', '$', 'a']; + let target_length = 3; + + generate_strings(&mut String::new(), target_length, &alphabet, &mut |s| { + println!("Parsing {s}"); + parse_one_file(&s); + }); +} + +#[test] +fn t1(){ + +} \ No newline at end of file