From 8bd9323c48d3da8d96db4a7da03e74bea85139bb Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Mon, 21 Nov 2022 22:40:11 +0100 Subject: [PATCH] wip parse inf loop --- src/inline.rs | 307 +++++++++++++++++++++++++++----------------------- src/lex.rs | 69 +++++++----- 2 files changed, 206 insertions(+), 170 deletions(-) diff --git a/src/inline.rs b/src/inline.rs index f086d77..35b0de1 100644 --- a/src/inline.rs +++ b/src/inline.rs @@ -1,4 +1,5 @@ use crate::lex; +use crate::Span; use lex::Delimiter; use lex::Symbol; @@ -54,7 +55,7 @@ pub enum Container { Destination, } -#[derive(Debug)] +#[derive(Debug, PartialEq, Eq)] pub enum Event { Start(Container), End(Container), @@ -80,7 +81,6 @@ pub enum Dir { pub struct Parser { openers: Vec, events: Vec, - //tree: tree::Builder, } impl Parser { @@ -91,148 +91,173 @@ impl Parser { } } - /* - pub fn parse(mut self, src: &str) -> impl Iterator { - todo!() - } - */ -} - -struct Parse<'s> { - src: &'s str, - lexer: lex::Lexer<'s>, - events: &'s mut Vec, -} - -impl<'s> Parse<'s> { - fn new(src: &'s str, events: &'s mut Vec) -> Self { - todo!() - } - - /* - fn parse(mut self, src: &str, evs: &mut Vec) { - let mut chars = src.chars(); - while let Some(t) = chars.next() { - { - let verbatim_opt = match t.kind { - lex::Kind::Seq(lex::Sequence::Dollar) => { - let math_opt = (t.len <= 2) - .then(|| { - if let Some(lex::Token { - kind: lex::Kind::Seq(lex::Sequence::Backtick), - len, - }) = self.chars.clone().next() - { - Some((DisplayMath, *len)) - } else { - None - } - }) - .flatten(); - if math_opt.is_some() { - chars.next(); // backticks - } - math_opt - } - lex::Kind::Seq(lex::Sequence::Backtick) => Some((Verbatim, t.len)), - _ => None, - }; - - if let Some((atom, opener_len)) = verbatim_opt { - for tok in chars { - if let lex::Kind::Seq(lex::Sequence::Backtick) = tok.kind { - if tok.len >= opener_len { - break; - } - } - } - evs.push(Event::Atom(atom)); - return; - } + pub fn parse<'a>(&'a mut self, src: &'a str) -> impl Iterator + 'a { + std::iter::from_fn(|| { + if self.events.is_empty() { + Parse::new(src, &mut self.openers, &mut self.events).parse(); } - { - let container_opt = match t.kind { - lex::Kind::Sym(Symbol::Asterisk) => Some((Strong, Dir::Both)), - lex::Kind::Sym(Symbol::Underscore) => Some((Emphasis, Dir::Both)), - lex::Kind::Sym(Symbol::Caret) => Some((Superscript, Dir::Both)), - lex::Kind::Sym(Symbol::Tilde) => Some((Subscript, Dir::Both)), - lex::Kind::Sym(Symbol::Quote1) => Some((SingleQuoted, Dir::Both)), - lex::Kind::Sym(Symbol::Quote2) => Some((DoubleQuoted, Dir::Both)), - lex::Kind::Open(Delimiter::Bracket) => Some((LinkText, Dir::Open)), - lex::Kind::Open(Delimiter::BraceAsterisk) => Some((Strong, Dir::Open)), - lex::Kind::Open(Delimiter::BraceCaret) => Some((Superscript, Dir::Open)), - lex::Kind::Open(Delimiter::BraceEqual) => Some((Mark, Dir::Open)), - lex::Kind::Open(Delimiter::BraceHyphen) => Some((Delete, Dir::Open)), - lex::Kind::Open(Delimiter::BracePlus) => Some((Insert, Dir::Open)), - lex::Kind::Open(Delimiter::BraceTilde) => Some((Subscript, Dir::Open)), - lex::Kind::Open(Delimiter::BraceUnderscore) => Some((Emphasis, Dir::Open)), - lex::Kind::Close(Delimiter::Bracket) => Some((LinkText, Dir::Close)), - lex::Kind::Close(Delimiter::BraceAsterisk) => Some((Strong, Dir::Close)), - lex::Kind::Close(Delimiter::BraceCaret) => Some((Superscript, Dir::Close)), - lex::Kind::Close(Delimiter::BraceEqual) => Some((Mark, Dir::Close)), - lex::Kind::Close(Delimiter::BraceHyphen) => Some((Delete, Dir::Close)), - lex::Kind::Close(Delimiter::BracePlus) => Some((Insert, Dir::Close)), - lex::Kind::Close(Delimiter::BraceTilde) => Some((Subscript, Dir::Close)), - lex::Kind::Close(Delimiter::BraceUnderscore) => Some((Emphasis, Dir::Close)), - _ => None, - }; - - if let Some((cont, ty)) = container_opt { - if matches!(ty, Dir::Close | Dir::Both) && self.openers.contains(&cont) { - loop { - let c = self.openers.pop().unwrap(); - evs.push(Event::End(c)); - if c == cont { - break; - } - } - return; - } else if matches!(ty, Dir::Open | Dir::Both) { - self.openers.push(cont); - evs.push(Event::Start(cont)); - } - return; - } - } - - { - if let lex::Kind::Open(Delimiter::Brace) = t.kind { - todo!(); // check for attr - } - } - - if let Some(Event::Atom(Str)) = evs.last() { - } else { - evs.push(Event::Atom(Str)); - } - } - } -*/ -} - -/* -impl<'s> Iterator for Parser<'s> { - type Item = (Event<'s>, Span); - - fn next(&mut self) -> Option { - self.chars.next().map(|c| { - match c { - '*' => todo!(), - '_' => todo!(), - '^' => todo!(), - '~' => todo!(), - '\'' => todo!(), - '"' => todo!(), - '$' => todo!(), - '<' => todo!(), - '{' => todo!(), - '[' => todo!(), - _ => - } + self.events.pop() }) } } -*/ -mod test {} +struct Parse<'s> { + lexer: lex::Lexer<'s>, + openers: &'s mut Vec, + events: &'s mut Vec, + + /// Next token to be eaten. + next_token: lex::Token, + /// Position after `next_token`. + pos: usize, + /// Span of last eaten token. + span: Span, +} + +impl<'s> Parse<'s> { + fn new(src: &'s str, openers: &'s mut Vec, events: &'s mut Vec) -> Self { + let mut lexer = lex::Lexer::new(src); + let next_token = lexer.next_token(); + let pos = next_token.len; + Self { + lexer, + openers, + events, + next_token, + pos, + span: Span::new(0, 0), + } + } + + fn step(&mut self) -> lex::Token { + let token = self.lexer.next_token(); + dbg!(&token, self.pos); + self.pos += token.len; + std::mem::replace(&mut self.next_token, token) + } + + fn eat(&mut self) -> lex::Kind { + let end = self.pos; + let token = self.step(); + self.span = Span::new(end - token.len, end); + token.kind + } + + fn peek(&mut self) -> &lex::Kind { + &self.next_token.kind + } + + fn parse(&mut self) { + let mut kind = self.eat(); + + //dbg!(&kind); + + if kind == lex::Kind::Eof { + return; + } + + { + let verbatim_opt = match kind { + lex::Kind::Seq(lex::Sequence::Dollar) => { + let math_opt = (self.span.len() <= 2) + .then(|| { + if let lex::Kind::Seq(lex::Sequence::Backtick) = self.peek() { + Some((DisplayMath, self.span.len())) + } else { + None + } + }) + .flatten(); + if math_opt.is_some() { + self.eat(); // backticks + } + math_opt + } + lex::Kind::Seq(lex::Sequence::Backtick) => Some((Verbatim, self.span.len())), + _ => None, + }; + + if let Some((atom, opener_len)) = verbatim_opt { + while !matches!(kind, lex::Kind::Seq(lex::Sequence::Backtick)) + || self.span.len() != opener_len + { + kind = self.eat(); + } + self.events.push(Event::Atom(atom)); + return; + } + } + + { + let container_opt = match kind { + lex::Kind::Sym(Symbol::Asterisk) => Some((Strong, Dir::Both)), + lex::Kind::Sym(Symbol::Underscore) => Some((Emphasis, Dir::Both)), + lex::Kind::Sym(Symbol::Caret) => Some((Superscript, Dir::Both)), + lex::Kind::Sym(Symbol::Tilde) => Some((Subscript, Dir::Both)), + lex::Kind::Sym(Symbol::Quote1) => Some((SingleQuoted, Dir::Both)), + lex::Kind::Sym(Symbol::Quote2) => Some((DoubleQuoted, Dir::Both)), + lex::Kind::Open(Delimiter::Bracket) => Some((LinkText, Dir::Open)), + lex::Kind::Open(Delimiter::BraceAsterisk) => Some((Strong, Dir::Open)), + lex::Kind::Open(Delimiter::BraceCaret) => Some((Superscript, Dir::Open)), + lex::Kind::Open(Delimiter::BraceEqual) => Some((Mark, Dir::Open)), + lex::Kind::Open(Delimiter::BraceHyphen) => Some((Delete, Dir::Open)), + lex::Kind::Open(Delimiter::BracePlus) => Some((Insert, Dir::Open)), + lex::Kind::Open(Delimiter::BraceTilde) => Some((Subscript, Dir::Open)), + lex::Kind::Open(Delimiter::BraceUnderscore) => Some((Emphasis, Dir::Open)), + lex::Kind::Close(Delimiter::Bracket) => Some((LinkText, Dir::Close)), + lex::Kind::Close(Delimiter::BraceAsterisk) => Some((Strong, Dir::Close)), + lex::Kind::Close(Delimiter::BraceCaret) => Some((Superscript, Dir::Close)), + lex::Kind::Close(Delimiter::BraceEqual) => Some((Mark, Dir::Close)), + lex::Kind::Close(Delimiter::BraceHyphen) => Some((Delete, Dir::Close)), + lex::Kind::Close(Delimiter::BracePlus) => Some((Insert, Dir::Close)), + lex::Kind::Close(Delimiter::BraceTilde) => Some((Subscript, Dir::Close)), + lex::Kind::Close(Delimiter::BraceUnderscore) => Some((Emphasis, Dir::Close)), + _ => None, + }; + + if let Some((cont, ty)) = container_opt { + if matches!(ty, Dir::Close | Dir::Both) && self.openers.contains(&cont) { + loop { + let c = self.openers.pop().unwrap(); + self.events.push(Event::End(c)); + if c == cont { + break; + } + } + return; + } else if matches!(ty, Dir::Open | Dir::Both) { + self.openers.push(cont); + self.events.push(Event::Start(cont)); + } + return; + } + } + + { + if let lex::Kind::Open(Delimiter::Brace) = kind { + todo!(); // check for attr + } + } + + if let Some(Event::Atom(Str)) = self.events.last() { + } else { + self.events.push(Event::Atom(Str)); + } + } +} + +#[cfg(test)] +mod test { + use super::Atom::*; + use super::Event::*; + + #[test] + fn container_brace() { + let mut p = super::Parser::new(); + assert_eq!( + &[Atom(Str)], + p.parse("{_hej_}").collect::>().as_slice(), + ); + } +} diff --git a/src/lex.rs b/src/lex.rs index 958dba1..1da8f21 100644 --- a/src/lex.rs +++ b/src/lex.rs @@ -22,6 +22,7 @@ pub enum Kind { Close(Delimiter), Sym(Symbol), Seq(Sequence), + Eof, } #[derive(Debug, PartialEq, Eq)] @@ -95,12 +96,31 @@ impl<'s> Lexer<'s> { } } + pub fn next_token(&mut self) -> Token { + if let Some(token) = self.next.take() { + token + } else { + let mut current = self.token(); + + // concatenate text tokens + if let Token { kind: Text, len } = &mut current { + self.next = Some(self.token()); + while let Some(Token { kind: Text, len: l }) = self.next { + *len += l; + self.next = Some(self.token()); + } + } + + current + } + } + fn peek(&mut self) -> char { self.chars.clone().next().unwrap_or(EOF) } - fn eat(&mut self) -> Option { - self.chars.next() + fn eat(&mut self) -> char { + self.chars.next().unwrap_or(EOF) } fn len(&self) -> usize { @@ -113,12 +133,14 @@ impl<'s> Lexer<'s> { } } - fn token(&mut self) -> Option { - let first = self.eat()?; + fn token(&mut self) -> Token { + let first = self.eat(); let escape = self.escape; let kind = match first { + EOF => Eof, + _ if escape && first == ' ' => Nbsp, _ if escape => Text, @@ -202,7 +224,7 @@ impl<'s> Lexer<'s> { let len = self.len(); - Some(Token { kind, len }) + Token { kind, len } } fn eat_seq(&mut self, s: Sequence) -> Kind { @@ -220,29 +242,6 @@ impl<'s> Lexer<'s> { } } -impl<'s> Iterator for Lexer<'s> { - type Item = Token; - - fn next(&mut self) -> Option { - if let Some(token) = self.next.take() { - Some(token) - } else { - let mut current = self.token(); - - // concatenate text tokens - if let Some(Token { kind: Text, len }) = &mut current { - self.next = self.token(); - while let Some(Token { kind: Text, len: l }) = self.next { - *len += l; - self.next = self.token(); - } - } - - current - } - } -} - #[cfg(test)] mod test { use super::Delimiter::*; @@ -250,10 +249,22 @@ mod test { use super::Sequence::*; use super::Symbol::*; + fn tokenize(src: &str) -> impl Iterator + '_ { + let mut lexer = super::Lexer::new(src); + std::iter::from_fn(move || { + let tok = lexer.next_token(); + if matches!(tok.kind, Eof) { + None + } else { + Some(tok) + } + }) + } + macro_rules! test_lex { ($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => { #[allow(unused)] - let actual = super::Lexer::new($src).map(|t| t.kind).collect::>(); + let actual = tokenize($src).map(|t| t.kind).collect::>(); let expected = vec![$($($token),*,)?]; assert_eq!(actual, expected, "{}", $src); };