From e8e551fd8bb5a7a9cdee96106770bea054958512 Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Wed, 22 Feb 2023 19:24:02 +0100 Subject: [PATCH] inline: separate lex, span to separate Input object easier handling of mutable pointers, can borrow self.input instead of whole self can e.g. borrow mutable state while still eating new tokens --- src/inline.rs | 191 ++++++++++++++++++++++++++++---------------------- 1 file changed, 107 insertions(+), 84 deletions(-) diff --git a/src/inline.rs b/src/inline.rs index 53be6f1..d0e2d16 100644 --- a/src/inline.rs +++ b/src/inline.rs @@ -73,33 +73,24 @@ pub struct Event { pub span: Span, } -pub struct Parser { - /// Lexer, hosting upcoming source. +pub struct Input { + /// Lexer, hosting source. lexer: lex::Lexer, /// Span of current event. span: Span, - /// Stack with kind and index of _potential_ openers for containers. - openers: Vec<(Delim, usize)>, - /// Buffer queue for next events. Events are buffered until no modifications due to future - /// characters are needed. - events: std::collections::VecDeque, } -impl + Clone> Parser { - pub fn new(chars: I) -> Self { +impl + Clone> Input { + fn new(chars: I) -> Self { Self { lexer: lex::Lexer::new(chars), span: Span::new(0, 0), - openers: Vec::new(), - events: std::collections::VecDeque::new(), } } - pub fn reset(&mut self, chars: I) { + fn reset(&mut self, chars: I) { self.lexer = lex::Lexer::new(chars); self.span = Span::new(0, 0); - self.openers.clear(); - debug_assert!(self.events.is_empty()); } fn eat(&mut self) -> Option { @@ -118,18 +109,62 @@ impl + Clone> Parser { self.span = self.span.empty_after(); } + fn ahead_attributes(&mut self) -> Option<(bool, Span)> { + let mut span = self.span.empty_after(); + let mut ahead = self.lexer.chars(); + let (mut attr_len, mut has_attr) = attr::valid(&mut ahead); + if attr_len > 0 { + while attr_len > 0 { + span = span.extend(attr_len); + self.lexer = lex::Lexer::new(ahead.clone()); + + let (l, non_empty) = attr::valid(&mut ahead); + has_attr |= non_empty; + attr_len = l; + } + Some((has_attr, span)) + } else { + None + } + } +} + +pub struct Parser { + input: Input, + /// Stack with kind and index of _potential_ openers for containers. + openers: Vec<(Delim, usize)>, + /// Buffer queue for next events. Events are buffered until no modifications due to future + /// characters are needed. + events: std::collections::VecDeque, +} + +impl + Clone> Parser { + pub fn new(chars: I) -> Self { + Self { + input: Input::new(chars), + openers: Vec::new(), + events: std::collections::VecDeque::new(), + } + } + + pub fn reset(&mut self, chars: I) { + self.input.reset(chars); + self.openers.clear(); + debug_assert!(self.events.is_empty()); + } + fn push_sp(&mut self, kind: EventKind, span: Span) -> Option<()> { self.events.push_back(Event { kind, span }); Some(()) } fn push(&mut self, kind: EventKind) -> Option<()> { - self.push_sp(kind, self.span) + self.push_sp(kind, self.input.span) } fn parse_event(&mut self) -> Option<()> { - self.reset_span(); - self.eat().map(|first| { + self.input.reset_span(); + self.input.eat().map(|first| { self.parse_verbatim(&first) .or_else(|| self.parse_attributes(&first)) .or_else(|| self.parse_autolink(&first)) @@ -155,7 +190,7 @@ impl + Clone> Parser { if let Some(lex::Token { kind: lex::Kind::Seq(Sequence::Backtick), len, - }) = self.peek() + }) = self.input.peek() { Some(( if first.len == 2 { @@ -171,7 +206,7 @@ impl + Clone> Parser { }) .flatten(); if math_opt.is_some() { - self.eat(); // backticks + self.input.eat(); // backticks } math_opt } @@ -180,25 +215,28 @@ impl + Clone> Parser { }?; let e_attr = self.events.len(); - self.push_sp(EventKind::Placeholder, Span::empty_at(self.span.start())); + self.push_sp( + EventKind::Placeholder, + Span::empty_at(self.input.span.start()), + ); let opener_event = self.events.len(); self.push(EventKind::Enter(kind)); - let mut span_inner = self.span.empty_after(); + let mut span_inner = self.input.span.empty_after(); let mut span_outer = None; let mut non_whitespace_first = None; let mut non_whitespace_last = None; - while let Some(t) = self.eat() { + while let Some(t) = self.input.eat() { if matches!(t.kind, lex::Kind::Seq(Sequence::Backtick)) && t.len == opener_len { if matches!(kind, Verbatim) && matches!( - self.lexer.peek().map(|t| &t.kind), + self.input.peek().map(|t| &t.kind), Some(lex::Kind::Open(Delimiter::BraceEqual)) ) { - let mut ahead = self.lexer.chars(); + let mut ahead = self.input.lexer.chars(); let mut end = false; let len = (&mut ahead) .skip(2) // {= @@ -214,7 +252,7 @@ impl + Clone> Parser { .map(char::len_utf8) .sum(); if len > 0 && end { - let tok = self.eat(); + let tok = self.input.eat(); debug_assert_eq!( tok, Some(lex::Token { @@ -222,12 +260,12 @@ impl + Clone> Parser { len: 2, }) ); - self.lexer = lex::Lexer::new(ahead); - let span_format = self.span.after(len); + self.input.lexer = lex::Lexer::new(ahead); + let span_format = self.input.span.after(len); kind = RawFormat; self.events[opener_event].kind = EventKind::Enter(kind); self.events[opener_event].span = span_format; - self.span = span_format.translate(1); // } + self.input.span = span_format.translate(1); // } span_outer = Some(span_format); } } @@ -240,7 +278,7 @@ impl + Clone> Parser { non_whitespace_last = Some((t.kind, span_inner.end() + t.len)); } span_inner = span_inner.extend(t.len); - self.reset_span(); + self.input.reset_span(); } if let Some((lex::Kind::Seq(Sequence::Backtick), pos)) = non_whitespace_first { @@ -251,10 +289,10 @@ impl + Clone> Parser { } self.push_sp(EventKind::Str, span_inner); - self.push_sp(EventKind::Exit(kind), span_outer.unwrap_or(self.span)); + self.push_sp(EventKind::Exit(kind), span_outer.unwrap_or(self.input.span)); - if let Some((non_empty, span)) = self.ahead_attributes() { - self.span = span; + if let Some((non_empty, span)) = self.input.ahead_attributes() { + self.input.span = span; if non_empty { self.events[e_attr] = Event { kind: EventKind::Attributes, @@ -268,13 +306,13 @@ impl + Clone> Parser { fn parse_attributes(&mut self, first: &lex::Token) -> Option<()> { if first.kind == lex::Kind::Open(Delimiter::Brace) { - let mut ahead = self.lexer.chars(); + let mut ahead = self.input.lexer.chars(); let (mut attr_len, mut has_attr) = attr::valid(std::iter::once('{').chain(&mut ahead)); attr_len = attr_len.saturating_sub(1); // rm { if attr_len > 0 { while attr_len > 0 { - self.span = self.span.extend(attr_len); - self.lexer = lex::Lexer::new(ahead.clone()); + self.input.span = self.input.span.extend(attr_len); + self.input.lexer = lex::Lexer::new(ahead.clone()); let (l, non_empty) = attr::valid(&mut ahead); attr_len = l; @@ -303,7 +341,7 @@ impl + Clone> Parser { self.push_sp(EventKind::Str, span_str); return self.push_sp(EventKind::Exit(Span), span_str.empty_after()); } else { - return self.push_sp(EventKind::Placeholder, self.span.empty_before()); + return self.push_sp(EventKind::Placeholder, self.input.span.empty_before()); } } } @@ -313,7 +351,7 @@ impl + Clone> Parser { fn parse_autolink(&mut self, first: &lex::Token) -> Option<()> { if first.kind == lex::Kind::Sym(Symbol::Lt) { - let mut ahead = self.lexer.chars(); + let mut ahead = self.input.lexer.chars(); let mut end = false; let mut is_url = false; let len = (&mut ahead) @@ -332,11 +370,11 @@ impl + Clone> Parser { .map(char::len_utf8) .sum(); if end && is_url { - self.lexer = lex::Lexer::new(ahead); - self.span = self.span.after(len); + self.input.lexer = lex::Lexer::new(ahead); + self.input.span = self.input.span.after(len); self.push(EventKind::Enter(Autolink)); self.push(EventKind::Str); - self.span = self.span.after(1); + self.input.span = self.input.span.after(1); return self.push(EventKind::Exit(Autolink)); } } @@ -345,7 +383,7 @@ impl + Clone> Parser { fn parse_symbol(&mut self, first: &lex::Token) -> Option<()> { if first.kind == lex::Kind::Sym(Symbol::Colon) { - let mut ahead = self.lexer.chars(); + let mut ahead = self.input.lexer.chars(); let mut end = false; let mut valid = true; let len = (&mut ahead) @@ -360,10 +398,10 @@ impl + Clone> Parser { .map(char::len_utf8) .sum(); if end && valid { - self.lexer = lex::Lexer::new(ahead); - self.span = self.span.after(len); - let span = self.span; - self.span = self.span.after(1); + self.input.lexer = lex::Lexer::new(ahead); + self.input.span = self.input.span.after(len); + let span = self.input.span; + self.input.span = self.input.span.after(1); return self.push_sp(EventKind::Atom(Symbol), span); } } @@ -373,14 +411,14 @@ impl + Clone> Parser { fn parse_footnote_reference(&mut self, first: &lex::Token) -> Option<()> { if first.kind == lex::Kind::Open(Delimiter::Bracket) && matches!( - self.peek(), + self.input.peek(), Some(lex::Token { kind: lex::Kind::Sym(Symbol::Caret), .. }) ) { - let tok = self.eat(); + let tok = self.input.eat(); debug_assert_eq!( tok, Some(lex::Token { @@ -388,7 +426,7 @@ impl + Clone> Parser { len: 1, }) ); - let mut ahead = self.lexer.chars(); + let mut ahead = self.input.lexer.chars(); let mut end = false; let len = (&mut ahead) .take_while(|c| { @@ -403,10 +441,10 @@ impl + Clone> Parser { .map(char::len_utf8) .sum(); if end { - self.lexer = lex::Lexer::new(ahead); - self.span = self.span.after(len); + self.input.lexer = lex::Lexer::new(ahead); + self.input.span = self.input.span.after(len); self.push(EventKind::Atom(FootnoteReference)); - self.span = self.span.after(1); + self.input.span = self.input.span.after(1); return Some(()); } } @@ -440,7 +478,7 @@ impl + Clone> Parser { return None; } - let inner_span = self.events[e_opener].span.between(self.span); + let inner_span = self.events[e_opener].span.between(self.input.span); let mut closed = match DelimEventKind::from(d) { DelimEventKind::Container(cont) => { self.events[e_opener].kind = EventKind::Enter(cont); @@ -467,7 +505,7 @@ impl + Clone> Parser { } } - if let Some((non_empty, span)) = self.ahead_attributes() { + if let Some((non_empty, span)) = self.input.ahead_attributes() { if non_empty { self.events[e_attr] = Event { kind: EventKind::Attributes, @@ -480,7 +518,7 @@ impl + Clone> Parser { closed = self.push(EventKind::Exit(Container::Span)); } - self.span = span; + self.input.span = span; } closed @@ -491,6 +529,7 @@ impl + Clone> Parser { } if matches!(dir, Dir::Both) && self + .input .peek() .map_or(true, |t| matches!(t.kind, lex::Kind::Whitespace)) { @@ -506,7 +545,10 @@ impl + Clone> Parser { } self.openers.push((delim, self.events.len())); // push dummy event in case attributes are encountered after closing delimiter - self.push_sp(EventKind::Placeholder, Span::empty_at(self.span.start())); + self.push_sp( + EventKind::Placeholder, + Span::empty_at(self.input.span.start()), + ); // use non-opener for now, replace if closed later self.push(match delim { Delim::SingleQuoted => EventKind::Atom(Quote { @@ -522,27 +564,8 @@ impl + Clone> Parser { }) } - fn ahead_attributes(&mut self) -> Option<(bool, Span)> { - let mut span = self.span.empty_after(); - let mut ahead = self.lexer.chars(); - let (mut attr_len, mut has_attr) = attr::valid(&mut ahead); - if attr_len > 0 { - while attr_len > 0 { - span = span.extend(attr_len); - self.lexer = lex::Lexer::new(ahead.clone()); - - let (l, non_empty) = attr::valid(&mut ahead); - has_attr |= non_empty; - attr_len = l; - } - Some((has_attr, span)) - } else { - None - } - } - fn post_span(&mut self, ty: SpanType, opener_event: usize) -> Option<()> { - let mut ahead = self.lexer.chars(); + let mut ahead = self.input.lexer.chars(); let (kind, span) = match ahead.next() { Some(opener @ ('[' | '(')) => { let img = ty == SpanType::Image; @@ -565,17 +588,17 @@ impl + Clone> Parser { .map(char::len_utf8) .sum(); end.then(|| { - let span = self.span.after(len).translate(1); + let span = self.input.span.after(len).translate(1); (kind, span) }) } _ => None, }?; - self.lexer = lex::Lexer::new(ahead); + self.input.lexer = lex::Lexer::new(ahead); self.events[opener_event].kind = EventKind::Enter(kind); self.events[opener_event].span = span; - self.span = span.translate(1); + self.input.span = span.translate(1); self.push_sp(EventKind::Exit(kind), span) } @@ -586,11 +609,11 @@ impl + Clone> Parser { lex::Kind::Escape => Escape, lex::Kind::Nbsp => Nbsp, lex::Kind::Seq(Sequence::Period) if first.len >= 3 => { - while self.span.len() > 3 { - self.push_sp(EventKind::Atom(Ellipsis), self.span.with_len(3)); - self.span = self.span.skip(3); + while self.input.span.len() > 3 { + self.push_sp(EventKind::Atom(Ellipsis), self.input.span.with_len(3)); + self.input.span = self.input.span.skip(3); } - if self.span.len() == 3 { + if self.input.span.len() == 3 { Ellipsis } else { return self.push(EventKind::Str); @@ -610,8 +633,8 @@ impl + Clone> Parser { .chain(std::iter::repeat(EnDash).take(n)) .for_each(|atom| { let l = if matches!(atom, EnDash) { 2 } else { 3 }; - self.push_sp(EventKind::Atom(atom), self.span.with_len(l)); - self.span = self.span.skip(l); + self.push_sp(EventKind::Atom(atom), self.input.span.with_len(l)); + self.input.span = self.input.span.skip(l); }); return Some(()); }