From 4a7967812e9db30efd3612113749bcc5b955b7b6 Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Tue, 17 Jan 2023 22:36:10 +0100 Subject: [PATCH] lex: fix buggy peek --- src/inline.rs | 23 ++++++++++++++++------- src/lex.rs | 24 ++++++++++++++++++------ 2 files changed, 34 insertions(+), 13 deletions(-) diff --git a/src/inline.rs b/src/inline.rs index a80d611..34e6c0a 100644 --- a/src/inline.rs +++ b/src/inline.rs @@ -67,7 +67,7 @@ pub struct Event { pub span: Span, } -pub struct Parser { +pub struct Parser { /// Lexer, hosting upcoming source. lexer: lex::Lexer, /// Span of current event. @@ -177,9 +177,10 @@ impl + Clone> Parser { Some(lex::Kind::Open(Delimiter::BraceEqual)) ) { - let mut ahead = self.lexer.inner().clone(); + let mut ahead = self.lexer.chars(); let mut end = false; let len = (&mut ahead) + .skip(2) // {= .take_while(|c| { if *c == '{' { return false; @@ -191,8 +192,16 @@ impl + Clone> Parser { }) .count(); if len > 0 && end { + let tok = self.eat(); + debug_assert_eq!( + tok, + Some(lex::Token { + kind: lex::Kind::Open(Delimiter::BraceEqual), + len: 2, + }) + ); self.lexer = lex::Lexer::new(ahead); - let span_format = Span::by_len(self.span.end() + "{=".len(), len); + let span_format = Span::by_len(self.span.end(), len); kind = RawFormat; self.events[opener_event].kind = EventKind::Enter(kind); self.events[opener_event].span = span_format; @@ -238,7 +247,7 @@ impl + Clone> Parser { .back() .map_or(false, |e| e.kind == EventKind::Str) { - let mut ahead = self.lexer.inner().clone(); + let mut ahead = self.lexer.chars(); let (mut attr_len, mut has_attr) = attr::valid(std::iter::once('{').chain(&mut ahead)); attr_len -= 1; // rm { if attr_len > 0 { @@ -296,7 +305,7 @@ impl + Clone> Parser { fn parse_autolink(&mut self, first: &lex::Token) -> Option { if first.kind == lex::Kind::Sym(Symbol::Lt) { - let mut ahead = self.lexer.inner().clone(); + let mut ahead = self.lexer.chars(); let mut end = false; let mut is_url = false; let len = (&mut ahead) @@ 
-372,7 +381,7 @@ impl + Clone> Parser { } } - let mut ahead = self.lexer.inner().clone(); + let mut ahead = self.lexer.chars(); let (mut attr_len, mut has_attr) = attr::valid(&mut ahead); if attr_len > 0 { let span_closer = self.span; @@ -429,7 +438,7 @@ impl + Clone> Parser { } fn post_span(&mut self, ty: SpanType, opener_event: usize) -> Option { - let mut ahead = self.lexer.inner().clone(); + let mut ahead = self.lexer.chars(); match ahead.next() { Some(opener @ ('[' | '(')) => { let img = ty == SpanType::Image; diff --git a/src/lex.rs b/src/lex.rs index 833bb32..1390d88 100644 --- a/src/lex.rs +++ b/src/lex.rs @@ -82,8 +82,9 @@ impl Sequence { } #[derive(Clone)] -pub(crate) struct Lexer { +pub(crate) struct Lexer { chars: I, + chars_non_peeked: I, /// Next character should be escaped. escape: bool, /// Token to be peeked or next'ed. @@ -95,26 +96,30 @@ pub(crate) struct Lexer { impl + Clone> Lexer { pub fn new(chars: I) -> Lexer { Lexer { - chars, + chars: chars.clone(), + chars_non_peeked: chars, escape: false, next: None, len: 0, } } + /// NOTE: Peeked [`Kind::Text`] tokens are only one char long, they may be longer when + /// consumed. 
pub fn peek(&mut self) -> Option<&Token> { if self.next.is_none() { - self.next = self.next_token(); + self.next = self.token(); } self.next.as_ref() } - pub fn inner(&self) -> &I { - &self.chars + pub fn chars(&self) -> I { + self.chars_non_peeked.clone() } fn next_token(&mut self) -> Option<Token> { let mut current = self.token(); + self.chars_non_peeked = self.chars.clone(); // concatenate text tokens if let Some(Token { kind: Text, len }) = &mut current { @@ -145,6 +150,7 @@ impl<I: Iterator<Item = char> + Clone> Lexer<I> { } fn token(&mut self) -> Option<Token> { + self.chars_non_peeked = self.chars.clone(); self.len = 0; let first = self.eat_char()?; @@ -271,7 +277,13 @@ impl<I: Iterator<Item = char> + Clone> Iterator for Lexer<I> { type Item = Token; fn next(&mut self) -> Option<Self::Item> { - self.next.take().or_else(|| self.next_token()) + self.next + .take() + .map(|x| { + self.chars_non_peeked = self.chars.clone(); + x + }) + .or_else(|| self.next_token()) } }