lex: fix buggy peek

Noah Hellman 2023-01-17 22:36:10 +01:00
parent 6af5c5c884
commit 4a7967812e
2 changed files with 34 additions and 13 deletions
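In the previous code, Lexer::peek lexed the upcoming token straight off the shared char iterator, so Lexer::inner handed the parser an iterator positioned after the peeked token, and, because next_token concatenates consecutive text tokens, possibly even further ahead than that. The parser's lookahead sites (raw-format "{=", attributes, autolinks, post-span attributes) cloned that iterator, so where their scan actually started depended on how much the peek had consumed. The fix snapshots the iterator before the buffered token is lexed (chars_non_peeked) and exposes the snapshot through the new Lexer::chars. A minimal, self-contained sketch of that pattern, with simplified names and single-character "tokens" rather than the crate's real Token type:

    // Sketch of the fix's approach: keep a snapshot of the char iterator taken
    // before the buffered ("peeked") token was lexed, and hand that out for
    // lookahead instead of the already-advanced iterator.
    #[derive(Clone)]
    struct PeekLexer<I: Iterator<Item = char> + Clone> {
        chars: I,            // advances as tokens are lexed, including the peeked one
        chars_non_peeked: I, // position from just before the buffered token
        next: Option<char>,  // buffered "token" (a single char here, for brevity)
    }

    impl<I: Iterator<Item = char> + Clone> PeekLexer<I> {
        fn new(chars: I) -> Self {
            PeekLexer {
                chars: chars.clone(),
                chars_non_peeked: chars,
                next: None,
            }
        }

        fn peek(&mut self) -> Option<&char> {
            if self.next.is_none() {
                // remember where this token starts before consuming it
                self.chars_non_peeked = self.chars.clone();
                self.next = self.chars.next();
            }
            self.next.as_ref()
        }

        /// Upcoming source, starting at the buffered token rather than after it.
        fn chars(&self) -> I {
            self.chars_non_peeked.clone()
        }
    }

    fn main() {
        let mut lexer = PeekLexer::new("abc".chars());
        assert_eq!(lexer.peek(), Some(&'a'));
        // `chars` has advanced past 'a', but lookahead still starts at it:
        assert_eq!(lexer.chars().collect::<String>(), "abc");
    }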

Changed file 1 of 2

@@ -67,7 +67,7 @@ pub struct Event {
     pub span: Span,
 }
 
-pub struct Parser<I> {
+pub struct Parser<I: Iterator + Clone> {
     /// Lexer, hosting upcoming source.
     lexer: lex::Lexer<I>,
     /// Span of current event.
@@ -177,9 +177,10 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
                 Some(lex::Kind::Open(Delimiter::BraceEqual))
             )
         {
-            let mut ahead = self.lexer.inner().clone();
+            let mut ahead = self.lexer.chars();
             let mut end = false;
             let len = (&mut ahead)
+                .skip(2) // {=
                 .take_while(|c| {
                     if *c == '{' {
                         return false;
@@ -191,8 +192,16 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
                 })
                 .count();
             if len > 0 && end {
+                let tok = self.eat();
+                debug_assert_eq!(
+                    tok,
+                    Some(lex::Token {
+                        kind: lex::Kind::Open(Delimiter::BraceEqual),
+                        len: 2,
+                    })
+                );
                 self.lexer = lex::Lexer::new(ahead);
-                let span_format = Span::by_len(self.span.end() + "{=".len(), len);
+                let span_format = Span::by_len(self.span.end(), len);
                 kind = RawFormat;
                 self.events[opener_event].kind = EventKind::Enter(kind);
                 self.events[opener_event].span = span_format;
@@ -238,7 +247,7 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
                 .back()
                 .map_or(false, |e| e.kind == EventKind::Str)
         {
-            let mut ahead = self.lexer.inner().clone();
+            let mut ahead = self.lexer.chars();
             let (mut attr_len, mut has_attr) = attr::valid(std::iter::once('{').chain(&mut ahead));
             attr_len -= 1; // rm {
             if attr_len > 0 {
@@ -296,7 +305,7 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
 
     fn parse_autolink(&mut self, first: &lex::Token) -> Option<Event> {
         if first.kind == lex::Kind::Sym(Symbol::Lt) {
-            let mut ahead = self.lexer.inner().clone();
+            let mut ahead = self.lexer.chars();
             let mut end = false;
             let mut is_url = false;
             let len = (&mut ahead)
@@ -372,7 +381,7 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
             }
         }
 
-        let mut ahead = self.lexer.inner().clone();
+        let mut ahead = self.lexer.chars();
         let (mut attr_len, mut has_attr) = attr::valid(&mut ahead);
         if attr_len > 0 {
             let span_closer = self.span;
@@ -429,7 +438,7 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
     }
 
     fn post_span(&mut self, ty: SpanType, opener_event: usize) -> Option<Event> {
-        let mut ahead = self.lexer.inner().clone();
+        let mut ahead = self.lexer.chars();
         match ahead.next() {
            Some(opener @ ('[' | '(')) => {
                 let img = ty == SpanType::Image;
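The parser-side changes compensate for the new lookahead position: chars() now starts at the peeked "{=" instead of after it, so the raw-format scan adds an explicit .skip(2), consumes the peeked token with self.eat() (the debug_assert_eq! documents that it is the two-character "{=" opener) before rebuilding the lexer from ahead, and drops the manual "{=".len() offset from the format span, since after eat() self.span covers "{=" itself. A small worked example of that offset, assuming Span::by_len(start, len) covers bytes start..start + len and that self.span is the verbatim closer at this point (both assumptions, not stated in the diff):

    fn main() {
        // Hypothetical input: `raw`{=html}   -- '{' is byte 5, 'h' is byte 7.
        // Old: self.span is still the closing backtick (4..5), so the start of
        //      the format name had to be bumped past "{=" by hand.
        let old_start = 5 + "{=".len();
        // New: self.eat() consumes the peeked "{=" token, leaving self.span = 5..7,
        //      whose end is already the first byte of the format name.
        let new_start = 7;
        assert_eq!(old_start, new_start); // "html" begins at byte 7 either way
    }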

Changed file 2 of 2

@@ -82,8 +82,9 @@ impl Sequence {
 }
 
 #[derive(Clone)]
-pub(crate) struct Lexer<I> {
+pub(crate) struct Lexer<I: Iterator + Clone> {
     chars: I,
+    chars_non_peeked: I,
     /// Next character should be escaped.
     escape: bool,
     /// Token to be peeked or next'ed.
@@ -95,26 +96,30 @@ pub(crate) struct Lexer<I> {
 impl<I: Iterator<Item = char> + Clone> Lexer<I> {
     pub fn new(chars: I) -> Lexer<I> {
         Lexer {
-            chars,
+            chars: chars.clone(),
+            chars_non_peeked: chars,
             escape: false,
             next: None,
             len: 0,
         }
     }
 
+    /// NOTE: Peeked [`Kind::Text`] tokens are only one char long, they may be longer when
+    /// consumed.
     pub fn peek(&mut self) -> Option<&Token> {
         if self.next.is_none() {
-            self.next = self.next_token();
+            self.next = self.token();
         }
         self.next.as_ref()
     }
 
-    pub fn inner(&self) -> &I {
-        &self.chars
+    pub fn chars(&self) -> I {
+        self.chars_non_peeked.clone()
     }
 
     fn next_token(&mut self) -> Option<Token> {
         let mut current = self.token();
+        self.chars_non_peeked = self.chars.clone();
 
         // concatenate text tokens
         if let Some(Token { kind: Text, len }) = &mut current {
@@ -145,6 +150,7 @@ impl<I: Iterator<Item = char> + Clone> Lexer<I> {
     }
 
     fn token(&mut self) -> Option<Token> {
+        self.chars_non_peeked = self.chars.clone();
         self.len = 0;
 
         let first = self.eat_char()?;
@@ -271,7 +277,13 @@ impl<I: Iterator<Item = char> + Clone> Iterator for Lexer<I> {
     type Item = Token;
 
     fn next(&mut self) -> Option<Self::Item> {
-        self.next.take().or_else(|| self.next_token())
+        self.next
+            .take()
+            .map(|x| {
+                self.chars_non_peeked = self.chars.clone();
+                x
+            })
+            .or_else(|| self.next_token())
     }
 }
 
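On the lexer side, the net effect is that peeking no longer advances the stream handed back through chars(); only actually consuming a token does (note the chars_non_peeked updates in both token and Iterator::next). A sketch of that invariant as an in-crate test, assuming "{=" lexes as a single two-character Open(Delimiter::BraceEqual) token, as the parser's debug_assert_eq! above expects:

    #[test]
    fn peek_does_not_advance_chars() {
        let mut lexer = Lexer::new("{=html}".chars());
        assert_eq!(lexer.chars().collect::<String>(), "{=html}");

        lexer.peek(); // lexes "{=" into the internal buffer
        // Lookahead still starts at the buffered token, not after it.
        assert_eq!(lexer.chars().collect::<String>(), "{=html}");

        lexer.next(); // actually consume "{="
        assert_eq!(lexer.chars().collect::<String>(), "html}");
    }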