inline: move lexer and span into a separate Input object

This makes mutable borrows easier to handle: code can borrow self.input
instead of the whole of self.

For example, other parser state can be borrowed mutably while new tokens are still being eaten.
This commit is contained in:
Noah Hellman 2023-02-22 19:24:02 +01:00
parent 5d6d0e0840
commit e8e551fd8b

View file

@ -73,33 +73,24 @@ pub struct Event {
pub span: Span, pub span: Span,
} }
pub struct Parser<I: Iterator + Clone> { pub struct Input<I: Iterator + Clone> {
/// Lexer, hosting upcoming source. /// Lexer, hosting source.
lexer: lex::Lexer<I>, lexer: lex::Lexer<I>,
/// Span of current event. /// Span of current event.
span: Span, span: Span,
/// Stack with kind and index of _potential_ openers for containers.
openers: Vec<(Delim, usize)>,
/// Buffer queue for next events. Events are buffered until no modifications due to future
/// characters are needed.
events: std::collections::VecDeque<Event>,
} }
impl<I: Iterator<Item = char> + Clone> Parser<I> { impl<I: Iterator<Item = char> + Clone> Input<I> {
pub fn new(chars: I) -> Self { fn new(chars: I) -> Self {
Self { Self {
lexer: lex::Lexer::new(chars), lexer: lex::Lexer::new(chars),
span: Span::new(0, 0), span: Span::new(0, 0),
openers: Vec::new(),
events: std::collections::VecDeque::new(),
} }
} }
pub fn reset(&mut self, chars: I) { fn reset(&mut self, chars: I) {
self.lexer = lex::Lexer::new(chars); self.lexer = lex::Lexer::new(chars);
self.span = Span::new(0, 0); self.span = Span::new(0, 0);
self.openers.clear();
debug_assert!(self.events.is_empty());
} }
fn eat(&mut self) -> Option<lex::Token> { fn eat(&mut self) -> Option<lex::Token> {
@ -118,18 +109,62 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
self.span = self.span.empty_after(); self.span = self.span.empty_after();
} }
/// Skip over any attribute sets that directly follow the current span.
///
/// An iterator obtained from `self.lexer.chars()` is probed with `attr::valid` for as long as
/// the probe reports a non-zero length. After every accepted probe the lexer is rebuilt from
/// the lookahead iterator, so the matched characters are no longer upcoming. `self.span` is
/// not modified here.
///
/// Returns `None` when the first probe reports length 0. Otherwise returns a pair of:
/// - the OR of the flags reported by every probe, and
/// - a span that starts as `self.span.empty_after()` and is extended by each accepted length.
fn ahead_attributes(&mut self) -> Option<(bool, Span)> {
    let mut covered = self.span.empty_after();
    let mut lookahead = self.lexer.chars();
    let (mut len, mut any_non_empty) = attr::valid(&mut lookahead);

    // Nothing attribute-like ahead: leave the lexer exactly as it was.
    if len == 0 {
        return None;
    }

    while len > 0 {
        covered = covered.extend(len);
        // Snapshot the lookahead before the next probe: `attr::valid` receives
        // `&mut lookahead`, and the lexer has to resume right after the last accepted match.
        self.lexer = lex::Lexer::new(lookahead.clone());
        let (next_len, non_empty) = attr::valid(&mut lookahead);
        any_non_empty |= non_empty;
        len = next_len;
    }

    Some((any_non_empty, covered))
}
}
/// Parser state: the input being read, plus bookkeeping for container openers and a queue of
/// buffered events.
pub struct Parser<I: Iterator + Clone> {
/// Source input; tokens are eaten and the current span is tracked through this object.
input: Input<I>,
/// Stack with kind and index of _potential_ openers for containers.
openers: Vec<(Delim, usize)>,
/// Buffer queue for next events. Events are buffered until no modifications due to future
/// characters are needed.
events: std::collections::VecDeque<Event>,
}
impl<I: Iterator<Item = char> + Clone> Parser<I> {
/// Create a parser that reads from `chars`, with no recorded openers and no buffered events.
pub fn new(chars: I) -> Self {
    let input = Input::new(chars);
    let openers = Vec::new();
    let events = std::collections::VecDeque::new();
    Self {
        input,
        openers,
        events,
    }
}
pub fn reset(&mut self, chars: I) {
self.input.reset(chars);
self.openers.clear();
debug_assert!(self.events.is_empty());
}
fn push_sp(&mut self, kind: EventKind, span: Span) -> Option<()> { fn push_sp(&mut self, kind: EventKind, span: Span) -> Option<()> {
self.events.push_back(Event { kind, span }); self.events.push_back(Event { kind, span });
Some(()) Some(())
} }
fn push(&mut self, kind: EventKind) -> Option<()> { fn push(&mut self, kind: EventKind) -> Option<()> {
self.push_sp(kind, self.span) self.push_sp(kind, self.input.span)
} }
fn parse_event(&mut self) -> Option<()> { fn parse_event(&mut self) -> Option<()> {
self.reset_span(); self.input.reset_span();
self.eat().map(|first| { self.input.eat().map(|first| {
self.parse_verbatim(&first) self.parse_verbatim(&first)
.or_else(|| self.parse_attributes(&first)) .or_else(|| self.parse_attributes(&first))
.or_else(|| self.parse_autolink(&first)) .or_else(|| self.parse_autolink(&first))
@ -155,7 +190,7 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
if let Some(lex::Token { if let Some(lex::Token {
kind: lex::Kind::Seq(Sequence::Backtick), kind: lex::Kind::Seq(Sequence::Backtick),
len, len,
}) = self.peek() }) = self.input.peek()
{ {
Some(( Some((
if first.len == 2 { if first.len == 2 {
@ -171,7 +206,7 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
}) })
.flatten(); .flatten();
if math_opt.is_some() { if math_opt.is_some() {
self.eat(); // backticks self.input.eat(); // backticks
} }
math_opt math_opt
} }
@ -180,25 +215,28 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
}?; }?;
let e_attr = self.events.len(); let e_attr = self.events.len();
self.push_sp(EventKind::Placeholder, Span::empty_at(self.span.start())); self.push_sp(
EventKind::Placeholder,
Span::empty_at(self.input.span.start()),
);
let opener_event = self.events.len(); let opener_event = self.events.len();
self.push(EventKind::Enter(kind)); self.push(EventKind::Enter(kind));
let mut span_inner = self.span.empty_after(); let mut span_inner = self.input.span.empty_after();
let mut span_outer = None; let mut span_outer = None;
let mut non_whitespace_first = None; let mut non_whitespace_first = None;
let mut non_whitespace_last = None; let mut non_whitespace_last = None;
while let Some(t) = self.eat() { while let Some(t) = self.input.eat() {
if matches!(t.kind, lex::Kind::Seq(Sequence::Backtick)) && t.len == opener_len { if matches!(t.kind, lex::Kind::Seq(Sequence::Backtick)) && t.len == opener_len {
if matches!(kind, Verbatim) if matches!(kind, Verbatim)
&& matches!( && matches!(
self.lexer.peek().map(|t| &t.kind), self.input.peek().map(|t| &t.kind),
Some(lex::Kind::Open(Delimiter::BraceEqual)) Some(lex::Kind::Open(Delimiter::BraceEqual))
) )
{ {
let mut ahead = self.lexer.chars(); let mut ahead = self.input.lexer.chars();
let mut end = false; let mut end = false;
let len = (&mut ahead) let len = (&mut ahead)
.skip(2) // {= .skip(2) // {=
@ -214,7 +252,7 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
.map(char::len_utf8) .map(char::len_utf8)
.sum(); .sum();
if len > 0 && end { if len > 0 && end {
let tok = self.eat(); let tok = self.input.eat();
debug_assert_eq!( debug_assert_eq!(
tok, tok,
Some(lex::Token { Some(lex::Token {
@ -222,12 +260,12 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
len: 2, len: 2,
}) })
); );
self.lexer = lex::Lexer::new(ahead); self.input.lexer = lex::Lexer::new(ahead);
let span_format = self.span.after(len); let span_format = self.input.span.after(len);
kind = RawFormat; kind = RawFormat;
self.events[opener_event].kind = EventKind::Enter(kind); self.events[opener_event].kind = EventKind::Enter(kind);
self.events[opener_event].span = span_format; self.events[opener_event].span = span_format;
self.span = span_format.translate(1); // } self.input.span = span_format.translate(1); // }
span_outer = Some(span_format); span_outer = Some(span_format);
} }
} }
@ -240,7 +278,7 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
non_whitespace_last = Some((t.kind, span_inner.end() + t.len)); non_whitespace_last = Some((t.kind, span_inner.end() + t.len));
} }
span_inner = span_inner.extend(t.len); span_inner = span_inner.extend(t.len);
self.reset_span(); self.input.reset_span();
} }
if let Some((lex::Kind::Seq(Sequence::Backtick), pos)) = non_whitespace_first { if let Some((lex::Kind::Seq(Sequence::Backtick), pos)) = non_whitespace_first {
@ -251,10 +289,10 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
} }
self.push_sp(EventKind::Str, span_inner); self.push_sp(EventKind::Str, span_inner);
self.push_sp(EventKind::Exit(kind), span_outer.unwrap_or(self.span)); self.push_sp(EventKind::Exit(kind), span_outer.unwrap_or(self.input.span));
if let Some((non_empty, span)) = self.ahead_attributes() { if let Some((non_empty, span)) = self.input.ahead_attributes() {
self.span = span; self.input.span = span;
if non_empty { if non_empty {
self.events[e_attr] = Event { self.events[e_attr] = Event {
kind: EventKind::Attributes, kind: EventKind::Attributes,
@ -268,13 +306,13 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
fn parse_attributes(&mut self, first: &lex::Token) -> Option<()> { fn parse_attributes(&mut self, first: &lex::Token) -> Option<()> {
if first.kind == lex::Kind::Open(Delimiter::Brace) { if first.kind == lex::Kind::Open(Delimiter::Brace) {
let mut ahead = self.lexer.chars(); let mut ahead = self.input.lexer.chars();
let (mut attr_len, mut has_attr) = attr::valid(std::iter::once('{').chain(&mut ahead)); let (mut attr_len, mut has_attr) = attr::valid(std::iter::once('{').chain(&mut ahead));
attr_len = attr_len.saturating_sub(1); // rm { attr_len = attr_len.saturating_sub(1); // rm {
if attr_len > 0 { if attr_len > 0 {
while attr_len > 0 { while attr_len > 0 {
self.span = self.span.extend(attr_len); self.input.span = self.input.span.extend(attr_len);
self.lexer = lex::Lexer::new(ahead.clone()); self.input.lexer = lex::Lexer::new(ahead.clone());
let (l, non_empty) = attr::valid(&mut ahead); let (l, non_empty) = attr::valid(&mut ahead);
attr_len = l; attr_len = l;
@ -303,7 +341,7 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
self.push_sp(EventKind::Str, span_str); self.push_sp(EventKind::Str, span_str);
return self.push_sp(EventKind::Exit(Span), span_str.empty_after()); return self.push_sp(EventKind::Exit(Span), span_str.empty_after());
} else { } else {
return self.push_sp(EventKind::Placeholder, self.span.empty_before()); return self.push_sp(EventKind::Placeholder, self.input.span.empty_before());
} }
} }
} }
@ -313,7 +351,7 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
fn parse_autolink(&mut self, first: &lex::Token) -> Option<()> { fn parse_autolink(&mut self, first: &lex::Token) -> Option<()> {
if first.kind == lex::Kind::Sym(Symbol::Lt) { if first.kind == lex::Kind::Sym(Symbol::Lt) {
let mut ahead = self.lexer.chars(); let mut ahead = self.input.lexer.chars();
let mut end = false; let mut end = false;
let mut is_url = false; let mut is_url = false;
let len = (&mut ahead) let len = (&mut ahead)
@ -332,11 +370,11 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
.map(char::len_utf8) .map(char::len_utf8)
.sum(); .sum();
if end && is_url { if end && is_url {
self.lexer = lex::Lexer::new(ahead); self.input.lexer = lex::Lexer::new(ahead);
self.span = self.span.after(len); self.input.span = self.input.span.after(len);
self.push(EventKind::Enter(Autolink)); self.push(EventKind::Enter(Autolink));
self.push(EventKind::Str); self.push(EventKind::Str);
self.span = self.span.after(1); self.input.span = self.input.span.after(1);
return self.push(EventKind::Exit(Autolink)); return self.push(EventKind::Exit(Autolink));
} }
} }
@ -345,7 +383,7 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
fn parse_symbol(&mut self, first: &lex::Token) -> Option<()> { fn parse_symbol(&mut self, first: &lex::Token) -> Option<()> {
if first.kind == lex::Kind::Sym(Symbol::Colon) { if first.kind == lex::Kind::Sym(Symbol::Colon) {
let mut ahead = self.lexer.chars(); let mut ahead = self.input.lexer.chars();
let mut end = false; let mut end = false;
let mut valid = true; let mut valid = true;
let len = (&mut ahead) let len = (&mut ahead)
@ -360,10 +398,10 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
.map(char::len_utf8) .map(char::len_utf8)
.sum(); .sum();
if end && valid { if end && valid {
self.lexer = lex::Lexer::new(ahead); self.input.lexer = lex::Lexer::new(ahead);
self.span = self.span.after(len); self.input.span = self.input.span.after(len);
let span = self.span; let span = self.input.span;
self.span = self.span.after(1); self.input.span = self.input.span.after(1);
return self.push_sp(EventKind::Atom(Symbol), span); return self.push_sp(EventKind::Atom(Symbol), span);
} }
} }
@ -373,14 +411,14 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
fn parse_footnote_reference(&mut self, first: &lex::Token) -> Option<()> { fn parse_footnote_reference(&mut self, first: &lex::Token) -> Option<()> {
if first.kind == lex::Kind::Open(Delimiter::Bracket) if first.kind == lex::Kind::Open(Delimiter::Bracket)
&& matches!( && matches!(
self.peek(), self.input.peek(),
Some(lex::Token { Some(lex::Token {
kind: lex::Kind::Sym(Symbol::Caret), kind: lex::Kind::Sym(Symbol::Caret),
.. ..
}) })
) )
{ {
let tok = self.eat(); let tok = self.input.eat();
debug_assert_eq!( debug_assert_eq!(
tok, tok,
Some(lex::Token { Some(lex::Token {
@ -388,7 +426,7 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
len: 1, len: 1,
}) })
); );
let mut ahead = self.lexer.chars(); let mut ahead = self.input.lexer.chars();
let mut end = false; let mut end = false;
let len = (&mut ahead) let len = (&mut ahead)
.take_while(|c| { .take_while(|c| {
@ -403,10 +441,10 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
.map(char::len_utf8) .map(char::len_utf8)
.sum(); .sum();
if end { if end {
self.lexer = lex::Lexer::new(ahead); self.input.lexer = lex::Lexer::new(ahead);
self.span = self.span.after(len); self.input.span = self.input.span.after(len);
self.push(EventKind::Atom(FootnoteReference)); self.push(EventKind::Atom(FootnoteReference));
self.span = self.span.after(1); self.input.span = self.input.span.after(1);
return Some(()); return Some(());
} }
} }
@ -440,7 +478,7 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
return None; return None;
} }
let inner_span = self.events[e_opener].span.between(self.span); let inner_span = self.events[e_opener].span.between(self.input.span);
let mut closed = match DelimEventKind::from(d) { let mut closed = match DelimEventKind::from(d) {
DelimEventKind::Container(cont) => { DelimEventKind::Container(cont) => {
self.events[e_opener].kind = EventKind::Enter(cont); self.events[e_opener].kind = EventKind::Enter(cont);
@ -467,7 +505,7 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
} }
} }
if let Some((non_empty, span)) = self.ahead_attributes() { if let Some((non_empty, span)) = self.input.ahead_attributes() {
if non_empty { if non_empty {
self.events[e_attr] = Event { self.events[e_attr] = Event {
kind: EventKind::Attributes, kind: EventKind::Attributes,
@ -480,7 +518,7 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
closed = self.push(EventKind::Exit(Container::Span)); closed = self.push(EventKind::Exit(Container::Span));
} }
self.span = span; self.input.span = span;
} }
closed closed
@ -491,6 +529,7 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
} }
if matches!(dir, Dir::Both) if matches!(dir, Dir::Both)
&& self && self
.input
.peek() .peek()
.map_or(true, |t| matches!(t.kind, lex::Kind::Whitespace)) .map_or(true, |t| matches!(t.kind, lex::Kind::Whitespace))
{ {
@ -506,7 +545,10 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
} }
self.openers.push((delim, self.events.len())); self.openers.push((delim, self.events.len()));
// push dummy event in case attributes are encountered after closing delimiter // push dummy event in case attributes are encountered after closing delimiter
self.push_sp(EventKind::Placeholder, Span::empty_at(self.span.start())); self.push_sp(
EventKind::Placeholder,
Span::empty_at(self.input.span.start()),
);
// use non-opener for now, replace if closed later // use non-opener for now, replace if closed later
self.push(match delim { self.push(match delim {
Delim::SingleQuoted => EventKind::Atom(Quote { Delim::SingleQuoted => EventKind::Atom(Quote {
@ -522,27 +564,8 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
}) })
} }
/// Skip over any attribute sets that directly follow the current span.
///
/// Returns `None` when the first `attr::valid` probe reports length 0. Otherwise returns the
/// OR of the flags reported by every probe, together with a span that starts as
/// `self.span.empty_after()` and is extended by each accepted length. `self.span` itself is
/// not modified here.
fn ahead_attributes(&mut self) -> Option<(bool, Span)> {
let mut span = self.span.empty_after();
let mut ahead = self.lexer.chars();
let (mut attr_len, mut has_attr) = attr::valid(&mut ahead);
if attr_len > 0 {
while attr_len > 0 {
span = span.extend(attr_len);
// Rebuild the lexer from a snapshot taken before the next probe, so it resumes right
// after the last accepted match.
self.lexer = lex::Lexer::new(ahead.clone());
let (l, non_empty) = attr::valid(&mut ahead);
has_attr |= non_empty;
attr_len = l;
}
Some((has_attr, span))
} else {
// Nothing attribute-like ahead: the lexer is left as it was.
None
}
}
fn post_span(&mut self, ty: SpanType, opener_event: usize) -> Option<()> { fn post_span(&mut self, ty: SpanType, opener_event: usize) -> Option<()> {
let mut ahead = self.lexer.chars(); let mut ahead = self.input.lexer.chars();
let (kind, span) = match ahead.next() { let (kind, span) = match ahead.next() {
Some(opener @ ('[' | '(')) => { Some(opener @ ('[' | '(')) => {
let img = ty == SpanType::Image; let img = ty == SpanType::Image;
@ -565,17 +588,17 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
.map(char::len_utf8) .map(char::len_utf8)
.sum(); .sum();
end.then(|| { end.then(|| {
let span = self.span.after(len).translate(1); let span = self.input.span.after(len).translate(1);
(kind, span) (kind, span)
}) })
} }
_ => None, _ => None,
}?; }?;
self.lexer = lex::Lexer::new(ahead); self.input.lexer = lex::Lexer::new(ahead);
self.events[opener_event].kind = EventKind::Enter(kind); self.events[opener_event].kind = EventKind::Enter(kind);
self.events[opener_event].span = span; self.events[opener_event].span = span;
self.span = span.translate(1); self.input.span = span.translate(1);
self.push_sp(EventKind::Exit(kind), span) self.push_sp(EventKind::Exit(kind), span)
} }
@ -586,11 +609,11 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
lex::Kind::Escape => Escape, lex::Kind::Escape => Escape,
lex::Kind::Nbsp => Nbsp, lex::Kind::Nbsp => Nbsp,
lex::Kind::Seq(Sequence::Period) if first.len >= 3 => { lex::Kind::Seq(Sequence::Period) if first.len >= 3 => {
while self.span.len() > 3 { while self.input.span.len() > 3 {
self.push_sp(EventKind::Atom(Ellipsis), self.span.with_len(3)); self.push_sp(EventKind::Atom(Ellipsis), self.input.span.with_len(3));
self.span = self.span.skip(3); self.input.span = self.input.span.skip(3);
} }
if self.span.len() == 3 { if self.input.span.len() == 3 {
Ellipsis Ellipsis
} else { } else {
return self.push(EventKind::Str); return self.push(EventKind::Str);
@ -610,8 +633,8 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
.chain(std::iter::repeat(EnDash).take(n)) .chain(std::iter::repeat(EnDash).take(n))
.for_each(|atom| { .for_each(|atom| {
let l = if matches!(atom, EnDash) { 2 } else { 3 }; let l = if matches!(atom, EnDash) { 2 } else { 3 };
self.push_sp(EventKind::Atom(atom), self.span.with_len(l)); self.push_sp(EventKind::Atom(atom), self.input.span.with_len(l));
self.span = self.span.skip(l); self.input.span = self.input.span.skip(l);
}); });
return Some(()); return Some(());
} }