WIP: fix infinite loop in inline parser

This commit is contained in:
Noah Hellman 2022-11-21 22:40:11 +01:00
parent 6c5fbc2af1
commit 8bd9323c48
2 changed files with 206 additions and 170 deletions

View file

@ -1,4 +1,5 @@
use crate::lex; use crate::lex;
use crate::Span;
use lex::Delimiter; use lex::Delimiter;
use lex::Symbol; use lex::Symbol;
@ -54,7 +55,7 @@ pub enum Container {
Destination, Destination,
} }
#[derive(Debug)] #[derive(Debug, PartialEq, Eq)]
pub enum Event { pub enum Event {
Start(Container), Start(Container),
End(Container), End(Container),
@ -80,7 +81,6 @@ pub enum Dir {
pub struct Parser { pub struct Parser {
openers: Vec<Container>, openers: Vec<Container>,
events: Vec<Event>, events: Vec<Event>,
//tree: tree::Builder<Container, Atom>,
} }
impl Parser { impl Parser {
@ -91,68 +91,106 @@ impl Parser {
} }
} }
/* pub fn parse<'a>(&'a mut self, src: &'a str) -> impl Iterator<Item = Event> + 'a {
pub fn parse(mut self, src: &str) -> impl Iterator<Event> { std::iter::from_fn(|| {
todo!() if self.events.is_empty() {
Parse::new(src, &mut self.openers, &mut self.events).parse();
}
self.events.pop()
})
} }
*/
} }
struct Parse<'s> { struct Parse<'s> {
src: &'s str,
lexer: lex::Lexer<'s>, lexer: lex::Lexer<'s>,
openers: &'s mut Vec<Container>,
events: &'s mut Vec<Event>, events: &'s mut Vec<Event>,
/// Next token to be eaten.
next_token: lex::Token,
/// Position after `next_token`.
pos: usize,
/// Span of last eaten token.
span: Span,
} }
impl<'s> Parse<'s> { impl<'s> Parse<'s> {
fn new(src: &'s str, events: &'s mut Vec<Event>) -> Self { fn new(src: &'s str, openers: &'s mut Vec<Container>, events: &'s mut Vec<Event>) -> Self {
todo!() let mut lexer = lex::Lexer::new(src);
let next_token = lexer.next_token();
let pos = next_token.len;
Self {
lexer,
openers,
events,
next_token,
pos,
span: Span::new(0, 0),
}
}
fn step(&mut self) -> lex::Token {
let token = self.lexer.next_token();
dbg!(&token, self.pos);
self.pos += token.len;
std::mem::replace(&mut self.next_token, token)
}
fn eat(&mut self) -> lex::Kind {
let end = self.pos;
let token = self.step();
self.span = Span::new(end - token.len, end);
token.kind
}
fn peek(&mut self) -> &lex::Kind {
&self.next_token.kind
}
fn parse(&mut self) {
let mut kind = self.eat();
//dbg!(&kind);
if kind == lex::Kind::Eof {
return;
} }
/*
fn parse(mut self, src: &str, evs: &mut Vec<Event>) {
let mut chars = src.chars();
while let Some(t) = chars.next() {
{ {
let verbatim_opt = match t.kind { let verbatim_opt = match kind {
lex::Kind::Seq(lex::Sequence::Dollar) => { lex::Kind::Seq(lex::Sequence::Dollar) => {
let math_opt = (t.len <= 2) let math_opt = (self.span.len() <= 2)
.then(|| { .then(|| {
if let Some(lex::Token { if let lex::Kind::Seq(lex::Sequence::Backtick) = self.peek() {
kind: lex::Kind::Seq(lex::Sequence::Backtick), Some((DisplayMath, self.span.len()))
len,
}) = self.chars.clone().next()
{
Some((DisplayMath, *len))
} else { } else {
None None
} }
}) })
.flatten(); .flatten();
if math_opt.is_some() { if math_opt.is_some() {
chars.next(); // backticks self.eat(); // backticks
} }
math_opt math_opt
} }
lex::Kind::Seq(lex::Sequence::Backtick) => Some((Verbatim, t.len)), lex::Kind::Seq(lex::Sequence::Backtick) => Some((Verbatim, self.span.len())),
_ => None, _ => None,
}; };
if let Some((atom, opener_len)) = verbatim_opt { if let Some((atom, opener_len)) = verbatim_opt {
for tok in chars { while !matches!(kind, lex::Kind::Seq(lex::Sequence::Backtick))
if let lex::Kind::Seq(lex::Sequence::Backtick) = tok.kind { || self.span.len() != opener_len
if tok.len >= opener_len { {
break; kind = self.eat();
} }
} self.events.push(Event::Atom(atom));
}
evs.push(Event::Atom(atom));
return; return;
} }
} }
{ {
let container_opt = match t.kind { let container_opt = match kind {
lex::Kind::Sym(Symbol::Asterisk) => Some((Strong, Dir::Both)), lex::Kind::Sym(Symbol::Asterisk) => Some((Strong, Dir::Both)),
lex::Kind::Sym(Symbol::Underscore) => Some((Emphasis, Dir::Both)), lex::Kind::Sym(Symbol::Underscore) => Some((Emphasis, Dir::Both)),
lex::Kind::Sym(Symbol::Caret) => Some((Superscript, Dir::Both)), lex::Kind::Sym(Symbol::Caret) => Some((Superscript, Dir::Both)),
@ -182,7 +220,7 @@ impl<'s> Parse<'s> {
if matches!(ty, Dir::Close | Dir::Both) && self.openers.contains(&cont) { if matches!(ty, Dir::Close | Dir::Both) && self.openers.contains(&cont) {
loop { loop {
let c = self.openers.pop().unwrap(); let c = self.openers.pop().unwrap();
evs.push(Event::End(c)); self.events.push(Event::End(c));
if c == cont { if c == cont {
break; break;
} }
@ -190,49 +228,36 @@ impl<'s> Parse<'s> {
return; return;
} else if matches!(ty, Dir::Open | Dir::Both) { } else if matches!(ty, Dir::Open | Dir::Both) {
self.openers.push(cont); self.openers.push(cont);
evs.push(Event::Start(cont)); self.events.push(Event::Start(cont));
} }
return; return;
} }
} }
{ {
if let lex::Kind::Open(Delimiter::Brace) = t.kind { if let lex::Kind::Open(Delimiter::Brace) = kind {
todo!(); // check for attr todo!(); // check for attr
} }
} }
if let Some(Event::Atom(Str)) = evs.last() { if let Some(Event::Atom(Str)) = self.events.last() {
} else { } else {
evs.push(Event::Atom(Str)); self.events.push(Event::Atom(Str));
} }
} }
} }
*/
}
/* #[cfg(test)]
impl<'s> Iterator for Parser<'s> { mod test {
type Item = (Event<'s>, Span); use super::Atom::*;
use super::Event::*;
fn next(&mut self) -> Option<Self::Item> { #[test]
self.chars.next().map(|c| { fn container_brace() {
match c { let mut p = super::Parser::new();
'*' => todo!(), assert_eq!(
'_' => todo!(), &[Atom(Str)],
'^' => todo!(), p.parse("{_hej_}").collect::<Vec<_>>().as_slice(),
'~' => todo!(), );
'\'' => todo!(),
'"' => todo!(),
'$' => todo!(),
'<' => todo!(),
'{' => todo!(),
'[' => todo!(),
_ =>
}
})
} }
} }
*/
mod test {}

View file

@ -22,6 +22,7 @@ pub enum Kind {
Close(Delimiter), Close(Delimiter),
Sym(Symbol), Sym(Symbol),
Seq(Sequence), Seq(Sequence),
Eof,
} }
#[derive(Debug, PartialEq, Eq)] #[derive(Debug, PartialEq, Eq)]
@ -95,12 +96,31 @@ impl<'s> Lexer<'s> {
} }
} }
pub fn next_token(&mut self) -> Token {
if let Some(token) = self.next.take() {
token
} else {
let mut current = self.token();
// concatenate text tokens
if let Token { kind: Text, len } = &mut current {
self.next = Some(self.token());
while let Some(Token { kind: Text, len: l }) = self.next {
*len += l;
self.next = Some(self.token());
}
}
current
}
}
fn peek(&mut self) -> char { fn peek(&mut self) -> char {
self.chars.clone().next().unwrap_or(EOF) self.chars.clone().next().unwrap_or(EOF)
} }
fn eat(&mut self) -> Option<char> { fn eat(&mut self) -> char {
self.chars.next() self.chars.next().unwrap_or(EOF)
} }
fn len(&self) -> usize { fn len(&self) -> usize {
@ -113,12 +133,14 @@ impl<'s> Lexer<'s> {
} }
} }
fn token(&mut self) -> Option<Token> { fn token(&mut self) -> Token {
let first = self.eat()?; let first = self.eat();
let escape = self.escape; let escape = self.escape;
let kind = match first { let kind = match first {
EOF => Eof,
_ if escape && first == ' ' => Nbsp, _ if escape && first == ' ' => Nbsp,
_ if escape => Text, _ if escape => Text,
@ -202,7 +224,7 @@ impl<'s> Lexer<'s> {
let len = self.len(); let len = self.len();
Some(Token { kind, len }) Token { kind, len }
} }
fn eat_seq(&mut self, s: Sequence) -> Kind { fn eat_seq(&mut self, s: Sequence) -> Kind {
@ -220,29 +242,6 @@ impl<'s> Lexer<'s> {
} }
} }
impl<'s> Iterator for Lexer<'s> {
type Item = Token;
fn next(&mut self) -> Option<Self::Item> {
if let Some(token) = self.next.take() {
Some(token)
} else {
let mut current = self.token();
// concatenate text tokens
if let Some(Token { kind: Text, len }) = &mut current {
self.next = self.token();
while let Some(Token { kind: Text, len: l }) = self.next {
*len += l;
self.next = self.token();
}
}
current
}
}
}
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::Delimiter::*; use super::Delimiter::*;
@ -250,10 +249,22 @@ mod test {
use super::Sequence::*; use super::Sequence::*;
use super::Symbol::*; use super::Symbol::*;
fn tokenize(src: &str) -> impl Iterator<Item = super::Token> + '_ {
let mut lexer = super::Lexer::new(src);
std::iter::from_fn(move || {
let tok = lexer.next_token();
if matches!(tok.kind, Eof) {
None
} else {
Some(tok)
}
})
}
macro_rules! test_lex { macro_rules! test_lex {
($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => { ($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => {
#[allow(unused)] #[allow(unused)]
let actual = super::Lexer::new($src).map(|t| t.kind).collect::<Vec<_>>(); let actual = tokenize($src).map(|t| t.kind).collect::<Vec<_>>();
let expected = vec![$($($token),*,)?]; let expected = vec![$($($token),*,)?];
assert_eq!(actual, expected, "{}", $src); assert_eq!(actual, expected, "{}", $src);
}; };