From cc594840861dff2280b19badd1e5a82cbde624d6 Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Mon, 21 Nov 2022 19:44:59 +0100 Subject: [PATCH] wip --- src/inline.rs | 242 +++++++++++++++++++++++++------------------------- src/lex.rs | 28 +++--- src/lib.rs | 6 +- 3 files changed, 137 insertions(+), 139 deletions(-) diff --git a/src/inline.rs b/src/inline.rs index 29d129f..7ee0e80 100644 --- a/src/inline.rs +++ b/src/inline.rs @@ -1,40 +1,32 @@ -use crate::Span; +use crate::lex; -use crate::tree; -use crate::CowStr; +use lex::Delimiter; +use lex::Symbol; use Atom::*; use Container::*; -pub type Tree<'s> = tree::Tree>; - -/* -pub fn parse<'s, I: Iterator>(src: &'s str, inlines: I) -> Vec> { - Parser::new(src).parse(inlines) -} -*/ - -pub enum Inline<'s> { - Atom(Atom<'s>), - Container(Container), -} - #[derive(Debug, Clone, PartialEq, Eq)] -pub enum Atom<'s> { +pub enum Atom { Str, Softbreak, Hardbreak, Escape, - Nbsp, // ?? - OpenMarker, // ?? - Ellipses, // ?? + Nbsp, + OpenMarker, // ?? + Ellipses, ImageMarker, // ?? - EmDash, // ?? - FootnoteReference { label: CowStr<'s> }, - ExplicitLink { label: CowStr<'s> }, - ReferenceLink { label: CowStr<'s> }, - Emoji { name: CowStr<'s> }, - RawFormat { format: CowStr<'s> }, + EmDash, + EnDash, + FootnoteReference, + Link, + ReferenceLink, + Emoji, + RawFormat, + // math + DisplayMath, + InlineMath, + Verbatim, } #[derive(Debug, Copy, Clone, PartialEq, Eq)] @@ -47,18 +39,14 @@ pub enum Container { Superscript, Insert, Delete, - Emph, + Emphasis, Strong, Mark, - Verbatim, // smart quoting SingleQuoted, DoubleQuoted, - // math - DisplayMath, - InlineMath, // URLs - Email, + AutoUrl, Url, ImageText, LinkText, @@ -67,124 +55,138 @@ pub enum Container { } #[derive(Debug)] -pub enum Event<'s> { - Start(Container, OpenerState), +pub enum Event { + Start(Container), End(Container), - Atom(Atom<'s>), + Atom(Atom), } +/* #[derive(Debug)] pub enum OpenerState { Unclosed, Closed, Discarded, } +*/ #[derive(Debug)] -pub enum ContainerType { - Opener, - Closer, +pub enum Dir { + Open, + Close, Both, } -pub struct Parser<'s, I: Iterator> { - chars: std::iter::Peekable, - openers: Vec<(Container, usize)>, - events: Vec>, +pub struct Parser> { + tokens: std::iter::Peekable>, + openers: Vec, //tree: tree::Builder, } -impl<'s, I: Iterator> Parser<'s, I> { +impl> Parser { pub fn new(chars: I) -> Self { Self { - chars: chars.peekable(), + tokens: lex::Lexer::new(chars).peekable(), openers: Vec::new(), - events: Vec::new(), } } - /* - fn step(&mut self) -> lex::Token { - let token = lex::Lexer::new(&self.src[self.pos..]).next_token(); - self.pos += token.len; - std::mem::replace(&mut self.next_token, token) - } - - fn eat(&mut self) -> lex::TokenKind { - loop { - let end = self.pos; - let token = self.step(); - if !matches!(token.kind, lex::TokenKind::Whitespace) { - self.span = Span::new(end - token.len, end); - return token.kind; - } - } - } - - fn peek(&mut self) -> &lex::TokenKind { - if matches!(self.next_token.kind, lex::TokenKind::Whitespace) { - let _whitespace = self.step(); - } - &self.next_token.kind - } - */ - - pub fn parse(mut self) -> Vec<(Event<'s>, u32)> { - let mut len = 0; - - while let Some(c) = self.chars.peek() { - //let start = self.pos(); - - let cont = match c { - '*' => Some((Strong, ContainerType::Both)), - '_' => Some((Emph, ContainerType::Both)), - '^' => Some((Superscript, ContainerType::Both)), - '~' => Some((Subscript, ContainerType::Both)), - '\'' => Some((SingleQuoted, ContainerType::Both)), - '"' => Some((DoubleQuoted, ContainerType::Both)), - '`' => todo!(), - '{' => todo!(), - '$' => todo!(), - '<' => todo!(), - '[' => todo!(), - _ => None, - }; - - let ev = cont - .and_then(|(cont, ty)| { - self.openers - .iter() - .rposition(|(c, _)| *c == cont) - .map(|i| { - if let Event::Start(c, state) = &mut self.events[i] { - assert_eq!(*c, cont); - if matches!(ty, ContainerType::Closer | ContainerType::Both) { - *state = OpenerState::Closed; - Some(Event::End(cont)) - } else if matches!(ty, ContainerType::Opener | ContainerType::Both) + pub fn parse(mut self, evs: &mut Vec) { + while let Some(t) = self.tokens.next() { + { + let verbatim_opt = match t.kind { + lex::Kind::Seq(lex::Sequence::Dollar) => { + let math_opt = (t.len <= 2) + .then(|| { + if let Some(lex::Token { + kind: lex::Kind::Seq(lex::Sequence::Backtick), + len, + }) = self.tokens.peek() { - *state = OpenerState::Discarded; - Some(Event::Start(cont, OpenerState::Unclosed)) + Some((DisplayMath, *len)) } else { None } - } else { - unreachable!() - } - }) - .unwrap_or_else(|| { - matches!(ty, ContainerType::Opener | ContainerType::Both).then(|| { - self.openers.push((cont, self.events.len())); - Event::Start(cont, OpenerState::Unclosed) }) - }) - }) - .unwrap_or(Event::Atom(Str)); + .flatten(); + if math_opt.is_some() { + self.tokens.next(); // backticks + } + math_opt + } + lex::Kind::Seq(lex::Sequence::Backtick) => Some((Verbatim, t.len)), + _ => None, + }; - self.events.push(ev); + if let Some((atom, opener_len)) = verbatim_opt { + for tok in self.tokens { + if let lex::Kind::Seq(lex::Sequence::Backtick) = tok.kind { + if tok.len >= opener_len { + break; + } + } + } + evs.push(Event::Atom(atom)); + return; + } + } + + { + let container_opt = match t.kind { + lex::Kind::Sym(Symbol::Asterisk) => Some((Strong, Dir::Both)), + lex::Kind::Sym(Symbol::Underscore) => Some((Emphasis, Dir::Both)), + lex::Kind::Sym(Symbol::Caret) => Some((Superscript, Dir::Both)), + lex::Kind::Sym(Symbol::Tilde) => Some((Subscript, Dir::Both)), + lex::Kind::Sym(Symbol::Quote1) => Some((SingleQuoted, Dir::Both)), + lex::Kind::Sym(Symbol::Quote2) => Some((DoubleQuoted, Dir::Both)), + lex::Kind::Open(Delimiter::Bracket) => Some((LinkText, Dir::Open)), + lex::Kind::Open(Delimiter::BraceAsterisk) => Some((Strong, Dir::Open)), + lex::Kind::Open(Delimiter::BraceCaret) => Some((Superscript, Dir::Open)), + lex::Kind::Open(Delimiter::BraceEqual) => Some((Mark, Dir::Open)), + lex::Kind::Open(Delimiter::BraceHyphen) => Some((Delete, Dir::Open)), + lex::Kind::Open(Delimiter::BracePlus) => Some((Insert, Dir::Open)), + lex::Kind::Open(Delimiter::BraceTilde) => Some((Subscript, Dir::Open)), + lex::Kind::Open(Delimiter::BraceUnderscore) => Some((Emphasis, Dir::Open)), + lex::Kind::Close(Delimiter::Bracket) => Some((LinkText, Dir::Close)), + lex::Kind::Close(Delimiter::BraceAsterisk) => Some((Strong, Dir::Close)), + lex::Kind::Close(Delimiter::BraceCaret) => Some((Superscript, Dir::Close)), + lex::Kind::Close(Delimiter::BraceEqual) => Some((Mark, Dir::Close)), + lex::Kind::Close(Delimiter::BraceHyphen) => Some((Delete, Dir::Close)), + lex::Kind::Close(Delimiter::BracePlus) => Some((Insert, Dir::Close)), + lex::Kind::Close(Delimiter::BraceTilde) => Some((Subscript, Dir::Close)), + lex::Kind::Close(Delimiter::BraceUnderscore) => Some((Emphasis, Dir::Close)), + _ => None, + }; + + if let Some((cont, ty)) = container_opt { + if matches!(ty, Dir::Close | Dir::Both) && self.openers.contains(&cont) { + loop { + let c = self.openers.pop().unwrap(); + evs.push(Event::End(c)); + if c == cont { + break; + } + } + return; + } else if matches!(ty, Dir::Open | Dir::Both) { + self.openers.push(cont); + evs.push(Event::Start(cont)); + } + return; + } + } + + { + if let lex::Kind::Open(Delimiter::Brace) = t.kind { + todo!(); // check for attr + } + } + + if let Some(Event::Atom(Str)) = evs.last() { + } else { + evs.push(Event::Atom(Str)); + } } - //self.events - todo!() } } diff --git a/src/lex.rs b/src/lex.rs index fab6c60..d5742b7 100644 --- a/src/lex.rs +++ b/src/lex.rs @@ -3,16 +3,16 @@ use crate::EOF; use Delimiter::*; use Sequence::*; use Symbol::*; -use TokenKind::*; +use Kind::*; #[derive(Debug)] pub(crate) struct Token { - pub kind: TokenKind, + pub kind: Kind, pub len: usize, } #[derive(Debug, PartialEq, Eq)] -pub enum TokenKind { +pub enum Kind { Text, Whitespace, Nbsp, @@ -42,8 +42,6 @@ pub enum Delimiter { pub enum Symbol { Asterisk, Caret, - Dollar1, - Dollar2, Equal, Exclaim, Gt, @@ -61,6 +59,7 @@ pub enum Symbol { pub enum Sequence { Backtick, Colon, + Dollar, Hash, Hyphen, Period, @@ -71,6 +70,7 @@ impl Sequence { match self { Self::Backtick => '`', Self::Colon => ':', + Self::Dollar => '$', Self::Hash => '#', Self::Period => '.', Self::Hyphen => '-', @@ -176,14 +176,6 @@ impl> Lexer { } } - '$' => { - if self.peek() == '$' { - self.eat(); - Sym(Dollar2) - } else { - Sym(Dollar1) - } - } '!' => Sym(Exclaim), '%' => Sym(Percentage), '<' => Sym(Lt), @@ -194,6 +186,7 @@ impl> Lexer { '`' => self.eat_seq(Backtick), ':' => self.eat_seq(Colon), + '$' => self.eat_seq(Dollar), '#' => self.eat_seq(Hash), '.' => self.eat_seq(Period), @@ -214,12 +207,12 @@ impl> Lexer { Some(Token { kind, len }) } - fn eat_seq(&mut self, s: Sequence) -> TokenKind { + fn eat_seq(&mut self, s: Sequence) -> Kind { self.eat_while(|c| c == s.ch()); Seq(s) } - fn maybe_eat_close_brace(&mut self, s: Symbol, d: Delimiter) -> TokenKind { + fn maybe_eat_close_brace(&mut self, s: Symbol, d: Delimiter) -> Kind { if self.peek() == '}' { self.eat(); Close(d) @@ -257,7 +250,7 @@ mod test { use super::Delimiter::*; use super::Sequence::*; use super::Symbol::*; - use super::TokenKind::*; + use super::Kind::*; macro_rules! test_lex { ($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => { @@ -339,9 +332,10 @@ mod test { test_lex!("`", Seq(Backtick)); test_lex!("```", Seq(Backtick)); test_lex!( - "`:#-.", + "`:$#-.", Seq(Backtick), Seq(Colon), + Seq(Dollar), Seq(Hash), Seq(Hyphen), Seq(Period), diff --git a/src/lib.rs b/src/lib.rs index 362062c..bb89ae6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -59,6 +59,7 @@ impl<'s> Parser<'s> { Iter { src: self.src, tree: self.tree.iter().peekable(), + events: Vec::new(), } } } @@ -66,6 +67,7 @@ impl<'s> Parser<'s> { pub struct Iter<'s> { src: &'s str, tree: std::iter::Peekable>, + events: Vec, } impl<'s> Iterator for Iter<'s> { @@ -81,7 +83,7 @@ impl<'s> Iterator for Iter<'s> { let chars = (&mut self.tree) .take_while(|ev| matches!(ev, tree::Event::Element(..))) .flat_map(|ev| ev.span().of(self.src).chars()); - let evs = inline::Parser::new(chars).parse(); + inline::Parser::new(chars).parse(&mut self.events); /* let chars = std::iter::from_fn(|| { let mut eat = false; @@ -99,7 +101,7 @@ impl<'s> Iterator for Iter<'s> { }) .flatten(); */ - format!("leaf {:?} {:?}", leaf, evs) + format!("leaf {:?} {:?}", leaf, self.events) } tree::Event::Element(atom, _sp) => { format!("atom {:?}", atom)