This commit is contained in:
Noah Hellman 2022-11-21 19:44:59 +01:00
parent fe45519ca9
commit cc59484086
3 changed files with 137 additions and 139 deletions

View file

@ -1,40 +1,32 @@
use crate::Span; use crate::lex;
use crate::tree; use lex::Delimiter;
use crate::CowStr; use lex::Symbol;
use Atom::*; use Atom::*;
use Container::*; use Container::*;
pub type Tree<'s> = tree::Tree<Container, Atom<'s>>;
/*
pub fn parse<'s, I: Iterator<Item = Span>>(src: &'s str, inlines: I) -> Vec<Event<'s>> {
Parser::new(src).parse(inlines)
}
*/
pub enum Inline<'s> {
Atom(Atom<'s>),
Container(Container),
}
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub enum Atom<'s> { pub enum Atom {
Str, Str,
Softbreak, Softbreak,
Hardbreak, Hardbreak,
Escape, Escape,
Nbsp, // ?? Nbsp,
OpenMarker, // ?? OpenMarker, // ??
Ellipses, // ?? Ellipses,
ImageMarker, // ?? ImageMarker, // ??
EmDash, // ?? EmDash,
FootnoteReference { label: CowStr<'s> }, EnDash,
ExplicitLink { label: CowStr<'s> }, FootnoteReference,
ReferenceLink { label: CowStr<'s> }, Link,
Emoji { name: CowStr<'s> }, ReferenceLink,
RawFormat { format: CowStr<'s> }, Emoji,
RawFormat,
// math
DisplayMath,
InlineMath,
Verbatim,
} }
#[derive(Debug, Copy, Clone, PartialEq, Eq)] #[derive(Debug, Copy, Clone, PartialEq, Eq)]
@ -47,18 +39,14 @@ pub enum Container {
Superscript, Superscript,
Insert, Insert,
Delete, Delete,
Emph, Emphasis,
Strong, Strong,
Mark, Mark,
Verbatim,
// smart quoting // smart quoting
SingleQuoted, SingleQuoted,
DoubleQuoted, DoubleQuoted,
// math
DisplayMath,
InlineMath,
// URLs // URLs
Email, AutoUrl,
Url, Url,
ImageText, ImageText,
LinkText, LinkText,
@ -67,124 +55,138 @@ pub enum Container {
} }
#[derive(Debug)] #[derive(Debug)]
pub enum Event<'s> { pub enum Event {
Start(Container, OpenerState), Start(Container),
End(Container), End(Container),
Atom(Atom<'s>), Atom(Atom),
} }
/*
#[derive(Debug)] #[derive(Debug)]
pub enum OpenerState { pub enum OpenerState {
Unclosed, Unclosed,
Closed, Closed,
Discarded, Discarded,
} }
*/
#[derive(Debug)] #[derive(Debug)]
pub enum ContainerType { pub enum Dir {
Opener, Open,
Closer, Close,
Both, Both,
} }
pub struct Parser<'s, I: Iterator<Item = char>> { pub struct Parser<I: Iterator<Item = char>> {
chars: std::iter::Peekable<I>, tokens: std::iter::Peekable<lex::Lexer<I>>,
openers: Vec<(Container, usize)>, openers: Vec<Container>,
events: Vec<Event<'s>>,
//tree: tree::Builder<Container, Atom>, //tree: tree::Builder<Container, Atom>,
} }
impl<'s, I: Iterator<Item = char>> Parser<'s, I> { impl<I: Iterator<Item = char>> Parser<I> {
pub fn new(chars: I) -> Self { pub fn new(chars: I) -> Self {
Self { Self {
chars: chars.peekable(), tokens: lex::Lexer::new(chars).peekable(),
openers: Vec::new(), openers: Vec::new(),
events: Vec::new(),
} }
} }
/* pub fn parse(mut self, evs: &mut Vec<Event>) {
fn step(&mut self) -> lex::Token { while let Some(t) = self.tokens.next() {
let token = lex::Lexer::new(&self.src[self.pos..]).next_token(); {
self.pos += token.len; let verbatim_opt = match t.kind {
std::mem::replace(&mut self.next_token, token) lex::Kind::Seq(lex::Sequence::Dollar) => {
} let math_opt = (t.len <= 2)
.then(|| {
fn eat(&mut self) -> lex::TokenKind { if let Some(lex::Token {
loop { kind: lex::Kind::Seq(lex::Sequence::Backtick),
let end = self.pos; len,
let token = self.step(); }) = self.tokens.peek()
if !matches!(token.kind, lex::TokenKind::Whitespace) {
self.span = Span::new(end - token.len, end);
return token.kind;
}
}
}
fn peek(&mut self) -> &lex::TokenKind {
if matches!(self.next_token.kind, lex::TokenKind::Whitespace) {
let _whitespace = self.step();
}
&self.next_token.kind
}
*/
pub fn parse(mut self) -> Vec<(Event<'s>, u32)> {
let mut len = 0;
while let Some(c) = self.chars.peek() {
//let start = self.pos();
let cont = match c {
'*' => Some((Strong, ContainerType::Both)),
'_' => Some((Emph, ContainerType::Both)),
'^' => Some((Superscript, ContainerType::Both)),
'~' => Some((Subscript, ContainerType::Both)),
'\'' => Some((SingleQuoted, ContainerType::Both)),
'"' => Some((DoubleQuoted, ContainerType::Both)),
'`' => todo!(),
'{' => todo!(),
'$' => todo!(),
'<' => todo!(),
'[' => todo!(),
_ => None,
};
let ev = cont
.and_then(|(cont, ty)| {
self.openers
.iter()
.rposition(|(c, _)| *c == cont)
.map(|i| {
if let Event::Start(c, state) = &mut self.events[i] {
assert_eq!(*c, cont);
if matches!(ty, ContainerType::Closer | ContainerType::Both) {
*state = OpenerState::Closed;
Some(Event::End(cont))
} else if matches!(ty, ContainerType::Opener | ContainerType::Both)
{ {
*state = OpenerState::Discarded; Some((DisplayMath, *len))
Some(Event::Start(cont, OpenerState::Unclosed))
} else { } else {
None None
} }
} else {
unreachable!()
}
})
.unwrap_or_else(|| {
matches!(ty, ContainerType::Opener | ContainerType::Both).then(|| {
self.openers.push((cont, self.events.len()));
Event::Start(cont, OpenerState::Unclosed)
}) })
}) .flatten();
}) if math_opt.is_some() {
.unwrap_or(Event::Atom(Str)); self.tokens.next(); // backticks
}
math_opt
}
lex::Kind::Seq(lex::Sequence::Backtick) => Some((Verbatim, t.len)),
_ => None,
};
self.events.push(ev); if let Some((atom, opener_len)) = verbatim_opt {
for tok in self.tokens {
if let lex::Kind::Seq(lex::Sequence::Backtick) = tok.kind {
if tok.len >= opener_len {
break;
}
}
}
evs.push(Event::Atom(atom));
return;
}
}
{
let container_opt = match t.kind {
lex::Kind::Sym(Symbol::Asterisk) => Some((Strong, Dir::Both)),
lex::Kind::Sym(Symbol::Underscore) => Some((Emphasis, Dir::Both)),
lex::Kind::Sym(Symbol::Caret) => Some((Superscript, Dir::Both)),
lex::Kind::Sym(Symbol::Tilde) => Some((Subscript, Dir::Both)),
lex::Kind::Sym(Symbol::Quote1) => Some((SingleQuoted, Dir::Both)),
lex::Kind::Sym(Symbol::Quote2) => Some((DoubleQuoted, Dir::Both)),
lex::Kind::Open(Delimiter::Bracket) => Some((LinkText, Dir::Open)),
lex::Kind::Open(Delimiter::BraceAsterisk) => Some((Strong, Dir::Open)),
lex::Kind::Open(Delimiter::BraceCaret) => Some((Superscript, Dir::Open)),
lex::Kind::Open(Delimiter::BraceEqual) => Some((Mark, Dir::Open)),
lex::Kind::Open(Delimiter::BraceHyphen) => Some((Delete, Dir::Open)),
lex::Kind::Open(Delimiter::BracePlus) => Some((Insert, Dir::Open)),
lex::Kind::Open(Delimiter::BraceTilde) => Some((Subscript, Dir::Open)),
lex::Kind::Open(Delimiter::BraceUnderscore) => Some((Emphasis, Dir::Open)),
lex::Kind::Close(Delimiter::Bracket) => Some((LinkText, Dir::Close)),
lex::Kind::Close(Delimiter::BraceAsterisk) => Some((Strong, Dir::Close)),
lex::Kind::Close(Delimiter::BraceCaret) => Some((Superscript, Dir::Close)),
lex::Kind::Close(Delimiter::BraceEqual) => Some((Mark, Dir::Close)),
lex::Kind::Close(Delimiter::BraceHyphen) => Some((Delete, Dir::Close)),
lex::Kind::Close(Delimiter::BracePlus) => Some((Insert, Dir::Close)),
lex::Kind::Close(Delimiter::BraceTilde) => Some((Subscript, Dir::Close)),
lex::Kind::Close(Delimiter::BraceUnderscore) => Some((Emphasis, Dir::Close)),
_ => None,
};
if let Some((cont, ty)) = container_opt {
if matches!(ty, Dir::Close | Dir::Both) && self.openers.contains(&cont) {
loop {
let c = self.openers.pop().unwrap();
evs.push(Event::End(c));
if c == cont {
break;
}
}
return;
} else if matches!(ty, Dir::Open | Dir::Both) {
self.openers.push(cont);
evs.push(Event::Start(cont));
}
return;
}
}
{
if let lex::Kind::Open(Delimiter::Brace) = t.kind {
todo!(); // check for attr
}
}
if let Some(Event::Atom(Str)) = evs.last() {
} else {
evs.push(Event::Atom(Str));
}
} }
//self.events
todo!()
} }
} }

View file

@ -3,16 +3,16 @@ use crate::EOF;
use Delimiter::*; use Delimiter::*;
use Sequence::*; use Sequence::*;
use Symbol::*; use Symbol::*;
use TokenKind::*; use Kind::*;
#[derive(Debug)] #[derive(Debug)]
pub(crate) struct Token { pub(crate) struct Token {
pub kind: TokenKind, pub kind: Kind,
pub len: usize, pub len: usize,
} }
#[derive(Debug, PartialEq, Eq)] #[derive(Debug, PartialEq, Eq)]
pub enum TokenKind { pub enum Kind {
Text, Text,
Whitespace, Whitespace,
Nbsp, Nbsp,
@ -42,8 +42,6 @@ pub enum Delimiter {
pub enum Symbol { pub enum Symbol {
Asterisk, Asterisk,
Caret, Caret,
Dollar1,
Dollar2,
Equal, Equal,
Exclaim, Exclaim,
Gt, Gt,
@ -61,6 +59,7 @@ pub enum Symbol {
pub enum Sequence { pub enum Sequence {
Backtick, Backtick,
Colon, Colon,
Dollar,
Hash, Hash,
Hyphen, Hyphen,
Period, Period,
@ -71,6 +70,7 @@ impl Sequence {
match self { match self {
Self::Backtick => '`', Self::Backtick => '`',
Self::Colon => ':', Self::Colon => ':',
Self::Dollar => '$',
Self::Hash => '#', Self::Hash => '#',
Self::Period => '.', Self::Period => '.',
Self::Hyphen => '-', Self::Hyphen => '-',
@ -176,14 +176,6 @@ impl<I: Iterator<Item = char>> Lexer<I> {
} }
} }
'$' => {
if self.peek() == '$' {
self.eat();
Sym(Dollar2)
} else {
Sym(Dollar1)
}
}
'!' => Sym(Exclaim), '!' => Sym(Exclaim),
'%' => Sym(Percentage), '%' => Sym(Percentage),
'<' => Sym(Lt), '<' => Sym(Lt),
@ -194,6 +186,7 @@ impl<I: Iterator<Item = char>> Lexer<I> {
'`' => self.eat_seq(Backtick), '`' => self.eat_seq(Backtick),
':' => self.eat_seq(Colon), ':' => self.eat_seq(Colon),
'$' => self.eat_seq(Dollar),
'#' => self.eat_seq(Hash), '#' => self.eat_seq(Hash),
'.' => self.eat_seq(Period), '.' => self.eat_seq(Period),
@ -214,12 +207,12 @@ impl<I: Iterator<Item = char>> Lexer<I> {
Some(Token { kind, len }) Some(Token { kind, len })
} }
fn eat_seq(&mut self, s: Sequence) -> TokenKind { fn eat_seq(&mut self, s: Sequence) -> Kind {
self.eat_while(|c| c == s.ch()); self.eat_while(|c| c == s.ch());
Seq(s) Seq(s)
} }
fn maybe_eat_close_brace(&mut self, s: Symbol, d: Delimiter) -> TokenKind { fn maybe_eat_close_brace(&mut self, s: Symbol, d: Delimiter) -> Kind {
if self.peek() == '}' { if self.peek() == '}' {
self.eat(); self.eat();
Close(d) Close(d)
@ -257,7 +250,7 @@ mod test {
use super::Delimiter::*; use super::Delimiter::*;
use super::Sequence::*; use super::Sequence::*;
use super::Symbol::*; use super::Symbol::*;
use super::TokenKind::*; use super::Kind::*;
macro_rules! test_lex { macro_rules! test_lex {
($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => { ($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => {
@ -339,9 +332,10 @@ mod test {
test_lex!("`", Seq(Backtick)); test_lex!("`", Seq(Backtick));
test_lex!("```", Seq(Backtick)); test_lex!("```", Seq(Backtick));
test_lex!( test_lex!(
"`:#-.", "`:$#-.",
Seq(Backtick), Seq(Backtick),
Seq(Colon), Seq(Colon),
Seq(Dollar),
Seq(Hash), Seq(Hash),
Seq(Hyphen), Seq(Hyphen),
Seq(Period), Seq(Period),

View file

@ -59,6 +59,7 @@ impl<'s> Parser<'s> {
Iter { Iter {
src: self.src, src: self.src,
tree: self.tree.iter().peekable(), tree: self.tree.iter().peekable(),
events: Vec::new(),
} }
} }
} }
@ -66,6 +67,7 @@ impl<'s> Parser<'s> {
pub struct Iter<'s> { pub struct Iter<'s> {
src: &'s str, src: &'s str,
tree: std::iter::Peekable<block::TreeIter<'s>>, tree: std::iter::Peekable<block::TreeIter<'s>>,
events: Vec<inline::Event>,
} }
impl<'s> Iterator for Iter<'s> { impl<'s> Iterator for Iter<'s> {
@ -81,7 +83,7 @@ impl<'s> Iterator for Iter<'s> {
let chars = (&mut self.tree) let chars = (&mut self.tree)
.take_while(|ev| matches!(ev, tree::Event::Element(..))) .take_while(|ev| matches!(ev, tree::Event::Element(..)))
.flat_map(|ev| ev.span().of(self.src).chars()); .flat_map(|ev| ev.span().of(self.src).chars());
let evs = inline::Parser::new(chars).parse(); inline::Parser::new(chars).parse(&mut self.events);
/* /*
let chars = std::iter::from_fn(|| { let chars = std::iter::from_fn(|| {
let mut eat = false; let mut eat = false;
@ -99,7 +101,7 @@ impl<'s> Iterator for Iter<'s> {
}) })
.flatten(); .flatten();
*/ */
format!("leaf {:?} {:?}", leaf, evs) format!("leaf {:?} {:?}", leaf, self.events)
} }
tree::Event::Element(atom, _sp) => { tree::Event::Element(atom, _sp) => {
format!("atom {:?}", atom) format!("atom {:?}", atom)