2022-11-21 13:44:59 -05:00
|
|
|
use crate::lex;
|
2022-11-21 16:40:11 -05:00
|
|
|
use crate::Span;
|
2022-11-16 16:11:55 -05:00
|
|
|
|
2022-11-21 13:44:59 -05:00
|
|
|
use lex::Delimiter;
|
|
|
|
use lex::Symbol;
|
2022-11-16 16:11:55 -05:00
|
|
|
|
|
|
|
use Atom::*;
|
|
|
|
use Container::*;
|
2022-11-26 19:12:56 -05:00
|
|
|
use NodeKind::*;
|
2022-11-16 16:11:55 -05:00
|
|
|
|
|
|
|
/// A leaf inline element that carries no payload of its own.
///
/// Atoms are handed to the consumer wrapped in [`Event::Atom`]. In this file
/// only `Escape`, `Nbsp`, `Lt`, `Gt` and `Quote` are produced (see
/// `Parser::parse_atom`); the remaining variants are declared but not yet
/// emitted by the code visible here.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Atom {
    Softbreak,
    Hardbreak,
    /// Emitted for a `lex::Kind::Escape` token.
    Escape,
    /// Emitted for a `lex::Kind::Nbsp` token.
    Nbsp,
    OpenMarker, // ??
    Ellipses,
    ImageMarker, // ??
    EmDash,
    EnDash,
    /// Emitted for a `lex::Symbol::Lt` token.
    Lt,
    /// Emitted for a `lex::Symbol::Gt` token.
    Gt,
    Ampersand,
    /// Mapped from `lex::Symbol::Quote2` in `Parser::parse_atom`.
    Quote,
}
|
|
|
|
|
|
|
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
|
|
|
pub struct Node {
|
|
|
|
pub kind: NodeKind,
|
|
|
|
pub span: Span,
|
|
|
|
}
|
|
|
|
|
|
|
|
/// The kind of a [`Node`].
///
/// In this file the parser emits `Str` (fallback for any token that is not
/// claimed by another rule) and `Verbatim` / `InlineMath` / `DisplayMath`
/// (see `Parser::parse_verbatim`). The other variants are declared but not yet
/// produced by the code visible here.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NodeKind {
    /// Plain text.
    Str,
    // link
    Url,
    ImageSource,
    LinkReference,
    FootnoteReference,
    // verbatim
    /// Content between two backtick runs of equal length.
    Verbatim,
    RawFormat,
    /// Verbatim content whose opening backticks are preceded by a `$` run of length 1.
    InlineMath,
    /// Verbatim content whose opening backticks are preceded by a `$` run of length 2.
    DisplayMath,
}
|
|
|
|
|
2022-11-20 13:13:48 -05:00
|
|
|
/// An inline element that may contain other inline elements.
///
/// Containers are delimited by an [`Event::Enter`] / [`Event::Exit`] pair.
/// Every variant except `Attributes` is produced by `Parser::parse_container`
/// in this file.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum Container {
    /// Opened and closed by bracket delimiters.
    Span,
    Attributes,
    // typesetting
    Subscript,
    Superscript,
    Insert,
    Delete,
    Emphasis,
    Strong,
    Mark,
    // smart quoting
    SingleQuoted,
    DoubleQuoted,
}
|
|
|
|
|
2022-11-21 16:40:11 -05:00
|
|
|
#[derive(Debug, PartialEq, Eq)]
|
2022-11-21 13:44:59 -05:00
|
|
|
pub enum Event {
|
2022-11-27 17:56:19 -05:00
|
|
|
Enter(Container, OpenerState),
|
2022-11-22 13:19:21 -05:00
|
|
|
Exit(Container),
|
2022-11-21 13:44:59 -05:00
|
|
|
Atom(Atom),
|
2022-11-26 19:12:56 -05:00
|
|
|
Node(Node),
|
2022-11-20 13:13:48 -05:00
|
|
|
}
|
|
|
|
|
2022-11-27 17:56:19 -05:00
|
|
|
/// Whether the container started by an [`Event::Enter`] was ever closed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OpenerState {
    /// No matching closer has been seen (yet).
    Unclosed,
    /// A matching closer was found; a corresponding `Exit` event follows.
    Closed,
}
|
|
|
|
|
2022-11-22 13:19:21 -05:00
|
|
|
/// Which role a delimiter token is able to play for its container.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Dir {
    /// The delimiter can only start a container.
    Open,
    /// The delimiter can only end a container.
    Close,
    /// The delimiter ends the innermost matching open container if there is
    /// one, and otherwise starts a new container.
    Both,
}
|
2022-11-16 16:11:55 -05:00
|
|
|
|
2022-11-22 13:19:21 -05:00
|
|
|
pub struct Parser<'s> {
|
2022-11-27 17:56:19 -05:00
|
|
|
openers: Vec<(Container, usize)>,
|
|
|
|
events: std::collections::VecDeque<Event>,
|
2022-11-26 19:12:56 -05:00
|
|
|
span: Span,
|
|
|
|
|
|
|
|
lexer: std::iter::Peekable<lex::Lexer<'s>>,
|
2022-11-16 16:11:55 -05:00
|
|
|
}
|
|
|
|
|
2022-11-22 13:19:21 -05:00
|
|
|
impl<'s> Parser<'s> {
|
2022-11-21 13:56:11 -05:00
|
|
|
pub fn new() -> Self {
|
2022-11-16 16:11:55 -05:00
|
|
|
Self {
|
|
|
|
openers: Vec::new(),
|
2022-11-27 17:56:19 -05:00
|
|
|
events: std::collections::VecDeque::new(),
|
2022-11-26 19:12:56 -05:00
|
|
|
span: Span::new(0, 0),
|
|
|
|
|
|
|
|
lexer: lex::Lexer::new("").peekable(),
|
2022-11-20 13:13:48 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-11-22 13:19:21 -05:00
|
|
|
pub fn parse(&mut self, src: &'s str) {
|
2022-11-26 19:12:56 -05:00
|
|
|
self.lexer = lex::Lexer::new(src).peekable();
|
2022-11-22 13:19:21 -05:00
|
|
|
}
|
|
|
|
|
2022-11-26 19:12:56 -05:00
|
|
|
fn eat(&mut self) -> Option<lex::Token> {
|
|
|
|
let tok = self.lexer.next();
|
|
|
|
if let Some(t) = &tok {
|
|
|
|
self.span = self.span.extend(t.len);
|
2022-11-22 13:19:21 -05:00
|
|
|
}
|
2022-11-26 19:12:56 -05:00
|
|
|
tok
|
2022-11-21 13:56:11 -05:00
|
|
|
}
|
|
|
|
|
2022-11-26 19:12:56 -05:00
|
|
|
fn peek(&mut self) -> Option<&lex::Token> {
|
|
|
|
self.lexer.peek()
|
|
|
|
}
|
2022-11-21 13:56:11 -05:00
|
|
|
|
2022-11-26 19:12:56 -05:00
|
|
|
fn reset_span(&mut self) {
|
|
|
|
self.span = Span::empty_at(self.span.end());
|
2022-11-21 13:56:11 -05:00
|
|
|
}
|
|
|
|
|
2022-11-26 19:12:56 -05:00
|
|
|
fn node(&self, kind: NodeKind) -> Event {
|
|
|
|
Event::Node(Node {
|
|
|
|
span: self.span,
|
|
|
|
kind,
|
|
|
|
})
|
2022-11-21 17:32:28 -05:00
|
|
|
}
|
2022-11-21 16:40:11 -05:00
|
|
|
|
2022-11-26 19:12:56 -05:00
|
|
|
fn parse_event(&mut self) -> Option<Event> {
|
|
|
|
self.reset_span();
|
|
|
|
self.eat().map(|first| {
|
|
|
|
self.parse_verbatim(&first)
|
|
|
|
.or_else(|| self.parse_container(&first))
|
|
|
|
.or_else(|| self.parse_atom(&first))
|
|
|
|
.unwrap_or_else(|| self.node(Str))
|
|
|
|
})
|
|
|
|
}
|
2022-11-21 16:40:11 -05:00
|
|
|
|
2022-11-26 19:12:56 -05:00
|
|
|
fn parse_atom(&mut self, first: &lex::Token) -> Option<Event> {
|
|
|
|
match first.kind {
|
|
|
|
lex::Kind::Escape => Some(Event::Atom(Escape)),
|
|
|
|
lex::Kind::Nbsp => Some(Event::Atom(Nbsp)),
|
|
|
|
lex::Kind::Sym(lex::Symbol::Lt) => Some(Event::Atom(Lt)),
|
|
|
|
lex::Kind::Sym(lex::Symbol::Gt) => Some(Event::Atom(Gt)),
|
|
|
|
lex::Kind::Sym(lex::Symbol::Quote2) => Some(Event::Atom(Quote)),
|
|
|
|
_ => None,
|
2022-11-21 16:40:11 -05:00
|
|
|
}
|
2022-11-26 19:12:56 -05:00
|
|
|
}
|
2022-11-20 13:13:48 -05:00
|
|
|
|
2022-11-26 19:12:56 -05:00
|
|
|
fn parse_verbatim(&mut self, first: &lex::Token) -> Option<Event> {
|
|
|
|
match first.kind {
|
|
|
|
lex::Kind::Seq(lex::Sequence::Dollar) => {
|
|
|
|
let math_opt = (first.len <= 2)
|
|
|
|
.then(|| {
|
|
|
|
if let Some(lex::Token {
|
|
|
|
kind: lex::Kind::Seq(lex::Sequence::Backtick),
|
|
|
|
len,
|
|
|
|
}) = self.peek()
|
|
|
|
{
|
2022-11-27 15:59:54 -05:00
|
|
|
Some((
|
|
|
|
if first.len == 2 {
|
|
|
|
DisplayMath
|
|
|
|
} else {
|
|
|
|
InlineMath
|
|
|
|
},
|
|
|
|
*len,
|
|
|
|
))
|
2022-11-26 19:12:56 -05:00
|
|
|
} else {
|
|
|
|
None
|
2022-11-21 13:44:59 -05:00
|
|
|
}
|
2022-11-26 19:12:56 -05:00
|
|
|
})
|
|
|
|
.flatten();
|
|
|
|
if math_opt.is_some() {
|
|
|
|
self.eat(); // backticks
|
2022-11-21 13:44:59 -05:00
|
|
|
}
|
2022-11-26 19:12:56 -05:00
|
|
|
math_opt
|
2022-11-21 13:44:59 -05:00
|
|
|
}
|
2022-11-26 19:12:56 -05:00
|
|
|
lex::Kind::Seq(lex::Sequence::Backtick) => Some((Verbatim, first.len)),
|
|
|
|
_ => None,
|
2022-11-21 16:40:11 -05:00
|
|
|
}
|
2022-11-26 19:12:56 -05:00
|
|
|
.map(|(kind, opener_len)| {
|
|
|
|
let mut span = Span::empty_at(self.span.end());
|
|
|
|
while let Some(tok) = self.eat() {
|
|
|
|
if matches!(tok.kind, lex::Kind::Seq(lex::Sequence::Backtick))
|
|
|
|
&& tok.len == opener_len
|
|
|
|
{
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
span = span.extend(tok.len);
|
2022-11-21 13:44:59 -05:00
|
|
|
}
|
2022-11-26 19:12:56 -05:00
|
|
|
Event::Node(Node { kind, span })
|
|
|
|
})
|
|
|
|
}
|
2022-11-21 13:44:59 -05:00
|
|
|
|
2022-11-26 19:12:56 -05:00
|
|
|
fn parse_container(&mut self, first: &lex::Token) -> Option<Event> {
|
|
|
|
match first.kind {
|
|
|
|
lex::Kind::Sym(Symbol::Asterisk) => Some((Strong, Dir::Both)),
|
|
|
|
lex::Kind::Sym(Symbol::Underscore) => Some((Emphasis, Dir::Both)),
|
|
|
|
lex::Kind::Sym(Symbol::Caret) => Some((Superscript, Dir::Both)),
|
|
|
|
lex::Kind::Sym(Symbol::Tilde) => Some((Subscript, Dir::Both)),
|
|
|
|
lex::Kind::Sym(Symbol::Quote1) => Some((SingleQuoted, Dir::Both)),
|
|
|
|
lex::Kind::Sym(Symbol::Quote2) => Some((DoubleQuoted, Dir::Both)),
|
2022-11-27 15:59:54 -05:00
|
|
|
lex::Kind::Open(Delimiter::Bracket) => Some((Span, Dir::Open)),
|
|
|
|
lex::Kind::Close(Delimiter::Bracket) => Some((Span, Dir::Close)),
|
2022-11-26 19:12:56 -05:00
|
|
|
lex::Kind::Open(Delimiter::BraceAsterisk) => Some((Strong, Dir::Open)),
|
|
|
|
lex::Kind::Close(Delimiter::BraceAsterisk) => Some((Strong, Dir::Close)),
|
|
|
|
lex::Kind::Open(Delimiter::BraceCaret) => Some((Superscript, Dir::Open)),
|
|
|
|
lex::Kind::Close(Delimiter::BraceCaret) => Some((Superscript, Dir::Close)),
|
2022-11-27 17:56:19 -05:00
|
|
|
lex::Kind::Open(Delimiter::BraceEqual) => Some((Mark, Dir::Open)),
|
|
|
|
lex::Kind::Close(Delimiter::BraceEqual) => Some((Mark, Dir::Close)),
|
2022-11-26 19:12:56 -05:00
|
|
|
lex::Kind::Open(Delimiter::BraceHyphen) => Some((Delete, Dir::Open)),
|
|
|
|
lex::Kind::Close(Delimiter::BraceHyphen) => Some((Delete, Dir::Close)),
|
|
|
|
lex::Kind::Open(Delimiter::BracePlus) => Some((Insert, Dir::Open)),
|
|
|
|
lex::Kind::Close(Delimiter::BracePlus) => Some((Insert, Dir::Close)),
|
|
|
|
lex::Kind::Open(Delimiter::BraceTilde) => Some((Subscript, Dir::Open)),
|
|
|
|
lex::Kind::Close(Delimiter::BraceTilde) => Some((Subscript, Dir::Close)),
|
|
|
|
lex::Kind::Open(Delimiter::BraceUnderscore) => Some((Emphasis, Dir::Open)),
|
|
|
|
lex::Kind::Close(Delimiter::BraceUnderscore) => Some((Emphasis, Dir::Close)),
|
|
|
|
_ => None,
|
2022-11-20 13:13:48 -05:00
|
|
|
}
|
2022-11-27 17:56:19 -05:00
|
|
|
.map(|(cont_new, dir)| {
|
|
|
|
self.openers
|
|
|
|
.iter()
|
|
|
|
.rposition(|(c, _)| *c == cont_new)
|
|
|
|
.and_then(|o| {
|
|
|
|
matches!(dir, Dir::Close | Dir::Both).then(|| {
|
2022-11-27 17:59:01 -05:00
|
|
|
let (_, e) = &mut self.openers[o];
|
|
|
|
if let Event::Enter(_, state_ev) = &mut self.events[*e] {
|
2022-11-27 17:56:19 -05:00
|
|
|
*state_ev = OpenerState::Closed;
|
|
|
|
self.openers.drain(o..);
|
|
|
|
Event::Exit(cont_new)
|
|
|
|
} else {
|
|
|
|
panic!()
|
|
|
|
}
|
|
|
|
})
|
|
|
|
})
|
|
|
|
.unwrap_or_else(|| {
|
|
|
|
self.openers.push((cont_new, self.events.len()));
|
|
|
|
Event::Enter(cont_new, OpenerState::Unclosed)
|
|
|
|
})
|
2022-11-26 19:12:56 -05:00
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'s> Iterator for Parser<'s> {
|
|
|
|
type Item = Event;
|
|
|
|
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
2022-11-27 17:56:19 -05:00
|
|
|
while self.events.is_empty() || !self.openers.is_empty() {
|
|
|
|
if let Some(ev) = self.parse_event() {
|
|
|
|
self.events.push_back(ev);
|
|
|
|
} else {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2022-11-26 19:12:56 -05:00
|
|
|
|
2022-11-27 17:56:19 -05:00
|
|
|
// TODO merge str/unclosed enters
|
|
|
|
self.events.pop_front()
|
2022-11-16 16:11:55 -05:00
|
|
|
}
|
|
|
|
}
|
2022-11-20 13:13:48 -05:00
|
|
|
|
2022-11-21 16:40:11 -05:00
|
|
|
#[cfg(test)]
mod test {
    use crate::Span;

    use super::Atom::*;
    use super::Container::*;
    use super::Event::*;
    use super::NodeKind::*;
    use super::OpenerState::*;

    // Parses `$src` with a fresh parser and asserts that the collected events
    // equal the listed ones. On failure the source is included in the message.
    macro_rules! test_parse {
        ($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => {
            #[allow(unused)]
            let mut p = super::Parser::new();
            p.parse($src);
            let actual = p.collect::<Vec<_>>();
            let expected = &[$($($token),*,)?];
            assert_eq!(actual, expected, "\n\n{}\n\n", $src);
        };
    }

    impl super::NodeKind {
        /// Test helper: builds a `Node` of this kind covering `start..end`.
        pub fn span(self, start: usize, end: usize) -> super::Node {
            super::Node {
                span: Span::new(start, end),
                kind: self,
            }
        }
    }

    #[test]
    fn str() {
        test_parse!("abc", Node(Str.span(0, 3)));
        test_parse!("abc def", Node(Str.span(0, 7)));
    }

    #[test]
    fn verbatim() {
        test_parse!("`abc`", Node(Verbatim.span(1, 4)));
        // An unterminated verbatim runs to the end of the input.
        test_parse!("`abc", Node(Verbatim.span(1, 4)));
        test_parse!("``abc``", Node(Verbatim.span(2, 5)));
        test_parse!("abc `def`", Node(Str.span(0, 4)), Node(Verbatim.span(5, 8)));
    }

    #[test]
    fn math() {
        test_parse!("$`abc`", Node(InlineMath.span(2, 5)));
        test_parse!("$$```abc", Node(DisplayMath.span(5, 8)));
    }

    #[test]
    fn container_basic() {
        test_parse!(
            "_abc_",
            Enter(Emphasis, Closed),
            Node(Str.span(1, 4)),
            Exit(Emphasis)
        );
        test_parse!(
            "{_abc_}",
            Enter(Emphasis, Closed),
            Node(Str.span(2, 5)),
            Exit(Emphasis)
        );
    }

    #[test]
    fn container_nest() {
        test_parse!(
            "{_{_abc_}_}",
            Enter(Emphasis, Closed),
            Enter(Emphasis, Closed),
            Node(Str.span(4, 7)),
            Exit(Emphasis),
            Exit(Emphasis)
        );
        test_parse!(
            "*_abc_*",
            Enter(Strong, Closed),
            Enter(Emphasis, Closed),
            Node(Str.span(2, 5)),
            Exit(Emphasis),
            Exit(Strong)
        );
    }

    // NOTE(review): expects the unmatched closer and the following text to come
    // out as one `Str`; that appears to depend on the
    // `TODO merge str/unclosed enters` in `Iterator::next`.
    #[test]
    fn container_unopened() {
        test_parse!("*}abc", Node(Str.span(0, 5)),);
    }

    // Closing an outer container leaves the inner one `Unclosed`, without an `Exit`.
    #[test]
    fn container_close_parent() {
        test_parse!(
            "{*{_abc*}",
            Enter(Strong, Closed),
            Enter(Emphasis, Unclosed),
            Node(Str.span(4, 7)),
            Exit(Strong),
        );
    }

    // Containers still open at the end of the input stay `Unclosed`.
    #[test]
    fn container_close_block() {
        test_parse!("{_abc", Enter(Emphasis, Unclosed), Node(Str.span(2, 5)));
        test_parse!(
            "{_{*{_abc",
            Enter(Emphasis, Unclosed),
            Enter(Strong, Unclosed),
            Enter(Emphasis, Unclosed),
            Node(Str.span(6, 9)),
        );
    }
}
|