2022-11-21 13:44:59 -05:00
|
|
|
use crate::lex;
|
2022-11-21 16:40:11 -05:00
|
|
|
use crate::Span;
|
2022-11-16 16:11:55 -05:00
|
|
|
|
2022-11-21 13:44:59 -05:00
|
|
|
use lex::Delimiter;
|
|
|
|
use lex::Symbol;
|
2022-11-16 16:11:55 -05:00
|
|
|
|
|
|
|
use Atom::*;
|
|
|
|
use Container::*;
|
|
|
|
|
|
|
|
/// Leaf inline element, reported as a single [`Event::Atom`] instead of an
/// enter/exit pair.
///
/// The enum carries no data, so it is `Copy` just like [`Container`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Atom {
    /// Plain text. Also the fallback for tokens that have no other inline
    /// meaning.
    Str,
    Softbreak,
    Hardbreak,
    Escape,
    Nbsp,
    // NOTE(review): marked `??` by the original author; role not settled.
    OpenMarker,
    Ellipses,
    // NOTE(review): marked `??` by the original author; role not settled.
    ImageMarker,
    EmDash,
    EnDash,
    FootnoteReference,
    Link,
    ReferenceLink,
    Emoji,
    RawFormat,

    // math
    DisplayMath,
    InlineMath,

    /// Verbatim run opened by a backtick sequence and closed by a backtick
    /// sequence of equal length.
    Verbatim,
}
|
|
|
|
|
2022-11-20 13:13:48 -05:00
|
|
|
/// Inline element that wraps other content and is therefore reported as an
/// [`Event::Enter`] / [`Event::Exit`] pair.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Container {
    // --- attributes ---
    Attributes,
    Span,

    // --- typesetting ---
    /// Opened by `~` or its braced delimiter form.
    Subscript,
    /// Opened by `^` or its braced delimiter form.
    Superscript,
    Insert,
    Delete,
    /// Opened by `_` or its braced delimiter form.
    Emphasis,
    /// Opened by `*` or its braced delimiter form.
    Strong,
    Mark,

    // --- smart quoting ---
    SingleQuoted,
    DoubleQuoted,

    // --- URLs ---
    AutoUrl,
    Url,
    ImageText,
    /// Opened and closed by square brackets.
    LinkText,
    Reference,
    Destination,
}
|
|
|
|
|
2022-11-21 16:40:11 -05:00
|
|
|
#[derive(Debug, PartialEq, Eq)]
|
2022-11-21 13:44:59 -05:00
|
|
|
pub enum Event {
|
2022-11-22 13:19:21 -05:00
|
|
|
Enter(Container),
|
|
|
|
Exit(Container),
|
2022-11-21 13:44:59 -05:00
|
|
|
Atom(Atom),
|
2022-11-20 13:13:48 -05:00
|
|
|
}
|
|
|
|
|
2022-11-22 13:19:21 -05:00
|
|
|
/// Which role a delimiter token may play for its container.
///
/// `PartialEq`/`Eq` are derived so the value can be compared directly, like
/// the other enums in this module.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Dir {
    /// The token can only open a container.
    Open,
    /// The token can only close a container.
    Close,
    /// The token closes a matching open container if there is one, and opens
    /// a new container otherwise.
    Both,
}
|
2022-11-16 16:11:55 -05:00
|
|
|
|
2022-11-22 13:19:21 -05:00
|
|
|
pub struct Parser<'s> {
|
2022-11-21 13:44:59 -05:00
|
|
|
openers: Vec<Container>,
|
2022-11-21 13:56:11 -05:00
|
|
|
events: Vec<Event>,
|
2022-11-22 13:19:21 -05:00
|
|
|
lexer: Option<std::iter::Peekable<lex::Lexer<'s>>>,
|
2022-11-16 16:11:55 -05:00
|
|
|
}
|
|
|
|
|
2022-11-22 13:19:21 -05:00
|
|
|
impl<'s> Parser<'s> {
|
2022-11-21 13:56:11 -05:00
|
|
|
pub fn new() -> Self {
|
2022-11-16 16:11:55 -05:00
|
|
|
Self {
|
|
|
|
openers: Vec::new(),
|
2022-11-21 13:56:11 -05:00
|
|
|
events: Vec::new(),
|
2022-11-22 13:19:21 -05:00
|
|
|
lexer: None,
|
2022-11-20 13:13:48 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-11-22 13:19:21 -05:00
|
|
|
pub fn parse(&mut self, src: &'s str) {
|
|
|
|
self.lexer = Some(lex::Lexer::new(src).peekable());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'s> Iterator for Parser<'s> {
|
|
|
|
type Item = Event;
|
|
|
|
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
|
|
if self.events.is_empty() {
|
|
|
|
if let Some(lexer) = &mut self.lexer {
|
|
|
|
Parse::new(lexer, &mut self.openers, &mut self.events).parse();
|
2022-11-21 16:40:11 -05:00
|
|
|
}
|
2022-11-22 13:19:21 -05:00
|
|
|
}
|
2022-11-21 16:40:11 -05:00
|
|
|
|
2022-11-22 13:19:21 -05:00
|
|
|
self.events.pop()
|
2022-11-21 13:56:11 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-11-21 17:32:28 -05:00
|
|
|
struct Parse<'l, 's, 'e> {
|
|
|
|
tokens: &'l mut std::iter::Peekable<lex::Lexer<'s>>,
|
|
|
|
openers: &'e mut Vec<Container>,
|
|
|
|
events: &'e mut Vec<Event>,
|
2022-11-21 13:56:11 -05:00
|
|
|
}
|
|
|
|
|
2022-11-21 17:32:28 -05:00
|
|
|
impl<'l, 's, 'e> Parse<'l, 's, 'e> {
|
|
|
|
fn new(
|
|
|
|
tokens: &'l mut std::iter::Peekable<lex::Lexer<'s>>,
|
|
|
|
openers: &'e mut Vec<Container>,
|
|
|
|
events: &'e mut Vec<Event>,
|
|
|
|
) -> Self {
|
2022-11-21 16:40:11 -05:00
|
|
|
Self {
|
2022-11-21 17:32:28 -05:00
|
|
|
tokens,
|
2022-11-21 16:40:11 -05:00
|
|
|
openers,
|
|
|
|
events,
|
|
|
|
}
|
2022-11-21 13:56:11 -05:00
|
|
|
}
|
|
|
|
|
2022-11-21 17:32:28 -05:00
|
|
|
fn peek(&mut self) -> Option<&lex::Kind> {
|
|
|
|
self.tokens.peek().map(|t| &t.kind)
|
|
|
|
}
|
2022-11-21 16:40:11 -05:00
|
|
|
|
|
|
|
fn parse(&mut self) {
|
2022-11-21 17:32:28 -05:00
|
|
|
let mut t = if let Some(t) = self.tokens.next() {
|
|
|
|
t
|
|
|
|
} else {
|
|
|
|
return;
|
|
|
|
};
|
2022-11-21 16:40:11 -05:00
|
|
|
|
|
|
|
{
|
2022-11-21 17:32:28 -05:00
|
|
|
let verbatim_opt = match t.kind {
|
2022-11-21 16:40:11 -05:00
|
|
|
lex::Kind::Seq(lex::Sequence::Dollar) => {
|
2022-11-21 17:32:28 -05:00
|
|
|
let math_opt = (t.len <= 2)
|
2022-11-21 16:40:11 -05:00
|
|
|
.then(|| {
|
2022-11-21 17:32:28 -05:00
|
|
|
if let Some(lex::Kind::Seq(lex::Sequence::Backtick)) = self.peek() {
|
|
|
|
Some((DisplayMath, t.len))
|
2022-11-21 16:40:11 -05:00
|
|
|
} else {
|
|
|
|
None
|
2022-11-21 13:44:59 -05:00
|
|
|
}
|
2022-11-21 16:40:11 -05:00
|
|
|
})
|
|
|
|
.flatten();
|
|
|
|
if math_opt.is_some() {
|
2022-11-21 17:32:28 -05:00
|
|
|
self.tokens.next(); // backticks
|
2022-11-21 13:44:59 -05:00
|
|
|
}
|
2022-11-21 16:40:11 -05:00
|
|
|
math_opt
|
2022-11-21 13:44:59 -05:00
|
|
|
}
|
2022-11-21 17:32:28 -05:00
|
|
|
lex::Kind::Seq(lex::Sequence::Backtick) => Some((Verbatim, t.len)),
|
2022-11-21 16:40:11 -05:00
|
|
|
_ => None,
|
|
|
|
};
|
|
|
|
|
|
|
|
if let Some((atom, opener_len)) = verbatim_opt {
|
2022-11-21 17:32:28 -05:00
|
|
|
for tok in &mut self.tokens {
|
|
|
|
if matches!(tok.kind, lex::Kind::Seq(lex::Sequence::Backtick))
|
|
|
|
&& tok.len == opener_len
|
|
|
|
{
|
|
|
|
self.events.push(Event::Atom(atom));
|
|
|
|
return;
|
|
|
|
}
|
2022-11-21 16:40:11 -05:00
|
|
|
}
|
2022-11-21 13:44:59 -05:00
|
|
|
}
|
2022-11-21 16:40:11 -05:00
|
|
|
}
|
2022-11-20 13:13:48 -05:00
|
|
|
|
2022-11-21 16:40:11 -05:00
|
|
|
{
|
2022-11-21 17:32:28 -05:00
|
|
|
let container_opt = match t.kind {
|
2022-11-21 16:40:11 -05:00
|
|
|
lex::Kind::Sym(Symbol::Asterisk) => Some((Strong, Dir::Both)),
|
|
|
|
lex::Kind::Sym(Symbol::Underscore) => Some((Emphasis, Dir::Both)),
|
|
|
|
lex::Kind::Sym(Symbol::Caret) => Some((Superscript, Dir::Both)),
|
|
|
|
lex::Kind::Sym(Symbol::Tilde) => Some((Subscript, Dir::Both)),
|
|
|
|
lex::Kind::Sym(Symbol::Quote1) => Some((SingleQuoted, Dir::Both)),
|
|
|
|
lex::Kind::Sym(Symbol::Quote2) => Some((DoubleQuoted, Dir::Both)),
|
|
|
|
lex::Kind::Open(Delimiter::Bracket) => Some((LinkText, Dir::Open)),
|
|
|
|
lex::Kind::Open(Delimiter::BraceAsterisk) => Some((Strong, Dir::Open)),
|
|
|
|
lex::Kind::Open(Delimiter::BraceCaret) => Some((Superscript, Dir::Open)),
|
|
|
|
lex::Kind::Open(Delimiter::BraceEqual) => Some((Mark, Dir::Open)),
|
|
|
|
lex::Kind::Open(Delimiter::BraceHyphen) => Some((Delete, Dir::Open)),
|
|
|
|
lex::Kind::Open(Delimiter::BracePlus) => Some((Insert, Dir::Open)),
|
|
|
|
lex::Kind::Open(Delimiter::BraceTilde) => Some((Subscript, Dir::Open)),
|
|
|
|
lex::Kind::Open(Delimiter::BraceUnderscore) => Some((Emphasis, Dir::Open)),
|
|
|
|
lex::Kind::Close(Delimiter::Bracket) => Some((LinkText, Dir::Close)),
|
|
|
|
lex::Kind::Close(Delimiter::BraceAsterisk) => Some((Strong, Dir::Close)),
|
|
|
|
lex::Kind::Close(Delimiter::BraceCaret) => Some((Superscript, Dir::Close)),
|
|
|
|
lex::Kind::Close(Delimiter::BraceEqual) => Some((Mark, Dir::Close)),
|
|
|
|
lex::Kind::Close(Delimiter::BraceHyphen) => Some((Delete, Dir::Close)),
|
|
|
|
lex::Kind::Close(Delimiter::BracePlus) => Some((Insert, Dir::Close)),
|
|
|
|
lex::Kind::Close(Delimiter::BraceTilde) => Some((Subscript, Dir::Close)),
|
|
|
|
lex::Kind::Close(Delimiter::BraceUnderscore) => Some((Emphasis, Dir::Close)),
|
|
|
|
_ => None,
|
|
|
|
};
|
|
|
|
|
2022-11-22 13:19:21 -05:00
|
|
|
if let Some((cont, dir)) = container_opt {
|
|
|
|
if matches!(dir, Dir::Close | Dir::Both) && self.openers.contains(&cont) {
|
2022-11-21 16:40:11 -05:00
|
|
|
loop {
|
|
|
|
let c = self.openers.pop().unwrap();
|
2022-11-22 13:19:21 -05:00
|
|
|
self.events.push(Event::Exit(c));
|
2022-11-21 16:40:11 -05:00
|
|
|
if c == cont {
|
|
|
|
break;
|
2022-11-21 13:44:59 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return;
|
2022-11-22 13:19:21 -05:00
|
|
|
} else if matches!(dir, Dir::Open | Dir::Both) {
|
2022-11-21 16:40:11 -05:00
|
|
|
self.openers.push(cont);
|
2022-11-22 13:19:21 -05:00
|
|
|
self.events.push(Event::Enter(cont));
|
|
|
|
return;
|
2022-11-21 13:44:59 -05:00
|
|
|
}
|
|
|
|
}
|
2022-11-21 16:40:11 -05:00
|
|
|
}
|
2022-11-21 13:44:59 -05:00
|
|
|
|
2022-11-21 16:40:11 -05:00
|
|
|
{
|
2022-11-21 17:32:28 -05:00
|
|
|
if let lex::Kind::Open(Delimiter::Brace) = t.kind {
|
2022-11-21 16:40:11 -05:00
|
|
|
todo!(); // check for attr
|
2022-11-21 13:44:59 -05:00
|
|
|
}
|
2022-11-21 16:40:11 -05:00
|
|
|
}
|
2022-11-21 13:44:59 -05:00
|
|
|
|
2022-11-21 16:40:11 -05:00
|
|
|
if let Some(Event::Atom(Str)) = self.events.last() {
|
|
|
|
} else {
|
|
|
|
self.events.push(Event::Atom(Str));
|
2022-11-20 13:13:48 -05:00
|
|
|
}
|
2022-11-16 16:11:55 -05:00
|
|
|
}
|
|
|
|
}
|
2022-11-20 13:13:48 -05:00
|
|
|
|
2022-11-21 16:40:11 -05:00
|
|
|
#[cfg(test)]
mod test {
    use super::Atom::*;
    use super::Container::*;
    use super::Event::*;

    /// Parses the source string and asserts that the collected events equal
    /// the listed ones; the source is echoed in the failure message.
    macro_rules! test_parse {
        ($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => {
            let mut parser = super::Parser::new();
            parser.parse($src);
            let actual = parser.collect::<Vec<_>>();
            let expected = &[$($($token),*,)?];
            assert_eq!(actual, expected, "\n\n{}\n\n", $src);
        };
    }

    /// Plain text, with or without a space, yields a single `Str` atom.
    #[test]
    fn str() {
        test_parse!("abc", Atom(Str));
        test_parse!("abc def", Atom(Str));
    }

    /// Braced emphasis delimiters produce an enter/exit pair around the text.
    #[test]
    fn container_brace() {
        test_parse!("{_abc_}", Enter(Emphasis), Atom(Str), Exit(Emphasis));
    }
}
|