2022-11-21 13:44:59 -05:00
|
|
|
use crate::lex;
|
2022-11-21 16:40:11 -05:00
|
|
|
use crate::Span;
|
2022-11-16 16:11:55 -05:00
|
|
|
|
2022-11-21 13:44:59 -05:00
|
|
|
use lex::Delimiter;
|
|
|
|
use lex::Symbol;
|
2022-11-16 16:11:55 -05:00
|
|
|
|
|
|
|
use Atom::*;
|
|
|
|
use Container::*;
|
|
|
|
|
|
|
|
/// A leaf inline element: an event that stands on its own and has no
/// matching start/end pair.
///
/// The enum is fieldless, so it is `Copy` just like [`Container`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Atom {
    /// Plain text; the parser falls back to this for tokens it does not
    /// otherwise recognise.
    Str,
    Softbreak,
    Hardbreak,
    Escape,
    Nbsp,
    // NOTE(review): flagged as uncertain ("??") by the original author.
    OpenMarker,
    Ellipses,
    // NOTE(review): flagged as uncertain ("??") by the original author.
    ImageMarker,
    EmDash,
    EnDash,
    FootnoteReference,
    Link,
    ReferenceLink,
    Emoji,
    RawFormat,
    // math
    DisplayMath,
    InlineMath,
    /// Verbatim content delimited by backtick sequences.
    Verbatim,
}
|
|
|
|
|
2022-11-20 13:13:48 -05:00
|
|
|
/// An inline element that wraps other inline content and is therefore
/// reported as a start/end pair of events.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Container {
    // ---- attributes ----
    Attributes,
    Span,

    // ---- typesetting ----
    /// Opened/closed by `~` symbols or the tilde brace delimiters.
    Subscript,
    /// Opened/closed by `^` symbols or the caret brace delimiters.
    Superscript,
    /// Delimited by the plus brace delimiters.
    Insert,
    /// Delimited by the hyphen brace delimiters.
    Delete,
    /// Opened/closed by `_` symbols or the underscore brace delimiters.
    Emphasis,
    /// Opened/closed by `*` symbols or the asterisk brace delimiters.
    Strong,
    /// Delimited by the equal-sign brace delimiters.
    Mark,

    // ---- smart quoting ----
    SingleQuoted,
    DoubleQuoted,

    // ---- URLs ----
    AutoUrl,
    Url,
    ImageText,
    /// Delimited by square brackets.
    LinkText,
    Reference,
    Destination,
}
|
|
|
|
|
2022-11-21 16:40:11 -05:00
|
|
|
#[derive(Debug, PartialEq, Eq)]
|
2022-11-21 13:44:59 -05:00
|
|
|
pub enum Event {
|
|
|
|
Start(Container),
|
2022-11-20 13:13:48 -05:00
|
|
|
End(Container),
|
2022-11-21 13:44:59 -05:00
|
|
|
Atom(Atom),
|
2022-11-20 13:13:48 -05:00
|
|
|
}
|
|
|
|
|
2022-11-21 13:44:59 -05:00
|
|
|
/*
|
2022-11-20 13:13:48 -05:00
|
|
|
#[derive(Debug)]
|
|
|
|
pub enum OpenerState {
|
|
|
|
Unclosed,
|
|
|
|
Closed,
|
|
|
|
Discarded,
|
|
|
|
}
|
2022-11-21 13:44:59 -05:00
|
|
|
*/
|
2022-11-20 13:13:48 -05:00
|
|
|
|
|
|
|
/// Which role a delimiter token may play relative to a container.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Dir {
    /// The delimiter can only start a container.
    Open,
    /// The delimiter can only end a container.
    Close,
    /// The delimiter may start or end a container, depending on whether a
    /// matching container is currently open.
    Both,
}
|
2022-11-16 16:11:55 -05:00
|
|
|
|
2022-11-21 13:56:11 -05:00
|
|
|
pub struct Parser {
|
2022-11-21 13:44:59 -05:00
|
|
|
openers: Vec<Container>,
|
2022-11-21 13:56:11 -05:00
|
|
|
events: Vec<Event>,
|
2022-11-16 16:11:55 -05:00
|
|
|
}
|
|
|
|
|
2022-11-21 13:56:11 -05:00
|
|
|
impl Parser {
|
|
|
|
pub fn new() -> Self {
|
2022-11-16 16:11:55 -05:00
|
|
|
Self {
|
|
|
|
openers: Vec::new(),
|
2022-11-21 13:56:11 -05:00
|
|
|
events: Vec::new(),
|
2022-11-20 13:13:48 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-11-21 16:40:11 -05:00
|
|
|
pub fn parse<'a>(&'a mut self, src: &'a str) -> impl Iterator<Item = Event> + 'a {
|
|
|
|
std::iter::from_fn(|| {
|
|
|
|
if self.events.is_empty() {
|
|
|
|
Parse::new(src, &mut self.openers, &mut self.events).parse();
|
|
|
|
}
|
|
|
|
|
|
|
|
self.events.pop()
|
|
|
|
})
|
2022-11-21 13:56:11 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
struct Parse<'s> {
|
|
|
|
lexer: lex::Lexer<'s>,
|
2022-11-21 16:40:11 -05:00
|
|
|
openers: &'s mut Vec<Container>,
|
2022-11-21 13:56:11 -05:00
|
|
|
events: &'s mut Vec<Event>,
|
2022-11-21 16:40:11 -05:00
|
|
|
|
|
|
|
/// Next token to be eaten.
|
|
|
|
next_token: lex::Token,
|
|
|
|
/// Position after `next_token`.
|
|
|
|
pos: usize,
|
|
|
|
/// Span of last eaten token.
|
|
|
|
span: Span,
|
2022-11-21 13:56:11 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
impl<'s> Parse<'s> {
|
2022-11-21 16:40:11 -05:00
|
|
|
fn new(src: &'s str, openers: &'s mut Vec<Container>, events: &'s mut Vec<Event>) -> Self {
|
|
|
|
let mut lexer = lex::Lexer::new(src);
|
|
|
|
let next_token = lexer.next_token();
|
|
|
|
let pos = next_token.len;
|
|
|
|
Self {
|
|
|
|
lexer,
|
|
|
|
openers,
|
|
|
|
events,
|
|
|
|
next_token,
|
|
|
|
pos,
|
|
|
|
span: Span::new(0, 0),
|
|
|
|
}
|
2022-11-21 13:56:11 -05:00
|
|
|
}
|
|
|
|
|
2022-11-21 16:40:11 -05:00
|
|
|
fn step(&mut self) -> lex::Token {
|
|
|
|
let token = self.lexer.next_token();
|
|
|
|
dbg!(&token, self.pos);
|
|
|
|
self.pos += token.len;
|
|
|
|
std::mem::replace(&mut self.next_token, token)
|
|
|
|
}
|
|
|
|
|
|
|
|
fn eat(&mut self) -> lex::Kind {
|
|
|
|
let end = self.pos;
|
|
|
|
let token = self.step();
|
|
|
|
self.span = Span::new(end - token.len, end);
|
|
|
|
token.kind
|
|
|
|
}
|
|
|
|
|
|
|
|
fn peek(&mut self) -> &lex::Kind {
|
|
|
|
&self.next_token.kind
|
|
|
|
}
|
|
|
|
|
|
|
|
fn parse(&mut self) {
|
|
|
|
let mut kind = self.eat();
|
|
|
|
|
|
|
|
//dbg!(&kind);
|
|
|
|
|
|
|
|
if kind == lex::Kind::Eof {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
{
|
|
|
|
let verbatim_opt = match kind {
|
|
|
|
lex::Kind::Seq(lex::Sequence::Dollar) => {
|
|
|
|
let math_opt = (self.span.len() <= 2)
|
|
|
|
.then(|| {
|
|
|
|
if let lex::Kind::Seq(lex::Sequence::Backtick) = self.peek() {
|
|
|
|
Some((DisplayMath, self.span.len()))
|
|
|
|
} else {
|
|
|
|
None
|
2022-11-21 13:44:59 -05:00
|
|
|
}
|
2022-11-21 16:40:11 -05:00
|
|
|
})
|
|
|
|
.flatten();
|
|
|
|
if math_opt.is_some() {
|
|
|
|
self.eat(); // backticks
|
2022-11-21 13:44:59 -05:00
|
|
|
}
|
2022-11-21 16:40:11 -05:00
|
|
|
math_opt
|
2022-11-21 13:44:59 -05:00
|
|
|
}
|
2022-11-21 16:40:11 -05:00
|
|
|
lex::Kind::Seq(lex::Sequence::Backtick) => Some((Verbatim, self.span.len())),
|
|
|
|
_ => None,
|
|
|
|
};
|
|
|
|
|
|
|
|
if let Some((atom, opener_len)) = verbatim_opt {
|
|
|
|
while !matches!(kind, lex::Kind::Seq(lex::Sequence::Backtick))
|
|
|
|
|| self.span.len() != opener_len
|
|
|
|
{
|
|
|
|
kind = self.eat();
|
|
|
|
}
|
|
|
|
self.events.push(Event::Atom(atom));
|
|
|
|
return;
|
2022-11-21 13:44:59 -05:00
|
|
|
}
|
2022-11-21 16:40:11 -05:00
|
|
|
}
|
2022-11-20 13:13:48 -05:00
|
|
|
|
2022-11-21 16:40:11 -05:00
|
|
|
{
|
|
|
|
let container_opt = match kind {
|
|
|
|
lex::Kind::Sym(Symbol::Asterisk) => Some((Strong, Dir::Both)),
|
|
|
|
lex::Kind::Sym(Symbol::Underscore) => Some((Emphasis, Dir::Both)),
|
|
|
|
lex::Kind::Sym(Symbol::Caret) => Some((Superscript, Dir::Both)),
|
|
|
|
lex::Kind::Sym(Symbol::Tilde) => Some((Subscript, Dir::Both)),
|
|
|
|
lex::Kind::Sym(Symbol::Quote1) => Some((SingleQuoted, Dir::Both)),
|
|
|
|
lex::Kind::Sym(Symbol::Quote2) => Some((DoubleQuoted, Dir::Both)),
|
|
|
|
lex::Kind::Open(Delimiter::Bracket) => Some((LinkText, Dir::Open)),
|
|
|
|
lex::Kind::Open(Delimiter::BraceAsterisk) => Some((Strong, Dir::Open)),
|
|
|
|
lex::Kind::Open(Delimiter::BraceCaret) => Some((Superscript, Dir::Open)),
|
|
|
|
lex::Kind::Open(Delimiter::BraceEqual) => Some((Mark, Dir::Open)),
|
|
|
|
lex::Kind::Open(Delimiter::BraceHyphen) => Some((Delete, Dir::Open)),
|
|
|
|
lex::Kind::Open(Delimiter::BracePlus) => Some((Insert, Dir::Open)),
|
|
|
|
lex::Kind::Open(Delimiter::BraceTilde) => Some((Subscript, Dir::Open)),
|
|
|
|
lex::Kind::Open(Delimiter::BraceUnderscore) => Some((Emphasis, Dir::Open)),
|
|
|
|
lex::Kind::Close(Delimiter::Bracket) => Some((LinkText, Dir::Close)),
|
|
|
|
lex::Kind::Close(Delimiter::BraceAsterisk) => Some((Strong, Dir::Close)),
|
|
|
|
lex::Kind::Close(Delimiter::BraceCaret) => Some((Superscript, Dir::Close)),
|
|
|
|
lex::Kind::Close(Delimiter::BraceEqual) => Some((Mark, Dir::Close)),
|
|
|
|
lex::Kind::Close(Delimiter::BraceHyphen) => Some((Delete, Dir::Close)),
|
|
|
|
lex::Kind::Close(Delimiter::BracePlus) => Some((Insert, Dir::Close)),
|
|
|
|
lex::Kind::Close(Delimiter::BraceTilde) => Some((Subscript, Dir::Close)),
|
|
|
|
lex::Kind::Close(Delimiter::BraceUnderscore) => Some((Emphasis, Dir::Close)),
|
|
|
|
_ => None,
|
|
|
|
};
|
|
|
|
|
|
|
|
if let Some((cont, ty)) = container_opt {
|
|
|
|
if matches!(ty, Dir::Close | Dir::Both) && self.openers.contains(&cont) {
|
|
|
|
loop {
|
|
|
|
let c = self.openers.pop().unwrap();
|
|
|
|
self.events.push(Event::End(c));
|
|
|
|
if c == cont {
|
|
|
|
break;
|
2022-11-21 13:44:59 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return;
|
2022-11-21 16:40:11 -05:00
|
|
|
} else if matches!(ty, Dir::Open | Dir::Both) {
|
|
|
|
self.openers.push(cont);
|
|
|
|
self.events.push(Event::Start(cont));
|
2022-11-21 13:44:59 -05:00
|
|
|
}
|
2022-11-21 16:40:11 -05:00
|
|
|
return;
|
2022-11-21 13:44:59 -05:00
|
|
|
}
|
2022-11-21 16:40:11 -05:00
|
|
|
}
|
2022-11-21 13:44:59 -05:00
|
|
|
|
2022-11-21 16:40:11 -05:00
|
|
|
{
|
|
|
|
if let lex::Kind::Open(Delimiter::Brace) = kind {
|
|
|
|
todo!(); // check for attr
|
2022-11-21 13:44:59 -05:00
|
|
|
}
|
2022-11-21 16:40:11 -05:00
|
|
|
}
|
2022-11-21 13:44:59 -05:00
|
|
|
|
2022-11-21 16:40:11 -05:00
|
|
|
if let Some(Event::Atom(Str)) = self.events.last() {
|
|
|
|
} else {
|
|
|
|
self.events.push(Event::Atom(Str));
|
2022-11-20 13:13:48 -05:00
|
|
|
}
|
2022-11-16 16:11:55 -05:00
|
|
|
}
|
|
|
|
}
|
2022-11-20 13:13:48 -05:00
|
|
|
|
2022-11-21 16:40:11 -05:00
|
|
|
#[cfg(test)]
mod test {
    use super::Atom::*;
    use super::Container::Emphasis;
    use super::Event::*;

    /// Brace-underscore delimiters wrap their content in an `Emphasis`
    /// container.
    ///
    /// NOTE(review): assumes the lexer splits the input into an opening
    /// `{_`, one text token and a closing `_}` — confirm against `lex`.
    #[test]
    fn container_brace() {
        let mut p = super::Parser::new();
        let events: Vec<_> = p.parse("{_hej_}").collect();
        assert_eq!(events, [Start(Emphasis), Atom(Str), End(Emphasis)]);
    }
}
|