jotdown/src/inline.rs

255 lines
7.2 KiB
Rust
Raw Normal View History

2022-11-21 13:44:59 -05:00
use crate::lex;
2022-11-21 16:40:11 -05:00
use crate::Span;
2022-11-16 16:11:55 -05:00
2022-11-21 13:44:59 -05:00
use lex::Delimiter;
use lex::Symbol;
2022-11-16 16:11:55 -05:00
use Atom::*;
use Container::*;
/// A leaf inline element, reported by the parser as a single [`Event::Atom`].
///
/// The enum is fieldless, so it is `Copy` (like [`Container`]) and can be
/// passed around by value without cloning.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum Atom {
    Str,
    Softbreak,
    Hardbreak,
    Escape,
    Nbsp,
    OpenMarker, // ??
    Ellipses,
    ImageMarker, // ??
    EmDash,
    EnDash,
    FootnoteReference,
    Link,
    ReferenceLink,
    Emoji,
    RawFormat,
    // math
    DisplayMath,
    InlineMath,
    Verbatim,
}
2022-11-20 13:13:48 -05:00
/// An inline element that can wrap other inline content.
///
/// The parser reports a container as a pair of events: [`Event::Enter`] when
/// it is opened and [`Event::Exit`] when it is closed.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Container {
    // attributes
    Attributes,
    Span,

    // typesetting
    Subscript,
    Superscript,
    Insert,
    Delete,
    Emphasis,
    Strong,
    Mark,

    // smart quoting
    SingleQuoted,
    DoubleQuoted,

    // URLs
    AutoUrl,
    Url,
    ImageText,
    LinkText,
    Reference,
    Destination,
}
2022-11-21 16:40:11 -05:00
#[derive(Debug, PartialEq, Eq)]
2022-11-21 13:44:59 -05:00
pub enum Event {
2022-11-22 13:19:21 -05:00
Enter(Container),
Exit(Container),
2022-11-21 13:44:59 -05:00
Atom(Atom),
2022-11-20 13:13:48 -05:00
}
2022-11-22 13:19:21 -05:00
/// The role(s) a delimiter token is allowed to play for its container.
#[derive(Clone, Copy, Debug)]
pub enum Dir {
    /// The delimiter can only open a container.
    Open,
    /// The delimiter can only close a container.
    Close,
    /// The delimiter can either open or close a container.
    Both,
}
2022-11-16 16:11:55 -05:00
2022-11-22 13:19:21 -05:00
pub struct Parser<'s> {
2022-11-21 13:44:59 -05:00
openers: Vec<Container>,
2022-11-21 13:56:11 -05:00
events: Vec<Event>,
2022-11-22 13:19:21 -05:00
lexer: Option<std::iter::Peekable<lex::Lexer<'s>>>,
2022-11-16 16:11:55 -05:00
}
2022-11-22 13:19:21 -05:00
impl<'s> Parser<'s> {
2022-11-21 13:56:11 -05:00
pub fn new() -> Self {
2022-11-16 16:11:55 -05:00
Self {
openers: Vec::new(),
2022-11-21 13:56:11 -05:00
events: Vec::new(),
2022-11-22 13:19:21 -05:00
lexer: None,
2022-11-20 13:13:48 -05:00
}
}
2022-11-22 13:19:21 -05:00
pub fn parse(&mut self, src: &'s str) {
self.lexer = Some(lex::Lexer::new(src).peekable());
}
}
impl<'s> Iterator for Parser<'s> {
    type Item = Event;

    /// Yields the next inline event, or `None` once a parse step produces
    /// nothing more (or no input has been attached).
    ///
    /// When the event buffer is empty, one parse step is run to refill it.
    /// A single step may push several events (e.g. closing nested containers
    /// pushes one `Exit` per container, innermost first), so the buffer is
    /// drained from the front to hand them out in the order they were
    /// produced. Popping from the back would reverse them.
    fn next(&mut self) -> Option<Self::Item> {
        if self.events.is_empty() {
            if let Some(lexer) = &mut self.lexer {
                Parse::new(lexer, &mut self.openers, &mut self.events).parse();
            }
        }

        if self.events.is_empty() {
            None
        } else {
            // The buffer only ever holds the output of one parse step, so the
            // front removal is cheap.
            Some(self.events.remove(0))
        }
    }
}
2022-11-21 17:32:28 -05:00
/// Borrowed view of the parser state used to run a single parse step.
struct Parse<'l, 's, 'e> {
    /// Token stream to read from.
    tokens: &'l mut std::iter::Peekable<lex::Lexer<'s>>,
    /// Stack of currently open containers, innermost last.
    openers: &'e mut Vec<Container>,
    /// Buffer that receives the events produced by the step.
    events: &'e mut Vec<Event>,
}
2022-11-21 17:32:28 -05:00
impl<'l, 's, 'e> Parse<'l, 's, 'e> {
    /// Bundles the token stream, opener stack and event buffer for one step.
    fn new(
        tokens: &'l mut std::iter::Peekable<lex::Lexer<'s>>,
        openers: &'e mut Vec<Container>,
        events: &'e mut Vec<Event>,
    ) -> Self {
        Self {
            tokens,
            openers,
            events,
        }
    }

    /// Returns the kind of the next token without consuming it.
    fn peek(&mut self) -> Option<&lex::Kind> {
        self.tokens.peek().map(|t| &t.kind)
    }

    /// Maps a delimiter token kind to the container it belongs to and the
    /// direction(s) in which it may act. Returns `None` for any other token.
    fn container_for(kind: &lex::Kind) -> Option<(Container, Dir)> {
        match kind {
            lex::Kind::Sym(Symbol::Asterisk) => Some((Strong, Dir::Both)),
            lex::Kind::Sym(Symbol::Underscore) => Some((Emphasis, Dir::Both)),
            lex::Kind::Sym(Symbol::Caret) => Some((Superscript, Dir::Both)),
            lex::Kind::Sym(Symbol::Tilde) => Some((Subscript, Dir::Both)),
            lex::Kind::Sym(Symbol::Quote1) => Some((SingleQuoted, Dir::Both)),
            lex::Kind::Sym(Symbol::Quote2) => Some((DoubleQuoted, Dir::Both)),
            lex::Kind::Open(Delimiter::Bracket) => Some((LinkText, Dir::Open)),
            lex::Kind::Open(Delimiter::BraceAsterisk) => Some((Strong, Dir::Open)),
            lex::Kind::Open(Delimiter::BraceCaret) => Some((Superscript, Dir::Open)),
            lex::Kind::Open(Delimiter::BraceEqual) => Some((Mark, Dir::Open)),
            lex::Kind::Open(Delimiter::BraceHyphen) => Some((Delete, Dir::Open)),
            lex::Kind::Open(Delimiter::BracePlus) => Some((Insert, Dir::Open)),
            lex::Kind::Open(Delimiter::BraceTilde) => Some((Subscript, Dir::Open)),
            lex::Kind::Open(Delimiter::BraceUnderscore) => Some((Emphasis, Dir::Open)),
            lex::Kind::Close(Delimiter::Bracket) => Some((LinkText, Dir::Close)),
            lex::Kind::Close(Delimiter::BraceAsterisk) => Some((Strong, Dir::Close)),
            lex::Kind::Close(Delimiter::BraceCaret) => Some((Superscript, Dir::Close)),
            lex::Kind::Close(Delimiter::BraceEqual) => Some((Mark, Dir::Close)),
            lex::Kind::Close(Delimiter::BraceHyphen) => Some((Delete, Dir::Close)),
            lex::Kind::Close(Delimiter::BracePlus) => Some((Insert, Dir::Close)),
            lex::Kind::Close(Delimiter::BraceTilde) => Some((Subscript, Dir::Close)),
            lex::Kind::Close(Delimiter::BraceUnderscore) => Some((Emphasis, Dir::Close)),
            _ => None,
        }
    }

    /// Runs one parse step: consumes the next token (or a whole verbatim/math
    /// run) and pushes the resulting event(s) onto the event buffer.
    ///
    /// Does nothing when the token stream is exhausted.
    ///
    /// # Panics
    ///
    /// Attribute parsing is not implemented yet: a plain opening brace token
    /// currently hits `todo!()`.
    fn parse(&mut self) {
        let t = match self.tokens.next() {
            Some(t) => t,
            None => return,
        };

        // Verbatim and math: an opening backtick run, optionally prefixed by
        // `$` (inline math) or `$$` (display math).
        let verbatim_opt = match t.kind {
            lex::Kind::Seq(lex::Sequence::Dollar) if t.len <= 2 => {
                let backticks_follow = matches!(
                    self.peek(),
                    Some(lex::Kind::Seq(lex::Sequence::Backtick))
                );
                if backticks_follow {
                    let atom = if t.len == 2 { DisplayMath } else { InlineMath };
                    // Consume the opening backticks; the closing run has to
                    // match *their* length, not the length of the `$` prefix.
                    self.tokens.next().map(|ticks| (atom, ticks.len))
                } else {
                    None
                }
            }
            lex::Kind::Seq(lex::Sequence::Backtick) => Some((Verbatim, t.len)),
            _ => None,
        };

        if let Some((atom, opener_len)) = verbatim_opt {
            // Skip tokens up to and including the matching closing run.
            let closed = self.tokens.any(|tok| {
                matches!(tok.kind, lex::Kind::Seq(lex::Sequence::Backtick))
                    && tok.len == opener_len
            });
            if closed {
                self.events.push(Event::Atom(atom));
                return;
            }
            // NOTE(review): an unclosed run has consumed the rest of the
            // input at this point and falls through to the plain-string
            // handling below; confirm this is the intended behaviour.
        }

        // Containers: close the nearest matching opener, or open a new one.
        if let Some((cont, dir)) = Self::container_for(&t.kind) {
            let may_close = matches!(dir, Dir::Close | Dir::Both);
            let may_open = matches!(dir, Dir::Open | Dir::Both);

            if may_close && self.openers.contains(&cont) {
                // Exit every container opened after `cont`, then `cont` itself.
                while let Some(c) = self.openers.pop() {
                    self.events.push(Event::Exit(c));
                    if c == cont {
                        break;
                    }
                }
                return;
            } else if may_open {
                self.openers.push(cont);
                self.events.push(Event::Enter(cont));
                return;
            }
        }

        if let lex::Kind::Open(Delimiter::Brace) = t.kind {
            todo!(); // check for attr
        }

        // Anything else is plain text; do not push a second consecutive `Str`.
        if !matches!(self.events.last(), Some(Event::Atom(Str))) {
            self.events.push(Event::Atom(Str));
        }
    }
}
2022-11-20 13:13:48 -05:00
2022-11-21 16:40:11 -05:00
#[cfg(test)]
mod test {
    use super::Atom::*;
    use super::Container::*;
    use super::Event::*;

    /// Parses the given source with a fresh parser and asserts that the
    /// collected events equal the listed ones. The source text is included in
    /// the failure message.
    macro_rules! test_parse {
        ($($tag:ident,)? $input:expr $(,$($event:expr),* $(,)?)?) => {
            #[allow(unused)]
            let mut parser = super::Parser::new();
            parser.parse($input);
            let produced: Vec<_> = parser.collect();
            let wanted = &[$($($event),*,)?];
            assert_eq!(produced, wanted, "\n\n{}\n\n", $input);
        };
    }

    #[test]
    fn str() {
        test_parse!("abc", Atom(Str));
        test_parse!("abc def", Atom(Str));
    }

    #[test]
    fn container_brace() {
        test_parse!("{_abc_}", Enter(Emphasis), Atom(Str), Exit(Emphasis));
    }
}