jotdown/src/inline.rs

264 lines
7.2 KiB
Rust
Raw Normal View History

2022-11-21 13:44:59 -05:00
use crate::lex;
2022-11-21 16:40:11 -05:00
use crate::Span;
2022-11-16 16:11:55 -05:00
2022-11-21 13:44:59 -05:00
use lex::Delimiter;
use lex::Symbol;
2022-11-16 16:11:55 -05:00
use Atom::*;
use Container::*;
/// Inline elements that are reported as a single [`Event::Atom`] rather than
/// as a start/end pair.
///
/// The enum is fieldless, so it derives `Copy` (like [`Container`]) and can be
/// passed around by value.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum Atom {
    Str,
    Softbreak,
    Hardbreak,
    Escape,
    Nbsp,
    OpenMarker, // ?? (role not settled yet)
    Ellipses,
    ImageMarker, // ?? (role not settled yet)
    EmDash,
    EnDash,
    FootnoteReference,
    Link,
    ReferenceLink,
    Emoji,
    RawFormat,
    // math
    DisplayMath,
    InlineMath,
    Verbatim,
}
2022-11-20 13:13:48 -05:00
/// Inline elements that enclose other events and are reported as a matching
/// [`Event::Start`] / [`Event::End`] pair.
///
/// The type is `Copy` because the parser keeps one value on its stack of open
/// containers and places another in the emitted event.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum Container {
    // attributes
    Attributes,
    Span,
    // typesetting
    Subscript,
    Superscript,
    Insert,
    Delete,
    Emphasis,
    Strong,
    Mark,
    // smart quoting
    SingleQuoted,
    DoubleQuoted,
    // URLs
    AutoUrl,
    Url,
    ImageText,
    LinkText,
    Reference,
    Destination,
}
2022-11-21 16:40:11 -05:00
#[derive(Debug, PartialEq, Eq)]
2022-11-21 13:44:59 -05:00
pub enum Event {
Start(Container),
2022-11-20 13:13:48 -05:00
End(Container),
2022-11-21 13:44:59 -05:00
Atom(Atom),
2022-11-20 13:13:48 -05:00
}
2022-11-21 13:44:59 -05:00
/*
2022-11-20 13:13:48 -05:00
#[derive(Debug)]
pub enum OpenerState {
Unclosed,
Closed,
Discarded,
}
2022-11-21 13:44:59 -05:00
*/
2022-11-20 13:13:48 -05:00
/// The role(s) a delimiter token may play with respect to a container.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Dir {
    /// The delimiter can only open a container.
    Open,
    /// The delimiter can only close a container.
    Close,
    /// The delimiter can either open or close a container.
    Both,
}
2022-11-16 16:11:55 -05:00
2022-11-21 13:56:11 -05:00
pub struct Parser {
2022-11-21 13:44:59 -05:00
openers: Vec<Container>,
2022-11-21 13:56:11 -05:00
events: Vec<Event>,
2022-11-16 16:11:55 -05:00
}
2022-11-21 13:56:11 -05:00
impl Parser {
pub fn new() -> Self {
2022-11-16 16:11:55 -05:00
Self {
openers: Vec::new(),
2022-11-21 13:56:11 -05:00
events: Vec::new(),
2022-11-20 13:13:48 -05:00
}
}
2022-11-21 16:40:11 -05:00
pub fn parse<'a>(&'a mut self, src: &'a str) -> impl Iterator<Item = Event> + 'a {
2022-11-21 17:32:28 -05:00
let mut lexer = lex::Lexer::new(src).peekable();
std::iter::from_fn(move || {
dbg!(&src);
2022-11-21 16:40:11 -05:00
if self.events.is_empty() {
2022-11-21 17:32:28 -05:00
Parse::new(&mut lexer, &mut self.openers, &mut self.events).parse();
2022-11-21 16:40:11 -05:00
}
self.events.pop()
})
2022-11-21 13:56:11 -05:00
}
}
2022-11-21 17:32:28 -05:00
/// Borrowed view of the state needed for one parsing step: the token stream
/// plus the opener stack and event buffer owned by [`Parser`].
struct Parse<'l, 's, 'e> {
    /// Token stream over the source, with one token of lookahead.
    tokens: &'l mut std::iter::Peekable<lex::Lexer<'s>>,
    /// Containers that are currently open, innermost last.
    openers: &'e mut Vec<Container>,
    /// Buffer that receives the events produced by this step.
    events: &'e mut Vec<Event>,
}
2022-11-21 17:32:28 -05:00
impl<'l, 's, 'e> Parse<'l, 's, 'e> {
fn new(
tokens: &'l mut std::iter::Peekable<lex::Lexer<'s>>,
openers: &'e mut Vec<Container>,
events: &'e mut Vec<Event>,
) -> Self {
2022-11-21 16:40:11 -05:00
Self {
2022-11-21 17:32:28 -05:00
tokens,
2022-11-21 16:40:11 -05:00
openers,
events,
}
2022-11-21 13:56:11 -05:00
}
2022-11-21 17:32:28 -05:00
/*
2022-11-21 16:40:11 -05:00
fn step(&mut self) -> lex::Token {
let token = self.lexer.next_token();
dbg!(&token, self.pos);
self.pos += token.len;
std::mem::replace(&mut self.next_token, token)
}
fn eat(&mut self) -> lex::Kind {
let end = self.pos;
let token = self.step();
self.span = Span::new(end - token.len, end);
token.kind
}
fn peek(&mut self) -> &lex::Kind {
&self.next_token.kind
}
2022-11-21 17:32:28 -05:00
*/
fn peek(&mut self) -> Option<&lex::Kind> {
self.tokens.peek().map(|t| &t.kind)
}
2022-11-21 16:40:11 -05:00
fn parse(&mut self) {
2022-11-21 17:32:28 -05:00
let mut t = if let Some(t) = self.tokens.next() {
t
} else {
return;
};
2022-11-21 16:40:11 -05:00
//dbg!(&kind);
{
2022-11-21 17:32:28 -05:00
let verbatim_opt = match t.kind {
2022-11-21 16:40:11 -05:00
lex::Kind::Seq(lex::Sequence::Dollar) => {
2022-11-21 17:32:28 -05:00
let math_opt = (t.len <= 2)
2022-11-21 16:40:11 -05:00
.then(|| {
2022-11-21 17:32:28 -05:00
if let Some(lex::Kind::Seq(lex::Sequence::Backtick)) = self.peek() {
Some((DisplayMath, t.len))
2022-11-21 16:40:11 -05:00
} else {
None
2022-11-21 13:44:59 -05:00
}
2022-11-21 16:40:11 -05:00
})
.flatten();
if math_opt.is_some() {
2022-11-21 17:32:28 -05:00
self.tokens.next(); // backticks
2022-11-21 13:44:59 -05:00
}
2022-11-21 16:40:11 -05:00
math_opt
2022-11-21 13:44:59 -05:00
}
2022-11-21 17:32:28 -05:00
lex::Kind::Seq(lex::Sequence::Backtick) => Some((Verbatim, t.len)),
2022-11-21 16:40:11 -05:00
_ => None,
};
if let Some((atom, opener_len)) = verbatim_opt {
2022-11-21 17:32:28 -05:00
for tok in &mut self.tokens {
if matches!(tok.kind, lex::Kind::Seq(lex::Sequence::Backtick))
&& tok.len == opener_len
{
self.events.push(Event::Atom(atom));
return;
}
2022-11-21 16:40:11 -05:00
}
2022-11-21 13:44:59 -05:00
}
2022-11-21 16:40:11 -05:00
}
2022-11-20 13:13:48 -05:00
2022-11-21 16:40:11 -05:00
{
2022-11-21 17:32:28 -05:00
let container_opt = match t.kind {
2022-11-21 16:40:11 -05:00
lex::Kind::Sym(Symbol::Asterisk) => Some((Strong, Dir::Both)),
lex::Kind::Sym(Symbol::Underscore) => Some((Emphasis, Dir::Both)),
lex::Kind::Sym(Symbol::Caret) => Some((Superscript, Dir::Both)),
lex::Kind::Sym(Symbol::Tilde) => Some((Subscript, Dir::Both)),
lex::Kind::Sym(Symbol::Quote1) => Some((SingleQuoted, Dir::Both)),
lex::Kind::Sym(Symbol::Quote2) => Some((DoubleQuoted, Dir::Both)),
lex::Kind::Open(Delimiter::Bracket) => Some((LinkText, Dir::Open)),
lex::Kind::Open(Delimiter::BraceAsterisk) => Some((Strong, Dir::Open)),
lex::Kind::Open(Delimiter::BraceCaret) => Some((Superscript, Dir::Open)),
lex::Kind::Open(Delimiter::BraceEqual) => Some((Mark, Dir::Open)),
lex::Kind::Open(Delimiter::BraceHyphen) => Some((Delete, Dir::Open)),
lex::Kind::Open(Delimiter::BracePlus) => Some((Insert, Dir::Open)),
lex::Kind::Open(Delimiter::BraceTilde) => Some((Subscript, Dir::Open)),
lex::Kind::Open(Delimiter::BraceUnderscore) => Some((Emphasis, Dir::Open)),
lex::Kind::Close(Delimiter::Bracket) => Some((LinkText, Dir::Close)),
lex::Kind::Close(Delimiter::BraceAsterisk) => Some((Strong, Dir::Close)),
lex::Kind::Close(Delimiter::BraceCaret) => Some((Superscript, Dir::Close)),
lex::Kind::Close(Delimiter::BraceEqual) => Some((Mark, Dir::Close)),
lex::Kind::Close(Delimiter::BraceHyphen) => Some((Delete, Dir::Close)),
lex::Kind::Close(Delimiter::BracePlus) => Some((Insert, Dir::Close)),
lex::Kind::Close(Delimiter::BraceTilde) => Some((Subscript, Dir::Close)),
lex::Kind::Close(Delimiter::BraceUnderscore) => Some((Emphasis, Dir::Close)),
_ => None,
};
if let Some((cont, ty)) = container_opt {
if matches!(ty, Dir::Close | Dir::Both) && self.openers.contains(&cont) {
loop {
let c = self.openers.pop().unwrap();
self.events.push(Event::End(c));
if c == cont {
break;
2022-11-21 13:44:59 -05:00
}
}
return;
2022-11-21 16:40:11 -05:00
} else if matches!(ty, Dir::Open | Dir::Both) {
self.openers.push(cont);
self.events.push(Event::Start(cont));
2022-11-21 13:44:59 -05:00
}
2022-11-21 16:40:11 -05:00
return;
2022-11-21 13:44:59 -05:00
}
2022-11-21 16:40:11 -05:00
}
2022-11-21 13:44:59 -05:00
2022-11-21 16:40:11 -05:00
{
2022-11-21 17:32:28 -05:00
if let lex::Kind::Open(Delimiter::Brace) = t.kind {
2022-11-21 16:40:11 -05:00
todo!(); // check for attr
2022-11-21 13:44:59 -05:00
}
2022-11-21 16:40:11 -05:00
}
2022-11-21 13:44:59 -05:00
2022-11-21 16:40:11 -05:00
if let Some(Event::Atom(Str)) = self.events.last() {
} else {
self.events.push(Event::Atom(Str));
2022-11-20 13:13:48 -05:00
}
2022-11-16 16:11:55 -05:00
}
}
2022-11-20 13:13:48 -05:00
2022-11-21 16:40:11 -05:00
#[cfg(test)]
mod test {
    use super::Atom::*;
    use super::Event::*;

    // NOTE(review): the expected event list looks like a placeholder for
    // work-in-progress parsing of `{_ ... _}` — confirm the intended events
    // (the parser pushes `Start`/`End` events for brace delimiters).
    #[test]
    fn container_brace() {
        let mut p = super::Parser::new();
        assert_eq!(
            &[Atom(Str)],
            p.parse("{_hej_}").collect::<Vec<_>>().as_slice(),
        );
    }
}