// jotdown/src/inline.rs

use crate::attr;
use crate::lex;
use crate::Span;

use lex::Delimiter;
use lex::Symbol;

use Atom::*;
use Container::*;
#[derive(Debug, Clone, PartialEq, Eq)]
2022-11-21 13:44:59 -05:00
pub enum Atom {
2023-01-18 16:30:24 -05:00
FootnoteReference,
2023-02-04 11:10:38 -05:00
Symbol,
2022-11-16 16:11:55 -05:00
Softbreak,
Hardbreak,
Escape,
2022-11-21 13:44:59 -05:00
Nbsp,
2022-11-28 18:33:43 -05:00
Ellipsis,
2022-11-21 13:44:59 -05:00
EnDash,
2022-12-01 12:09:09 -05:00
EmDash,
2023-01-27 13:04:01 -05:00
Quote { ty: QuoteType, left: bool },
2022-11-16 16:11:55 -05:00
}
/// Inline element that may contain other events, delimited by an `Enter`/`Exit` event pair.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum Container {
    /// Generic span, created when attributes are attached to a bracketed span or a word.
    Span,
    Subscript,
    Superscript,
    Insert,
    Delete,
    Emphasis,
    Strong,
    Mark,
    /// Content between matching backtick sequences.
    Verbatim,
    /// Span is the format.
    RawFormat,
    /// Verbatim content preceded by a single `$`.
    InlineMath,
    /// Verbatim content preceded by `$$`.
    DisplayMath,
    /// Span is the reference link tag.
    ReferenceLink,
    /// Span is the reference link tag.
    ReferenceImage,
    /// Span is the URL.
    InlineLink,
    /// Span is the URL.
    InlineImage,
    /// Open delimiter span is URL, closing is '>'.
    Autolink,
}
/// Kind of quotation mark carried by [`Atom::Quote`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum QuoteType {
    /// Originates from a single quote (`'`).
    Single,
    /// Originates from a double quote (`"`).
    Double,
}
2023-02-04 14:10:56 -05:00
#[derive(Clone, Debug, PartialEq, Eq)]
2022-11-27 18:10:28 -05:00
pub enum EventKind {
2022-11-28 13:08:49 -05:00
Enter(Container),
2022-11-22 13:19:21 -05:00
Exit(Container),
2022-11-21 13:44:59 -05:00
Atom(Atom),
2022-12-08 11:42:54 -05:00
Str,
2023-01-15 17:48:55 -05:00
Whitespace,
2022-12-11 03:26:55 -05:00
Attributes,
Placeholder,
2022-11-20 13:13:48 -05:00
}
2023-02-04 14:10:56 -05:00
#[derive(Clone, Debug, PartialEq, Eq)]
2022-11-27 18:10:28 -05:00
pub struct Event {
pub kind: EventKind,
pub span: Span,
}
2023-01-17 16:36:10 -05:00
pub struct Parser<I: Iterator + Clone> {
2022-12-11 06:26:40 -05:00
/// Lexer, hosting upcoming source.
2022-12-11 12:47:00 -05:00
lexer: lex::Lexer<I>,
2022-12-11 06:26:40 -05:00
/// Span of current event.
span: Span,
/// Stack with kind and index of _potential_ openers for containers.
2022-12-22 14:39:11 -05:00
openers: Vec<(Delim, usize)>,
2022-12-11 06:26:40 -05:00
/// Buffer queue for next events. Events are buffered until no modifications due to future
/// characters are needed.
events: std::collections::VecDeque<Event>,
2022-11-16 16:11:55 -05:00
}
2022-12-11 12:47:00 -05:00
impl<I: Iterator<Item = char> + Clone> Parser<I> {
pub fn new(chars: I) -> Self {
2022-11-16 16:11:55 -05:00
Self {
2022-12-11 12:47:00 -05:00
lexer: lex::Lexer::new(chars),
2022-12-11 06:26:40 -05:00
span: Span::new(0, 0),
2022-12-22 14:39:11 -05:00
openers: Vec::new(),
2022-12-11 06:26:40 -05:00
events: std::collections::VecDeque::new(),
2022-11-20 13:13:48 -05:00
}
}
2022-11-26 19:12:56 -05:00
fn eat(&mut self) -> Option<lex::Token> {
let tok = self.lexer.next();
if let Some(t) = &tok {
self.span = self.span.extend(t.len);
2022-11-22 13:19:21 -05:00
}
2022-11-26 19:12:56 -05:00
tok
2022-11-21 13:56:11 -05:00
}
2022-11-26 19:12:56 -05:00
fn peek(&mut self) -> Option<&lex::Token> {
self.lexer.peek()
}
2022-11-21 13:56:11 -05:00
2022-11-26 19:12:56 -05:00
fn reset_span(&mut self) {
self.span = self.span.empty_after();
2022-11-21 13:56:11 -05:00
}
2022-11-26 19:12:56 -05:00
fn parse_event(&mut self) -> Option<Event> {
self.reset_span();
self.eat().map(|first| {
2022-12-11 14:49:57 -05:00
self.parse_verbatim(&first)
2023-01-15 17:48:55 -05:00
.or_else(|| self.parse_attributes(&first))
2022-12-24 05:18:15 -05:00
.or_else(|| self.parse_autolink(&first))
2023-02-04 11:10:38 -05:00
.or_else(|| self.parse_symbol(&first))
2023-01-18 16:30:24 -05:00
.or_else(|| self.parse_footnote_reference(&first))
2022-12-22 14:39:11 -05:00
.or_else(|| self.parse_container(&first))
2022-11-26 19:12:56 -05:00
.or_else(|| self.parse_atom(&first))
2022-12-08 11:42:54 -05:00
.unwrap_or(Event {
kind: if matches!(first.kind, lex::Kind::Whitespace) {
2023-01-15 17:48:55 -05:00
EventKind::Whitespace
} else {
EventKind::Str
},
2022-12-08 11:42:54 -05:00
span: self.span,
})
2022-11-26 19:12:56 -05:00
})
}
2022-11-21 16:40:11 -05:00
2022-12-11 14:49:57 -05:00
fn parse_verbatim(&mut self, first: &lex::Token) -> Option<Event> {
2022-12-11 15:43:22 -05:00
match first.kind {
lex::Kind::Seq(lex::Sequence::Dollar) => {
let math_opt = (first.len <= 2)
.then(|| {
if let Some(lex::Token {
kind: lex::Kind::Seq(lex::Sequence::Backtick),
len,
}) = self.peek()
{
Some((
if first.len == 2 {
DisplayMath
} else {
InlineMath
},
*len,
))
} else {
None
}
})
.flatten();
if math_opt.is_some() {
self.eat(); // backticks
}
math_opt
}
lex::Kind::Seq(lex::Sequence::Backtick) => Some((Verbatim, first.len)),
_ => None,
}
.map(|(mut kind, opener_len)| {
let opener_event = self.events.len();
self.events.push_back(Event {
kind: EventKind::Enter(kind),
span: self.span,
});
let mut span_inner = self.span.empty_after();
2022-12-13 14:44:58 -05:00
let mut span_outer = None;
2022-12-11 15:43:22 -05:00
2023-01-15 15:40:15 -05:00
let mut non_whitespace_first = None;
let mut non_whitespace_last = None;
2022-12-11 15:43:22 -05:00
while let Some(t) = self.eat() {
if matches!(t.kind, lex::Kind::Seq(lex::Sequence::Backtick)) && t.len == opener_len
2022-12-08 11:42:54 -05:00
{
2022-12-11 15:43:22 -05:00
if matches!(kind, Verbatim)
2022-12-11 14:49:57 -05:00
&& matches!(
self.lexer.peek().map(|t| &t.kind),
Some(lex::Kind::Open(Delimiter::BraceEqual))
2022-12-11 15:43:22 -05:00
)
{
2023-01-17 16:36:10 -05:00
let mut ahead = self.lexer.chars();
2022-12-11 14:49:57 -05:00
let mut end = false;
let len = (&mut ahead)
2023-01-17 16:36:10 -05:00
.skip(2) // {=
2022-12-11 14:49:57 -05:00
.take_while(|c| {
if *c == '{' {
return false;
}
if *c == '}' {
end = true;
};
!end && !c.is_whitespace()
})
2023-02-01 15:56:54 -05:00
.map(char::len_utf8)
.sum();
2022-12-11 14:49:57 -05:00
if len > 0 && end {
2023-01-17 16:36:10 -05:00
let tok = self.eat();
debug_assert_eq!(
tok,
Some(lex::Token {
kind: lex::Kind::Open(Delimiter::BraceEqual),
len: 2,
})
);
2022-12-11 14:49:57 -05:00
self.lexer = lex::Lexer::new(ahead);
let span_format = self.span.after(len);
2022-12-11 15:43:22 -05:00
kind = RawFormat;
self.events[opener_event].kind = EventKind::Enter(kind);
2022-12-11 14:49:57 -05:00
self.events[opener_event].span = span_format;
2022-12-13 14:44:58 -05:00
self.span = span_format.translate(1); // }
span_outer = Some(span_format);
2022-11-21 13:44:59 -05:00
}
2022-12-11 14:49:57 -05:00
}
2022-12-11 15:43:22 -05:00
break;
2022-11-21 13:44:59 -05:00
}
2023-01-15 15:40:15 -05:00
if !matches!(t.kind, lex::Kind::Whitespace) {
if non_whitespace_first.is_none() {
non_whitespace_first = Some((t.kind, span_inner.end()));
}
non_whitespace_last = Some((t.kind, span_inner.end() + t.len));
}
2022-12-11 15:43:22 -05:00
span_inner = span_inner.extend(t.len);
self.reset_span();
}
2023-01-15 15:40:15 -05:00
if let Some((lex::Kind::Seq(lex::Sequence::Backtick), pos)) = non_whitespace_first {
span_inner = span_inner.with_start(pos);
}
if let Some((lex::Kind::Seq(lex::Sequence::Backtick), pos)) = non_whitespace_last {
span_inner = span_inner.with_end(pos);
}
2022-12-11 15:43:22 -05:00
self.events.push_back(Event {
kind: EventKind::Str,
span: span_inner,
});
Event {
kind: EventKind::Exit(kind),
2022-12-13 14:44:58 -05:00
span: span_outer.unwrap_or(self.span),
2022-12-11 15:43:22 -05:00
}
})
2022-12-11 06:26:40 -05:00
}
2023-01-15 17:48:55 -05:00
fn parse_attributes(&mut self, first: &lex::Token) -> Option<Event> {
if first.kind == lex::Kind::Open(Delimiter::Brace) {
2023-01-17 16:36:10 -05:00
let mut ahead = self.lexer.chars();
let (mut attr_len, mut has_attr) = attr::valid(std::iter::once('{').chain(&mut ahead));
attr_len = attr_len.saturating_sub(1); // rm {
2023-01-15 17:48:55 -05:00
if attr_len > 0 {
while attr_len > 0 {
self.span = self.span.extend(attr_len);
self.lexer = lex::Lexer::new(ahead.clone());
let (l, non_empty) = attr::valid(&mut ahead);
attr_len = l;
has_attr |= non_empty;
2023-01-15 17:48:55 -05:00
}
let set_attr = has_attr
&& self
.events
.back()
.map_or(false, |e| e.kind == EventKind::Str);
Some(if set_attr {
let i = self
.events
.iter()
.rposition(|e| e.kind != EventKind::Str)
.map_or(0, |i| i + 1);
let span_str = self.events[i]
.span
.union(self.events[self.events.len() - 1].span);
self.events.drain(i..);
2023-01-15 17:48:55 -05:00
self.events.push_back(Event {
kind: EventKind::Attributes,
span: self.span,
});
self.events.push_back(Event {
kind: EventKind::Enter(Container::Span),
span: span_str.empty_before(),
});
self.events.push_back(Event {
kind: EventKind::Str,
span: span_str,
});
2023-01-15 17:48:55 -05:00
Event {
kind: EventKind::Exit(Container::Span),
span: span_str.empty_after(),
}
} else {
Event {
kind: EventKind::Placeholder,
span: self.span.empty_before(),
}
2023-01-15 17:48:55 -05:00
})
} else {
None
}
} else {
None
}
}
2022-12-24 05:18:15 -05:00
fn parse_autolink(&mut self, first: &lex::Token) -> Option<Event> {
if first.kind == lex::Kind::Sym(Symbol::Lt) {
2023-01-17 16:36:10 -05:00
let mut ahead = self.lexer.chars();
2022-12-24 05:18:15 -05:00
let mut end = false;
let mut is_url = false;
let len = (&mut ahead)
.take_while(|c| {
if *c == '<' {
return false;
}
2022-12-24 05:18:15 -05:00
if *c == '>' {
end = true;
};
if matches!(*c, ':' | '@') {
is_url = true;
}
!end && !c.is_whitespace()
})
2023-02-01 15:56:54 -05:00
.map(char::len_utf8)
.sum();
2022-12-24 05:18:15 -05:00
(end && is_url).then(|| {
self.lexer = lex::Lexer::new(ahead);
2023-01-28 10:37:35 -05:00
self.span = self.span.after(len);
2022-12-24 05:18:15 -05:00
self.events.push_back(Event {
kind: EventKind::Enter(Autolink),
span: self.span,
});
self.events.push_back(Event {
kind: EventKind::Str,
span: self.span,
});
self.span = self.span.after(1);
2022-12-24 05:18:15 -05:00
Event {
kind: EventKind::Exit(Autolink),
span: self.span,
}
})
} else {
None
2022-12-11 03:26:55 -05:00
}
2022-12-24 05:18:15 -05:00
}
2022-12-11 03:26:55 -05:00
2023-02-04 11:10:38 -05:00
fn parse_symbol(&mut self, first: &lex::Token) -> Option<Event> {
if first.kind == lex::Kind::Sym(Symbol::Colon) {
let mut ahead = self.lexer.chars();
let mut end = false;
let mut valid = true;
let len = (&mut ahead)
.take_while(|c| {
if *c == ':' {
end = true;
} else if !c.is_ascii_alphanumeric() && !matches!(c, '-' | '+' | '_') {
valid = false;
}
!end && !c.is_whitespace()
})
.map(char::len_utf8)
.sum();
(end && valid).then(|| {
self.lexer = lex::Lexer::new(ahead);
self.span = self.span.after(len);
let span = self.span;
self.span = self.span.after(1);
Event {
kind: EventKind::Atom(Symbol),
span,
}
})
} else {
None
}
}
2023-01-18 16:30:24 -05:00
fn parse_footnote_reference(&mut self, first: &lex::Token) -> Option<Event> {
if first.kind == lex::Kind::Open(Delimiter::Bracket)
&& matches!(
self.peek(),
Some(lex::Token {
kind: lex::Kind::Sym(Symbol::Caret),
..
})
)
{
let tok = self.eat();
debug_assert_eq!(
tok,
Some(lex::Token {
kind: lex::Kind::Sym(Symbol::Caret),
len: 1,
})
);
let mut ahead = self.lexer.chars();
let mut end = false;
let len = (&mut ahead)
.take_while(|c| {
if *c == '[' {
return false;
}
if *c == ']' {
end = true;
};
!end && *c != '\n'
})
2023-02-01 15:56:54 -05:00
.map(char::len_utf8)
.sum();
2023-01-18 16:30:24 -05:00
end.then(|| {
self.lexer = lex::Lexer::new(ahead);
self.span = self.span.after(len);
2023-01-18 16:30:24 -05:00
let ev = Event {
kind: EventKind::Atom(FootnoteReference),
span: self.span,
};
self.span = self.span.after(1);
2023-01-18 16:30:24 -05:00
ev
})
} else {
None
}
}
2022-12-24 05:18:15 -05:00
fn parse_container(&mut self, first: &lex::Token) -> Option<Event> {
2023-01-26 15:39:48 -05:00
Delim::from_token(first.kind).and_then(|(delim, dir)| {
2022-12-22 14:39:11 -05:00
self.openers
2022-11-27 17:56:19 -05:00
.iter()
.rposition(|(d, _)| {
*d == delim || matches!((d, delim), (Delim::Span(..), Delim::Span(..)))
})
2022-11-27 17:56:19 -05:00
.and_then(|o| {
2023-01-26 15:55:12 -05:00
if matches!(dir, Dir::Open) {
2023-01-26 15:39:48 -05:00
return None;
}
if matches!(dir, Dir::Both)
&& self.events.back().map_or(false, |ev| {
matches!(ev.kind, EventKind::Whitespace | EventKind::Atom(Softbreak))
})
{
return None;
}
2023-01-26 15:39:48 -05:00
let (d, e) = self.openers[o];
let e_attr = e;
let e_opener = e + 1;
if e_opener == self.events.len() - 1 {
// empty container
return None;
}
let inner_span = self.events[e_opener].span.between(self.span);
2023-01-27 13:04:01 -05:00
let mut event_closer = match DelimEventKind::from(d) {
DelimEventKind::Container(cont) => {
2023-01-26 15:39:48 -05:00
self.events[e_opener].kind = EventKind::Enter(cont);
Some(Event {
kind: EventKind::Exit(cont),
span: self.span,
})
}
2023-01-27 13:04:01 -05:00
DelimEventKind::Quote(ty) => {
self.events[e_opener].kind =
EventKind::Atom(Atom::Quote { ty, left: true });
Some(Event {
kind: EventKind::Atom(Atom::Quote { ty, left: false }),
span: self.span,
})
}
DelimEventKind::Span(ty) => self.post_span(ty, e_opener),
2023-01-26 15:39:48 -05:00
};
self.openers.drain(o..);
if let Some(event_closer) = &mut event_closer {
2023-01-30 17:26:35 -05:00
if event_closer.span.is_empty()
&& matches!(
2023-01-26 15:39:48 -05:00
event_closer.kind,
EventKind::Exit(
Container::ReferenceLink | Container::ReferenceImage
)
2023-01-30 17:26:35 -05:00
)
{
2023-01-26 15:39:48 -05:00
assert_eq!(self.events[e_opener].span, event_closer.span);
event_closer.span = inner_span;
self.events[e_opener].span = inner_span;
}
2023-01-26 15:39:48 -05:00
}
2023-01-26 15:39:48 -05:00
let mut ahead = self.lexer.chars();
let (mut attr_len, mut has_attr) = attr::valid(&mut ahead);
if attr_len > 0 {
let span_closer = self.span;
self.span = self.span.empty_after();
2023-01-26 15:39:48 -05:00
while attr_len > 0 {
self.span = self.span.extend(attr_len);
self.lexer = lex::Lexer::new(ahead.clone());
let (l, non_empty) = attr::valid(&mut ahead);
has_attr |= non_empty;
attr_len = l;
}
2023-01-26 15:39:48 -05:00
if has_attr {
self.events[e_attr] = Event {
kind: EventKind::Attributes,
span: self.span,
};
}
2023-01-15 13:50:48 -05:00
2023-01-26 15:39:48 -05:00
if event_closer.is_none() {
if has_attr {
self.events[e_opener].kind = EventKind::Enter(Container::Span);
2023-01-15 13:50:48 -05:00
}
2023-01-26 15:39:48 -05:00
event_closer = Some(Event {
kind: if has_attr {
EventKind::Exit(Container::Span)
} else {
EventKind::Str
},
span: span_closer,
});
2023-01-12 11:28:01 -05:00
}
2022-12-22 14:39:11 -05:00
}
2023-01-26 15:39:48 -05:00
event_closer
2022-11-27 17:56:19 -05:00
})
2023-01-26 15:39:48 -05:00
.or_else(|| {
2023-01-26 15:55:12 -05:00
if matches!(dir, Dir::Close) {
2023-01-26 15:39:48 -05:00
return None;
}
if matches!(dir, Dir::Both)
&& self
.peek()
.map_or(true, |t| matches!(t.kind, lex::Kind::Whitespace))
{
return None;
}
2023-01-27 13:04:01 -05:00
if matches!(delim, Delim::SingleQuoted | Delim::DoubleQuoted)
&& self
.events
.back()
.map_or(false, |ev| matches!(ev.kind, EventKind::Str))
{
return None;
}
2022-12-22 14:39:11 -05:00
self.openers.push((delim, self.events.len()));
// push dummy event in case attributes are encountered after closing delimiter
2023-01-12 11:28:01 -05:00
self.events.push_back(Event {
kind: EventKind::Placeholder,
2023-01-12 11:28:01 -05:00
span: Span::empty_at(self.span.start()),
});
2023-01-27 13:04:01 -05:00
// use non-opener for now, replace if closed later
2023-01-26 15:39:48 -05:00
Some(Event {
2023-01-27 13:04:01 -05:00
kind: match delim {
Delim::SingleQuoted => EventKind::Atom(Quote {
ty: QuoteType::Single,
left: false,
}),
Delim::DoubleQuoted => EventKind::Atom(Quote {
ty: QuoteType::Double,
left: true,
}),
_ => EventKind::Str,
},
2022-12-22 14:39:11 -05:00
span: self.span,
2023-01-26 15:39:48 -05:00
})
2022-11-27 17:56:19 -05:00
})
2022-11-26 19:12:56 -05:00
})
2022-12-22 14:39:11 -05:00
}
fn post_span(&mut self, ty: SpanType, opener_event: usize) -> Option<Event> {
2023-01-17 16:36:10 -05:00
let mut ahead = self.lexer.chars();
2022-12-22 14:39:11 -05:00
match ahead.next() {
Some(opener @ ('[' | '(')) => {
2022-12-24 05:18:15 -05:00
let img = ty == SpanType::Image;
2022-12-22 14:39:11 -05:00
let (closer, kind) = match opener {
2022-12-24 05:18:15 -05:00
'[' => (']', if img { ReferenceImage } else { ReferenceLink }),
'(' => (')', if img { InlineImage } else { InlineLink }),
2022-12-22 14:39:11 -05:00
_ => unreachable!(),
};
let mut end = false;
let len = (&mut ahead)
.take_while(|c| {
if *c == opener {
return false;
}
2022-12-22 14:39:11 -05:00
if *c == closer {
end = true;
};
!end
2022-12-22 14:39:11 -05:00
})
2023-02-01 15:56:54 -05:00
.map(char::len_utf8)
.sum();
2022-12-22 14:39:11 -05:00
end.then(|| {
let span = self.span.after(len).translate(1);
2022-12-22 14:39:11 -05:00
(kind, span)
})
}
_ => None,
}
.map(|(kind, span)| {
self.lexer = lex::Lexer::new(ahead);
self.events[opener_event].kind = EventKind::Enter(kind);
self.events[opener_event].span = span;
self.span = span.translate(1);
Event {
kind: EventKind::Exit(kind),
span,
}
2022-11-27 18:10:28 -05:00
})
2022-11-26 19:12:56 -05:00
}
2022-12-11 06:26:40 -05:00
fn parse_atom(&mut self, first: &lex::Token) -> Option<Event> {
2023-01-27 13:04:01 -05:00
let atom =
match first.kind {
lex::Kind::Newline => Softbreak,
lex::Kind::Hardbreak => Hardbreak,
lex::Kind::Escape => Escape,
lex::Kind::Nbsp => Nbsp,
2023-01-27 13:17:06 -05:00
lex::Kind::Seq(lex::Sequence::Period) if first.len >= 3 => {
while self.span.len() > 3 {
self.events.push_back(Event {
kind: EventKind::Atom(Ellipsis),
span: self.span.with_len(3),
});
self.span = self.span.skip(3);
}
if self.span.len() == 3 {
Ellipsis
} else {
return Some(Event {
kind: EventKind::Str,
span: self.span,
});
}
}
2023-01-28 03:30:28 -05:00
lex::Kind::Seq(lex::Sequence::Hyphen) if first.len >= 2 => {
let (m, n) = if first.len % 3 == 0 {
(first.len / 3, 0)
} else if first.len % 2 == 0 {
(0, first.len / 2)
} else {
let n = (1..).find(|n| (first.len - 2 * n) % 3 == 0).unwrap();
((first.len - 2 * n) / 3, n)
};
std::iter::repeat(EmDash)
.take(m)
.chain(std::iter::repeat(EnDash).take(n))
.for_each(|atom| {
let l = if matches!(atom, EnDash) { 2 } else { 3 };
self.events.push_back(Event {
kind: EventKind::Atom(atom),
span: self.span.with_len(l),
});
self.span = self.span.skip(l);
});
return self.events.pop_back();
}
2023-01-27 13:04:01 -05:00
lex::Kind::Open(lex::Delimiter::BraceQuote1) => Quote {
ty: QuoteType::Single,
left: true,
},
lex::Kind::Sym(lex::Symbol::Quote1)
| lex::Kind::Close(lex::Delimiter::BraceQuote1) => Quote {
ty: QuoteType::Single,
left: false,
},
lex::Kind::Open(lex::Delimiter::BraceQuote2) => Quote {
ty: QuoteType::Double,
left: true,
},
lex::Kind::Sym(lex::Symbol::Quote2)
| lex::Kind::Close(lex::Delimiter::BraceQuote2) => Quote {
ty: QuoteType::Double,
left: false,
},
_ => return None,
};
2022-12-11 06:26:40 -05:00
Some(Event {
kind: EventKind::Atom(atom),
span: self.span,
})
}
2022-11-26 19:12:56 -05:00
}
/// Distinguishes the two delimiter forms of a container that can be written both with and
/// without braces.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Directionality {
    /// Braced delimiter, e.g. `{*` / `*}`, which is explicitly an opener or a closer.
    Uni,
    /// Bare delimiter, e.g. `*`, which may act as either opener or closer.
    Bi,
}
/// Kind of bracketed span, determined by its opening delimiter.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum SpanType {
    /// Opened with `![`.
    Image,
    /// Opened with `[`.
    General,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
2022-12-22 14:39:11 -05:00
enum Delim {
Span(SpanType),
Strong(Directionality),
Emphasis(Directionality),
Superscript(Directionality),
Subscript(Directionality),
Mark,
Delete,
Insert,
2023-01-27 13:04:01 -05:00
SingleQuoted,
DoubleQuoted,
2022-12-22 14:39:11 -05:00
}
/// Roles a delimiter token is allowed to take.
#[derive(Debug, Clone, Copy)]
enum Dir {
    /// May only open a container.
    Open,
    /// May only close a container.
    Close,
    /// May open or close, depending on the surrounding context.
    Both,
}
2022-12-22 14:39:11 -05:00
impl Delim {
2022-12-24 05:18:15 -05:00
fn from_token(kind: lex::Kind) -> Option<(Self, Dir)> {
use Delim::*;
use Dir::{Both, Close, Open};
use Directionality::{Bi, Uni};
use SpanType::{General, Image};
match kind {
lex::Kind::Sym(Symbol::Asterisk) => Some((Strong(Bi), Both)),
lex::Kind::Sym(Symbol::Underscore) => Some((Emphasis(Bi), Both)),
lex::Kind::Sym(Symbol::Caret) => Some((Superscript(Bi), Both)),
lex::Kind::Sym(Symbol::Tilde) => Some((Subscript(Bi), Both)),
lex::Kind::Sym(Symbol::Quote1) => Some((SingleQuoted, Both)),
lex::Kind::Sym(Symbol::Quote2) => Some((DoubleQuoted, Both)),
lex::Kind::Sym(Symbol::ExclaimBracket) => Some((Span(Image), Open)),
lex::Kind::Open(Delimiter::Bracket) => Some((Span(General), Open)),
lex::Kind::Close(Delimiter::Bracket) => Some((Span(General), Close)),
lex::Kind::Open(Delimiter::BraceAsterisk) => Some((Strong(Uni), Open)),
lex::Kind::Close(Delimiter::BraceAsterisk) => Some((Strong(Uni), Close)),
lex::Kind::Open(Delimiter::BraceUnderscore) => Some((Emphasis(Uni), Open)),
lex::Kind::Close(Delimiter::BraceUnderscore) => Some((Emphasis(Uni), Close)),
lex::Kind::Open(Delimiter::BraceCaret) => Some((Superscript(Uni), Open)),
lex::Kind::Close(Delimiter::BraceCaret) => Some((Superscript(Uni), Close)),
lex::Kind::Open(Delimiter::BraceTilde) => Some((Subscript(Uni), Open)),
lex::Kind::Close(Delimiter::BraceTilde) => Some((Subscript(Uni), Close)),
lex::Kind::Open(Delimiter::BraceEqual) => Some((Mark, Open)),
lex::Kind::Close(Delimiter::BraceEqual) => Some((Mark, Close)),
lex::Kind::Open(Delimiter::BraceHyphen) => Some((Delete, Open)),
lex::Kind::Close(Delimiter::BraceHyphen) => Some((Delete, Close)),
lex::Kind::Open(Delimiter::BracePlus) => Some((Insert, Open)),
lex::Kind::Close(Delimiter::BracePlus) => Some((Insert, Close)),
2023-01-27 13:04:01 -05:00
lex::Kind::Open(Delimiter::BraceQuote1) => Some((SingleQuoted, Open)),
lex::Kind::Close(Delimiter::BraceQuote1) => Some((SingleQuoted, Close)),
lex::Kind::Open(Delimiter::BraceQuote2) => Some((DoubleQuoted, Open)),
lex::Kind::Close(Delimiter::BraceQuote2) => Some((DoubleQuoted, Close)),
2022-12-24 05:18:15 -05:00
_ => None,
}
}
2022-12-22 14:39:11 -05:00
}
2023-01-27 13:04:01 -05:00
enum DelimEventKind {
Container(Container),
Span(SpanType),
Quote(QuoteType),
}
2022-12-22 14:39:11 -05:00
2023-01-27 13:04:01 -05:00
impl From<Delim> for DelimEventKind {
fn from(d: Delim) -> Self {
2022-12-22 14:39:11 -05:00
match d {
2023-01-27 13:04:01 -05:00
Delim::Span(ty) => Self::Span(ty),
Delim::Strong(..) => Self::Container(Strong),
Delim::Emphasis(..) => Self::Container(Emphasis),
Delim::Superscript(..) => Self::Container(Superscript),
Delim::Subscript(..) => Self::Container(Subscript),
Delim::Mark => Self::Container(Mark),
Delim::Delete => Self::Container(Delete),
Delim::Insert => Self::Container(Insert),
Delim::SingleQuoted => Self::Quote(QuoteType::Single),
Delim::DoubleQuoted => Self::Quote(QuoteType::Double),
2022-12-22 14:39:11 -05:00
}
}
}
2022-12-11 12:47:00 -05:00
impl<I: Iterator<Item = char> + Clone> Iterator for Parser<I> {
2022-11-26 19:12:56 -05:00
type Item = Event;
fn next(&mut self) -> Option<Self::Item> {
2022-11-27 18:34:30 -05:00
while self.events.is_empty()
2022-12-22 14:39:11 -05:00
|| !self.openers.is_empty()
2023-01-15 17:48:55 -05:00
|| self // for merge or attributes
2022-11-28 13:08:49 -05:00
.events
.back()
2023-01-15 17:48:55 -05:00
.map_or(false, |ev| {
matches!(ev.kind, EventKind::Str | EventKind::Whitespace)
})
2022-11-27 18:34:30 -05:00
{
2022-11-27 17:56:19 -05:00
if let Some(ev) = self.parse_event() {
self.events.push_back(ev);
} else {
break;
}
}
2022-11-26 19:12:56 -05:00
2023-01-12 11:28:01 -05:00
self.events.pop_front().and_then(|e| {
match e.kind {
2023-02-01 12:14:37 -05:00
EventKind::Str if e.span.is_empty() => self.next(),
2023-01-15 17:48:55 -05:00
EventKind::Str | EventKind::Whitespace => {
2023-01-12 11:28:01 -05:00
// merge str events
let mut span = e.span;
while self.events.front().map_or(false, |e| {
2023-01-15 17:48:55 -05:00
matches!(
e.kind,
EventKind::Str | EventKind::Whitespace | EventKind::Placeholder
) && span.end() == e.span.start()
2023-01-12 11:28:01 -05:00
}) {
let ev = self.events.pop_front().unwrap();
span = span.union(ev.span);
}
Some(Event {
kind: EventKind::Str,
span,
})
2022-12-11 15:43:22 -05:00
}
EventKind::Placeholder => self.next(),
2023-01-12 11:28:01 -05:00
_ => Some(e),
2022-12-11 15:43:22 -05:00
}
})
2022-11-27 18:34:30 -05:00
}
}
#[cfg(test)]
mod test {
    use super::Atom::*;
    use super::Container::*;
    use super::EventKind::*;
    use super::Verbatim;

    /// Parses `$src` and asserts that the produced events, as `(kind, spanned source text)`
    /// pairs, equal the listed tokens.
    macro_rules! test_parse {
        ($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => {
            #[allow(unused)]
            let mut p = super::Parser::new($src.chars());
            let actual = p.map(|ev| (ev.kind, ev.span.of($src))).collect::<Vec<_>>();
            let expected = &[$($($token),*,)?];
            assert_eq!(actual, expected, "\n\n{}\n\n", $src);
        };
    }

    #[test]
    fn str() {
        test_parse!("abc", (Str, "abc"));
        test_parse!("abc def", (Str, "abc def"));
    }

    #[test]
    fn verbatim() {
        test_parse!(
            "`abc`",
            (Enter(Verbatim), "`"),
            (Str, "abc"),
            (Exit(Verbatim), "`"),
        );
        test_parse!(
            "`abc\ndef`",
            (Enter(Verbatim), "`"),
            (Str, "abc\ndef"),
            (Exit(Verbatim), "`"),
        );
        test_parse!(
            "`abc&def`",
            (Enter(Verbatim), "`"),
            (Str, "abc&def"),
            (Exit(Verbatim), "`"),
        );
        test_parse!(
            "`abc",
            (Enter(Verbatim), "`"),
            (Str, "abc"),
            (Exit(Verbatim), ""),
        );
        test_parse!(
            "``abc``",
            (Enter(Verbatim), "``"),
            (Str, "abc"),
            (Exit(Verbatim), "``"),
        );
        test_parse!(
            "abc `def`",
            (Str, "abc "),
            (Enter(Verbatim), "`"),
            (Str, "def"),
            (Exit(Verbatim), "`"),
        );
        test_parse!(
            "abc`def`",
            (Str, "abc"),
            (Enter(Verbatim), "`"),
            (Str, "def"),
            (Exit(Verbatim), "`"),
        );
    }

    #[test]
    fn verbatim_whitespace() {
        test_parse!(
            "`  `",
            (Enter(Verbatim), "`"),
            (Str, "  "),
            (Exit(Verbatim), "`"),
        );
        test_parse!(
            "` abc `",
            (Enter(Verbatim), "`"),
            (Str, " abc "),
            (Exit(Verbatim), "`"),
        );
    }

    #[test]
    fn verbatim_trim() {
        test_parse!(
            "` ``abc`` `",
            (Enter(Verbatim), "`"),
            (Str, "``abc``"),
            (Exit(Verbatim), "`"),
        );
    }

    #[test]
    fn math() {
        test_parse!(
            "$`abc`",
            (Enter(InlineMath), "$`"),
            (Str, "abc"),
            (Exit(InlineMath), "`"),
        );
        test_parse!(
            "$`abc` str",
            (Enter(InlineMath), "$`"),
            (Str, "abc"),
            (Exit(InlineMath), "`"),
            (Str, " str"),
        );
        test_parse!(
            "$$`abc`",
            (Enter(DisplayMath), "$$`"),
            (Str, "abc"),
            (Exit(DisplayMath), "`"),
        );
        test_parse!(
            "$`abc",
            (Enter(InlineMath), "$`"),
            (Str, "abc"),
            (Exit(InlineMath), ""),
        );
        test_parse!(
            "$```abc```",
            (Enter(InlineMath), "$```"),
            (Str, "abc"),
            (Exit(InlineMath), "```"),
        );
    }

    #[test]
    fn raw_format() {
        test_parse!(
            "`raw`{=format}",
            (Enter(RawFormat), "format"),
            (Str, "raw"),
            (Exit(RawFormat), "format"),
        );
        test_parse!(
            "before `raw`{=format} after",
            (Str, "before "),
            (Enter(RawFormat), "format"),
            (Str, "raw"),
            (Exit(RawFormat), "format"),
            (Str, " after"),
        );
    }

    #[test]
    fn raw_attr() {
        test_parse!(
            "`raw`{=format #id}",
            (Enter(Verbatim), "`"),
            (Str, "raw"),
            (Exit(Verbatim), "`"),
            (Str, "{=format #id}"),
        );
    }

    #[test]
    fn span_tag() {
        test_parse!(
            "[text][tag]",
            (Enter(ReferenceLink), "tag"),
            (Str, "text"),
            (Exit(ReferenceLink), "tag"),
        );
        test_parse!(
            "![text][tag]",
            (Enter(ReferenceImage), "tag"),
            (Str, "text"),
            (Exit(ReferenceImage), "tag"),
        );
        test_parse!(
            "before [text][tag] after",
            (Str, "before "),
            (Enter(ReferenceLink), "tag"),
            (Str, "text"),
            (Exit(ReferenceLink), "tag"),
            (Str, " after"),
        );
        test_parse!(
            "[[inner][i]][o]",
            (Enter(ReferenceLink), "o"),
            (Enter(ReferenceLink), "i"),
            (Str, "inner"),
            (Exit(ReferenceLink), "i"),
            (Exit(ReferenceLink), "o"),
        );
    }

    #[test]
    fn span_tag_empty() {
        test_parse!(
            "[text][]",
            (Enter(ReferenceLink), "text"),
            (Str, "text"),
            (Exit(ReferenceLink), "text"),
        );
        test_parse!(
            "![text][]",
            (Enter(ReferenceImage), "text"),
            (Str, "text"),
            (Exit(ReferenceImage), "text"),
        );
    }

    #[test]
    fn span_tag_empty_nested() {
        // TODO strip non str from tag?
        test_parse!(
            "[some _text_][]",
            (Enter(ReferenceLink), "some _text_"),
            (Str, "some "),
            (Enter(Emphasis), "_"),
            (Str, "text"),
            (Exit(Emphasis), "_"),
            (Exit(ReferenceLink), "some _text_"),
        );
    }

    #[test]
    fn span_url() {
        test_parse!(
            "before [text](url) after",
            (Str, "before "),
            (Enter(InlineLink), "url"),
            (Str, "text"),
            (Exit(InlineLink), "url"),
            (Str, " after"),
        );
        test_parse!(
            "[outer [inner](i)](o)",
            (Enter(InlineLink), "o"),
            (Str, "outer "),
            (Enter(InlineLink), "i"),
            (Str, "inner"),
            (Exit(InlineLink), "i"),
            (Exit(InlineLink), "o"),
        );
    }

    #[test]
    fn span_url_empty() {
        test_parse!(
            "before [text]() after",
            (Str, "before "),
            (Enter(InlineLink), ""),
            (Str, "text"),
            (Exit(InlineLink), ""),
            (Str, " after"),
        );
    }

    #[test]
    fn span() {
        test_parse!("[abc]", (Str, "[abc]"));
    }

    #[test]
    fn span_attr() {
        test_parse!(
            "[abc]{.def}",
            (Attributes, "{.def}"),
            (Enter(Span), "["),
            (Str, "abc"),
            (Exit(Span), "]"),
        );
        test_parse!("not a [span] {#id}.", (Str, "not a [span] "), (Str, "."));
    }

    #[test]
    fn autolink() {
        test_parse!(
            "<https://example.com>",
            (Enter(Autolink), "https://example.com"),
            (Str, "https://example.com"),
            (Exit(Autolink), ">")
        );
        test_parse!(
            "<a@b.c>",
            (Enter(Autolink), "a@b.c"),
            (Str, "a@b.c"),
            (Exit(Autolink), ">"),
        );
        test_parse!(
            "<http://a.b><http://c.d>",
            (Enter(Autolink), "http://a.b"),
            (Str, "http://a.b"),
            (Exit(Autolink), ">"),
            (Enter(Autolink), "http://c.d"),
            (Str, "http://c.d"),
            (Exit(Autolink), ">")
        );
        test_parse!("<not-a-url>", (Str, "<not-a-url>"));
    }

    #[test]
    fn footnote_reference() {
        test_parse!(
            "text[^footnote]. more text",
            (Str, "text"),
            (Atom(FootnoteReference), "footnote"),
            (Str, ". more text"),
        );
    }

    #[test]
    fn container_basic() {
        test_parse!(
            "_abc_",
            (Enter(Emphasis), "_"),
            (Str, "abc"),
            (Exit(Emphasis), "_"),
        );
        test_parse!(
            "{_abc_}",
            (Enter(Emphasis), "{_"),
            (Str, "abc"),
            (Exit(Emphasis), "_}"),
        );
    }

    #[test]
    fn container_nest() {
        test_parse!(
            "{_{_abc_}_}",
            (Enter(Emphasis), "{_"),
            (Enter(Emphasis), "{_"),
            (Str, "abc"),
            (Exit(Emphasis), "_}"),
            (Exit(Emphasis), "_}"),
        );
        test_parse!(
            "*_abc_*",
            (Enter(Strong), "*"),
            (Enter(Emphasis), "_"),
            (Str, "abc"),
            (Exit(Emphasis), "_"),
            (Exit(Strong), "*"),
        );
    }

    #[test]
    fn container_unopened() {
        test_parse!("*}abc", (Str, "*}abc"));
    }

    #[test]
    fn container_close_parent() {
        test_parse!(
            "{*{_abc*}",
            (Enter(Strong), "{*"),
            (Str, "{_abc"),
            (Exit(Strong), "*}"),
        );
    }

    #[test]
    fn container_close_block() {
        test_parse!("{_abc", (Str, "{_abc"));
        test_parse!("{_{*{_abc", (Str, "{_{*{_abc"));
    }

    #[test]
    fn container_attr() {
        test_parse!(
            "_abc def_{.attr}",
            (Attributes, "{.attr}"),
            (Enter(Emphasis), "_"),
            (Str, "abc def"),
            (Exit(Emphasis), "_"),
        );
    }

    #[test]
    fn container_attr_empty() {
        test_parse!(
            "_abc def_{}",
            (Enter(Emphasis), "_"),
            (Str, "abc def"),
            (Exit(Emphasis), "_"),
        );
        test_parse!(
            "_abc def_{ % comment % } ghi",
            (Enter(Emphasis), "_"),
            (Str, "abc def"),
            (Exit(Emphasis), "_"),
            (Str, " ghi"),
        );
    }

    #[test]
    fn container_attr_multiple() {
        test_parse!(
            "_abc def_{.a}{.b}{.c} {.d}",
            (Attributes, "{.a}{.b}{.c}"),
            (Enter(Emphasis), "_"),
            (Str, "abc def"),
            (Exit(Emphasis), "_"),
            (Str, " "),
        );
    }

    #[test]
    fn attr() {
        test_parse!(
            "word{a=b}",
            (Attributes, "{a=b}"),
            (Enter(Span), ""),
            (Str, "word"),
            (Exit(Span), ""),
        );
        test_parse!(
            "some word{.a}{.b} with attrs",
            (Str, "some "),
            (Attributes, "{.a}{.b}"),
            (Enter(Span), ""),
            (Str, "word"),
            (Exit(Span), ""),
            (Str, " with attrs"),
        );
    }

    #[test]
    fn attr_whitespace() {
        test_parse!("word {%comment%}", (Str, "word "));
        test_parse!("word {%comment%} word", (Str, "word "), (Str, " word"));
        test_parse!("word {a=b}", (Str, "word "));
    }

    #[test]
    fn attr_empty() {
        test_parse!("word{}", (Str, "word"));
        test_parse!("word{ % comment % } trail", (Str, "word"), (Str, " trail"));
    }
}