2022-11-12 12:45:17 -05:00
|
|
|
use crate::Span;
|
|
|
|
use crate::EOF;
|
|
|
|
|
2022-12-18 12:05:39 -05:00
|
|
|
use crate::attr;
|
2022-11-12 12:45:17 -05:00
|
|
|
use crate::tree;
|
|
|
|
|
2022-12-10 04:26:06 -05:00
|
|
|
use Atom::*;
|
2022-11-12 12:45:17 -05:00
|
|
|
use Container::*;
|
|
|
|
use Leaf::*;
|
|
|
|
|
2022-12-12 12:22:13 -05:00
|
|
|
/// Block-level tree of a document: `tree::Tree` instantiated with [`Node`] and [`Atom`].
pub type Tree = tree::Tree<Node, Atom>;
|
|
|
|
/// Builder used by the parser to assemble a [`Tree`]: `tree::Builder` with [`Node`] and [`Atom`].
pub type TreeBuilder = tree::Builder<Node, Atom>;
|
|
|
|
|
|
|
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
|
|
|
pub enum Node {
|
|
|
|
Container(Container),
|
|
|
|
Leaf(Leaf),
|
|
|
|
}
|
2022-11-12 12:45:17 -05:00
|
|
|
|
2022-12-07 12:44:03 -05:00
|
|
|
#[must_use]
|
2022-11-12 12:45:17 -05:00
|
|
|
pub fn parse(src: &str) -> Tree {
|
2022-12-10 04:57:15 -05:00
|
|
|
TreeParser::new(src).parse()
|
2022-11-12 12:45:17 -05:00
|
|
|
}
|
|
|
|
|
2022-11-28 14:12:49 -05:00
|
|
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
2022-11-12 12:45:17 -05:00
|
|
|
pub enum Block {
|
2022-12-10 04:26:06 -05:00
|
|
|
/// An atomic block, containing no children elements.
|
|
|
|
Atom(Atom),
|
|
|
|
|
2022-12-07 12:44:03 -05:00
|
|
|
/// A leaf block, containing only inline elements.
|
2022-11-12 12:45:17 -05:00
|
|
|
Leaf(Leaf),
|
2022-12-07 12:44:03 -05:00
|
|
|
|
|
|
|
/// A container block, containing children blocks.
|
2022-11-12 12:45:17 -05:00
|
|
|
Container(Container),
|
|
|
|
}
|
|
|
|
|
2022-12-10 04:26:06 -05:00
|
|
|
/// Blocks that have neither inline content nor child blocks.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Atom {
    /// A line with no non-whitespace characters.
    Blankline,

    /// A list of attributes.
    Attributes,

    /// A thematic break.
    ThematicBreak,
}
|
|
|
|
|
2022-11-28 14:12:49 -05:00
|
|
|
/// Blocks whose children are inline spans only.
///
/// Each variant documents what the block's own span covers and what its
/// inlines are.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Leaf {
    /// Span is empty, before first character of paragraph.
    /// Each inline is a line.
    Paragraph,

    /// Span is `#` characters.
    /// Each inline is a line.
    Heading,

    /// Span is first `|` character.
    /// Each inline is a line (row).
    Table,

    /// Span is the link tag.
    /// Inlines are lines of the URL.
    LinkDefinition,

    /// Span is language specifier.
    /// Each inline is a line.
    CodeBlock,
}
|
|
|
|
|
2022-11-28 14:12:49 -05:00
|
|
|
/// Blocks whose children are other blocks.
///
/// Each variant documents what the block's own span covers.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Container {
    /// Span is `>`.
    Blockquote,

    /// Span is class specifier.
    Div,

    /// Span is the list marker.
    ListItem,

    /// Span is `[^`.
    Footnote,
}
|
|
|
|
|
2022-12-10 04:57:15 -05:00
|
|
|
/// Parser for block-level tree structure of entire document.
|
|
|
|
struct TreeParser<'s> {
|
2022-11-12 12:45:17 -05:00
|
|
|
src: &'s str,
|
2022-12-12 12:22:13 -05:00
|
|
|
tree: TreeBuilder,
|
2022-11-12 12:45:17 -05:00
|
|
|
}
|
|
|
|
|
2022-12-10 04:57:15 -05:00
|
|
|
impl<'s> TreeParser<'s> {
|
2022-11-12 12:45:17 -05:00
|
|
|
#[must_use]
|
|
|
|
pub fn new(src: &'s str) -> Self {
|
|
|
|
Self {
|
|
|
|
src,
|
2022-12-12 12:22:13 -05:00
|
|
|
tree: TreeBuilder::new(),
|
2022-11-12 12:45:17 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[must_use]
|
|
|
|
pub fn parse(mut self) -> Tree {
|
|
|
|
let mut lines = lines(self.src).collect::<Vec<_>>();
|
|
|
|
let mut line_pos = 0;
|
2022-11-28 14:12:49 -05:00
|
|
|
while line_pos < lines.len() {
|
2022-11-12 12:45:17 -05:00
|
|
|
let line_count = self.parse_block(&mut lines[line_pos..]);
|
|
|
|
if line_count == 0 {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
line_pos += line_count;
|
|
|
|
}
|
|
|
|
self.tree.finish()
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Recursively parse a block and all of its children. Return number of lines the block uses.
|
|
|
|
fn parse_block(&mut self, lines: &mut [Span]) -> usize {
|
2022-12-10 04:57:15 -05:00
|
|
|
BlockParser::parse(lines.iter().map(|sp| sp.of(self.src))).map_or(
|
2022-12-10 04:26:06 -05:00
|
|
|
0,
|
2022-12-10 04:57:15 -05:00
|
|
|
|(indent, kind, span, line_count)| {
|
2022-12-07 12:44:03 -05:00
|
|
|
let lines = {
|
|
|
|
let l = lines.len().min(line_count);
|
|
|
|
&mut lines[..l]
|
|
|
|
};
|
|
|
|
let truncated = lines.len() < line_count;
|
|
|
|
let span = span.translate(lines[0].start());
|
|
|
|
|
|
|
|
// skip part of first inline that is shared with the block span
|
|
|
|
lines[0] = lines[0].with_start(span.end());
|
|
|
|
|
|
|
|
// remove junk from footnotes / link defs
|
|
|
|
if matches!(
|
|
|
|
kind,
|
|
|
|
Block::Leaf(LinkDefinition) | Block::Container(Footnote { .. })
|
|
|
|
) {
|
|
|
|
assert_eq!(&lines[0].of(self.src).chars().as_str()[0..2], "]:");
|
|
|
|
lines[0] = lines[0].skip(2);
|
|
|
|
}
|
|
|
|
|
|
|
|
// skip closing fence of code blocks / divs
|
|
|
|
let lines = if !truncated
|
2022-12-10 04:57:15 -05:00
|
|
|
&& matches!(kind, Block::Leaf(CodeBlock) | Block::Container(Div))
|
|
|
|
{
|
2022-12-07 12:44:03 -05:00
|
|
|
let l = lines.len();
|
|
|
|
&mut lines[..l - 1]
|
|
|
|
} else {
|
|
|
|
lines
|
|
|
|
};
|
|
|
|
|
2022-12-10 04:26:06 -05:00
|
|
|
match kind {
|
2022-12-11 14:49:57 -05:00
|
|
|
Block::Atom(a) => self.tree.atom(a, span),
|
2022-11-28 14:30:18 -05:00
|
|
|
Block::Leaf(l) => {
|
2022-12-12 12:22:13 -05:00
|
|
|
self.tree.enter(Node::Leaf(l), span);
|
2022-12-07 12:44:03 -05:00
|
|
|
|
|
|
|
// trim starting whitespace of the block contents
|
|
|
|
lines[0] = lines[0].trim_start(self.src);
|
|
|
|
|
|
|
|
// skip first inline if empty (e.g. code block)
|
|
|
|
let lines = if lines[0].is_empty() {
|
|
|
|
&mut lines[1..]
|
|
|
|
} else {
|
|
|
|
lines
|
2022-11-28 14:30:18 -05:00
|
|
|
};
|
2022-12-07 12:44:03 -05:00
|
|
|
|
|
|
|
// trim ending whitespace of block if not verbatim
|
2022-12-10 04:57:15 -05:00
|
|
|
if !matches!(l, Leaf::CodeBlock) {
|
2022-12-10 02:37:00 -05:00
|
|
|
let l = lines.len();
|
|
|
|
if l > 0 {
|
|
|
|
let last = &mut lines[l - 1];
|
|
|
|
*last = last.trim_end(self.src);
|
|
|
|
}
|
2022-12-01 14:34:23 -05:00
|
|
|
}
|
2022-12-07 12:44:03 -05:00
|
|
|
|
2022-12-11 14:49:57 -05:00
|
|
|
lines.iter().for_each(|line| self.tree.inline(*line));
|
2022-12-10 04:26:06 -05:00
|
|
|
self.tree.exit();
|
2022-11-12 12:45:17 -05:00
|
|
|
}
|
2022-11-28 14:30:18 -05:00
|
|
|
Block::Container(c) => {
|
2022-12-10 04:57:15 -05:00
|
|
|
let (skip_chars, skip_lines_suffix) = match c {
|
2022-11-28 14:30:18 -05:00
|
|
|
Blockquote => (2, 0),
|
2022-12-10 04:57:15 -05:00
|
|
|
ListItem | Footnote => (indent, 0),
|
|
|
|
Div => (0, 1),
|
2022-11-28 14:30:18 -05:00
|
|
|
};
|
|
|
|
let line_count_inner = lines.len() - skip_lines_suffix;
|
|
|
|
|
|
|
|
// update spans, remove indentation / container prefix
|
|
|
|
lines
|
|
|
|
.iter_mut()
|
|
|
|
.skip(1)
|
|
|
|
.take(line_count_inner)
|
|
|
|
.for_each(|sp| {
|
|
|
|
let skip = (sp
|
|
|
|
.of(self.src)
|
|
|
|
.chars()
|
|
|
|
.take_while(|c| c.is_whitespace())
|
|
|
|
.count()
|
2022-12-10 04:57:15 -05:00
|
|
|
+ skip_chars)
|
|
|
|
.min(sp.len() - usize::from(sp.of(self.src).ends_with('\n')));
|
2022-12-06 15:55:46 -05:00
|
|
|
*sp = sp.skip(skip);
|
2022-11-28 14:30:18 -05:00
|
|
|
});
|
|
|
|
|
2022-12-12 12:22:13 -05:00
|
|
|
self.tree.enter(Node::Container(c), span);
|
2022-11-28 14:30:18 -05:00
|
|
|
let mut l = 0;
|
|
|
|
while l < line_count_inner {
|
|
|
|
l += self.parse_block(&mut lines[l..line_count_inner]);
|
|
|
|
}
|
2022-12-10 04:26:06 -05:00
|
|
|
self.tree.exit();
|
2022-11-12 12:45:17 -05:00
|
|
|
}
|
|
|
|
}
|
2022-12-10 04:26:06 -05:00
|
|
|
|
|
|
|
line_count
|
2022-11-28 14:30:18 -05:00
|
|
|
},
|
|
|
|
)
|
2022-11-12 12:45:17 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-12-10 04:57:15 -05:00
|
|
|
/// Parser for a single block.
|
|
|
|
struct BlockParser {
|
|
|
|
indent: usize,
|
|
|
|
kind: Block,
|
|
|
|
span: Span,
|
|
|
|
fence: Option<(char, usize)>,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl BlockParser {
|
2022-11-12 12:45:17 -05:00
|
|
|
/// Parse a single block. Return number of lines the block uses.
|
2022-12-10 04:57:15 -05:00
|
|
|
fn parse<'s, I: Iterator<Item = &'s str>>(mut lines: I) -> Option<(usize, Block, Span, usize)> {
|
2022-11-27 15:59:54 -05:00
|
|
|
lines.next().map(|l| {
|
2022-12-10 04:57:15 -05:00
|
|
|
let mut p = BlockParser::new(l);
|
|
|
|
let has_end_delimiter =
|
|
|
|
matches!(p.kind, Block::Leaf(CodeBlock) | Block::Container(Div));
|
|
|
|
let line_count_match = lines.take_while(|l| p.continues(l)).count();
|
2022-12-02 02:16:47 -05:00
|
|
|
let line_count = 1 + line_count_match + usize::from(has_end_delimiter);
|
2022-12-10 04:57:15 -05:00
|
|
|
(p.indent, p.kind, p.span, line_count)
|
2022-11-27 15:59:54 -05:00
|
|
|
})
|
2022-11-12 12:45:17 -05:00
|
|
|
}
|
|
|
|
|
2022-12-10 04:57:15 -05:00
|
|
|
fn new(line: &str) -> Self {
|
2022-12-10 04:26:06 -05:00
|
|
|
let start = line
|
|
|
|
.chars()
|
|
|
|
.take_while(|c| *c != '\n' && c.is_whitespace())
|
|
|
|
.count();
|
2022-12-07 12:44:03 -05:00
|
|
|
let line_t = &line[start..];
|
|
|
|
let mut chars = line_t.chars();
|
2022-12-06 15:31:08 -05:00
|
|
|
|
2022-12-10 04:57:15 -05:00
|
|
|
let mut fence = None;
|
|
|
|
let (kind, span) = match chars.next().unwrap_or(EOF) {
|
|
|
|
EOF => Some((Block::Atom(Blankline), Span::empty_at(start))),
|
|
|
|
'\n' => Some((Block::Atom(Blankline), Span::by_len(start, 1))),
|
2022-11-12 12:45:17 -05:00
|
|
|
'#' => chars
|
|
|
|
.find(|c| *c != '#')
|
|
|
|
.map_or(true, char::is_whitespace)
|
|
|
|
.then(|| {
|
2022-12-10 04:57:15 -05:00
|
|
|
(
|
|
|
|
Block::Leaf(Heading),
|
|
|
|
Span::by_len(start, line_t.len() - chars.as_str().len() - 1),
|
|
|
|
)
|
|
|
|
}),
|
2022-11-30 13:56:08 -05:00
|
|
|
'>' => {
|
|
|
|
if let Some(c) = chars.next() {
|
|
|
|
c.is_whitespace().then(|| {
|
|
|
|
(
|
2022-12-10 04:57:15 -05:00
|
|
|
Block::Container(Blockquote),
|
2022-12-07 12:44:03 -05:00
|
|
|
Span::by_len(start, line_t.len() - chars.as_str().len() - 1),
|
2022-11-30 13:56:08 -05:00
|
|
|
)
|
|
|
|
})
|
|
|
|
} else {
|
|
|
|
Some((
|
2022-12-10 04:57:15 -05:00
|
|
|
Block::Container(Blockquote),
|
2022-12-07 12:44:03 -05:00
|
|
|
Span::by_len(start, line_t.len() - chars.as_str().len()),
|
2022-11-30 13:56:08 -05:00
|
|
|
))
|
|
|
|
}
|
|
|
|
}
|
2023-01-16 11:24:27 -05:00
|
|
|
'{' => (attr::valid(line_t.chars()).0 == line_t.trim_end().len())
|
2022-12-18 12:05:39 -05:00
|
|
|
.then(|| (Block::Atom(Attributes), Span::by_len(start, line_t.len()))),
|
2022-12-07 12:44:03 -05:00
|
|
|
'|' => (&line_t[line_t.len() - 1..] == "|"
|
|
|
|
&& &line_t[line_t.len() - 2..line_t.len() - 1] != "\\")
|
2022-12-10 04:57:15 -05:00
|
|
|
.then(|| (Block::Leaf(Table), Span::by_len(start, 1))),
|
2022-12-07 12:44:03 -05:00
|
|
|
'[' => chars.as_str().find("]:").map(|l| {
|
|
|
|
let tag = &chars.as_str()[0..l];
|
|
|
|
let (tag, is_footnote) = if let Some(tag) = tag.strip_prefix('^') {
|
|
|
|
(tag, true)
|
|
|
|
} else {
|
|
|
|
(tag, false)
|
|
|
|
};
|
|
|
|
(
|
|
|
|
if is_footnote {
|
2022-12-10 04:57:15 -05:00
|
|
|
Block::Container(Footnote)
|
2022-12-07 12:44:03 -05:00
|
|
|
} else {
|
2022-12-10 04:57:15 -05:00
|
|
|
Block::Leaf(LinkDefinition)
|
2022-12-07 12:44:03 -05:00
|
|
|
},
|
|
|
|
Span::from_slice(line, tag),
|
|
|
|
)
|
|
|
|
}),
|
|
|
|
'-' | '*' if Self::is_thematic_break(chars.clone()) => Some((
|
2022-12-10 04:57:15 -05:00
|
|
|
Block::Atom(ThematicBreak),
|
2022-12-07 12:44:03 -05:00
|
|
|
Span::from_slice(line, line_t.trim()),
|
|
|
|
)),
|
2022-12-06 15:31:08 -05:00
|
|
|
'-' => chars.next().map_or(true, char::is_whitespace).then(|| {
|
|
|
|
let task_list = chars.next() == Some('[')
|
|
|
|
&& matches!(chars.next(), Some('X' | ' '))
|
|
|
|
&& chars.next() == Some(']')
|
|
|
|
&& chars.next().map_or(true, char::is_whitespace);
|
|
|
|
(
|
2022-12-10 04:57:15 -05:00
|
|
|
Block::Container(ListItem),
|
2022-12-06 15:31:08 -05:00
|
|
|
Span::by_len(start, if task_list { 3 } else { 1 }),
|
|
|
|
)
|
|
|
|
}),
|
2022-12-10 04:57:15 -05:00
|
|
|
'+' | '*' | ':' if chars.next().map_or(true, char::is_whitespace) => {
|
|
|
|
Some((Block::Container(ListItem), Span::by_len(start, 1)))
|
|
|
|
}
|
2022-12-07 12:44:03 -05:00
|
|
|
f @ ('`' | ':' | '~') => {
|
2022-12-02 14:07:37 -05:00
|
|
|
let fence_length = (&mut chars).take_while(|c| *c == f).count() + 1;
|
2022-12-10 04:57:15 -05:00
|
|
|
fence = Some((f, fence_length));
|
2022-12-07 12:44:03 -05:00
|
|
|
let lang = line_t[fence_length..].trim();
|
2022-12-08 11:42:54 -05:00
|
|
|
let valid_spec =
|
|
|
|
!lang.chars().any(char::is_whitespace) && !lang.chars().any(|c| c == '`');
|
2022-12-10 04:57:15 -05:00
|
|
|
(valid_spec && fence_length >= 3).then(|| {
|
|
|
|
(
|
|
|
|
match f {
|
|
|
|
':' => Block::Container(Div),
|
|
|
|
_ => Block::Leaf(CodeBlock),
|
|
|
|
},
|
|
|
|
Span::from_slice(line, lang),
|
|
|
|
)
|
|
|
|
})
|
2022-11-12 12:45:17 -05:00
|
|
|
}
|
2022-12-06 15:31:08 -05:00
|
|
|
_ => None,
|
|
|
|
}
|
2022-12-10 04:57:15 -05:00
|
|
|
.unwrap_or((Block::Leaf(Paragraph), Span::new(0, 0)));
|
|
|
|
|
|
|
|
Self {
|
|
|
|
indent: start,
|
|
|
|
kind,
|
|
|
|
span,
|
|
|
|
fence,
|
|
|
|
}
|
2022-12-06 15:31:08 -05:00
|
|
|
}
|
2022-11-28 14:12:49 -05:00
|
|
|
|
2022-12-06 15:31:08 -05:00
|
|
|
/// Decide whether the remainder of a line completes a thematic break.
///
/// `chars` is the line *after* its first marker character, which is counted
/// implicitly. The rest may consist only of `-`, `*` and whitespace, and the
/// line must hold at least three markers in total.
fn is_thematic_break(chars: std::str::Chars) -> bool {
    let rest = chars.as_str();
    let only_markers_and_space = rest
        .chars()
        .all(|c| matches!(c, '-' | '*') || c.is_whitespace());
    let markers = 1 + rest.chars().filter(|c| matches!(c, '-' | '*')).count();
    only_markers_and_space && markers >= 3
}
|
|
|
|
|
2022-12-10 04:57:15 -05:00
|
|
|
/// Determine if this line continues the block.
|
|
|
|
fn continues(&mut self, line: &str) -> bool {
|
|
|
|
match self.kind {
|
|
|
|
Block::Atom(..) => false,
|
|
|
|
Block::Leaf(Paragraph | Heading | Table) => !line.trim().is_empty(),
|
|
|
|
Block::Leaf(LinkDefinition) => line.starts_with(' ') && !line.trim().is_empty(),
|
|
|
|
Block::Container(Blockquote) => line.trim().starts_with('>'),
|
|
|
|
Block::Container(Footnote | ListItem) => {
|
2022-11-12 12:45:17 -05:00
|
|
|
let spaces = line.chars().take_while(|c| c.is_whitespace()).count();
|
2022-12-10 04:57:15 -05:00
|
|
|
line.trim().is_empty() || spaces > self.indent
|
2022-11-12 12:45:17 -05:00
|
|
|
}
|
2022-12-10 04:57:15 -05:00
|
|
|
Block::Container(Div) | Block::Leaf(CodeBlock) => {
|
|
|
|
let (fence, fence_length) = self.fence.unwrap();
|
2022-11-27 15:59:54 -05:00
|
|
|
let mut c = line.chars();
|
2022-12-10 04:57:15 -05:00
|
|
|
!((&mut c).take(fence_length).all(|c| c == fence)
|
2022-12-02 14:07:37 -05:00
|
|
|
&& c.next().map_or(true, char::is_whitespace))
|
2022-11-12 12:45:17 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl std::fmt::Display for Block {
|
|
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
|
|
match self {
|
2022-12-10 04:26:06 -05:00
|
|
|
Block::Atom(a) => std::fmt::Debug::fmt(a, f),
|
2022-11-12 12:45:17 -05:00
|
|
|
Block::Leaf(e) => std::fmt::Debug::fmt(e, f),
|
|
|
|
Block::Container(c) => std::fmt::Debug::fmt(c, f),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl std::fmt::Display for Atom {
|
|
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
|
|
write!(f, "Inline")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Similar to `std::str::split('\n')` but newline is included and spans are used instead of `str`.
|
|
|
|
fn lines(src: &str) -> impl Iterator<Item = Span> + '_ {
|
|
|
|
let mut chars = src.chars();
|
|
|
|
std::iter::from_fn(move || {
|
|
|
|
if chars.as_str().is_empty() {
|
|
|
|
None
|
|
|
|
} else {
|
|
|
|
let start = src.len() - chars.as_str().len();
|
|
|
|
chars.find(|c| *c == '\n');
|
|
|
|
let end = src.len() - chars.as_str().len();
|
|
|
|
if start == end {
|
|
|
|
None
|
|
|
|
} else {
|
|
|
|
Some(Span::new(start, end))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(test)]
mod test {
    use crate::tree::EventKind;
    use crate::tree::EventKind::*;

    use super::Atom::*;
    use super::Block;
    use super::Container::*;
    use super::Leaf::*;
    use super::Node::*;

    /// Parse `$src` into a tree and compare its events, as `(kind, source text
    /// of span)` pairs, against the listed expectations.
    macro_rules! test_parse {
        ($src:expr $(,$($event:expr),* $(,)?)?) => {
            let t = super::TreeParser::new($src).parse();
            let actual = t.map(|ev| (ev.kind, ev.span.of($src))).collect::<Vec<_>>();
            let expected = &[$($($event),*,)?];
            assert_eq!(actual, expected, "\n\n{}\n\n", $src);
        };
    }

    /// A single-line paragraph yields one inline without its trailing newline.
    #[test]
    fn parse_para_oneline() {
        test_parse!(
            "para\n",
            (Enter(Leaf(Paragraph)), ""),
            (Inline, "para"),
            (Exit(Leaf(Paragraph)), ""),
        );
    }

    /// Each paragraph line is its own inline; only the last one is trimmed.
    #[test]
    fn parse_para_multiline() {
        test_parse!(
            "para0\npara1\n",
            (Enter(Leaf(Paragraph)), ""),
            (Inline, "para0\n"),
            (Inline, "para1"),
            (Exit(Leaf(Paragraph)), ""),
        );
    }

    /// Headings separated by a blank line, the second spanning several lines.
    #[test]
    fn parse_heading_multi() {
        test_parse!(
            concat!(
                "# 2\n",
                "\n",
                " # 8\n",
                " 12\n",
                "15\n", //
            ),
            (Enter(Leaf(Heading)), "#"),
            (Inline, "2"),
            (Exit(Leaf(Heading)), "#"),
            (Atom(Blankline), "\n"),
            (Enter(Leaf(Heading)), "#"),
            (Inline, "8\n"),
            (Inline, " 12\n"),
            (Inline, "15"),
            (Exit(Leaf(Heading)), "#"),
        );
    }

    /// Blockquotes wrap the blocks found after the `>` prefix.
    #[test]
    fn parse_blockquote() {
        test_parse!(
            "> a\n",
            (Enter(Container(Blockquote)), ">"),
            (Enter(Leaf(Paragraph)), ""),
            (Inline, "a"),
            (Exit(Leaf(Paragraph)), ""),
            (Exit(Container(Blockquote)), ">"),
        );
        test_parse!(
            "> a\nb\nc\n",
            (Enter(Container(Blockquote)), ">"),
            (Enter(Leaf(Paragraph)), ""),
            (Inline, "a\n"),
            (Inline, "b\n"),
            (Inline, "c"),
            (Exit(Leaf(Paragraph)), ""),
            (Exit(Container(Blockquote)), ">"),
        );
        test_parse!(
            concat!(
                "> a\n",
                ">\n",
                "> ## hl\n",
                ">\n",
                "> para\n", //
            ),
            (Enter(Container(Blockquote)), ">"),
            (Enter(Leaf(Paragraph)), ""),
            (Inline, "a"),
            (Exit(Leaf(Paragraph)), ""),
            (Atom(Blankline), "\n"),
            (Enter(Leaf(Heading)), "##"),
            (Inline, "hl"),
            (Exit(Leaf(Heading)), "##"),
            (Atom(Blankline), "\n"),
            (Enter(Leaf(Paragraph)), ""),
            (Inline, "para"),
            (Exit(Leaf(Paragraph)), ""),
            (Exit(Container(Blockquote)), ">"),
        );
    }

    /// A blockquote marker without content contains a single blank line.
    #[test]
    fn parse_blockquote_empty() {
        test_parse!(
            "> \n",
            (Enter(Container(Blockquote)), ">"),
            (EventKind::Atom(Blankline), "\n"),
            (Exit(Container(Blockquote)), ">"),
        );
        test_parse!(
            ">",
            (Enter(Container(Blockquote)), ">"),
            (EventKind::Atom(Blankline), ""),
            (Exit(Container(Blockquote)), ">"),
        );
    }

    /// Code blocks keep their lines verbatim and exclude both fences.
    #[test]
    fn parse_code_block() {
        test_parse!(
            concat!("```\n", "l0\n"),
            (Enter(Leaf(CodeBlock)), ""),
            (Inline, "l0\n"),
            (Exit(Leaf(CodeBlock)), ""),
        );
        test_parse!(
            concat!(
                "```\n",
                "l0\n",
                "```\n",
                "\n",
                "para\n", //
            ),
            (Enter(Leaf(CodeBlock)), ""),
            (Inline, "l0\n"),
            (Exit(Leaf(CodeBlock)), ""),
            (Atom(Blankline), "\n"),
            (Enter(Leaf(Paragraph)), ""),
            (Inline, "para"),
            (Exit(Leaf(Paragraph)), ""),
        );
        test_parse!(
            concat!(
                "```` lang\n",
                "l0\n",
                "```\n",
                " l1\n",
                "````", //
            ),
            (Enter(Leaf(CodeBlock)), "lang"),
            (Inline, "l0\n"),
            (Inline, "```\n"),
            (Inline, " l1\n"),
            (Exit(Leaf(CodeBlock)), "lang"),
        );
        test_parse!(
            concat!(
                "```\n", //
                "a\n",   //
                "```\n", //
                "```\n", //
                "bbb\n", //
                "```\n", //
            ),
            (Enter(Leaf(CodeBlock)), ""),
            (Inline, "a\n"),
            (Exit(Leaf(CodeBlock)), ""),
            (Enter(Leaf(CodeBlock)), ""),
            (Inline, "bbb\n"),
            (Exit(Leaf(CodeBlock)), ""),
        );
        test_parse!(
            concat!(
                "~~~\n",
                "code\n",
                " block\n",
                "~~~\n", //
            ),
            (Enter(Leaf(CodeBlock)), ""),
            (Inline, "code\n"),
            (Inline, " block\n"),
            (Exit(Leaf(CodeBlock)), ""),
        );
    }

    /// A link definition's span is its tag and its inline is the URL.
    #[test]
    fn parse_link_definition() {
        test_parse!(
            "[tag]: url\n",
            (Enter(Leaf(LinkDefinition)), "tag"),
            (Inline, "url"),
            (Exit(Leaf(LinkDefinition)), "tag"),
        );
    }

    /// A footnote is a container whose span is the tag without the `^`.
    #[test]
    fn parse_footnote() {
        test_parse!(
            "[^tag]: description\n",
            (Enter(Container(Footnote)), "tag"),
            (Enter(Leaf(Paragraph)), ""),
            (Inline, "description"),
            (Exit(Leaf(Paragraph)), ""),
            (Exit(Container(Footnote)), "tag"),
        );
    }

    /// A block attribute line is an atom spanning the whole line.
    #[test]
    fn parse_attr() {
        test_parse!(
            "{.some_class}\npara\n",
            (Atom(Attributes), "{.some_class}\n"),
            (Enter(Leaf(Paragraph)), ""),
            (Inline, "para"),
            (Exit(Leaf(Paragraph)), ""),
        );
    }

    /// Run the single-block parser on `$src` and compare the detected kind,
    /// the source text of its span and the number of lines it uses.
    macro_rules! test_block {
        ($src:expr, $kind:expr, $str:expr, $len:expr $(,)?) => {
            let lines = super::lines($src).map(|sp| sp.of($src));
            let (_indent, kind, sp, len) = super::BlockParser::parse(lines).unwrap();
            assert_eq!(
                (kind, sp.of($src), len),
                ($kind, $str, $len),
                "\n\n{}\n\n",
                $src
            );
        };
    }

    /// Blank lines are atoms spanning only the newline.
    #[test]
    fn block_blankline() {
        test_block!("\n", Block::Atom(Blankline), "\n", 1);
        test_block!(" \n", Block::Atom(Blankline), "\n", 1);
    }

    /// A heading continues onto following non-blank lines.
    #[test]
    fn block_multiline() {
        test_block!(
            "# heading\n spanning two lines\n",
            Block::Leaf(Heading),
            "#",
            2
        );
    }

    /// A blockquote spans every consecutive line that starts with `>`.
    #[test]
    fn block_blockquote() {
        test_block!(
            concat!(
                "> a\n",  //
                ">\n",    //
                " > b\n", //
                ">\n",    //
                "> c\n",  //
            ),
            Block::Container(Blockquote),
            ">",
            5,
        );
    }

    /// Thematic breaks may mix `-` and `*` and contain whitespace.
    #[test]
    fn block_thematic_break() {
        test_block!("---\n", Block::Atom(ThematicBreak), "---", 1);
        test_block!(
            concat!(
                " -*- -*-\n",
                "\n",   //
                "para", //
            ),
            Block::Atom(ThematicBreak),
            "-*- -*-",
            1
        );
    }

    /// Code blocks end at the first matching fence; an invalid language
    /// specifier turns the line into a paragraph.
    #[test]
    fn block_code_block() {
        test_block!(
            concat!(
                "```` lang\n",
                "l0\n",
                "```\n",
                " l1\n",
                "````", //
            ),
            Block::Leaf(CodeBlock),
            "lang",
            5,
        );
        test_block!(
            concat!(
                "```\n", //
                "a\n",   //
                "```\n", //
                "```\n", //
                "bbb\n", //
                "```\n", //
            ),
            Block::Leaf(CodeBlock),
            "",
            3,
        );
        test_block!(
            concat!(
                "``` no space in lang specifier\n",
                "l0\n",
                "```\n", //
            ),
            Block::Leaf(Paragraph),
            "",
            3,
        );
    }

    /// Link definitions continue only on lines that start with a space.
    #[test]
    fn block_link_definition() {
        test_block!("[tag]: url\n", Block::Leaf(LinkDefinition), "tag", 1);
        test_block!(
            concat!(
                "[tag]: uuu\n",
                " rl\n", //
            ),
            Block::Leaf(LinkDefinition),
            "tag",
            2,
        );
        test_block!(
            concat!(
                "[tag]: url\n",
                "para\n", //
            ),
            Block::Leaf(LinkDefinition),
            "tag",
            1,
        );
    }
}
|