block: replace tree with event vec

a lot simpler to use and reason about

should also make it easier to move to streaming
This commit is contained in:
Noah Hellman 2023-04-25 20:18:31 +02:00
parent 5e99d98f4f
commit e84385c2db
3 changed files with 265 additions and 584 deletions

View file

@ -5,15 +5,26 @@ use crate::Span;
use crate::attr; use crate::attr;
use crate::lex; use crate::lex;
use crate::tree;
use Atom::*; use Atom::*;
use Container::*; use Container::*;
use Leaf::*; use Leaf::*;
use ListType::*; use ListType::*;
pub type Tree<'s> = tree::Tree<Node<'s>, Atom>; #[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub type TreeBuilder<'s> = tree::Builder<Node<'s>, Atom>; pub struct Event<'s> {
pub kind: EventKind<'s>,
pub span: Span,
}
/// Kind of a block-level event in the flat event list produced by the block parser.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum EventKind<'s> {
/// Start of a container or leaf node.
Enter(Node<'s>),
/// A span of inline content belonging to the enclosing leaf.
Inline,
/// End of a node; the parser copies the node from the matching `Enter` event when emitting it.
Exit(Node<'s>),
/// A block that has no children and no separate enter/exit.
Atom(Atom),
/// Placeholder for an event that was invalidated after it was pushed (e.g. the empty
/// description term that is replaced by the converted paragraph). Consumers skip it.
Stale,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Node<'s> { pub enum Node<'s> {
@ -22,7 +33,7 @@ pub enum Node<'s> {
} }
#[must_use] #[must_use]
pub fn parse(src: &str) -> Tree { pub fn parse(src: &str) -> Vec<Event> {
TreeParser::new(src).parse() TreeParser::new(src).parse()
} }
@ -106,15 +117,13 @@ struct OpenList {
/// Depth in the tree where the direct list items of the list are. Needed to determine when to /// Depth in the tree where the direct list items of the list are. Needed to determine when to
/// close the list. /// close the list.
depth: u16, depth: u16,
/// Index to node in tree, required to update tightness. /// Index to event in tree, required to update tightness.
node: tree::NodeIndex, event: usize,
} }
/// Parser for block-level tree structure of entire document. /// Parser for block-level tree structure of entire document.
struct TreeParser<'s> { struct TreeParser<'s> {
src: &'s str, src: &'s str,
tree: TreeBuilder<'s>,
/// The previous block element was a blank line. /// The previous block element was a blank line.
prev_blankline: bool, prev_blankline: bool,
prev_loose: bool, prev_loose: bool,
@ -124,24 +133,30 @@ struct TreeParser<'s> {
open_sections: Vec<usize>, open_sections: Vec<usize>,
/// Alignments for each column in for the current table. /// Alignments for each column in for the current table.
alignments: Vec<Alignment>, alignments: Vec<Alignment>,
/// Stack of indices into `events` of the `Enter` events that are still open. Its length is the
/// current container depth.
open: Vec<usize>,
/// Buffer queue for next events. Events are buffered until no modifications due to future
/// characters are needed.
events: Vec<Event<'s>>,
} }
impl<'s> TreeParser<'s> { impl<'s> TreeParser<'s> {
#[must_use] #[must_use]
pub fn new(src: &'s str) -> Self { fn new(src: &'s str) -> Self {
Self { Self {
src, src,
tree: TreeBuilder::new(),
prev_blankline: false, prev_blankline: false,
prev_loose: false, prev_loose: false,
open_lists: Vec::new(), open_lists: Vec::new(),
alignments: Vec::new(), alignments: Vec::new(),
open_sections: Vec::new(), open_sections: Vec::new(),
open: Vec::new(),
events: Vec::new(),
} }
} }
#[must_use] #[must_use]
pub fn parse(mut self) -> Tree<'s> { fn parse(mut self) -> Vec<Event<'s>> {
let mut lines = lines(self.src).collect::<Vec<_>>(); let mut lines = lines(self.src).collect::<Vec<_>>();
let mut line_pos = 0; let mut line_pos = 0;
while line_pos < lines.len() { while line_pos < lines.len() {
@ -154,10 +169,43 @@ impl<'s> TreeParser<'s> {
while let Some(l) = self.open_lists.pop() { while let Some(l) = self.open_lists.pop() {
self.close_list(l, self.src.len()); self.close_list(l, self.src.len());
} }
for _ in self.open_sections.drain(..) {
self.tree.exit(Span::empty_at(self.src.len())); // section for _ in std::mem::take(&mut self.open_sections).drain(..) {
self.exit(Span::empty_at(self.src.len()));
} }
self.tree.finish() debug_assert_eq!(self.open, &[]);
self.events
}
/// Append an inline event covering `span` to the event buffer.
fn inline(&mut self, span: Span) {
    let kind = EventKind::Inline;
    self.events.push(Event { kind, span });
}
/// Open `node`: append its enter event and remember it as the innermost open node.
///
/// Returns the index of the new enter event within `events`.
fn enter(&mut self, node: Node<'s>, span: Span) -> usize {
    let index = self.events.len();
    let kind = EventKind::Enter(node);
    self.events.push(Event { kind, span });
    self.open.push(index);
    index
}
fn exit(&mut self, span: Span) -> usize {
let i = self.events.len();
let node = if let EventKind::Enter(node) = self.events[self.open.pop().unwrap()].kind {
node
} else {
panic!();
};
self.events.push(Event {
kind: EventKind::Exit(node),
span,
});
i
} }
/// Recursively parse a block and all of its children. Return number of lines the block uses. /// Recursively parse a block and all of its children. Return number of lines the block uses.
@ -198,8 +246,8 @@ impl<'s> TreeParser<'s> {
// close list if a non list item or a list item of new type appeared // close list if a non list item or a list item of new type appeared
if let Some(OpenList { ty, depth, .. }) = self.open_lists.last() { if let Some(OpenList { ty, depth, .. }) = self.open_lists.last() {
debug_assert!(usize::from(*depth) <= self.tree.depth()); debug_assert!(usize::from(*depth) <= self.open.len());
if self.tree.depth() == (*depth).into() if self.open.len() == (*depth).into()
&& !matches!(kind, Kind::ListItem { ty: ty_new, .. } if *ty == ty_new) && !matches!(kind, Kind::ListItem { ty: ty_new, .. } if *ty == ty_new)
{ {
let l = self.open_lists.pop().unwrap(); let l = self.open_lists.pop().unwrap();
@ -213,15 +261,17 @@ impl<'s> TreeParser<'s> {
} else { } else {
self.prev_loose = false; self.prev_loose = false;
if self.prev_blankline { if self.prev_blankline {
if let Some(OpenList { node, depth, .. }) = self.open_lists.last() { if let Some(OpenList { event, depth, .. }) = self.open_lists.last() {
if usize::from(*depth) >= self.tree.depth() if usize::from(*depth) >= self.open.len()
|| !matches!(kind, Kind::ListItem { .. }) || !matches!(kind, Kind::ListItem { .. })
{ {
let mut elem = self.tree.elem(*node); if let EventKind::Enter(Node::Container(List { kind, .. })) =
let ListKind { tight, .. } = elem.list_mut().unwrap(); &mut self.events[*event].kind
if *tight { {
self.prev_loose = true; if kind.tight {
*tight = false; self.prev_loose = true;
kind.tight = false;
}
} }
} }
} }
@ -269,7 +319,10 @@ impl<'s> TreeParser<'s> {
}; };
match block { match block {
Block::Atom(a) => self.tree.atom(a, span_start), Block::Atom(a) => self.events.push(Event {
kind: EventKind::Atom(a),
span: span_start,
}),
Block::Leaf(l) => self.parse_leaf(l, &kind, span_start, span_end, lines), Block::Leaf(l) => self.parse_leaf(l, &kind, span_start, span_end, lines),
Block::Container(Table) => self.parse_table(lines, span_start, span_end), Block::Container(Table) => self.parse_table(lines, span_start, span_end),
Block::Container(c) => { Block::Container(c) => {
@ -325,16 +378,13 @@ impl<'s> TreeParser<'s> {
.iter() .iter()
.rposition(|l| l < level) .rposition(|l| l < level)
.map_or(0, |i| i + 1); .map_or(0, |i| i + 1);
self.open_sections.drain(first_close..).for_each(|_| { let pos = span_start.start() as u32;
self.tree.exit(Span::empty_at(span_start.start())); // section for _ in 0..(self.open_sections.len() - first_close) {
}); self.exit(Span::empty_at(span_start.start())); // section
}
self.open_sections.drain(first_close..);
self.open_sections.push(*level); self.open_sections.push(*level);
self.tree.enter( self.enter(Node::Container(Section { pos }), span_start.empty_before());
Node::Container(Section {
pos: span_start.start() as u32,
}),
span_start.empty_before(),
);
} }
// trim '#' characters // trim '#' characters
@ -343,12 +393,12 @@ impl<'s> TreeParser<'s> {
} }
} }
self.tree.enter(Node::Leaf(leaf), span_start); self.enter(Node::Leaf(leaf), span_start);
lines lines
.iter() .iter()
.filter(|l| !matches!(k, Kind::Heading { .. }) || !l.is_empty()) .filter(|l| !matches!(k, Kind::Heading { .. }) || !l.is_empty())
.for_each(|line| self.tree.inline(*line)); .for_each(|line| self.inline(*line));
self.tree.exit(span_end); self.exit(span_end);
} }
fn parse_container( fn parse_container(
@ -392,11 +442,11 @@ impl<'s> TreeParser<'s> {
.open_lists .open_lists
.last() .last()
.map_or(true, |OpenList { depth, .. }| { .map_or(true, |OpenList { depth, .. }| {
usize::from(*depth) < self.tree.depth() usize::from(*depth) < self.open.len()
}); });
if same_depth { if same_depth {
let tight = true; let tight = true;
let node = self.tree.enter( let event = self.enter(
Node::Container(Container::List { Node::Container(Container::List {
kind: ListKind { ty: *ty, tight }, kind: ListKind { ty: *ty, tight },
marker: span_start.of(self.src), marker: span_start.of(self.src),
@ -405,48 +455,77 @@ impl<'s> TreeParser<'s> {
); );
self.open_lists.push(OpenList { self.open_lists.push(OpenList {
ty: *ty, ty: *ty,
depth: self.tree.depth().try_into().unwrap(), depth: self.open.len().try_into().unwrap(),
node, event,
}); });
} }
} }
let dt = if let ListItem(ListItemKind::Description) = c { let dt = if let ListItem(ListItemKind::Description) = c {
let dt = self.tree.enter(Node::Leaf(DescriptionTerm), span_start); let dt = self.enter(Node::Leaf(DescriptionTerm), span_start);
self.tree.exit(span_start.trim_end(self.src).empty_after()); self.exit(span_start.trim_end(self.src).empty_after());
let span_open = span_start;
span_start = lines[0].empty_before(); span_start = lines[0].empty_before();
Some((dt, span_open)) Some((dt, self.events.len(), self.open.len()))
} else { } else {
None None
}; };
let node = self.tree.enter(Node::Container(c), span_start); self.enter(Node::Container(c), span_start);
let mut l = 0; let mut l = 0;
while l < lines.len() { while l < lines.len() {
l += self.parse_block(&mut lines[l..], false); l += self.parse_block(&mut lines[l..], false);
} }
if let Some((node_dt, span_open)) = dt { if let Some((empty_term, enter_detail, open_detail)) = dt {
let node_child = if let Some(node_child) = self.tree.children(node).next() { let enter_term = enter_detail + 1;
if let tree::Element::Container(Node::Leaf(l @ Paragraph)) = node_child.elem { if let Some(first_child) = self.events.get_mut(enter_term) {
if let EventKind::Enter(Node::Leaf(l @ Paragraph)) = &mut first_child.kind {
// convert paragraph into description term
*l = DescriptionTerm; *l = DescriptionTerm;
Some(node_child.index) let exit_term = if let Some(i) = self.events[enter_term + 1..]
} else { .iter_mut()
None .position(|e| matches!(e.kind, EventKind::Exit(Node::Leaf(Paragraph))))
{
enter_term + 1 + i
} else {
panic!()
};
if let EventKind::Exit(Node::Leaf(l)) = &mut self.events[exit_term].kind {
*l = DescriptionTerm;
} else {
panic!()
}
// remove empty description term
self.events[empty_term].kind = EventKind::Stale;
self.events[empty_term + 1].kind = EventKind::Stale;
// move out term before detail
self.events[enter_term].span = self.events[empty_term].span;
let first_detail = self.events[exit_term + 1..]
.iter()
.position(|e| !matches!(e.kind, EventKind::Atom(Blankline)))
.map(|i| exit_term + 1 + i)
.unwrap_or(self.events.len());
let detail_pos = self
.events
.get(first_detail)
.map(|e| e.span.start())
.unwrap_or_else(|| self.events.last().unwrap().span.end());
self.events
.copy_within(enter_term..first_detail, enter_detail);
self.events[first_detail - 1] = Event {
kind: EventKind::Enter(Node::Container(c)),
span: Span::empty_at(detail_pos),
};
self.open[open_detail] = first_detail - 1;
} }
} else {
None
};
if let Some(node_child) = node_child {
self.tree.swap_prev(node_child, span_open);
self.tree.remove(node_dt);
} }
} }
if let Some(OpenList { depth, .. }) = self.open_lists.last() { if let Some(OpenList { depth, .. }) = self.open_lists.last() {
debug_assert!(usize::from(*depth) <= self.tree.depth()); debug_assert!(usize::from(*depth) <= self.open.len());
if self.tree.depth() == (*depth).into() { if self.open.len() == (*depth).into() {
self.prev_blankline = false; self.prev_blankline = false;
self.prev_loose = false; self.prev_loose = false;
let l = self.open_lists.pop().unwrap(); let l = self.open_lists.pop().unwrap();
@ -454,38 +533,37 @@ impl<'s> TreeParser<'s> {
} }
} }
self.tree.exit(span_end); self.exit(span_end);
} }
fn parse_table(&mut self, lines: &mut [Span], span_start: Span, span_end: Span) { fn parse_table(&mut self, lines: &mut [Span], span_start: Span, span_end: Span) {
self.alignments.clear(); self.alignments.clear();
self.tree.enter(Node::Container(Table), span_start); self.enter(Node::Container(Table), span_start);
let caption_line = lines let caption_line = lines
.iter() .iter()
.position(|sp| sp.of(self.src).trim_start().starts_with('^')) .position(|sp| sp.of(self.src).trim_start().starts_with('^'))
.map_or(lines.len(), |caption_line| { .map_or(lines.len(), |caption_line| {
self.tree.enter(Node::Leaf(Caption), span_start); self.enter(Node::Leaf(Caption), span_start);
lines[caption_line] = lines[caption_line] lines[caption_line] = lines[caption_line]
.trim_start(self.src) .trim_start(self.src)
.skip_chars(2, self.src); .skip_chars(2, self.src);
lines[lines.len() - 1] = lines[lines.len() - 1].trim_end(self.src); lines[lines.len() - 1] = lines[lines.len() - 1].trim_end(self.src);
for line in &lines[caption_line..] { for line in &lines[caption_line..] {
self.tree.inline(*line); self.inline(*line);
} }
self.tree.exit(span_end); self.exit(span_end);
caption_line caption_line
}); });
let mut last_row_node = None; let mut last_row_event = None;
for row in &lines[..caption_line] { for row in &lines[..caption_line] {
let row = row.trim(self.src); let row = row.trim(self.src);
if row.is_empty() { if row.is_empty() {
break; break;
} }
let row_node = self let row_event_enter =
.tree self.enter(Node::Container(TableRow { head: false }), row.with_len(1));
.enter(Node::Container(TableRow { head: false }), row.with_len(1));
let rem = row.skip(1); // | let rem = row.skip(1); // |
let lex = lex::Lexer::new(rem.of(self.src)); let lex = lex::Lexer::new(rem.of(self.src));
let mut pos = rem.start(); let mut pos = rem.start();
@ -514,7 +592,7 @@ impl<'s> TreeParser<'s> {
} }
}; };
separator_row &= separator_cell; separator_row &= separator_cell;
self.tree.enter( self.enter(
Node::Leaf(TableCell( Node::Leaf(TableCell(
self.alignments self.alignments
.get(column_index) .get(column_index)
@ -523,8 +601,8 @@ impl<'s> TreeParser<'s> {
)), )),
Span::empty_at(cell_start), Span::empty_at(cell_start),
); );
self.tree.inline(span); self.inline(span);
self.tree.exit(Span::new(pos, pos + 1)); // cell self.exit(Span::new(pos, pos + 1));
cell_start = pos + len; cell_start = pos + len;
column_index += 1; column_index += 1;
} }
@ -540,11 +618,11 @@ impl<'s> TreeParser<'s> {
if separator_row && verbatim.is_none() { if separator_row && verbatim.is_none() {
self.alignments.clear(); self.alignments.clear();
self.alignments.extend( self.alignments.extend(
self.tree self.events[row_event_enter + 1..]
.children(row_node) .iter()
.filter(|n| matches!(n.elem, tree::Element::Inline)) .filter(|e| matches!(e.kind, EventKind::Inline))
.map(|n| { .map(|e| {
let cell = n.span.of(self.src); let cell = e.span.of(self.src);
let l = cell.as_bytes()[0] == b':'; let l = cell.as_bytes()[0] == b':';
let r = cell.as_bytes()[cell.len() - 1] == b':'; let r = cell.as_bytes()[cell.len() - 1] == b':';
match (l, r) { match (l, r) {
@ -555,62 +633,67 @@ impl<'s> TreeParser<'s> {
} }
}), }),
); );
self.tree.exit_discard(); // table row self.open.pop();
if let Some(head_row) = last_row_node { self.events.drain(row_event_enter..); // remove table row
self.tree if let Some((head_row_enter, head_row_exit)) = last_row_event {
.children(head_row) self.events[head_row_enter + 1..]
.filter(|n| { .iter_mut()
matches!(n.elem, tree::Element::Container(Node::Leaf(TableCell(..)))) .filter(|e| {
matches!(
e.kind,
EventKind::Enter(Node::Leaf(TableCell(..)))
| EventKind::Exit(Node::Leaf(TableCell(..)))
)
}) })
.zip( .zip(
self.alignments self.alignments
.iter() .iter()
.copied() .copied()
.chain(std::iter::repeat(Alignment::Unspecified)), .chain(std::iter::repeat(Alignment::Unspecified))
.flat_map(|a| [a, a].into_iter()),
) )
.for_each(|(n, new_align)| { .for_each(|(e, new_align)| match &mut e.kind {
if let tree::Element::Container(Node::Leaf(TableCell(alignment))) = EventKind::Enter(Node::Leaf(TableCell(alignment)))
n.elem | EventKind::Exit(Node::Leaf(TableCell(alignment))) => {
{
*alignment = new_align; *alignment = new_align;
} }
_ => panic!(),
}); });
if let tree::Element::Container(Node::Container(TableRow { head })) = let event: &mut Event = &mut self.events[head_row_enter];
self.tree.elem(head_row) if let EventKind::Enter(Node::Container(TableRow { head })) = &mut event.kind {
{ *head = true;
} else {
panic!()
}
let event: &mut Event = &mut self.events[head_row_exit];
if let EventKind::Exit(Node::Container(TableRow { head })) = &mut event.kind {
*head = true; *head = true;
} else { } else {
panic!() panic!()
} }
} }
} else { } else {
self.tree.exit(Span::empty_at(pos)); // table row let row_event_exit = self.exit(Span::empty_at(pos)); // table row
last_row_node = Some(row_node); last_row_event = Some((row_event_enter, row_event_exit));
} }
} }
self.tree.exit(span_end); // table self.exit(span_end);
} }
fn close_list(&mut self, list: OpenList, pos: usize) { fn close_list(&mut self, list: OpenList, pos: usize) {
if self.prev_loose { if self.prev_loose {
let mut elem = self.tree.elem(list.node); if let EventKind::Enter(Node::Container(List { kind, .. })) =
let ListKind { tight, .. } = elem.list_mut().unwrap(); &mut self.events[list.event].kind
// ignore blankline at end {
*tight = true; // ignore blankline at end
kind.tight = true;
} else {
panic!()
}
} }
self.tree.exit(Span::empty_at(pos)); // list self.exit(Span::empty_at(pos)); // list
}
}
impl<'t, 's> tree::Element<'t, Node<'s>, Atom> {
fn list_mut(&mut self) -> Option<&mut ListKind> {
if let tree::Element::Container(Node::Container(Container::List { kind, .. })) = self {
Some(kind)
} else {
None
}
} }
} }
@ -1023,13 +1106,13 @@ fn lines(src: &str) -> impl Iterator<Item = Span> + '_ {
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use crate::tree::EventKind::*;
use crate::Alignment; use crate::Alignment;
use crate::OrderedListNumbering::*; use crate::OrderedListNumbering::*;
use crate::OrderedListStyle::*; use crate::OrderedListStyle::*;
use super::Atom::*; use super::Atom::*;
use super::Container::*; use super::Container::*;
use super::EventKind::*;
use super::FenceKind; use super::FenceKind;
use super::Kind; use super::Kind;
use super::Leaf::*; use super::Leaf::*;
@ -1041,7 +1124,7 @@ mod test {
macro_rules! test_parse { macro_rules! test_parse {
($src:expr $(,$($event:expr),* $(,)?)?) => { ($src:expr $(,$($event:expr),* $(,)?)?) => {
let t = super::TreeParser::new($src).parse(); let t = super::TreeParser::new($src).parse();
let actual = t.map(|ev| (ev.kind, ev.span.of($src))).collect::<Vec<_>>(); let actual = t.into_iter().map(|ev| (ev.kind, ev.span.of($src))).collect::<Vec<_>>();
let expected = &[$($($event),*,)?]; let expected = &[$($($event),*,)?];
assert_eq!( assert_eq!(
actual, actual,
@ -2189,11 +2272,13 @@ mod test {
})), })),
"" ""
), ),
(Stale, ":"),
(Stale, ""),
(Enter(Leaf(DescriptionTerm)), ":"), (Enter(Leaf(DescriptionTerm)), ":"),
(Inline, "term"), (Inline, "term"),
(Exit(Leaf(DescriptionTerm)), ""), (Exit(Leaf(DescriptionTerm)), ""),
(Enter(Container(ListItem(ListItemKind::Description))), ""),
(Atom(Blankline), "\n"), (Atom(Blankline), "\n"),
(Enter(Container(ListItem(ListItemKind::Description))), ""),
(Enter(Leaf(Paragraph)), ""), (Enter(Leaf(Paragraph)), ""),
(Inline, "description"), (Inline, "description"),
(Exit(Leaf(Paragraph)), ""), (Exit(Leaf(Paragraph)), ""),
@ -2233,12 +2318,14 @@ mod test {
})), })),
"", "",
), ),
(Stale, ":"),
(Stale, ""),
(Enter(Leaf(DescriptionTerm)), ":"), (Enter(Leaf(DescriptionTerm)), ":"),
(Inline, "apple\n"), (Inline, "apple\n"),
(Inline, "fruit"), (Inline, "fruit"),
(Exit(Leaf(DescriptionTerm)), ""), (Exit(Leaf(DescriptionTerm)), ""),
(Enter(Container(ListItem(ListItemKind::Description))), ""),
(Atom(Blankline), "\n"), (Atom(Blankline), "\n"),
(Enter(Container(ListItem(ListItemKind::Description))), ""),
(Enter(Leaf(Paragraph)), ""), (Enter(Leaf(Paragraph)), ""),
(Inline, "Paragraph one"), (Inline, "Paragraph one"),
(Exit(Leaf(Paragraph)), ""), (Exit(Leaf(Paragraph)), ""),
@ -2279,6 +2366,8 @@ mod test {
"", "",
), ),
(Exit(Container(ListItem(ListItemKind::Description))), ""), (Exit(Container(ListItem(ListItemKind::Description))), ""),
(Stale, ":"),
(Stale, ""),
(Enter(Leaf(DescriptionTerm)), ":"), (Enter(Leaf(DescriptionTerm)), ":"),
(Inline, "orange"), (Inline, "orange"),
(Exit(Leaf(DescriptionTerm)), ""), (Exit(Leaf(DescriptionTerm)), ""),

View file

@ -60,7 +60,6 @@ mod block;
mod inline; mod inline;
mod lex; mod lex;
mod span; mod span;
mod tree;
use span::Span; use span::Span;
@ -555,7 +554,7 @@ pub struct Parser<'s> {
src: &'s str, src: &'s str,
/// Block tree parsed at first. /// Block tree parsed at first.
tree: block::Tree<'s>, blocks: std::iter::Peekable<std::vec::IntoIter<block::Event<'s>>>,
/// Contents obtained by the prepass. /// Contents obtained by the prepass.
pre_pass: PrePass<'s>, pre_pass: PrePass<'s>,
@ -600,31 +599,48 @@ impl<'s> PrePass<'s> {
#[must_use] #[must_use]
fn new( fn new(
src: &'s str, src: &'s str,
mut tree: block::Tree<'s>, blocks: std::slice::Iter<block::Event<'s>>,
inline_parser: &mut inline::Parser<'s>, inline_parser: &mut inline::Parser<'s>,
) -> Self { ) -> Self {
let mut link_definitions = Map::new(); let mut link_definitions = Map::new();
let mut headings: Vec<Heading> = Vec::new(); let mut headings: Vec<Heading> = Vec::new();
let mut used_ids: Set<&str> = Set::new(); let mut used_ids: Set<&str> = Set::new();
let mut blocks = blocks.peekable();
let mut attr_prev: Option<Span> = None; let mut attr_prev: Option<Span> = None;
while let Some(e) = tree.next() { while let Some(e) = blocks.next() {
match e.kind { match e.kind {
tree::EventKind::Enter(block::Node::Leaf(block::Leaf::LinkDefinition { block::EventKind::Enter(block::Node::Leaf(block::Leaf::LinkDefinition {
label, label,
})) => { })) => {
fn next_is_inline(
bs: &mut std::iter::Peekable<std::slice::Iter<block::Event>>,
) -> bool {
matches!(bs.peek().map(|e| &e.kind), Some(block::EventKind::Inline))
}
// All link definition tags have to be obtained initially, as references can // All link definition tags have to be obtained initially, as references can
// appear before the definition. // appear before the definition.
let attrs = let attrs =
attr_prev.map_or_else(Attributes::new, |sp| attr::parse(sp.of(src))); attr_prev.map_or_else(Attributes::new, |sp| attr::parse(sp.of(src)));
let url = match tree.count_children() { let url = if !next_is_inline(&mut blocks) {
0 => "".into(), "".into()
1 => tree.take_inlines().next().unwrap().of(src).trim().into(), } else {
_ => tree.take_inlines().map(|sp| sp.of(src).trim()).collect(), let start = blocks.next().unwrap().span.of(src).trim();
if !next_is_inline(&mut blocks) {
start.into()
} else {
let mut url = start.to_string();
while next_is_inline(&mut blocks) {
url.push_str(blocks.next().unwrap().span.of(src).trim());
}
url.into()
}
}; };
link_definitions.insert(label, (url, attrs)); link_definitions.insert(label, (url, attrs));
} }
tree::EventKind::Enter(block::Node::Leaf(block::Leaf::Heading { .. })) => { block::EventKind::Enter(block::Node::Leaf(block::Leaf::Heading { .. })) => {
// All headings ids have to be obtained initially, as references can appear // All headings ids have to be obtained initially, as references can appear
// before the heading. Additionally, determining the id requires inline parsing // before the heading. Additionally, determining the id requires inline parsing
// as formatting must be removed. // as formatting must be removed.
@ -639,10 +655,21 @@ impl<'s> PrePass<'s> {
let mut id_auto = String::new(); let mut id_auto = String::new();
let mut text = String::new(); let mut text = String::new();
let mut last_whitespace = true; let mut last_whitespace = true;
let inlines = tree.take_inlines().collect::<Vec<_>>();
inline_parser.reset(); inline_parser.reset();
inlines.iter().enumerate().for_each(|(i, sp)| { let mut last_end = 0;
inline_parser.feed_line(*sp, i == inlines.len() - 1); loop {
let span_inline = blocks.next().and_then(|e| {
if matches!(e.kind, block::EventKind::Inline) {
last_end = e.span.end();
Some(e.span)
} else {
None
}
});
inline_parser.feed_line(
span_inline.unwrap_or_else(|| Span::empty_at(last_end)),
span_inline.is_none(),
);
inline_parser.for_each(|ev| match ev.kind { inline_parser.for_each(|ev| match ev.kind {
inline::EventKind::Str => { inline::EventKind::Str => {
text.push_str(ev.span.of(src)); text.push_str(ev.span.of(src));
@ -667,8 +694,11 @@ impl<'s> PrePass<'s> {
id_auto.push('-'); id_auto.push('-');
} }
_ => {} _ => {}
}) });
}); if span_inline.is_none() {
break;
}
}
id_auto.drain(id_auto.trim_end_matches('-').len()..); id_auto.drain(id_auto.trim_end_matches('-').len()..);
// ensure id unique // ensure id unique
@ -700,11 +730,11 @@ impl<'s> PrePass<'s> {
id_override, id_override,
}); });
} }
tree::EventKind::Atom(block::Atom::Attributes) => { block::EventKind::Atom(block::Atom::Attributes) => {
attr_prev = Some(e.span); attr_prev = Some(e.span);
} }
tree::EventKind::Enter(..) block::EventKind::Enter(..)
| tree::EventKind::Exit(block::Node::Container(block::Container::Section { | block::EventKind::Exit(block::Node::Container(block::Container::Section {
.. ..
})) => {} })) => {}
_ => { _ => {
@ -746,13 +776,13 @@ impl<'s> PrePass<'s> {
impl<'s> Parser<'s> { impl<'s> Parser<'s> {
#[must_use] #[must_use]
pub fn new(src: &'s str) -> Self { pub fn new(src: &'s str) -> Self {
let tree = block::parse(src); let blocks = block::parse(src);
let mut inline_parser = inline::Parser::new(src); let mut inline_parser = inline::Parser::new(src);
let pre_pass = PrePass::new(src, tree.clone(), &mut inline_parser); let pre_pass = PrePass::new(src, blocks.iter(), &mut inline_parser);
Self { Self {
src, src,
tree, blocks: blocks.into_iter().peekable(),
pre_pass, pre_pass,
block_attributes: Attributes::new(), block_attributes: Attributes::new(),
table_head_row: false, table_head_row: false,
@ -866,10 +896,10 @@ impl<'s> Parser<'s> {
} }
fn block(&mut self) -> Option<Event<'s>> { fn block(&mut self) -> Option<Event<'s>> {
while let Some(ev) = &mut self.tree.next() { while let Some(ev) = &mut self.blocks.next() {
let content = ev.span.of(self.src); let content = ev.span.of(self.src);
let event = match ev.kind { let event = match ev.kind {
tree::EventKind::Atom(a) => match a { block::EventKind::Atom(a) => match a {
block::Atom::Blankline => Event::Blankline, block::Atom::Blankline => Event::Blankline,
block::Atom::ThematicBreak => { block::Atom::ThematicBreak => {
Event::ThematicBreak(self.block_attributes.take()) Event::ThematicBreak(self.block_attributes.take())
@ -879,8 +909,8 @@ impl<'s> Parser<'s> {
continue; continue;
} }
}, },
tree::EventKind::Enter(c) | tree::EventKind::Exit(c) => { block::EventKind::Enter(c) | block::EventKind::Exit(c) => {
let enter = matches!(ev.kind, tree::EventKind::Enter(..)); let enter = matches!(ev.kind, block::EventKind::Enter(..));
let cont = match c { let cont = match c {
block::Node::Leaf(l) => { block::Node::Leaf(l) => {
self.inline_parser.reset(); self.inline_parser.reset();
@ -977,15 +1007,21 @@ impl<'s> Parser<'s> {
Event::End(cont) Event::End(cont)
} }
} }
tree::EventKind::Inline => { block::EventKind::Inline => {
if self.verbatim { if self.verbatim {
Event::Str(content.into()) Event::Str(content.into())
} else { } else {
self.inline_parser self.inline_parser.feed_line(
.feed_line(ev.span, self.tree.branch_is_empty()); ev.span,
!matches!(
self.blocks.peek().map(|e| &e.kind),
Some(block::EventKind::Inline),
),
);
return self.next(); return self.next();
} }
} }
block::EventKind::Stale => continue,
}; };
return Some(event); return Some(event);
} }

View file

@ -1,444 +0,0 @@
use crate::Span;
/// Kind of an event yielded when iterating over a [`Tree`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EventKind<C, A> {
/// A container node is being entered; its children (if any) follow.
Enter(C),
/// An inline node.
Inline,
/// The container node most recently entered and not yet exited is being left.
Exit(C),
/// An atom node.
Atom(A),
}
/// A node as yielded by `Builder::children`.
#[derive(Debug)]
pub struct Node<'a, C, A> {
/// Index of the node within the builder.
pub index: NodeIndex,
/// Mutable access to the node's payload.
pub elem: Element<'a, C, A>,
/// Span stored on the node.
pub span: Span,
}
/// Mutable borrow of the payload of a node held by a builder.
#[derive(Debug)]
pub enum Element<'a, C, A> {
/// Payload of a container node.
Container(&'a mut C),
/// Payload of an atom node.
Atom(&'a mut A),
/// An inline node; it carries no payload.
Inline,
}
/// Item yielded by the [`Tree`] iterator.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Event<C, A> {
/// What happened: a node was entered or exited, or an atom/inline was visited.
pub kind: EventKind<C, A>,
/// Span associated with the event. For `Exit` this is the exit span stored on the container.
pub span: Span,
}
/// A finished tree that is traversed by iterating over it, yielding [`Event`]s.
#[derive(Clone)]
pub struct Tree<C, A> {
/// All nodes of the tree. Held behind an `Rc`, so cloning the tree does not copy the nodes.
nodes: std::rc::Rc<[InternalNode<C, A>]>,
/// Containers that have been entered but not yet exited by the iteration, innermost last.
branch: Vec<NodeIndex>,
/// Next node to visit, or `None` when the current container has no more nodes.
head: Option<NodeIndex>,
}
impl<C: Clone, A: Clone> Tree<C, A> {
/// Count number of direct children nodes.
pub fn count_children(&self) -> usize {
let mut head = self.head;
let mut count = 0;
while let Some(h) = head {
let n = &self.nodes[h.index()];
head = n.next;
count += 1;
}
count
}
/// Retrieve all inlines until the end of the current container. Panics if any upcoming node is
/// not an inline node.
pub fn take_inlines(&mut self) -> impl Iterator<Item = Span> + '_ {
let mut head = self.head.take();
std::iter::from_fn(move || {
head.take().map(|h| {
let n = &self.nodes[h.index()];
debug_assert!(matches!(n.kind, NodeKind::Inline));
head = n.next;
n.span
})
})
}
pub fn branch_is_empty(&self) -> bool {
matches!(self.head, None)
}
}
impl<C: Clone, A: Clone> Iterator for Tree<C, A> {
    type Item = Event<C, A>;

    /// Advance the depth-first traversal by one event.
    ///
    /// While the cursor points at a node, that node is visited: containers are entered (and
    /// remembered on `branch`), atoms and inlines are stepped over. Once the cursor is empty the
    /// innermost entered container is exited; when none is left the iteration ends.
    fn next(&mut self) -> Option<Self::Item> {
        match self.head {
            Some(current) => {
                let node = &self.nodes[current.index()];
                let (kind, following) = match &node.kind {
                    NodeKind::Root => unreachable!(),
                    NodeKind::Container(c, first_child, ..) => {
                        self.branch.push(current);
                        (EventKind::Enter(c.clone()), *first_child)
                    }
                    NodeKind::Atom(a) => (EventKind::Atom(a.clone()), node.next),
                    NodeKind::Inline => (EventKind::Inline, node.next),
                };
                self.head = following;
                Some(Event {
                    kind,
                    span: node.span,
                })
            }
            None => {
                let entered = self.branch.pop()?;
                let InternalNode { next, kind, .. } = &self.nodes[entered.index()];
                match kind {
                    NodeKind::Container(c, _, exit_span) => {
                        // continue with whatever follows the container that was just left
                        self.head = *next;
                        Some(Event {
                            kind: EventKind::Exit(c.clone()),
                            span: *exit_span,
                        })
                    }
                    _ => panic!(),
                }
            }
        }
    }
}
/// Index of a node. Stored one-based in a `NonZeroUsize` so that `Option<NodeIndex>` needs no
/// extra space.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct NodeIndex(std::num::NonZeroUsize);

impl NodeIndex {
    /// Wrap the zero-based index `i`. `usize::MAX` cannot be represented.
    fn new(i: usize) -> Self {
        debug_assert_ne!(i, usize::MAX);
        let one_based = std::num::NonZeroUsize::new(i + 1).unwrap();
        Self(one_based)
    }

    /// Index of the root node, which is always the first node.
    fn root() -> Self {
        Self::new(0)
    }

    /// The zero-based index this value was created from.
    fn index(self) -> usize {
        self.0.get() - 1
    }
}
/// Internal representation of what a node is.
#[derive(Debug, Clone, PartialEq, Eq)]
enum NodeKind<C, A> {
/// The node a new builder starts with. Never exposed as an element or event.
Root,
/// A container: its payload, its first child (if any), and the span reported when it is exited.
Container(C, Option<NodeIndex>, Span),
/// An atom with its payload.
Atom(A),
/// An inline node.
Inline,
}
/// Storage for a single node.
#[derive(Debug, Clone)]
struct InternalNode<C, A> {
/// Span of the node; reported for enter, atom and inline events.
span: Span,
/// What the node is, including container-specific links.
kind: NodeKind<C, A>,
/// Node that follows this one within the same container, if any.
next: Option<NodeIndex>,
}
/// Incrementally builds a tree through `enter`/`exit`/`atom`/`inline` calls.
#[derive(Clone)]
pub struct Builder<C, A> {
/// All nodes created so far; the first one is the root.
nodes: Vec<InternalNode<C, A>>,
/// Stack of node indices used while building.
/// NOTE(review): presumably the currently open containers; the code maintaining it in
/// `add_node` is outside this view, so confirm there.
branch: Vec<NodeIndex>,
/// Current insertion cursor. NOTE(review): exact meaning depends on `add_node`, confirm there.
head: Option<NodeIndex>,
/// Number of `enter` calls not yet matched by an `exit`.
depth: usize,
}
impl<C, A> NodeKind<C, A> {
    /// First child of a container; `None` for childless containers and for any other kind.
    fn child(&self) -> Option<NodeIndex> {
        match self {
            NodeKind::Container(_, first, _) => *first,
            _ => None,
        }
    }

    /// Mutable access to the first-child link of a container.
    ///
    /// Panics if the node is not a container.
    fn child_mut(&mut self) -> &mut Option<NodeIndex> {
        match self {
            NodeKind::Container(_, first, _) => first,
            _ => panic!(),
        }
    }
}
impl<'a, C, A> From<&'a mut NodeKind<C, A>> for Element<'a, C, A> {
    /// Borrow the payload of a node kind as an [`Element`]. The root kind must never be
    /// converted.
    fn from(kind: &'a mut NodeKind<C, A>) -> Self {
        match kind {
            NodeKind::Inline => Element::Inline,
            NodeKind::Atom(atom) => Element::Atom(atom),
            NodeKind::Container(container, ..) => Element::Container(container),
            NodeKind::Root => unreachable!(),
        }
    }
}
impl<C, A> Builder<C, A> {
pub(super) fn new() -> Self {
Builder {
nodes: vec![InternalNode {
span: Span::default(),
kind: NodeKind::Root,
next: None,
}],
branch: vec![],
head: Some(NodeIndex::root()),
depth: 0,
}
}
/// Add an atom node with payload `a` covering `span`.
pub(super) fn atom(&mut self, a: A, span: Span) {
    let node = InternalNode {
        span,
        kind: NodeKind::Atom(a),
        next: None,
    };
    self.add_node(node);
}
/// Add an inline node covering `span`.
pub(super) fn inline(&mut self, span: Span) {
    let node = InternalNode {
        span,
        kind: NodeKind::Inline,
        next: None,
    };
    self.add_node(node);
}
/// Add a container node with payload `c` and increase the depth by one.
///
/// The container starts without children and with an empty placeholder exit span, which is
/// filled in by `exit`. Returns the index of the new node.
pub(super) fn enter(&mut self, c: C, span: Span) -> NodeIndex {
    self.depth += 1;
    let exit_placeholder = Span::new(0, 0);
    let container = InternalNode {
        span,
        kind: NodeKind::Container(c, None, exit_placeholder),
        next: None,
    };
    self.add_node(container)
}
/// Leave the current container, recording `span` as its exit span, and decrease the depth.
///
/// Panics if the node that ends up being exited is not a container.
pub(super) fn exit(&mut self, span: Span) {
self.depth -= 1;
// The cursor is always cleared here.
if let Some(head) = self.head.take() {
// Cursor was on a container: that container is the one being exited. It is pushed onto
// `branch` and we are done.
if let NodeKind::Container(_, _, sp) = &mut self.nodes[head.index()].kind {
*sp = span;
self.branch.push(head);
return;
}
// Cursor was on a non-container node: fall through and exit the top of `branch`.
} else {
// No cursor: drop the top of `branch`, then exit the entry below it.
// NOTE(review): which node the dropped entry refers to depends on `add_node`, which is
// outside this view; confirm there.
let last = self.branch.pop();
debug_assert_ne!(last, None);
}
if let NodeKind::Container(_, _, sp) =
&mut self.nodes[self.branch.last().unwrap().index()].kind
{
*sp = span;
} else {
panic!();
}
}
/// Exit and discard all the contents of the current container.
pub(super) fn exit_discard(&mut self) {
self.exit(Span::new(0, (1 << 31) - 1));
let exited = self.branch.pop().unwrap();
self.nodes.drain(exited.index()..);
let (prev, has_parent) = self.replace(exited, None);
if has_parent {
self.head = Some(prev);
} else {
self.branch.push(prev);
}
}
/// Swap the node and its children with either its parent or the node before.
pub fn swap_prev(&mut self, node: NodeIndex, span: Span) {
let next = self.nodes[node.index()].next;
let (prev, _) = self.replace(node, next);
if let Some(n) = next {
self.nodes[prev.index()].span = self.nodes[n.index()].span.empty_before();
self.replace(n, None);
} else {
self.nodes[prev.index()].span = self.nodes[self.nodes.len() - 1].span.empty_after();
}
self.replace(prev, Some(node));
self.nodes[node.index()].next = Some(prev);
self.nodes[node.index()].span = span;
let span = self.nodes[prev.index()].span;
if let NodeKind::Container(_, _, sp) = &mut self.nodes[node.index()].kind {
*sp = span;
} else {
panic!()
}
}
/// Remove the specified node and its children.
pub fn remove(&mut self, node: NodeIndex) {
let next = self.nodes[node.index()].next;
self.replace(node, next);
}
pub(super) fn depth(&self) -> usize {
self.depth
}
pub(super) fn elem(&mut self, ni: NodeIndex) -> Element<C, A> {
match &mut self.nodes[ni.index()].kind {
NodeKind::Root => unreachable!(),
NodeKind::Container(c, ..) => Element::Container(c),
NodeKind::Atom(a) => Element::Atom(a),
NodeKind::Inline => Element::Inline,
}
}
/// Retrieve all children nodes for the specified node, in the order that they were added.
pub(super) fn children(&mut self, node: NodeIndex) -> impl Iterator<Item = Node<C, A>> {
// XXX assumes no modifications
let n = &self.nodes[node.index()];
let range = if let Some(start) = n.kind.child() {
start.index()..n.next.map_or(self.nodes.len(), NodeIndex::index)
} else {
0..0
};
range
.clone()
.map(NodeIndex::new)
.zip(self.nodes[range].iter_mut())
.map(|(index, n)| Node {
index,
elem: Element::from(&mut n.kind),
span: n.span,
})
}
pub(super) fn finish(self) -> Tree<C, A> {
debug_assert_eq!(self.depth, 0);
let head = self.nodes[NodeIndex::root().index()].next;
Tree {
nodes: self.nodes.into_boxed_slice().into(),
branch: Vec::new(),
head,
}
}
fn add_node(&mut self, node: InternalNode<C, A>) -> NodeIndex {
let ni = NodeIndex::new(self.nodes.len());
self.nodes.push(node);
if let Some(head_ni) = &mut self.head {
let mut head = &mut self.nodes[head_ni.index()];
match &mut head.kind {
NodeKind::Root | NodeKind::Inline | NodeKind::Atom(_) => {
// set next pointer of previous node
debug_assert_eq!(head.next, None);
head.next = Some(ni);
}
NodeKind::Container(_, child, _) => {
self.branch.push(*head_ni);
// set child pointer of current container
debug_assert_eq!(*child, None);
*child = Some(ni);
}
}
} else if let Some(block) = self.branch.pop() {
let mut block = &mut self.nodes[block.index()];
debug_assert!(matches!(block.kind, NodeKind::Container(..)));
block.next = Some(ni);
} else {
panic!()
}
self.head = Some(ni);
ni
}
/// Remove the link from the node that points to the specified node. Optionally replace the
/// node with another node. Return the pointer node and whether it is a container or not.
fn replace(&mut self, node: NodeIndex, next: Option<NodeIndex>) -> (NodeIndex, bool) {
for (i, n) in self.nodes.iter_mut().enumerate().rev() {
let ni = NodeIndex::new(i);
if n.next == Some(node) {
n.next = next;
return (ni, false);
} else if n.kind.child() == Some(node) {
*n.kind.child_mut() = next;
return (ni, true);
}
}
panic!("node is never linked to")
}
}
/// Debug output of a builder is the debug output of the tree it would produce
/// if it were finished right now.
impl<C: std::fmt::Debug + Clone, A: std::fmt::Debug + Clone> std::fmt::Debug for Builder<C, A> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `finish` consumes the builder, so work on a copy.
        let snapshot = self.clone().finish();
        std::fmt::Debug::fmt(&snapshot, f)
    }
}
/// Debug output: a leading newline, then one line per entered container, atom
/// or inline event, indented by nesting level and followed by its span as
/// ` (start:end)`. Exit events only reduce the nesting level.
impl<C: std::fmt::Debug + Clone, A: std::fmt::Debug + Clone> std::fmt::Debug for Tree<C, A> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        const INDENT: &str = " ";
        writeln!(f)?;
        let mut level = 0;
        // Iterating consumes the tree, so walk a copy.
        for event in self.clone() {
            let padding = INDENT.repeat(level);
            match event.kind {
                EventKind::Exit(..) => {
                    // Exits produce no line of their own.
                    level -= 1;
                    continue;
                }
                EventKind::Enter(container) => {
                    f.write_str(&padding)?;
                    write!(f, "{:?}", container)?;
                    level += 1;
                }
                EventKind::Atom(atom) => {
                    f.write_str(&padding)?;
                    write!(f, "{:?}", atom)?;
                }
                EventKind::Inline => {
                    f.write_str(&padding)?;
                    f.write_str("Inline")?;
                }
            }
            writeln!(f, " ({}:{})", event.span.start(), event.span.end())?;
        }
        Ok(())
    }
}
// Unit tests for the tree builder and the debug formatting of the resulting tree.
#[cfg(test)]
mod test {
use crate::Span;
// Builds a small tree with three top-level containers (one of them nested three levels deep)
// and checks the debug rendering of the finished tree line by line.
#[test]
fn fmt() {
let mut tree = super::Builder::new();
// Container 1 with two atoms.
tree.enter(1, Span::new(0, 1));
tree.atom(11, Span::new(0, 1));
tree.atom(12, Span::new(0, 1));
tree.exit(Span::new(0, 0));
// Container 2 with two child containers, the first of which has a nested container.
tree.enter(2, Span::new(1, 5));
tree.enter(21, Span::new(2, 5));
tree.enter(211, Span::new(3, 4));
tree.atom(2111, Span::new(3, 4));
tree.exit(Span::new(0, 0));
tree.exit(Span::new(0, 0));
tree.enter(22, Span::new(4, 5));
tree.atom(221, Span::new(4, 5));
tree.exit(Span::new(0, 0));
tree.exit(Span::new(0, 0));
// Container 3 with a single atom.
tree.enter(3, Span::new(5, 6));
tree.atom(31, Span::new(5, 6));
tree.exit(Span::new(0, 0));
// The spans printed are the ones passed to `enter`/`atom`; the spans passed to `exit` do not
// appear in the output.
assert_eq!(
format!("{:?}", tree.finish()),
concat!(
"\n",
"1 (0:1)\n",
" 11 (0:1)\n",
" 12 (0:1)\n",
"2 (1:5)\n",
" 21 (2:5)\n",
" 211 (3:4)\n",
" 2111 (3:4)\n",
" 22 (4:5)\n",
" 221 (4:5)\n",
"3 (5:6)\n",
" 31 (5:6)\n",
)
);
}
}