From e84385c2db7fb2ef1cc0eb5c1274d4a3fe4c514d Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Tue, 25 Apr 2023 20:18:31 +0200 Subject: [PATCH] block: replace tree with event vec a lot simpler to use and reason about should also make it easier to move to streaming --- src/block.rs | 313 +++++++++++++++++++++++------------- src/lib.rs | 92 +++++++---- src/tree.rs | 444 --------------------------------------------------- 3 files changed, 265 insertions(+), 584 deletions(-) delete mode 100644 src/tree.rs diff --git a/src/block.rs b/src/block.rs index 1693849..40485e0 100644 --- a/src/block.rs +++ b/src/block.rs @@ -5,15 +5,26 @@ use crate::Span; use crate::attr; use crate::lex; -use crate::tree; use Atom::*; use Container::*; use Leaf::*; use ListType::*; -pub type Tree<'s> = tree::Tree, Atom>; -pub type TreeBuilder<'s> = tree::Builder, Atom>; +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub struct Event<'s> { + pub kind: EventKind<'s>, + pub span: Span, +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum EventKind<'s> { + Enter(Node<'s>), + Inline, + Exit(Node<'s>), + Atom(Atom), + Stale, +} #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Node<'s> { @@ -22,7 +33,7 @@ pub enum Node<'s> { } #[must_use] -pub fn parse(src: &str) -> Tree { +pub fn parse(src: &str) -> Vec { TreeParser::new(src).parse() } @@ -106,15 +117,13 @@ struct OpenList { /// Depth in the tree where the direct list items of the list are. Needed to determine when to /// close the list. depth: u16, - /// Index to node in tree, required to update tightness. - node: tree::NodeIndex, + /// Index to event in tree, required to update tightness. + event: usize, } /// Parser for block-level tree structure of entire document. struct TreeParser<'s> { src: &'s str, - tree: TreeBuilder<'s>, - /// The previous block element was a blank line. prev_blankline: bool, prev_loose: bool, @@ -124,24 +133,30 @@ struct TreeParser<'s> { open_sections: Vec, /// Alignments for each column in for the current table. alignments: Vec, + /// Current container depth. + open: Vec, + /// Buffer queue for next events. Events are buffered until no modifications due to future + /// characters are needed. + events: Vec>, } impl<'s> TreeParser<'s> { #[must_use] - pub fn new(src: &'s str) -> Self { + fn new(src: &'s str) -> Self { Self { src, - tree: TreeBuilder::new(), prev_blankline: false, prev_loose: false, open_lists: Vec::new(), alignments: Vec::new(), open_sections: Vec::new(), + open: Vec::new(), + events: Vec::new(), } } #[must_use] - pub fn parse(mut self) -> Tree<'s> { + fn parse(mut self) -> Vec> { let mut lines = lines(self.src).collect::>(); let mut line_pos = 0; while line_pos < lines.len() { @@ -154,10 +169,43 @@ impl<'s> TreeParser<'s> { while let Some(l) = self.open_lists.pop() { self.close_list(l, self.src.len()); } - for _ in self.open_sections.drain(..) { - self.tree.exit(Span::empty_at(self.src.len())); // section + + for _ in std::mem::take(&mut self.open_sections).drain(..) { + self.exit(Span::empty_at(self.src.len())); } - self.tree.finish() + debug_assert_eq!(self.open, &[]); + self.events + } + + fn inline(&mut self, span: Span) { + self.events.push(Event { + kind: EventKind::Inline, + span, + }); + } + + fn enter(&mut self, node: Node<'s>, span: Span) -> usize { + let i = self.events.len(); + self.open.push(i); + self.events.push(Event { + kind: EventKind::Enter(node), + span, + }); + i + } + + fn exit(&mut self, span: Span) -> usize { + let i = self.events.len(); + let node = if let EventKind::Enter(node) = self.events[self.open.pop().unwrap()].kind { + node + } else { + panic!(); + }; + self.events.push(Event { + kind: EventKind::Exit(node), + span, + }); + i } /// Recursively parse a block and all of its children. Return number of lines the block uses. @@ -198,8 +246,8 @@ impl<'s> TreeParser<'s> { // close list if a non list item or a list item of new type appeared if let Some(OpenList { ty, depth, .. }) = self.open_lists.last() { - debug_assert!(usize::from(*depth) <= self.tree.depth()); - if self.tree.depth() == (*depth).into() + debug_assert!(usize::from(*depth) <= self.open.len()); + if self.open.len() == (*depth).into() && !matches!(kind, Kind::ListItem { ty: ty_new, .. } if *ty == ty_new) { let l = self.open_lists.pop().unwrap(); @@ -213,15 +261,17 @@ impl<'s> TreeParser<'s> { } else { self.prev_loose = false; if self.prev_blankline { - if let Some(OpenList { node, depth, .. }) = self.open_lists.last() { - if usize::from(*depth) >= self.tree.depth() + if let Some(OpenList { event, depth, .. }) = self.open_lists.last() { + if usize::from(*depth) >= self.open.len() || !matches!(kind, Kind::ListItem { .. }) { - let mut elem = self.tree.elem(*node); - let ListKind { tight, .. } = elem.list_mut().unwrap(); - if *tight { - self.prev_loose = true; - *tight = false; + if let EventKind::Enter(Node::Container(List { kind, .. })) = + &mut self.events[*event].kind + { + if kind.tight { + self.prev_loose = true; + kind.tight = false; + } } } } @@ -269,7 +319,10 @@ impl<'s> TreeParser<'s> { }; match block { - Block::Atom(a) => self.tree.atom(a, span_start), + Block::Atom(a) => self.events.push(Event { + kind: EventKind::Atom(a), + span: span_start, + }), Block::Leaf(l) => self.parse_leaf(l, &kind, span_start, span_end, lines), Block::Container(Table) => self.parse_table(lines, span_start, span_end), Block::Container(c) => { @@ -325,16 +378,13 @@ impl<'s> TreeParser<'s> { .iter() .rposition(|l| l < level) .map_or(0, |i| i + 1); - self.open_sections.drain(first_close..).for_each(|_| { - self.tree.exit(Span::empty_at(span_start.start())); // section - }); + let pos = span_start.start() as u32; + for _ in 0..(self.open_sections.len() - first_close) { + self.exit(Span::empty_at(span_start.start())); // section + } + self.open_sections.drain(first_close..); self.open_sections.push(*level); - self.tree.enter( - Node::Container(Section { - pos: span_start.start() as u32, - }), - span_start.empty_before(), - ); + self.enter(Node::Container(Section { pos }), span_start.empty_before()); } // trim '#' characters @@ -343,12 +393,12 @@ impl<'s> TreeParser<'s> { } } - self.tree.enter(Node::Leaf(leaf), span_start); + self.enter(Node::Leaf(leaf), span_start); lines .iter() .filter(|l| !matches!(k, Kind::Heading { .. }) || !l.is_empty()) - .for_each(|line| self.tree.inline(*line)); - self.tree.exit(span_end); + .for_each(|line| self.inline(*line)); + self.exit(span_end); } fn parse_container( @@ -392,11 +442,11 @@ impl<'s> TreeParser<'s> { .open_lists .last() .map_or(true, |OpenList { depth, .. }| { - usize::from(*depth) < self.tree.depth() + usize::from(*depth) < self.open.len() }); if same_depth { let tight = true; - let node = self.tree.enter( + let event = self.enter( Node::Container(Container::List { kind: ListKind { ty: *ty, tight }, marker: span_start.of(self.src), @@ -405,48 +455,77 @@ impl<'s> TreeParser<'s> { ); self.open_lists.push(OpenList { ty: *ty, - depth: self.tree.depth().try_into().unwrap(), - node, + depth: self.open.len().try_into().unwrap(), + event, }); } } let dt = if let ListItem(ListItemKind::Description) = c { - let dt = self.tree.enter(Node::Leaf(DescriptionTerm), span_start); - self.tree.exit(span_start.trim_end(self.src).empty_after()); - let span_open = span_start; + let dt = self.enter(Node::Leaf(DescriptionTerm), span_start); + self.exit(span_start.trim_end(self.src).empty_after()); span_start = lines[0].empty_before(); - Some((dt, span_open)) + Some((dt, self.events.len(), self.open.len())) } else { None }; - let node = self.tree.enter(Node::Container(c), span_start); + self.enter(Node::Container(c), span_start); let mut l = 0; while l < lines.len() { l += self.parse_block(&mut lines[l..], false); } - if let Some((node_dt, span_open)) = dt { - let node_child = if let Some(node_child) = self.tree.children(node).next() { - if let tree::Element::Container(Node::Leaf(l @ Paragraph)) = node_child.elem { + if let Some((empty_term, enter_detail, open_detail)) = dt { + let enter_term = enter_detail + 1; + if let Some(first_child) = self.events.get_mut(enter_term) { + if let EventKind::Enter(Node::Leaf(l @ Paragraph)) = &mut first_child.kind { + // convert paragraph into description term *l = DescriptionTerm; - Some(node_child.index) - } else { - None + let exit_term = if let Some(i) = self.events[enter_term + 1..] + .iter_mut() + .position(|e| matches!(e.kind, EventKind::Exit(Node::Leaf(Paragraph)))) + { + enter_term + 1 + i + } else { + panic!() + }; + if let EventKind::Exit(Node::Leaf(l)) = &mut self.events[exit_term].kind { + *l = DescriptionTerm; + } else { + panic!() + } + + // remove empty description term + self.events[empty_term].kind = EventKind::Stale; + self.events[empty_term + 1].kind = EventKind::Stale; + + // move out term before detail + self.events[enter_term].span = self.events[empty_term].span; + let first_detail = self.events[exit_term + 1..] + .iter() + .position(|e| !matches!(e.kind, EventKind::Atom(Blankline))) + .map(|i| exit_term + 1 + i) + .unwrap_or(self.events.len()); + let detail_pos = self + .events + .get(first_detail) + .map(|e| e.span.start()) + .unwrap_or_else(|| self.events.last().unwrap().span.end()); + self.events + .copy_within(enter_term..first_detail, enter_detail); + self.events[first_detail - 1] = Event { + kind: EventKind::Enter(Node::Container(c)), + span: Span::empty_at(detail_pos), + }; + self.open[open_detail] = first_detail - 1; } - } else { - None - }; - if let Some(node_child) = node_child { - self.tree.swap_prev(node_child, span_open); - self.tree.remove(node_dt); } } if let Some(OpenList { depth, .. }) = self.open_lists.last() { - debug_assert!(usize::from(*depth) <= self.tree.depth()); - if self.tree.depth() == (*depth).into() { + debug_assert!(usize::from(*depth) <= self.open.len()); + if self.open.len() == (*depth).into() { self.prev_blankline = false; self.prev_loose = false; let l = self.open_lists.pop().unwrap(); @@ -454,38 +533,37 @@ impl<'s> TreeParser<'s> { } } - self.tree.exit(span_end); + self.exit(span_end); } fn parse_table(&mut self, lines: &mut [Span], span_start: Span, span_end: Span) { self.alignments.clear(); - self.tree.enter(Node::Container(Table), span_start); + self.enter(Node::Container(Table), span_start); let caption_line = lines .iter() .position(|sp| sp.of(self.src).trim_start().starts_with('^')) .map_or(lines.len(), |caption_line| { - self.tree.enter(Node::Leaf(Caption), span_start); + self.enter(Node::Leaf(Caption), span_start); lines[caption_line] = lines[caption_line] .trim_start(self.src) .skip_chars(2, self.src); lines[lines.len() - 1] = lines[lines.len() - 1].trim_end(self.src); for line in &lines[caption_line..] { - self.tree.inline(*line); + self.inline(*line); } - self.tree.exit(span_end); + self.exit(span_end); caption_line }); - let mut last_row_node = None; + let mut last_row_event = None; for row in &lines[..caption_line] { let row = row.trim(self.src); if row.is_empty() { break; } - let row_node = self - .tree - .enter(Node::Container(TableRow { head: false }), row.with_len(1)); + let row_event_enter = + self.enter(Node::Container(TableRow { head: false }), row.with_len(1)); let rem = row.skip(1); // | let lex = lex::Lexer::new(rem.of(self.src)); let mut pos = rem.start(); @@ -514,7 +592,7 @@ impl<'s> TreeParser<'s> { } }; separator_row &= separator_cell; - self.tree.enter( + self.enter( Node::Leaf(TableCell( self.alignments .get(column_index) @@ -523,8 +601,8 @@ impl<'s> TreeParser<'s> { )), Span::empty_at(cell_start), ); - self.tree.inline(span); - self.tree.exit(Span::new(pos, pos + 1)); // cell + self.inline(span); + self.exit(Span::new(pos, pos + 1)); cell_start = pos + len; column_index += 1; } @@ -540,11 +618,11 @@ impl<'s> TreeParser<'s> { if separator_row && verbatim.is_none() { self.alignments.clear(); self.alignments.extend( - self.tree - .children(row_node) - .filter(|n| matches!(n.elem, tree::Element::Inline)) - .map(|n| { - let cell = n.span.of(self.src); + self.events[row_event_enter + 1..] + .iter() + .filter(|e| matches!(e.kind, EventKind::Inline)) + .map(|e| { + let cell = e.span.of(self.src); let l = cell.as_bytes()[0] == b':'; let r = cell.as_bytes()[cell.len() - 1] == b':'; match (l, r) { @@ -555,62 +633,67 @@ impl<'s> TreeParser<'s> { } }), ); - self.tree.exit_discard(); // table row - if let Some(head_row) = last_row_node { - self.tree - .children(head_row) - .filter(|n| { - matches!(n.elem, tree::Element::Container(Node::Leaf(TableCell(..)))) + self.open.pop(); + self.events.drain(row_event_enter..); // remove table row + if let Some((head_row_enter, head_row_exit)) = last_row_event { + self.events[head_row_enter + 1..] + .iter_mut() + .filter(|e| { + matches!( + e.kind, + EventKind::Enter(Node::Leaf(TableCell(..))) + | EventKind::Exit(Node::Leaf(TableCell(..))) + ) }) .zip( self.alignments .iter() .copied() - .chain(std::iter::repeat(Alignment::Unspecified)), + .chain(std::iter::repeat(Alignment::Unspecified)) + .flat_map(|a| [a, a].into_iter()), ) - .for_each(|(n, new_align)| { - if let tree::Element::Container(Node::Leaf(TableCell(alignment))) = - n.elem - { + .for_each(|(e, new_align)| match &mut e.kind { + EventKind::Enter(Node::Leaf(TableCell(alignment))) + | EventKind::Exit(Node::Leaf(TableCell(alignment))) => { *alignment = new_align; } + _ => panic!(), }); - if let tree::Element::Container(Node::Container(TableRow { head })) = - self.tree.elem(head_row) - { + let event: &mut Event = &mut self.events[head_row_enter]; + if let EventKind::Enter(Node::Container(TableRow { head })) = &mut event.kind { + *head = true; + } else { + panic!() + } + let event: &mut Event = &mut self.events[head_row_exit]; + if let EventKind::Exit(Node::Container(TableRow { head })) = &mut event.kind { *head = true; } else { panic!() } } } else { - self.tree.exit(Span::empty_at(pos)); // table row - last_row_node = Some(row_node); + let row_event_exit = self.exit(Span::empty_at(pos)); // table row + last_row_event = Some((row_event_enter, row_event_exit)); } } - self.tree.exit(span_end); // table + self.exit(span_end); } fn close_list(&mut self, list: OpenList, pos: usize) { if self.prev_loose { - let mut elem = self.tree.elem(list.node); - let ListKind { tight, .. } = elem.list_mut().unwrap(); - // ignore blankline at end - *tight = true; + if let EventKind::Enter(Node::Container(List { kind, .. })) = + &mut self.events[list.event].kind + { + // ignore blankline at end + kind.tight = true; + } else { + panic!() + } } - self.tree.exit(Span::empty_at(pos)); // list - } -} - -impl<'t, 's> tree::Element<'t, Node<'s>, Atom> { - fn list_mut(&mut self) -> Option<&mut ListKind> { - if let tree::Element::Container(Node::Container(Container::List { kind, .. })) = self { - Some(kind) - } else { - None - } + self.exit(Span::empty_at(pos)); // list } } @@ -1023,13 +1106,13 @@ fn lines(src: &str) -> impl Iterator + '_ { #[cfg(test)] mod test { - use crate::tree::EventKind::*; use crate::Alignment; use crate::OrderedListNumbering::*; use crate::OrderedListStyle::*; use super::Atom::*; use super::Container::*; + use super::EventKind::*; use super::FenceKind; use super::Kind; use super::Leaf::*; @@ -1041,7 +1124,7 @@ mod test { macro_rules! test_parse { ($src:expr $(,$($event:expr),* $(,)?)?) => { let t = super::TreeParser::new($src).parse(); - let actual = t.map(|ev| (ev.kind, ev.span.of($src))).collect::>(); + let actual = t.into_iter().map(|ev| (ev.kind, ev.span.of($src))).collect::>(); let expected = &[$($($event),*,)?]; assert_eq!( actual, @@ -2189,11 +2272,13 @@ mod test { })), "" ), + (Stale, ":"), + (Stale, ""), (Enter(Leaf(DescriptionTerm)), ":"), (Inline, "term"), (Exit(Leaf(DescriptionTerm)), ""), - (Enter(Container(ListItem(ListItemKind::Description))), ""), (Atom(Blankline), "\n"), + (Enter(Container(ListItem(ListItemKind::Description))), ""), (Enter(Leaf(Paragraph)), ""), (Inline, "description"), (Exit(Leaf(Paragraph)), ""), @@ -2233,12 +2318,14 @@ mod test { })), "", ), + (Stale, ":"), + (Stale, ""), (Enter(Leaf(DescriptionTerm)), ":"), (Inline, "apple\n"), (Inline, "fruit"), (Exit(Leaf(DescriptionTerm)), ""), - (Enter(Container(ListItem(ListItemKind::Description))), ""), (Atom(Blankline), "\n"), + (Enter(Container(ListItem(ListItemKind::Description))), ""), (Enter(Leaf(Paragraph)), ""), (Inline, "Paragraph one"), (Exit(Leaf(Paragraph)), ""), @@ -2279,6 +2366,8 @@ mod test { "", ), (Exit(Container(ListItem(ListItemKind::Description))), ""), + (Stale, ":"), + (Stale, ""), (Enter(Leaf(DescriptionTerm)), ":"), (Inline, "orange"), (Exit(Leaf(DescriptionTerm)), ""), diff --git a/src/lib.rs b/src/lib.rs index 0051486..889b6c3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -60,7 +60,6 @@ mod block; mod inline; mod lex; mod span; -mod tree; use span::Span; @@ -555,7 +554,7 @@ pub struct Parser<'s> { src: &'s str, /// Block tree parsed at first. - tree: block::Tree<'s>, + blocks: std::iter::Peekable>>, /// Contents obtained by the prepass. pre_pass: PrePass<'s>, @@ -600,31 +599,48 @@ impl<'s> PrePass<'s> { #[must_use] fn new( src: &'s str, - mut tree: block::Tree<'s>, + blocks: std::slice::Iter>, inline_parser: &mut inline::Parser<'s>, ) -> Self { let mut link_definitions = Map::new(); let mut headings: Vec = Vec::new(); let mut used_ids: Set<&str> = Set::new(); + let mut blocks = blocks.peekable(); + let mut attr_prev: Option = None; - while let Some(e) = tree.next() { + while let Some(e) = blocks.next() { match e.kind { - tree::EventKind::Enter(block::Node::Leaf(block::Leaf::LinkDefinition { + block::EventKind::Enter(block::Node::Leaf(block::Leaf::LinkDefinition { label, })) => { + fn next_is_inline( + bs: &mut std::iter::Peekable>, + ) -> bool { + matches!(bs.peek().map(|e| &e.kind), Some(block::EventKind::Inline)) + } + // All link definition tags have to be obtained initially, as references can // appear before the definition. let attrs = attr_prev.map_or_else(Attributes::new, |sp| attr::parse(sp.of(src))); - let url = match tree.count_children() { - 0 => "".into(), - 1 => tree.take_inlines().next().unwrap().of(src).trim().into(), - _ => tree.take_inlines().map(|sp| sp.of(src).trim()).collect(), + let url = if !next_is_inline(&mut blocks) { + "".into() + } else { + let start = blocks.next().unwrap().span.of(src).trim(); + if !next_is_inline(&mut blocks) { + start.into() + } else { + let mut url = start.to_string(); + while next_is_inline(&mut blocks) { + url.push_str(blocks.next().unwrap().span.of(src).trim()); + } + url.into() + } }; link_definitions.insert(label, (url, attrs)); } - tree::EventKind::Enter(block::Node::Leaf(block::Leaf::Heading { .. })) => { + block::EventKind::Enter(block::Node::Leaf(block::Leaf::Heading { .. })) => { // All headings ids have to be obtained initially, as references can appear // before the heading. Additionally, determining the id requires inline parsing // as formatting must be removed. @@ -639,10 +655,21 @@ impl<'s> PrePass<'s> { let mut id_auto = String::new(); let mut text = String::new(); let mut last_whitespace = true; - let inlines = tree.take_inlines().collect::>(); inline_parser.reset(); - inlines.iter().enumerate().for_each(|(i, sp)| { - inline_parser.feed_line(*sp, i == inlines.len() - 1); + let mut last_end = 0; + loop { + let span_inline = blocks.next().and_then(|e| { + if matches!(e.kind, block::EventKind::Inline) { + last_end = e.span.end(); + Some(e.span) + } else { + None + } + }); + inline_parser.feed_line( + span_inline.unwrap_or_else(|| Span::empty_at(last_end)), + span_inline.is_none(), + ); inline_parser.for_each(|ev| match ev.kind { inline::EventKind::Str => { text.push_str(ev.span.of(src)); @@ -667,8 +694,11 @@ impl<'s> PrePass<'s> { id_auto.push('-'); } _ => {} - }) - }); + }); + if span_inline.is_none() { + break; + } + } id_auto.drain(id_auto.trim_end_matches('-').len()..); // ensure id unique @@ -700,11 +730,11 @@ impl<'s> PrePass<'s> { id_override, }); } - tree::EventKind::Atom(block::Atom::Attributes) => { + block::EventKind::Atom(block::Atom::Attributes) => { attr_prev = Some(e.span); } - tree::EventKind::Enter(..) - | tree::EventKind::Exit(block::Node::Container(block::Container::Section { + block::EventKind::Enter(..) + | block::EventKind::Exit(block::Node::Container(block::Container::Section { .. })) => {} _ => { @@ -746,13 +776,13 @@ impl<'s> PrePass<'s> { impl<'s> Parser<'s> { #[must_use] pub fn new(src: &'s str) -> Self { - let tree = block::parse(src); + let blocks = block::parse(src); let mut inline_parser = inline::Parser::new(src); - let pre_pass = PrePass::new(src, tree.clone(), &mut inline_parser); + let pre_pass = PrePass::new(src, blocks.iter(), &mut inline_parser); Self { src, - tree, + blocks: blocks.into_iter().peekable(), pre_pass, block_attributes: Attributes::new(), table_head_row: false, @@ -866,10 +896,10 @@ impl<'s> Parser<'s> { } fn block(&mut self) -> Option> { - while let Some(ev) = &mut self.tree.next() { + while let Some(ev) = &mut self.blocks.next() { let content = ev.span.of(self.src); let event = match ev.kind { - tree::EventKind::Atom(a) => match a { + block::EventKind::Atom(a) => match a { block::Atom::Blankline => Event::Blankline, block::Atom::ThematicBreak => { Event::ThematicBreak(self.block_attributes.take()) @@ -879,8 +909,8 @@ impl<'s> Parser<'s> { continue; } }, - tree::EventKind::Enter(c) | tree::EventKind::Exit(c) => { - let enter = matches!(ev.kind, tree::EventKind::Enter(..)); + block::EventKind::Enter(c) | block::EventKind::Exit(c) => { + let enter = matches!(ev.kind, block::EventKind::Enter(..)); let cont = match c { block::Node::Leaf(l) => { self.inline_parser.reset(); @@ -977,15 +1007,21 @@ impl<'s> Parser<'s> { Event::End(cont) } } - tree::EventKind::Inline => { + block::EventKind::Inline => { if self.verbatim { Event::Str(content.into()) } else { - self.inline_parser - .feed_line(ev.span, self.tree.branch_is_empty()); + self.inline_parser.feed_line( + ev.span, + !matches!( + self.blocks.peek().map(|e| &e.kind), + Some(block::EventKind::Inline), + ), + ); return self.next(); } } + block::EventKind::Stale => continue, }; return Some(event); } diff --git a/src/tree.rs b/src/tree.rs deleted file mode 100644 index d441805..0000000 --- a/src/tree.rs +++ /dev/null @@ -1,444 +0,0 @@ -use crate::Span; - -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum EventKind { - Enter(C), - Inline, - Exit(C), - Atom(A), -} - -#[derive(Debug)] -pub struct Node<'a, C, A> { - pub index: NodeIndex, - pub elem: Element<'a, C, A>, - pub span: Span, -} - -#[derive(Debug)] -pub enum Element<'a, C, A> { - Container(&'a mut C), - Atom(&'a mut A), - Inline, -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct Event { - pub kind: EventKind, - pub span: Span, -} - -#[derive(Clone)] -pub struct Tree { - nodes: std::rc::Rc<[InternalNode]>, - branch: Vec, - head: Option, -} - -impl Tree { - /// Count number of direct children nodes. - pub fn count_children(&self) -> usize { - let mut head = self.head; - let mut count = 0; - while let Some(h) = head { - let n = &self.nodes[h.index()]; - head = n.next; - count += 1; - } - count - } - - /// Retrieve all inlines until the end of the current container. Panics if any upcoming node is - /// not an inline node. - pub fn take_inlines(&mut self) -> impl Iterator + '_ { - let mut head = self.head.take(); - std::iter::from_fn(move || { - head.take().map(|h| { - let n = &self.nodes[h.index()]; - debug_assert!(matches!(n.kind, NodeKind::Inline)); - head = n.next; - n.span - }) - }) - } - - pub fn branch_is_empty(&self) -> bool { - matches!(self.head, None) - } -} - -impl Iterator for Tree { - type Item = Event; - - fn next(&mut self) -> Option { - if let Some(head) = self.head { - let n = &self.nodes[head.index()]; - let kind = match &n.kind { - NodeKind::Root => unreachable!(), - NodeKind::Container(c, child, ..) => { - self.branch.push(head); - self.head = *child; - EventKind::Enter(c.clone()) - } - NodeKind::Atom(a) => { - self.head = n.next; - EventKind::Atom(a.clone()) - } - NodeKind::Inline => { - self.head = n.next; - EventKind::Inline - } - }; - Some(Event { kind, span: n.span }) - } else if let Some(block_ni) = self.branch.pop() { - let InternalNode { next, kind, .. } = &self.nodes[block_ni.index()]; - if let NodeKind::Container(c, _, span) = kind { - self.head = *next; - Some(Event { - kind: EventKind::Exit(c.clone()), - span: *span, - }) - } else { - panic!() - } - } else { - None - } - } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub struct NodeIndex(std::num::NonZeroUsize); - -impl NodeIndex { - fn new(i: usize) -> Self { - debug_assert_ne!(i, usize::MAX); - Self((i + 1).try_into().unwrap()) - } - - fn root() -> Self { - Self::new(0) - } - - fn index(self) -> usize { - usize::from(self.0) - 1 - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -enum NodeKind { - Root, - Container(C, Option, Span), - Atom(A), - Inline, -} - -#[derive(Debug, Clone)] -struct InternalNode { - span: Span, - kind: NodeKind, - next: Option, -} - -#[derive(Clone)] -pub struct Builder { - nodes: Vec>, - branch: Vec, - head: Option, - depth: usize, -} - -impl NodeKind { - fn child(&self) -> Option { - if let NodeKind::Container(_, child, _) = self { - *child - } else { - None - } - } - - fn child_mut(&mut self) -> &mut Option { - if let NodeKind::Container(_, child, _) = self { - child - } else { - panic!() - } - } -} - -impl<'a, C, A> From<&'a mut NodeKind> for Element<'a, C, A> { - fn from(kind: &'a mut NodeKind) -> Self { - match kind { - NodeKind::Root => unreachable!(), - NodeKind::Container(c, ..) => Element::Container(c), - NodeKind::Atom(a) => Element::Atom(a), - NodeKind::Inline => Element::Inline, - } - } -} - -impl Builder { - pub(super) fn new() -> Self { - Builder { - nodes: vec![InternalNode { - span: Span::default(), - kind: NodeKind::Root, - next: None, - }], - branch: vec![], - head: Some(NodeIndex::root()), - depth: 0, - } - } - - pub(super) fn atom(&mut self, a: A, span: Span) { - self.add_node(InternalNode { - span, - kind: NodeKind::Atom(a), - next: None, - }); - } - - pub(super) fn inline(&mut self, span: Span) { - self.add_node(InternalNode { - span, - kind: NodeKind::Inline, - next: None, - }); - } - - pub(super) fn enter(&mut self, c: C, span: Span) -> NodeIndex { - self.depth += 1; - self.add_node(InternalNode { - span, - kind: NodeKind::Container(c, None, Span::new(0, 0)), - next: None, - }) - } - - pub(super) fn exit(&mut self, span: Span) { - self.depth -= 1; - if let Some(head) = self.head.take() { - if let NodeKind::Container(_, _, sp) = &mut self.nodes[head.index()].kind { - *sp = span; - self.branch.push(head); - return; - } - } else { - let last = self.branch.pop(); - debug_assert_ne!(last, None); - } - - if let NodeKind::Container(_, _, sp) = - &mut self.nodes[self.branch.last().unwrap().index()].kind - { - *sp = span; - } else { - panic!(); - } - } - - /// Exit and discard all the contents of the current container. - pub(super) fn exit_discard(&mut self) { - self.exit(Span::new(0, (1 << 31) - 1)); - let exited = self.branch.pop().unwrap(); - self.nodes.drain(exited.index()..); - let (prev, has_parent) = self.replace(exited, None); - if has_parent { - self.head = Some(prev); - } else { - self.branch.push(prev); - } - } - - /// Swap the node and its children with either its parent or the node before. - pub fn swap_prev(&mut self, node: NodeIndex, span: Span) { - let next = self.nodes[node.index()].next; - let (prev, _) = self.replace(node, next); - if let Some(n) = next { - self.nodes[prev.index()].span = self.nodes[n.index()].span.empty_before(); - self.replace(n, None); - } else { - self.nodes[prev.index()].span = self.nodes[self.nodes.len() - 1].span.empty_after(); - } - self.replace(prev, Some(node)); - self.nodes[node.index()].next = Some(prev); - self.nodes[node.index()].span = span; - - let span = self.nodes[prev.index()].span; - if let NodeKind::Container(_, _, sp) = &mut self.nodes[node.index()].kind { - *sp = span; - } else { - panic!() - } - } - - /// Remove the specified node and its children. - pub fn remove(&mut self, node: NodeIndex) { - let next = self.nodes[node.index()].next; - self.replace(node, next); - } - - pub(super) fn depth(&self) -> usize { - self.depth - } - - pub(super) fn elem(&mut self, ni: NodeIndex) -> Element { - match &mut self.nodes[ni.index()].kind { - NodeKind::Root => unreachable!(), - NodeKind::Container(c, ..) => Element::Container(c), - NodeKind::Atom(a) => Element::Atom(a), - NodeKind::Inline => Element::Inline, - } - } - - /// Retrieve all children nodes for the specified node, in the order that they were added. - pub(super) fn children(&mut self, node: NodeIndex) -> impl Iterator> { - // XXX assumes no modifications - let n = &self.nodes[node.index()]; - let range = if let Some(start) = n.kind.child() { - start.index()..n.next.map_or(self.nodes.len(), NodeIndex::index) - } else { - 0..0 - }; - range - .clone() - .map(NodeIndex::new) - .zip(self.nodes[range].iter_mut()) - .map(|(index, n)| Node { - index, - elem: Element::from(&mut n.kind), - span: n.span, - }) - } - - pub(super) fn finish(self) -> Tree { - debug_assert_eq!(self.depth, 0); - let head = self.nodes[NodeIndex::root().index()].next; - Tree { - nodes: self.nodes.into_boxed_slice().into(), - branch: Vec::new(), - head, - } - } - - fn add_node(&mut self, node: InternalNode) -> NodeIndex { - let ni = NodeIndex::new(self.nodes.len()); - self.nodes.push(node); - if let Some(head_ni) = &mut self.head { - let mut head = &mut self.nodes[head_ni.index()]; - match &mut head.kind { - NodeKind::Root | NodeKind::Inline | NodeKind::Atom(_) => { - // set next pointer of previous node - debug_assert_eq!(head.next, None); - head.next = Some(ni); - } - NodeKind::Container(_, child, _) => { - self.branch.push(*head_ni); - // set child pointer of current container - debug_assert_eq!(*child, None); - *child = Some(ni); - } - } - } else if let Some(block) = self.branch.pop() { - let mut block = &mut self.nodes[block.index()]; - debug_assert!(matches!(block.kind, NodeKind::Container(..))); - block.next = Some(ni); - } else { - panic!() - } - self.head = Some(ni); - ni - } - - /// Remove the link from the node that points to the specified node. Optionally replace the - /// node with another node. Return the pointer node and whether it is a container or not. - fn replace(&mut self, node: NodeIndex, next: Option) -> (NodeIndex, bool) { - for (i, n) in self.nodes.iter_mut().enumerate().rev() { - let ni = NodeIndex::new(i); - if n.next == Some(node) { - n.next = next; - return (ni, false); - } else if n.kind.child() == Some(node) { - *n.kind.child_mut() = next; - return (ni, true); - } - } - panic!("node is never linked to") - } -} - -impl std::fmt::Debug for Builder { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - self.clone().finish().fmt(f) - } -} - -impl std::fmt::Debug for Tree { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - const INDENT: &str = " "; - let mut level = 0; - writeln!(f)?; - for e in self.clone() { - let indent = INDENT.repeat(level); - match e.kind { - EventKind::Enter(c) => { - write!(f, "{}{:?}", indent, c)?; - level += 1; - } - EventKind::Inline => write!(f, "{}Inline", indent)?, - EventKind::Exit(..) => { - level -= 1; - continue; - } - EventKind::Atom(a) => write!(f, "{}{:?}", indent, a)?, - } - writeln!(f, " ({}:{})", e.span.start(), e.span.end())?; - } - Ok(()) - } -} - -#[cfg(test)] -mod test { - use crate::Span; - - #[test] - fn fmt() { - let mut tree = super::Builder::new(); - tree.enter(1, Span::new(0, 1)); - tree.atom(11, Span::new(0, 1)); - tree.atom(12, Span::new(0, 1)); - tree.exit(Span::new(0, 0)); - tree.enter(2, Span::new(1, 5)); - tree.enter(21, Span::new(2, 5)); - tree.enter(211, Span::new(3, 4)); - tree.atom(2111, Span::new(3, 4)); - tree.exit(Span::new(0, 0)); - tree.exit(Span::new(0, 0)); - tree.enter(22, Span::new(4, 5)); - tree.atom(221, Span::new(4, 5)); - tree.exit(Span::new(0, 0)); - tree.exit(Span::new(0, 0)); - tree.enter(3, Span::new(5, 6)); - tree.atom(31, Span::new(5, 6)); - tree.exit(Span::new(0, 0)); - assert_eq!( - format!("{:?}", tree.finish()), - concat!( - "\n", - "1 (0:1)\n", - " 11 (0:1)\n", - " 12 (0:1)\n", - "2 (1:5)\n", - " 21 (2:5)\n", - " 211 (3:4)\n", - " 2111 (3:4)\n", - " 22 (4:5)\n", - " 221 (4:5)\n", - "3 (5:6)\n", - " 31 (5:6)\n", - ) - ); - } -}