This commit is contained in:
Noah Hellman 2022-12-10 10:26:06 +01:00
parent 5afc6a41a8
commit 3a70cd8255
3 changed files with 148 additions and 143 deletions

View file

@ -3,6 +3,7 @@ use crate::EOF;
use crate::tree; use crate::tree;
use Atom::*;
use Container::*; use Container::*;
use Leaf::*; use Leaf::*;
@ -15,6 +16,9 @@ pub fn parse(src: &str) -> Tree {
#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Block { pub enum Block {
/// An atomic block, containing no child elements.
Atom(Atom),
/// A leaf block, containing only inline elements. /// A leaf block, containing only inline elements.
Leaf(Leaf), Leaf(Leaf),
@ -22,6 +26,21 @@ pub enum Block {
Container(Container), Container(Container),
} }
/// A childless node of the block tree.
///
/// Used as the atom type of the parser's `tree::Builder<Block, Atom>`; every
/// atom is stored together with the span of source text it covers.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Atom {
/// Inline content with unparsed inline elements.
///
/// The block parser adds one of these per line of a leaf block; it is not
/// expected as the result of parsing a block start (the parser asserts this).
Inline,
/// A line with no non-whitespace characters.
Blankline,
/// A list of attributes.
///
/// NOTE(review): the event parser feeds the span into its pending block
/// attributes instead of emitting an event for it — confirm against the
/// event parser.
Attributes,
/// A thematic break.
ThematicBreak,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Leaf { pub enum Leaf {
/// Span is empty, before first character of paragraph. /// Span is empty, before first character of paragraph.
@ -43,10 +62,6 @@ pub enum Leaf {
/// Span is language specifier. /// Span is language specifier.
/// Each inline is a line. /// Each inline is a line.
CodeBlock { fence_length: u8, c: u8 }, CodeBlock { fence_length: u8, c: u8 },
/// Span is from first to last character.
/// No inlines.
ThematicBreak,
} }
#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]
@ -64,18 +79,6 @@ pub enum Container {
Footnote { indent: u8 }, Footnote { indent: u8 },
} }
/// A childless node of the block tree.
///
/// Used as the element type of the parser's `tree::Builder<Block, Atom>`;
/// every value is stored together with the span of source text it covers.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Atom {
/// Inline content with unparsed inline elements.
Inline,
/// A line with no non-whitespace characters.
Blankline,
/// A list of attributes.
Attributes,
}
struct Parser<'s> { struct Parser<'s> {
src: &'s str, src: &'s str,
tree: tree::Builder<Block, Atom>, tree: tree::Builder<Block, Atom>,
@ -106,15 +109,8 @@ impl<'s> Parser<'s> {
/// Recursively parse a block and all of its children. Return number of lines the block uses. /// Recursively parse a block and all of its children. Return number of lines the block uses.
fn parse_block(&mut self, lines: &mut [Span]) -> usize { fn parse_block(&mut self, lines: &mut [Span]) -> usize {
let blanklines = lines
.iter()
.take_while(|sp| sp.of(self.src).trim().is_empty())
.map(|sp| self.tree.elem(Atom::Blankline, *sp))
.count();
let lines = &mut lines[blanklines..];
Block::parse(lines.iter().map(|sp| sp.of(self.src))).map_or( Block::parse(lines.iter().map(|sp| sp.of(self.src))).map_or(
blanklines, 0,
|(kind, span, line_count)| { |(kind, span, line_count)| {
let lines = { let lines = {
let l = lines.len().min(line_count); let l = lines.len().min(line_count);
@ -147,7 +143,11 @@ impl<'s> Parser<'s> {
lines lines
}; };
match &kind { match kind {
Block::Atom(a) => {
assert_ne!(a, Inline);
self.tree.atom(a, span);
}
Block::Leaf(l) => { Block::Leaf(l) => {
self.tree.enter(kind, span); self.tree.enter(kind, span);
@ -170,9 +170,8 @@ impl<'s> Parser<'s> {
} }
} }
lines lines.iter().for_each(|line| self.tree.atom(Inline, *line));
.iter() self.tree.exit();
.for_each(|line| self.tree.elem(Atom::Inline, *line));
} }
Block::Container(c) => { Block::Container(c) => {
let (skip_chars, skip_lines_suffix) = match &c { let (skip_chars, skip_lines_suffix) = match &c {
@ -194,7 +193,7 @@ impl<'s> Parser<'s> {
.take_while(|c| c.is_whitespace()) .take_while(|c| c.is_whitespace())
.count() .count()
+ usize::from(skip_chars)) + usize::from(skip_chars))
.min(sp.len()); .min(sp.len() - usize::from(sp.of(self.src).ends_with('\n')));
*sp = sp.skip(skip); *sp = sp.skip(skip);
}); });
@ -203,10 +202,11 @@ impl<'s> Parser<'s> {
while l < line_count_inner { while l < line_count_inner {
l += self.parse_block(&mut lines[l..line_count_inner]); l += self.parse_block(&mut lines[l..line_count_inner]);
} }
self.tree.exit();
} }
} }
self.tree.exit();
blanklines + line_count line_count
}, },
) )
} }
@ -229,11 +229,16 @@ impl Block {
/// Determine what type of block a line can start. /// Determine what type of block a line can start.
fn start(line: &str) -> (Self, Span) { fn start(line: &str) -> (Self, Span) {
let start = line.chars().take_while(|c| c.is_whitespace()).count(); let start = line
.chars()
.take_while(|c| *c != '\n' && c.is_whitespace())
.count();
let line_t = &line[start..]; let line_t = &line[start..];
let mut chars = line_t.chars(); let mut chars = line_t.chars();
match chars.next().unwrap_or(EOF) { match chars.next().unwrap_or(EOF) {
EOF => Some((Self::Atom(Blankline), Span::empty_at(start))),
'\n' => Some((Self::Atom(Blankline), Span::by_len(start, 1))),
'#' => chars '#' => chars
.find(|c| *c != '#') .find(|c| *c != '#')
.map_or(true, char::is_whitespace) .map_or(true, char::is_whitespace)
@ -286,7 +291,7 @@ impl Block {
) )
}), }),
'-' | '*' if Self::is_thematic_break(chars.clone()) => Some(( '-' | '*' if Self::is_thematic_break(chars.clone()) => Some((
Self::Leaf(ThematicBreak), Self::Atom(ThematicBreak),
Span::from_slice(line, line_t.trim()), Span::from_slice(line, line_t.trim()),
)), )),
'-' => chars.next().map_or(true, char::is_whitespace).then(|| { '-' => chars.next().map_or(true, char::is_whitespace).then(|| {
@ -350,9 +355,9 @@ impl Block {
fn continues(self, line: &str) -> bool { fn continues(self, line: &str) -> bool {
//let start = Self::start(line); // TODO allow starting new block without blank line //let start = Self::start(line); // TODO allow starting new block without blank line
match self { match self {
Self::Atom(..) => false,
Self::Leaf(Paragraph | Heading { .. } | Table) => !line.trim().is_empty(), Self::Leaf(Paragraph | Heading { .. } | Table) => !line.trim().is_empty(),
Self::Leaf(LinkDefinition) => line.starts_with(' ') && !line.trim().is_empty(), Self::Leaf(LinkDefinition) => line.starts_with(' ') && !line.trim().is_empty(),
Self::Leaf(ThematicBreak) => false,
Self::Container(Blockquote) => line.trim().starts_with('>'), Self::Container(Blockquote) => line.trim().starts_with('>'),
Self::Container(Footnote { indent } | ListItem { indent }) => { Self::Container(Footnote { indent } | ListItem { indent }) => {
let spaces = line.chars().take_while(|c| c.is_whitespace()).count(); let spaces = line.chars().take_while(|c| c.is_whitespace()).count();
@ -362,7 +367,7 @@ impl Block {
let fence = match self { let fence = match self {
Self::Container(..) => ':', Self::Container(..) => ':',
Self::Leaf(CodeBlock { c, .. }) => c as char, Self::Leaf(CodeBlock { c, .. }) => c as char,
Self::Leaf(..) => unreachable!(), Self::Leaf(..) | Self::Atom(..) => unreachable!(),
}; };
let mut c = line.chars(); let mut c = line.chars();
!((&mut c).take((fence_length).into()).all(|c| c == fence) !((&mut c).take((fence_length).into()).all(|c| c == fence)
@ -375,6 +380,7 @@ impl Block {
impl std::fmt::Display for Block { impl std::fmt::Display for Block {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self { match self {
Block::Atom(a) => std::fmt::Debug::fmt(a, f),
Block::Leaf(e) => std::fmt::Debug::fmt(e, f), Block::Leaf(e) => std::fmt::Debug::fmt(e, f),
Block::Container(c) => std::fmt::Debug::fmt(c, f), Block::Container(c) => std::fmt::Debug::fmt(c, f),
} }
@ -408,6 +414,7 @@ fn lines(src: &str) -> impl Iterator<Item = Span> + '_ {
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use crate::tree::EventKind;
use crate::tree::EventKind::*; use crate::tree::EventKind::*;
use super::Atom::*; use super::Atom::*;
@ -430,7 +437,7 @@ mod test {
test_parse!( test_parse!(
"para\n", "para\n",
(Enter(Leaf(Paragraph)), ""), (Enter(Leaf(Paragraph)), ""),
(Element(Inline), "para"), (EventKind::Atom(Inline), "para"),
(Exit(Leaf(Paragraph)), ""), (Exit(Leaf(Paragraph)), ""),
); );
} }
@ -440,8 +447,8 @@ mod test {
test_parse!( test_parse!(
"para0\npara1\n", "para0\npara1\n",
(Enter(Leaf(Paragraph)), ""), (Enter(Leaf(Paragraph)), ""),
(Element(Inline), "para0\n"), (EventKind::Atom(Inline), "para0\n"),
(Element(Inline), "para1"), (EventKind::Atom(Inline), "para1"),
(Exit(Leaf(Paragraph)), ""), (Exit(Leaf(Paragraph)), ""),
); );
} }
@ -457,39 +464,41 @@ mod test {
"15\n", // "15\n", //
), ),
(Enter(Leaf(Heading { level: 1 })), "#"), (Enter(Leaf(Heading { level: 1 })), "#"),
(Element(Inline), "2"), (EventKind::Atom(Inline), "2"),
(Exit(Leaf(Heading { level: 1 })), "#"), (Exit(Leaf(Heading { level: 1 })), "#"),
(Element(Blankline), "\n"), (EventKind::Atom(Blankline), "\n"),
(Enter(Leaf(Heading { level: 1 })), "#"), (Enter(Leaf(Heading { level: 1 })), "#"),
(Element(Inline), "8\n"), (EventKind::Atom(Inline), "8\n"),
(Element(Inline), " 12\n"), (EventKind::Atom(Inline), " 12\n"),
(Element(Inline), "15"), (EventKind::Atom(Inline), "15"),
(Exit(Leaf(Heading { level: 1 })), "#"), (Exit(Leaf(Heading { level: 1 })), "#"),
); );
} }
#[test] #[test]
fn parse_blockquote() { fn parse_blockquote() {
/*
test_parse!( test_parse!(
"> a\n", "> a\n",
(Enter(Container(Blockquote)), ">"), (Enter, Container(Blockquote), ">"),
(Enter(Leaf(Paragraph)), ""), (Enter, Leaf(Paragraph), ""),
(Element(Inline), "a"), (Element, Atom(Inline), "a"),
(Exit(Leaf(Paragraph)), ""), (Exit, Leaf(Paragraph), ""),
(Exit(Container(Blockquote)), ">"), (Exit, Container(Blockquote), ">"),
); );
test_parse!( test_parse!(
"> \n", "> \n",
(Enter(Container(Blockquote)), ">"), (Enter, Container(Blockquote), ">"),
(Element(Blankline), " \n"), (Element, Atom(Blankline), "\n"),
(Exit(Container(Blockquote)), ">"), (Exit, Container(Blockquote), ">"),
); );
test_parse!( test_parse!(
">", ">",
(Enter(Container(Blockquote)), ">"), (Enter, Container(Blockquote), ">"),
(Element(Blankline), ""), (Element, Atom(Blankline), ""),
(Exit(Container(Blockquote)), ">"), (Exit, Container(Blockquote), ">"),
); );
*/
test_parse!( test_parse!(
concat!( concat!(
"> a\n", "> a\n",
@ -500,15 +509,15 @@ mod test {
), ),
(Enter(Container(Blockquote)), ">"), (Enter(Container(Blockquote)), ">"),
(Enter(Leaf(Paragraph)), ""), (Enter(Leaf(Paragraph)), ""),
(Element(Inline), "a"), (EventKind::Atom(Inline), "a"),
(Exit(Leaf(Paragraph)), ""), (Exit(Leaf(Paragraph)), ""),
(Element(Blankline), ""), (EventKind::Atom(Blankline), "\n"),
(Enter(Leaf(Heading { level: 2 })), "##"), (Enter(Leaf(Heading { level: 2 })), "##"),
(Element(Inline), "hl"), (EventKind::Atom(Inline), "hl"),
(Exit(Leaf(Heading { level: 2 })), "##"), (Exit(Leaf(Heading { level: 2 })), "##"),
(Element(Blankline), ""), (EventKind::Atom(Blankline), "\n"),
(Enter(Leaf(Paragraph)), ""), (Enter(Leaf(Paragraph)), ""),
(Element(Inline), "para"), (EventKind::Atom(Inline), "para"),
(Exit(Leaf(Paragraph)), ""), (Exit(Leaf(Paragraph)), ""),
(Exit(Container(Blockquote)), ">"), (Exit(Container(Blockquote)), ">"),
); );
@ -519,13 +528,13 @@ mod test {
test_parse!( test_parse!(
"> \n", "> \n",
(Enter(Container(Blockquote)), ">"), (Enter(Container(Blockquote)), ">"),
(Element(Blankline), "\n"), (EventKind::Atom(Blankline), "\n"),
(Exit(Container(Blockquote)), ">"), (Exit(Container(Blockquote)), ">"),
); );
test_parse!( test_parse!(
">", ">",
(Enter(Container(Blockquote)), ">"), (Enter(Container(Blockquote)), ">"),
(Element(Blankline), ""), (EventKind::Atom(Blankline), ""),
(Exit(Container(Blockquote)), ">"), (Exit(Container(Blockquote)), ">"),
); );
} }
@ -541,7 +550,7 @@ mod test {
})), })),
"", "",
), ),
(Element(Inline), "l0\n"), (EventKind::Atom(Inline), "l0\n"),
( (
Exit(Leaf(CodeBlock { Exit(Leaf(CodeBlock {
fence_length: 3, fence_length: 3,
@ -565,7 +574,7 @@ mod test {
})), })),
"" ""
), ),
(Element(Inline), "l0\n"), (EventKind::Atom(Inline), "l0\n"),
( (
Exit(Leaf(CodeBlock { Exit(Leaf(CodeBlock {
fence_length: 3, fence_length: 3,
@ -573,9 +582,9 @@ mod test {
})), })),
"" ""
), ),
(Element(Blankline), "\n"), (EventKind::Atom(Blankline), "\n"),
(Enter(Leaf(Paragraph)), ""), (Enter(Leaf(Paragraph)), ""),
(Element(Inline), "para"), (EventKind::Atom(Inline), "para"),
(Exit(Leaf(Paragraph)), ""), (Exit(Leaf(Paragraph)), ""),
); );
test_parse!( test_parse!(
@ -593,9 +602,9 @@ mod test {
})), })),
"lang" "lang"
), ),
(Element(Inline), "l0\n"), (EventKind::Atom(Inline), "l0\n"),
(Element(Inline), "```\n"), (EventKind::Atom(Inline), "```\n"),
(Element(Inline), " l1\n"), (EventKind::Atom(Inline), " l1\n"),
( (
Exit(Leaf(CodeBlock { Exit(Leaf(CodeBlock {
fence_length: 4, fence_length: 4,
@ -620,7 +629,7 @@ mod test {
})), })),
"" ""
), ),
(Element(Inline), "a\n"), (EventKind::Atom(Inline), "a\n"),
( (
Exit(Leaf(CodeBlock { Exit(Leaf(CodeBlock {
fence_length: 3, fence_length: 3,
@ -635,7 +644,7 @@ mod test {
})), })),
"" ""
), ),
(Element(Inline), "bbb\n"), (EventKind::Atom(Inline), "bbb\n"),
( (
Exit(Leaf(CodeBlock { Exit(Leaf(CodeBlock {
fence_length: 3, fence_length: 3,
@ -658,8 +667,8 @@ mod test {
})), })),
"", "",
), ),
(Element(Inline), "code\n"), (EventKind::Atom(Inline), "code\n"),
(Element(Inline), " block\n"), (EventKind::Atom(Inline), " block\n"),
( (
Exit(Leaf(CodeBlock { Exit(Leaf(CodeBlock {
fence_length: 3, fence_length: 3,
@ -675,7 +684,7 @@ mod test {
test_parse!( test_parse!(
"[tag]: url\n", "[tag]: url\n",
(Enter(Leaf(LinkDefinition)), "tag"), (Enter(Leaf(LinkDefinition)), "tag"),
(Element(Inline), "url"), (EventKind::Atom(Inline), "url"),
(Exit(Leaf(LinkDefinition)), "tag"), (Exit(Leaf(LinkDefinition)), "tag"),
); );
} }
@ -686,7 +695,7 @@ mod test {
"[^tag]: description\n", "[^tag]: description\n",
(Enter(Container(Footnote { indent: 0 })), "tag"), (Enter(Container(Footnote { indent: 0 })), "tag"),
(Enter(Leaf(Paragraph)), ""), (Enter(Leaf(Paragraph)), ""),
(Element(Inline), "description"), (EventKind::Atom(Inline), "description"),
(Exit(Leaf(Paragraph)), ""), (Exit(Leaf(Paragraph)), ""),
(Exit(Container(Footnote { indent: 0 })), "tag"), (Exit(Container(Footnote { indent: 0 })), "tag"),
); );
@ -705,6 +714,12 @@ mod test {
}; };
} }
/// A line that is only a newline, or spaces followed by a newline, must be
/// recognised as a `Blankline` atom.
#[test]
fn block_blankline() {
// NOTE(review): `test_block!` is defined outside this view; its arguments
// appear to be (source, expected block, expected span text, expected line
// count) — confirm against the macro definition.
test_block!("\n", Block::Atom(Blankline), "\n", 1);
// Leading whitespace is not part of the expected span: only the "\n" is.
test_block!(" \n", Block::Atom(Blankline), "\n", 1);
}
#[test] #[test]
fn block_multiline() { fn block_multiline() {
test_block!( test_block!(
@ -733,14 +748,14 @@ mod test {
#[test] #[test]
fn block_thematic_break() { fn block_thematic_break() {
test_block!("---\n", Block::Leaf(ThematicBreak), "---", 1); test_block!("---\n", Block::Atom(ThematicBreak), "---", 1);
test_block!( test_block!(
concat!( concat!(
" -*- -*-\n", " -*- -*-\n",
"\n", // "\n", //
"para", // "para", //
), ),
Block::Leaf(ThematicBreak), Block::Atom(ThematicBreak),
"-*- -*-", "-*- -*-",
1 1
); );

View file

@ -269,6 +269,7 @@ impl<'s> Event<'s> {
impl<'s> Container<'s> { impl<'s> Container<'s> {
fn from_block(src: &'s str, block: block::Block) -> Self { fn from_block(src: &'s str, block: block::Block) -> Self {
match block { match block {
block::Block::Atom(a) => todo!(),
block::Block::Leaf(l) => match l { block::Block::Leaf(l) => match l {
block::Leaf::Paragraph => Self::Paragraph, block::Leaf::Paragraph => Self::Paragraph,
block::Leaf::Heading { level } => Self::Heading { level }, block::Leaf::Heading { level } => Self::Heading { level },
@ -342,14 +343,14 @@ impl<'s> Iterator for Parser<'s> {
return Some(Event::from_inline(self.src, inline)); return Some(Event::from_inline(self.src, inline));
} else if let Some(ev) = self.tree.next() { } else if let Some(ev) = self.tree.next() {
match ev.kind { match ev.kind {
tree::EventKind::Element(atom) => { tree::EventKind::Atom(a) => {
assert_eq!(atom, block::Atom::Inline); assert_eq!(a, block::Atom::Inline);
let last_inline = self.tree.neighbors().next().is_none(); let last_inline = self.tree.atoms().next().is_none();
parser.parse(ev.span.of(self.src), last_inline); parser.parse(ev.span.of(self.src), last_inline);
} }
tree::EventKind::Exit(block) => { tree::EventKind::Exit(c) => {
self.parser = None; self.parser = None;
return Some(Event::End(Container::from_block(self.src, block))); return Some(Event::End(Container::from_block(self.src, c)));
} }
tree::EventKind::Enter(..) => unreachable!(), tree::EventKind::Enter(..) => unreachable!(),
} }
@ -359,20 +360,21 @@ impl<'s> Iterator for Parser<'s> {
for ev in &mut self.tree { for ev in &mut self.tree {
let content = ev.span.of(self.src); let content = ev.span.of(self.src);
let event = match ev.kind { let event = match ev.kind {
tree::EventKind::Element(atom) => match atom { tree::EventKind::Atom(a) => match a {
block::Atom::Inline => panic!("inline outside leaf block"), block::Atom::Inline => panic!("inline outside leaf block"),
block::Atom::Blankline => Event::Atom(Atom::Blankline), block::Atom::Blankline => Event::Atom(Atom::Blankline),
block::Atom::ThematicBreak => Event::Atom(Atom::ThematicBreak),
block::Atom::Attributes => { block::Atom::Attributes => {
self.block_attributes.parse(content); self.block_attributes.parse(content);
continue; continue;
} }
}, },
tree::EventKind::Enter(block) => { tree::EventKind::Enter(c) => {
if matches!(block, block::Block::Leaf(_)) { if matches!(c, block::Block::Leaf(_)) {
self.parser = Some(inline::Parser::new()); self.parser = Some(inline::Parser::new());
self.inline_start = ev.span.end(); self.inline_start = ev.span.end();
} }
let container = match block { let container = match c {
block::Block::Leaf(block::Leaf::CodeBlock { .. }) => { block::Block::Leaf(block::Leaf::CodeBlock { .. }) => {
self.inline_start += 1; // skip newline self.inline_start += 1; // skip newline
Container::CodeBlock { Container::CodeBlock {
@ -386,7 +388,7 @@ impl<'s> Iterator for Parser<'s> {
}; };
Event::Start(container, self.block_attributes.take()) Event::Start(container, self.block_attributes.take())
} }
tree::EventKind::Exit(block) => Event::End(Container::from_block(self.src, block)), tree::EventKind::Exit(c) => Event::End(Container::from_block(self.src, c)),
}; };
return Some(event); return Some(event);
} }
@ -465,6 +467,7 @@ mod test {
Start(Paragraph, Attributes::none()), Start(Paragraph, Attributes::none()),
Str("para0"), Str("para0"),
End(Paragraph), End(Paragraph),
Atom(Blankline),
Start(Paragraph, Attributes::none()), Start(Paragraph, Attributes::none()),
Str("para1"), Str("para1"),
End(Paragraph), End(Paragraph),

View file

@ -1,10 +1,10 @@
use crate::Span; use crate::Span;
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub enum EventKind<C, E> { pub enum EventKind<C, A> {
Enter(C), Enter(C),
Element(E),
Exit(C), Exit(C),
Atom(A),
} }
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
@ -13,25 +13,15 @@ pub struct Event<C, A> {
pub span: Span, pub span: Span,
} }
pub struct Object<C, E> { #[derive(Clone)]
kind: ObjectKind<C, E>, pub struct Tree<C, A> {
span: Span, nodes: Vec<Node<C, A>>,
}
pub enum ObjectKind<C, E> {
Container(C),
Element(E),
}
#[derive(Debug, Clone)]
pub struct Tree<C, E> {
nodes: Vec<Node<C, E>>,
branch: Vec<NodeIndex>, branch: Vec<NodeIndex>,
head: Option<NodeIndex>, head: Option<NodeIndex>,
} }
impl<C: Copy, E: Copy> Tree<C, E> { impl<C: Copy, A: Copy> Tree<C, A> {
fn new(nodes: Vec<Node<C, E>>) -> Self { fn new(nodes: Vec<Node<C, A>>) -> Self {
let head = nodes[NodeIndex::root().index()].next; let head = nodes[NodeIndex::root().index()].next;
Self { Self {
nodes, nodes,
@ -40,26 +30,25 @@ impl<C: Copy, E: Copy> Tree<C, E> {
} }
} }
pub fn neighbors(&self) -> impl Iterator<Item = Object<C, E>> + '_ { pub fn atoms(&self) -> impl Iterator<Item = (A, Span)> + '_ {
let mut head = self.head; let mut head = self.head;
std::iter::from_fn(move || { std::iter::from_fn(move || {
head.take().map(|h| { head.take().map(|h| {
let n = &self.nodes[h.index()]; let n = &self.nodes[h.index()];
let kind = match &n.kind { let kind = match &n.kind {
NodeKind::Root => unreachable!(), NodeKind::Root => unreachable!(),
NodeKind::Container(c, _) => ObjectKind::Container(*c), NodeKind::Container(..) => panic!(),
NodeKind::Element(e) => ObjectKind::Element(*e), NodeKind::Atom(a) => *a,
}; };
let span = n.span;
head = n.next; head = n.next;
Object { kind, span } (kind, n.span)
}) })
}) })
} }
} }
impl<C: Copy, E: Copy> Iterator for Tree<C, E> { impl<C: Copy, A: Copy> Iterator for Tree<C, A> {
type Item = Event<C, E>; type Item = Event<C, A>;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
if let Some(head) = self.head { if let Some(head) = self.head {
@ -71,9 +60,9 @@ impl<C: Copy, E: Copy> Iterator for Tree<C, E> {
self.head = *child; self.head = *child;
EventKind::Enter(*c) EventKind::Enter(*c)
} }
NodeKind::Element(e) => { NodeKind::Atom(e) => {
self.head = n.next; self.head = n.next;
EventKind::Element(*e) EventKind::Atom(*e)
} }
}; };
Some(Event { kind, span: n.span }) Some(Event { kind, span: n.span })
@ -114,27 +103,27 @@ impl NodeIndex {
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
enum NodeKind<C, E> { enum NodeKind<C, A> {
Root, Root,
Container(C, Option<NodeIndex>), Container(C, Option<NodeIndex>),
Element(E), Atom(A),
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
struct Node<C, E> { struct Node<C, A> {
span: Span, span: Span,
kind: NodeKind<C, E>, kind: NodeKind<C, A>,
next: Option<NodeIndex>, next: Option<NodeIndex>,
} }
#[derive(Debug, Clone)] #[derive(Clone)]
pub struct Builder<C, E> { pub struct Builder<C, A> {
nodes: Vec<Node<C, E>>, nodes: Vec<Node<C, A>>,
branch: Vec<NodeIndex>, branch: Vec<NodeIndex>,
head: Option<NodeIndex>, head: Option<NodeIndex>,
} }
impl<C: Copy, E: Copy> Builder<C, E> { impl<C: Copy, A: Copy> Builder<C, A> {
pub(super) fn new() -> Self { pub(super) fn new() -> Self {
Builder { Builder {
nodes: vec![Node { nodes: vec![Node {
@ -147,10 +136,10 @@ impl<C: Copy, E: Copy> Builder<C, E> {
} }
} }
pub(super) fn elem(&mut self, e: E, span: Span) { pub(super) fn atom(&mut self, a: A, span: Span) {
self.add_node(Node { self.add_node(Node {
span, span,
kind: NodeKind::Element(e), kind: NodeKind::Atom(a),
next: None, next: None,
}); });
} }
@ -172,17 +161,17 @@ impl<C: Copy, E: Copy> Builder<C, E> {
} }
} }
pub(super) fn finish(self) -> Tree<C, E> { pub(super) fn finish(self) -> Tree<C, A> {
Tree::new(self.nodes) Tree::new(self.nodes)
} }
fn add_node(&mut self, node: Node<C, E>) { fn add_node(&mut self, node: Node<C, A>) {
let ni = NodeIndex::new(self.nodes.len()); let ni = NodeIndex::new(self.nodes.len());
self.nodes.push(node); self.nodes.push(node);
if let Some(head_ni) = &mut self.head { if let Some(head_ni) = &mut self.head {
let mut head = &mut self.nodes[head_ni.index()]; let mut head = &mut self.nodes[head_ni.index()];
match &mut head.kind { match &mut head.kind {
NodeKind::Root | NodeKind::Element(_) => { NodeKind::Root | NodeKind::Atom(_) => {
// update next pointer of previous node // update next pointer of previous node
assert_eq!(head.next, None); assert_eq!(head.next, None);
head.next = Some(ni); head.next = Some(ni);
@ -205,30 +194,28 @@ impl<C: Copy, E: Copy> Builder<C, E> {
} }
} }
impl<C: Copy + std::fmt::Display, E: Copy + std::fmt::Display> std::fmt::Display for Builder<C, E> { impl<C: Copy + std::fmt::Debug, A: Copy + std::fmt::Debug> std::fmt::Debug for Builder<C, A> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.clone().finish().fmt(f) self.clone().finish().fmt(f)
} }
} }
impl<C: Copy + std::fmt::Display, E: Copy + std::fmt::Display> std::fmt::Display for Tree<C, E> { impl<C: Copy + std::fmt::Debug, A: Copy + std::fmt::Debug> std::fmt::Debug for Tree<C, A> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
const INDENT: &str = " "; const INDENT: &str = " ";
let mut level = 0; let mut level = 0;
for e in self.clone() { for e in self.clone() {
let indent = INDENT.repeat(level); let indent = INDENT.repeat(level);
match e.kind { match e.kind {
EventKind::Enter(container) => { EventKind::Enter(c) => {
write!(f, "{}{}", indent, container)?; write!(f, "{}{:?}", indent, c)?;
level += 1; level += 1;
} }
EventKind::Exit(_) => { EventKind::Exit(..) => {
level -= 1; level -= 1;
continue; continue;
} }
EventKind::Element(element) => { EventKind::Atom(a) => write!(f, "{}{:?}", indent, a)?,
write!(f, "{}{}", indent, element)?;
}
} }
writeln!(f, " ({}:{})", e.span.start(), e.span.end())?; writeln!(f, " ({}:{})", e.span.start(), e.span.end())?;
} }
@ -243,11 +230,11 @@ mod test {
#[test] #[test]
fn fmt_linear() { fn fmt_linear() {
let mut tree: super::Builder<u8, u8> = super::Builder::new(); let mut tree: super::Builder<u8, u8> = super::Builder::new();
tree.elem(1, Span::new(0, 1)); tree.atom(1, Span::new(0, 1));
tree.elem(2, Span::new(1, 2)); tree.atom(2, Span::new(1, 2));
tree.elem(3, Span::new(3, 4)); tree.atom(3, Span::new(3, 4));
assert_eq!( assert_eq!(
tree.to_string(), format!("{:?}", tree),
concat!( concat!(
"1 (0:1)\n", "1 (0:1)\n",
"2 (1:2)\n", "2 (1:2)\n",
@ -260,24 +247,24 @@ mod test {
fn fmt_container() { fn fmt_container() {
let mut tree: super::Builder<u8, u16> = super::Builder::new(); let mut tree: super::Builder<u8, u16> = super::Builder::new();
tree.enter(1, Span::new(0, 1)); tree.enter(1, Span::new(0, 1));
tree.elem(11, Span::new(0, 1)); tree.atom(11, Span::new(0, 1));
tree.elem(12, Span::new(0, 1)); tree.atom(12, Span::new(0, 1));
tree.exit(); tree.exit();
tree.enter(2, Span::new(1, 5)); tree.enter(2, Span::new(1, 5));
tree.enter(21, Span::new(2, 5)); tree.enter(21, Span::new(2, 5));
tree.enter(211, Span::new(3, 4)); tree.enter(211, Span::new(3, 4));
tree.elem(2111, Span::new(3, 4)); tree.atom(2111, Span::new(3, 4));
tree.exit(); tree.exit();
tree.exit(); tree.exit();
tree.enter(22, Span::new(4, 5)); tree.enter(22, Span::new(4, 5));
tree.elem(221, Span::new(4, 5)); tree.atom(221, Span::new(4, 5));
tree.exit(); tree.exit();
tree.exit(); tree.exit();
tree.enter(3, Span::new(5, 6)); tree.enter(3, Span::new(5, 6));
tree.elem(31, Span::new(5, 6)); tree.atom(31, Span::new(5, 6));
tree.exit(); tree.exit();
assert_eq!( assert_eq!(
tree.to_string(), format!("{:?}", tree),
concat!( concat!(
"1 (0:1)\n", "1 (0:1)\n",
" 11 (0:1)\n", " 11 (0:1)\n",