refactorzzz

This commit is contained in:
Noah Hellman 2022-12-12 18:22:13 +01:00
parent 81a4edb884
commit f6fa422e6b
3 changed files with 140 additions and 120 deletions

View file

@ -7,7 +7,14 @@ use Atom::*;
use Container::*; use Container::*;
use Leaf::*; use Leaf::*;
pub type Tree = tree::Tree<Block, Atom>; pub type Tree = tree::Tree<Node, Atom>;
/// Builder counterpart of [`Tree`]: a `tree::Builder` using [`Node`] as the
/// container payload and `Atom` as the atom payload.
pub type TreeBuilder = tree::Builder<Node, Atom>;
/// Payload of tree nodes that are entered and exited while building the block
/// tree: either a container block or a leaf block.
///
/// Atoms are not part of this enum; the parser adds them through the builder's
/// separate `atom` call rather than `enter`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Node {
/// A container block, carrying its `Container` kind.
Container(Container),
/// A leaf block, carrying its `Leaf` kind.
Leaf(Leaf),
}
#[must_use] #[must_use]
pub fn parse(src: &str) -> Tree { pub fn parse(src: &str) -> Tree {
@ -79,7 +86,7 @@ pub enum Container {
/// Parser for block-level tree structure of entire document. /// Parser for block-level tree structure of entire document.
struct TreeParser<'s> { struct TreeParser<'s> {
src: &'s str, src: &'s str,
tree: tree::Builder<Block, Atom>, tree: TreeBuilder,
} }
impl<'s> TreeParser<'s> { impl<'s> TreeParser<'s> {
@ -87,7 +94,7 @@ impl<'s> TreeParser<'s> {
pub fn new(src: &'s str) -> Self { pub fn new(src: &'s str) -> Self {
Self { Self {
src, src,
tree: tree::Builder::new(), tree: TreeBuilder::new(),
} }
} }
@ -142,7 +149,7 @@ impl<'s> TreeParser<'s> {
match kind { match kind {
Block::Atom(a) => self.tree.atom(a, span), Block::Atom(a) => self.tree.atom(a, span),
Block::Leaf(l) => { Block::Leaf(l) => {
self.tree.enter(kind, span); self.tree.enter(Node::Leaf(l), span);
// trim starting whitespace of the block contents // trim starting whitespace of the block contents
lines[0] = lines[0].trim_start(self.src); lines[0] = lines[0].trim_start(self.src);
@ -190,7 +197,7 @@ impl<'s> TreeParser<'s> {
*sp = sp.skip(skip); *sp = sp.skip(skip);
}); });
self.tree.enter(kind, span); self.tree.enter(Node::Container(c), span);
let mut l = 0; let mut l = 0;
while l < line_count_inner { while l < line_count_inner {
l += self.parse_block(&mut lines[l..line_count_inner]); l += self.parse_block(&mut lines[l..line_count_inner]);
@ -396,16 +403,15 @@ fn lines(src: &str) -> impl Iterator<Item = Span> + '_ {
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use crate::tree::EventKind;
use crate::tree::EventKind::*; use crate::tree::EventKind::*;
use crate::tree::EventKind;
use super::Atom::*; use super::Atom::*;
use super::Block; use super::Block;
use super::Block::*;
use super::Container::*; use super::Container::*;
use super::Leaf::*; use super::Leaf::*;
use super::Node::*;
/*
macro_rules! test_parse { macro_rules! test_parse {
($src:expr $(,$($event:expr),* $(,)?)?) => { ($src:expr $(,$($event:expr),* $(,)?)?) => {
let t = super::TreeParser::new($src).parse(); let t = super::TreeParser::new($src).parse();
@ -420,7 +426,7 @@ mod test {
test_parse!( test_parse!(
"para\n", "para\n",
(Enter(Leaf(Paragraph)), ""), (Enter(Leaf(Paragraph)), ""),
(EventKind::Atom(Inline), "para"), (Inline, "para"),
(Exit(Leaf(Paragraph)), ""), (Exit(Leaf(Paragraph)), ""),
); );
} }
@ -430,8 +436,8 @@ mod test {
test_parse!( test_parse!(
"para0\npara1\n", "para0\npara1\n",
(Enter(Leaf(Paragraph)), ""), (Enter(Leaf(Paragraph)), ""),
(EventKind::Atom(Inline), "para0\n"), (Inline, "para0\n"),
(EventKind::Atom(Inline), "para1"), (Inline, "para1"),
(Exit(Leaf(Paragraph)), ""), (Exit(Leaf(Paragraph)), ""),
); );
} }
@ -440,20 +446,20 @@ mod test {
fn parse_heading_multi() { fn parse_heading_multi() {
test_parse!( test_parse!(
concat!( concat!(
"# 2\n", "# 2\n",
"\n", "\n",
" # 8\n", " # 8\n",
" 12\n", " 12\n",
"15\n", // "15\n", //
), ),
(Enter(Leaf(Heading)), "#"), (Enter(Leaf(Heading)), "#"),
(EventKind::Atom(Inline), "2"), (Inline, "2"),
(Exit(Leaf(Heading)), "#"), (Exit(Leaf(Heading)), "#"),
(EventKind::Atom(Blankline), "\n"), (Atom(Blankline), "\n"),
(Enter(Leaf(Heading)), "#"), (Enter(Leaf(Heading)), "#"),
(EventKind::Atom(Inline), "8\n"), (Inline, "8\n"),
(EventKind::Atom(Inline), " 12\n"), (Inline, " 12\n"),
(EventKind::Atom(Inline), "15"), (Inline, "15"),
(Exit(Leaf(Heading)), "#"), (Exit(Leaf(Heading)), "#"),
); );
} }
@ -464,20 +470,18 @@ mod test {
"> a\n", "> a\n",
(Enter(Container(Blockquote)), ">"), (Enter(Container(Blockquote)), ">"),
(Enter(Leaf(Paragraph)), ""), (Enter(Leaf(Paragraph)), ""),
(EventKind::Atom(Inline), "a"), (Inline, "a"),
(Exit(Leaf(Paragraph)), ""), (Exit(Leaf(Paragraph)), ""),
(Exit(Container(Blockquote)), ">"), (Exit(Container(Blockquote)), ">"),
); );
test_parse!( test_parse!(
"> \n", "> a\nb\nc\n",
(Enter(Container(Blockquote)), ">"), (Enter(Container(Blockquote)), ">"),
(EventKind::Atom(Blankline), "\n"), (Enter(Leaf(Paragraph)), ""),
(Exit(Container(Blockquote)), ">"), (Inline, "a\n"),
); (Inline, "b\n"),
test_parse!( (Inline, "c"),
">", (Exit(Leaf(Paragraph)), ""),
(Enter(Container(Blockquote)), ">"),
(EventKind::Atom(Blankline), ""),
(Exit(Container(Blockquote)), ">"), (Exit(Container(Blockquote)), ">"),
); );
test_parse!( test_parse!(
@ -490,15 +494,15 @@ mod test {
), ),
(Enter(Container(Blockquote)), ">"), (Enter(Container(Blockquote)), ">"),
(Enter(Leaf(Paragraph)), ""), (Enter(Leaf(Paragraph)), ""),
(EventKind::Atom(Inline), "a"), (Inline, "a"),
(Exit(Leaf(Paragraph)), ""), (Exit(Leaf(Paragraph)), ""),
(EventKind::Atom(Blankline), "\n"), (Atom(Blankline), "\n"),
(Enter(Leaf(Heading)), "##"), (Enter(Leaf(Heading)), "##"),
(EventKind::Atom(Inline), "hl"), (Inline, "hl"),
(Exit(Leaf(Heading)), "##"), (Exit(Leaf(Heading)), "##"),
(EventKind::Atom(Blankline), "\n"), (Atom(Blankline), "\n"),
(Enter(Leaf(Paragraph)), ""), (Enter(Leaf(Paragraph)), ""),
(EventKind::Atom(Inline), "para"), (Inline, "para"),
(Exit(Leaf(Paragraph)), ""), (Exit(Leaf(Paragraph)), ""),
(Exit(Container(Blockquote)), ">"), (Exit(Container(Blockquote)), ">"),
); );
@ -525,7 +529,7 @@ mod test {
test_parse!( test_parse!(
concat!("```\n", "l0\n"), concat!("```\n", "l0\n"),
(Enter(Leaf(CodeBlock)), "",), (Enter(Leaf(CodeBlock)), "",),
(EventKind::Atom(Inline), "l0\n"), (Inline, "l0\n"),
(Exit(Leaf(CodeBlock)), "",), (Exit(Leaf(CodeBlock)), "",),
); );
test_parse!( test_parse!(
@ -537,11 +541,11 @@ mod test {
"para\n", // "para\n", //
), ),
(Enter(Leaf(CodeBlock)), ""), (Enter(Leaf(CodeBlock)), ""),
(EventKind::Atom(Inline), "l0\n"), (Inline, "l0\n"),
(Exit(Leaf(CodeBlock)), ""), (Exit(Leaf(CodeBlock)), ""),
(EventKind::Atom(Blankline), "\n"), (Atom(Blankline), "\n"),
(Enter(Leaf(Paragraph)), ""), (Enter(Leaf(Paragraph)), ""),
(EventKind::Atom(Inline), "para"), (Inline, "para"),
(Exit(Leaf(Paragraph)), ""), (Exit(Leaf(Paragraph)), ""),
); );
test_parse!( test_parse!(
@ -553,9 +557,9 @@ mod test {
"````", // "````", //
), ),
(Enter(Leaf(CodeBlock)), "lang"), (Enter(Leaf(CodeBlock)), "lang"),
(EventKind::Atom(Inline), "l0\n"), (Inline, "l0\n"),
(EventKind::Atom(Inline), "```\n"), (Inline, "```\n"),
(EventKind::Atom(Inline), " l1\n"), (Inline, " l1\n"),
(Exit(Leaf(CodeBlock)), "lang"), (Exit(Leaf(CodeBlock)), "lang"),
); );
test_parse!( test_parse!(
@ -568,10 +572,10 @@ mod test {
"```\n", // "```\n", //
), ),
(Enter(Leaf(CodeBlock)), ""), (Enter(Leaf(CodeBlock)), ""),
(EventKind::Atom(Inline), "a\n"), (Inline, "a\n"),
(Exit(Leaf(CodeBlock)), ""), (Exit(Leaf(CodeBlock)), ""),
(Enter(Leaf(CodeBlock)), ""), (Enter(Leaf(CodeBlock)), ""),
(EventKind::Atom(Inline), "bbb\n"), (Inline, "bbb\n"),
(Exit(Leaf(CodeBlock)), ""), (Exit(Leaf(CodeBlock)), ""),
); );
test_parse!( test_parse!(
@ -581,10 +585,10 @@ mod test {
" block\n", " block\n",
"~~~\n", // "~~~\n", //
), ),
(Enter(Leaf(CodeBlock)), "",), (Enter(Leaf(CodeBlock)), ""),
(EventKind::Atom(Inline), "code\n"), (Inline, "code\n"),
(EventKind::Atom(Inline), " block\n"), (Inline, " block\n"),
(Exit(Leaf(CodeBlock)), "",), (Exit(Leaf(CodeBlock)), ""),
); );
} }
@ -593,7 +597,7 @@ mod test {
test_parse!( test_parse!(
"[tag]: url\n", "[tag]: url\n",
(Enter(Leaf(LinkDefinition)), "tag"), (Enter(Leaf(LinkDefinition)), "tag"),
(EventKind::Atom(Inline), "url"), (Inline, "url"),
(Exit(Leaf(LinkDefinition)), "tag"), (Exit(Leaf(LinkDefinition)), "tag"),
); );
} }
@ -604,7 +608,7 @@ mod test {
"[^tag]: description\n", "[^tag]: description\n",
(Enter(Container(Footnote)), "tag"), (Enter(Container(Footnote)), "tag"),
(Enter(Leaf(Paragraph)), ""), (Enter(Leaf(Paragraph)), ""),
(EventKind::Atom(Inline), "description"), (Inline, "description"),
(Exit(Leaf(Paragraph)), ""), (Exit(Leaf(Paragraph)), ""),
(Exit(Container(Footnote)), "tag"), (Exit(Container(Footnote)), "tag"),
); );
@ -631,7 +635,12 @@ mod test {
#[test] #[test]
fn block_multiline() { fn block_multiline() {
test_block!("# heading\n spanning two lines\n", Leaf(Heading), "#", 2); test_block!(
"# heading\n spanning two lines\n",
Block::Leaf(Heading),
"#",
2
);
} }
#[test] #[test]
@ -675,7 +684,7 @@ mod test {
" l1\n", " l1\n",
"````", // "````", //
), ),
Leaf(CodeBlock), Block::Leaf(CodeBlock),
"lang", "lang",
5, 5,
); );
@ -688,7 +697,7 @@ mod test {
"bbb\n", // "bbb\n", //
"```\n", // "```\n", //
), ),
Leaf(CodeBlock), Block::Leaf(CodeBlock),
"", "",
3, 3,
); );
@ -698,7 +707,7 @@ mod test {
"l0\n", "l0\n",
"```\n", // "```\n", //
), ),
Leaf(Paragraph), Block::Leaf(Paragraph),
"", "",
3, 3,
); );
@ -706,13 +715,13 @@ mod test {
#[test] #[test]
fn block_link_definition() { fn block_link_definition() {
test_block!("[tag]: url\n", Leaf(LinkDefinition), "tag", 1); test_block!("[tag]: url\n", Block::Leaf(LinkDefinition), "tag", 1);
test_block!( test_block!(
concat!( concat!(
"[tag]: uuu\n", "[tag]: uuu\n",
" rl\n", // " rl\n", //
), ),
Leaf(LinkDefinition), Block::Leaf(LinkDefinition),
"tag", "tag",
2, 2,
); );
@ -721,10 +730,9 @@ mod test {
"[tag]: url\n", "[tag]: url\n",
"para\n", // "para\n", //
), ),
Leaf(LinkDefinition), Block::Leaf(LinkDefinition),
"tag", "tag",
1, 1,
); );
} }
*/
} }

View file

@ -8,8 +8,6 @@ mod tree;
use span::Span; use span::Span;
pub struct Block;
const EOF: char = '\0'; const EOF: char = '\0';
#[derive(Debug, PartialEq, Eq)] #[derive(Debug, PartialEq, Eq)]
@ -312,7 +310,7 @@ impl<'s> Attributes<'s> {
#[derive(Clone)] #[derive(Clone)]
struct InlineChars<'t, 's> { struct InlineChars<'t, 's> {
src: &'s str, src: &'s str,
inlines: tree::Inlines<'t, block::Block, block::Atom>, inlines: tree::Inlines<'t, block::Node, block::Atom>,
} }
impl<'t, 's> Iterator for InlineChars<'t, 's> { impl<'t, 's> Iterator for InlineChars<'t, 's> {
@ -328,7 +326,7 @@ impl<'t, 's> Iterator for InlineChars<'t, 's> {
pub struct Parser<'s> { pub struct Parser<'s> {
src: &'s str, src: &'s str,
tree: block::Tree, tree: block::Tree,
inline_parser: Option<inline::Parser<InlineChars<'s, 's>>>, inline_parser: Option<inline::Parser<InlineChars<'static, 's>>>,
inline_start: usize, inline_start: usize,
block_attributes: Attributes<'s>, block_attributes: Attributes<'s>,
} }
@ -369,39 +367,44 @@ impl<'s> Iterator for Parser<'s> {
continue; continue;
} }
}, },
tree::EventKind::Enter(b) => { tree::EventKind::Enter(c) => match c {
if matches!(b, block::Block::Leaf(_)) { block::Node::Leaf(l) => {
let inlines = self.tree.inlines();
let chars = InlineChars { let chars = InlineChars {
src: self.src, src: self.src,
inlines: self.tree.inlines(), inlines,
}; };
// TODO solve self-referential reference here without unsafe
self.inline_parser = self.inline_parser =
unsafe { Some(std::mem::transmute(inline::Parser::new(chars))) }; unsafe { Some(std::mem::transmute(inline::Parser::new(chars))) };
self.inline_start = ev.span.end(); self.inline_start = ev.span.end();
} let container = match l {
let container = match b { block::Leaf::CodeBlock { .. } => {
block::Block::Leaf(block::Leaf::CodeBlock { .. }) => { self.inline_start += 1; // skip newline
self.inline_start += 1; // skip newline Container::CodeBlock {
Container::CodeBlock { lang: (!ev.span.is_empty()).then(|| content),
lang: (!ev.span.is_empty()).then(|| content), }
} }
} _ => Container::from_leaf_block(content, l),
block::Block::Container(block::Container::Div { .. }) => Container::Div { };
class: (!ev.span.is_empty()).then(|| ev.span.of(self.src)), Event::Start(container, self.block_attributes.take())
}, }
block::Block::Leaf(l) => Container::from_leaf_block(content, l), block::Node::Container(c) => {
block::Block::Container(c) => Container::from_container_block(content, c), let container = match c {
block::Block::Atom(..) => panic!(), block::Container::Div { .. } => Container::Div {
}; class: (!ev.span.is_empty()).then(|| content),
Event::Start(container, self.block_attributes.take()) },
} _ => Container::from_container_block(content, c),
tree::EventKind::Exit(b) => Event::End(match b { };
block::Block::Leaf(l) => Container::from_leaf_block(content, l), Event::Start(container, self.block_attributes.take())
block::Block::Container(c) => Container::from_container_block(content, c), }
block::Block::Atom(..) => panic!(), },
}), tree::EventKind::Exit(c) => match c {
tree::EventKind::Inline => panic!(), block::Node::Leaf(l) => Event::End(Container::from_leaf_block(content, l)),
block::Node::Container(c) => {
Event::End(Container::from_container_block(content, c))
}
},
tree::EventKind::Inline => unreachable!(),
}; };
return Some(event); return Some(event);
} }

View file

@ -34,7 +34,7 @@ impl<'t, C, A> Iterator for Inlines<'t, C, A> {
} }
} }
impl<C: Clone, A: Clone> Tree<C, A> { impl<C, A> Tree<C, A> {
fn new(nodes: Vec<Node<C, A>>) -> Self { fn new(nodes: Vec<Node<C, A>>) -> Self {
let head = nodes[NodeIndex::root().index()].next; let head = nodes[NodeIndex::root().index()].next;
Self { Self {
@ -77,9 +77,9 @@ impl<C: Clone, A: Clone> Iterator for Tree<C, A> {
self.head = *child; self.head = *child;
EventKind::Enter(c.clone()) EventKind::Enter(c.clone())
} }
NodeKind::Atom(e) => { NodeKind::Atom(a) => {
self.head = n.next; self.head = n.next;
EventKind::Atom(e.clone()) EventKind::Atom(a.clone())
} }
NodeKind::Inline => { NodeKind::Inline => {
self.head = n.next; self.head = n.next;
@ -89,16 +89,12 @@ impl<C: Clone, A: Clone> Iterator for Tree<C, A> {
Some(Event { kind, span: n.span }) Some(Event { kind, span: n.span })
} else if let Some(block_ni) = self.branch.pop() { } else if let Some(block_ni) = self.branch.pop() {
let Node { next, kind, span } = &self.nodes[block_ni.index()]; let Node { next, kind, span } = &self.nodes[block_ni.index()];
let cont = if let NodeKind::Container(c, _) = kind { let kind = match kind {
c NodeKind::Container(c, _) => EventKind::Exit(c.clone()),
} else { _ => panic!(),
panic!();
}; };
self.head = *next; self.head = *next;
Some(Event { Some(Event { kind, span: *span })
kind: EventKind::Exit(cont.clone()),
span: *span,
})
} else { } else {
None None
} }
@ -123,7 +119,7 @@ impl NodeIndex {
} }
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone, PartialEq, Eq)]
enum NodeKind<C, A> { enum NodeKind<C, A> {
Root, Root,
Container(C, Option<NodeIndex>), Container(C, Option<NodeIndex>),
@ -234,7 +230,7 @@ impl<C: std::fmt::Debug + Clone, A: std::fmt::Debug + Clone> std::fmt::Debug for
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
const INDENT: &str = " "; const INDENT: &str = " ";
let mut level = 0; let mut level = 0;
writeln!(f)?; write!(f, "\n")?;
for e in self.clone() { for e in self.clone() {
let indent = INDENT.repeat(level); let indent = INDENT.repeat(level);
match e.kind { match e.kind {
@ -257,30 +253,43 @@ impl<C: std::fmt::Debug + Clone, A: std::fmt::Debug + Clone> std::fmt::Debug for
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use crate::block;
use crate::Span; use crate::Span;
#[test] #[test]
fn fmt_linear() { fn fmt() {
let mut tree: super::Builder<u8, u8> = super::Builder::new(); let mut tree = super::Builder::new();
tree.atom(1, Span::new(0, 1)); tree.enter(1, Span::new(0, 1));
tree.atom(2, Span::new(1, 2)); tree.atom(11, Span::new(0, 1));
tree.atom(3, Span::new(3, 4)); tree.atom(12, Span::new(0, 1));
tree.exit();
tree.enter(2, Span::new(1, 5));
tree.enter(21, Span::new(2, 5));
tree.enter(211, Span::new(3, 4));
tree.atom(2111, Span::new(3, 4));
tree.exit();
tree.exit();
tree.enter(22, Span::new(4, 5));
tree.atom(221, Span::new(4, 5));
tree.exit();
tree.exit();
tree.enter(3, Span::new(5, 6));
tree.atom(31, Span::new(5, 6));
tree.exit();
assert_eq!( assert_eq!(
format!("{:?}", tree), format!("{:?}", tree),
concat!( concat!(
"Heading (0:1)\n", "\n",
" 0:1\n", "1 (0:1)\n",
" 0:1\n", " 11 (0:1)\n",
"Blockquote (1:5)\n", " 12 (0:1)\n",
" Div (2:5)\n", "2 (1:5)\n",
" Paragraph (3:4)\n", " 21 (2:5)\n",
" 3:4\n", " 211 (3:4)\n",
" Blankline (4:5)\n", " 2111 (3:4)\n",
" Paragraph (4:5)\n", " 22 (4:5)\n",
" 4:5\n", " 221 (4:5)\n",
"Heading (5:6)\n", "3 (5:6)\n",
" 5:6\n", " 31 (5:6)\n",
) )
); );
} }