refactorzzz

This commit is contained in:
Noah Hellman 2022-12-12 18:22:13 +01:00
parent 81a4edb884
commit f6fa422e6b
3 changed files with 140 additions and 120 deletions

View file

@ -7,7 +7,14 @@ use Atom::*;
use Container::*;
use Leaf::*;
pub type Tree = tree::Tree<Block, Atom>;
pub type Tree = tree::Tree<Node, Atom>;
pub type TreeBuilder = tree::Builder<Node, Atom>;
/// Kind of a non-atom node in the block tree.
///
/// This is the first type parameter of the `Tree` and `TreeBuilder` aliases,
/// and the value handed to the builder's `enter` when a block is opened.
/// Atoms are not represented here; they are added through the builder's
/// `atom` method with an `Atom` value instead.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Node {
/// A block that holds other blocks, e.g. a blockquote or a footnote.
Container(Container),
/// A leaf block, e.g. a paragraph, heading, code block or link definition.
Leaf(Leaf),
}
#[must_use]
pub fn parse(src: &str) -> Tree {
@ -79,7 +86,7 @@ pub enum Container {
/// Parser for block-level tree structure of entire document.
struct TreeParser<'s> {
src: &'s str,
tree: tree::Builder<Block, Atom>,
tree: TreeBuilder,
}
impl<'s> TreeParser<'s> {
@ -87,7 +94,7 @@ impl<'s> TreeParser<'s> {
pub fn new(src: &'s str) -> Self {
Self {
src,
tree: tree::Builder::new(),
tree: TreeBuilder::new(),
}
}
@ -142,7 +149,7 @@ impl<'s> TreeParser<'s> {
match kind {
Block::Atom(a) => self.tree.atom(a, span),
Block::Leaf(l) => {
self.tree.enter(kind, span);
self.tree.enter(Node::Leaf(l), span);
// trim starting whitespace of the block contents
lines[0] = lines[0].trim_start(self.src);
@ -190,7 +197,7 @@ impl<'s> TreeParser<'s> {
*sp = sp.skip(skip);
});
self.tree.enter(kind, span);
self.tree.enter(Node::Container(c), span);
let mut l = 0;
while l < line_count_inner {
l += self.parse_block(&mut lines[l..line_count_inner]);
@ -396,16 +403,15 @@ fn lines(src: &str) -> impl Iterator<Item = Span> + '_ {
#[cfg(test)]
mod test {
use crate::tree::EventKind;
use crate::tree::EventKind::*;
use crate::tree::EventKind;
use super::Atom::*;
use super::Block;
use super::Block::*;
use super::Container::*;
use super::Leaf::*;
use super::Node::*;
/*
macro_rules! test_parse {
($src:expr $(,$($event:expr),* $(,)?)?) => {
let t = super::TreeParser::new($src).parse();
@ -420,7 +426,7 @@ mod test {
test_parse!(
"para\n",
(Enter(Leaf(Paragraph)), ""),
(EventKind::Atom(Inline), "para"),
(Inline, "para"),
(Exit(Leaf(Paragraph)), ""),
);
}
@ -430,8 +436,8 @@ mod test {
test_parse!(
"para0\npara1\n",
(Enter(Leaf(Paragraph)), ""),
(EventKind::Atom(Inline), "para0\n"),
(EventKind::Atom(Inline), "para1"),
(Inline, "para0\n"),
(Inline, "para1"),
(Exit(Leaf(Paragraph)), ""),
);
}
@ -440,20 +446,20 @@ mod test {
fn parse_heading_multi() {
test_parse!(
concat!(
"# 2\n",
"\n",
" # 8\n",
" 12\n",
"15\n", //
),
"# 2\n",
"\n",
" # 8\n",
" 12\n",
"15\n", //
),
(Enter(Leaf(Heading)), "#"),
(EventKind::Atom(Inline), "2"),
(Inline, "2"),
(Exit(Leaf(Heading)), "#"),
(EventKind::Atom(Blankline), "\n"),
(Atom(Blankline), "\n"),
(Enter(Leaf(Heading)), "#"),
(EventKind::Atom(Inline), "8\n"),
(EventKind::Atom(Inline), " 12\n"),
(EventKind::Atom(Inline), "15"),
(Inline, "8\n"),
(Inline, " 12\n"),
(Inline, "15"),
(Exit(Leaf(Heading)), "#"),
);
}
@ -464,20 +470,18 @@ mod test {
"> a\n",
(Enter(Container(Blockquote)), ">"),
(Enter(Leaf(Paragraph)), ""),
(EventKind::Atom(Inline), "a"),
(Inline, "a"),
(Exit(Leaf(Paragraph)), ""),
(Exit(Container(Blockquote)), ">"),
);
test_parse!(
"> \n",
"> a\nb\nc\n",
(Enter(Container(Blockquote)), ">"),
(EventKind::Atom(Blankline), "\n"),
(Exit(Container(Blockquote)), ">"),
);
test_parse!(
">",
(Enter(Container(Blockquote)), ">"),
(EventKind::Atom(Blankline), ""),
(Enter(Leaf(Paragraph)), ""),
(Inline, "a\n"),
(Inline, "b\n"),
(Inline, "c"),
(Exit(Leaf(Paragraph)), ""),
(Exit(Container(Blockquote)), ">"),
);
test_parse!(
@ -490,15 +494,15 @@ mod test {
),
(Enter(Container(Blockquote)), ">"),
(Enter(Leaf(Paragraph)), ""),
(EventKind::Atom(Inline), "a"),
(Inline, "a"),
(Exit(Leaf(Paragraph)), ""),
(EventKind::Atom(Blankline), "\n"),
(Atom(Blankline), "\n"),
(Enter(Leaf(Heading)), "##"),
(EventKind::Atom(Inline), "hl"),
(Inline, "hl"),
(Exit(Leaf(Heading)), "##"),
(EventKind::Atom(Blankline), "\n"),
(Atom(Blankline), "\n"),
(Enter(Leaf(Paragraph)), ""),
(EventKind::Atom(Inline), "para"),
(Inline, "para"),
(Exit(Leaf(Paragraph)), ""),
(Exit(Container(Blockquote)), ">"),
);
@ -525,7 +529,7 @@ mod test {
test_parse!(
concat!("```\n", "l0\n"),
(Enter(Leaf(CodeBlock)), "",),
(EventKind::Atom(Inline), "l0\n"),
(Inline, "l0\n"),
(Exit(Leaf(CodeBlock)), "",),
);
test_parse!(
@ -537,11 +541,11 @@ mod test {
"para\n", //
),
(Enter(Leaf(CodeBlock)), ""),
(EventKind::Atom(Inline), "l0\n"),
(Inline, "l0\n"),
(Exit(Leaf(CodeBlock)), ""),
(EventKind::Atom(Blankline), "\n"),
(Atom(Blankline), "\n"),
(Enter(Leaf(Paragraph)), ""),
(EventKind::Atom(Inline), "para"),
(Inline, "para"),
(Exit(Leaf(Paragraph)), ""),
);
test_parse!(
@ -553,9 +557,9 @@ mod test {
"````", //
),
(Enter(Leaf(CodeBlock)), "lang"),
(EventKind::Atom(Inline), "l0\n"),
(EventKind::Atom(Inline), "```\n"),
(EventKind::Atom(Inline), " l1\n"),
(Inline, "l0\n"),
(Inline, "```\n"),
(Inline, " l1\n"),
(Exit(Leaf(CodeBlock)), "lang"),
);
test_parse!(
@ -568,10 +572,10 @@ mod test {
"```\n", //
),
(Enter(Leaf(CodeBlock)), ""),
(EventKind::Atom(Inline), "a\n"),
(Inline, "a\n"),
(Exit(Leaf(CodeBlock)), ""),
(Enter(Leaf(CodeBlock)), ""),
(EventKind::Atom(Inline), "bbb\n"),
(Inline, "bbb\n"),
(Exit(Leaf(CodeBlock)), ""),
);
test_parse!(
@ -581,10 +585,10 @@ mod test {
" block\n",
"~~~\n", //
),
(Enter(Leaf(CodeBlock)), "",),
(EventKind::Atom(Inline), "code\n"),
(EventKind::Atom(Inline), " block\n"),
(Exit(Leaf(CodeBlock)), "",),
(Enter(Leaf(CodeBlock)), ""),
(Inline, "code\n"),
(Inline, " block\n"),
(Exit(Leaf(CodeBlock)), ""),
);
}
@ -593,7 +597,7 @@ mod test {
test_parse!(
"[tag]: url\n",
(Enter(Leaf(LinkDefinition)), "tag"),
(EventKind::Atom(Inline), "url"),
(Inline, "url"),
(Exit(Leaf(LinkDefinition)), "tag"),
);
}
@ -604,7 +608,7 @@ mod test {
"[^tag]: description\n",
(Enter(Container(Footnote)), "tag"),
(Enter(Leaf(Paragraph)), ""),
(EventKind::Atom(Inline), "description"),
(Inline, "description"),
(Exit(Leaf(Paragraph)), ""),
(Exit(Container(Footnote)), "tag"),
);
@ -631,7 +635,12 @@ mod test {
#[test]
fn block_multiline() {
test_block!("# heading\n spanning two lines\n", Leaf(Heading), "#", 2);
test_block!(
"# heading\n spanning two lines\n",
Block::Leaf(Heading),
"#",
2
);
}
#[test]
@ -675,7 +684,7 @@ mod test {
" l1\n",
"````", //
),
Leaf(CodeBlock),
Block::Leaf(CodeBlock),
"lang",
5,
);
@ -688,7 +697,7 @@ mod test {
"bbb\n", //
"```\n", //
),
Leaf(CodeBlock),
Block::Leaf(CodeBlock),
"",
3,
);
@ -698,7 +707,7 @@ mod test {
"l0\n",
"```\n", //
),
Leaf(Paragraph),
Block::Leaf(Paragraph),
"",
3,
);
@ -706,13 +715,13 @@ mod test {
#[test]
fn block_link_definition() {
test_block!("[tag]: url\n", Leaf(LinkDefinition), "tag", 1);
test_block!("[tag]: url\n", Block::Leaf(LinkDefinition), "tag", 1);
test_block!(
concat!(
"[tag]: uuu\n",
" rl\n", //
),
Leaf(LinkDefinition),
Block::Leaf(LinkDefinition),
"tag",
2,
);
@ -721,10 +730,9 @@ mod test {
"[tag]: url\n",
"para\n", //
),
Leaf(LinkDefinition),
Block::Leaf(LinkDefinition),
"tag",
1,
);
}
*/
}

View file

@ -8,8 +8,6 @@ mod tree;
use span::Span;
pub struct Block;
const EOF: char = '\0';
#[derive(Debug, PartialEq, Eq)]
@ -312,7 +310,7 @@ impl<'s> Attributes<'s> {
#[derive(Clone)]
struct InlineChars<'t, 's> {
src: &'s str,
inlines: tree::Inlines<'t, block::Block, block::Atom>,
inlines: tree::Inlines<'t, block::Node, block::Atom>,
}
impl<'t, 's> Iterator for InlineChars<'t, 's> {
@ -328,7 +326,7 @@ impl<'t, 's> Iterator for InlineChars<'t, 's> {
pub struct Parser<'s> {
src: &'s str,
tree: block::Tree,
inline_parser: Option<inline::Parser<InlineChars<'s, 's>>>,
inline_parser: Option<inline::Parser<InlineChars<'static, 's>>>,
inline_start: usize,
block_attributes: Attributes<'s>,
}
@ -369,39 +367,44 @@ impl<'s> Iterator for Parser<'s> {
continue;
}
},
tree::EventKind::Enter(b) => {
if matches!(b, block::Block::Leaf(_)) {
tree::EventKind::Enter(c) => match c {
block::Node::Leaf(l) => {
let inlines = self.tree.inlines();
let chars = InlineChars {
src: self.src,
inlines: self.tree.inlines(),
inlines,
};
// TODO solve self-referential reference here without unsafe
self.inline_parser =
unsafe { Some(std::mem::transmute(inline::Parser::new(chars))) };
self.inline_start = ev.span.end();
}
let container = match b {
block::Block::Leaf(block::Leaf::CodeBlock { .. }) => {
self.inline_start += 1; // skip newline
Container::CodeBlock {
lang: (!ev.span.is_empty()).then(|| content),
let container = match l {
block::Leaf::CodeBlock { .. } => {
self.inline_start += 1; // skip newline
Container::CodeBlock {
lang: (!ev.span.is_empty()).then(|| content),
}
}
}
block::Block::Container(block::Container::Div { .. }) => Container::Div {
class: (!ev.span.is_empty()).then(|| ev.span.of(self.src)),
},
block::Block::Leaf(l) => Container::from_leaf_block(content, l),
block::Block::Container(c) => Container::from_container_block(content, c),
block::Block::Atom(..) => panic!(),
};
Event::Start(container, self.block_attributes.take())
}
tree::EventKind::Exit(b) => Event::End(match b {
block::Block::Leaf(l) => Container::from_leaf_block(content, l),
block::Block::Container(c) => Container::from_container_block(content, c),
block::Block::Atom(..) => panic!(),
}),
tree::EventKind::Inline => panic!(),
_ => Container::from_leaf_block(content, l),
};
Event::Start(container, self.block_attributes.take())
}
block::Node::Container(c) => {
let container = match c {
block::Container::Div { .. } => Container::Div {
class: (!ev.span.is_empty()).then(|| content),
},
_ => Container::from_container_block(content, c),
};
Event::Start(container, self.block_attributes.take())
}
},
tree::EventKind::Exit(c) => match c {
block::Node::Leaf(l) => Event::End(Container::from_leaf_block(content, l)),
block::Node::Container(c) => {
Event::End(Container::from_container_block(content, c))
}
},
tree::EventKind::Inline => unreachable!(),
};
return Some(event);
}

View file

@ -34,7 +34,7 @@ impl<'t, C, A> Iterator for Inlines<'t, C, A> {
}
}
impl<C: Clone, A: Clone> Tree<C, A> {
impl<C, A> Tree<C, A> {
fn new(nodes: Vec<Node<C, A>>) -> Self {
let head = nodes[NodeIndex::root().index()].next;
Self {
@ -77,9 +77,9 @@ impl<C: Clone, A: Clone> Iterator for Tree<C, A> {
self.head = *child;
EventKind::Enter(c.clone())
}
NodeKind::Atom(e) => {
NodeKind::Atom(a) => {
self.head = n.next;
EventKind::Atom(e.clone())
EventKind::Atom(a.clone())
}
NodeKind::Inline => {
self.head = n.next;
@ -89,16 +89,12 @@ impl<C: Clone, A: Clone> Iterator for Tree<C, A> {
Some(Event { kind, span: n.span })
} else if let Some(block_ni) = self.branch.pop() {
let Node { next, kind, span } = &self.nodes[block_ni.index()];
let cont = if let NodeKind::Container(c, _) = kind {
c
} else {
panic!();
let kind = match kind {
NodeKind::Container(c, _) => EventKind::Exit(c.clone()),
_ => panic!(),
};
self.head = *next;
Some(Event {
kind: EventKind::Exit(cont.clone()),
span: *span,
})
Some(Event { kind, span: *span })
} else {
None
}
@ -123,7 +119,7 @@ impl NodeIndex {
}
}
#[derive(Debug, Clone)]
#[derive(Debug, Clone, PartialEq, Eq)]
enum NodeKind<C, A> {
Root,
Container(C, Option<NodeIndex>),
@ -234,7 +230,7 @@ impl<C: std::fmt::Debug + Clone, A: std::fmt::Debug + Clone> std::fmt::Debug for
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
const INDENT: &str = " ";
let mut level = 0;
writeln!(f)?;
write!(f, "\n")?;
for e in self.clone() {
let indent = INDENT.repeat(level);
match e.kind {
@ -257,30 +253,43 @@ impl<C: std::fmt::Debug + Clone, A: std::fmt::Debug + Clone> std::fmt::Debug for
#[cfg(test)]
mod test {
use crate::block;
use crate::Span;
#[test]
fn fmt_linear() {
let mut tree: super::Builder<u8, u8> = super::Builder::new();
tree.atom(1, Span::new(0, 1));
tree.atom(2, Span::new(1, 2));
tree.atom(3, Span::new(3, 4));
fn fmt() {
let mut tree = super::Builder::new();
tree.enter(1, Span::new(0, 1));
tree.atom(11, Span::new(0, 1));
tree.atom(12, Span::new(0, 1));
tree.exit();
tree.enter(2, Span::new(1, 5));
tree.enter(21, Span::new(2, 5));
tree.enter(211, Span::new(3, 4));
tree.atom(2111, Span::new(3, 4));
tree.exit();
tree.exit();
tree.enter(22, Span::new(4, 5));
tree.atom(221, Span::new(4, 5));
tree.exit();
tree.exit();
tree.enter(3, Span::new(5, 6));
tree.atom(31, Span::new(5, 6));
tree.exit();
assert_eq!(
format!("{:?}", tree),
concat!(
"Heading (0:1)\n",
" 0:1\n",
" 0:1\n",
"Blockquote (1:5)\n",
" Div (2:5)\n",
" Paragraph (3:4)\n",
" 3:4\n",
" Blankline (4:5)\n",
" Paragraph (4:5)\n",
" 4:5\n",
"Heading (5:6)\n",
" 5:6\n",
"\n",
"1 (0:1)\n",
" 11 (0:1)\n",
" 12 (0:1)\n",
"2 (1:5)\n",
" 21 (2:5)\n",
" 211 (3:4)\n",
" 2111 (3:4)\n",
" 22 (4:5)\n",
" 221 (4:5)\n",
"3 (5:6)\n",
" 31 (5:6)\n",
)
);
}