From 3a70cd8255196e479140ee826e0d0cb979160af0 Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Sat, 10 Dec 2022 10:26:06 +0100 Subject: [PATCH] wipppp --- src/block.rs | 167 ++++++++++++++++++++++++++++----------------------- src/lib.rs | 23 ++++--- src/tree.rs | 101 ++++++++++++++----------------- 3 files changed, 148 insertions(+), 143 deletions(-) diff --git a/src/block.rs b/src/block.rs index 370e403..f4c4646 100644 --- a/src/block.rs +++ b/src/block.rs @@ -3,6 +3,7 @@ use crate::EOF; use crate::tree; +use Atom::*; use Container::*; use Leaf::*; @@ -15,6 +16,9 @@ pub fn parse(src: &str) -> Tree { #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Block { + /// An atomic block, containing no children elements. + Atom(Atom), + /// A leaf block, containing only inline elements. Leaf(Leaf), @@ -22,6 +26,21 @@ pub enum Block { Container(Container), } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Atom { + /// Inline content with unparsed inline elements. + Inline, + + /// A line with no non-whitespace characters. + Blankline, + + /// A list of attributes. + Attributes, + + /// A thematic break. + ThematicBreak, +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Leaf { /// Span is empty, before first character of paragraph. @@ -43,10 +62,6 @@ pub enum Leaf { /// Span is language specifier. /// Each inline is a line. CodeBlock { fence_length: u8, c: u8 }, - - /// Span is from first to last character. - /// No inlines. - ThematicBreak, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -64,18 +79,6 @@ pub enum Container { Footnote { indent: u8 }, } -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum Atom { - /// Inline content with unparsed inline elements. - Inline, - - /// A line with no non-whitespace characters. - Blankline, - - /// A list of attributes. - Attributes, -} - struct Parser<'s> { src: &'s str, tree: tree::Builder, @@ -106,15 +109,8 @@ impl<'s> Parser<'s> { /// Recursively parse a block and all of its children. Return number of lines the block uses. fn parse_block(&mut self, lines: &mut [Span]) -> usize { - let blanklines = lines - .iter() - .take_while(|sp| sp.of(self.src).trim().is_empty()) - .map(|sp| self.tree.elem(Atom::Blankline, *sp)) - .count(); - let lines = &mut lines[blanklines..]; - Block::parse(lines.iter().map(|sp| sp.of(self.src))).map_or( - blanklines, + 0, |(kind, span, line_count)| { let lines = { let l = lines.len().min(line_count); @@ -147,7 +143,11 @@ impl<'s> Parser<'s> { lines }; - match &kind { + match kind { + Block::Atom(a) => { + assert_ne!(a, Inline); + self.tree.atom(a, span); + } Block::Leaf(l) => { self.tree.enter(kind, span); @@ -170,9 +170,8 @@ impl<'s> Parser<'s> { } } - lines - .iter() - .for_each(|line| self.tree.elem(Atom::Inline, *line)); + lines.iter().for_each(|line| self.tree.atom(Inline, *line)); + self.tree.exit(); } Block::Container(c) => { let (skip_chars, skip_lines_suffix) = match &c { @@ -194,7 +193,7 @@ impl<'s> Parser<'s> { .take_while(|c| c.is_whitespace()) .count() + usize::from(skip_chars)) - .min(sp.len()); + .min(sp.len() - usize::from(sp.of(self.src).ends_with('\n'))); *sp = sp.skip(skip); }); @@ -203,10 +202,11 @@ impl<'s> Parser<'s> { while l < line_count_inner { l += self.parse_block(&mut lines[l..line_count_inner]); } + self.tree.exit(); } } - self.tree.exit(); - blanklines + line_count + + line_count }, ) } @@ -229,11 +229,16 @@ impl Block { /// Determine what type of block a line can start. fn start(line: &str) -> (Self, Span) { - let start = line.chars().take_while(|c| c.is_whitespace()).count(); + let start = line + .chars() + .take_while(|c| *c != '\n' && c.is_whitespace()) + .count(); let line_t = &line[start..]; let mut chars = line_t.chars(); match chars.next().unwrap_or(EOF) { + EOF => Some((Self::Atom(Blankline), Span::empty_at(start))), + '\n' => Some((Self::Atom(Blankline), Span::by_len(start, 1))), '#' => chars .find(|c| *c != '#') .map_or(true, char::is_whitespace) @@ -286,7 +291,7 @@ impl Block { ) }), '-' | '*' if Self::is_thematic_break(chars.clone()) => Some(( - Self::Leaf(ThematicBreak), + Self::Atom(ThematicBreak), Span::from_slice(line, line_t.trim()), )), '-' => chars.next().map_or(true, char::is_whitespace).then(|| { @@ -350,9 +355,9 @@ impl Block { fn continues(self, line: &str) -> bool { //let start = Self::start(line); // TODO allow starting new block without blank line match self { + Self::Atom(..) => false, Self::Leaf(Paragraph | Heading { .. } | Table) => !line.trim().is_empty(), Self::Leaf(LinkDefinition) => line.starts_with(' ') && !line.trim().is_empty(), - Self::Leaf(ThematicBreak) => false, Self::Container(Blockquote) => line.trim().starts_with('>'), Self::Container(Footnote { indent } | ListItem { indent }) => { let spaces = line.chars().take_while(|c| c.is_whitespace()).count(); @@ -362,7 +367,7 @@ impl Block { let fence = match self { Self::Container(..) => ':', Self::Leaf(CodeBlock { c, .. }) => c as char, - Self::Leaf(..) => unreachable!(), + Self::Leaf(..) | Self::Atom(..) => unreachable!(), }; let mut c = line.chars(); !((&mut c).take((fence_length).into()).all(|c| c == fence) @@ -375,6 +380,7 @@ impl Block { impl std::fmt::Display for Block { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { + Block::Atom(a) => std::fmt::Debug::fmt(a, f), Block::Leaf(e) => std::fmt::Debug::fmt(e, f), Block::Container(c) => std::fmt::Debug::fmt(c, f), } @@ -408,6 +414,7 @@ fn lines(src: &str) -> impl Iterator + '_ { #[cfg(test)] mod test { + use crate::tree::EventKind; use crate::tree::EventKind::*; use super::Atom::*; @@ -430,7 +437,7 @@ mod test { test_parse!( "para\n", (Enter(Leaf(Paragraph)), ""), - (Element(Inline), "para"), + (EventKind::Atom(Inline), "para"), (Exit(Leaf(Paragraph)), ""), ); } @@ -440,8 +447,8 @@ mod test { test_parse!( "para0\npara1\n", (Enter(Leaf(Paragraph)), ""), - (Element(Inline), "para0\n"), - (Element(Inline), "para1"), + (EventKind::Atom(Inline), "para0\n"), + (EventKind::Atom(Inline), "para1"), (Exit(Leaf(Paragraph)), ""), ); } @@ -457,39 +464,41 @@ mod test { "15\n", // ), (Enter(Leaf(Heading { level: 1 })), "#"), - (Element(Inline), "2"), + (EventKind::Atom(Inline), "2"), (Exit(Leaf(Heading { level: 1 })), "#"), - (Element(Blankline), "\n"), + (EventKind::Atom(Blankline), "\n"), (Enter(Leaf(Heading { level: 1 })), "#"), - (Element(Inline), "8\n"), - (Element(Inline), " 12\n"), - (Element(Inline), "15"), + (EventKind::Atom(Inline), "8\n"), + (EventKind::Atom(Inline), " 12\n"), + (EventKind::Atom(Inline), "15"), (Exit(Leaf(Heading { level: 1 })), "#"), ); } #[test] fn parse_blockquote() { + /* test_parse!( "> a\n", - (Enter(Container(Blockquote)), ">"), - (Enter(Leaf(Paragraph)), ""), - (Element(Inline), "a"), - (Exit(Leaf(Paragraph)), ""), - (Exit(Container(Blockquote)), ">"), + (Enter, Container(Blockquote), ">"), + (Enter, Leaf(Paragraph), ""), + (Element, Atom(Inline), "a"), + (Exit, Leaf(Paragraph), ""), + (Exit, Container(Blockquote), ">"), ); test_parse!( "> \n", - (Enter(Container(Blockquote)), ">"), - (Element(Blankline), " \n"), - (Exit(Container(Blockquote)), ">"), + (Enter, Container(Blockquote), ">"), + (Element, Atom(Blankline), "\n"), + (Exit, Container(Blockquote), ">"), ); test_parse!( ">", - (Enter(Container(Blockquote)), ">"), - (Element(Blankline), ""), - (Exit(Container(Blockquote)), ">"), + (Enter, Container(Blockquote), ">"), + (Element, Atom(Blankline), ""), + (Exit, Container(Blockquote), ">"), ); + */ test_parse!( concat!( "> a\n", @@ -500,15 +509,15 @@ mod test { ), (Enter(Container(Blockquote)), ">"), (Enter(Leaf(Paragraph)), ""), - (Element(Inline), "a"), + (EventKind::Atom(Inline), "a"), (Exit(Leaf(Paragraph)), ""), - (Element(Blankline), ""), + (EventKind::Atom(Blankline), "\n"), (Enter(Leaf(Heading { level: 2 })), "##"), - (Element(Inline), "hl"), + (EventKind::Atom(Inline), "hl"), (Exit(Leaf(Heading { level: 2 })), "##"), - (Element(Blankline), ""), + (EventKind::Atom(Blankline), "\n"), (Enter(Leaf(Paragraph)), ""), - (Element(Inline), "para"), + (EventKind::Atom(Inline), "para"), (Exit(Leaf(Paragraph)), ""), (Exit(Container(Blockquote)), ">"), ); @@ -519,13 +528,13 @@ mod test { test_parse!( "> \n", (Enter(Container(Blockquote)), ">"), - (Element(Blankline), "\n"), + (EventKind::Atom(Blankline), "\n"), (Exit(Container(Blockquote)), ">"), ); test_parse!( ">", (Enter(Container(Blockquote)), ">"), - (Element(Blankline), ""), + (EventKind::Atom(Blankline), ""), (Exit(Container(Blockquote)), ">"), ); } @@ -541,7 +550,7 @@ mod test { })), "", ), - (Element(Inline), "l0\n"), + (EventKind::Atom(Inline), "l0\n"), ( Exit(Leaf(CodeBlock { fence_length: 3, @@ -565,7 +574,7 @@ mod test { })), "" ), - (Element(Inline), "l0\n"), + (EventKind::Atom(Inline), "l0\n"), ( Exit(Leaf(CodeBlock { fence_length: 3, @@ -573,9 +582,9 @@ mod test { })), "" ), - (Element(Blankline), "\n"), + (EventKind::Atom(Blankline), "\n"), (Enter(Leaf(Paragraph)), ""), - (Element(Inline), "para"), + (EventKind::Atom(Inline), "para"), (Exit(Leaf(Paragraph)), ""), ); test_parse!( @@ -593,9 +602,9 @@ mod test { })), "lang" ), - (Element(Inline), "l0\n"), - (Element(Inline), "```\n"), - (Element(Inline), " l1\n"), + (EventKind::Atom(Inline), "l0\n"), + (EventKind::Atom(Inline), "```\n"), + (EventKind::Atom(Inline), " l1\n"), ( Exit(Leaf(CodeBlock { fence_length: 4, @@ -620,7 +629,7 @@ mod test { })), "" ), - (Element(Inline), "a\n"), + (EventKind::Atom(Inline), "a\n"), ( Exit(Leaf(CodeBlock { fence_length: 3, @@ -635,7 +644,7 @@ mod test { })), "" ), - (Element(Inline), "bbb\n"), + (EventKind::Atom(Inline), "bbb\n"), ( Exit(Leaf(CodeBlock { fence_length: 3, @@ -658,8 +667,8 @@ mod test { })), "", ), - (Element(Inline), "code\n"), - (Element(Inline), " block\n"), + (EventKind::Atom(Inline), "code\n"), + (EventKind::Atom(Inline), " block\n"), ( Exit(Leaf(CodeBlock { fence_length: 3, @@ -675,7 +684,7 @@ mod test { test_parse!( "[tag]: url\n", (Enter(Leaf(LinkDefinition)), "tag"), - (Element(Inline), "url"), + (EventKind::Atom(Inline), "url"), (Exit(Leaf(LinkDefinition)), "tag"), ); } @@ -686,7 +695,7 @@ mod test { "[^tag]: description\n", (Enter(Container(Footnote { indent: 0 })), "tag"), (Enter(Leaf(Paragraph)), ""), - (Element(Inline), "description"), + (EventKind::Atom(Inline), "description"), (Exit(Leaf(Paragraph)), ""), (Exit(Container(Footnote { indent: 0 })), "tag"), ); @@ -705,6 +714,12 @@ mod test { }; } + #[test] + fn block_blankline() { + test_block!("\n", Block::Atom(Blankline), "\n", 1); + test_block!(" \n", Block::Atom(Blankline), "\n", 1); + } + #[test] fn block_multiline() { test_block!( @@ -733,14 +748,14 @@ mod test { #[test] fn block_thematic_break() { - test_block!("---\n", Block::Leaf(ThematicBreak), "---", 1); + test_block!("---\n", Block::Atom(ThematicBreak), "---", 1); test_block!( concat!( " -*- -*-\n", "\n", // "para", // ), - Block::Leaf(ThematicBreak), + Block::Atom(ThematicBreak), "-*- -*-", 1 ); diff --git a/src/lib.rs b/src/lib.rs index aa58888..45525e2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -269,6 +269,7 @@ impl<'s> Event<'s> { impl<'s> Container<'s> { fn from_block(src: &'s str, block: block::Block) -> Self { match block { + block::Block::Atom(a) => todo!(), block::Block::Leaf(l) => match l { block::Leaf::Paragraph => Self::Paragraph, block::Leaf::Heading { level } => Self::Heading { level }, @@ -342,14 +343,14 @@ impl<'s> Iterator for Parser<'s> { return Some(Event::from_inline(self.src, inline)); } else if let Some(ev) = self.tree.next() { match ev.kind { - tree::EventKind::Element(atom) => { - assert_eq!(atom, block::Atom::Inline); - let last_inline = self.tree.neighbors().next().is_none(); + tree::EventKind::Atom(a) => { + assert_eq!(a, block::Atom::Inline); + let last_inline = self.tree.atoms().next().is_none(); parser.parse(ev.span.of(self.src), last_inline); } - tree::EventKind::Exit(block) => { + tree::EventKind::Exit(c) => { self.parser = None; - return Some(Event::End(Container::from_block(self.src, block))); + return Some(Event::End(Container::from_block(self.src, c))); } tree::EventKind::Enter(..) => unreachable!(), } @@ -359,20 +360,21 @@ impl<'s> Iterator for Parser<'s> { for ev in &mut self.tree { let content = ev.span.of(self.src); let event = match ev.kind { - tree::EventKind::Element(atom) => match atom { + tree::EventKind::Atom(a) => match a { block::Atom::Inline => panic!("inline outside leaf block"), block::Atom::Blankline => Event::Atom(Atom::Blankline), + block::Atom::ThematicBreak => Event::Atom(Atom::ThematicBreak), block::Atom::Attributes => { self.block_attributes.parse(content); continue; } }, - tree::EventKind::Enter(block) => { - if matches!(block, block::Block::Leaf(_)) { + tree::EventKind::Enter(c) => { + if matches!(c, block::Block::Leaf(_)) { self.parser = Some(inline::Parser::new()); self.inline_start = ev.span.end(); } - let container = match block { + let container = match c { block::Block::Leaf(block::Leaf::CodeBlock { .. }) => { self.inline_start += 1; // skip newline Container::CodeBlock { @@ -386,7 +388,7 @@ impl<'s> Iterator for Parser<'s> { }; Event::Start(container, self.block_attributes.take()) } - tree::EventKind::Exit(block) => Event::End(Container::from_block(self.src, block)), + tree::EventKind::Exit(c) => Event::End(Container::from_block(self.src, c)), }; return Some(event); } @@ -465,6 +467,7 @@ mod test { Start(Paragraph, Attributes::none()), Str("para0"), End(Paragraph), + Atom(Blankline), Start(Paragraph, Attributes::none()), Str("para1"), End(Paragraph), diff --git a/src/tree.rs b/src/tree.rs index c5be38e..cd5b74b 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -1,10 +1,10 @@ use crate::Span; #[derive(Debug, Clone, PartialEq, Eq)] -pub enum EventKind { +pub enum EventKind { Enter(C), - Element(E), Exit(C), + Atom(A), } #[derive(Debug, Clone, PartialEq, Eq)] @@ -13,25 +13,15 @@ pub struct Event { pub span: Span, } -pub struct Object { - kind: ObjectKind, - span: Span, -} - -pub enum ObjectKind { - Container(C), - Element(E), -} - -#[derive(Debug, Clone)] -pub struct Tree { - nodes: Vec>, +#[derive(Clone)] +pub struct Tree { + nodes: Vec>, branch: Vec, head: Option, } -impl Tree { - fn new(nodes: Vec>) -> Self { +impl Tree { + fn new(nodes: Vec>) -> Self { let head = nodes[NodeIndex::root().index()].next; Self { nodes, @@ -40,26 +30,25 @@ impl Tree { } } - pub fn neighbors(&self) -> impl Iterator> + '_ { + pub fn atoms(&self) -> impl Iterator + '_ { let mut head = self.head; std::iter::from_fn(move || { head.take().map(|h| { let n = &self.nodes[h.index()]; let kind = match &n.kind { NodeKind::Root => unreachable!(), - NodeKind::Container(c, _) => ObjectKind::Container(*c), - NodeKind::Element(e) => ObjectKind::Element(*e), + NodeKind::Container(..) => panic!(), + NodeKind::Atom(a) => *a, }; - let span = n.span; head = n.next; - Object { kind, span } + (kind, n.span) }) }) } } -impl Iterator for Tree { - type Item = Event; +impl Iterator for Tree { + type Item = Event; fn next(&mut self) -> Option { if let Some(head) = self.head { @@ -71,9 +60,9 @@ impl Iterator for Tree { self.head = *child; EventKind::Enter(*c) } - NodeKind::Element(e) => { + NodeKind::Atom(e) => { self.head = n.next; - EventKind::Element(*e) + EventKind::Atom(*e) } }; Some(Event { kind, span: n.span }) @@ -114,27 +103,27 @@ impl NodeIndex { } #[derive(Debug, Clone)] -enum NodeKind { +enum NodeKind { Root, Container(C, Option), - Element(E), + Atom(A), } #[derive(Debug, Clone)] -struct Node { +struct Node { span: Span, - kind: NodeKind, + kind: NodeKind, next: Option, } -#[derive(Debug, Clone)] -pub struct Builder { - nodes: Vec>, +#[derive(Clone)] +pub struct Builder { + nodes: Vec>, branch: Vec, head: Option, } -impl Builder { +impl Builder { pub(super) fn new() -> Self { Builder { nodes: vec![Node { @@ -147,10 +136,10 @@ impl Builder { } } - pub(super) fn elem(&mut self, e: E, span: Span) { + pub(super) fn atom(&mut self, a: A, span: Span) { self.add_node(Node { span, - kind: NodeKind::Element(e), + kind: NodeKind::Atom(a), next: None, }); } @@ -172,17 +161,17 @@ impl Builder { } } - pub(super) fn finish(self) -> Tree { + pub(super) fn finish(self) -> Tree { Tree::new(self.nodes) } - fn add_node(&mut self, node: Node) { + fn add_node(&mut self, node: Node) { let ni = NodeIndex::new(self.nodes.len()); self.nodes.push(node); if let Some(head_ni) = &mut self.head { let mut head = &mut self.nodes[head_ni.index()]; match &mut head.kind { - NodeKind::Root | NodeKind::Element(_) => { + NodeKind::Root | NodeKind::Atom(_) => { // update next pointer of previous node assert_eq!(head.next, None); head.next = Some(ni); @@ -205,30 +194,28 @@ impl Builder { } } -impl std::fmt::Display for Builder { +impl std::fmt::Debug for Builder { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { self.clone().finish().fmt(f) } } -impl std::fmt::Display for Tree { +impl std::fmt::Debug for Tree { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { const INDENT: &str = " "; let mut level = 0; for e in self.clone() { let indent = INDENT.repeat(level); match e.kind { - EventKind::Enter(container) => { - write!(f, "{}{}", indent, container)?; + EventKind::Enter(c) => { + write!(f, "{}{:?}", indent, c)?; level += 1; } - EventKind::Exit(_) => { + EventKind::Exit(..) => { level -= 1; continue; } - EventKind::Element(element) => { - write!(f, "{}{}", indent, element)?; - } + EventKind::Atom(a) => write!(f, "{}{:?}", indent, a)?, } writeln!(f, " ({}:{})", e.span.start(), e.span.end())?; } @@ -243,11 +230,11 @@ mod test { #[test] fn fmt_linear() { let mut tree: super::Builder = super::Builder::new(); - tree.elem(1, Span::new(0, 1)); - tree.elem(2, Span::new(1, 2)); - tree.elem(3, Span::new(3, 4)); + tree.atom(1, Span::new(0, 1)); + tree.atom(2, Span::new(1, 2)); + tree.atom(3, Span::new(3, 4)); assert_eq!( - tree.to_string(), + format!("{:?}", tree), concat!( "1 (0:1)\n", "2 (1:2)\n", @@ -260,24 +247,24 @@ mod test { fn fmt_container() { let mut tree: super::Builder = super::Builder::new(); tree.enter(1, Span::new(0, 1)); - tree.elem(11, Span::new(0, 1)); - tree.elem(12, Span::new(0, 1)); + tree.atom(11, Span::new(0, 1)); + tree.atom(12, Span::new(0, 1)); tree.exit(); tree.enter(2, Span::new(1, 5)); tree.enter(21, Span::new(2, 5)); tree.enter(211, Span::new(3, 4)); - tree.elem(2111, Span::new(3, 4)); + tree.atom(2111, Span::new(3, 4)); tree.exit(); tree.exit(); tree.enter(22, Span::new(4, 5)); - tree.elem(221, Span::new(4, 5)); + tree.atom(221, Span::new(4, 5)); tree.exit(); tree.exit(); tree.enter(3, Span::new(5, 6)); - tree.elem(31, Span::new(5, 6)); + tree.atom(31, Span::new(5, 6)); tree.exit(); assert_eq!( - tree.to_string(), + format!("{:?}", tree), concat!( "1 (0:1)\n", " 11 (0:1)\n",