From 660e8041b0fe560cc7bc744d59d8a28f0426d0ff Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Mon, 28 Nov 2022 20:12:49 +0100 Subject: [PATCH] wip --- src/block.rs | 109 +++++++++++++++++++++++++------------------------- src/inline.rs | 65 ++++++++++++++++-------------- src/lex.rs | 1 - src/lib.rs | 40 +++++++++++------- src/tree.rs | 79 ++++++++++++++++++------------------ 5 files changed, 155 insertions(+), 139 deletions(-) diff --git a/src/block.rs b/src/block.rs index 2f52fb2..f457cd2 100644 --- a/src/block.rs +++ b/src/block.rs @@ -13,13 +13,13 @@ pub fn parse(src: &str) -> Tree { Parser::new(src).parse() } -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Block { Leaf(Leaf), Container(Container), } -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Leaf { Paragraph, Heading { level: u8 }, @@ -27,9 +27,10 @@ pub enum Leaf { Table, LinkDefinition, CodeBlock { fence_length: u8 }, + ThematicBreak, } -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Container { Blockquote, Div { fence_length: u8 }, @@ -37,14 +38,14 @@ pub enum Container { Footnote { indent: u8 }, } -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Atom { /// Inline content with unparsed inline elements. Inline, /// A line with no non-whitespace characters. Blankline, - /// Thematic break. - ThematicBreak, + ///// Thematic break. + //ThematicBreak, } struct Parser<'s> { @@ -65,7 +66,7 @@ impl<'s> Parser<'s> { pub fn parse(mut self) -> Tree { let mut lines = lines(self.src).collect::>(); let mut line_pos = 0; - loop { + while line_pos < lines.len() { let line_count = self.parse_block(&mut lines[line_pos..]); if line_count == 0 { break; @@ -202,7 +203,6 @@ impl Block { .flatten() } _ => { - /* let thematic_break = || { let mut without_whitespace = line.chars().filter(|c| !c.is_whitespace()); let length = without_whitespace.clone().count(); @@ -211,9 +211,8 @@ impl Block { || without_whitespace.all(|c| c == '*'))) .then(|| (Self::Leaf(ThematicBreak), Span::by_len(start, line.len()))) }; - */ - //thematic_break() - None + + thematic_break() } } .unwrap_or((Self::Leaf(Paragraph), Span::new(0, 0))) @@ -225,7 +224,7 @@ impl Block { Self::Leaf(Paragraph | Heading { .. } | Table | LinkDefinition) => { !line.trim().is_empty() } - Self::Leaf(Attributes) => false, + Self::Leaf(Attributes | ThematicBreak) => false, Self::Container(Blockquote) => line.trim().starts_with('>'), Self::Container(Footnote { indent } | ListItem { indent }) => { let spaces = line.chars().take_while(|c| c.is_whitespace()).count(); @@ -276,7 +275,7 @@ fn lines(src: &str) -> impl Iterator + '_ { #[cfg(test)] mod test { - use crate::tree::Event; + use crate::tree::EventKind::*; use crate::Span; use super::Atom::*; @@ -288,81 +287,82 @@ mod test { macro_rules! test_parse { ($src:expr $(,$($event:expr),* $(,)?)?) => { let t = super::Parser::new($src).parse(); - let actual = t.iter().collect::>(); + let actual = t.iter().map(|ev| (ev.kind, ev.span.of($src))).collect::>(); let expected = &[$($($event),*,)?]; assert_eq!(actual, expected, "\n\n{}\n\n", $src); }; } #[test] - fn parse_elem_oneline() { + fn parse_para_oneline() { test_parse!( "para\n", - Event::Enter(&Leaf(Paragraph), Span::new(0, 0)), - Event::Element(&Inline, Span::new(0, 5)), - Event::Exit, + (Enter(Leaf(Paragraph)), ""), + (Element(Inline), "para\n"), + (Exit, ""), ); } #[test] - fn parse_elem_multiline() { + fn parse_para_multiline() { test_parse!( - "para\npara\n", - Event::Enter(&Leaf(Paragraph), Span::new(0, 0)), - Event::Element(&Inline, Span::new(0, 5)), - Event::Element(&Inline, Span::new(5, 10)), - Event::Exit, + "para0\npara1\n", + (Enter(Leaf(Paragraph)), ""), + (Element(Inline), "para0\n"), + (Element(Inline), "para1\n"), + (Exit, ""), ); } #[test] - fn parse_elem_multi() { + fn parse_heading_multi() { test_parse!( concat!( "# 2\n", "\n", - " # 8\n", + " # 8\n", " 12\n", "15\n", // ), - Event::Enter(&Leaf(Heading { level: 1 }), Span::new(0, 1)), - Event::Element(&Inline, Span::new(1, 4)), - Event::Exit, - Event::Element(&Blankline, Span::new(4, 5)), - Event::Enter(&Leaf(Heading { level: 1 }), Span::new(6, 7)), - Event::Element(&Inline, Span::new(7, 10)), - Event::Element(&Inline, Span::new(10, 15)), - Event::Element(&Inline, Span::new(15, 18)), - Event::Exit, + (Enter(Leaf(Heading { level: 1 })), "#"), + (Element(Inline), " 2\n"), + (Exit, "#"), + (Element(Blankline), "\n"), + (Enter(Leaf(Heading { level: 1 })), "#"), + (Element(Inline), " 8\n"), + (Element(Inline), " 12\n"), + (Element(Inline), "15\n"), + (Exit, "#"), ); } #[test] - fn parse_container() { + fn parse_blockquote() { test_parse!( concat!( "> a\n", ">\n", "> ## hl\n", ">\n", - "> para\n", // + "> para\n", // ), - Event::Enter(&Container(Blockquote), Span::new(0, 1)), - Event::Enter(&Leaf(Paragraph), Span::new(1, 1)), - Event::Element(&Inline, Span::new(1, 4)), - Event::Exit, - Event::Element(&Blankline, Span::new(5, 6)), - Event::Enter(&Leaf(Heading { level: 2 }), Span::new(8, 10)), - Event::Element(&Inline, Span::new(10, 14)), - Event::Exit, - Event::Element(&Blankline, Span::new(15, 16)), - Event::Enter(&Leaf(Paragraph), Span::new(17, 17)), - Event::Element(&Inline, Span::new(17, 23)), - Event::Exit, - Event::Exit, + (Enter(Container(Blockquote)), ">"), + (Enter(Leaf(Paragraph)), ""), + (Element(Inline), " a\n"), + (Exit, ""), + (Element(Blankline), "\n"), + (Enter(Leaf(Heading { level: 2 })), "##"), + (Element(Inline), " hl\n"), + (Exit, "##"), + (Element(Blankline), "\n"), + (Enter(Leaf(Paragraph)), ""), + (Element(Inline), " para\n"), + (Exit, ""), + (Exit, ">"), ); } + /* #[test] fn parse_code_block() { test_parse!( @@ -372,12 +372,13 @@ mod test { "l1\n", "```", // ), - Event::Enter(&Leaf(CodeBlock { fence_length: 3 }), Span::new(0, 8)), - Event::Element(&Inline, Span::new(8, 11)), - Event::Element(&Inline, Span::new(11, 14)), - Event::Exit + (Event::Enter(Leaf(CodeBlock { fence_length: 3 })), "```lang\n"), + (Event::Element(Inline), "l0\n"), + (Event::Element(Inline), "l1\n"), + (Event::Exit, "```lang\n"), ); } + */ macro_rules! test_block { ($src:expr, $kind:expr, $str:expr, $len:expr $(,)?) => { diff --git a/src/inline.rs b/src/inline.rs index ffd3472..db38540 100644 --- a/src/inline.rs +++ b/src/inline.rs @@ -303,7 +303,7 @@ mod test { #[allow(unused)] let mut p = super::Parser::new(); p.parse($src); - let actual = p.collect::>(); + let actual = p.map(|ev| (ev.kind, ev.span.of($src))).collect::>(); let expected = &[$($($token),*,)?]; assert_eq!(actual, expected, "\n\n{}\n\n", $src); }; @@ -320,37 +320,40 @@ mod test { #[test] fn str() { - test_parse!("abc", Node(Str).span(0, 3)); - test_parse!("abc def", Node(Str).span(0, 7)); + test_parse!("abc", (Node(Str), "abc")); + test_parse!("abc def", (Node(Str), "abc def")); } #[test] fn verbatim() { - test_parse!("`abc`", Node(Verbatim).span(1, 4)); - test_parse!("`abc", Node(Verbatim).span(1, 4)); - test_parse!("``abc``", Node(Verbatim).span(2, 5)); - test_parse!("abc `def`", Node(Str).span(0, 4), Node(Verbatim).span(5, 8)); + test_parse!("`abc`", (Node(Verbatim), "abc")); + test_parse!("`abc", (Node(Verbatim), "abc")); + test_parse!("``abc``", (Node(Verbatim), "abc")); + test_parse!("abc `def`", (Node(Str), "abc "), (Node(Verbatim), "def")); } #[test] fn math() { - test_parse!("$`abc`", Node(InlineMath).span(2, 5)); - test_parse!("$$```abc", Node(DisplayMath).span(5, 8)); + test_parse!("$`abc`", (Node(InlineMath), "abc")); + test_parse!("$`abc` str", (Node(InlineMath), "abc"), (Node(Str), " str")); + test_parse!("$$`abc`", (Node(DisplayMath), "abc")); + test_parse!("$`abc", (Node(InlineMath), "abc")); + test_parse!("$```abc```", (Node(InlineMath), "abc"),); } #[test] fn container_basic() { test_parse!( "_abc_", - Enter(Emphasis).span(0, 1), - Node(Str).span(1, 4), - Exit(Emphasis).span(4, 5), + (Enter(Emphasis), "_"), + (Node(Str), "abc"), + (Exit(Emphasis), "_"), ); test_parse!( "{_abc_}", - Enter(Emphasis).span(0, 2), - Node(Str).span(2, 5), - Exit(Emphasis).span(5, 7), + (Enter(Emphasis), "{_"), + (Node(Str), "abc"), + (Exit(Emphasis), "_}"), ); } @@ -358,40 +361,40 @@ mod test { fn container_nest() { test_parse!( "{_{_abc_}_}", - Enter(Emphasis).span(0, 2), - Enter(Emphasis).span(2, 4), - Node(Str).span(4, 7), - Exit(Emphasis).span(7, 9), - Exit(Emphasis).span(9, 11), + (Enter(Emphasis), "{_"), + (Enter(Emphasis), "{_"), + (Node(Str), "abc"), + (Exit(Emphasis), "_}"), + (Exit(Emphasis), "_}"), ); test_parse!( "*_abc_*", - Enter(Strong).span(0, 1), - Enter(Emphasis).span(1, 2), - Node(Str).span(2, 5), - Exit(Emphasis).span(5, 6), - Exit(Strong).span(6, 7), + (Enter(Strong), "*"), + (Enter(Emphasis), "_"), + (Node(Str), "abc"), + (Exit(Emphasis), "_"), + (Exit(Strong), "*"), ); } #[test] fn container_unopened() { - test_parse!("*}abc", Node(Str).span(0, 5)); + test_parse!("*}abc", (Node(Str), "*}abc")); } #[test] fn container_close_parent() { test_parse!( "{*{_abc*}", - Enter(Strong).span(0, 2), - Node(Str).span(2, 7), - Exit(Strong).span(7, 9), + (Enter(Strong), "{*"), + (Node(Str), "{_abc"), + (Exit(Strong), "*}"), ); } #[test] fn container_close_block() { - test_parse!("{_abc", Node(Str).span(0, 5),); - test_parse!("{_{*{_abc", Node(Str).span(0, 9),); + test_parse!("{_abc", (Node(Str), "{_abc")); + test_parse!("{_{*{_abc", (Node(Str), "{_{*{_abc")); } } diff --git a/src/lex.rs b/src/lex.rs index d6fcb17..fa7b9d7 100644 --- a/src/lex.rs +++ b/src/lex.rs @@ -22,7 +22,6 @@ pub enum Kind { Close(Delimiter), Sym(Symbol), Seq(Sequence), - Eof, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] diff --git a/src/lib.rs b/src/lib.rs index d6f18d7..0532573 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,15 +1,16 @@ mod block; +mod html; mod inline; mod lex; mod span; mod tree; +use span::Span; + pub struct Block; const EOF: char = '\0'; -use span::Span; - pub struct Parser<'s> { src: &'s str, tree: block::Tree, @@ -35,11 +36,13 @@ impl<'s> Parser<'s> { } } +#[derive(Debug, PartialEq, Eq)] pub enum ListType { Unordered, Ordered, } +#[derive(Debug, PartialEq, Eq)] pub enum TagKind<'s> { Paragraph, Heading { level: u8 }, @@ -58,6 +61,13 @@ pub enum TagKind<'s> { Footnote { tag: &'s str }, } +#[derive(Debug, PartialEq, Eq)] +pub enum Event2<'s> { + Start(TagKind<'s>), + End(TagKind<'s>), + Blankline, +} + #[derive(Debug, PartialEq, Eq)] pub enum Event { Start(block::Block), @@ -83,33 +93,33 @@ impl<'s> Iterator for Iter<'s> { inline.span = inline.span.translate(self.inline_start); return Some(Event::Inline(inline)); } else if let Some(ev) = self.tree.next() { - match ev { - tree::Event::Element(atom, sp) => { - assert_eq!(*atom, block::Atom::Inline); - parser.parse(sp.of(self.src)); - self.inline_start = sp.start(); + match ev.kind { + tree::EventKind::Element(atom) => { + assert_eq!(atom, block::Atom::Inline); + parser.parse(ev.span.of(self.src)); + self.inline_start = ev.span.start(); } - tree::Event::Exit => { + tree::EventKind::Exit => { self.parser = None; return Some(Event::End); } - tree::Event::Enter(..) => unreachable!(), + tree::EventKind::Enter(..) => unreachable!(), } } } - self.tree.next().map(|ev| match ev { - tree::Event::Element(atom, _sp) => { - assert_eq!(*atom, block::Atom::Blankline); + self.tree.next().map(|ev| match ev.kind { + tree::EventKind::Element(atom) => { + assert_eq!(atom, block::Atom::Blankline); Event::Blankline } - tree::Event::Enter(block, ..) => { + tree::EventKind::Enter(block) => { if matches!(block, block::Block::Leaf(..)) { self.parser = Some(inline::Parser::new()); } - Event::Start(block.clone()) + Event::Start(block) } - tree::Event::Exit => Event::End, + tree::EventKind::Exit => Event::End, }) } } diff --git a/src/tree.rs b/src/tree.rs index 088fb5c..714f228 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -1,11 +1,24 @@ use crate::Span; -#[derive(Debug)] +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum EventKind { + Enter(C), + Element(E), + Exit, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Event { + pub kind: EventKind, + pub span: Span, +} + +#[derive(Debug, Clone)] pub struct Tree { nodes: Vec>, } -impl Tree { +impl Tree { fn new(nodes: Vec>) -> Self { Self { nodes } } @@ -15,53 +28,41 @@ impl Tree { } } -#[derive(Debug, PartialEq, Eq)] -pub enum Event<'a, C, E> { - Enter(&'a C, Span), - Element(&'a E, Span), - Exit, -} - -impl<'a, C, E> Event<'a, C, E> { - pub fn span(&self) -> Span { - match self { - Self::Enter(_, sp) | Self::Element(_, sp) => *sp, - Self::Exit => panic!(), - } - } -} - pub struct Iter<'a, C, E> { nodes: &'a [Node], branch: Vec, head: Option, } -impl<'a, C, E> Iterator for Iter<'a, C, E> { - type Item = Event<'a, C, E>; +impl<'a, C: Copy, E: Copy> Iterator for Iter<'a, C, E> { + type Item = Event; fn next(&mut self) -> Option { if let Some(head) = self.head { let n = &self.nodes[head.index()]; - match &n.kind { + let kind = match &n.kind { NodeKind::Root => { self.head = n.next; - self.next() + return self.next(); } NodeKind::Container(c, child) => { self.branch.push(head); self.head = *child; - Some(Event::Enter(c, n.span)) + EventKind::Enter(*c) } NodeKind::Element(e) => { self.head = n.next; - Some(Event::Element(e, n.span)) + EventKind::Element(*e) } - } + }; + Some(Event { kind, span: n.span }) } else if let Some(block_ni) = self.branch.pop() { - let Node { next, .. } = &self.nodes[block_ni.index()]; + let Node { next, span, .. } = &self.nodes[block_ni.index()]; self.head = *next; - Some(Event::Exit) + Some(Event { + kind: EventKind::Exit, + span: *span, + }) } else { None } @@ -117,7 +118,7 @@ pub struct Builder { head: Option, } -impl Builder { +impl Builder { pub(super) fn new() -> Self { Builder { nodes: vec![Node { @@ -188,30 +189,32 @@ impl Builder { } } -impl std::fmt::Display - for Builder -{ +impl std::fmt::Display for Builder { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { self.clone().finish().fmt(f) } } -impl std::fmt::Display for Tree { +impl std::fmt::Display for Tree { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { const INDENT: &str = " "; let mut level = 0; for e in self.iter() { let indent = INDENT.repeat(level); - match e { - Event::Enter(container, sp) => { - writeln!(f, "{}{} ({}:{})", indent, container, sp.start(), sp.end())?; + match e.kind { + EventKind::Enter(container) => { + write!(f, "{}{}", indent, container)?; level += 1; } - Event::Exit => level -= 1, - Event::Element(element, sp) => { - writeln!(f, "{}{} ({}:{})", indent, element, sp.start(), sp.end())?; + EventKind::Exit => { + level -= 1; + continue; + } + EventKind::Element(element) => { + write!(f, "{}{}", indent, element)?; } } + writeln!(f, " ({}:{})", e.span.start(), e.span.end())?; } Ok(()) }