From ec69d98c756f0abc73d23da87927c587d3803262 Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Sun, 22 Jan 2023 21:55:14 +0100 Subject: [PATCH] amend! parser: determine tight vs loose lists block: determine tight vs loose lists --- src/block.rs | 371 +++++++++++++++++++++++++++++++++++++++++++-------- src/html.rs | 44 +++--- src/lib.rs | 34 +---- src/tree.rs | 77 +++-------- 4 files changed, 360 insertions(+), 166 deletions(-) diff --git a/src/block.rs b/src/block.rs index 14eca69..9e841e1 100644 --- a/src/block.rs +++ b/src/block.rs @@ -13,7 +13,6 @@ use ListType::*; pub type Tree = tree::Tree; pub type TreeBuilder = tree::Builder; -pub type Element = tree::Element; #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Node { @@ -81,8 +80,11 @@ pub enum Container { /// Span is class specifier, possibly empty. Div, + /// Span is `:`. + DescriptionList, + /// Span is the list marker of the first list item in the list. - List(ListType), + List { ty: ListType, tight: bool }, /// Span is the list marker. ListItem(ListType), @@ -96,7 +98,6 @@ pub enum ListType { Unordered(u8), Ordered(crate::OrderedListNumbering, crate::OrderedListStyle), Task, - Description, } #[derive(Debug)] @@ -107,6 +108,8 @@ struct OpenList { /// Depth in the tree where the direct list items of the list are. Needed to determine when to /// close the list. depth: u16, + /// Index to node in tree, required to update tightness. + node: tree::NodeIndex, } /// Parser for block-level tree structure of entire document. @@ -114,7 +117,10 @@ struct TreeParser<'s> { src: &'s str, tree: TreeBuilder, - lists_open: Vec, + /// The previous block element was a blank line. + prev_blankline: bool, + /// Stack of currently open lists. + open_lists: Vec, } impl<'s> TreeParser<'s> { @@ -123,7 +129,8 @@ impl<'s> TreeParser<'s> { Self { src, tree: TreeBuilder::new(), - lists_open: Vec::new(), + prev_blankline: false, + open_lists: Vec::new(), } } @@ -138,7 +145,7 @@ impl<'s> TreeParser<'s> { } line_pos += line_count; } - for _ in self.lists_open.drain(..) { + for _ in self.open_lists.drain(..) { self.tree.exit(); // list } self.tree.finish() @@ -176,6 +183,45 @@ impl<'s> TreeParser<'s> { lines }; + // close list if a non list item or a list item of new type appeared + if let Some(OpenList { ty, depth, .. }) = self.open_lists.last() { + assert!(usize::from(*depth) <= self.tree.depth()); + if self.tree.depth() == (*depth).into() + && !matches!( + kind, + Block::Container(Container::ListItem(ty_new)) if *ty == ty_new, + ) + { + self.tree.exit(); // list + self.open_lists.pop(); + } + } + + // set list to loose if blankline discovered + if matches!(kind, Block::Atom(Atom::Blankline)) { + self.prev_blankline = true; + } else { + if self.prev_blankline { + for OpenList { node, depth, .. } in &self.open_lists { + if usize::from(*depth) < self.tree.depth() + && matches!(kind, Block::Container(Container::ListItem { .. })) + { + continue; + } + if let tree::Element::Container(Node::Container(Container::List { + tight, + .. + })) = self.tree.elem_mut(*node) + { + *tight = false; + } else { + panic!(); + } + } + } + self.prev_blankline = false; + } + match kind { Block::Atom(a) => self.tree.atom(a, span), Block::Leaf(l) => { @@ -210,7 +256,7 @@ impl<'s> TreeParser<'s> { Block::Container(c) => { let (skip_chars, skip_lines_suffix) = match c { Blockquote => (2, 0), - List(..) => panic!(), + List{..} | DescriptionList => panic!(), ListItem(..) | Footnote => (indent, 0), Div => (0, 1), }; @@ -234,16 +280,20 @@ impl<'s> TreeParser<'s> { if let Container::ListItem(ty) = c { if self - .lists_open + .open_lists .last() .map_or(true, |OpenList { depth, .. }| { usize::from(*depth) < self.tree.depth() }) { - self.tree.enter(Node::Container(Container::List(ty)), span); - self.lists_open.push(OpenList { + let tight = true; + let node = self + .tree + .enter(Node::Container(Container::List { ty, tight }), span); + self.open_lists.push(OpenList { ty, depth: self.tree.depth().try_into().unwrap(), + node, }); } } @@ -254,11 +304,11 @@ impl<'s> TreeParser<'s> { l += self.parse_block(&mut lines[l..line_count_inner]); } - if let Some(OpenList { depth, .. }) = self.lists_open.last() { + if let Some(OpenList { depth, .. }) = self.open_lists.last() { assert!(usize::from(*depth) <= self.tree.depth()); if self.tree.depth() == (*depth).into() { self.tree.exit(); // list - self.lists_open.pop(); + self.open_lists.pop(); } } @@ -368,10 +418,9 @@ impl BlockParser { ) } }), - ':' if chars.clone().next().map_or(true, char::is_whitespace) => Some(( - Block::Container(ListItem(Description)), - Span::by_len(start, 1), - )), + ':' if chars.clone().next().map_or(true, char::is_whitespace) => { + Some((Block::Container(DescriptionList), Span::by_len(start, 1))) + } f @ ('`' | ':' | '~') => { let fence_length = (&mut chars).take_while(|c| *c == f).count() + 1; fence = Some((f, fence_length)); @@ -445,7 +494,7 @@ impl BlockParser { !((&mut c).take(fence_length).all(|c| c == fence) && c.next().map_or(true, char::is_whitespace)) } - Block::Container(List(..)) => panic!(), + Block::Container(List { .. } | DescriptionList) => panic!(), } } @@ -818,42 +867,153 @@ mod test { #[test] fn parse_list_single_item() { test_parse!( - concat!( - "- abc\n", - "\n", - "\n", // + "- abc", + ( + Enter(Container(List { + ty: Unordered(b'-'), + tight: true + })), + "-" ), - (Enter(Container(List(Unordered(b'-')))), "-"), (Enter(Container(ListItem(Unordered(b'-')))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "abc"), (Exit(Leaf(Paragraph)), ""), - (Atom(Blankline), "\n"), - (Atom(Blankline), "\n"), (Exit(Container(ListItem(Unordered(b'-')))), "-"), - (Exit(Container(List(Unordered(b'-')))), "-"), + ( + Exit(Container(List { + ty: Unordered(b'-'), + tight: true + })), + "-" + ), ); } #[test] - fn parse_list_multi_item() { + fn parse_list_tight() { test_parse!( - "- abc\n\n\n- def\n\n", - (Enter(Container(List(Unordered(b'-')))), "-"), + concat!( + "- a\n", // + "- b\n", // + ), + ( + Enter(Container(List { + ty: Unordered(b'-'), + tight: true, + })), + "-" + ), (Enter(Container(ListItem(Unordered(b'-')))), "-"), (Enter(Leaf(Paragraph)), ""), - (Inline, "abc"), + (Inline, "a"), + (Exit(Leaf(Paragraph)), ""), + (Exit(Container(ListItem(Unordered(b'-')))), "-"), + (Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Enter(Leaf(Paragraph)), ""), + (Inline, "b"), + (Exit(Leaf(Paragraph)), ""), + (Exit(Container(ListItem(Unordered(b'-')))), "-"), + ( + Exit(Container(List { + ty: Unordered(b'-'), + tight: true, + })), + "-" + ), + ); + } + + #[test] + fn parse_list_loose() { + test_parse!( + concat!( + "- a\n", // + "- b\n", // + "\n", // + "- c\n", // + ), + ( + Enter(Container(List { + ty: Unordered(b'-'), + tight: false, + })), + "-" + ), + (Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Enter(Leaf(Paragraph)), ""), + (Inline, "a"), + (Exit(Leaf(Paragraph)), ""), + (Exit(Container(ListItem(Unordered(b'-')))), "-"), + (Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Enter(Leaf(Paragraph)), ""), + (Inline, "b"), (Exit(Leaf(Paragraph)), ""), - (Atom(Blankline), "\n"), (Atom(Blankline), "\n"), (Exit(Container(ListItem(Unordered(b'-')))), "-"), (Enter(Container(ListItem(Unordered(b'-')))), "-"), (Enter(Leaf(Paragraph)), ""), - (Inline, "def"), + (Inline, "c"), + (Exit(Leaf(Paragraph)), ""), + (Exit(Container(ListItem(Unordered(b'-')))), "-"), + ( + Exit(Container(List { + ty: Unordered(b'-'), + tight: false, + })), + "-" + ), + ); + } + + #[test] + fn parse_list_tight_nest() { + test_parse!( + concat!( + "- a\n", // + "\n", // + " + aa\n", // + " + ab\n", // + "\n", // + "- b\n", // + ), + ( + Enter(Container(List { + ty: Unordered(b'-'), + tight: true, + })), + "-" + ), + (Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Enter(Leaf(Paragraph)), ""), + (Inline, "a"), + (Exit(Leaf(Paragraph)), ""), + (Atom(Blankline), "\n"), + ( + Enter(Container(List { + ty: Unordered(b'+'), + tight: true, + })), + "+", + ), + (Enter(Container(ListItem(Unordered(b'+')))), "+"), + (Enter(Leaf(Paragraph)), ""), + (Inline, "aa"), + (Exit(Leaf(Paragraph)), ""), + (Exit(Container(ListItem(Unordered(b'+')))), "+"), + (Enter(Container(ListItem(Unordered(b'+')))), "+"), + (Enter(Leaf(Paragraph)), ""), + (Inline, "ab"), (Exit(Leaf(Paragraph)), ""), (Atom(Blankline), "\n"), (Exit(Container(ListItem(Unordered(b'-')))), "-"), - (Exit(Container(List(Unordered(b'-')))), "-"), + ( + Exit(Container(List { + ty: Unordered(b'-'), + tight: true, + })), + "-" + ), ); } @@ -862,34 +1022,135 @@ mod test { test_parse!( concat!( "- a\n", // - "\n", // - " - aa\n", // - "\n", // - "\n", // - "- b\n", // + " \n", // + " + b\n", // + " \n", // + " * c\n", // + ), + ( + Enter(Container(List { + ty: Unordered(b'-'), + tight: true, + })), + "-" ), - (Enter(Container(List(Unordered(b'-')))), "-"), - (Enter(Container(ListItem(Unordered(b'-')))), "-"), - (Enter(Leaf(Paragraph)), ""), - (Inline, "a"), - (Exit(Leaf(Paragraph)), ""), - (Atom(Blankline), "\n"), - (Enter(Container(List(Unordered(b'-')))), "-"), - (Enter(Container(ListItem(Unordered(b'-')))), "-"), - (Enter(Leaf(Paragraph)), ""), - (Inline, "aa"), - (Exit(Leaf(Paragraph)), ""), - (Atom(Blankline), "\n"), - (Atom(Blankline), "\n"), - (Exit(Container(ListItem(Unordered(b'-')))), "-"), - (Exit(Container(List(Unordered(b'-')))), "-"), - (Exit(Container(ListItem(Unordered(b'-')))), "-"), (Enter(Container(ListItem(Unordered(b'-')))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "b"), (Exit(Leaf(Paragraph)), ""), (Exit(Container(ListItem(Unordered(b'-')))), "-"), - (Exit(Container(List(Unordered(b'-')))), "-"), + ( + Exit(Container(List { + ty: Unordered(b'-'), + tight: true, + })), + "-" + ), + ); + } + + #[test] + fn parse_list_post() { + test_parse!( + concat!( + "- a\n", // + "\n", // + " * b\n", // + "cd\n", // + ), + ( + Enter(Container(List { + ty: Unordered(45), + tight: true + })), + "-" + ), + (Enter(Container(ListItem(Unordered(45)))), "-"), + (Enter(Leaf(Paragraph)), ""), + (Inline, "a"), + (Exit(Leaf(Paragraph)), ""), + (Atom(Blankline), "\n"), + ( + Enter(Container(List { + ty: Unordered(42), + tight: true + })), + "*" + ), + (Enter(Container(ListItem(Unordered(42)))), "*"), + (Enter(Leaf(Paragraph)), ""), + (Inline, "b\n"), + (Inline, "cd"), + (Exit(Leaf(Paragraph)), ""), + (Exit(Container(ListItem(Unordered(42)))), "*"), + ( + Exit(Container(List { + ty: Unordered(42), + tight: true + })), + "*" + ), + (Exit(Container(ListItem(Unordered(45)))), "-"), + ( + Exit(Container(List { + ty: Unordered(45), + tight: true + })), + "-" + ), + ); + } + #[test] + fn parse_list_mixed() { + test_parse!( + concat!( + "- a\n", // + "+ b\n", // + "+ c\n", // + ), + ( + Enter(Container(List { + ty: Unordered(b'-'), + tight: true + })), + "-" + ), + (Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Enter(Leaf(Paragraph)), ""), + (Inline, "a"), + (Exit(Leaf(Paragraph)), ""), + (Exit(Container(ListItem(Unordered(b'-')))), "-"), + ( + Exit(Container(List { + ty: Unordered(b'-'), + tight: true + })), + "-" + ), + ( + Enter(Container(List { + ty: Unordered(b'+'), + tight: true + })), + "+" + ), + (Enter(Container(ListItem(Unordered(b'+')))), "+"), + (Enter(Leaf(Paragraph)), ""), + (Inline, "b"), + (Exit(Leaf(Paragraph)), ""), + (Exit(Container(ListItem(Unordered(b'+')))), "+"), + (Enter(Container(ListItem(Unordered(b'+')))), "+"), + (Enter(Leaf(Paragraph)), ""), + (Inline, "c"), + (Exit(Leaf(Paragraph)), ""), + (Exit(Container(ListItem(Unordered(b'+')))), "+"), + ( + Exit(Container(List { + ty: Unordered(b'+'), + tight: true + })), + "+" + ), ); } @@ -1081,7 +1342,7 @@ mod test { #[test] fn block_list_description() { - test_block!(": abc\n", Block::Container(ListItem(Description)), ":", 1); + test_block!(": abc\n", Block::Container(DescriptionList), ":", 1); } #[test] diff --git a/src/html.rs b/src/html.rs index f9b772b..552be8d 100644 --- a/src/html.rs +++ b/src/html.rs @@ -103,31 +103,29 @@ impl<'s, I: Iterator>, W: std::fmt::Write> Writer<'s, I, W> { } match &c { Container::Blockquote => self.out.write_str(" { - self.out.write_str(" { + self.list_tightness.push(*tight); + match kind { + ListKind::Unordered | ListKind::Task => { + self.out.write_str(" { - self.out.write_str(" 1 { - write!(self.out, r#" start="{}""#, start)?; - } - if let Some(ty) = match numbering { - Decimal => None, - AlphaLower => Some('a'), - AlphaUpper => Some('A'), - RomanLower => Some('i'), - RomanUpper => Some('I'), - } { - write!(self.out, r#" type="{}""#, ty)?; + } => { + self.out.write_str(" 1 { + write!(self.out, r#" start="{}""#, start)?; + } + if let Some(ty) = match numbering { + Decimal => None, + AlphaLower => Some('a'), + AlphaUpper => Some('A'), + RomanLower => Some('i'), + RomanUpper => Some('I'), + } { + write!(self.out, r#" type="{}""#, ty)?; + } + } } } Container::ListItem | Container::TaskListItem { .. } => { diff --git a/src/lib.rs b/src/lib.rs index 5c8ffa6..ff30b27 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -293,9 +293,6 @@ pub struct Parser<'s> { /// Inline parser, recreated for each new inline. inline_parser: Option>>, - /// Stack of tightnesses for current open lists. - list_tightness: Vec, - /// Footnote references in the order they were encountered, without duplicates. footnote_references: Vec<&'s str>, /// Cache of footnotes to emit at the end. @@ -336,7 +333,6 @@ impl<'s> Parser<'s> { src, link_definitions, tree: branch, - list_tightness: Vec::new(), footnote_references: Vec::new(), footnotes: std::collections::HashMap::new(), footnote_index: 0, @@ -506,10 +502,8 @@ impl<'s> Parser<'s> { self.footnotes.insert(content, self.tree.take_branch()); continue; } - block::Container::List(block::ListType::Description) => { - Container::DescriptionList - } - block::Container::List(ty) => { + block::Container::DescriptionList => Container::DescriptionList, + block::Container::List { ty, tight } => { let kind = match ty { block::ListType::Unordered(..) => ListKind::Unordered, block::ListType::Ordered(numbering, style) => { @@ -523,30 +517,6 @@ impl<'s> Parser<'s> { } } block::ListType::Task => ListKind::Task, - block::ListType::Description => unreachable!(), - }; - let tight = if enter { - let tight = !self.tree.linear().any(|elem| { - matches!(elem, block::Element::Atom(block::Atom::Blankline)) - }) && !self.tree.linear_containers().any( - |(c, tree)| { - matches!( - c, - block::Node::Container(block::Container::ListItem( - .. - )) - ) && tree.linear().any(|elem| { - matches!( - elem, - block::Element::Atom(block::Atom::Blankline) - ) - }) - }, - ); - self.list_tightness.push(tight); - tight - } else { - self.list_tightness.pop().unwrap() }; Container::List { kind, tight } } diff --git a/src/tree.rs b/src/tree.rs index cadba55..ddd00a5 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -8,10 +8,9 @@ pub enum EventKind { Atom(A), } -#[derive(Debug, Clone)] -pub enum Element { - Container(C), - Atom(A), +pub enum Element<'a, C, A> { + Container(&'a mut C), + Atom(&'a mut A), Inline, } @@ -29,14 +28,6 @@ pub struct Tree { } impl Tree { - fn with_head(&self, head: Option) -> Self { - Self { - nodes: self.nodes.clone(), - branch: Vec::new(), - head, - } - } - pub fn empty() -> Self { Self { nodes: vec![].into_boxed_slice().into(), @@ -57,42 +48,6 @@ impl Tree { count } - /// Retrieve upcoming direct events without entering branches. - pub fn linear(&self) -> impl Iterator> + '_ { - let mut head = self.head; - std::iter::from_fn(move || { - head.take().map(|h| { - let n = &self.nodes[h.index()]; - head = n.next; - match &n.kind { - NodeKind::Root => unreachable!(), - NodeKind::Container(c, ..) => Element::Container(c.clone()), - NodeKind::Atom(a) => Element::Atom(a.clone()), - NodeKind::Inline => Element::Inline, - } - }) - }) - } - - /// Retrieve the upcoming branches. - pub fn linear_containers(&self) -> impl Iterator + '_ { - let mut head = self.head; - std::iter::from_fn(move || { - while let Some(h) = head.take() { - let n = &self.nodes[h.index()]; - head = n.next; - match &n.kind { - NodeKind::Root => unreachable!(), - NodeKind::Container(c, child) => { - return Some((c.clone(), self.with_head(*child))); - } - NodeKind::Atom(_) | NodeKind::Inline => continue, - } - } - None - }) - } - /// Split off the remaining part of the current branch. The returned [`Tree`] will continue on /// the branch, this [`Tree`] will skip over the current branch. pub fn take_branch(&mut self) -> Self { @@ -162,7 +117,7 @@ impl Iterator for Tree { } #[derive(Debug, Clone, Copy, PartialEq, Eq)] -struct NodeIndex(std::num::NonZeroUsize); +pub struct NodeIndex(std::num::NonZeroUsize); impl NodeIndex { fn new(i: usize) -> Self { @@ -232,13 +187,13 @@ impl Builder { }); } - pub(super) fn enter(&mut self, c: C, span: Span) { + pub(super) fn enter(&mut self, c: C, span: Span) -> NodeIndex { self.depth += 1; self.add_node(Node { span, kind: NodeKind::Container(c, None), next: None, - }); + }) } pub(super) fn exit(&mut self) { @@ -251,6 +206,19 @@ impl Builder { } } + pub(super) fn depth(&self) -> usize { + self.depth + } + + pub(super) fn elem_mut(&mut self, ni: NodeIndex) -> Element { + match &mut self.nodes[ni.index()].kind { + NodeKind::Root => unreachable!(), + NodeKind::Container(c, ..) => Element::Container(c), + NodeKind::Atom(a) => Element::Atom(a), + NodeKind::Inline => Element::Inline, + } + } + pub(super) fn finish(self) -> Tree { assert_eq!(self.depth, 0); let head = self.nodes[NodeIndex::root().index()].next; @@ -261,11 +229,7 @@ impl Builder { } } - pub(super) fn depth(&self) -> usize { - self.depth - } - - fn add_node(&mut self, node: Node) { + fn add_node(&mut self, node: Node) -> NodeIndex { let ni = NodeIndex::new(self.nodes.len()); self.nodes.push(node); if let Some(head_ni) = &mut self.head { @@ -291,6 +255,7 @@ impl Builder { panic!() } self.head = Some(ni); + ni } }