From 70d29c65e4721371ba1d96c6af10d80f89c29392 Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Sun, 22 Jan 2023 12:39:04 +0100 Subject: [PATCH] parser: determine tight vs loose lists --- src/block.rs | 1 + src/html.rs | 37 +++++++++++++---- src/lib.rs | 114 +++++++++++++++++++++++++++++++++++---------------- src/tree.rs | 53 ++++++++++++++++++++++++ 4 files changed, 161 insertions(+), 44 deletions(-) diff --git a/src/block.rs b/src/block.rs index 3974b79..bb76300 100644 --- a/src/block.rs +++ b/src/block.rs @@ -13,6 +13,7 @@ use ListType::*; pub type Tree = tree::Tree; pub type TreeBuilder = tree::Builder; +pub type Element = tree::Element; #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Node { diff --git a/src/html.rs b/src/html.rs index 88c043d..9956392 100644 --- a/src/html.rs +++ b/src/html.rs @@ -1,7 +1,7 @@ use crate::Atom; use crate::Container; use crate::Event; -use crate::List; +use crate::ListKind; use crate::OrderedListNumbering::*; /// Generate HTML from parsed events and push it to a unicode-accepting buffer or stream. @@ -101,12 +101,19 @@ impl<'s, I: Iterator>, W: std::fmt::Write> Writer<'s, I, W> { } match &c { Container::Blockquote => self.out.write_str(" { + Container::List { + kind: ListKind::Unordered | ListKind::Task, + .. + } => { self.out.write_str(" { + Container::List { + kind: + ListKind::Ordered { + numbering, start, .. + }, + .. + } => { self.out.write_str(" 1 { write!(self.out, r#" start="{}""#, start)?; @@ -191,14 +198,20 @@ impl<'s, I: Iterator>, W: std::fmt::Write> Writer<'s, I, W> { c, Container::Div { class: Some(_) } | Container::Math { .. } - | Container::List(List::Task) + | Container::List { + kind: ListKind::Task, + .. + } | Container::TaskListItem { .. } ) { self.out.write_str(r#" class=""#)?; let mut first_written = false; if let Some(cls) = match c { - Container::List(List::Task) => Some("task-list"), + Container::List { + kind: ListKind::Task, + .. + } => Some("task-list"), Container::TaskListItem { checked: false } => Some("unchecked"), Container::TaskListItem { checked: true } => Some("checked"), Container::Math { display: false } => Some("math inline"), @@ -256,10 +269,16 @@ impl<'s, I: Iterator>, W: std::fmt::Write> Writer<'s, I, W> { } match c { Container::Blockquote => self.out.write_str("")?, - Container::List(List::Unordered | List::Task) => { + Container::List { + kind: ListKind::Unordered | ListKind::Task, + .. + } => { self.out.write_str("")?; } - Container::List(List::Ordered { .. }) => self.out.write_str("")?, + Container::List { + kind: ListKind::Ordered { .. }, + .. + } => self.out.write_str("")?, Container::ListItem | Container::TaskListItem { .. } => { self.out.write_str("")?; } diff --git a/src/lib.rs b/src/lib.rs index b2730b5..480fbcf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -33,7 +33,7 @@ pub enum Container<'s> { /// A blockquote element. Blockquote, /// A list. - List(List), + List { kind: ListKind, tight: bool }, /// An item of a list ListItem, /// An item of a task list, either checked or unchecked. @@ -99,7 +99,7 @@ impl<'s> Container<'s> { fn is_block(&self) -> bool { match self { Self::Blockquote - | Self::List(..) + | Self::List { .. } | Self::ListItem | Self::TaskListItem { .. } | Self::DescriptionList @@ -136,7 +136,7 @@ impl<'s> Container<'s> { fn is_block_container(&self) -> bool { match self { Self::Blockquote - | Self::List(..) + | Self::List { .. } | Self::ListItem | Self::TaskListItem { .. } | Self::DescriptionList @@ -184,7 +184,7 @@ pub enum LinkType { } #[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum List { +pub enum ListKind { Unordered, Ordered { numbering: OrderedListNumbering, @@ -502,20 +502,41 @@ impl<'s> Parser<'s> { self.footnotes.insert(content, self.tree.take_branch()); continue; } - block::Container::List(ty) => match ty { - block::ListType::Unordered(..) => Container::List(List::Unordered), - block::ListType::Task => Container::List(List::Task), - block::ListType::Ordered(numbering, style) => { - let marker = ev.span.of(self.src); - let start = numbering.parse_number(style.number(marker)).max(1); - Container::List(List::Ordered { - numbering, - style, - start, - }) - } - block::ListType::Description => panic!(), - }, + block::Container::List(block::ListType::Description) => { + Container::DescriptionList + } + block::Container::List(ty) => { + let kind = match ty { + block::ListType::Unordered(..) => ListKind::Unordered, + block::ListType::Ordered(numbering, style) => { + let marker = ev.span.of(self.src); + let start = + numbering.parse_number(style.number(marker)).max(1); + ListKind::Ordered { + numbering, + style, + start, + } + } + block::ListType::Task => ListKind::Task, + block::ListType::Description => unreachable!(), + }; + let tight = + !self.tree.linear().any(|elem| { + matches!(elem, block::Element::Atom(block::Atom::Blankline)) + }) && !self.tree.linear_containers().any(|(c, tree)| { + matches!( + c, + block::Node::Container(block::Container::ListItem(..)) + ) && tree.linear().any(|elem| { + matches!( + elem, + block::Element::Atom(block::Atom::Blankline) + ) + }) + }); + Container::List { kind, tight } + } block::Container::ListItem(ty) => { if matches!(ty, block::ListType::Task) { let marker = ev.span.of(self.src); @@ -587,8 +608,7 @@ mod test { use super::Container::*; use super::Event::*; use super::LinkType; - use super::List; - use super::List::*; + use super::ListKind; use super::OrderedListNumbering::*; use super::OrderedListStyle::*; use super::SpanLinkType; @@ -1013,13 +1033,22 @@ mod test { fn list_item_unordered() { test_parse!( "- abc", - Start(List(List::Unordered), Attributes::new()), + Start( + List { + kind: ListKind::Unordered, + tight: true, + }, + Attributes::new(), + ), Start(ListItem, Attributes::new()), Start(Paragraph, Attributes::new()), Str("abc".into()), End(Paragraph), End(ListItem), - End(List(List::Unordered)), + End(List { + kind: ListKind::Unordered, + tight: true, + }), ); } @@ -1028,23 +1057,29 @@ mod test { test_parse!( "123. abc", Start( - List(List::Ordered { - numbering: Decimal, - style: Period, - start: 123 - }), - Attributes::new() + List { + kind: ListKind::Ordered { + numbering: Decimal, + style: Period, + start: 123 + }, + tight: true, + }, + Attributes::new(), ), Start(ListItem, Attributes::new()), Start(Paragraph, Attributes::new()), Str("abc".into()), End(Paragraph), End(ListItem), - End(List(List::Ordered { - numbering: Decimal, - style: Period, - start: 123 - })), + End(List { + kind: ListKind::Ordered { + numbering: Decimal, + style: Period, + start: 123 + }, + tight: true, + }), ); } @@ -1056,7 +1091,13 @@ mod test { "- [x] b\n", // "- [X] c\n", // ), - Start(List(List::Task), Attributes::new()), + Start( + List { + kind: ListKind::Task, + tight: true, + }, + Attributes::new(), + ), Start(TaskListItem { checked: false }, Attributes::new()), Start(Paragraph, Attributes::new()), Str("a".into()), @@ -1072,7 +1113,10 @@ mod test { Str("c".into()), End(Paragraph), End(TaskListItem { checked: true }), - End(List(List::Task)), + End(List { + kind: ListKind::Task, + tight: true, + }), ); } } diff --git a/src/tree.rs b/src/tree.rs index 5ebdb8d..cadba55 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -8,6 +8,13 @@ pub enum EventKind { Atom(A), } +#[derive(Debug, Clone)] +pub enum Element { + Container(C), + Atom(A), + Inline, +} + #[derive(Debug, Clone, PartialEq, Eq)] pub struct Event { pub kind: EventKind, @@ -22,6 +29,14 @@ pub struct Tree { } impl Tree { + fn with_head(&self, head: Option) -> Self { + Self { + nodes: self.nodes.clone(), + branch: Vec::new(), + head, + } + } + pub fn empty() -> Self { Self { nodes: vec![].into_boxed_slice().into(), @@ -42,6 +57,44 @@ impl Tree { count } + /// Retrieve upcoming direct events without entering branches. + pub fn linear(&self) -> impl Iterator> + '_ { + let mut head = self.head; + std::iter::from_fn(move || { + head.take().map(|h| { + let n = &self.nodes[h.index()]; + head = n.next; + match &n.kind { + NodeKind::Root => unreachable!(), + NodeKind::Container(c, ..) => Element::Container(c.clone()), + NodeKind::Atom(a) => Element::Atom(a.clone()), + NodeKind::Inline => Element::Inline, + } + }) + }) + } + + /// Retrieve the upcoming branches. + pub fn linear_containers(&self) -> impl Iterator + '_ { + let mut head = self.head; + std::iter::from_fn(move || { + while let Some(h) = head.take() { + let n = &self.nodes[h.index()]; + head = n.next; + match &n.kind { + NodeKind::Root => unreachable!(), + NodeKind::Container(c, child) => { + return Some((c.clone(), self.with_head(*child))); + } + NodeKind::Atom(_) | NodeKind::Inline => continue, + } + } + None + }) + } + + /// Split off the remaining part of the current branch. The returned [`Tree`] will continue on + /// the branch, this [`Tree`] will skip over the current branch. pub fn take_branch(&mut self) -> Self { let head = self.head.take(); self.head = self.branch.pop();