parser: determine tight vs loose lists

This commit is contained in:
Noah Hellman 2023-01-22 12:39:04 +01:00
parent 36470af114
commit 70d29c65e4
4 changed files with 161 additions and 44 deletions

View file

@ -13,6 +13,7 @@ use ListType::*;
pub type Tree = tree::Tree<Node, Atom>; pub type Tree = tree::Tree<Node, Atom>;
pub type TreeBuilder = tree::Builder<Node, Atom>; pub type TreeBuilder = tree::Builder<Node, Atom>;
pub type Element = tree::Element<Node, Atom>;
#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Node { pub enum Node {

View file

@ -1,7 +1,7 @@
use crate::Atom; use crate::Atom;
use crate::Container; use crate::Container;
use crate::Event; use crate::Event;
use crate::List; use crate::ListKind;
use crate::OrderedListNumbering::*; use crate::OrderedListNumbering::*;
/// Generate HTML from parsed events and push it to a unicode-accepting buffer or stream. /// Generate HTML from parsed events and push it to a unicode-accepting buffer or stream.
@ -101,12 +101,19 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
} }
match &c { match &c {
Container::Blockquote => self.out.write_str("<blockquote")?, Container::Blockquote => self.out.write_str("<blockquote")?,
Container::List(List::Unordered | List::Task) => { Container::List {
kind: ListKind::Unordered | ListKind::Task,
..
} => {
self.out.write_str("<ul")?; self.out.write_str("<ul")?;
} }
Container::List(List::Ordered { Container::List {
kind:
ListKind::Ordered {
numbering, start, .. numbering, start, ..
}) => { },
..
} => {
self.out.write_str("<ol")?; self.out.write_str("<ol")?;
if *start > 1 { if *start > 1 {
write!(self.out, r#" start="{}""#, start)?; write!(self.out, r#" start="{}""#, start)?;
@ -191,14 +198,20 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
c, c,
Container::Div { class: Some(_) } Container::Div { class: Some(_) }
| Container::Math { .. } | Container::Math { .. }
| Container::List(List::Task) | Container::List {
kind: ListKind::Task,
..
}
| Container::TaskListItem { .. } | Container::TaskListItem { .. }
) )
{ {
self.out.write_str(r#" class=""#)?; self.out.write_str(r#" class=""#)?;
let mut first_written = false; let mut first_written = false;
if let Some(cls) = match c { if let Some(cls) = match c {
Container::List(List::Task) => Some("task-list"), Container::List {
kind: ListKind::Task,
..
} => Some("task-list"),
Container::TaskListItem { checked: false } => Some("unchecked"), Container::TaskListItem { checked: false } => Some("unchecked"),
Container::TaskListItem { checked: true } => Some("checked"), Container::TaskListItem { checked: true } => Some("checked"),
Container::Math { display: false } => Some("math inline"), Container::Math { display: false } => Some("math inline"),
@ -256,10 +269,16 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
} }
match c { match c {
Container::Blockquote => self.out.write_str("</blockquote>")?, Container::Blockquote => self.out.write_str("</blockquote>")?,
Container::List(List::Unordered | List::Task) => { Container::List {
kind: ListKind::Unordered | ListKind::Task,
..
} => {
self.out.write_str("</ul>")?; self.out.write_str("</ul>")?;
} }
Container::List(List::Ordered { .. }) => self.out.write_str("</ol>")?, Container::List {
kind: ListKind::Ordered { .. },
..
} => self.out.write_str("</ol>")?,
Container::ListItem | Container::TaskListItem { .. } => { Container::ListItem | Container::TaskListItem { .. } => {
self.out.write_str("</li>")?; self.out.write_str("</li>")?;
} }

View file

@ -33,7 +33,7 @@ pub enum Container<'s> {
/// A blockquote element. /// A blockquote element.
Blockquote, Blockquote,
/// A list. /// A list.
List(List), List { kind: ListKind, tight: bool },
/// An item of a list /// An item of a list
ListItem, ListItem,
/// An item of a task list, either checked or unchecked. /// An item of a task list, either checked or unchecked.
@ -99,7 +99,7 @@ impl<'s> Container<'s> {
fn is_block(&self) -> bool { fn is_block(&self) -> bool {
match self { match self {
Self::Blockquote Self::Blockquote
| Self::List(..) | Self::List { .. }
| Self::ListItem | Self::ListItem
| Self::TaskListItem { .. } | Self::TaskListItem { .. }
| Self::DescriptionList | Self::DescriptionList
@ -136,7 +136,7 @@ impl<'s> Container<'s> {
fn is_block_container(&self) -> bool { fn is_block_container(&self) -> bool {
match self { match self {
Self::Blockquote Self::Blockquote
| Self::List(..) | Self::List { .. }
| Self::ListItem | Self::ListItem
| Self::TaskListItem { .. } | Self::TaskListItem { .. }
| Self::DescriptionList | Self::DescriptionList
@ -184,7 +184,7 @@ pub enum LinkType {
} }
#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum List { pub enum ListKind {
Unordered, Unordered,
Ordered { Ordered {
numbering: OrderedListNumbering, numbering: OrderedListNumbering,
@ -502,20 +502,41 @@ impl<'s> Parser<'s> {
self.footnotes.insert(content, self.tree.take_branch()); self.footnotes.insert(content, self.tree.take_branch());
continue; continue;
} }
block::Container::List(ty) => match ty { block::Container::List(block::ListType::Description) => {
block::ListType::Unordered(..) => Container::List(List::Unordered), Container::DescriptionList
block::ListType::Task => Container::List(List::Task), }
block::Container::List(ty) => {
let kind = match ty {
block::ListType::Unordered(..) => ListKind::Unordered,
block::ListType::Ordered(numbering, style) => { block::ListType::Ordered(numbering, style) => {
let marker = ev.span.of(self.src); let marker = ev.span.of(self.src);
let start = numbering.parse_number(style.number(marker)).max(1); let start =
Container::List(List::Ordered { numbering.parse_number(style.number(marker)).max(1);
ListKind::Ordered {
numbering, numbering,
style, style,
start, start,
})
} }
block::ListType::Description => panic!(), }
}, block::ListType::Task => ListKind::Task,
block::ListType::Description => unreachable!(),
};
let tight =
!self.tree.linear().any(|elem| {
matches!(elem, block::Element::Atom(block::Atom::Blankline))
}) && !self.tree.linear_containers().any(|(c, tree)| {
matches!(
c,
block::Node::Container(block::Container::ListItem(..))
) && tree.linear().any(|elem| {
matches!(
elem,
block::Element::Atom(block::Atom::Blankline)
)
})
});
Container::List { kind, tight }
}
block::Container::ListItem(ty) => { block::Container::ListItem(ty) => {
if matches!(ty, block::ListType::Task) { if matches!(ty, block::ListType::Task) {
let marker = ev.span.of(self.src); let marker = ev.span.of(self.src);
@ -587,8 +608,7 @@ mod test {
use super::Container::*; use super::Container::*;
use super::Event::*; use super::Event::*;
use super::LinkType; use super::LinkType;
use super::List; use super::ListKind;
use super::List::*;
use super::OrderedListNumbering::*; use super::OrderedListNumbering::*;
use super::OrderedListStyle::*; use super::OrderedListStyle::*;
use super::SpanLinkType; use super::SpanLinkType;
@ -1013,13 +1033,22 @@ mod test {
fn list_item_unordered() { fn list_item_unordered() {
test_parse!( test_parse!(
"- abc", "- abc",
Start(List(List::Unordered), Attributes::new()), Start(
List {
kind: ListKind::Unordered,
tight: true,
},
Attributes::new(),
),
Start(ListItem, Attributes::new()), Start(ListItem, Attributes::new()),
Start(Paragraph, Attributes::new()), Start(Paragraph, Attributes::new()),
Str("abc".into()), Str("abc".into()),
End(Paragraph), End(Paragraph),
End(ListItem), End(ListItem),
End(List(List::Unordered)), End(List {
kind: ListKind::Unordered,
tight: true,
}),
); );
} }
@ -1028,23 +1057,29 @@ mod test {
test_parse!( test_parse!(
"123. abc", "123. abc",
Start( Start(
List(List::Ordered { List {
kind: ListKind::Ordered {
numbering: Decimal, numbering: Decimal,
style: Period, style: Period,
start: 123 start: 123
}), },
Attributes::new() tight: true,
},
Attributes::new(),
), ),
Start(ListItem, Attributes::new()), Start(ListItem, Attributes::new()),
Start(Paragraph, Attributes::new()), Start(Paragraph, Attributes::new()),
Str("abc".into()), Str("abc".into()),
End(Paragraph), End(Paragraph),
End(ListItem), End(ListItem),
End(List(List::Ordered { End(List {
kind: ListKind::Ordered {
numbering: Decimal, numbering: Decimal,
style: Period, style: Period,
start: 123 start: 123
})), },
tight: true,
}),
); );
} }
@ -1056,7 +1091,13 @@ mod test {
"- [x] b\n", // "- [x] b\n", //
"- [X] c\n", // "- [X] c\n", //
), ),
Start(List(List::Task), Attributes::new()), Start(
List {
kind: ListKind::Task,
tight: true,
},
Attributes::new(),
),
Start(TaskListItem { checked: false }, Attributes::new()), Start(TaskListItem { checked: false }, Attributes::new()),
Start(Paragraph, Attributes::new()), Start(Paragraph, Attributes::new()),
Str("a".into()), Str("a".into()),
@ -1072,7 +1113,10 @@ mod test {
Str("c".into()), Str("c".into()),
End(Paragraph), End(Paragraph),
End(TaskListItem { checked: true }), End(TaskListItem { checked: true }),
End(List(List::Task)), End(List {
kind: ListKind::Task,
tight: true,
}),
); );
} }
} }

View file

@ -8,6 +8,13 @@ pub enum EventKind<C, A> {
Atom(A), Atom(A),
} }
/// A single node as seen when scanning one level of a tree, without descending into it.
///
/// `C` is the container payload type and `A` is the atom payload type.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Element<C, A> {
    /// A container node, carrying a copy of its container payload.
    Container(C),
    /// An atom node, carrying a copy of its atom payload.
    Atom(A),
    /// An inline node; no payload is exposed.
    Inline,
}
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub struct Event<C, A> { pub struct Event<C, A> {
pub kind: EventKind<C, A>, pub kind: EventKind<C, A>,
@ -22,6 +29,14 @@ pub struct Tree<C: 'static, A: 'static> {
} }
impl<C: Clone, A: Clone> Tree<C, A> { impl<C: Clone, A: Clone> Tree<C, A> {
/// Build a new tree over the same nodes whose cursor starts at `head`.
///
/// The returned tree starts with an empty branch stack. The node storage is
/// obtained with `clone()` (presumably a cheap shared-pointer clone; confirm
/// against the type of the `nodes` field).
fn with_head(&self, head: Option<NodeIndex>) -> Self {
    let nodes = self.nodes.clone();
    Self {
        nodes,
        head,
        branch: Vec::new(),
    }
}
pub fn empty() -> Self { pub fn empty() -> Self {
Self { Self {
nodes: vec![].into_boxed_slice().into(), nodes: vec![].into_boxed_slice().into(),
@ -42,6 +57,44 @@ impl<C: Clone, A: Clone> Tree<C, A> {
count count
} }
/// Retrieve upcoming direct events without entering branches.
///
/// Starting at the current head, follows each node's `next` link and yields one
/// [`Element`] per node. Containers are reported with a clone of their payload
/// only; their children are not visited. The tree itself is not advanced, since
/// the iterator walks a local copy of the head.
///
/// # Panics
///
/// Panics if a root node is encountered while following the links.
pub fn linear(&self) -> impl Iterator<Item = Element<C, A>> + '_ {
    let mut cursor = self.head;
    std::iter::from_fn(move || {
        // `?` ends the iteration once there is no next sibling.
        let idx = cursor.take()?;
        let node = &self.nodes[idx.index()];
        cursor = node.next;
        let elem = match &node.kind {
            NodeKind::Container(c, ..) => Element::Container(c.clone()),
            NodeKind::Atom(a) => Element::Atom(a.clone()),
            NodeKind::Inline => Element::Inline,
            NodeKind::Root => unreachable!(),
        };
        Some(elem)
    })
}
/// Retrieve the upcoming branches.
///
/// Starting at the current head, follows each node's `next` link and yields, for
/// every container node, a clone of its payload together with a tree positioned
/// at that container's child link. Atom and inline nodes are skipped. The tree
/// itself is not advanced, since the iterator walks a local copy of the head.
///
/// # Panics
///
/// Panics if a root node is encountered while following the links.
pub fn linear_containers(&self) -> impl Iterator<Item = (C, Self)> + '_ {
    let mut cursor = self.head;
    std::iter::from_fn(move || loop {
        // `?` ends the iteration once the sibling chain is exhausted.
        let idx = cursor.take()?;
        let node = &self.nodes[idx.index()];
        cursor = node.next;
        match &node.kind {
            NodeKind::Container(c, child) => {
                break Some((c.clone(), self.with_head(*child)));
            }
            // Not a branch: keep scanning the following siblings.
            NodeKind::Atom(_) | NodeKind::Inline => {}
            NodeKind::Root => unreachable!(),
        }
    })
}
/// Split off the remaining part of the current branch. The returned [`Tree`] will continue on
/// the branch, this [`Tree`] will skip over the current branch.
pub fn take_branch(&mut self) -> Self { pub fn take_branch(&mut self) -> Self {
let head = self.head.take(); let head = self.head.take();
self.head = self.branch.pop(); self.head = self.branch.pop();