amend! parser: determine tight vs loose lists

block: determine tight vs loose lists
This commit is contained in:
Noah Hellman 2023-01-22 21:55:14 +01:00
parent c4a9a3e0b0
commit ec69d98c75
4 changed files with 360 additions and 166 deletions

View file

@ -13,7 +13,6 @@ use ListType::*;
pub type Tree = tree::Tree<Node, Atom>;
pub type TreeBuilder = tree::Builder<Node, Atom>;
pub type Element = tree::Element<Node, Atom>;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Node {
@ -81,8 +80,11 @@ pub enum Container {
/// Span is class specifier, possibly empty.
Div,
/// Span is `:`.
DescriptionList,
/// Span is the list marker of the first list item in the list.
List(ListType),
List { ty: ListType, tight: bool },
/// Span is the list marker.
ListItem(ListType),
@ -96,7 +98,6 @@ pub enum ListType {
Unordered(u8),
Ordered(crate::OrderedListNumbering, crate::OrderedListStyle),
Task,
Description,
}
#[derive(Debug)]
@ -107,6 +108,8 @@ struct OpenList {
/// Depth in the tree where the direct list items of the list are. Needed to determine when to
/// close the list.
depth: u16,
/// Index to node in tree, required to update tightness.
node: tree::NodeIndex,
}
/// Parser for block-level tree structure of entire document.
@ -114,7 +117,10 @@ struct TreeParser<'s> {
src: &'s str,
tree: TreeBuilder,
lists_open: Vec<OpenList>,
/// The previous block element was a blank line.
prev_blankline: bool,
/// Stack of currently open lists.
open_lists: Vec<OpenList>,
}
impl<'s> TreeParser<'s> {
@ -123,7 +129,8 @@ impl<'s> TreeParser<'s> {
Self {
src,
tree: TreeBuilder::new(),
lists_open: Vec::new(),
prev_blankline: false,
open_lists: Vec::new(),
}
}
@ -138,7 +145,7 @@ impl<'s> TreeParser<'s> {
}
line_pos += line_count;
}
for _ in self.lists_open.drain(..) {
for _ in self.open_lists.drain(..) {
self.tree.exit(); // list
}
self.tree.finish()
@ -176,6 +183,45 @@ impl<'s> TreeParser<'s> {
lines
};
// close list if a non list item or a list item of new type appeared
if let Some(OpenList { ty, depth, .. }) = self.open_lists.last() {
assert!(usize::from(*depth) <= self.tree.depth());
if self.tree.depth() == (*depth).into()
&& !matches!(
kind,
Block::Container(Container::ListItem(ty_new)) if *ty == ty_new,
)
{
self.tree.exit(); // list
self.open_lists.pop();
}
}
// set list to loose if blankline discovered
if matches!(kind, Block::Atom(Atom::Blankline)) {
self.prev_blankline = true;
} else {
if self.prev_blankline {
for OpenList { node, depth, .. } in &self.open_lists {
if usize::from(*depth) < self.tree.depth()
&& matches!(kind, Block::Container(Container::ListItem { .. }))
{
continue;
}
if let tree::Element::Container(Node::Container(Container::List {
tight,
..
})) = self.tree.elem_mut(*node)
{
*tight = false;
} else {
panic!();
}
}
}
self.prev_blankline = false;
}
match kind {
Block::Atom(a) => self.tree.atom(a, span),
Block::Leaf(l) => {
@ -210,7 +256,7 @@ impl<'s> TreeParser<'s> {
Block::Container(c) => {
let (skip_chars, skip_lines_suffix) = match c {
Blockquote => (2, 0),
List(..) => panic!(),
List{..} | DescriptionList => panic!(),
ListItem(..) | Footnote => (indent, 0),
Div => (0, 1),
};
@ -234,16 +280,20 @@ impl<'s> TreeParser<'s> {
if let Container::ListItem(ty) = c {
if self
.lists_open
.open_lists
.last()
.map_or(true, |OpenList { depth, .. }| {
usize::from(*depth) < self.tree.depth()
})
{
self.tree.enter(Node::Container(Container::List(ty)), span);
self.lists_open.push(OpenList {
let tight = true;
let node = self
.tree
.enter(Node::Container(Container::List { ty, tight }), span);
self.open_lists.push(OpenList {
ty,
depth: self.tree.depth().try_into().unwrap(),
node,
});
}
}
@ -254,11 +304,11 @@ impl<'s> TreeParser<'s> {
l += self.parse_block(&mut lines[l..line_count_inner]);
}
if let Some(OpenList { depth, .. }) = self.lists_open.last() {
if let Some(OpenList { depth, .. }) = self.open_lists.last() {
assert!(usize::from(*depth) <= self.tree.depth());
if self.tree.depth() == (*depth).into() {
self.tree.exit(); // list
self.lists_open.pop();
self.open_lists.pop();
}
}
@ -368,10 +418,9 @@ impl BlockParser {
)
}
}),
':' if chars.clone().next().map_or(true, char::is_whitespace) => Some((
Block::Container(ListItem(Description)),
Span::by_len(start, 1),
)),
':' if chars.clone().next().map_or(true, char::is_whitespace) => {
Some((Block::Container(DescriptionList), Span::by_len(start, 1)))
}
f @ ('`' | ':' | '~') => {
let fence_length = (&mut chars).take_while(|c| *c == f).count() + 1;
fence = Some((f, fence_length));
@ -445,7 +494,7 @@ impl BlockParser {
!((&mut c).take(fence_length).all(|c| c == fence)
&& c.next().map_or(true, char::is_whitespace))
}
Block::Container(List(..)) => panic!(),
Block::Container(List { .. } | DescriptionList) => panic!(),
}
}
@ -818,42 +867,153 @@ mod test {
#[test]
fn parse_list_single_item() {
test_parse!(
concat!(
"- abc\n",
"\n",
"\n", //
"- abc",
(
Enter(Container(List {
ty: Unordered(b'-'),
tight: true
})),
"-"
),
(Enter(Container(List(Unordered(b'-')))), "-"),
(Enter(Container(ListItem(Unordered(b'-')))), "-"),
(Enter(Leaf(Paragraph)), ""),
(Inline, "abc"),
(Exit(Leaf(Paragraph)), ""),
(Atom(Blankline), "\n"),
(Atom(Blankline), "\n"),
(Exit(Container(ListItem(Unordered(b'-')))), "-"),
(Exit(Container(List(Unordered(b'-')))), "-"),
(
Exit(Container(List {
ty: Unordered(b'-'),
tight: true
})),
"-"
),
);
}
#[test]
fn parse_list_multi_item() {
fn parse_list_tight() {
test_parse!(
"- abc\n\n\n- def\n\n",
(Enter(Container(List(Unordered(b'-')))), "-"),
concat!(
"- a\n", //
"- b\n", //
),
(
Enter(Container(List {
ty: Unordered(b'-'),
tight: true,
})),
"-"
),
(Enter(Container(ListItem(Unordered(b'-')))), "-"),
(Enter(Leaf(Paragraph)), ""),
(Inline, "abc"),
(Inline, "a"),
(Exit(Leaf(Paragraph)), ""),
(Exit(Container(ListItem(Unordered(b'-')))), "-"),
(Enter(Container(ListItem(Unordered(b'-')))), "-"),
(Enter(Leaf(Paragraph)), ""),
(Inline, "b"),
(Exit(Leaf(Paragraph)), ""),
(Exit(Container(ListItem(Unordered(b'-')))), "-"),
(
Exit(Container(List {
ty: Unordered(b'-'),
tight: true,
})),
"-"
),
);
}
#[test]
fn parse_list_loose() {
test_parse!(
concat!(
"- a\n", //
"- b\n", //
"\n", //
"- c\n", //
),
(
Enter(Container(List {
ty: Unordered(b'-'),
tight: false,
})),
"-"
),
(Enter(Container(ListItem(Unordered(b'-')))), "-"),
(Enter(Leaf(Paragraph)), ""),
(Inline, "a"),
(Exit(Leaf(Paragraph)), ""),
(Exit(Container(ListItem(Unordered(b'-')))), "-"),
(Enter(Container(ListItem(Unordered(b'-')))), "-"),
(Enter(Leaf(Paragraph)), ""),
(Inline, "b"),
(Exit(Leaf(Paragraph)), ""),
(Atom(Blankline), "\n"),
(Atom(Blankline), "\n"),
(Exit(Container(ListItem(Unordered(b'-')))), "-"),
(Enter(Container(ListItem(Unordered(b'-')))), "-"),
(Enter(Leaf(Paragraph)), ""),
(Inline, "def"),
(Inline, "c"),
(Exit(Leaf(Paragraph)), ""),
(Exit(Container(ListItem(Unordered(b'-')))), "-"),
(
Exit(Container(List {
ty: Unordered(b'-'),
tight: false,
})),
"-"
),
);
}
#[test]
fn parse_list_tight_nest() {
test_parse!(
concat!(
"- a\n", //
"\n", //
" + aa\n", //
" + ab\n", //
"\n", //
"- b\n", //
),
(
Enter(Container(List {
ty: Unordered(b'-'),
tight: true,
})),
"-"
),
(Enter(Container(ListItem(Unordered(b'-')))), "-"),
(Enter(Leaf(Paragraph)), ""),
(Inline, "a"),
(Exit(Leaf(Paragraph)), ""),
(Atom(Blankline), "\n"),
(
Enter(Container(List {
ty: Unordered(b'+'),
tight: true,
})),
"+",
),
(Enter(Container(ListItem(Unordered(b'+')))), "+"),
(Enter(Leaf(Paragraph)), ""),
(Inline, "aa"),
(Exit(Leaf(Paragraph)), ""),
(Exit(Container(ListItem(Unordered(b'+')))), "+"),
(Enter(Container(ListItem(Unordered(b'+')))), "+"),
(Enter(Leaf(Paragraph)), ""),
(Inline, "ab"),
(Exit(Leaf(Paragraph)), ""),
(Atom(Blankline), "\n"),
(Exit(Container(ListItem(Unordered(b'-')))), "-"),
(Exit(Container(List(Unordered(b'-')))), "-"),
(
Exit(Container(List {
ty: Unordered(b'-'),
tight: true,
})),
"-"
),
);
}
@ -862,34 +1022,135 @@ mod test {
test_parse!(
concat!(
"- a\n", //
"\n", //
" - aa\n", //
"\n", //
"\n", //
"- b\n", //
" \n", //
" + b\n", //
" \n", //
" * c\n", //
),
(
Enter(Container(List {
ty: Unordered(b'-'),
tight: true,
})),
"-"
),
(Enter(Container(List(Unordered(b'-')))), "-"),
(Enter(Container(ListItem(Unordered(b'-')))), "-"),
(Enter(Leaf(Paragraph)), ""),
(Inline, "a"),
(Exit(Leaf(Paragraph)), ""),
(Atom(Blankline), "\n"),
(Enter(Container(List(Unordered(b'-')))), "-"),
(Enter(Container(ListItem(Unordered(b'-')))), "-"),
(Enter(Leaf(Paragraph)), ""),
(Inline, "aa"),
(Exit(Leaf(Paragraph)), ""),
(Atom(Blankline), "\n"),
(Atom(Blankline), "\n"),
(Exit(Container(ListItem(Unordered(b'-')))), "-"),
(Exit(Container(List(Unordered(b'-')))), "-"),
(Exit(Container(ListItem(Unordered(b'-')))), "-"),
(Enter(Container(ListItem(Unordered(b'-')))), "-"),
(Enter(Leaf(Paragraph)), ""),
(Inline, "b"),
(Exit(Leaf(Paragraph)), ""),
(Exit(Container(ListItem(Unordered(b'-')))), "-"),
(Exit(Container(List(Unordered(b'-')))), "-"),
(
Exit(Container(List {
ty: Unordered(b'-'),
tight: true,
})),
"-"
),
);
}
// Checks the expected event stream for a nested list that is followed by an
// unindented text line ("cd"). Per the expectations below, "cd" is emitted as
// a second inline of the nested `*` item's paragraph, and both lists are then
// exited. The blank line between "a" and the nested list appears as a
// `Blankline` atom inside the outer item, yet both lists are expected to be
// reported with `tight: true`.
//
// The markers are written as raw byte values here: 45 is ASCII `-` and 42 is
// ASCII `*`.
#[test]
fn parse_list_post() {
test_parse!(
concat!(
"- a\n", //
"\n", //
" * b\n", //
"cd\n", //
),
(
Enter(Container(List {
ty: Unordered(45),
tight: true
})),
"-"
),
(Enter(Container(ListItem(Unordered(45)))), "-"),
(Enter(Leaf(Paragraph)), ""),
(Inline, "a"),
(Exit(Leaf(Paragraph)), ""),
(Atom(Blankline), "\n"),
(
Enter(Container(List {
ty: Unordered(42),
tight: true
})),
"*"
),
(Enter(Container(ListItem(Unordered(42)))), "*"),
(Enter(Leaf(Paragraph)), ""),
(Inline, "b\n"),
(Inline, "cd"),
(Exit(Leaf(Paragraph)), ""),
(Exit(Container(ListItem(Unordered(42)))), "*"),
(
Exit(Container(List {
ty: Unordered(42),
tight: true
})),
"*"
),
(Exit(Container(ListItem(Unordered(45)))), "-"),
(
Exit(Container(List {
ty: Unordered(45),
tight: true
})),
"-"
),
);
}
// Checks that consecutive items with different bullet markers are not merged
// into one list. Per the expectations below, the single `-` item forms its own
// list, which is exited before a second list opens for the two `+` items.
// There are no blank lines in the input and both lists are expected to be
// `tight: true`.
#[test]
fn parse_list_mixed() {
test_parse!(
concat!(
"- a\n", //
"+ b\n", //
"+ c\n", //
),
(
Enter(Container(List {
ty: Unordered(b'-'),
tight: true
})),
"-"
),
(Enter(Container(ListItem(Unordered(b'-')))), "-"),
(Enter(Leaf(Paragraph)), ""),
(Inline, "a"),
(Exit(Leaf(Paragraph)), ""),
(Exit(Container(ListItem(Unordered(b'-')))), "-"),
(
Exit(Container(List {
ty: Unordered(b'-'),
tight: true
})),
"-"
),
(
Enter(Container(List {
ty: Unordered(b'+'),
tight: true
})),
"+"
),
(Enter(Container(ListItem(Unordered(b'+')))), "+"),
(Enter(Leaf(Paragraph)), ""),
(Inline, "b"),
(Exit(Leaf(Paragraph)), ""),
(Exit(Container(ListItem(Unordered(b'+')))), "+"),
(Enter(Container(ListItem(Unordered(b'+')))), "+"),
(Enter(Leaf(Paragraph)), ""),
(Inline, "c"),
(Exit(Leaf(Paragraph)), ""),
(Exit(Container(ListItem(Unordered(b'+')))), "+"),
(
Exit(Container(List {
ty: Unordered(b'+'),
tight: true
})),
"+"
),
);
}
@ -1081,7 +1342,7 @@ mod test {
#[test]
fn block_list_description() {
test_block!(": abc\n", Block::Container(ListItem(Description)), ":", 1);
test_block!(": abc\n", Block::Container(DescriptionList), ":", 1);
}
#[test]

View file

@ -103,18 +103,14 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
}
match &c {
Container::Blockquote => self.out.write_str("<blockquote")?,
Container::List {
kind: ListKind::Unordered | ListKind::Task,
..
} => {
self.out.write_str("<ul")?;
Container::List { kind, tight } => {
self.list_tightness.push(*tight);
match kind {
ListKind::Unordered | ListKind::Task => {
self.out.write_str("<ul")?
}
Container::List {
kind:
ListKind::Ordered {
numbering, start, ..
},
..
} => {
self.out.write_str("<ol")?;
if *start > 1 {
@ -130,6 +126,8 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
write!(self.out, r#" type="{}""#, ty)?;
}
}
}
}
Container::ListItem | Container::TaskListItem { .. } => {
self.out.write_str("<li")?;
}

View file

@ -293,9 +293,6 @@ pub struct Parser<'s> {
/// Inline parser, recreated for each new inline.
inline_parser: Option<inline::Parser<span::InlineCharsIter<'s>>>,
/// Stack of tightnesses for current open lists.
list_tightness: Vec<bool>,
/// Footnote references in the order they were encountered, without duplicates.
footnote_references: Vec<&'s str>,
/// Cache of footnotes to emit at the end.
@ -336,7 +333,6 @@ impl<'s> Parser<'s> {
src,
link_definitions,
tree: branch,
list_tightness: Vec::new(),
footnote_references: Vec::new(),
footnotes: std::collections::HashMap::new(),
footnote_index: 0,
@ -506,10 +502,8 @@ impl<'s> Parser<'s> {
self.footnotes.insert(content, self.tree.take_branch());
continue;
}
block::Container::List(block::ListType::Description) => {
Container::DescriptionList
}
block::Container::List(ty) => {
block::Container::DescriptionList => Container::DescriptionList,
block::Container::List { ty, tight } => {
let kind = match ty {
block::ListType::Unordered(..) => ListKind::Unordered,
block::ListType::Ordered(numbering, style) => {
@ -523,30 +517,6 @@ impl<'s> Parser<'s> {
}
}
block::ListType::Task => ListKind::Task,
block::ListType::Description => unreachable!(),
};
let tight = if enter {
let tight = !self.tree.linear().any(|elem| {
matches!(elem, block::Element::Atom(block::Atom::Blankline))
}) && !self.tree.linear_containers().any(
|(c, tree)| {
matches!(
c,
block::Node::Container(block::Container::ListItem(
..
))
) && tree.linear().any(|elem| {
matches!(
elem,
block::Element::Atom(block::Atom::Blankline)
)
})
},
);
self.list_tightness.push(tight);
tight
} else {
self.list_tightness.pop().unwrap()
};
Container::List { kind, tight }
}

View file

@ -8,10 +8,9 @@ pub enum EventKind<C, A> {
Atom(A),
}
#[derive(Debug, Clone)]
pub enum Element<C, A> {
Container(C),
Atom(A),
pub enum Element<'a, C, A> {
Container(&'a mut C),
Atom(&'a mut A),
Inline,
}
@ -29,14 +28,6 @@ pub struct Tree<C: 'static, A: 'static> {
}
impl<C: Clone, A: Clone> Tree<C, A> {
fn with_head(&self, head: Option<NodeIndex>) -> Self {
Self {
nodes: self.nodes.clone(),
branch: Vec::new(),
head,
}
}
pub fn empty() -> Self {
Self {
nodes: vec![].into_boxed_slice().into(),
@ -57,42 +48,6 @@ impl<C: Clone, A: Clone> Tree<C, A> {
count
}
/// Retrieve upcoming direct events without entering branches.
pub fn linear(&self) -> impl Iterator<Item = Element<C, A>> + '_ {
let mut head = self.head;
std::iter::from_fn(move || {
head.take().map(|h| {
let n = &self.nodes[h.index()];
head = n.next;
match &n.kind {
NodeKind::Root => unreachable!(),
NodeKind::Container(c, ..) => Element::Container(c.clone()),
NodeKind::Atom(a) => Element::Atom(a.clone()),
NodeKind::Inline => Element::Inline,
}
})
})
}
/// Retrieve the upcoming branches.
pub fn linear_containers(&self) -> impl Iterator<Item = (C, Self)> + '_ {
let mut head = self.head;
std::iter::from_fn(move || {
while let Some(h) = head.take() {
let n = &self.nodes[h.index()];
head = n.next;
match &n.kind {
NodeKind::Root => unreachable!(),
NodeKind::Container(c, child) => {
return Some((c.clone(), self.with_head(*child)));
}
NodeKind::Atom(_) | NodeKind::Inline => continue,
}
}
None
})
}
/// Split off the remaining part of the current branch. The returned [`Tree`] will continue on
/// the branch, this [`Tree`] will skip over the current branch.
pub fn take_branch(&mut self) -> Self {
@ -162,7 +117,7 @@ impl<C: Clone, A: Clone> Iterator for Tree<C, A> {
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct NodeIndex(std::num::NonZeroUsize);
pub struct NodeIndex(std::num::NonZeroUsize);
impl NodeIndex {
fn new(i: usize) -> Self {
@ -232,13 +187,13 @@ impl<C: Clone, A: Clone> Builder<C, A> {
});
}
pub(super) fn enter(&mut self, c: C, span: Span) {
pub(super) fn enter(&mut self, c: C, span: Span) -> NodeIndex {
self.depth += 1;
self.add_node(Node {
span,
kind: NodeKind::Container(c, None),
next: None,
});
})
}
pub(super) fn exit(&mut self) {
@ -251,6 +206,19 @@ impl<C: Clone, A: Clone> Builder<C, A> {
}
}
/// Returns the builder's current `depth` counter.
pub(super) fn depth(&self) -> usize {
self.depth
}
/// Gives mutable access to the payload of the node stored at index `ni`.
///
/// Container and atom nodes yield a mutable reference to their value wrapped
/// in the matching [`Element`] variant; inline nodes yield [`Element::Inline`],
/// which carries no data.
///
/// # Panics
///
/// Panics if `ni` refers to the root node, or if `ni` is out of bounds for
/// the builder's node storage.
pub(super) fn elem_mut(&mut self, ni: NodeIndex) -> Element<C, A> {
match &mut self.nodes[ni.index()].kind {
// The root carries no element; being asked for it is treated as a bug.
NodeKind::Root => unreachable!(),
// Only the container value is exposed; the remaining fields are ignored.
NodeKind::Container(c, ..) => Element::Container(c),
NodeKind::Atom(a) => Element::Atom(a),
NodeKind::Inline => Element::Inline,
}
}
pub(super) fn finish(self) -> Tree<C, A> {
assert_eq!(self.depth, 0);
let head = self.nodes[NodeIndex::root().index()].next;
@ -261,11 +229,7 @@ impl<C: Clone, A: Clone> Builder<C, A> {
}
}
pub(super) fn depth(&self) -> usize {
self.depth
}
fn add_node(&mut self, node: Node<C, A>) {
fn add_node(&mut self, node: Node<C, A>) -> NodeIndex {
let ni = NodeIndex::new(self.nodes.len());
self.nodes.push(node);
if let Some(head_ni) = &mut self.head {
@ -291,6 +255,7 @@ impl<C: Clone, A: Clone> Builder<C, A> {
panic!()
}
self.head = Some(ni);
ni
}
}