block: parse list item

This commit is contained in:
Noah Hellman 2023-01-21 11:13:24 +01:00
parent 7ce78f95fa
commit 44a6961beb
2 changed files with 189 additions and 18 deletions

View file

@ -1,3 +1,5 @@
use crate::OrderedListNumbering::*;
use crate::OrderedListStyle::*;
use crate::Span; use crate::Span;
use crate::EOF; use crate::EOF;
@ -7,6 +9,7 @@ use crate::tree;
use Atom::*; use Atom::*;
use Container::*; use Container::*;
use Leaf::*; use Leaf::*;
use ListType::*;
pub type Tree = tree::Tree<Node, Atom>; pub type Tree = tree::Tree<Node, Atom>;
pub type Branch = tree::Branch<Node, Atom>; pub type Branch = tree::Branch<Node, Atom>;
@ -79,12 +82,20 @@ pub enum Container {
Div, Div,
/// Span is the list marker. /// Span is the list marker.
ListItem, ListItem(ListType),
/// Span is footnote tag. /// Span is footnote tag.
Footnote, Footnote,
} }
/// Kind of list item carried by `Container::ListItem`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ListType {
/// Bullet list item; the payload is the marker character (`-`, `*` or `+`) as a byte.
Bullet(u8),
/// Task list item: a bullet marker followed by a `[ ]`, `[x]` or `[X]` checkbox.
Task,
/// Ordered list item, with the numbering scheme and the delimiter style of its marker.
Ordered(crate::OrderedListNumbering, crate::OrderedListStyle),
/// Description list item, introduced by a `:` marker.
Description,
}
/// Parser for block-level tree structure of entire document. /// Parser for block-level tree structure of entire document.
struct TreeParser<'s> { struct TreeParser<'s> {
src: &'s str, src: &'s str,
@ -182,7 +193,7 @@ impl<'s> TreeParser<'s> {
Block::Container(c) => { Block::Container(c) => {
let (skip_chars, skip_lines_suffix) = match c { let (skip_chars, skip_lines_suffix) = match c {
Blockquote => (2, 0), Blockquote => (2, 0),
ListItem | Footnote => (indent, 0), ListItem(..) | Footnote => (indent, 0),
Div => (0, 1), Div => (0, 1),
}; };
let line_count_inner = lines.len() - skip_lines_suffix; let line_count_inner = lines.len() - skip_lines_suffix;
@ -300,19 +311,24 @@ impl BlockParser {
Block::Atom(ThematicBreak), Block::Atom(ThematicBreak),
Span::from_slice(line, line_t.trim()), Span::from_slice(line, line_t.trim()),
)), )),
'-' => chars.next().map_or(true, char::is_whitespace).then(|| { b @ ('-' | '*' | '+') => chars.next().map_or(true, char::is_whitespace).then(|| {
let task_list = chars.next() == Some('[') let task_list = chars.next() == Some('[')
&& matches!(chars.next(), Some('X' | ' ')) && matches!(chars.next(), Some('x' | 'X' | ' '))
&& chars.next() == Some(']') && chars.next() == Some(']')
&& chars.next().map_or(true, char::is_whitespace); && chars.next().map_or(true, char::is_whitespace);
if task_list {
(Block::Container(ListItem(Task)), Span::by_len(start, 5))
} else {
( (
Block::Container(ListItem), Block::Container(ListItem(Bullet(b as u8))),
Span::by_len(start, if task_list { 3 } else { 1 }), Span::by_len(start, 1),
) )
}),
'+' | '*' | ':' if chars.next().map_or(true, char::is_whitespace) => {
Some((Block::Container(ListItem), Span::by_len(start, 1)))
} }
}),
':' if chars.clone().next().map_or(true, char::is_whitespace) => Some((
Block::Container(ListItem(Description)),
Span::by_len(start, 1),
)),
f @ ('`' | ':' | '~') => { f @ ('`' | ':' | '~') => {
let fence_length = (&mut chars).take_while(|c| *c == f).count() + 1; let fence_length = (&mut chars).take_while(|c| *c == f).count() + 1;
fence = Some((f, fence_length)); fence = Some((f, fence_length));
@ -329,7 +345,12 @@ impl BlockParser {
) )
}) })
} }
_ => None, c => maybe_ordered_list_item(c, &mut chars).map(|(num, fmt, len)| {
(
Block::Container(ListItem(Ordered(num, fmt))),
Span::by_len(start, len),
)
}),
} }
.unwrap_or((Block::Leaf(Paragraph), Span::new(0, 0))); .unwrap_or((Block::Leaf(Paragraph), Span::new(0, 0)));
@ -360,7 +381,7 @@ impl BlockParser {
Block::Leaf(Paragraph | Heading | Table) => !line.trim().is_empty(), Block::Leaf(Paragraph | Heading | Table) => !line.trim().is_empty(),
Block::Leaf(LinkDefinition) => line.starts_with(' ') && !line.trim().is_empty(), Block::Leaf(LinkDefinition) => line.starts_with(' ') && !line.trim().is_empty(),
Block::Container(Blockquote) => line.trim().starts_with('>'), Block::Container(Blockquote) => line.trim().starts_with('>'),
Block::Container(Footnote | ListItem) => { Block::Container(Footnote | ListItem(..)) => {
let spaces = line.chars().take_while(|c| c.is_whitespace()).count(); let spaces = line.chars().take_while(|c| c.is_whitespace()).count();
line.trim().is_empty() || spaces > self.indent line.trim().is_empty() || spaces > self.indent
} }
@ -374,6 +395,82 @@ impl BlockParser {
} }
} }
/// Tries to parse an ordered list marker such as `1.`, `a)` or `(iv)`.
///
/// `first` is the first character of the candidate marker and `chars` yields the characters
/// that follow it. `chars` is advanced as the marker is examined, also when parsing fails.
///
/// A marker consists of an optional opening parenthesis, a number made of characters of a
/// single class (ASCII digits, ASCII lowercase letters or ASCII uppercase letters) and a
/// delimiter. It must be followed by whitespace or by the end of the input.
///
/// Returns the numbering scheme, the delimiter style and the length of the whole marker
/// (number plus delimiters) in characters, or `None` if no valid marker was found:
///
/// - `(n)` yields `ParenParen`, `n)` yields `Paren` and `n.` yields `Period`,
/// - an alphabetic number made up solely of roman digits is reported as roman numbering.
fn maybe_ordered_list_item(
    mut first: char,
    chars: &mut std::str::Chars,
) -> Option<(crate::OrderedListNumbering, crate::OrderedListStyle, usize)> {
    let start_paren = first == '(';
    if start_paren {
        first = chars.next().unwrap_or(EOF);
    }

    // Roman digits are ASCII letters, so the first character alone can only tell decimal
    // from alphabetic numbering. Whether an alphabetic number is actually roman is decided
    // below, once the whole number is known.
    let numbering = if first.is_ascii_digit() {
        Decimal
    } else if first.is_ascii_lowercase() {
        AlphaLower
    } else if first.is_ascii_uppercase() {
        AlphaUpper
    } else {
        return None;
    };

    // Keep a copy positioned right after `first` so that the characters of the number can
    // be revisited after `chars` has been advanced past them.
    let chars_num = chars.clone();
    let len_num = 1 + chars_num
        .clone()
        .take_while(|c| match numbering {
            Decimal => c.is_ascii_digit(),
            AlphaLower => c.is_ascii_lowercase(),
            AlphaUpper => c.is_ascii_uppercase(),
            // Not produced by the classification above; listed to keep the match exhaustive.
            RomanLower => is_roman_lower_digit(*c),
            RomanUpper => is_roman_upper_digit(*c),
        })
        .count();

    // Skip the remaining characters of the number and take the delimiter that follows it.
    let post_num = chars.nth(len_num - 1)?;
    let style = match (start_paren, post_num) {
        (true, ')') => ParenParen,
        (false, ')') => Paren,
        (false, '.') => Period,
        _ => return None,
    };
    // One closing delimiter, plus the opening parenthesis if there was one.
    let len_style = usize::from(start_paren) + 1;

    // Upgrade alphabetic numbering to roman if every character of the number is a roman
    // digit of the matching case.
    let chars_num = std::iter::once(first).chain(chars_num.take(len_num - 1));
    let numbering =
        if matches!(numbering, AlphaLower) && chars_num.clone().all(is_roman_lower_digit) {
            RomanLower
        } else if matches!(numbering, AlphaUpper) && chars_num.clone().all(is_roman_upper_digit) {
            RomanUpper
        } else {
            numbering
        };

    // The marker must be followed by whitespace or by the end of the input.
    chars
        .next()
        .map_or(true, char::is_whitespace)
        .then(|| (numbering, style, len_num + len_style))
}
/// Returns `true` if `c` is one of the lowercase roman numeral digits
/// `i`, `v`, `x`, `l`, `c`, `d` or `m`.
fn is_roman_lower_digit(c: char) -> bool {
    const ROMAN_LOWER_DIGITS: &str = "ivxlcdm";
    ROMAN_LOWER_DIGITS.contains(c)
}
/// Returns `true` if `c` is one of the uppercase roman numeral digits
/// `I`, `V`, `X`, `L`, `C`, `D` or `M`.
fn is_roman_upper_digit(c: char) -> bool {
    const ROMAN_UPPER_DIGITS: &str = "IVXLCDM";
    ROMAN_UPPER_DIGITS.contains(c)
}
impl std::fmt::Display for Block { impl std::fmt::Display for Block {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self { match self {
@ -411,13 +508,16 @@ fn lines(src: &str) -> impl Iterator<Item = Span> + '_ {
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use crate::tree::EventKind::*;
use crate::tree::EventKind; use crate::tree::EventKind;
use crate::tree::EventKind::*;
use crate::OrderedListNumbering::*;
use crate::OrderedListStyle::*;
use super::Atom::*; use super::Atom::*;
use super::Block; use super::Block;
use super::Container::*; use super::Container::*;
use super::Leaf::*; use super::Leaf::*;
use super::ListType::*;
use super::Node::*; use super::Node::*;
macro_rules! test_parse { macro_rules! test_parse {
@ -659,6 +759,18 @@ mod test {
); );
} }
// A single `-` bullet item is expected to parse into a list item container, tagged with
// the bullet byte, that wraps one paragraph holding the inline text.
#[test]
fn parse_list() {
test_parse!(
"- abc\n",
(Enter(Container(ListItem(Bullet(b'-')))), "-"),
(Enter(Leaf(Paragraph)), ""),
(Inline, "abc"),
(Exit(Leaf(Paragraph)), ""),
(Exit(Container(ListItem(Bullet(b'-')))), "-"),
);
}
macro_rules! test_block { macro_rules! test_block {
($src:expr, $kind:expr, $str:expr, $len:expr $(,)?) => { ($src:expr, $kind:expr, $str:expr, $len:expr $(,)?) => {
let lines = super::lines($src).map(|sp| sp.of($src)); let lines = super::lines($src).map(|sp| sp.of($src));
@ -822,4 +934,63 @@ mod test {
3, 3,
); );
} }
// Each of the bullet characters `-`, `+` and `*` starts a bullet list item whose type
// records the marker byte that was used.
#[test]
fn block_list_bullet() {
test_block!("- abc\n", Block::Container(ListItem(Bullet(b'-'))), "-", 1);
test_block!("+ abc\n", Block::Container(ListItem(Bullet(b'+'))), "+", 1);
test_block!("* abc\n", Block::Container(ListItem(Bullet(b'*'))), "*", 1);
}
// A `:` followed by whitespace starts a description list item.
#[test]
fn block_list_description() {
test_block!(": abc\n", Block::Container(ListItem(Description)), ":", 1);
}
// A bullet marker followed by a `[ ]`, `[x]` or `[X]` checkbox starts a task list item,
// regardless of the bullet character; the expected text covers bullet and checkbox.
#[test]
fn block_list_task() {
test_block!("- [ ] abc\n", Block::Container(ListItem(Task)), "- [ ]", 1);
test_block!("+ [x] abc\n", Block::Container(ListItem(Task)), "+ [x]", 1);
test_block!("* [X] abc\n", Block::Container(ListItem(Task)), "* [X]", 1);
}
// Ordered list markers: covers decimal, roman and alphabetic numbering as well as the
// `n.`, `n)` and `(n)` delimiter styles.
#[test]
fn block_list_ordered() {
// Multi-digit decimal number.
test_block!(
"123. abc\n",
Block::Container(ListItem(Ordered(Decimal, Period))),
"123.",
1
);
// A number consisting only of roman digits is treated as roman rather than alphabetic.
test_block!(
"i. abc\n",
Block::Container(ListItem(Ordered(RomanLower, Period))),
"i.",
1
);
test_block!(
"I. abc\n",
Block::Container(ListItem(Ordered(RomanUpper, Period))),
"I.",
1
);
// `J` is not a roman digit, so the whole number stays alphabetic.
test_block!(
"IJ. abc\n",
Block::Container(ListItem(Ordered(AlphaUpper, Period))),
"IJ.",
1
);
// Number enclosed in parentheses.
test_block!(
"(a) abc\n",
Block::Container(ListItem(Ordered(AlphaLower, ParenParen))),
"(a)",
1
);
// Number followed by a closing parenthesis only.
test_block!(
"a) abc\n",
Block::Container(ListItem(Ordered(AlphaLower, Paren))),
"a)",
1
);
}
} }

View file

@ -182,13 +182,13 @@ pub enum LinkType {
#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum List { pub enum List {
Unordered, Unordered,
Ordered { kind: OrderedListKind, start: u32 }, Ordered { kind: OrderedListNumbering, start: u32 },
Description, Description,
Task, Task,
} }
#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OrderedListKind { pub enum OrderedListNumbering {
/// Decimal numbering, e.g. `1)`. /// Decimal numbering, e.g. `1)`.
Decimal, Decimal,
/// Lowercase alphabetic numbering, e.g. `a)`. /// Lowercase alphabetic numbering, e.g. `a)`.
@ -473,7 +473,7 @@ impl<'s> Parser<'s> {
self.footnotes.insert(content, self.tree.take_branch()); self.footnotes.insert(content, self.tree.take_branch());
continue; continue;
} }
block::Container::ListItem => panic!(), block::Container::ListItem(..) => panic!(),
}; };
Event::Start(container, attributes) Event::Start(container, attributes)
} }
@ -487,7 +487,7 @@ impl<'s> Parser<'s> {
class: (!ev.span.is_empty()).then(|| content), class: (!ev.span.is_empty()).then(|| content),
}, },
block::Container::Footnote => panic!(), block::Container::Footnote => panic!(),
block::Container::ListItem => panic!(), block::Container::ListItem(..) => panic!(),
}; };
Event::End(container) Event::End(container)
} }