This commit is contained in:
Noah Hellman 2022-12-10 10:26:06 +01:00
parent 5afc6a41a8
commit 3a70cd8255
3 changed files with 148 additions and 143 deletions

View file

@ -3,6 +3,7 @@ use crate::EOF;
use crate::tree;
use Atom::*;
use Container::*;
use Leaf::*;
@ -15,6 +16,9 @@ pub fn parse(src: &str) -> Tree {
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Block {
/// An atomic block, containing no children elements.
Atom(Atom),
/// A leaf block, containing only inline elements.
Leaf(Leaf),
@ -22,6 +26,21 @@ pub enum Block {
Container(Container),
}
/// A block that holds no child elements.
///
/// The block parser adds each atom to the tree as a single node rather than
/// as an enter/exit pair.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Atom {
/// Inline content with unparsed inline elements.
///
/// Added once per line inside a leaf block; the block parser asserts that it
/// is never produced as a standalone block.
Inline,
/// A line with no non-whitespace characters.
///
/// Span is the terminating newline, or empty if the line ends without one.
Blankline,
/// A list of attributes.
Attributes,
/// A thematic break.
///
/// Span is from first to last character of the trimmed line.
ThematicBreak,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Leaf {
/// Span is empty, before first character of paragraph.
@ -43,10 +62,6 @@ pub enum Leaf {
/// Span is language specifier.
/// Each inline is a line.
CodeBlock { fence_length: u8, c: u8 },
/// Span is from first to last character.
/// No inlines.
ThematicBreak,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
@ -64,18 +79,6 @@ pub enum Container {
Footnote { indent: u8 },
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Atom {
/// Inline content with unparsed inline elements.
Inline,
/// A line with no non-whitespace characters.
Blankline,
/// A list of attributes.
Attributes,
}
struct Parser<'s> {
src: &'s str,
tree: tree::Builder<Block, Atom>,
@ -106,15 +109,8 @@ impl<'s> Parser<'s> {
/// Recursively parse a block and all of its children. Return number of lines the block uses.
fn parse_block(&mut self, lines: &mut [Span]) -> usize {
let blanklines = lines
.iter()
.take_while(|sp| sp.of(self.src).trim().is_empty())
.map(|sp| self.tree.elem(Atom::Blankline, *sp))
.count();
let lines = &mut lines[blanklines..];
Block::parse(lines.iter().map(|sp| sp.of(self.src))).map_or(
blanklines,
0,
|(kind, span, line_count)| {
let lines = {
let l = lines.len().min(line_count);
@ -147,7 +143,11 @@ impl<'s> Parser<'s> {
lines
};
match &kind {
match kind {
Block::Atom(a) => {
assert_ne!(a, Inline);
self.tree.atom(a, span);
}
Block::Leaf(l) => {
self.tree.enter(kind, span);
@ -170,9 +170,8 @@ impl<'s> Parser<'s> {
}
}
lines
.iter()
.for_each(|line| self.tree.elem(Atom::Inline, *line));
lines.iter().for_each(|line| self.tree.atom(Inline, *line));
self.tree.exit();
}
Block::Container(c) => {
let (skip_chars, skip_lines_suffix) = match &c {
@ -194,7 +193,7 @@ impl<'s> Parser<'s> {
.take_while(|c| c.is_whitespace())
.count()
+ usize::from(skip_chars))
.min(sp.len());
.min(sp.len() - usize::from(sp.of(self.src).ends_with('\n')));
*sp = sp.skip(skip);
});
@ -203,10 +202,11 @@ impl<'s> Parser<'s> {
while l < line_count_inner {
l += self.parse_block(&mut lines[l..line_count_inner]);
}
}
}
self.tree.exit();
blanklines + line_count
}
}
line_count
},
)
}
@ -229,11 +229,16 @@ impl Block {
/// Determine what type of block a line can start.
fn start(line: &str) -> (Self, Span) {
let start = line.chars().take_while(|c| c.is_whitespace()).count();
let start = line
.chars()
.take_while(|c| *c != '\n' && c.is_whitespace())
.count();
let line_t = &line[start..];
let mut chars = line_t.chars();
match chars.next().unwrap_or(EOF) {
EOF => Some((Self::Atom(Blankline), Span::empty_at(start))),
'\n' => Some((Self::Atom(Blankline), Span::by_len(start, 1))),
'#' => chars
.find(|c| *c != '#')
.map_or(true, char::is_whitespace)
@ -286,7 +291,7 @@ impl Block {
)
}),
'-' | '*' if Self::is_thematic_break(chars.clone()) => Some((
Self::Leaf(ThematicBreak),
Self::Atom(ThematicBreak),
Span::from_slice(line, line_t.trim()),
)),
'-' => chars.next().map_or(true, char::is_whitespace).then(|| {
@ -350,9 +355,9 @@ impl Block {
fn continues(self, line: &str) -> bool {
//let start = Self::start(line); // TODO allow starting new block without blank line
match self {
Self::Atom(..) => false,
Self::Leaf(Paragraph | Heading { .. } | Table) => !line.trim().is_empty(),
Self::Leaf(LinkDefinition) => line.starts_with(' ') && !line.trim().is_empty(),
Self::Leaf(ThematicBreak) => false,
Self::Container(Blockquote) => line.trim().starts_with('>'),
Self::Container(Footnote { indent } | ListItem { indent }) => {
let spaces = line.chars().take_while(|c| c.is_whitespace()).count();
@ -362,7 +367,7 @@ impl Block {
let fence = match self {
Self::Container(..) => ':',
Self::Leaf(CodeBlock { c, .. }) => c as char,
Self::Leaf(..) => unreachable!(),
Self::Leaf(..) | Self::Atom(..) => unreachable!(),
};
let mut c = line.chars();
!((&mut c).take((fence_length).into()).all(|c| c == fence)
@ -375,6 +380,7 @@ impl Block {
impl std::fmt::Display for Block {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Block::Atom(a) => std::fmt::Debug::fmt(a, f),
Block::Leaf(e) => std::fmt::Debug::fmt(e, f),
Block::Container(c) => std::fmt::Debug::fmt(c, f),
}
@ -408,6 +414,7 @@ fn lines(src: &str) -> impl Iterator<Item = Span> + '_ {
#[cfg(test)]
mod test {
use crate::tree::EventKind;
use crate::tree::EventKind::*;
use super::Atom::*;
@ -430,7 +437,7 @@ mod test {
test_parse!(
"para\n",
(Enter(Leaf(Paragraph)), ""),
(Element(Inline), "para"),
(EventKind::Atom(Inline), "para"),
(Exit(Leaf(Paragraph)), ""),
);
}
@ -440,8 +447,8 @@ mod test {
test_parse!(
"para0\npara1\n",
(Enter(Leaf(Paragraph)), ""),
(Element(Inline), "para0\n"),
(Element(Inline), "para1"),
(EventKind::Atom(Inline), "para0\n"),
(EventKind::Atom(Inline), "para1"),
(Exit(Leaf(Paragraph)), ""),
);
}
@ -457,39 +464,41 @@ mod test {
"15\n", //
),
(Enter(Leaf(Heading { level: 1 })), "#"),
(Element(Inline), "2"),
(EventKind::Atom(Inline), "2"),
(Exit(Leaf(Heading { level: 1 })), "#"),
(Element(Blankline), "\n"),
(EventKind::Atom(Blankline), "\n"),
(Enter(Leaf(Heading { level: 1 })), "#"),
(Element(Inline), "8\n"),
(Element(Inline), " 12\n"),
(Element(Inline), "15"),
(EventKind::Atom(Inline), "8\n"),
(EventKind::Atom(Inline), " 12\n"),
(EventKind::Atom(Inline), "15"),
(Exit(Leaf(Heading { level: 1 })), "#"),
);
}
#[test]
fn parse_blockquote() {
/*
test_parse!(
"> a\n",
(Enter(Container(Blockquote)), ">"),
(Enter(Leaf(Paragraph)), ""),
(Element(Inline), "a"),
(Exit(Leaf(Paragraph)), ""),
(Exit(Container(Blockquote)), ">"),
(Enter, Container(Blockquote), ">"),
(Enter, Leaf(Paragraph), ""),
(Element, Atom(Inline), "a"),
(Exit, Leaf(Paragraph), ""),
(Exit, Container(Blockquote), ">"),
);
test_parse!(
"> \n",
(Enter(Container(Blockquote)), ">"),
(Element(Blankline), " \n"),
(Exit(Container(Blockquote)), ">"),
(Enter, Container(Blockquote), ">"),
(Element, Atom(Blankline), "\n"),
(Exit, Container(Blockquote), ">"),
);
test_parse!(
">",
(Enter(Container(Blockquote)), ">"),
(Element(Blankline), ""),
(Exit(Container(Blockquote)), ">"),
(Enter, Container(Blockquote), ">"),
(Element, Atom(Blankline), ""),
(Exit, Container(Blockquote), ">"),
);
*/
test_parse!(
concat!(
"> a\n",
@ -500,15 +509,15 @@ mod test {
),
(Enter(Container(Blockquote)), ">"),
(Enter(Leaf(Paragraph)), ""),
(Element(Inline), "a"),
(EventKind::Atom(Inline), "a"),
(Exit(Leaf(Paragraph)), ""),
(Element(Blankline), ""),
(EventKind::Atom(Blankline), "\n"),
(Enter(Leaf(Heading { level: 2 })), "##"),
(Element(Inline), "hl"),
(EventKind::Atom(Inline), "hl"),
(Exit(Leaf(Heading { level: 2 })), "##"),
(Element(Blankline), ""),
(EventKind::Atom(Blankline), "\n"),
(Enter(Leaf(Paragraph)), ""),
(Element(Inline), "para"),
(EventKind::Atom(Inline), "para"),
(Exit(Leaf(Paragraph)), ""),
(Exit(Container(Blockquote)), ">"),
);
@ -519,13 +528,13 @@ mod test {
test_parse!(
"> \n",
(Enter(Container(Blockquote)), ">"),
(Element(Blankline), "\n"),
(EventKind::Atom(Blankline), "\n"),
(Exit(Container(Blockquote)), ">"),
);
test_parse!(
">",
(Enter(Container(Blockquote)), ">"),
(Element(Blankline), ""),
(EventKind::Atom(Blankline), ""),
(Exit(Container(Blockquote)), ">"),
);
}
@ -541,7 +550,7 @@ mod test {
})),
"",
),
(Element(Inline), "l0\n"),
(EventKind::Atom(Inline), "l0\n"),
(
Exit(Leaf(CodeBlock {
fence_length: 3,
@ -565,7 +574,7 @@ mod test {
})),
""
),
(Element(Inline), "l0\n"),
(EventKind::Atom(Inline), "l0\n"),
(
Exit(Leaf(CodeBlock {
fence_length: 3,
@ -573,9 +582,9 @@ mod test {
})),
""
),
(Element(Blankline), "\n"),
(EventKind::Atom(Blankline), "\n"),
(Enter(Leaf(Paragraph)), ""),
(Element(Inline), "para"),
(EventKind::Atom(Inline), "para"),
(Exit(Leaf(Paragraph)), ""),
);
test_parse!(
@ -593,9 +602,9 @@ mod test {
})),
"lang"
),
(Element(Inline), "l0\n"),
(Element(Inline), "```\n"),
(Element(Inline), " l1\n"),
(EventKind::Atom(Inline), "l0\n"),
(EventKind::Atom(Inline), "```\n"),
(EventKind::Atom(Inline), " l1\n"),
(
Exit(Leaf(CodeBlock {
fence_length: 4,
@ -620,7 +629,7 @@ mod test {
})),
""
),
(Element(Inline), "a\n"),
(EventKind::Atom(Inline), "a\n"),
(
Exit(Leaf(CodeBlock {
fence_length: 3,
@ -635,7 +644,7 @@ mod test {
})),
""
),
(Element(Inline), "bbb\n"),
(EventKind::Atom(Inline), "bbb\n"),
(
Exit(Leaf(CodeBlock {
fence_length: 3,
@ -658,8 +667,8 @@ mod test {
})),
"",
),
(Element(Inline), "code\n"),
(Element(Inline), " block\n"),
(EventKind::Atom(Inline), "code\n"),
(EventKind::Atom(Inline), " block\n"),
(
Exit(Leaf(CodeBlock {
fence_length: 3,
@ -675,7 +684,7 @@ mod test {
test_parse!(
"[tag]: url\n",
(Enter(Leaf(LinkDefinition)), "tag"),
(Element(Inline), "url"),
(EventKind::Atom(Inline), "url"),
(Exit(Leaf(LinkDefinition)), "tag"),
);
}
@ -686,7 +695,7 @@ mod test {
"[^tag]: description\n",
(Enter(Container(Footnote { indent: 0 })), "tag"),
(Enter(Leaf(Paragraph)), ""),
(Element(Inline), "description"),
(EventKind::Atom(Inline), "description"),
(Exit(Leaf(Paragraph)), ""),
(Exit(Container(Footnote { indent: 0 })), "tag"),
);
@ -705,6 +714,12 @@ mod test {
};
}
// A line holding only a newline, or only whitespace followed by a newline,
// must be recognised as a `Blankline` atom.
// NOTE(review): `test_block!` is defined outside this view; judging by its
// other uses, the third argument is the expected span text and the fourth the
// number of lines consumed — confirm. If so, the span excludes the leading
// whitespace and covers only the newline.
#[test]
fn block_blankline() {
test_block!("\n", Block::Atom(Blankline), "\n", 1);
test_block!(" \n", Block::Atom(Blankline), "\n", 1);
}
#[test]
fn block_multiline() {
test_block!(
@ -733,14 +748,14 @@ mod test {
#[test]
fn block_thematic_break() {
test_block!("---\n", Block::Leaf(ThematicBreak), "---", 1);
test_block!("---\n", Block::Atom(ThematicBreak), "---", 1);
test_block!(
concat!(
" -*- -*-\n",
"\n", //
"para", //
),
Block::Leaf(ThematicBreak),
Block::Atom(ThematicBreak),
"-*- -*-",
1
);

View file

@ -269,6 +269,7 @@ impl<'s> Event<'s> {
impl<'s> Container<'s> {
fn from_block(src: &'s str, block: block::Block) -> Self {
match block {
block::Block::Atom(a) => todo!(),
block::Block::Leaf(l) => match l {
block::Leaf::Paragraph => Self::Paragraph,
block::Leaf::Heading { level } => Self::Heading { level },
@ -342,14 +343,14 @@ impl<'s> Iterator for Parser<'s> {
return Some(Event::from_inline(self.src, inline));
} else if let Some(ev) = self.tree.next() {
match ev.kind {
tree::EventKind::Element(atom) => {
assert_eq!(atom, block::Atom::Inline);
let last_inline = self.tree.neighbors().next().is_none();
tree::EventKind::Atom(a) => {
assert_eq!(a, block::Atom::Inline);
let last_inline = self.tree.atoms().next().is_none();
parser.parse(ev.span.of(self.src), last_inline);
}
tree::EventKind::Exit(block) => {
tree::EventKind::Exit(c) => {
self.parser = None;
return Some(Event::End(Container::from_block(self.src, block)));
return Some(Event::End(Container::from_block(self.src, c)));
}
tree::EventKind::Enter(..) => unreachable!(),
}
@ -359,20 +360,21 @@ impl<'s> Iterator for Parser<'s> {
for ev in &mut self.tree {
let content = ev.span.of(self.src);
let event = match ev.kind {
tree::EventKind::Element(atom) => match atom {
tree::EventKind::Atom(a) => match a {
block::Atom::Inline => panic!("inline outside leaf block"),
block::Atom::Blankline => Event::Atom(Atom::Blankline),
block::Atom::ThematicBreak => Event::Atom(Atom::ThematicBreak),
block::Atom::Attributes => {
self.block_attributes.parse(content);
continue;
}
},
tree::EventKind::Enter(block) => {
if matches!(block, block::Block::Leaf(_)) {
tree::EventKind::Enter(c) => {
if matches!(c, block::Block::Leaf(_)) {
self.parser = Some(inline::Parser::new());
self.inline_start = ev.span.end();
}
let container = match block {
let container = match c {
block::Block::Leaf(block::Leaf::CodeBlock { .. }) => {
self.inline_start += 1; // skip newline
Container::CodeBlock {
@ -386,7 +388,7 @@ impl<'s> Iterator for Parser<'s> {
};
Event::Start(container, self.block_attributes.take())
}
tree::EventKind::Exit(block) => Event::End(Container::from_block(self.src, block)),
tree::EventKind::Exit(c) => Event::End(Container::from_block(self.src, c)),
};
return Some(event);
}
@ -465,6 +467,7 @@ mod test {
Start(Paragraph, Attributes::none()),
Str("para0"),
End(Paragraph),
Atom(Blankline),
Start(Paragraph, Attributes::none()),
Str("para1"),
End(Paragraph),

View file

@ -1,10 +1,10 @@
use crate::Span;
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EventKind<C, E> {
pub enum EventKind<C, A> {
Enter(C),
Element(E),
Exit(C),
Atom(A),
}
#[derive(Debug, Clone, PartialEq, Eq)]
@ -13,25 +13,15 @@ pub struct Event<C, A> {
pub span: Span,
}
pub struct Object<C, E> {
kind: ObjectKind<C, E>,
span: Span,
}
pub enum ObjectKind<C, E> {
Container(C),
Element(E),
}
#[derive(Debug, Clone)]
pub struct Tree<C, E> {
nodes: Vec<Node<C, E>>,
#[derive(Clone)]
pub struct Tree<C, A> {
nodes: Vec<Node<C, A>>,
branch: Vec<NodeIndex>,
head: Option<NodeIndex>,
}
impl<C: Copy, E: Copy> Tree<C, E> {
fn new(nodes: Vec<Node<C, E>>) -> Self {
impl<C: Copy, A: Copy> Tree<C, A> {
fn new(nodes: Vec<Node<C, A>>) -> Self {
let head = nodes[NodeIndex::root().index()].next;
Self {
nodes,
@ -40,26 +30,25 @@ impl<C: Copy, E: Copy> Tree<C, E> {
}
}
pub fn neighbors(&self) -> impl Iterator<Item = Object<C, E>> + '_ {
pub fn atoms(&self) -> impl Iterator<Item = (A, Span)> + '_ {
let mut head = self.head;
std::iter::from_fn(move || {
head.take().map(|h| {
let n = &self.nodes[h.index()];
let kind = match &n.kind {
NodeKind::Root => unreachable!(),
NodeKind::Container(c, _) => ObjectKind::Container(*c),
NodeKind::Element(e) => ObjectKind::Element(*e),
NodeKind::Container(..) => panic!(),
NodeKind::Atom(a) => *a,
};
let span = n.span;
head = n.next;
Object { kind, span }
(kind, n.span)
})
})
}
}
impl<C: Copy, E: Copy> Iterator for Tree<C, E> {
type Item = Event<C, E>;
impl<C: Copy, A: Copy> Iterator for Tree<C, A> {
type Item = Event<C, A>;
fn next(&mut self) -> Option<Self::Item> {
if let Some(head) = self.head {
@ -71,9 +60,9 @@ impl<C: Copy, E: Copy> Iterator for Tree<C, E> {
self.head = *child;
EventKind::Enter(*c)
}
NodeKind::Element(e) => {
NodeKind::Atom(e) => {
self.head = n.next;
EventKind::Element(*e)
EventKind::Atom(*e)
}
};
Some(Event { kind, span: n.span })
@ -114,27 +103,27 @@ impl NodeIndex {
}
#[derive(Debug, Clone)]
enum NodeKind<C, E> {
enum NodeKind<C, A> {
Root,
Container(C, Option<NodeIndex>),
Element(E),
Atom(A),
}
#[derive(Debug, Clone)]
struct Node<C, E> {
struct Node<C, A> {
span: Span,
kind: NodeKind<C, E>,
kind: NodeKind<C, A>,
next: Option<NodeIndex>,
}
#[derive(Debug, Clone)]
pub struct Builder<C, E> {
nodes: Vec<Node<C, E>>,
#[derive(Clone)]
pub struct Builder<C, A> {
nodes: Vec<Node<C, A>>,
branch: Vec<NodeIndex>,
head: Option<NodeIndex>,
}
impl<C: Copy, E: Copy> Builder<C, E> {
impl<C: Copy, A: Copy> Builder<C, A> {
pub(super) fn new() -> Self {
Builder {
nodes: vec![Node {
@ -147,10 +136,10 @@ impl<C: Copy, E: Copy> Builder<C, E> {
}
}
pub(super) fn elem(&mut self, e: E, span: Span) {
pub(super) fn atom(&mut self, a: A, span: Span) {
self.add_node(Node {
span,
kind: NodeKind::Element(e),
kind: NodeKind::Atom(a),
next: None,
});
}
@ -172,17 +161,17 @@ impl<C: Copy, E: Copy> Builder<C, E> {
}
}
pub(super) fn finish(self) -> Tree<C, E> {
pub(super) fn finish(self) -> Tree<C, A> {
Tree::new(self.nodes)
}
fn add_node(&mut self, node: Node<C, E>) {
fn add_node(&mut self, node: Node<C, A>) {
let ni = NodeIndex::new(self.nodes.len());
self.nodes.push(node);
if let Some(head_ni) = &mut self.head {
let mut head = &mut self.nodes[head_ni.index()];
match &mut head.kind {
NodeKind::Root | NodeKind::Element(_) => {
NodeKind::Root | NodeKind::Atom(_) => {
// update next pointer of previous node
assert_eq!(head.next, None);
head.next = Some(ni);
@ -205,30 +194,28 @@ impl<C: Copy, E: Copy> Builder<C, E> {
}
}
impl<C: Copy + std::fmt::Display, E: Copy + std::fmt::Display> std::fmt::Display for Builder<C, E> {
impl<C: Copy + std::fmt::Debug, A: Copy + std::fmt::Debug> std::fmt::Debug for Builder<C, A> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.clone().finish().fmt(f)
}
}
impl<C: Copy + std::fmt::Display, E: Copy + std::fmt::Display> std::fmt::Display for Tree<C, E> {
impl<C: Copy + std::fmt::Debug, A: Copy + std::fmt::Debug> std::fmt::Debug for Tree<C, A> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
const INDENT: &str = " ";
let mut level = 0;
for e in self.clone() {
let indent = INDENT.repeat(level);
match e.kind {
EventKind::Enter(container) => {
write!(f, "{}{}", indent, container)?;
EventKind::Enter(c) => {
write!(f, "{}{:?}", indent, c)?;
level += 1;
}
EventKind::Exit(_) => {
EventKind::Exit(..) => {
level -= 1;
continue;
}
EventKind::Element(element) => {
write!(f, "{}{}", indent, element)?;
}
EventKind::Atom(a) => write!(f, "{}{:?}", indent, a)?,
}
writeln!(f, " ({}:{})", e.span.start(), e.span.end())?;
}
@ -243,11 +230,11 @@ mod test {
#[test]
fn fmt_linear() {
let mut tree: super::Builder<u8, u8> = super::Builder::new();
tree.elem(1, Span::new(0, 1));
tree.elem(2, Span::new(1, 2));
tree.elem(3, Span::new(3, 4));
tree.atom(1, Span::new(0, 1));
tree.atom(2, Span::new(1, 2));
tree.atom(3, Span::new(3, 4));
assert_eq!(
tree.to_string(),
format!("{:?}", tree),
concat!(
"1 (0:1)\n",
"2 (1:2)\n",
@ -260,24 +247,24 @@ mod test {
fn fmt_container() {
let mut tree: super::Builder<u8, u16> = super::Builder::new();
tree.enter(1, Span::new(0, 1));
tree.elem(11, Span::new(0, 1));
tree.elem(12, Span::new(0, 1));
tree.atom(11, Span::new(0, 1));
tree.atom(12, Span::new(0, 1));
tree.exit();
tree.enter(2, Span::new(1, 5));
tree.enter(21, Span::new(2, 5));
tree.enter(211, Span::new(3, 4));
tree.elem(2111, Span::new(3, 4));
tree.atom(2111, Span::new(3, 4));
tree.exit();
tree.exit();
tree.enter(22, Span::new(4, 5));
tree.elem(221, Span::new(4, 5));
tree.atom(221, Span::new(4, 5));
tree.exit();
tree.exit();
tree.enter(3, Span::new(5, 6));
tree.elem(31, Span::new(5, 6));
tree.atom(31, Span::new(5, 6));
tree.exit();
assert_eq!(
tree.to_string(),
format!("{:?}", tree),
concat!(
"1 (0:1)\n",
" 11 (0:1)\n",