diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 55571f3..b332999 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -72,7 +72,6 @@ jobs: matrix: target: - parse - - parse_balance - html runs-on: ubuntu-latest steps: diff --git a/examples/jotdown_wasm/demo.html b/examples/jotdown_wasm/demo.html index ba7679b..2a028d2 100644 --- a/examples/jotdown_wasm/demo.html +++ b/examples/jotdown_wasm/demo.html @@ -21,7 +21,10 @@ output.innerText = jotdown_render(input.innerText); } else if (fmt.value == "events") { output.classList.add("verbatim") - output.innerText = jotdown_parse(input.innerText); + output.innerText = jotdown_parse(input.innerText, false); + } else if (fmt.value == "events_spans") { + output.classList.add("verbatim") + output.innerText = jotdown_parse(input.innerText, true); } else if (fmt.value == "events_indent") { output.classList.add("verbatim") output.innerText = jotdown_parse_indent(input.innerText); @@ -50,6 +53,7 @@ + diff --git a/examples/jotdown_wasm/src/lib.rs b/examples/jotdown_wasm/src/lib.rs index 9aeb866..4d6d4e8 100644 --- a/examples/jotdown_wasm/src/lib.rs +++ b/examples/jotdown_wasm/src/lib.rs @@ -22,10 +22,16 @@ pub fn jotdown_render(djot: &str) -> String { #[must_use] #[wasm_bindgen] -pub fn jotdown_parse(djot: &str) -> String { - jotdown::Parser::new(djot) - .map(|e| format!("{:?}\n", e)) - .collect() +pub fn jotdown_parse(djot: &str, spans: bool) -> String { + let mut out = String::new(); + for (e, sp) in jotdown::Parser::new(djot).into_offset_iter() { + write!(out, "{:?}", e).unwrap(); + if spans { + write!(out, " {:?} {:?}", &djot[sp.clone()], sp).unwrap(); + } + writeln!(out).unwrap(); + } + out } #[must_use] diff --git a/src/block.rs b/src/block.rs index ba4b602..2ce775d 100644 --- a/src/block.rs +++ b/src/block.rs @@ -5,105 +5,87 @@ use crate::Span; use crate::attr; use crate::lex; -use crate::tree; use Atom::*; use Container::*; use Leaf::*; use ListType::*; -pub type Tree = tree::Tree; 
-pub type TreeBuilder = tree::Builder; +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub struct Event<'s> { + pub kind: EventKind<'s>, + pub span: Span, +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum EventKind<'s> { + Enter(Node<'s>), + Inline, + Exit(Node<'s>), + Atom(Atom), + Stale, +} #[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum Node { - Container(Container), - Leaf(Leaf), +pub enum Node<'s> { + Container(Container<'s>), + Leaf(Leaf<'s>), } #[must_use] -pub fn parse(src: &str) -> Tree { +pub fn parse(src: &str) -> Vec { TreeParser::new(src).parse() } #[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum Block { +enum Block<'s> { /// An atomic block, containing no children elements. Atom(Atom), - /// A leaf block, containing only inline elements. - Leaf(Leaf), - + Leaf(Leaf<'s>), /// A container block, containing children blocks. - Container(Container), + Container(Container<'s>), } #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Atom { /// A line with no non-whitespace characters. Blankline, - /// A list of attributes. Attributes, - /// A thematic break. ThematicBreak, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum Leaf { - /// Span is empty, before first character of paragraph. - /// Each inline is a line. +pub enum Leaf<'s> { Paragraph, - - /// Span is `#` characters. - /// Each inline is a line. - Heading { has_section: bool }, - - /// Span is empty. + Heading { + level: u16, + has_section: bool, + pos: u32, + }, DescriptionTerm, - - /// Span is '|'. - /// Has zero or one inline for the cell contents. TableCell(Alignment), - - /// Span is '^' character. Caption, - - /// Span is the link tag. - /// Inlines are lines of the URL. - LinkDefinition, - - /// Span is language specifier. - /// Each inline is a line. - CodeBlock, + LinkDefinition { + label: &'s str, + }, + CodeBlock { + language: &'s str, + }, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum Container { - /// Span is `>`. 
+pub enum Container<'s> { Blockquote, - - /// Span is class specifier, possibly empty. - Div, - - /// Span is the list marker of the first list item in the list. - List(ListKind), - - /// Span is the list marker. - ListItem(ListType), - - /// Span is footnote tag. - Footnote, - - /// Span is empty, before first '|' character. + Div { class: &'s str }, + List { kind: ListKind, marker: &'s str }, + ListItem(ListItemKind), + Footnote { label: &'s str }, Table, - - /// Span is first '|' character. TableRow { head: bool }, - - /// Span is '#' characters of heading. - Section, + Section { pos: u32 }, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -112,6 +94,13 @@ pub struct ListKind { pub tight: bool, } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ListItemKind { + Task { checked: bool }, + Description, + List, +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum ListType { Unordered(u8), @@ -128,42 +117,48 @@ struct OpenList { /// Depth in the tree where the direct list items of the list are. Needed to determine when to /// close the list. depth: u16, - /// Index to node in tree, required to update tightness. - node: tree::NodeIndex, + /// Index to event in tree, required to update tightness. + event: usize, } /// Parser for block-level tree structure of entire document. struct TreeParser<'s> { src: &'s str, - tree: TreeBuilder, - /// The previous block element was a blank line. prev_blankline: bool, prev_loose: bool, + attr_start: Option, /// Stack of currently open lists. open_lists: Vec, /// Stack of currently open sections. open_sections: Vec, /// Alignments for each column in for the current table. alignments: Vec, + /// Current container depth. + open: Vec, + /// Buffer queue for next events. Events are buffered until no modifications due to future + /// characters are needed. 
+ events: Vec>, } impl<'s> TreeParser<'s> { #[must_use] - pub fn new(src: &'s str) -> Self { + fn new(src: &'s str) -> Self { Self { src, - tree: TreeBuilder::new(), prev_blankline: false, prev_loose: false, + attr_start: None, open_lists: Vec::new(), alignments: Vec::new(), open_sections: Vec::new(), + open: Vec::new(), + events: Vec::new(), } } #[must_use] - pub fn parse(mut self) -> Tree { + fn parse(mut self) -> Vec> { let mut lines = lines(self.src).collect::>(); let mut line_pos = 0; while line_pos < lines.len() { @@ -174,30 +169,68 @@ impl<'s> TreeParser<'s> { line_pos += line_count; } while let Some(l) = self.open_lists.pop() { - self.close_list(l); + self.close_list(l, self.src.len()); } - for _ in self.open_sections.drain(..) { - self.tree.exit(); // section + + for _ in std::mem::take(&mut self.open_sections).drain(..) { + self.exit(Span::empty_at(self.src.len())); } - self.tree.finish() + debug_assert_eq!(self.open, &[]); + self.events + } + + fn inline(&mut self, span: Span) { + self.events.push(Event { + kind: EventKind::Inline, + span, + }); + } + + fn enter(&mut self, node: Node<'s>, span: Span) -> usize { + let i = self.events.len(); + self.open.push(i); + self.events.push(Event { + kind: EventKind::Enter(node), + span, + }); + i + } + + fn exit(&mut self, span: Span) -> usize { + let i = self.events.len(); + let node = if let EventKind::Enter(node) = self.events[self.open.pop().unwrap()].kind { + node + } else { + panic!(); + }; + self.events.push(Event { + kind: EventKind::Exit(node), + span, + }); + i } /// Recursively parse a block and all of its children. Return number of lines the block uses. 
fn parse_block(&mut self, lines: &mut [Span], top_level: bool) -> usize { if let Some(MeteredBlock { kind, - span, + span: span_start, line_count, }) = MeteredBlock::new(lines.iter().map(|sp| sp.of(self.src))) { let lines = &mut lines[..line_count]; - let span = span.translate(lines[0].start()); + let span_start = span_start.translate(lines[0].start()); + let end_line = lines[lines.len() - 1]; + let span_end = match kind { + Kind::Fenced { + has_closing_fence: true, + .. + } => end_line, + _ => end_line.empty_after(), + }; // part of first inline that is from the outer block - let outer = Span::new( - lines[0].start(), - span.end() + "]:".len() * usize::from(matches!(kind, Kind::Definition { .. })), - ); + let outer = Span::new(lines[0].start(), span_start.end()); // skip outer block part for inner content lines[0] = lines[0].skip(outer.len()); @@ -215,12 +248,12 @@ impl<'s> TreeParser<'s> { // close list if a non list item or a list item of new type appeared if let Some(OpenList { ty, depth, .. }) = self.open_lists.last() { - debug_assert!(usize::from(*depth) <= self.tree.depth()); - if self.tree.depth() == (*depth).into() + debug_assert!(usize::from(*depth) <= self.open.len()); + if self.open.len() == (*depth).into() && !matches!(kind, Kind::ListItem { ty: ty_new, .. } if *ty == ty_new) { let l = self.open_lists.pop().unwrap(); - self.close_list(l); + self.close_list(l, span_start.start()); } } @@ -230,15 +263,17 @@ impl<'s> TreeParser<'s> { } else { self.prev_loose = false; if self.prev_blankline { - if let Some(OpenList { node, depth, .. }) = self.open_lists.last() { - if usize::from(*depth) >= self.tree.depth() + if let Some(OpenList { event, depth, .. }) = self.open_lists.last() { + if usize::from(*depth) >= self.open.len() || !matches!(kind, Kind::ListItem { .. }) { - let mut elem = self.tree.elem(*node); - let ListKind { tight, .. 
} = elem.list_mut().unwrap(); - if *tight { - self.prev_loose = true; - *tight = false; + if let EventKind::Enter(Node::Container(List { kind, .. })) = + &mut self.events[*event].kind + { + if kind.tight { + self.prev_loose = true; + kind.tight = false; + } } } } @@ -246,11 +281,61 @@ impl<'s> TreeParser<'s> { self.prev_blankline = false; } - match kind.block(top_level) { - Block::Atom(a) => self.tree.atom(a, span), - Block::Leaf(l) => self.parse_leaf(l, &kind, span, lines), - Block::Container(Table) => self.parse_table(lines, span), - Block::Container(c) => self.parse_container(c, &kind, span, outer, lines), + let block = match kind { + Kind::Atom(a) => Block::Atom(a), + Kind::Paragraph => Block::Leaf(Paragraph), + Kind::Heading { level } => Block::Leaf(Heading { + level: level.try_into().unwrap(), + has_section: top_level, + pos: span_start.start() as u32, + }), + Kind::Fenced { + kind: FenceKind::CodeBlock(..), + spec, + .. + } => Block::Leaf(CodeBlock { language: spec }), + Kind::Fenced { + kind: FenceKind::Div, + spec, + .. + } => Block::Container(Div { class: spec }), + Kind::Definition { + footnote: false, + label, + .. + } => Block::Leaf(LinkDefinition { label }), + Kind::Definition { + footnote: true, + label, + .. + } => Block::Container(Footnote { label }), + Kind::Blockquote => Block::Container(Blockquote), + Kind::ListItem { ty, .. } => Block::Container(ListItem(match ty { + ListType::Task => ListItemKind::Task { + checked: span_start.of(self.src).as_bytes()[3] != b' ', + }, + ListType::Description => ListItemKind::Description, + _ => ListItemKind::List, + })), + Kind::Table { .. 
} => Block::Container(Table), + }; + + match block { + Block::Atom(a) => self.events.push(Event { + kind: EventKind::Atom(a), + span: span_start, + }), + Block::Leaf(l) => self.parse_leaf(l, &kind, span_start, span_end, lines), + Block::Container(Table) => self.parse_table(lines, span_start, span_end), + Block::Container(c) => { + self.parse_container(c, &kind, span_start, span_end, outer, lines); + } + } + + if matches!(kind, Kind::Atom(Attributes)) { + self.attr_start = self.attr_start.or_else(|| Some(self.events.len() - 1)); + } else if !matches!(kind, Kind::Atom(Blankline)) { + self.attr_start = None; } line_count @@ -259,7 +344,14 @@ impl<'s> TreeParser<'s> { } } - fn parse_leaf(&mut self, leaf: Leaf, k: &Kind, span: Span, lines: &mut [Span]) { + fn parse_leaf( + &mut self, + leaf: Leaf<'s>, + k: &Kind, + span_start: Span, + span_end: Span, + mut lines: &mut [Span], + ) { if let Kind::Fenced { indent, .. } = k { for line in lines.iter_mut() { let indent_line = line @@ -275,6 +367,18 @@ impl<'s> TreeParser<'s> { *line = line.trim_start(self.src); } + // skip first inline if empty + if lines.get(0).map_or(false, |l| l.is_empty()) { + lines = &mut lines[1..]; + }; + + if matches!(leaf, LinkDefinition { .. 
}) { + // trim ending whitespace of each inline + for line in lines.iter_mut() { + *line = line.trim_end(self.src); + } + } + // trim ending whitespace of block let l = lines.len(); if l > 0 { @@ -294,32 +398,51 @@ impl<'s> TreeParser<'s> { .iter() .rposition(|l| l < level) .map_or(0, |i| i + 1); - self.open_sections.drain(first_close..).for_each(|_| { - self.tree.exit(); // section - }); + let pos = span_start.start() as u32; + for i in 0..(self.open_sections.len() - first_close) { + let node = if let EventKind::Enter(node) = + self.events[self.open.pop().unwrap()].kind + { + node + } else { + panic!(); + }; + let end = self + .attr_start + .map_or(span_start.start(), |a| self.events[a].span.start()); + self.events.insert( + self.attr_start.map_or(self.events.len(), |a| a + i), + Event { + kind: EventKind::Exit(node), + span: Span::new(end, end), + }, + ); + } + self.open_sections.drain(first_close..); self.open_sections.push(*level); - self.tree.enter(Node::Container(Section), span); + self.enter(Node::Container(Section { pos }), span_start.empty_before()); } // trim '#' characters - for line in lines[1..].iter_mut() { + for line in lines.iter_mut().skip(1) { *line = line.trim_start_matches(self.src, |c| c == '#' || c.is_whitespace()); } } - self.tree.enter(Node::Leaf(leaf), span); + self.enter(Node::Leaf(leaf), span_start); lines .iter() .filter(|l| !matches!(k, Kind::Heading { .. }) || !l.is_empty()) - .for_each(|line| self.tree.inline(*line)); - self.tree.exit(); + .for_each(|line| self.inline(*line)); + self.exit(span_end); } fn parse_container( &mut self, - c: Container, + c: Container<'s>, k: &Kind, - span: Span, + mut span_start: Span, + span_end: Span, outer: Span, lines: &mut [Span], ) { @@ -350,102 +473,133 @@ impl<'s> TreeParser<'s> { *sp = sp.skip_chars(skip.min(count), self.src); }); - if let ListItem(ty) = c { + if let Kind::ListItem { ty, .. } = k { let same_depth = self .open_lists .last() .map_or(true, |OpenList { depth, .. 
}| { - usize::from(*depth) < self.tree.depth() + usize::from(*depth) < self.open.len() }); if same_depth { let tight = true; - let node = self.tree.enter( - Node::Container(Container::List(ListKind { ty, tight })), - span, + let event = self.enter( + Node::Container(Container::List { + kind: ListKind { ty: *ty, tight }, + marker: span_start.of(self.src), + }), + span_start.empty_before(), ); self.open_lists.push(OpenList { - ty, - depth: self.tree.depth().try_into().unwrap(), - node, + ty: *ty, + depth: self.open.len().try_into().unwrap(), + event, }); } } - let dt = if let ListItem(Description) = c { - let dt = self - .tree - .enter(Node::Leaf(DescriptionTerm), span.empty_after()); - self.tree.exit(); - Some(dt) + let dt = if let ListItem(ListItemKind::Description) = c { + let dt = self.enter(Node::Leaf(DescriptionTerm), span_start); + self.exit(span_start.trim_end(self.src).empty_after()); + span_start = lines[0].empty_before(); + Some((dt, self.events.len(), self.open.len())) } else { None }; - let node = self.tree.enter(Node::Container(c), span); + self.enter(Node::Container(c), span_start); let mut l = 0; while l < lines.len() { l += self.parse_block(&mut lines[l..], false); } - if let Some(node_dt) = dt { - let node_child = if let Some(node_child) = self.tree.children(node).next() { - if let tree::Element::Container(Node::Leaf(l @ Paragraph)) = node_child.elem { + if let Some((empty_term, enter_detail, open_detail)) = dt { + let enter_term = enter_detail + 1; + if let Some(first_child) = self.events.get_mut(enter_term) { + if let EventKind::Enter(Node::Leaf(l @ Paragraph)) = &mut first_child.kind { + // convert paragraph into description term *l = DescriptionTerm; - Some(node_child.index) - } else { - None + let exit_term = if let Some(i) = self.events[enter_term + 1..] 
+ .iter_mut() + .position(|e| matches!(e.kind, EventKind::Exit(Node::Leaf(Paragraph)))) + { + enter_term + 1 + i + } else { + panic!() + }; + if let EventKind::Exit(Node::Leaf(l)) = &mut self.events[exit_term].kind { + *l = DescriptionTerm; + } else { + panic!() + } + + // remove empty description term + self.events[empty_term].kind = EventKind::Stale; + self.events[empty_term + 1].kind = EventKind::Stale; + + // move out term before detail + self.events[enter_term].span = self.events[empty_term].span; + let first_detail = self.events[exit_term + 1..] + .iter() + .position(|e| !matches!(e.kind, EventKind::Atom(Blankline))) + .map(|i| exit_term + 1 + i) + .unwrap_or(self.events.len()); + let detail_pos = self + .events + .get(first_detail) + .map(|e| e.span.start()) + .unwrap_or_else(|| self.events.last().unwrap().span.end()); + self.events + .copy_within(enter_term..first_detail, enter_detail); + self.events[first_detail - 1] = Event { + kind: EventKind::Enter(Node::Container(c)), + span: Span::empty_at(detail_pos), + }; + self.open[open_detail] = first_detail - 1; } - } else { - None - }; - if let Some(node_child) = node_child { - self.tree.swap_prev(node_child); - self.tree.remove(node_dt); } } if let Some(OpenList { depth, .. 
}) = self.open_lists.last() { - debug_assert!(usize::from(*depth) <= self.tree.depth()); - if self.tree.depth() == (*depth).into() { + debug_assert!(usize::from(*depth) <= self.open.len()); + if self.open.len() == (*depth).into() { self.prev_blankline = false; self.prev_loose = false; let l = self.open_lists.pop().unwrap(); - self.close_list(l); + self.close_list(l, span_end.start()); } } - self.tree.exit(); + self.exit(span_end); } - fn parse_table(&mut self, lines: &mut [Span], span: Span) { + fn parse_table(&mut self, lines: &mut [Span], span_start: Span, span_end: Span) { self.alignments.clear(); - self.tree.enter(Node::Container(Table), span); + self.enter(Node::Container(Table), span_start); let caption_line = lines .iter() .position(|sp| sp.of(self.src).trim_start().starts_with('^')) .map_or(lines.len(), |caption_line| { - self.tree.enter(Node::Leaf(Caption), span); + self.enter(Node::Leaf(Caption), span_start); lines[caption_line] = lines[caption_line] .trim_start(self.src) .skip_chars(2, self.src); lines[lines.len() - 1] = lines[lines.len() - 1].trim_end(self.src); for line in &lines[caption_line..] 
{ - self.tree.inline(*line); + self.inline(*line); } - self.tree.exit(); + self.exit(span_end); caption_line }); - let mut last_row_node = None; + let mut last_row_event = None; for row in &lines[..caption_line] { let row = row.trim(self.src); if row.is_empty() { break; } - let row_node = self - .tree - .enter(Node::Container(TableRow { head: false }), row.with_len(1)); + let row_event_enter = + self.enter(Node::Container(TableRow { head: false }), row.with_len(1)); let rem = row.skip(1); // | let lex = lex::Lexer::new(rem.of(self.src)); let mut pos = rem.start(); @@ -474,17 +628,17 @@ impl<'s> TreeParser<'s> { } }; separator_row &= separator_cell; - self.tree.enter( + self.enter( Node::Leaf(TableCell( self.alignments .get(column_index) .copied() .unwrap_or(Alignment::Unspecified), )), - Span::by_len(cell_start - 1, 1), + Span::empty_at(cell_start), ); - self.tree.inline(span); - self.tree.exit(); // cell + self.inline(span); + self.exit(Span::new(pos, pos + 1)); cell_start = pos + len; column_index += 1; } @@ -500,11 +654,11 @@ impl<'s> TreeParser<'s> { if separator_row && verbatim.is_none() { self.alignments.clear(); self.alignments.extend( - self.tree - .children(row_node) - .filter(|n| matches!(n.elem, tree::Element::Inline)) - .map(|n| { - let cell = n.span.of(self.src); + self.events[row_event_enter + 1..] + .iter() + .filter(|e| matches!(e.kind, EventKind::Inline)) + .map(|e| { + let cell = e.span.of(self.src); let l = cell.as_bytes()[0] == b':'; let r = cell.as_bytes()[cell.len() - 1] == b':'; match (l, r) { @@ -515,75 +669,80 @@ impl<'s> TreeParser<'s> { } }), ); - self.tree.exit_discard(); // table row - if let Some(head_row) = last_row_node { - self.tree - .children(head_row) - .filter(|n| { - matches!(n.elem, tree::Element::Container(Node::Leaf(TableCell(..)))) + self.open.pop(); + self.events.drain(row_event_enter..); // remove table row + if let Some((head_row_enter, head_row_exit)) = last_row_event { + self.events[head_row_enter + 1..] 
+ .iter_mut() + .filter(|e| { + matches!( + e.kind, + EventKind::Enter(Node::Leaf(TableCell(..))) + | EventKind::Exit(Node::Leaf(TableCell(..))) + ) }) .zip( self.alignments .iter() .copied() - .chain(std::iter::repeat(Alignment::Unspecified)), + .chain(std::iter::repeat(Alignment::Unspecified)) + .flat_map(|a| [a, a].into_iter()), ) - .for_each(|(n, new_align)| { - if let tree::Element::Container(Node::Leaf(TableCell(alignment))) = - n.elem - { + .for_each(|(e, new_align)| match &mut e.kind { + EventKind::Enter(Node::Leaf(TableCell(alignment))) + | EventKind::Exit(Node::Leaf(TableCell(alignment))) => { *alignment = new_align; } + _ => panic!(), }); - if let tree::Element::Container(Node::Container(TableRow { head })) = - self.tree.elem(head_row) - { + let event: &mut Event = &mut self.events[head_row_enter]; + if let EventKind::Enter(Node::Container(TableRow { head })) = &mut event.kind { + *head = true; + } else { + panic!() + } + let event: &mut Event = &mut self.events[head_row_exit]; + if let EventKind::Exit(Node::Container(TableRow { head })) = &mut event.kind { *head = true; } else { panic!() } } } else { - self.tree.exit(); // table row - last_row_node = Some(row_node); + let row_event_exit = self.exit(Span::empty_at(pos)); // table row + last_row_event = Some((row_event_enter, row_event_exit)); } } - self.tree.exit(); // table + self.exit(span_end); } - fn close_list(&mut self, list: OpenList) { + fn close_list(&mut self, list: OpenList, pos: usize) { if self.prev_loose { - let mut elem = self.tree.elem(list.node); - let ListKind { tight, .. } = elem.list_mut().unwrap(); - // ignore blankline at end - *tight = true; + if let EventKind::Enter(Node::Container(List { kind, .. 
})) = + &mut self.events[list.event].kind + { + // ignore blankline at end + kind.tight = true; + } else { + panic!() + } } - self.tree.exit(); // list - } -} - -impl<'t> tree::Element<'t, Node, Atom> { - fn list_mut(&mut self) -> Option<&mut ListKind> { - if let tree::Element::Container(Node::Container(Container::List(l))) = self { - Some(l) - } else { - None - } + self.exit(Span::empty_at(pos)); // list } } /// Parser for a single block. -struct MeteredBlock { - kind: Kind, +struct MeteredBlock<'s> { + kind: Kind<'s>, span: Span, line_count: usize, } -impl MeteredBlock { +impl<'s> MeteredBlock<'s> { /// Identify and measure the line length of a single block. - fn new<'s, I: Iterator>(mut lines: I) -> Option { + fn new>(mut lines: I) -> Option { lines.next().map(|l| { let IdentifiedBlock { mut kind, span } = IdentifiedBlock::new(l); let line_count = 1 + lines.take_while(|l| kind.continues(l)).count(); @@ -604,7 +763,7 @@ enum FenceKind { #[cfg_attr(test, derive(PartialEq, Eq))] #[derive(Debug)] -enum Kind { +enum Kind<'s> { Atom(Atom), Paragraph, Heading { @@ -614,12 +773,13 @@ enum Kind { indent: usize, fence_length: usize, kind: FenceKind, - has_spec: bool, + spec: &'s str, has_closing_fence: bool, }, Definition { indent: usize, footnote: bool, + label: &'s str, }, Blockquote, ListItem { @@ -632,13 +792,13 @@ enum Kind { }, } -struct IdentifiedBlock { - kind: Kind, +struct IdentifiedBlock<'s> { + kind: Kind<'s>, span: Span, } -impl IdentifiedBlock { - fn new(line: &str) -> Self { +impl<'s> IdentifiedBlock<'s> { + fn new(line: &'s str) -> Self { let mut chars = line.chars(); let indent = chars .clone() @@ -686,11 +846,15 @@ impl IdentifiedBlock { } } '[' => chars.as_str().find("]:").map(|l| { - let tag = &chars.as_str()[0..l]; - let footnote = tag.starts_with('^'); + let label = &chars.as_str()[0..l]; + let footnote = label.starts_with('^'); ( - Kind::Definition { indent, footnote }, - Span::by_len(indent_bytes + 1, l).skip(usize::from(footnote)), + 
Kind::Definition { + indent, + footnote, + label: &label[usize::from(footnote)..], + }, + Span::by_len(0, indent_bytes + 3 + l), ) }), '-' | '*' if Self::is_thematic_break(chars.clone()) => { @@ -737,7 +901,6 @@ impl IdentifiedBlock { } else { !spec.chars().any(char::is_whitespace) && !spec.chars().any(|c| c == '`') }; - let skip = line_t.len() - spec.len(); (valid_spec && fence_length >= 3).then(|| { ( Kind::Fenced { @@ -747,10 +910,10 @@ impl IdentifiedBlock { ':' => FenceKind::Div, _ => FenceKind::CodeBlock(f as u8), }, - has_spec: !spec.is_empty(), + spec, has_closing_fence: false, }, - Span::by_len(indent_bytes + skip, spec.len()), + Span::by_len(indent_bytes, line.len()), ) }) } @@ -868,9 +1031,9 @@ impl IdentifiedBlock { } } -impl Kind { +impl<'s> Kind<'s> { /// Determine if a line continues the block. - fn continues(&mut self, line: &str) -> bool { + fn continues(&mut self, line: &'s str) -> bool { let IdentifiedBlock { kind: next, .. } = IdentifiedBlock::new(line); match self { Self::Atom(..) @@ -897,7 +1060,9 @@ impl Kind { *last_blankline = blankline; blankline || spaces > *indent || para } - Self::Definition { indent, footnote } => { + Self::Definition { + indent, footnote, .. + } => { if *footnote { let spaces = line.chars().take_while(|c| c.is_whitespace()).count(); matches!(next, Self::Atom(Blankline)) || spaces > *indent @@ -914,13 +1079,15 @@ impl Kind { if let Kind::Fenced { kind: k, fence_length: l, - has_spec: false, + spec, .. } = next { - *has_closing_fence = k == *kind - && (l == *fence_length - || (matches!(k, FenceKind::Div) && l > *fence_length)); + if spec.is_empty() { + *has_closing_fence = k == *kind + && (l == *fence_length + || (matches!(k, FenceKind::Div) && l > *fence_length)); + } } true } @@ -936,34 +1103,9 @@ impl Kind { } } } - - fn block(&self, top_level: bool) -> Block { - match self { - Self::Atom(a) => Block::Atom(*a), - Self::Paragraph => Block::Leaf(Paragraph), - Self::Heading { .. 
} => Block::Leaf(Heading { - has_section: top_level, - }), - Self::Fenced { - kind: FenceKind::CodeBlock(..), - .. - } => Block::Leaf(CodeBlock), - Self::Fenced { - kind: FenceKind::Div, - .. - } => Block::Container(Div), - Self::Definition { - footnote: false, .. - } => Block::Leaf(LinkDefinition), - Self::Definition { footnote: true, .. } => Block::Container(Footnote), - Self::Blockquote => Block::Container(Blockquote), - Self::ListItem { ty, .. } => Block::Container(ListItem(*ty)), - Self::Table { .. } => Block::Container(Table), - } - } } -impl std::fmt::Display for Block { +impl<'s> std::fmt::Display for Block<'s> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Block::Atom(a) => std::fmt::Debug::fmt(a, f), @@ -1000,16 +1142,17 @@ fn lines(src: &str) -> impl Iterator + '_ { #[cfg(test)] mod test { - use crate::tree::EventKind::*; use crate::Alignment; use crate::OrderedListNumbering::*; use crate::OrderedListStyle::*; use super::Atom::*; use super::Container::*; + use super::EventKind::*; use super::FenceKind; use super::Kind; use super::Leaf::*; + use super::ListItemKind; use super::ListKind; use super::ListType::*; use super::Node::*; @@ -1017,7 +1160,7 @@ mod test { macro_rules! test_parse { ($src:expr $(,$($event:expr),* $(,)?)?) 
=> { let t = super::TreeParser::new($src).parse(); - let actual = t.map(|ev| (ev.kind, ev.span.of($src))).collect::>(); + let actual = t.into_iter().map(|ev| (ev.kind, ev.span.of($src))).collect::>(); let expected = &[$($($event),*,)?]; assert_eq!( actual, @@ -1086,16 +1229,44 @@ mod test { "# a\n", "## b\n", // ), - (Enter(Container(Section)), "#"), - (Enter(Leaf(Heading { has_section: true })), "#"), + (Enter(Container(Section { pos: 0 })), ""), + ( + Enter(Leaf(Heading { + level: 1, + has_section: true, + pos: 0 + })), + "#" + ), (Inline, "a"), - (Exit(Leaf(Heading { has_section: true })), "#"), - (Enter(Container(Section)), "##"), - (Enter(Leaf(Heading { has_section: true })), "##"), + ( + Exit(Leaf(Heading { + level: 1, + has_section: true, + pos: 0 + })), + "" + ), + (Enter(Container(Section { pos: 4 })), ""), + ( + Enter(Leaf(Heading { + level: 2, + has_section: true, + pos: 4 + })), + "##" + ), (Inline, "b"), - (Exit(Leaf(Heading { has_section: true })), "##"), - (Exit(Container(Section)), "##"), - (Exit(Container(Section)), "#"), + ( + Exit(Leaf(Heading { + level: 2, + has_section: true, + pos: 4 + })), + "" + ), + (Exit(Container(Section { pos: 4 })), ""), + (Exit(Container(Section { pos: 0 })), ""), ); } @@ -1106,11 +1277,25 @@ mod test { "#\n", "heading\n", // ), - (Enter(Container(Section)), "#"), - (Enter(Leaf(Heading { has_section: true })), "#"), + (Enter(Container(Section { pos: 0 })), ""), + ( + Enter(Leaf(Heading { + level: 1, + has_section: true, + pos: 0 + })), + "#" + ), (Inline, "heading"), - (Exit(Leaf(Heading { has_section: true })), "#"), - (Exit(Container(Section)), "#"), + ( + Exit(Leaf(Heading { + level: 1, + has_section: true, + pos: 0 + })), + "" + ), + (Exit(Container(Section { pos: 0 })), ""), ); } @@ -1124,19 +1309,47 @@ mod test { " 12\n", "15\n", // ), - (Enter(Container(Section)), "#"), - (Enter(Leaf(Heading { has_section: true })), "#"), + (Enter(Container(Section { pos: 0 })), ""), + ( + Enter(Leaf(Heading { + level: 1, + 
has_section: true, + pos: 0, + })), + "#" + ), (Inline, "2"), - (Exit(Leaf(Heading { has_section: true })), "#"), + ( + Exit(Leaf(Heading { + level: 1, + has_section: true, + pos: 0, + })), + "" + ), (Atom(Blankline), "\n"), - (Exit(Container(Section)), "#"), - (Enter(Container(Section)), "#"), - (Enter(Leaf(Heading { has_section: true })), "#"), + (Exit(Container(Section { pos: 0 })), ""), + (Enter(Container(Section { pos: 6 })), ""), + ( + Enter(Leaf(Heading { + level: 1, + has_section: true, + pos: 6, + })), + "#" + ), (Inline, "8\n"), (Inline, "12\n"), (Inline, "15"), - (Exit(Leaf(Heading { has_section: true })), "#"), - (Exit(Container(Section)), "#"), + ( + Exit(Leaf(Heading { + level: 1, + has_section: true, + pos: 6, + })), + "" + ), + (Exit(Container(Section { pos: 6 })), ""), ); } @@ -1148,13 +1361,27 @@ mod test { "# b\n", "c\n", // ), - (Enter(Container(Section)), "#"), - (Enter(Leaf(Heading { has_section: true })), "#"), + (Enter(Container(Section { pos: 0 })), ""), + ( + Enter(Leaf(Heading { + level: 1, + has_section: true, + pos: 0 + })), + "#" + ), (Inline, "a\n"), (Inline, "b\n"), (Inline, "c"), - (Exit(Leaf(Heading { has_section: true })), "#"), - (Exit(Container(Section)), "#"), + ( + Exit(Leaf(Heading { + level: 1, + has_section: true, + pos: 0 + })), + "", + ), + (Exit(Container(Section { pos: 0 })), ""), ); } @@ -1174,41 +1401,125 @@ mod test { "\n", "# b\n", ), - (Enter(Container(Section)), "#"), - (Enter(Leaf(Heading { has_section: true })), "#"), + (Enter(Container(Section { pos: 0 })), ""), + ( + Enter(Leaf(Heading { + level: 1, + has_section: true, + pos: 0, + })), + "#" + ), (Inline, "a"), - (Exit(Leaf(Heading { has_section: true })), "#"), + ( + Exit(Leaf(Heading { + level: 1, + has_section: true, + pos: 0, + })), + "", + ), (Atom(Blankline), "\n"), - (Enter(Container(Section)), "##"), - (Enter(Leaf(Heading { has_section: true })), "##"), + (Enter(Container(Section { pos: 5 })), ""), + ( + Enter(Leaf(Heading { + level: 2, + has_section: 
true, + pos: 5, + })), + "##" + ), (Inline, "aa"), - (Exit(Leaf(Heading { has_section: true })), "##"), + ( + Exit(Leaf(Heading { + level: 2, + has_section: true, + pos: 5, + })), + "", + ), (Atom(Blankline), "\n"), - (Enter(Container(Section)), "####"), - (Enter(Leaf(Heading { has_section: true })), "####"), + (Enter(Container(Section { pos: 12 })), ""), + ( + Enter(Leaf(Heading { + level: 4, + has_section: true, + pos: 12, + })), + "####" + ), (Inline, "aaaa"), - (Exit(Leaf(Heading { has_section: true })), "####"), + ( + Exit(Leaf(Heading { + level: 4, + has_section: true, + pos: 12, + })), + "", + ), (Atom(Blankline), "\n"), - (Exit(Container(Section)), "####"), - (Exit(Container(Section)), "##"), - (Enter(Container(Section)), "##"), - (Enter(Leaf(Heading { has_section: true })), "##"), + (Exit(Container(Section { pos: 12 })), ""), + (Exit(Container(Section { pos: 5 })), ""), + (Enter(Container(Section { pos: 23 })), ""), + ( + Enter(Leaf(Heading { + level: 2, + has_section: true, + pos: 23, + })), + "##" + ), (Inline, "ab"), - (Exit(Leaf(Heading { has_section: true })), "##"), + ( + Exit(Leaf(Heading { + level: 2, + has_section: true, + pos: 23, + })), + "", + ), (Atom(Blankline), "\n"), - (Enter(Container(Section)), "###"), - (Enter(Leaf(Heading { has_section: true })), "###"), + (Enter(Container(Section { pos: 30 })), ""), + ( + Enter(Leaf(Heading { + level: 3, + has_section: true, + pos: 30, + })), + "###" + ), (Inline, "aba"), - (Exit(Leaf(Heading { has_section: true })), "###"), + ( + Exit(Leaf(Heading { + level: 3, + has_section: true, + pos: 30, + })), + "", + ), (Atom(Blankline), "\n"), - (Exit(Container(Section)), "###"), - (Exit(Container(Section)), "##"), - (Exit(Container(Section)), "#"), - (Enter(Container(Section)), "#"), - (Enter(Leaf(Heading { has_section: true })), "#"), + (Exit(Container(Section { pos: 30 })), ""), + (Exit(Container(Section { pos: 23 })), ""), + (Exit(Container(Section { pos: 0 })), ""), + (Enter(Container(Section { pos: 39 
})), ""), + ( + Enter(Leaf(Heading { + level: 1, + has_section: true, + pos: 39, + })), + "#" + ), (Inline, "b"), - (Exit(Leaf(Heading { has_section: true })), "#"), - (Exit(Container(Section)), "#"), + ( + Exit(Leaf(Heading { + level: 1, + has_section: true, + pos: 39, + })), + "", + ), + (Exit(Container(Section { pos: 39 })), ""), ); } @@ -1220,7 +1531,7 @@ mod test { (Enter(Leaf(Paragraph)), ""), (Inline, "a"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(Blockquote)), ">"), + (Exit(Container(Blockquote)), ""), ); test_parse!( "> a\nb\nc\n", @@ -1230,7 +1541,7 @@ mod test { (Inline, "b\n"), (Inline, "c"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(Blockquote)), ">"), + (Exit(Container(Blockquote)), ""), ); test_parse!( concat!( @@ -1245,14 +1556,28 @@ mod test { (Inline, "a"), (Exit(Leaf(Paragraph)), ""), (Atom(Blankline), "\n"), - (Enter(Leaf(Heading { has_section: false })), "##"), + ( + Enter(Leaf(Heading { + level: 2, + has_section: false, + pos: 8, + })), + "##" + ), (Inline, "hl"), - (Exit(Leaf(Heading { has_section: false })), "##"), + ( + Exit(Leaf(Heading { + level: 2, + has_section: false, + pos: 8, + })), + "" + ), (Atom(Blankline), "\n"), (Enter(Leaf(Paragraph)), ""), (Inline, "para"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(Blockquote)), ">"), + (Exit(Container(Blockquote)), ""), ); } @@ -1262,23 +1587,26 @@ mod test { "> \n", (Enter(Container(Blockquote)), ">"), (Atom(Blankline), "\n"), - (Exit(Container(Blockquote)), ">"), + (Exit(Container(Blockquote)), ""), ); test_parse!( ">", (Enter(Container(Blockquote)), ">"), (Atom(Blankline), ""), - (Exit(Container(Blockquote)), ">"), + (Exit(Container(Blockquote)), ""), ); } #[test] fn parse_code_block() { test_parse!( - concat!("```\n", "l0\n"), - (Enter(Leaf(CodeBlock)), "",), + concat!( + "```\n", // + "l0\n" // + ), + (Enter(Leaf(CodeBlock { language: "" })), "```\n",), (Inline, "l0\n"), - (Exit(Leaf(CodeBlock)), "",), + (Exit(Leaf(CodeBlock { language: "" })), "",), ); test_parse!( 
concat!( @@ -1288,9 +1616,9 @@ mod test { "\n", "para\n", // ), - (Enter(Leaf(CodeBlock)), ""), + (Enter(Leaf(CodeBlock { language: "" })), "```\n"), (Inline, "l0\n"), - (Exit(Leaf(CodeBlock)), ""), + (Exit(Leaf(CodeBlock { language: "" })), "```\n"), (Atom(Blankline), "\n"), (Enter(Leaf(Paragraph)), ""), (Inline, "para"), @@ -1304,11 +1632,11 @@ mod test { " l1\n", "````", // ), - (Enter(Leaf(CodeBlock)), "lang"), + (Enter(Leaf(CodeBlock { language: "lang" })), "```` lang\n",), (Inline, "l0\n"), (Inline, "```\n"), (Inline, " l1\n"), - (Exit(Leaf(CodeBlock)), "lang"), + (Exit(Leaf(CodeBlock { language: "lang" })), "````"), ); test_parse!( concat!( @@ -1319,12 +1647,12 @@ mod test { "bbb\n", // "```\n", // ), - (Enter(Leaf(CodeBlock)), ""), + (Enter(Leaf(CodeBlock { language: "" })), "```\n"), (Inline, "a\n"), - (Exit(Leaf(CodeBlock)), ""), - (Enter(Leaf(CodeBlock)), ""), + (Exit(Leaf(CodeBlock { language: "" })), "```\n"), + (Enter(Leaf(CodeBlock { language: "" })), "```\n"), (Inline, "bbb\n"), - (Exit(Leaf(CodeBlock)), ""), + (Exit(Leaf(CodeBlock { language: "" })), "```\n"), ); test_parse!( concat!( @@ -1333,10 +1661,15 @@ mod test { " block\n", "~~~\n", // ), - (Enter(Leaf(CodeBlock)), ""), + (Enter(Leaf(CodeBlock { language: "" })), "~~~\n"), (Inline, "code\n"), (Inline, " block\n"), - (Exit(Leaf(CodeBlock)), ""), + (Exit(Leaf(CodeBlock { language: "" })), "~~~\n"), + ); + test_parse!( + " ```abc\n", + (Enter(Leaf(CodeBlock { language: "abc" })), "```abc\n"), + (Exit(Leaf(CodeBlock { language: "abc" })), ""), ); } @@ -1344,9 +1677,9 @@ mod test { fn parse_link_definition() { test_parse!( "[tag]: url\n", - (Enter(Leaf(LinkDefinition)), "tag"), + (Enter(Leaf(LinkDefinition { label: "tag" })), "[tag]:"), (Inline, "url"), - (Exit(Leaf(LinkDefinition)), "tag"), + (Exit(Leaf(LinkDefinition { label: "tag" })), ""), ); } @@ -1354,11 +1687,11 @@ mod test { fn parse_footnote() { test_parse!( "[^tag]: description\n", - (Enter(Container(Footnote)), "tag"), + 
(Enter(Container(Footnote { label: "tag" })), "[^tag]:"), (Enter(Leaf(Paragraph)), ""), (Inline, "description"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(Footnote)), "tag"), + (Exit(Container(Footnote { label: "tag" })), ""), ); } @@ -1376,12 +1709,12 @@ mod test { (Inline, "[^a]"), (Exit(Leaf(Paragraph)), ""), (Atom(Blankline), "\n"), - (Enter(Container(Footnote)), "a"), + (Enter(Container(Footnote { label: "a" })), "[^a]:"), (Enter(Leaf(Paragraph)), ""), (Inline, "note"), (Exit(Leaf(Paragraph)), ""), (Atom(Blankline), "\n"), - (Exit(Container(Footnote)), "a"), + (Exit(Container(Footnote { label: "a" })), ""), (Enter(Leaf(Paragraph)), ""), (Inline, "para"), (Exit(Leaf(Paragraph)), ""), @@ -1397,6 +1730,22 @@ mod test { (Inline, "para"), (Exit(Leaf(Paragraph)), ""), ); + test_parse!( + concat!( + "{.a}\n", // + "\n", // + "{.b}\n", // + "\n", // + "para\n", // + ), + (Atom(Attributes), "{.a}\n"), + (Atom(Blankline), "\n"), + (Atom(Attributes), "{.b}\n"), + (Atom(Blankline), "\n"), + (Enter(Leaf(Paragraph)), ""), + (Inline, "para"), + (Exit(Leaf(Paragraph)), ""), + ); } #[test] @@ -1404,23 +1753,29 @@ mod test { test_parse!( "- abc", ( - Enter(Container(List(ListKind { - ty: Unordered(b'-'), - tight: true - }))), - "-" + Enter(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true + }, + marker: "-", + })), + "" ), - (Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "abc"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(Unordered(b'-')))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), ""), ( - Exit(Container(List(ListKind { - ty: Unordered(b'-'), - tight: true - }))), - "-" + Exit(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true + }, + marker: "-", + })), + "" ), ); } @@ -1433,28 +1788,34 @@ mod test { "- b\n", // ), ( - Enter(Container(List(ListKind { - ty: Unordered(b'-'), - tight: true, - }))), - "-" 
+ Enter(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true, + }, + marker: "-", + })), + "" ), - (Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "a"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(Unordered(b'-')))), "-"), - (Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), ""), + (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "b"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(Unordered(b'-')))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), ""), ( - Exit(Container(List(ListKind { - ty: Unordered(b'-'), - tight: true, - }))), - "-" + Exit(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true, + }, + marker: "-", + })), + "" ), ); } @@ -1469,34 +1830,40 @@ mod test { "- c\n", // ), ( - Enter(Container(List(ListKind { - ty: Unordered(b'-'), - tight: false, - }))), - "-" + Enter(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: false, + }, + marker: "-", + })), + "" ), - (Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "a"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(Unordered(b'-')))), "-"), - (Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), ""), + (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "b"), (Exit(Leaf(Paragraph)), ""), (Atom(Blankline), "\n"), - (Exit(Container(ListItem(Unordered(b'-')))), "-"), - (Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), ""), + (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "c"), (Exit(Leaf(Paragraph)), ""), - 
(Exit(Container(ListItem(Unordered(b'-')))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), ""), ( - Exit(Container(List(ListKind { - ty: Unordered(b'-'), - tight: false, - }))), - "-" + Exit(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: false, + }, + marker: "-", + })), + "" ), ); } @@ -1513,30 +1880,36 @@ mod test { " d\n", // ), ( - Enter(Container(List(ListKind { - ty: Unordered(b'-'), - tight: true, - }))), - "-" + Enter(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true, + }, + marker: "-", + })), + "" ), - (Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "a"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(Unordered(b'-')))), "-"), - (Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), ""), + (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "b"), (Exit(Leaf(Paragraph)), ""), (Atom(Blankline), "\n"), ( - Enter(Container(List(ListKind { - ty: Unordered(b'-'), - tight: false, - }))), - "-" + Enter(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: false, + }, + marker: "-", + })), + "" ), - (Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "c"), (Exit(Leaf(Paragraph)), ""), @@ -1544,21 +1917,27 @@ mod test { (Enter(Leaf(Paragraph)), ""), (Inline, "d"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(Unordered(b'-')))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), ""), ( - Exit(Container(List(ListKind { - ty: Unordered(b'-'), - tight: false, - }))), - "-" + Exit(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: false, + }, + marker: "-", + })), + "" ), - (Exit(Container(ListItem(Unordered(b'-')))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), ""), ( - 
Exit(Container(List(ListKind { - ty: Unordered(b'-'), - tight: true, - }))), - "-" + Exit(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true, + }, + marker: "-", + })), + "" ), ); } @@ -1575,54 +1954,66 @@ mod test { "- b\n", // ), ( - Enter(Container(List(ListKind { - ty: Unordered(b'-'), - tight: true, - }))), - "-" + Enter(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true, + }, + marker: "-", + })), + "" ), - (Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "a"), (Exit(Leaf(Paragraph)), ""), (Atom(Blankline), "\n"), ( - Enter(Container(List(ListKind { - ty: Unordered(b'+'), - tight: true, - }))), - "+", + Enter(Container(List { + kind: ListKind { + ty: Unordered(b'+'), + tight: true, + }, + marker: "+", + })), + "", ), - (Enter(Container(ListItem(Unordered(b'+')))), "+"), + (Enter(Container(ListItem(ListItemKind::List))), "+"), (Enter(Leaf(Paragraph)), ""), (Inline, "aa"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(Unordered(b'+')))), "+"), - (Enter(Container(ListItem(Unordered(b'+')))), "+"), + (Exit(Container(ListItem(ListItemKind::List))), ""), + (Enter(Container(ListItem(ListItemKind::List))), "+"), (Enter(Leaf(Paragraph)), ""), (Inline, "ab"), (Exit(Leaf(Paragraph)), ""), (Atom(Blankline), "\n"), - (Exit(Container(ListItem(Unordered(b'+')))), "+"), + (Exit(Container(ListItem(ListItemKind::List))), ""), ( - Exit(Container(List(ListKind { - ty: Unordered(b'+'), - tight: true, - }))), - "+", + Exit(Container(List { + kind: ListKind { + ty: Unordered(b'+'), + tight: true, + }, + marker: "+", + })), + "", ), - (Exit(Container(ListItem(Unordered(b'-')))), "-"), - (Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), ""), + (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "b"), (Exit(Leaf(Paragraph)), ""), - 
(Exit(Container(ListItem(Unordered(b'-')))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), ""), ( - Exit(Container(List(ListKind { - ty: Unordered(b'-'), - tight: true, - }))), - "-" + Exit(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true, + }, + marker: "-", + })), + "" ), ); test_parse!( @@ -1634,47 +2025,59 @@ mod test { " c\n", // ), ( - Enter(Container(List(ListKind { - ty: Ordered(Decimal, Period), - tight: true, - }))), - "1.", + Enter(Container(List { + kind: ListKind { + ty: Ordered(Decimal, Period), + tight: true, + }, + marker: "1.", + })), + "", ), - (Enter(Container(ListItem(Ordered(Decimal, Period)))), "1."), + (Enter(Container(ListItem(ListItemKind::List))), "1."), (Enter(Leaf(Paragraph)), ""), (Inline, "a"), (Exit(Leaf(Paragraph)), ""), (Atom(Blankline), "\n"), ( - Enter(Container(List(ListKind { - ty: Unordered(b'-'), - tight: true, - }))), - "-", + Enter(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true, + }, + marker: "-", + })), + "", ), - (Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "b"), (Exit(Leaf(Paragraph)), ""), (Atom(Blankline), "\n"), - (Exit(Container(ListItem(Unordered(b'-')))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), ""), ( - Exit(Container(List(ListKind { - ty: Unordered(b'-'), - tight: true, - }))), - "-", + Exit(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true, + }, + marker: "-", + })), + "", ), (Enter(Leaf(Paragraph)), ""), (Inline, "c"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(Ordered(Decimal, Period)))), "1."), + (Exit(Container(ListItem(ListItemKind::List))), ""), ( - Exit(Container(List(ListKind { - ty: Ordered(Decimal, Period), - tight: true, - }))), - "1.", + Exit(Container(List { + kind: ListKind { + ty: Ordered(Decimal, Period), + tight: true, + }, + marker: "1.", + })), + "", ), ); } @@ -1690,63 +2093,81 @@ 
mod test { " * c\n", // ), ( - Enter(Container(List(ListKind { - ty: Unordered(b'-'), - tight: true, - }))), - "-" + Enter(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true, + }, + marker: "-", + })), + "" ), - (Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "a"), (Exit(Leaf(Paragraph)), ""), (Atom(Blankline), "\n"), ( - Enter(Container(List(ListKind { - ty: Unordered(b'+'), - tight: true, - }))), - "+", + Enter(Container(List { + kind: ListKind { + ty: Unordered(b'+'), + tight: true, + }, + marker: "+", + })), + "", ), - (Enter(Container(ListItem(Unordered(b'+')))), "+"), + (Enter(Container(ListItem(ListItemKind::List))), "+"), (Enter(Leaf(Paragraph)), ""), (Inline, "b"), (Exit(Leaf(Paragraph)), ""), (Atom(Blankline), "\n"), ( - Enter(Container(List(ListKind { - ty: Unordered(b'*'), - tight: true, - }))), - "*", + Enter(Container(List { + kind: ListKind { + ty: Unordered(b'*'), + tight: true, + }, + marker: "*", + })), + "", ), - (Enter(Container(ListItem(Unordered(b'*')))), "*"), + (Enter(Container(ListItem(ListItemKind::List))), "*"), (Enter(Leaf(Paragraph)), ""), (Inline, "c"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(Unordered(b'*')))), "*"), + (Exit(Container(ListItem(ListItemKind::List))), ""), ( - Exit(Container(List(ListKind { - ty: Unordered(b'*'), - tight: true, - }))), - "*", + Exit(Container(List { + kind: ListKind { + ty: Unordered(b'*'), + tight: true, + }, + marker: "*", + })), + "", ), - (Exit(Container(ListItem(Unordered(b'+')))), "+"), + (Exit(Container(ListItem(ListItemKind::List))), ""), ( - Exit(Container(List(ListKind { - ty: Unordered(b'+'), - tight: true, - }))), - "+", + Exit(Container(List { + kind: ListKind { + ty: Unordered(b'+'), + tight: true, + }, + marker: "+", + })), + "", ), - (Exit(Container(ListItem(Unordered(b'-')))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), ""), ( - 
Exit(Container(List(ListKind { - ty: Unordered(b'-'), - tight: true, - }))), - "-" + Exit(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true, + }, + marker: "-", + })), + "" ), ); } @@ -1762,44 +2183,56 @@ mod test { "cd\n", // ), ( - Enter(Container(List(ListKind { - ty: Unordered(b'-'), - tight: true - }))), - "-" + Enter(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true + }, + marker: "-", + })), + "" ), - (Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "a"), (Exit(Leaf(Paragraph)), ""), (Atom(Blankline), "\n"), ( - Enter(Container(List(ListKind { - ty: Unordered(b'*'), - tight: true - }))), - "*" + Enter(Container(List { + kind: ListKind { + ty: Unordered(b'*'), + tight: true + }, + marker: "*", + })), + "" ), - (Enter(Container(ListItem(Unordered(b'*')))), "*"), + (Enter(Container(ListItem(ListItemKind::List))), "*"), (Enter(Leaf(Paragraph)), ""), (Inline, "b"), (Exit(Leaf(Paragraph)), ""), (Atom(Blankline), "\n"), - (Exit(Container(ListItem(Unordered(b'*')))), "*"), + (Exit(Container(ListItem(ListItemKind::List))), ""), ( - Exit(Container(List(ListKind { - ty: Unordered(b'*'), - tight: true - }))), - "*" + Exit(Container(List { + kind: ListKind { + ty: Unordered(b'*'), + tight: true + }, + marker: "*", + })), + "" ), - (Exit(Container(ListItem(Unordered(b'-')))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), ""), ( - Exit(Container(List(ListKind { - ty: Unordered(b'-'), - tight: true - }))), - "-" + Exit(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true + }, + marker: "-", + })), + "" ), (Enter(Leaf(Paragraph)), ""), (Inline, "cd"), @@ -1816,47 +2249,59 @@ mod test { "+ c\n", // ), ( - Enter(Container(List(ListKind { - ty: Unordered(b'-'), - tight: true - }))), - "-" + Enter(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true + }, + marker: "-", + })), + "" ), - 
(Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "a"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(Unordered(b'-')))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), ""), ( - Exit(Container(List(ListKind { - ty: Unordered(b'-'), - tight: true - }))), - "-" + Exit(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true + }, + marker: "-", + })), + "" ), ( - Enter(Container(List(ListKind { - ty: Unordered(b'+'), - tight: true - }))), - "+" + Enter(Container(List { + kind: ListKind { + ty: Unordered(b'+'), + tight: true + }, + marker: "+", + })), + "" ), - (Enter(Container(ListItem(Unordered(b'+')))), "+"), + (Enter(Container(ListItem(ListItemKind::List))), "+"), (Enter(Leaf(Paragraph)), ""), (Inline, "b"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(Unordered(b'+')))), "+"), - (Enter(Container(ListItem(Unordered(b'+')))), "+"), + (Exit(Container(ListItem(ListItemKind::List))), ""), + (Enter(Container(ListItem(ListItemKind::List))), "+"), (Enter(Leaf(Paragraph)), ""), (Inline, "c"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(Unordered(b'+')))), "+"), + (Exit(Container(ListItem(ListItemKind::List))), ""), ( - Exit(Container(List(ListKind { - ty: Unordered(b'+'), - tight: true - }))), - "+" + Exit(Container(List { + kind: ListKind { + ty: Unordered(b'+'), + tight: true + }, + marker: "+", + })), + "" ), ); } @@ -1870,27 +2315,157 @@ mod test { " description\n", // ), ( - Enter(Container(List(ListKind { - ty: Description, - tight: true, - }))), - ":" + Enter(Container(List { + kind: ListKind { + ty: Description, + tight: true, + }, + marker: ":", + })), + "" ), - (Enter(Leaf(DescriptionTerm)), ""), + (Stale, ":"), + (Stale, ""), + (Enter(Leaf(DescriptionTerm)), ":"), (Inline, "term"), (Exit(Leaf(DescriptionTerm)), ""), - (Enter(Container(ListItem(Description))), ":"), (Atom(Blankline), "\n"), + 
(Enter(Container(ListItem(ListItemKind::Description))), ""), (Enter(Leaf(Paragraph)), ""), (Inline, "description"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(Description))), ":"), + (Exit(Container(ListItem(ListItemKind::Description))), ""), ( - Exit(Container(List(ListKind { - ty: Description, - tight: true, - }))), - ":" + Exit(Container(List { + kind: ListKind { + ty: Description, + tight: true, + }, + marker: ":", + })), + "" + ), + ); + test_parse!( + concat!( + ": apple\n", + " fruit\n", + "\n", + " Paragraph one\n", + "\n", + " Paragraph two\n", + "\n", + " - sub\n", + " - list\n", + "\n", + ": orange\n", + ), + ( + Enter(Container(List { + kind: ListKind { + ty: Description, + tight: false + }, + marker: ":", + })), + "", + ), + (Stale, ":"), + (Stale, ""), + (Enter(Leaf(DescriptionTerm)), ":"), + (Inline, "apple\n"), + (Inline, "fruit"), + (Exit(Leaf(DescriptionTerm)), ""), + (Atom(Blankline), "\n"), + (Enter(Container(ListItem(ListItemKind::Description))), ""), + (Enter(Leaf(Paragraph)), ""), + (Inline, "Paragraph one"), + (Exit(Leaf(Paragraph)), ""), + (Atom(Blankline), "\n"), + (Enter(Leaf(Paragraph)), ""), + (Inline, "Paragraph two"), + (Exit(Leaf(Paragraph)), ""), + (Atom(Blankline), "\n"), + ( + Enter(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true + }, + marker: "-", + })), + "", + ), + (Enter(Container(ListItem(ListItemKind::List))), "-"), + (Enter(Leaf(Paragraph)), ""), + (Inline, "sub"), + (Exit(Leaf(Paragraph)), ""), + (Exit(Container(ListItem(ListItemKind::List))), ""), + (Enter(Container(ListItem(ListItemKind::List))), "-"), + (Enter(Leaf(Paragraph)), ""), + (Inline, "list"), + (Exit(Leaf(Paragraph)), ""), + (Atom(Blankline), "\n"), + (Exit(Container(ListItem(ListItemKind::List))), ""), + ( + Exit(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true + }, + marker: "-", + })), + "", + ), + (Exit(Container(ListItem(ListItemKind::Description))), ""), + (Stale, ":"), + (Stale, ""), + 
(Enter(Leaf(DescriptionTerm)), ":"), + (Inline, "orange"), + (Exit(Leaf(DescriptionTerm)), ""), + (Enter(Container(ListItem(ListItemKind::Description))), ""), + (Exit(Container(ListItem(ListItemKind::Description))), ""), + ( + Exit(Container(List { + kind: ListKind { + ty: Description, + tight: false + }, + marker: ":", + })), + "", + ), + ); + } + + #[test] + fn parse_description_list_empty() { + test_parse!( + ":\n", + ( + Enter(Container(List { + kind: ListKind { + ty: Description, + tight: true, + }, + marker: ":", + })), + "" + ), + (Enter(Leaf(DescriptionTerm)), ":"), + (Exit(Leaf(DescriptionTerm)), ""), + (Enter(Container(ListItem(ListItemKind::Description))), ""), + (Atom(Blankline), "\n"), + (Exit(Container(ListItem(ListItemKind::Description))), ""), + ( + Exit(Container(List { + kind: ListKind { + ty: Description, + tight: true, + }, + marker: ":", + })), + "" ), ); } @@ -1905,31 +2480,45 @@ mod test { ), (Enter(Container(Table)), ""), (Enter(Container(TableRow { head: true })), "|"), - (Enter(Leaf(TableCell(Alignment::Unspecified))), "|"), + (Enter(Leaf(TableCell(Alignment::Unspecified))), ""), (Inline, "a"), (Exit(Leaf(TableCell(Alignment::Unspecified))), "|"), - (Enter(Leaf(TableCell(Alignment::Unspecified))), "|"), + (Enter(Leaf(TableCell(Alignment::Unspecified))), ""), (Inline, "b"), (Exit(Leaf(TableCell(Alignment::Unspecified))), "|"), - (Enter(Leaf(TableCell(Alignment::Unspecified))), "|"), + (Enter(Leaf(TableCell(Alignment::Unspecified))), ""), (Inline, "c"), (Exit(Leaf(TableCell(Alignment::Unspecified))), "|"), - (Exit(Container(TableRow { head: true })), "|"), + (Exit(Container(TableRow { head: true })), ""), (Enter(Container(TableRow { head: false })), "|"), - (Enter(Leaf(TableCell(Alignment::Unspecified))), "|"), + (Enter(Leaf(TableCell(Alignment::Unspecified))), ""), (Inline, "1"), (Exit(Leaf(TableCell(Alignment::Unspecified))), "|"), - (Enter(Leaf(TableCell(Alignment::Unspecified))), "|"), + (Enter(Leaf(TableCell(Alignment::Unspecified))), 
""), (Inline, "2"), (Exit(Leaf(TableCell(Alignment::Unspecified))), "|"), - (Enter(Leaf(TableCell(Alignment::Unspecified))), "|"), + (Enter(Leaf(TableCell(Alignment::Unspecified))), ""), (Inline, "3"), (Exit(Leaf(TableCell(Alignment::Unspecified))), "|"), - (Exit(Container(TableRow { head: false })), "|"), + (Exit(Container(TableRow { head: false })), ""), (Exit(Container(Table)), "") ); } + #[test] + fn parse_table_empty() { + test_parse!( + "||", + (Enter(Container(Table)), ""), + (Enter(Container(TableRow { head: false })), "|"), + (Enter(Leaf(TableCell(Alignment::Unspecified))), ""), + (Inline, ""), + (Exit(Leaf(TableCell(Alignment::Unspecified))), "|"), + (Exit(Container(TableRow { head: false })), ""), + (Exit(Container(Table)), ""), + ); + } + #[test] fn parse_table_escaped() { test_parse!( @@ -1946,10 +2535,10 @@ mod test { "|a|\npara", (Enter(Container(Table)), ""), (Enter(Container(TableRow { head: false })), "|"), - (Enter(Leaf(TableCell(Alignment::Unspecified))), "|"), + (Enter(Leaf(TableCell(Alignment::Unspecified))), ""), (Inline, "a"), (Exit(Leaf(TableCell(Alignment::Unspecified))), "|"), - (Exit(Container(TableRow { head: false })), "|"), + (Exit(Container(TableRow { head: false })), ""), (Exit(Container(Table)), ""), (Enter(Leaf(Paragraph)), ""), (Inline, "para"), @@ -1966,18 +2555,31 @@ mod test { ), (Enter(Container(Table)), ""), (Enter(Container(TableRow { head: false })), "|"), - (Enter(Leaf(TableCell(Alignment::Left))), "|"), + (Enter(Leaf(TableCell(Alignment::Left))), ""), (Inline, "left"), (Exit(Leaf(TableCell(Alignment::Left))), "|"), - (Enter(Leaf(TableCell(Alignment::Center))), "|"), + (Enter(Leaf(TableCell(Alignment::Center))), ""), (Inline, "center"), (Exit(Leaf(TableCell(Alignment::Center))), "|"), - (Enter(Leaf(TableCell(Alignment::Right))), "|"), + (Enter(Leaf(TableCell(Alignment::Right))), ""), (Inline, "right"), (Exit(Leaf(TableCell(Alignment::Right))), "|"), - (Exit(Container(TableRow { head: false })), "|"), + 
(Exit(Container(TableRow { head: false })), ""), (Exit(Container(Table)), "") ); + test_parse!( + concat!( + "||\n", // + "|-:|\n", // + ), + (Enter(Container(Table)), ""), + (Enter(Container(TableRow { head: true })), "|"), + (Enter(Leaf(TableCell(Alignment::Right))), ""), + (Inline, ""), + (Exit(Leaf(TableCell(Alignment::Right))), "|"), + (Exit(Container(TableRow { head: true })), ""), + (Exit(Container(Table)), ""), + ); } #[test] @@ -1989,10 +2591,10 @@ mod test { (Inline, "caption"), (Exit(Leaf(Caption)), ""), (Enter(Container(TableRow { head: false })), "|"), - (Enter(Leaf(TableCell(Alignment::Unspecified))), "|"), + (Enter(Leaf(TableCell(Alignment::Unspecified))), ""), (Inline, "a"), (Exit(Leaf(TableCell(Alignment::Unspecified))), "|"), - (Exit(Container(TableRow { head: false })), "|"), + (Exit(Container(TableRow { head: false })), ""), (Exit(Container(Table)), ""), ); } @@ -2014,10 +2616,10 @@ mod test { (Inline, "continued"), (Exit(Leaf(Caption)), ""), (Enter(Container(TableRow { head: false })), "|"), - (Enter(Leaf(TableCell(Alignment::Unspecified))), "|"), + (Enter(Leaf(TableCell(Alignment::Unspecified))), ""), (Inline, "a"), (Exit(Leaf(TableCell(Alignment::Unspecified))), "|"), - (Exit(Container(TableRow { head: false })), "|"), + (Exit(Container(TableRow { head: false })), ""), (Exit(Container(Table)), ""), (Atom(Blankline), "\n"), (Enter(Leaf(Paragraph)), ""), @@ -2032,10 +2634,10 @@ mod test { "|a|\n^ ", (Enter(Container(Table)), ""), (Enter(Container(TableRow { head: false })), "|"), - (Enter(Leaf(TableCell(Alignment::Unspecified))), "|"), + (Enter(Leaf(TableCell(Alignment::Unspecified))), ""), (Inline, "a"), (Exit(Leaf(TableCell(Alignment::Unspecified))), "|"), - (Exit(Container(TableRow { head: false })), "|"), + (Exit(Container(TableRow { head: false })), ""), (Exit(Container(Table)), ""), (Enter(Leaf(Paragraph)), ""), (Inline, "^"), @@ -2055,24 +2657,32 @@ mod test { #[test] fn parse_div() { test_parse!( - concat!("::: cls\n", "abc\n", 
":::\n",), - (Enter(Container(Div)), "cls"), + concat!( + "::: cls\n", // + "abc\n", // + ":::\n", // + ), + (Enter(Container(Div { class: "cls" })), "::: cls\n"), (Enter(Leaf(Paragraph)), ""), (Inline, "abc"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(Div)), "cls"), + (Exit(Container(Div { class: "cls" })), ":::\n"), ); } #[test] fn parse_div_no_class() { test_parse!( - concat!(":::\n", "abc\n", ":::\n",), - (Enter(Container(Div)), ""), + concat!( + ":::\n", // + "abc\n", // + ":::\n", // + ), + (Enter(Container(Div { class: "" })), ":::\n"), (Enter(Leaf(Paragraph)), ""), (Inline, "abc"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(Div)), ""), + (Exit(Container(Div { class: "" })), ":::\n"), ); } @@ -2080,48 +2690,60 @@ mod test { fn parse_inner_indent() { test_parse!( concat!( - "- - a\n", + "- - a\n", // " - b\n", // ), ( - Enter(Container(List(ListKind { - ty: Unordered(b'-'), - tight: true, - }))), - "-" + Enter(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true, + }, + marker: "-", + })), + "" ), - (Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Enter(Container(ListItem(ListItemKind::List))), "-"), ( - Enter(Container(List(ListKind { - ty: Unordered(b'-'), - tight: true, - }))), - "-" + Enter(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true, + }, + marker: "-", + })), + "" ), - (Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "a"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(Unordered(b'-')))), "-"), - (Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), ""), + (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "b"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(Unordered(b'-')))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), ""), ( - Exit(Container(List(ListKind { - ty: 
Unordered(b'-'), - tight: true, - }))), - "-" + Exit(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true, + }, + marker: "-", + })), + "" ), - (Exit(Container(ListItem(Unordered(b'-')))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), ""), ( - Exit(Container(List(ListKind { - ty: Unordered(b'-'), - tight: true, - }))), - "-" + Exit(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true, + }, + marker: "-", + })), + "" ), ); } @@ -2200,10 +2822,10 @@ mod test { indent: 0, kind: FenceKind::CodeBlock(b'`'), fence_length: 4, - has_spec: true, + spec: "lang", has_closing_fence: true, }, - "lang", + "```` lang\n", 5, ); test_block!( @@ -2219,10 +2841,10 @@ mod test { indent: 0, kind: FenceKind::CodeBlock(b'`'), fence_length: 3, - has_spec: false, + spec: "", has_closing_fence: true, }, - "", + "```\n", 3, ); test_block!( @@ -2243,9 +2865,10 @@ mod test { "[tag]: url\n", Kind::Definition { indent: 0, - footnote: false + footnote: false, + label: "tag", }, - "tag", + "[tag]:", 1 ); } @@ -2259,9 +2882,10 @@ mod test { ), Kind::Definition { indent: 0, - footnote: false + footnote: false, + label: "tag", }, - "tag", + "[tag]:", 2, ); test_block!( @@ -2271,9 +2895,10 @@ mod test { ), Kind::Definition { indent: 0, - footnote: false + footnote: false, + label: "tag", }, - "tag", + "[tag]:", 1, ); } @@ -2284,9 +2909,10 @@ mod test { "[^tag]:\n", Kind::Definition { indent: 0, - footnote: true + footnote: true, + label: "tag", }, - "tag", + "[^tag]:", 1 ); } @@ -2297,9 +2923,10 @@ mod test { "[^tag]: a\n", Kind::Definition { indent: 0, - footnote: true + footnote: true, + label: "tag", }, - "tag", + "[^tag]:", 1 ); } @@ -2313,9 +2940,10 @@ mod test { ), Kind::Definition { indent: 0, - footnote: true + footnote: true, + label: "tag", }, - "tag", + "[^tag]:", 2, ); } @@ -2331,9 +2959,10 @@ mod test { ), Kind::Definition { indent: 0, - footnote: true + footnote: true, + label: "tag", }, - "tag", + "[^tag]:", 3, ); } diff --git 
a/src/inline.rs b/src/inline.rs index 74994d4..d471ee1 100644 --- a/src/inline.rs +++ b/src/inline.rs @@ -12,9 +12,9 @@ use Container::*; use ControlFlow::*; #[derive(Debug, Clone, PartialEq, Eq)] -pub enum Atom { - FootnoteReference, - Symbol, +pub enum Atom<'s> { + FootnoteReference { label: &'s str }, + Symbol(&'s str), Softbreak, Hardbreak, Escape, @@ -26,7 +26,7 @@ pub enum Atom { } #[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub enum Container { +pub enum Container<'s> { Span, Subscript, Superscript, @@ -36,16 +36,14 @@ pub enum Container { Strong, Mark, Verbatim, - /// Span is the format. - RawFormat, + RawFormat { format: &'s str }, InlineMath, DisplayMath, ReferenceLink(CowStrIndex), ReferenceImage(CowStrIndex), InlineLink(CowStrIndex), InlineImage(CowStrIndex), - /// Open delimiter span is URL, closing is '>'. - Autolink, + Autolink(&'s str), } type CowStrIndex = u32; @@ -57,10 +55,10 @@ pub enum QuoteType { } #[derive(Clone, Debug, PartialEq, Eq)] -pub enum EventKind { - Enter(Container), - Exit(Container), - Atom(Atom), +pub enum EventKind<'s> { + Enter(Container<'s>), + Exit(Container<'s>), + Atom(Atom<'s>), Str, Attributes { container: bool, @@ -72,8 +70,8 @@ pub enum EventKind { type AttributesIndex = u32; #[derive(Clone, Debug, PartialEq, Eq)] -pub struct Event { - pub kind: EventKind, +pub struct Event<'s> { + pub kind: EventKind<'s>, pub span: Span, } @@ -218,7 +216,7 @@ pub struct Parser<'s> { openers: Vec<(Opener, usize)>, /// Buffer queue for next events. Events are buffered until no modifications due to future /// characters are needed. - events: std::collections::VecDeque, + events: std::collections::VecDeque>, /// State if inside a verbatim container. verbatim: Option, /// State if currently parsing potential attributes. 
@@ -268,12 +266,12 @@ impl<'s> Parser<'s> { self.store_attributes.clear(); } - fn push_sp(&mut self, kind: EventKind, span: Span) -> Option { + fn push_sp(&mut self, kind: EventKind<'s>, span: Span) -> Option { self.events.push_back(Event { kind, span }); Some(Continue) } - fn push(&mut self, kind: EventKind) -> Option { + fn push(&mut self, kind: EventKind<'s>) -> Option { self.push_sp(kind, self.input.span) } @@ -310,17 +308,16 @@ impl<'s> Parser<'s> { && matches!(first.kind, lex::Kind::Seq(Sequence::Backtick)) { let raw_format = self.input.ahead_raw_format(); - let mut span_closer = self.input.span; if let Some(span_format) = raw_format { - self.events[event_opener].kind = EventKind::Enter(RawFormat); - self.events[event_opener].span = span_format; - self.input.span = span_format.translate(1); - span_closer = span_format; + self.events[event_opener].kind = EventKind::Enter(RawFormat { + format: span_format.of(self.input.src), + }); + self.input.span = Span::new(self.input.span.start(), span_format.end() + 1); }; let ty_opener = if let EventKind::Enter(ty) = self.events[event_opener].kind { debug_assert!(matches!( ty, - Verbatim | RawFormat | InlineMath | DisplayMath + Verbatim | RawFormat { .. } | InlineMath | DisplayMath )); ty } else { @@ -330,7 +327,7 @@ impl<'s> Parser<'s> { { self.events.drain(*event_skip..); } - self.push_sp(EventKind::Exit(ty_opener), span_closer); + self.push(EventKind::Exit(ty_opener)); self.verbatim = None; if raw_format.is_none() && self.input.peek().map_or(false, |t| { @@ -527,7 +524,13 @@ impl<'s> Parser<'s> { self.input.span = Span::new(start_attr, state.end_attr); self.input.lexer = lex::Lexer::new(&self.input.src[state.end_attr..line_end]); - if !attrs.is_empty() { + if attrs.is_empty() { + if matches!(state.elem_ty, AttributesElementType::Container { .. 
}) { + let last = self.events.len() - 1; + self.events[last].span = + Span::new(self.events[last].span.start(), self.input.span.end()); + } + } else { let attr_index = self.store_attributes.len() as AttributesIndex; self.store_attributes.push(attrs); let attr_event = Event { @@ -540,11 +543,13 @@ impl<'s> Parser<'s> { match state.elem_ty { AttributesElementType::Container { e_placeholder } => { self.events[e_placeholder] = attr_event; + let last = self.events.len() - 1; if matches!(self.events[e_placeholder + 1].kind, EventKind::Str) { self.events[e_placeholder + 1].kind = EventKind::Enter(Span); - let last = self.events.len() - 1; self.events[last].kind = EventKind::Exit(Span); } + self.events[last].span = + Span::new(self.events[last].span.start(), self.input.span.end()); } AttributesElementType::Word => { self.events.push_back(attr_event); @@ -577,12 +582,13 @@ impl<'s> Parser<'s> { .sum(); if end && is_url { self.input.lexer = lex::Lexer::new(ahead.as_str()); - self.input.span = self.input.span.after(len); - self.push(EventKind::Enter(Autolink)); + let span_url = self.input.span.after(len); + let url = span_url.of(self.input.src); + self.push(EventKind::Enter(Autolink(url))); + self.input.span = span_url; self.push(EventKind::Str); - self.push(EventKind::Exit(Autolink)); self.input.span = self.input.span.after(1); - return Some(Continue); + return self.push(EventKind::Exit(Autolink(url))); } } None @@ -606,10 +612,11 @@ impl<'s> Parser<'s> { .sum(); if end && valid { self.input.lexer = lex::Lexer::new(ahead.as_str()); - self.input.span = self.input.span.after(len); - self.push(EventKind::Atom(Symbol)); - self.input.span = self.input.span.after(1); - return Some(Continue); + let span_symbol = self.input.span.after(len); + self.input.span = Span::new(self.input.span.start(), span_symbol.end() + 1); + return self.push(EventKind::Atom(Atom::Symbol( + span_symbol.of(self.input.src), + ))); } } None @@ -649,10 +656,10 @@ impl<'s> Parser<'s> { .sum(); if end { 
self.input.lexer = lex::Lexer::new(ahead.as_str()); - self.input.span = self.input.span.after(len); - self.push(EventKind::Atom(FootnoteReference)); - self.input.span = self.input.span.after(1); - return Some(Continue); + let span_label = self.input.span.after(len); + let label = span_label.of(self.input.src); + self.input.span = Span::new(self.input.span.start(), span_label.end() + 1); + return self.push(EventKind::Atom(FootnoteReference { label })); } } None @@ -925,7 +932,7 @@ impl<'s> Parser<'s> { self.push(EventKind::Atom(atom)) } - fn merge_str_events(&mut self, span_str: Span) -> Event { + fn merge_str_events(&mut self, span_str: Span) -> Event<'s> { let mut span = span_str; let should_merge = |e: &Event, span: Span| { matches!(e.kind, EventKind::Str | EventKind::Placeholder) @@ -952,7 +959,7 @@ impl<'s> Parser<'s> { } } - fn apply_word_attributes(&mut self, span_str: Span) -> Event { + fn apply_word_attributes(&mut self, span_str: Span) -> Event<'s> { if let Some(i) = span_str .of(self.input.src) .bytes() @@ -972,7 +979,7 @@ impl<'s> Parser<'s> { let attr = self.events.pop_front().unwrap(); self.events.push_front(Event { kind: EventKind::Exit(Span), - span: span_str.empty_after(), + span: attr.span, }); self.events.push_front(Event { kind: EventKind::Str, @@ -1089,8 +1096,8 @@ impl Opener { } } -enum DelimEventKind { - Container(Container), +enum DelimEventKind<'s> { + Container(Container<'s>), Span(SpanType), Quote(QuoteType), Link { @@ -1100,7 +1107,7 @@ enum DelimEventKind { }, } -impl From for DelimEventKind { +impl<'s> From for DelimEventKind<'s> { fn from(d: Opener) -> Self { match d { Opener::Span(ty) => Self::Span(ty), @@ -1127,7 +1134,7 @@ impl From for DelimEventKind { } impl<'s> Iterator for Parser<'s> { - type Item = Event; + type Item = Event<'s>; fn next(&mut self) -> Option { while self.events.is_empty() @@ -1158,7 +1165,7 @@ impl<'s> Iterator for Parser<'s> { let ty_opener = if let EventKind::Enter(ty) = self.events[event_opener].kind { 
debug_assert!(matches!( ty, - Verbatim | RawFormat | InlineMath | DisplayMath + Verbatim | RawFormat { .. } | InlineMath | DisplayMath )); ty } else { @@ -1266,7 +1273,7 @@ mod test { ), (Enter(Verbatim), "`"), (Str, "raw"), - (Exit(Verbatim), "`"), + (Exit(Verbatim), "`{#id}"), (Str, " post"), ); } @@ -1336,16 +1343,16 @@ mod test { fn raw_format() { test_parse!( "`raw`{=format}", - (Enter(RawFormat), "format"), + (Enter(RawFormat { format: "format" }), "`"), (Str, "raw"), - (Exit(RawFormat), "format"), + (Exit(RawFormat { format: "format" }), "`{=format}"), ); test_parse!( "before `raw`{=format} after", (Str, "before "), - (Enter(RawFormat), "format"), + (Enter(RawFormat { format: "format" }), "`"), (Str, "raw"), - (Exit(RawFormat), "format"), + (Exit(RawFormat { format: "format" }), "`{=format}"), (Str, " after"), ); } @@ -1456,7 +1463,7 @@ mod test { ), (Enter(Span), ""), (Str, "[text]("), - (Exit(Span), ""), + (Exit(Span), "{.cls}"), ); } @@ -1520,7 +1527,7 @@ mod test { "{.cls}", ), (Enter(Span), "["), - (Exit(Span), "]") + (Exit(Span), "]{.cls}") ); } @@ -1537,7 +1544,7 @@ mod test { ), (Enter(Span), "["), (Str, "abc"), - (Exit(Span), "]"), + (Exit(Span), "]{.def}"), ); test_parse!("not a [span] {#id}.", (Str, "not a [span] "), (Str, ".")); } @@ -1555,7 +1562,7 @@ mod test { ), (Enter(Span), "["), (Str, "x_y"), - (Exit(Span), "]"), + (Exit(Span), "]{.bar_}"), ); } @@ -1563,24 +1570,24 @@ mod test { fn autolink() { test_parse!( "", - (Enter(Autolink), "https://example.com"), + (Enter(Autolink("https://example.com",)), "<"), (Str, "https://example.com"), - (Exit(Autolink), "https://example.com") + (Exit(Autolink("https://example.com",)), ">") ); test_parse!( "", - (Enter(Autolink), "a@b.c"), + (Enter(Autolink("a@b.c")), "<"), (Str, "a@b.c"), - (Exit(Autolink), "a@b.c"), + (Exit(Autolink("a@b.c")), ">"), ); test_parse!( "", - (Enter(Autolink), "http://a.b"), + (Enter(Autolink("http://a.b")), "<"), (Str, "http://a.b"), - (Exit(Autolink), "http://a.b"), - 
(Enter(Autolink), "http://c.d"), + (Exit(Autolink("http://a.b")), ">"), + (Enter(Autolink("http://c.d")), "<"), (Str, "http://c.d"), - (Exit(Autolink), "http://c.d"), + (Exit(Autolink("http://c.d")), ">"), ); test_parse!("", (Str, "")); } @@ -1590,7 +1597,7 @@ mod test { test_parse!( "text[^footnote]. more text", (Str, "text"), - (Atom(FootnoteReference), "footnote"), + (Atom(FootnoteReference { label: "footnote" }), "[^footnote]"), (Str, ". more text"), ); } @@ -1687,7 +1694,7 @@ mod test { ), (Enter(Emphasis), "_"), (Str, "abc def"), - (Exit(Emphasis), "_"), + (Exit(Emphasis), "_{.attr}"), ); } @@ -1697,13 +1704,13 @@ mod test { "_abc def_{}", (Enter(Emphasis), "_"), (Str, "abc def"), - (Exit(Emphasis), "_"), + (Exit(Emphasis), "_{}"), ); test_parse!( "_abc def_{ % comment % } ghi", (Enter(Emphasis), "_"), (Str, "abc def"), - (Exit(Emphasis), "_"), + (Exit(Emphasis), "_{ % comment % }"), (Str, " ghi"), ); } @@ -1721,7 +1728,7 @@ mod test { ), (Enter(Emphasis), "_"), (Str, "abc def"), - (Exit(Emphasis), "_"), + (Exit(Emphasis), "_{.a}{.b}{.c}"), (Str, " "), ); } @@ -1739,7 +1746,7 @@ mod test { ), (Enter(Span), ""), (Str, "word"), - (Exit(Span), ""), + (Exit(Span), "{a=b}"), ); test_parse!( "some word{.a}{.b} with attrs", @@ -1753,7 +1760,7 @@ mod test { ), (Enter(Span), ""), (Str, "word"), - (Exit(Span), ""), + (Exit(Span), "{.a}{.b}"), (Str, " with attrs"), ); } diff --git a/src/lib.rs b/src/lib.rs index 81142c3..1c2620e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -51,6 +51,7 @@ use std::fmt; use std::fmt::Write as FmtWrite; use std::io; +use std::ops::Range; #[cfg(feature = "html")] pub mod html; @@ -60,7 +61,6 @@ mod block; mod inline; mod lex; mod span; -mod tree; use span::Span; @@ -555,13 +555,14 @@ pub struct Parser<'s> { src: &'s str, /// Block tree parsed at first. - tree: block::Tree, + blocks: std::iter::Peekable>>, /// Contents obtained by the prepass. 
pre_pass: PrePass<'s>, - /// Last parsed block attributes + /// Last parsed block attributes, and its starting offset. block_attributes: Attributes<'s>, + block_attributes_pos: Option, /// Current table row is a head row. table_head_row: bool, @@ -576,7 +577,7 @@ pub struct Parser<'s> { #[derive(Clone)] struct Heading { /// Location of heading in src. - location: usize, + location: u32, /// Automatically generated id from heading text. id_auto: String, /// Text of heading, formatting stripped. @@ -598,28 +599,50 @@ struct PrePass<'s> { impl<'s> PrePass<'s> { #[must_use] - fn new(src: &'s str, mut tree: block::Tree, inline_parser: &mut inline::Parser<'s>) -> Self { + fn new( + src: &'s str, + blocks: std::slice::Iter>, + inline_parser: &mut inline::Parser<'s>, + ) -> Self { let mut link_definitions = Map::new(); let mut headings: Vec = Vec::new(); let mut used_ids: Set<&str> = Set::new(); + let mut blocks = blocks.peekable(); + let mut attr_prev: Option = None; - while let Some(e) = tree.next() { + while let Some(e) = blocks.next() { match e.kind { - tree::EventKind::Enter(block::Node::Leaf(block::Leaf::LinkDefinition)) => { + block::EventKind::Enter(block::Node::Leaf(block::Leaf::LinkDefinition { + label, + })) => { + fn next_is_inline( + bs: &mut std::iter::Peekable>, + ) -> bool { + matches!(bs.peek().map(|e| &e.kind), Some(block::EventKind::Inline)) + } + // All link definition tags have to be obtained initially, as references can // appear before the definition. 
- let tag = e.span.of(src); let attrs = attr_prev.map_or_else(Attributes::new, |sp| attr::parse(sp.of(src))); - let url = match tree.count_children() { - 0 => "".into(), - 1 => tree.take_inlines().next().unwrap().of(src).trim().into(), - _ => tree.take_inlines().map(|sp| sp.of(src).trim()).collect(), + let url = if !next_is_inline(&mut blocks) { + "".into() + } else { + let start = blocks.next().unwrap().span.of(src).trim(); + if !next_is_inline(&mut blocks) { + start.into() + } else { + let mut url = start.to_string(); + while next_is_inline(&mut blocks) { + url.push_str(blocks.next().unwrap().span.of(src).trim()); + } + url.into() + } }; - link_definitions.insert(tag, (url, attrs)); + link_definitions.insert(label, (url, attrs)); } - tree::EventKind::Enter(block::Node::Leaf(block::Leaf::Heading { .. })) => { + block::EventKind::Enter(block::Node::Leaf(block::Leaf::Heading { .. })) => { // All headings ids have to be obtained initially, as references can appear // before the heading. Additionally, determining the id requires inline parsing // as formatting must be removed. 
@@ -634,10 +657,21 @@ impl<'s> PrePass<'s> { let mut id_auto = String::new(); let mut text = String::new(); let mut last_whitespace = true; - let inlines = tree.take_inlines().collect::>(); inline_parser.reset(); - inlines.iter().enumerate().for_each(|(i, sp)| { - inline_parser.feed_line(*sp, i == inlines.len() - 1); + let mut last_end = 0; + loop { + let span_inline = blocks.next().and_then(|e| { + if matches!(e.kind, block::EventKind::Inline) { + last_end = e.span.end(); + Some(e.span) + } else { + None + } + }); + inline_parser.feed_line( + span_inline.unwrap_or_else(|| Span::empty_at(last_end)), + span_inline.is_none(), + ); inline_parser.for_each(|ev| match ev.kind { inline::EventKind::Str => { text.push_str(ev.span.of(src)); @@ -662,8 +696,11 @@ impl<'s> PrePass<'s> { id_auto.push('-'); } _ => {} - }) - }); + }); + if span_inline.is_none() { + break; + } + } id_auto.drain(id_auto.trim_end_matches('-').len()..); // ensure id unique @@ -689,17 +726,17 @@ impl<'s> PrePass<'s> { std::mem::transmute::<&str, &'static str>(id_auto.as_ref()) }); headings.push(Heading { - location: e.span.start(), + location: e.span.start() as u32, id_auto, text, id_override, }); } - tree::EventKind::Atom(block::Atom::Attributes) => { + block::EventKind::Atom(block::Atom::Attributes) => { attr_prev = Some(e.span); } - tree::EventKind::Enter(..) - | tree::EventKind::Exit(block::Node::Container(block::Container::Section { + block::EventKind::Enter(..) + | block::EventKind::Exit(block::Node::Container(block::Container::Section { .. 
})) => {} _ => { @@ -723,7 +760,7 @@ impl<'s> PrePass<'s> { h.id_override.as_ref().unwrap_or(&h.id_auto) } - fn heading_id_by_location(&self, location: usize) -> Option<&str> { + fn heading_id_by_location(&self, location: u32) -> Option<&str> { self.headings .binary_search_by_key(&location, |h| h.location) .ok() @@ -741,22 +778,133 @@ impl<'s> PrePass<'s> { impl<'s> Parser<'s> { #[must_use] pub fn new(src: &'s str) -> Self { - let tree = block::parse(src); + let blocks = block::parse(src); let mut inline_parser = inline::Parser::new(src); - let pre_pass = PrePass::new(src, tree.clone(), &mut inline_parser); + let pre_pass = PrePass::new(src, blocks.iter(), &mut inline_parser); Self { src, - tree, + blocks: blocks.into_iter().peekable(), pre_pass, block_attributes: Attributes::new(), + block_attributes_pos: None, table_head_row: false, verbatim: false, inline_parser, } } - fn inline(&mut self) -> Option> { + /// Turn the [`Parser`] into an iterator of tuples, each with an [`Event`] and a start/end byte + /// offset for its corresponding input (as a [`Range`]). + /// + /// Generally, the range of each event does not overlap with any other event and the ranges are + /// in the same order as the events are emitted, i.e. the start offset of an event must be greater + /// than or equal to the (exclusive) end offset of all events that were emitted before that event. + /// However, there are some exceptions to this rule: + /// + /// - Blank lines in between block attributes and the block cause the blankline events to + /// overlap with the block start event. + /// - Caption events are emitted before the table rows while the input for the caption content + /// is located after the table rows, causing the ranges to be out of order. + /// + /// Characters between events, that are not part of any event range, are typically whitespace + /// but may also consist of unattached attributes or `>` characters from blockquotes. 
+ /// + /// # Examples + /// + /// Start and end events of containers correspond only to the start and end markers for that + /// container, not its inner content: + /// + /// ``` + /// # use jotdown::*; + /// # use jotdown::Event::*; + /// # use jotdown::Container::*; + /// let input = "> _hello_ [text](url)\n"; + /// assert!(matches!( + /// Parser::new(input) + /// .into_offset_iter() + /// .map(|(e, r)| (&input[r], e)) + /// .collect::>() + /// .as_slice(), + /// &[ + /// (">", Start(Blockquote, ..)), + /// ("", Start(Paragraph, ..)), + /// ("_", Start(Emphasis, ..)), + /// ("hello", Str(..)), + /// ("_", End(Emphasis)), + /// (" ", Str(..)), + /// ("[", Start(Link { .. }, ..)), + /// ("text", Str(..)), + /// ("](url)", End(Link { .. })), + /// ("", End(Paragraph)), + /// ("", End(Blockquote)), + /// ], + /// )); + /// ``` + /// + /// _Block_ attributes that belong to a container are included in the _start_ event. _Inline_ + /// attributes that belong to a container are included in the _end_ event: + /// + /// ``` + /// # use jotdown::*; + /// # use jotdown::Event::*; + /// # use jotdown::Container::*; + /// let input = " + /// {.quote} + /// > [Hello]{lang=en} world!"; + /// assert!(matches!( + /// Parser::new(input) + /// .into_offset_iter() + /// .map(|(e, r)| (&input[r], e)) + /// .collect::>() + /// .as_slice(), + /// &[ + /// ("\n", Blankline), + /// ("{.quote}\n>", Start(Blockquote, ..)), + /// ("", Start(Paragraph, ..)), + /// ("[", Start(Span, ..)), + /// ("Hello", Str(..)), + /// ("]{lang=en}", End(Span)), + /// (" world!", Str(..)), + /// ("", End(Paragraph)), + /// ("", End(Blockquote)), + /// ], + /// )); + /// ``` + /// + /// Inline events that span multiple lines may contain characters from outer block containers + /// (e.g. 
`>` characters from blockquotes or whitespace from list items): + /// + /// ``` + /// # use jotdown::*; + /// # use jotdown::Event::*; + /// # use jotdown::Container::*; + /// let input = " + /// > [txt](multi + /// > line)"; + /// assert!(matches!( + /// Parser::new(input) + /// .into_offset_iter() + /// .map(|(e, r)| (&input[r], e)) + /// .collect::>() + /// .as_slice(), + /// &[ + /// ("\n", Blankline), + /// (">", Start(Blockquote, ..)), + /// ("", Start(Paragraph, ..)), + /// ("[", Start(Link { .. }, ..)), + /// ("txt", Str(..)), + /// ("](multi\n> line)", End(Link { .. })), + /// ("", End(Paragraph)), + /// ("", End(Blockquote)), + /// ], + /// )); + /// ``` + pub fn into_offset_iter(self) -> OffsetIter<'s> { + OffsetIter { parser: self } + } + + fn inline(&mut self) -> Option<(Event<'s>, Range)> { let next = self.inline_parser.next()?; let (inline, mut attributes) = match next { @@ -772,16 +920,14 @@ impl<'s> Parser<'s> { inline.map(|inline| { let enter = matches!(inline.kind, inline::EventKind::Enter(_)); - match inline.kind { + let event = match inline.kind { inline::EventKind::Enter(c) | inline::EventKind::Exit(c) => { let t = match c { inline::Container::Span => Container::Span, inline::Container::Verbatim => Container::Verbatim, inline::Container::InlineMath => Container::Math { display: false }, inline::Container::DisplayMath => Container::Math { display: true }, - inline::Container::RawFormat => Container::RawInline { - format: inline.span.of(self.src), - }, + inline::Container::RawFormat { format } => Container::RawInline { format }, inline::Container::Subscript => Container::Subscript, inline::Container::Superscript => Container::Superscript, inline::Container::Insert => Container::Insert, @@ -822,14 +968,13 @@ impl<'s> Parser<'s> { Container::Image(url_or_tag, ty) } } - inline::Container::Autolink => { - let url: CowStr = inline.span.of(self.src).into(); + inline::Container::Autolink(url) => { let ty = if url.contains('@') { LinkType::Email } else 
{ LinkType::AutoLink }; - Container::Link(url, ty) + Container::Link(url.into(), ty) } }; if enter { @@ -839,10 +984,8 @@ impl<'s> Parser<'s> { } } inline::EventKind::Atom(a) => match a { - inline::Atom::FootnoteReference => { - Event::FootnoteReference(inline.span.of(self.src)) - } - inline::Atom::Symbol => Event::Symbol(inline.span.of(self.src).into()), + inline::Atom::FootnoteReference { label } => Event::FootnoteReference(label), + inline::Atom::Symbol(sym) => Event::Symbol(sym.into()), inline::Atom::Quote { ty, left } => match (ty, left) { (inline::QuoteType::Single, true) => Event::LeftSingleQuote, (inline::QuoteType::Single, false) => Event::RightSingleQuote, @@ -861,48 +1004,58 @@ impl<'s> Parser<'s> { inline::EventKind::Attributes { .. } | inline::EventKind::Placeholder => { panic!("{:?}", inline) } - } + }; + (event, inline.span.into()) }) } - fn block(&mut self) -> Option> { - while let Some(ev) = &mut self.tree.next() { - let content = ev.span.of(self.src); + fn block(&mut self) -> Option<(Event<'s>, Range)> { + while let Some(mut ev) = &mut self.blocks.next() { let event = match ev.kind { - tree::EventKind::Atom(a) => match a { + block::EventKind::Atom(a) => match a { block::Atom::Blankline => Event::Blankline, block::Atom::ThematicBreak => { + if let Some(pos) = self.block_attributes_pos.take() { + ev.span = Span::new(pos, ev.span.end()); + } Event::ThematicBreak(self.block_attributes.take()) } block::Atom::Attributes => { - self.block_attributes.parse(content); + if self.block_attributes_pos.is_none() { + self.block_attributes_pos = Some(ev.span.start()); + } + self.block_attributes.parse(ev.span.of(self.src)); continue; } }, - tree::EventKind::Enter(c) | tree::EventKind::Exit(c) => { - let enter = matches!(ev.kind, tree::EventKind::Enter(..)); + block::EventKind::Enter(c) | block::EventKind::Exit(c) => { + let enter = matches!(ev.kind, block::EventKind::Enter(..)); let cont = match c { block::Node::Leaf(l) => { self.inline_parser.reset(); match l { 
block::Leaf::Paragraph => Container::Paragraph, - block::Leaf::Heading { has_section } => Container::Heading { - level: content.len().try_into().unwrap(), + block::Leaf::Heading { + level, + has_section, + pos, + } => Container::Heading { + level, has_section, id: self .pre_pass - .heading_id_by_location(ev.span.start()) + .heading_id_by_location(pos) .unwrap_or_default() .to_string() .into(), }, block::Leaf::DescriptionTerm => Container::DescriptionTerm, - block::Leaf::CodeBlock => { + block::Leaf::CodeBlock { language } => { self.verbatim = enter; - if let Some(format) = content.strip_prefix('=') { + if let Some(format) = language.strip_prefix('=') { Container::RawBlock { format } } else { - Container::CodeBlock { language: content } + Container::CodeBlock { language } } } block::Leaf::TableCell(alignment) => Container::TableCell { @@ -910,16 +1063,20 @@ impl<'s> Parser<'s> { head: self.table_head_row, }, block::Leaf::Caption => Container::Caption, - block::Leaf::LinkDefinition => { - Container::LinkDefinition { label: content } + block::Leaf::LinkDefinition { label } => { + self.verbatim = enter; + Container::LinkDefinition { label } } } } block::Node::Container(c) => match c { block::Container::Blockquote => Container::Blockquote, - block::Container::Div => Container::Div { class: content }, - block::Container::Footnote => Container::Footnote { label: content }, - block::Container::List(block::ListKind { ty, tight }) => { + block::Container::Div { class } => Container::Div { class }, + block::Container::Footnote { label } => Container::Footnote { label }, + block::Container::List { + kind: block::ListKind { ty, tight }, + marker, + } => { if matches!(ty, block::ListType::Description) { Container::DescriptionList } else { @@ -927,9 +1084,8 @@ impl<'s> Parser<'s> { block::ListType::Unordered(..) 
=> ListKind::Unordered, block::ListType::Task => ListKind::Task, block::ListType::Ordered(numbering, style) => { - let start = numbering - .parse_number(style.number(content)) - .max(1); + let start = + numbering.parse_number(style.number(marker)).max(1); ListKind::Ordered { numbering, style, @@ -941,12 +1097,12 @@ impl<'s> Parser<'s> { Container::List { kind, tight } } } - block::Container::ListItem(ty) => match ty { - block::ListType::Task => Container::TaskListItem { - checked: content.as_bytes()[3] != b' ', - }, - block::ListType::Description => Container::DescriptionDetails, - _ => Container::ListItem, + block::Container::ListItem(kind) => match kind { + block::ListItemKind::Task { checked } => { + Container::TaskListItem { checked } + } + block::ListItemKind::Description => Container::DescriptionDetails, + block::ListItemKind::List => Container::ListItem, }, block::Container::Table => Container::Table, block::Container::TableRow { head } => { @@ -955,10 +1111,10 @@ impl<'s> Parser<'s> { } Container::TableRow { head } } - block::Container::Section => Container::Section { + block::Container::Section { pos } => Container::Section { id: self .pre_pass - .heading_id_by_location(ev.span.start()) + .heading_id_by_location(pos) .unwrap_or_default() .to_string() .into(), @@ -966,32 +1122,63 @@ impl<'s> Parser<'s> { }, }; if enter { + if let Some(pos) = self.block_attributes_pos.take() { + ev.span = Span::new(pos, ev.span.end()); + } Event::Start(cont, self.block_attributes.take()) } else { + self.block_attributes = Attributes::new(); + self.block_attributes_pos = None; Event::End(cont) } } - tree::EventKind::Inline => { + block::EventKind::Inline => { if self.verbatim { - Event::Str(content.into()) + Event::Str(ev.span.of(self.src).into()) } else { - self.inline_parser - .feed_line(ev.span, self.tree.branch_is_empty()); - return self.next(); + self.inline_parser.feed_line( + ev.span, + !matches!( + self.blocks.peek().map(|e| &e.kind), + Some(block::EventKind::Inline), 
+ ), + ); + return self.next_span(); } } + block::EventKind::Stale => continue, }; - return Some(event); + return Some((event, ev.span.into())); } None } + + fn next_span(&mut self) -> Option<(Event<'s>, Range)> { + self.inline().or_else(|| self.block()) + } } impl<'s> Iterator for Parser<'s> { type Item = Event<'s>; fn next(&mut self) -> Option { - self.inline().or_else(|| self.block()) + self.next_span().map(|(e, _)| e) + } +} + +/// An iterator that is identical to a [`Parser`], except that it also emits the location of each +/// event within the input. +/// +/// See the documentation of [`Parser::into_offset_iter`] for more information. +pub struct OffsetIter<'s> { + parser: Parser<'s>, +} + +impl<'s> Iterator for OffsetIter<'s> { + type Item = (Event<'s>, Range); + + fn next(&mut self) -> Option { + self.parser.next_span() } } @@ -1523,7 +1710,6 @@ mod test { Blankline, Start(LinkDefinition { label: "tag" }, Attributes::new()), Str("u".into()), - Softbreak, Str("rl".into()), End(LinkDefinition { label: "tag" }), ); @@ -1532,19 +1718,24 @@ mod test { "[text][tag]\n", "\n", "[tag]:\n", - " url\n", // + " url\n", // + " cont\n", // ), Start(Paragraph, Attributes::new()), Start( - Link("url".into(), LinkType::Span(SpanLinkType::Reference)), + Link("urlcont".into(), LinkType::Span(SpanLinkType::Reference)), Attributes::new() ), Str("text".into()), - End(Link("url".into(), LinkType::Span(SpanLinkType::Reference))), + End(Link( + "urlcont".into(), + LinkType::Span(SpanLinkType::Reference) + )), End(Paragraph), Blankline, Start(LinkDefinition { label: "tag" }, Attributes::new()), Str("url".into()), + Str("cont".into()), End(LinkDefinition { label: "tag" }), ); } diff --git a/src/span.rs b/src/span.rs index 6c595a7..722281e 100644 --- a/src/span.rs +++ b/src/span.rs @@ -4,6 +4,12 @@ pub struct Span { end: u32, } +impl From for std::ops::Range { + fn from(span: Span) -> Self { + span.start()..span.end() + } +} + impl Span { pub fn new(start: usize, end: usize) -> Self { 
Self::by_len(start, end.checked_sub(start).unwrap()) diff --git a/src/tree.rs b/src/tree.rs deleted file mode 100644 index 0101370..0000000 --- a/src/tree.rs +++ /dev/null @@ -1,427 +0,0 @@ -use crate::Span; - -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum EventKind { - Enter(C), - Inline, - Exit(C), - Atom(A), -} - -#[derive(Debug)] -pub struct Node<'a, C, A> { - pub index: NodeIndex, - pub elem: Element<'a, C, A>, - pub span: Span, -} - -#[derive(Debug)] -pub enum Element<'a, C, A> { - Container(&'a mut C), - Atom(&'a mut A), - Inline, -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct Event { - pub kind: EventKind, - pub span: Span, -} - -#[derive(Clone)] -pub struct Tree { - nodes: std::rc::Rc<[InternalNode]>, - branch: Vec, - head: Option, -} - -impl Tree { - /// Count number of direct children nodes. - pub fn count_children(&self) -> usize { - let mut head = self.head; - let mut count = 0; - while let Some(h) = head { - let n = &self.nodes[h.index()]; - head = n.next; - count += 1; - } - count - } - - /// Retrieve all inlines until the end of the current container. Panics if any upcoming node is - /// not an inline node. 
- pub fn take_inlines(&mut self) -> impl Iterator + '_ { - let mut head = self.head.take(); - std::iter::from_fn(move || { - head.take().map(|h| { - let n = &self.nodes[h.index()]; - debug_assert!(matches!(n.kind, NodeKind::Inline)); - head = n.next; - n.span - }) - }) - } - - pub fn branch_is_empty(&self) -> bool { - matches!(self.head, None) - } -} - -impl Iterator for Tree { - type Item = Event; - - fn next(&mut self) -> Option { - if let Some(head) = self.head { - let n = &self.nodes[head.index()]; - let kind = match &n.kind { - NodeKind::Root => unreachable!(), - NodeKind::Container(c, child) => { - self.branch.push(head); - self.head = *child; - EventKind::Enter(c.clone()) - } - NodeKind::Atom(a) => { - self.head = n.next; - EventKind::Atom(a.clone()) - } - NodeKind::Inline => { - self.head = n.next; - EventKind::Inline - } - }; - Some(Event { kind, span: n.span }) - } else if let Some(block_ni) = self.branch.pop() { - let InternalNode { next, kind, span } = &self.nodes[block_ni.index()]; - let kind = EventKind::Exit(kind.container().unwrap().clone()); - self.head = *next; - Some(Event { kind, span: *span }) - } else { - None - } - } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub struct NodeIndex(std::num::NonZeroUsize); - -impl NodeIndex { - fn new(i: usize) -> Self { - debug_assert_ne!(i, usize::MAX); - Self((i + 1).try_into().unwrap()) - } - - fn root() -> Self { - Self::new(0) - } - - fn index(self) -> usize { - usize::from(self.0) - 1 - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -enum NodeKind { - Root, - Container(C, Option), - Atom(A), - Inline, -} - -#[derive(Debug, Clone)] -struct InternalNode { - span: Span, - kind: NodeKind, - next: Option, -} - -#[derive(Clone)] -pub struct Builder { - nodes: Vec>, - branch: Vec, - head: Option, - depth: usize, -} - -impl NodeKind { - fn child(&self) -> Option { - if let NodeKind::Container(_, child) = self { - *child - } else { - None - } - } - - fn child_mut(&mut self) -> &mut Option { - if let 
NodeKind::Container(_, child) = self { - child - } else { - panic!() - } - } - - fn container(&self) -> Option<&C> { - if let NodeKind::Container(c, _) = self { - Some(c) - } else { - None - } - } -} - -impl<'a, C, A> From<&'a mut NodeKind> for Element<'a, C, A> { - fn from(kind: &'a mut NodeKind) -> Self { - match kind { - NodeKind::Root => unreachable!(), - NodeKind::Container(c, ..) => Element::Container(c), - NodeKind::Atom(a) => Element::Atom(a), - NodeKind::Inline => Element::Inline, - } - } -} - -impl Builder { - pub(super) fn new() -> Self { - Builder { - nodes: vec![InternalNode { - span: Span::default(), - kind: NodeKind::Root, - next: None, - }], - branch: vec![], - head: Some(NodeIndex::root()), - depth: 0, - } - } - - pub(super) fn atom(&mut self, a: A, span: Span) { - self.add_node(InternalNode { - span, - kind: NodeKind::Atom(a), - next: None, - }); - } - - pub(super) fn inline(&mut self, span: Span) { - self.add_node(InternalNode { - span, - kind: NodeKind::Inline, - next: None, - }); - } - - pub(super) fn enter(&mut self, c: C, span: Span) -> NodeIndex { - self.depth += 1; - self.add_node(InternalNode { - span, - kind: NodeKind::Container(c, None), - next: None, - }) - } - - pub(super) fn exit(&mut self) { - self.depth -= 1; - if let Some(head) = self.head.take() { - if matches!(self.nodes[head.index()].kind, NodeKind::Container(..)) { - self.branch.push(head); - } - } else { - let last = self.branch.pop(); - debug_assert_ne!(last, None); - } - } - - /// Exit and discard all the contents of the current container. - pub(super) fn exit_discard(&mut self) { - self.exit(); - let exited = self.branch.pop().unwrap(); - self.nodes.drain(exited.index()..); - let (prev, has_parent) = self.replace(exited, None); - if has_parent { - self.head = Some(prev); - } else { - self.branch.push(prev); - } - } - - /// Swap the node and its children with either its parent or the node before. 
- pub fn swap_prev(&mut self, node: NodeIndex) { - let next = self.nodes[node.index()].next; - if let Some(n) = next { - self.replace(n, None); - } - let (prev, _) = self.replace(node, next); - self.replace(prev, Some(node)); - self.nodes[node.index()].next = Some(prev); - } - - /// Remove the specified node and its children. - pub fn remove(&mut self, node: NodeIndex) { - let next = self.nodes[node.index()].next; - self.replace(node, next); - } - - pub(super) fn depth(&self) -> usize { - self.depth - } - - pub(super) fn elem(&mut self, ni: NodeIndex) -> Element { - match &mut self.nodes[ni.index()].kind { - NodeKind::Root => unreachable!(), - NodeKind::Container(c, ..) => Element::Container(c), - NodeKind::Atom(a) => Element::Atom(a), - NodeKind::Inline => Element::Inline, - } - } - - /// Retrieve all children nodes for the specified node, in the order that they were added. - pub(super) fn children(&mut self, node: NodeIndex) -> impl Iterator> { - // XXX assumes no modifications - let n = &self.nodes[node.index()]; - let range = if let Some(start) = n.kind.child() { - start.index()..n.next.map_or(self.nodes.len(), NodeIndex::index) - } else { - 0..0 - }; - range - .clone() - .map(NodeIndex::new) - .zip(self.nodes[range].iter_mut()) - .map(|(index, n)| Node { - index, - elem: Element::from(&mut n.kind), - span: n.span, - }) - } - - pub(super) fn finish(self) -> Tree { - debug_assert_eq!(self.depth, 0); - let head = self.nodes[NodeIndex::root().index()].next; - Tree { - nodes: self.nodes.into_boxed_slice().into(), - branch: Vec::new(), - head, - } - } - - fn add_node(&mut self, node: InternalNode) -> NodeIndex { - let ni = NodeIndex::new(self.nodes.len()); - self.nodes.push(node); - if let Some(head_ni) = &mut self.head { - let mut head = &mut self.nodes[head_ni.index()]; - match &mut head.kind { - NodeKind::Root | NodeKind::Inline | NodeKind::Atom(_) => { - // set next pointer of previous node - debug_assert_eq!(head.next, None); - head.next = Some(ni); - } - 
NodeKind::Container(_, child) => { - self.branch.push(*head_ni); - // set child pointer of current container - debug_assert_eq!(*child, None); - *child = Some(ni); - } - } - } else if let Some(block) = self.branch.pop() { - let mut block = &mut self.nodes[block.index()]; - debug_assert!(matches!(block.kind, NodeKind::Container(..))); - block.next = Some(ni); - } else { - panic!() - } - self.head = Some(ni); - ni - } - - /// Remove the link from the node that points to the specified node. Optionally replace the - /// node with another node. Return the pointer node and whether it is a container or not. - fn replace(&mut self, node: NodeIndex, next: Option) -> (NodeIndex, bool) { - for (i, n) in self.nodes.iter_mut().enumerate().rev() { - let ni = NodeIndex::new(i); - if n.next == Some(node) { - n.next = next; - return (ni, false); - } else if n.kind.child() == Some(node) { - *n.kind.child_mut() = next; - return (ni, true); - } - } - panic!("node is never linked to") - } -} - -impl std::fmt::Debug - for Builder -{ - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - self.clone().finish().fmt(f) - } -} - -impl std::fmt::Debug for Tree { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - const INDENT: &str = " "; - let mut level = 0; - writeln!(f)?; - for e in self.clone() { - let indent = INDENT.repeat(level); - match e.kind { - EventKind::Enter(c) => { - write!(f, "{}{:?}", indent, c)?; - level += 1; - } - EventKind::Inline => write!(f, "{}Inline", indent)?, - EventKind::Exit(..) 
=> { - level -= 1; - continue; - } - EventKind::Atom(a) => write!(f, "{}{:?}", indent, a)?, - } - writeln!(f, " ({}:{})", e.span.start(), e.span.end())?; - } - Ok(()) - } -} - -#[cfg(test)] -mod test { - use crate::Span; - - #[test] - fn fmt() { - let mut tree = super::Builder::new(); - tree.enter(1, Span::new(0, 1)); - tree.atom(11, Span::new(0, 1)); - tree.atom(12, Span::new(0, 1)); - tree.exit(); - tree.enter(2, Span::new(1, 5)); - tree.enter(21, Span::new(2, 5)); - tree.enter(211, Span::new(3, 4)); - tree.atom(2111, Span::new(3, 4)); - tree.exit(); - tree.exit(); - tree.enter(22, Span::new(4, 5)); - tree.atom(221, Span::new(4, 5)); - tree.exit(); - tree.exit(); - tree.enter(3, Span::new(5, 6)); - tree.atom(31, Span::new(5, 6)); - tree.exit(); - assert_eq!( - format!("{:?}", tree.finish()), - concat!( - "\n", - "1 (0:1)\n", - " 11 (0:1)\n", - " 12 (0:1)\n", - "2 (1:5)\n", - " 21 (2:5)\n", - " 211 (3:4)\n", - " 2111 (3:4)\n", - " 22 (4:5)\n", - " 221 (4:5)\n", - "3 (5:6)\n", - " 31 (5:6)\n", - ) - ); - } -} diff --git a/tests/afl/Cargo.toml b/tests/afl/Cargo.toml index 66b25e1..70e156b 100644 --- a/tests/afl/Cargo.toml +++ b/tests/afl/Cargo.toml @@ -17,10 +17,6 @@ path = "src/main.rs" name = "parse" path = "src/parse.rs" -[[bin]] -name = "parse_balance" -path = "src/parse_balance.rs" - [[bin]] name = "html" path = "src/html.rs" diff --git a/tests/afl/src/lib.rs b/tests/afl/src/lib.rs index 0591238..697ba74 100644 --- a/tests/afl/src/lib.rs +++ b/tests/afl/src/lib.rs @@ -5,27 +5,66 @@ use html5ever::tendril::TendrilSink; use html5ever::tokenizer; use html5ever::tree_builder; +/// Perform sanity checks on events. pub fn parse(data: &[u8]) { if let Ok(s) = std::str::from_utf8(data) { - jotdown::Parser::new(s).last(); - } -} - -/// Ensure containers are always balanced, i.e. opened and closed in correct order. 
-pub fn parse_balance(data: &[u8]) { - if let Ok(s) = std::str::from_utf8(data) { + let whitelist_whitespace = s.contains('{') && s.contains('}'); // attributes are outside events let mut open = Vec::new(); - for event in jotdown::Parser::new(s) { + let mut last = (jotdown::Event::Str("".into()), 0..0); + for (event, range) in jotdown::Parser::new(s).into_offset_iter() { + // no overlap, out of order + assert!( + last.1.end <= range.start + // block attributes may overlap with start event + || ( + matches!(last.0, jotdown::Event::Blankline) + && ( + matches!( + event, + jotdown::Event::Start(ref cont, ..) if cont.is_block() + ) + || matches!(event, jotdown::Event::ThematicBreak(..)) + ) + ) + // caption event is before table rows but src is after + || ( + matches!( + last.0, + jotdown::Event::Start(jotdown::Container::Caption, ..) + | jotdown::Event::End(jotdown::Container::Caption) + ) + && range.end <= last.1.start + ), + "{} > {} {:?} {:?}", + last.1.end, + range.start, + last.0, + event + ); + last = (event.clone(), range.clone()); + // range is valid unicode, does not cross char boundary + let _ = &s[range]; match event { jotdown::Event::Start(c, ..) => open.push(c.clone()), - jotdown::Event::End(c) => assert_eq!(open.pop().unwrap(), c), + jotdown::Event::End(c) => { + // closes correct event + assert_eq!(open.pop().unwrap(), c); + } _ => {} } } + // no missing close assert_eq!(open, &[]); + // only whitespace after last event + assert!( + whitelist_whitespace || s[last.1.end..].chars().all(char::is_whitespace), + "non whitespace {:?}", + &s[last.1.end..], + ); } } +/// Validate rendered html output. 
pub fn html(data: &[u8]) { if data.iter().any(|i| *i == 0) { return; @@ -132,9 +171,6 @@ impl<'a> tree_builder::TreeSink for Dom<'a> { "Found special tag while closing generic tag", "Formatting element not current node", "Formatting element not open", - // FIXME bug caused by empty table at end of list - "No matching tag to close", - "Unexpected open element while closing", ]; if !whitelist.iter().any(|e| msg.starts_with(e)) { #[cfg(feature = "debug")] diff --git a/tests/afl/src/main.rs b/tests/afl/src/main.rs index ad09a2d..b0a66c8 100644 --- a/tests/afl/src/main.rs +++ b/tests/afl/src/main.rs @@ -8,7 +8,6 @@ fn main() { let f = match target.as_str() { "parse" => jotdown_afl::parse, - "parse_balance" => jotdown_afl::parse_balance, "html" => jotdown_afl::html, _ => panic!("unknown target '{}'", target), }; diff --git a/tests/afl/src/parse_balance.rs b/tests/afl/src/parse_balance.rs deleted file mode 100644 index 9118fb2..0000000 --- a/tests/afl/src/parse_balance.rs +++ /dev/null @@ -1,3 +0,0 @@ -fn main() { - afl::fuzz!(|data: &[u8]| { jotdown_afl::parse_balance(data) }); -}