From 657b47df12d951dbf77897b283c9e914106aa9e5 Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Mon, 24 Apr 2023 19:35:33 +0200 Subject: [PATCH 01/31] inline: mv raw format spec from span to event field --- src/inline.rs | 58 +++++++++++++++++++++++++-------------------------- src/lib.rs | 4 +--- 2 files changed, 30 insertions(+), 32 deletions(-) diff --git a/src/inline.rs b/src/inline.rs index 74994d4..51145ef 100644 --- a/src/inline.rs +++ b/src/inline.rs @@ -26,7 +26,7 @@ pub enum Atom { } #[derive(Debug, Copy, Clone, PartialEq, Eq)] -pub enum Container { +pub enum Container<'s> { Span, Subscript, Superscript, @@ -36,8 +36,9 @@ pub enum Container { Strong, Mark, Verbatim, - /// Span is the format. - RawFormat, + RawFormat { + format: &'s str, + }, InlineMath, DisplayMath, ReferenceLink(CowStrIndex), @@ -57,9 +58,9 @@ pub enum QuoteType { } #[derive(Clone, Debug, PartialEq, Eq)] -pub enum EventKind { - Enter(Container), - Exit(Container), +pub enum EventKind<'s> { + Enter(Container<'s>), + Exit(Container<'s>), Atom(Atom), Str, Attributes { @@ -72,8 +73,8 @@ pub enum EventKind { type AttributesIndex = u32; #[derive(Clone, Debug, PartialEq, Eq)] -pub struct Event { - pub kind: EventKind, +pub struct Event<'s> { + pub kind: EventKind<'s>, pub span: Span, } @@ -218,7 +219,7 @@ pub struct Parser<'s> { openers: Vec<(Opener, usize)>, /// Buffer queue for next events. Events are buffered until no modifications due to future /// characters are needed. - events: std::collections::VecDeque, + events: std::collections::VecDeque>, /// State if inside a verbatim container. verbatim: Option, /// State if currently parsing potential attributes. @@ -268,12 +269,12 @@ impl<'s> Parser<'s> { self.store_attributes.clear(); } - fn push_sp(&mut self, kind: EventKind, span: Span) -> Option { + fn push_sp(&mut self, kind: EventKind<'s>, span: Span) -> Option { self.events.push_back(Event { kind, span }); Some(Continue) } - fn push(&mut self, kind: EventKind) -> Option { + fn push(&mut self, kind: EventKind<'s>) -> Option { self.push_sp(kind, self.input.span) } @@ -310,17 +311,16 @@ impl<'s> Parser<'s> { && matches!(first.kind, lex::Kind::Seq(Sequence::Backtick)) { let raw_format = self.input.ahead_raw_format(); - let mut span_closer = self.input.span; if let Some(span_format) = raw_format { - self.events[event_opener].kind = EventKind::Enter(RawFormat); - self.events[event_opener].span = span_format; - self.input.span = span_format.translate(1); - span_closer = span_format; + self.events[event_opener].kind = EventKind::Enter(RawFormat { + format: span_format.of(self.input.src), + }); + self.input.span = Span::new(self.input.span.start(), span_format.end() + 1); }; let ty_opener = if let EventKind::Enter(ty) = self.events[event_opener].kind { debug_assert!(matches!( ty, - Verbatim | RawFormat | InlineMath | DisplayMath + Verbatim | RawFormat { .. } | InlineMath | DisplayMath )); ty } else { @@ -330,7 +330,7 @@ impl<'s> Parser<'s> { { self.events.drain(*event_skip..); } - self.push_sp(EventKind::Exit(ty_opener), span_closer); + self.push(EventKind::Exit(ty_opener)); self.verbatim = None; if raw_format.is_none() && self.input.peek().map_or(false, |t| { @@ -925,7 +925,7 @@ impl<'s> Parser<'s> { self.push(EventKind::Atom(atom)) } - fn merge_str_events(&mut self, span_str: Span) -> Event { + fn merge_str_events(&mut self, span_str: Span) -> Event<'s> { let mut span = span_str; let should_merge = |e: &Event, span: Span| { matches!(e.kind, EventKind::Str | EventKind::Placeholder) @@ -952,7 +952,7 @@ impl<'s> Parser<'s> { } } - fn apply_word_attributes(&mut self, span_str: Span) -> Event { + fn apply_word_attributes(&mut self, span_str: Span) -> Event<'s> { if let Some(i) = span_str .of(self.input.src) .bytes() @@ -1089,8 +1089,8 @@ impl Opener { } } -enum DelimEventKind { - Container(Container), +enum DelimEventKind<'s> { + Container(Container<'s>), Span(SpanType), Quote(QuoteType), Link { @@ -1100,7 +1100,7 @@ enum DelimEventKind { }, } -impl From for DelimEventKind { +impl<'s> From for DelimEventKind<'s> { fn from(d: Opener) -> Self { match d { Opener::Span(ty) => Self::Span(ty), @@ -1127,7 +1127,7 @@ impl From for DelimEventKind { } impl<'s> Iterator for Parser<'s> { - type Item = Event; + type Item = Event<'s>; fn next(&mut self) -> Option { while self.events.is_empty() @@ -1158,7 +1158,7 @@ impl<'s> Iterator for Parser<'s> { let ty_opener = if let EventKind::Enter(ty) = self.events[event_opener].kind { debug_assert!(matches!( ty, - Verbatim | RawFormat | InlineMath | DisplayMath + Verbatim | RawFormat { .. } | InlineMath | DisplayMath )); ty } else { @@ -1336,16 +1336,16 @@ mod test { fn raw_format() { test_parse!( "`raw`{=format}", - (Enter(RawFormat), "format"), + (Enter(RawFormat { format: "format" }), "`"), (Str, "raw"), - (Exit(RawFormat), "format"), + (Exit(RawFormat { format: "format" }), "`{=format}"), ); test_parse!( "before `raw`{=format} after", (Str, "before "), - (Enter(RawFormat), "format"), + (Enter(RawFormat { format: "format" }), "`"), (Str, "raw"), - (Exit(RawFormat), "format"), + (Exit(RawFormat { format: "format" }), "`{=format}"), (Str, " after"), ); } diff --git a/src/lib.rs b/src/lib.rs index 81142c3..3c9325d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -779,9 +779,7 @@ impl<'s> Parser<'s> { inline::Container::Verbatim => Container::Verbatim, inline::Container::InlineMath => Container::Math { display: false }, inline::Container::DisplayMath => Container::Math { display: true }, - inline::Container::RawFormat => Container::RawInline { - format: inline.span.of(self.src), - }, + inline::Container::RawFormat { format } => Container::RawInline { format }, inline::Container::Subscript => Container::Subscript, inline::Container::Superscript => Container::Superscript, inline::Container::Insert => Container::Insert, From 0a144574f407c05874abc5a368cecdfea915bb58 Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Mon, 24 Apr 2023 19:45:30 +0200 Subject: [PATCH 02/31] inline: mv autolink url from span to event field --- src/inline.rs | 32 +++++++++++++++----------------- src/lib.rs | 5 ++--- 2 files changed, 17 insertions(+), 20 deletions(-) diff --git a/src/inline.rs b/src/inline.rs index 51145ef..8bb216c 100644 --- a/src/inline.rs +++ b/src/inline.rs @@ -36,17 +36,14 @@ pub enum Container<'s> { Strong, Mark, Verbatim, - RawFormat { - format: &'s str, - }, + RawFormat { format: &'s str }, InlineMath, DisplayMath, ReferenceLink(CowStrIndex), ReferenceImage(CowStrIndex), InlineLink(CowStrIndex), InlineImage(CowStrIndex), - /// Open delimiter span is URL, closing is '>'. - Autolink, + Autolink(&'s str), } type CowStrIndex = u32; @@ -577,12 +574,13 @@ impl<'s> Parser<'s> { .sum(); if end && is_url { self.input.lexer = lex::Lexer::new(ahead.as_str()); - self.input.span = self.input.span.after(len); - self.push(EventKind::Enter(Autolink)); + let span_url = self.input.span.after(len); + let url = span_url.of(self.input.src); + self.push(EventKind::Enter(Autolink(url))); + self.input.span = span_url; self.push(EventKind::Str); - self.push(EventKind::Exit(Autolink)); self.input.span = self.input.span.after(1); - return Some(Continue); + return self.push(EventKind::Exit(Autolink(url))); } } None @@ -1563,24 +1561,24 @@ mod test { fn autolink() { test_parse!( "", - (Enter(Autolink), "https://example.com"), + (Enter(Autolink("https://example.com",)), "<"), (Str, "https://example.com"), - (Exit(Autolink), "https://example.com") + (Exit(Autolink("https://example.com",)), ">") ); test_parse!( "", - (Enter(Autolink), "a@b.c"), + (Enter(Autolink("a@b.c")), "<"), (Str, "a@b.c"), - (Exit(Autolink), "a@b.c"), + (Exit(Autolink("a@b.c")), ">"), ); test_parse!( "", - (Enter(Autolink), "http://a.b"), + (Enter(Autolink("http://a.b")), "<"), (Str, "http://a.b"), - (Exit(Autolink), "http://a.b"), - (Enter(Autolink), "http://c.d"), + (Exit(Autolink("http://a.b")), ">"), + (Enter(Autolink("http://c.d")), "<"), (Str, "http://c.d"), - (Exit(Autolink), "http://c.d"), + (Exit(Autolink("http://c.d")), ">"), ); test_parse!("", (Str, "")); } diff --git a/src/lib.rs b/src/lib.rs index 3c9325d..67f100c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -820,14 +820,13 @@ impl<'s> Parser<'s> { Container::Image(url_or_tag, ty) } } - inline::Container::Autolink => { - let url: CowStr = inline.span.of(self.src).into(); + inline::Container::Autolink(url) => { let ty = if url.contains('@') { LinkType::Email } else { LinkType::AutoLink }; - Container::Link(url, ty) + Container::Link(url.into(), ty) } }; if enter { From 9676d9e5d61a169d1c22f68040995e898c748c63 Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Mon, 24 Apr 2023 19:50:52 +0200 Subject: [PATCH 03/31] inline: mv footnote label from span to event field --- src/inline.rs | 16 ++++++++-------- src/lib.rs | 4 +--- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/src/inline.rs b/src/inline.rs index 8bb216c..9e614b2 100644 --- a/src/inline.rs +++ b/src/inline.rs @@ -12,8 +12,8 @@ use Container::*; use ControlFlow::*; #[derive(Debug, Clone, PartialEq, Eq)] -pub enum Atom { - FootnoteReference, +pub enum Atom<'s> { + FootnoteReference { label: &'s str }, Symbol, Softbreak, Hardbreak, @@ -58,7 +58,7 @@ pub enum QuoteType { pub enum EventKind<'s> { Enter(Container<'s>), Exit(Container<'s>), - Atom(Atom), + Atom(Atom<'s>), Str, Attributes { container: bool, @@ -647,10 +647,10 @@ impl<'s> Parser<'s> { .sum(); if end { self.input.lexer = lex::Lexer::new(ahead.as_str()); - self.input.span = self.input.span.after(len); - self.push(EventKind::Atom(FootnoteReference)); - self.input.span = self.input.span.after(1); - return Some(Continue); + let span_label = self.input.span.after(len); + let label = span_label.of(self.input.src); + self.input.span = Span::new(self.input.span.start(), span_label.end() + 1); + return self.push(EventKind::Atom(FootnoteReference { label })); } } None @@ -1588,7 +1588,7 @@ mod test { test_parse!( "text[^footnote]. more text", (Str, "text"), - (Atom(FootnoteReference), "footnote"), + (Atom(FootnoteReference { label: "footnote" }), "[^footnote]"), (Str, ". more text"), ); } diff --git a/src/lib.rs b/src/lib.rs index 67f100c..a6411a4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -836,9 +836,7 @@ impl<'s> Parser<'s> { } } inline::EventKind::Atom(a) => match a { - inline::Atom::FootnoteReference => { - Event::FootnoteReference(inline.span.of(self.src)) - } + inline::Atom::FootnoteReference { label } => Event::FootnoteReference(label), inline::Atom::Symbol => Event::Symbol(inline.span.of(self.src).into()), inline::Atom::Quote { ty, left } => match (ty, left) { (inline::QuoteType::Single, true) => Event::LeftSingleQuote, From 6abe9e44c726936b70e54ff75e579ef681117a48 Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Mon, 24 Apr 2023 19:55:49 +0200 Subject: [PATCH 04/31] inline: mv symbol from span to event field --- src/inline.rs | 11 ++++++----- src/lib.rs | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/inline.rs b/src/inline.rs index 9e614b2..76be8d0 100644 --- a/src/inline.rs +++ b/src/inline.rs @@ -14,7 +14,7 @@ use ControlFlow::*; #[derive(Debug, Clone, PartialEq, Eq)] pub enum Atom<'s> { FootnoteReference { label: &'s str }, - Symbol, + Symbol(&'s str), Softbreak, Hardbreak, Escape, @@ -604,10 +604,11 @@ impl<'s> Parser<'s> { .sum(); if end && valid { self.input.lexer = lex::Lexer::new(ahead.as_str()); - self.input.span = self.input.span.after(len); - self.push(EventKind::Atom(Symbol)); - self.input.span = self.input.span.after(1); - return Some(Continue); + let span_symbol = self.input.span.after(len); + self.input.span = Span::new(self.input.span.start(), span_symbol.end() + 1); + return self.push(EventKind::Atom(Atom::Symbol( + span_symbol.of(self.input.src), + ))); } } None diff --git a/src/lib.rs b/src/lib.rs index a6411a4..26a0e6c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -837,7 +837,7 @@ impl<'s> Parser<'s> { } inline::EventKind::Atom(a) => match a { inline::Atom::FootnoteReference { label } => Event::FootnoteReference(label), - inline::Atom::Symbol => Event::Symbol(inline.span.of(self.src).into()), + inline::Atom::Symbol(sym) => Event::Symbol(sym.into()), inline::Atom::Quote { ty, left } => match (ty, left) { (inline::QuoteType::Single, true) => Event::LeftSingleQuote, (inline::QuoteType::Single, false) => Event::RightSingleQuote, From bc4dd794bcfb8fe6879acece5af284bdea91452c Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Sat, 29 Apr 2023 18:34:23 +0200 Subject: [PATCH 05/31] block: store level in event do not use span length as level --- src/block.rs | 213 ++++++++++++++++++++++++++++++++++++++++++++------- src/lib.rs | 4 +- 2 files changed, 187 insertions(+), 30 deletions(-) diff --git a/src/block.rs b/src/block.rs index ba4b602..4e96591 100644 --- a/src/block.rs +++ b/src/block.rs @@ -58,7 +58,7 @@ pub enum Leaf { /// Span is `#` characters. /// Each inline is a line. - Heading { has_section: bool }, + Heading { level: u16, has_section: bool }, /// Span is empty. DescriptionTerm, @@ -941,7 +941,8 @@ impl Kind { match self { Self::Atom(a) => Block::Atom(*a), Self::Paragraph => Block::Leaf(Paragraph), - Self::Heading { .. } => Block::Leaf(Heading { + Self::Heading { level } => Block::Leaf(Heading { + level: (*level).try_into().unwrap(), has_section: top_level, }), Self::Fenced { @@ -1087,13 +1088,37 @@ mod test { "## b\n", // ), (Enter(Container(Section)), "#"), - (Enter(Leaf(Heading { has_section: true })), "#"), + ( + Enter(Leaf(Heading { + level: 1, + has_section: true + })), + "#" + ), (Inline, "a"), - (Exit(Leaf(Heading { has_section: true })), "#"), + ( + Exit(Leaf(Heading { + level: 1, + has_section: true + })), + "#" + ), (Enter(Container(Section)), "##"), - (Enter(Leaf(Heading { has_section: true })), "##"), + ( + Enter(Leaf(Heading { + level: 2, + has_section: true + })), + "##" + ), (Inline, "b"), - (Exit(Leaf(Heading { has_section: true })), "##"), + ( + Exit(Leaf(Heading { + level: 2, + has_section: true + })), + "##" + ), (Exit(Container(Section)), "##"), (Exit(Container(Section)), "#"), ); @@ -1107,9 +1132,21 @@ mod test { "heading\n", // ), (Enter(Container(Section)), "#"), - (Enter(Leaf(Heading { has_section: true })), "#"), + ( + Enter(Leaf(Heading { + level: 1, + has_section: true + })), + "#" + ), (Inline, "heading"), - (Exit(Leaf(Heading { has_section: true })), "#"), + ( + Exit(Leaf(Heading { + level: 1, + has_section: true + })), + "#" + ), (Exit(Container(Section)), "#"), ); } @@ -1125,17 +1162,41 @@ mod test { "15\n", // ), (Enter(Container(Section)), "#"), - (Enter(Leaf(Heading { has_section: true })), "#"), + ( + Enter(Leaf(Heading { + level: 1, + has_section: true + })), + "#" + ), (Inline, "2"), - (Exit(Leaf(Heading { has_section: true })), "#"), + ( + Exit(Leaf(Heading { + level: 1, + has_section: true + })), + "#" + ), (Atom(Blankline), "\n"), (Exit(Container(Section)), "#"), (Enter(Container(Section)), "#"), - (Enter(Leaf(Heading { has_section: true })), "#"), + ( + Enter(Leaf(Heading { + level: 1, + has_section: true + })), + "#" + ), (Inline, "8\n"), (Inline, "12\n"), (Inline, "15"), - (Exit(Leaf(Heading { has_section: true })), "#"), + ( + Exit(Leaf(Heading { + level: 1, + has_section: true + })), + "#" + ), (Exit(Container(Section)), "#"), ); } @@ -1149,11 +1210,23 @@ mod test { "c\n", // ), (Enter(Container(Section)), "#"), - (Enter(Leaf(Heading { has_section: true })), "#"), + ( + Enter(Leaf(Heading { + level: 1, + has_section: true + })), + "#" + ), (Inline, "a\n"), (Inline, "b\n"), (Inline, "c"), - (Exit(Leaf(Heading { has_section: true })), "#"), + ( + Exit(Leaf(Heading { + level: 1, + has_section: true + })), + "#" + ), (Exit(Container(Section)), "#"), ); } @@ -1175,39 +1248,111 @@ mod test { "# b\n", ), (Enter(Container(Section)), "#"), - (Enter(Leaf(Heading { has_section: true })), "#"), + ( + Enter(Leaf(Heading { + level: 1, + has_section: true + })), + "#" + ), (Inline, "a"), - (Exit(Leaf(Heading { has_section: true })), "#"), + ( + Exit(Leaf(Heading { + level: 1, + has_section: true + })), + "#" + ), (Atom(Blankline), "\n"), (Enter(Container(Section)), "##"), - (Enter(Leaf(Heading { has_section: true })), "##"), + ( + Enter(Leaf(Heading { + level: 2, + has_section: true + })), + "##" + ), (Inline, "aa"), - (Exit(Leaf(Heading { has_section: true })), "##"), + ( + Exit(Leaf(Heading { + level: 2, + has_section: true + })), + "##" + ), (Atom(Blankline), "\n"), (Enter(Container(Section)), "####"), - (Enter(Leaf(Heading { has_section: true })), "####"), + ( + Enter(Leaf(Heading { + level: 4, + has_section: true + })), + "####" + ), (Inline, "aaaa"), - (Exit(Leaf(Heading { has_section: true })), "####"), + ( + Exit(Leaf(Heading { + level: 4, + has_section: true + })), + "####" + ), (Atom(Blankline), "\n"), (Exit(Container(Section)), "####"), (Exit(Container(Section)), "##"), (Enter(Container(Section)), "##"), - (Enter(Leaf(Heading { has_section: true })), "##"), + ( + Enter(Leaf(Heading { + level: 2, + has_section: true + })), + "##" + ), (Inline, "ab"), - (Exit(Leaf(Heading { has_section: true })), "##"), + ( + Exit(Leaf(Heading { + level: 2, + has_section: true + })), + "##" + ), (Atom(Blankline), "\n"), (Enter(Container(Section)), "###"), - (Enter(Leaf(Heading { has_section: true })), "###"), + ( + Enter(Leaf(Heading { + level: 3, + has_section: true + })), + "###" + ), (Inline, "aba"), - (Exit(Leaf(Heading { has_section: true })), "###"), + ( + Exit(Leaf(Heading { + level: 3, + has_section: true + })), + "###" + ), (Atom(Blankline), "\n"), (Exit(Container(Section)), "###"), (Exit(Container(Section)), "##"), (Exit(Container(Section)), "#"), (Enter(Container(Section)), "#"), - (Enter(Leaf(Heading { has_section: true })), "#"), + ( + Enter(Leaf(Heading { + level: 1, + has_section: true + })), + "#" + ), (Inline, "b"), - (Exit(Leaf(Heading { has_section: true })), "#"), + ( + Exit(Leaf(Heading { + level: 1, + has_section: true, + })), + "#" + ), (Exit(Container(Section)), "#"), ); } @@ -1245,9 +1390,21 @@ mod test { (Inline, "a"), (Exit(Leaf(Paragraph)), ""), (Atom(Blankline), "\n"), - (Enter(Leaf(Heading { has_section: false })), "##"), + ( + Enter(Leaf(Heading { + level: 2, + has_section: false, + })), + "##" + ), (Inline, "hl"), - (Exit(Leaf(Heading { has_section: false })), "##"), + ( + Exit(Leaf(Heading { + level: 2, + has_section: false, + })), + "##" + ), (Atom(Blankline), "\n"), (Enter(Leaf(Paragraph)), ""), (Inline, "para"), diff --git a/src/lib.rs b/src/lib.rs index 26a0e6c..d96db8e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -881,8 +881,8 @@ impl<'s> Parser<'s> { self.inline_parser.reset(); match l { block::Leaf::Paragraph => Container::Paragraph, - block::Leaf::Heading { has_section } => Container::Heading { - level: content.len().try_into().unwrap(), + block::Leaf::Heading { level, has_section } => Container::Heading { + level, has_section, id: self .pre_pass From e90594f2b77695ab6f519861a4168df623cca58e Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Sat, 29 Apr 2023 18:42:31 +0200 Subject: [PATCH 06/31] block: specify link def label in event instead of using span --- src/block.rs | 84 ++++++++++++++++++++++++++-------------------------- src/lib.rs | 19 +++++++----- src/tree.rs | 6 ++-- 3 files changed, 56 insertions(+), 53 deletions(-) diff --git a/src/block.rs b/src/block.rs index 4e96591..897c28d 100644 --- a/src/block.rs +++ b/src/block.rs @@ -12,13 +12,13 @@ use Container::*; use Leaf::*; use ListType::*; -pub type Tree = tree::Tree; -pub type TreeBuilder = tree::Builder; +pub type Tree<'s> = tree::Tree, Atom>; +pub type TreeBuilder<'s> = tree::Builder, Atom>; #[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum Node { +pub enum Node<'s> { Container(Container), - Leaf(Leaf), + Leaf(Leaf<'s>), } #[must_use] @@ -27,12 +27,12 @@ pub fn parse(src: &str) -> Tree { } #[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum Block { +enum Block<'s> { /// An atomic block, containing no children elements. Atom(Atom), /// A leaf block, containing only inline elements. - Leaf(Leaf), + Leaf(Leaf<'s>), /// A container block, containing children blocks. Container(Container), @@ -51,7 +51,7 @@ pub enum Atom { } #[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum Leaf { +pub enum Leaf<'s> { /// Span is empty, before first character of paragraph. /// Each inline is a line. Paragraph, @@ -72,7 +72,7 @@ pub enum Leaf { /// Span is the link tag. /// Inlines are lines of the URL. - LinkDefinition, + LinkDefinition { label: &'s str }, /// Span is language specifier. /// Each inline is a line. @@ -135,7 +135,7 @@ struct OpenList { /// Parser for block-level tree structure of entire document. struct TreeParser<'s> { src: &'s str, - tree: TreeBuilder, + tree: TreeBuilder<'s>, /// The previous block element was a blank line. prev_blankline: bool, @@ -163,7 +163,7 @@ impl<'s> TreeParser<'s> { } #[must_use] - pub fn parse(mut self) -> Tree { + pub fn parse(mut self) -> Tree<'s> { let mut lines = lines(self.src).collect::>(); let mut line_pos = 0; while line_pos < lines.len() { @@ -246,7 +246,33 @@ impl<'s> TreeParser<'s> { self.prev_blankline = false; } - match kind.block(top_level) { + let block = match kind { + Kind::Atom(a) => Block::Atom(a), + Kind::Paragraph => Block::Leaf(Paragraph), + Kind::Heading { level } => Block::Leaf(Heading { + level: level.try_into().unwrap(), + has_section: top_level, + }), + Kind::Fenced { + kind: FenceKind::CodeBlock(..), + .. + } => Block::Leaf(CodeBlock), + Kind::Fenced { + kind: FenceKind::Div, + .. + } => Block::Container(Div), + Kind::Definition { + footnote: false, .. + } => Block::Leaf(LinkDefinition { + label: span.of(self.src), + }), + Kind::Definition { footnote: true, .. } => Block::Container(Footnote), + Kind::Blockquote => Block::Container(Blockquote), + Kind::ListItem { ty, .. } => Block::Container(ListItem(ty)), + Kind::Table { .. } => Block::Container(Table), + }; + + match block { Block::Atom(a) => self.tree.atom(a, span), Block::Leaf(l) => self.parse_leaf(l, &kind, span, lines), Block::Container(Table) => self.parse_table(lines, span), @@ -259,7 +285,7 @@ impl<'s> TreeParser<'s> { } } - fn parse_leaf(&mut self, leaf: Leaf, k: &Kind, span: Span, lines: &mut [Span]) { + fn parse_leaf(&mut self, leaf: Leaf<'s>, k: &Kind, span: Span, lines: &mut [Span]) { if let Kind::Fenced { indent, .. } = k { for line in lines.iter_mut() { let indent_line = line @@ -564,7 +590,7 @@ impl<'s> TreeParser<'s> { } } -impl<'t> tree::Element<'t, Node, Atom> { +impl<'t, 's> tree::Element<'t, Node<'s>, Atom> { fn list_mut(&mut self) -> Option<&mut ListKind> { if let tree::Element::Container(Node::Container(Container::List(l))) = self { Some(l) @@ -936,35 +962,9 @@ impl Kind { } } } - - fn block(&self, top_level: bool) -> Block { - match self { - Self::Atom(a) => Block::Atom(*a), - Self::Paragraph => Block::Leaf(Paragraph), - Self::Heading { level } => Block::Leaf(Heading { - level: (*level).try_into().unwrap(), - has_section: top_level, - }), - Self::Fenced { - kind: FenceKind::CodeBlock(..), - .. - } => Block::Leaf(CodeBlock), - Self::Fenced { - kind: FenceKind::Div, - .. - } => Block::Container(Div), - Self::Definition { - footnote: false, .. - } => Block::Leaf(LinkDefinition), - Self::Definition { footnote: true, .. } => Block::Container(Footnote), - Self::Blockquote => Block::Container(Blockquote), - Self::ListItem { ty, .. } => Block::Container(ListItem(*ty)), - Self::Table { .. } => Block::Container(Table), - } - } } -impl std::fmt::Display for Block { +impl<'s> std::fmt::Display for Block<'s> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Block::Atom(a) => std::fmt::Debug::fmt(a, f), @@ -1501,9 +1501,9 @@ mod test { fn parse_link_definition() { test_parse!( "[tag]: url\n", - (Enter(Leaf(LinkDefinition)), "tag"), + (Enter(Leaf(LinkDefinition { label: "tag" })), "tag"), (Inline, "url"), - (Exit(Leaf(LinkDefinition)), "tag"), + (Exit(Leaf(LinkDefinition { label: "tag" })), "tag"), ); } diff --git a/src/lib.rs b/src/lib.rs index d96db8e..d0e3460 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -555,7 +555,7 @@ pub struct Parser<'s> { src: &'s str, /// Block tree parsed at first. - tree: block::Tree, + tree: block::Tree<'s>, /// Contents obtained by the prepass. pre_pass: PrePass<'s>, @@ -598,7 +598,11 @@ struct PrePass<'s> { impl<'s> PrePass<'s> { #[must_use] - fn new(src: &'s str, mut tree: block::Tree, inline_parser: &mut inline::Parser<'s>) -> Self { + fn new( + src: &'s str, + mut tree: block::Tree<'s>, + inline_parser: &mut inline::Parser<'s>, + ) -> Self { let mut link_definitions = Map::new(); let mut headings: Vec = Vec::new(); let mut used_ids: Set<&str> = Set::new(); @@ -606,10 +610,11 @@ impl<'s> PrePass<'s> { let mut attr_prev: Option = None; while let Some(e) = tree.next() { match e.kind { - tree::EventKind::Enter(block::Node::Leaf(block::Leaf::LinkDefinition)) => { + tree::EventKind::Enter(block::Node::Leaf(block::Leaf::LinkDefinition { + label, + })) => { // All link definition tags have to be obtained initially, as references can // appear before the definition. - let tag = e.span.of(src); let attrs = attr_prev.map_or_else(Attributes::new, |sp| attr::parse(sp.of(src))); let url = match tree.count_children() { @@ -617,7 +622,7 @@ impl<'s> PrePass<'s> { 1 => tree.take_inlines().next().unwrap().of(src).trim().into(), _ => tree.take_inlines().map(|sp| sp.of(src).trim()).collect(), }; - link_definitions.insert(tag, (url, attrs)); + link_definitions.insert(label, (url, attrs)); } tree::EventKind::Enter(block::Node::Leaf(block::Leaf::Heading { .. })) => { // All headings ids have to be obtained initially, as references can appear @@ -905,8 +910,8 @@ impl<'s> Parser<'s> { head: self.table_head_row, }, block::Leaf::Caption => Container::Caption, - block::Leaf::LinkDefinition => { - Container::LinkDefinition { label: content } + block::Leaf::LinkDefinition { label } => { + Container::LinkDefinition { label } } } } diff --git a/src/tree.rs b/src/tree.rs index 0101370..1e32f9f 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -29,7 +29,7 @@ pub struct Event { } #[derive(Clone)] -pub struct Tree { +pub struct Tree { nodes: std::rc::Rc<[InternalNode]>, branch: Vec, head: Option, @@ -349,9 +349,7 @@ impl Builder { } } -impl std::fmt::Debug - for Builder -{ +impl std::fmt::Debug for Builder { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { self.clone().finish().fmt(f) } From dbedeeb5eec40ca2f6d45dcce8f7e668499a96fa Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Sat, 29 Apr 2023 18:51:47 +0200 Subject: [PATCH 07/31] block: specify lang in code block event instead of using span --- src/block.rs | 30 ++++++++++++++++-------------- src/lib.rs | 6 +++--- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/src/block.rs b/src/block.rs index 897c28d..7a8b993 100644 --- a/src/block.rs +++ b/src/block.rs @@ -76,7 +76,7 @@ pub enum Leaf<'s> { /// Span is language specifier. /// Each inline is a line. - CodeBlock, + CodeBlock { language: &'s str }, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -256,7 +256,9 @@ impl<'s> TreeParser<'s> { Kind::Fenced { kind: FenceKind::CodeBlock(..), .. - } => Block::Leaf(CodeBlock), + } => Block::Leaf(CodeBlock { + language: span.of(self.src), + }), Kind::Fenced { kind: FenceKind::Div, .. @@ -1433,9 +1435,9 @@ mod test { fn parse_code_block() { test_parse!( concat!("```\n", "l0\n"), - (Enter(Leaf(CodeBlock)), "",), + (Enter(Leaf(CodeBlock { language: "" })), "",), (Inline, "l0\n"), - (Exit(Leaf(CodeBlock)), "",), + (Exit(Leaf(CodeBlock { language: "" })), "",), ); test_parse!( concat!( @@ -1445,9 +1447,9 @@ mod test { "\n", "para\n", // ), - (Enter(Leaf(CodeBlock)), ""), + (Enter(Leaf(CodeBlock { language: "" })), ""), (Inline, "l0\n"), - (Exit(Leaf(CodeBlock)), ""), + (Exit(Leaf(CodeBlock { language: "" })), ""), (Atom(Blankline), "\n"), (Enter(Leaf(Paragraph)), ""), (Inline, "para"), @@ -1461,11 +1463,11 @@ mod test { " l1\n", "````", // ), - (Enter(Leaf(CodeBlock)), "lang"), + (Enter(Leaf(CodeBlock { language: "lang" })), "lang"), (Inline, "l0\n"), (Inline, "```\n"), (Inline, " l1\n"), - (Exit(Leaf(CodeBlock)), "lang"), + (Exit(Leaf(CodeBlock { language: "lang" })), "lang"), ); test_parse!( concat!( @@ -1476,12 +1478,12 @@ mod test { "bbb\n", // "```\n", // ), - (Enter(Leaf(CodeBlock)), ""), + (Enter(Leaf(CodeBlock { language: "" })), ""), (Inline, "a\n"), - (Exit(Leaf(CodeBlock)), ""), - (Enter(Leaf(CodeBlock)), ""), + (Exit(Leaf(CodeBlock { language: "" })), ""), + (Enter(Leaf(CodeBlock { language: "" })), ""), (Inline, "bbb\n"), - (Exit(Leaf(CodeBlock)), ""), + (Exit(Leaf(CodeBlock { language: "" })), ""), ); test_parse!( concat!( @@ -1490,10 +1492,10 @@ mod test { " block\n", "~~~\n", // ), - (Enter(Leaf(CodeBlock)), ""), + (Enter(Leaf(CodeBlock { language: "" })), ""), (Inline, "code\n"), (Inline, " block\n"), - (Exit(Leaf(CodeBlock)), ""), + (Exit(Leaf(CodeBlock { language: "" })), ""), ); } diff --git a/src/lib.rs b/src/lib.rs index d0e3460..14a34cc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -897,12 +897,12 @@ impl<'s> Parser<'s> { .into(), }, block::Leaf::DescriptionTerm => Container::DescriptionTerm, - block::Leaf::CodeBlock => { + block::Leaf::CodeBlock { language } => { self.verbatim = enter; - if let Some(format) = content.strip_prefix('=') { + if let Some(format) = language.strip_prefix('=') { Container::RawBlock { format } } else { - Container::CodeBlock { language: content } + Container::CodeBlock { language } } } block::Leaf::TableCell(alignment) => Container::TableCell { From 6200b07287434c2258617139e7bc62fecb84697d Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Sat, 29 Apr 2023 19:14:44 +0200 Subject: [PATCH 08/31] block: specify list marker in event instead of using span --- src/block.rs | 437 ++++++++++++++++++++++++++++++++------------------- src/lib.rs | 10 +- 2 files changed, 283 insertions(+), 164 deletions(-) diff --git a/src/block.rs b/src/block.rs index 7a8b993..295a423 100644 --- a/src/block.rs +++ b/src/block.rs @@ -17,7 +17,7 @@ pub type TreeBuilder<'s> = tree::Builder, Atom>; #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Node<'s> { - Container(Container), + Container(Container<'s>), Leaf(Leaf<'s>), } @@ -35,7 +35,7 @@ enum Block<'s> { Leaf(Leaf<'s>), /// A container block, containing children blocks. - Container(Container), + Container(Container<'s>), } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -80,7 +80,7 @@ pub enum Leaf<'s> { } #[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum Container { +pub enum Container<'s> { /// Span is `>`. Blockquote, @@ -88,7 +88,7 @@ pub enum Container { Div, /// Span is the list marker of the first list item in the list. - List(ListKind), + List { kind: ListKind, marker: &'s str }, /// Span is the list marker. ListItem(ListType), @@ -345,7 +345,7 @@ impl<'s> TreeParser<'s> { fn parse_container( &mut self, - c: Container, + c: Container<'s>, k: &Kind, span: Span, outer: Span, @@ -388,7 +388,10 @@ impl<'s> TreeParser<'s> { if same_depth { let tight = true; let node = self.tree.enter( - Node::Container(Container::List(ListKind { ty, tight })), + Node::Container(Container::List { + kind: ListKind { ty, tight }, + marker: span.of(self.src), + }), span, ); self.open_lists.push(OpenList { @@ -594,8 +597,8 @@ impl<'s> TreeParser<'s> { impl<'t, 's> tree::Element<'t, Node<'s>, Atom> { fn list_mut(&mut self) -> Option<&mut ListKind> { - if let tree::Element::Container(Node::Container(Container::List(l))) = self { - Some(l) + if let tree::Element::Container(Node::Container(Container::List { kind, .. })) = self { + Some(kind) } else { None } @@ -1563,10 +1566,13 @@ mod test { test_parse!( "- abc", ( - Enter(Container(List(ListKind { - ty: Unordered(b'-'), - tight: true - }))), + Enter(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true + }, + marker: "-", + })), "-" ), (Enter(Container(ListItem(Unordered(b'-')))), "-"), @@ -1575,10 +1581,13 @@ mod test { (Exit(Leaf(Paragraph)), ""), (Exit(Container(ListItem(Unordered(b'-')))), "-"), ( - Exit(Container(List(ListKind { - ty: Unordered(b'-'), - tight: true - }))), + Exit(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true + }, + marker: "-", + })), "-" ), ); @@ -1592,10 +1601,13 @@ mod test { "- b\n", // ), ( - Enter(Container(List(ListKind { - ty: Unordered(b'-'), - tight: true, - }))), + Enter(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true, + }, + marker: "-", + })), "-" ), (Enter(Container(ListItem(Unordered(b'-')))), "-"), @@ -1609,10 +1621,13 @@ mod test { (Exit(Leaf(Paragraph)), ""), (Exit(Container(ListItem(Unordered(b'-')))), "-"), ( - Exit(Container(List(ListKind { - ty: Unordered(b'-'), - tight: true, - }))), + Exit(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true, + }, + marker: "-", + })), "-" ), ); @@ -1628,10 +1643,13 @@ mod test { "- c\n", // ), ( - Enter(Container(List(ListKind { - ty: Unordered(b'-'), - tight: false, - }))), + Enter(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: false, + }, + marker: "-", + })), "-" ), (Enter(Container(ListItem(Unordered(b'-')))), "-"), @@ -1651,10 +1669,13 @@ mod test { (Exit(Leaf(Paragraph)), ""), (Exit(Container(ListItem(Unordered(b'-')))), "-"), ( - Exit(Container(List(ListKind { - ty: Unordered(b'-'), - tight: false, - }))), + Exit(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: false, + }, + marker: "-", + })), "-" ), ); @@ -1672,10 +1693,13 @@ mod test { " d\n", // ), ( - Enter(Container(List(ListKind { - ty: Unordered(b'-'), - tight: true, - }))), + Enter(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true, + }, + marker: "-", + })), "-" ), (Enter(Container(ListItem(Unordered(b'-')))), "-"), @@ -1689,10 +1713,13 @@ mod test { (Exit(Leaf(Paragraph)), ""), (Atom(Blankline), "\n"), ( - Enter(Container(List(ListKind { - ty: Unordered(b'-'), - tight: false, - }))), + Enter(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: false, + }, + marker: "-", + })), "-" ), (Enter(Container(ListItem(Unordered(b'-')))), "-"), @@ -1705,18 +1732,24 @@ mod test { (Exit(Leaf(Paragraph)), ""), (Exit(Container(ListItem(Unordered(b'-')))), "-"), ( - Exit(Container(List(ListKind { - ty: Unordered(b'-'), - tight: false, - }))), + Exit(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: false, + }, + marker: "-", + })), "-" ), (Exit(Container(ListItem(Unordered(b'-')))), "-"), ( - Exit(Container(List(ListKind { - ty: Unordered(b'-'), - tight: true, - }))), + Exit(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true, + }, + marker: "-", + })), "-" ), ); @@ -1734,10 +1767,13 @@ mod test { "- b\n", // ), ( - Enter(Container(List(ListKind { - ty: Unordered(b'-'), - tight: true, - }))), + Enter(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true, + }, + marker: "-", + })), "-" ), (Enter(Container(ListItem(Unordered(b'-')))), "-"), @@ -1746,10 +1782,13 @@ mod test { (Exit(Leaf(Paragraph)), ""), (Atom(Blankline), "\n"), ( - Enter(Container(List(ListKind { - ty: Unordered(b'+'), - tight: true, - }))), + Enter(Container(List { + kind: ListKind { + ty: Unordered(b'+'), + tight: true, + }, + marker: "+", + })), "+", ), (Enter(Container(ListItem(Unordered(b'+')))), "+"), @@ -1764,10 +1803,13 @@ mod test { (Atom(Blankline), "\n"), (Exit(Container(ListItem(Unordered(b'+')))), "+"), ( - Exit(Container(List(ListKind { - ty: Unordered(b'+'), - tight: true, - }))), + Exit(Container(List { + kind: ListKind { + ty: Unordered(b'+'), + tight: true, + }, + marker: "+", + })), "+", ), (Exit(Container(ListItem(Unordered(b'-')))), "-"), @@ -1777,10 +1819,13 @@ mod test { (Exit(Leaf(Paragraph)), ""), (Exit(Container(ListItem(Unordered(b'-')))), "-"), ( - Exit(Container(List(ListKind { - ty: Unordered(b'-'), - tight: true, - }))), + Exit(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true, + }, + marker: "-", + })), "-" ), ); @@ -1793,10 +1838,13 @@ mod test { " c\n", // ), ( - Enter(Container(List(ListKind { - ty: Ordered(Decimal, Period), - tight: true, - }))), + Enter(Container(List { + kind: ListKind { + ty: Ordered(Decimal, Period), + tight: true, + }, + marker: "1.", + })), "1.", ), (Enter(Container(ListItem(Ordered(Decimal, Period)))), "1."), @@ -1805,10 +1853,13 @@ mod test { (Exit(Leaf(Paragraph)), ""), (Atom(Blankline), "\n"), ( - Enter(Container(List(ListKind { - ty: Unordered(b'-'), - tight: true, - }))), + Enter(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true, + }, + marker: "-", + })), "-", ), (Enter(Container(ListItem(Unordered(b'-')))), "-"), @@ -1818,10 +1869,13 @@ mod test { (Atom(Blankline), "\n"), (Exit(Container(ListItem(Unordered(b'-')))), "-"), ( - Exit(Container(List(ListKind { - ty: Unordered(b'-'), - tight: true, - }))), + Exit(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true, + }, + marker: "-", + })), "-", ), (Enter(Leaf(Paragraph)), ""), @@ -1829,10 +1883,13 @@ mod test { (Exit(Leaf(Paragraph)), ""), (Exit(Container(ListItem(Ordered(Decimal, Period)))), "1."), ( - Exit(Container(List(ListKind { - ty: Ordered(Decimal, Period), - tight: true, - }))), + Exit(Container(List { + kind: ListKind { + ty: Ordered(Decimal, Period), + tight: true, + }, + marker: "1.", + })), "1.", ), ); @@ -1849,10 +1906,13 @@ mod test { " * c\n", // ), ( - Enter(Container(List(ListKind { - ty: Unordered(b'-'), - tight: true, - }))), + Enter(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true, + }, + marker: "-", + })), "-" ), (Enter(Container(ListItem(Unordered(b'-')))), "-"), @@ -1861,10 +1921,13 @@ mod test { (Exit(Leaf(Paragraph)), ""), (Atom(Blankline), "\n"), ( - Enter(Container(List(ListKind { - ty: Unordered(b'+'), - tight: true, - }))), + Enter(Container(List { + kind: ListKind { + ty: Unordered(b'+'), + tight: true, + }, + marker: "+", + })), "+", ), (Enter(Container(ListItem(Unordered(b'+')))), "+"), @@ -1873,10 +1936,13 @@ mod test { (Exit(Leaf(Paragraph)), ""), (Atom(Blankline), "\n"), ( - Enter(Container(List(ListKind { - ty: Unordered(b'*'), - tight: true, - }))), + Enter(Container(List { + kind: ListKind { + ty: Unordered(b'*'), + tight: true, + }, + marker: "*", + })), "*", ), (Enter(Container(ListItem(Unordered(b'*')))), "*"), @@ -1885,26 +1951,35 @@ mod test { (Exit(Leaf(Paragraph)), ""), (Exit(Container(ListItem(Unordered(b'*')))), "*"), ( - Exit(Container(List(ListKind { - ty: Unordered(b'*'), - tight: true, - }))), + Exit(Container(List { + kind: ListKind { + ty: Unordered(b'*'), + tight: true, + }, + marker: "*", + })), "*", ), (Exit(Container(ListItem(Unordered(b'+')))), "+"), ( - Exit(Container(List(ListKind { - ty: Unordered(b'+'), - tight: true, - }))), + Exit(Container(List { + kind: ListKind { + ty: Unordered(b'+'), + tight: true, + }, + marker: "+", + })), "+", ), (Exit(Container(ListItem(Unordered(b'-')))), "-"), ( - Exit(Container(List(ListKind { - ty: Unordered(b'-'), - tight: true, - }))), + Exit(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true, + }, + marker: "-", + })), "-" ), ); @@ -1921,10 +1996,13 @@ mod test { "cd\n", // ), ( - Enter(Container(List(ListKind { - ty: Unordered(b'-'), - tight: true - }))), + Enter(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true + }, + marker: "-", + })), "-" ), (Enter(Container(ListItem(Unordered(b'-')))), "-"), @@ -1933,10 +2011,13 @@ mod test { (Exit(Leaf(Paragraph)), ""), (Atom(Blankline), "\n"), ( - Enter(Container(List(ListKind { - ty: Unordered(b'*'), - tight: true - }))), + Enter(Container(List { + kind: ListKind { + ty: Unordered(b'*'), + tight: true + }, + marker: "*", + })), "*" ), (Enter(Container(ListItem(Unordered(b'*')))), "*"), @@ -1946,18 +2027,24 @@ mod test { (Atom(Blankline), "\n"), (Exit(Container(ListItem(Unordered(b'*')))), "*"), ( - Exit(Container(List(ListKind { - ty: Unordered(b'*'), - tight: true - }))), + Exit(Container(List { + kind: ListKind { + ty: Unordered(b'*'), + tight: true + }, + marker: "*", + })), "*" ), (Exit(Container(ListItem(Unordered(b'-')))), "-"), ( - Exit(Container(List(ListKind { - ty: Unordered(b'-'), - tight: true - }))), + Exit(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true + }, + marker: "-", + })), "-" ), (Enter(Leaf(Paragraph)), ""), @@ -1975,10 +2062,13 @@ mod test { "+ c\n", // ), ( - Enter(Container(List(ListKind { - ty: Unordered(b'-'), - tight: true - }))), + Enter(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true + }, + marker: "-", + })), "-" ), (Enter(Container(ListItem(Unordered(b'-')))), "-"), @@ -1987,17 +2077,23 @@ mod test { (Exit(Leaf(Paragraph)), ""), (Exit(Container(ListItem(Unordered(b'-')))), "-"), ( - Exit(Container(List(ListKind { - ty: Unordered(b'-'), - tight: true - }))), + Exit(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true + }, + marker: "-", + })), "-" ), ( - Enter(Container(List(ListKind { - ty: Unordered(b'+'), - tight: true - }))), + Enter(Container(List { + kind: ListKind { + ty: Unordered(b'+'), + tight: true + }, + marker: "+", + })), "+" ), (Enter(Container(ListItem(Unordered(b'+')))), "+"), @@ -2011,10 +2107,13 @@ mod test { (Exit(Leaf(Paragraph)), ""), (Exit(Container(ListItem(Unordered(b'+')))), "+"), ( - Exit(Container(List(ListKind { - ty: Unordered(b'+'), - tight: true - }))), + Exit(Container(List { + kind: ListKind { + ty: Unordered(b'+'), + tight: true + }, + marker: "+", + })), "+" ), ); @@ -2029,10 +2128,13 @@ mod test { " description\n", // ), ( - Enter(Container(List(ListKind { - ty: Description, - tight: true, - }))), + Enter(Container(List { + kind: ListKind { + ty: Description, + tight: true, + }, + marker: ":", + })), ":" ), (Enter(Leaf(DescriptionTerm)), ""), @@ -2045,10 +2147,13 @@ mod test { (Exit(Leaf(Paragraph)), ""), (Exit(Container(ListItem(Description))), ":"), ( - Exit(Container(List(ListKind { - ty: Description, - tight: true, - }))), + Exit(Container(List { + kind: ListKind { + ty: Description, + tight: true, + }, + marker: ":", + })), ":" ), ); @@ -2243,18 +2348,24 @@ mod test { " - b\n", // ), ( - Enter(Container(List(ListKind { - ty: Unordered(b'-'), - tight: true, - }))), + Enter(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true, + }, + marker: "-", + })), "-" ), (Enter(Container(ListItem(Unordered(b'-')))), "-"), ( - Enter(Container(List(ListKind { - ty: Unordered(b'-'), - tight: true, - }))), + Enter(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true, + }, + marker: "-", + })), "-" ), (Enter(Container(ListItem(Unordered(b'-')))), "-"), @@ -2268,18 +2379,24 @@ mod test { (Exit(Leaf(Paragraph)), ""), (Exit(Container(ListItem(Unordered(b'-')))), "-"), ( - Exit(Container(List(ListKind { - ty: Unordered(b'-'), - tight: true, - }))), + Exit(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true, + }, + marker: "-", + })), "-" ), (Exit(Container(ListItem(Unordered(b'-')))), "-"), ( - Exit(Container(List(ListKind { - ty: Unordered(b'-'), - tight: true, - }))), + Exit(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true, + }, + marker: "-", + })), "-" ), ); diff --git a/src/lib.rs b/src/lib.rs index 14a34cc..6b4f002 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -919,7 +919,10 @@ impl<'s> Parser<'s> { block::Container::Blockquote => Container::Blockquote, block::Container::Div => Container::Div { class: content }, block::Container::Footnote => Container::Footnote { label: content }, - block::Container::List(block::ListKind { ty, tight }) => { + block::Container::List { + kind: block::ListKind { ty, tight }, + marker, + } => { if matches!(ty, block::ListType::Description) { Container::DescriptionList } else { @@ -927,9 +930,8 @@ impl<'s> Parser<'s> { block::ListType::Unordered(..) => ListKind::Unordered, block::ListType::Task => ListKind::Task, block::ListType::Ordered(numbering, style) => { - let start = numbering - .parse_number(style.number(content)) - .max(1); + let start = + numbering.parse_number(style.number(marker)).max(1); ListKind::Ordered { numbering, style, From 116245367ada3aa995e31c20c0fa4d24bf6d9947 Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Sat, 29 Apr 2023 19:27:47 +0200 Subject: [PATCH 09/31] block: specify task check in event instead of using span --- src/block.rs | 134 ++++++++++++++++++++++++++++----------------------- src/lib.rs | 12 ++--- 2 files changed, 80 insertions(+), 66 deletions(-) diff --git a/src/block.rs b/src/block.rs index 295a423..b9d5f18 100644 --- a/src/block.rs +++ b/src/block.rs @@ -91,7 +91,7 @@ pub enum Container<'s> { List { kind: ListKind, marker: &'s str }, /// Span is the list marker. - ListItem(ListType), + ListItem(ListItemKind), /// Span is footnote tag. Footnote, @@ -112,6 +112,13 @@ pub struct ListKind { pub tight: bool, } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum ListItemKind { + Task { checked: bool }, + Description, + List, +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum ListType { Unordered(u8), @@ -270,7 +277,13 @@ impl<'s> TreeParser<'s> { }), Kind::Definition { footnote: true, .. } => Block::Container(Footnote), Kind::Blockquote => Block::Container(Blockquote), - Kind::ListItem { ty, .. } => Block::Container(ListItem(ty)), + Kind::ListItem { ty, .. } => Block::Container(ListItem(match ty { + ListType::Task => ListItemKind::Task { + checked: span.of(self.src).as_bytes()[3] != b' ', + }, + ListType::Description => ListItemKind::Description, + _ => ListItemKind::List, + })), Kind::Table { .. } => Block::Container(Table), }; @@ -378,7 +391,7 @@ impl<'s> TreeParser<'s> { *sp = sp.skip_chars(skip.min(count), self.src); }); - if let ListItem(ty) = c { + if let Kind::ListItem { ty, .. } = k { let same_depth = self .open_lists .last() @@ -389,20 +402,20 @@ impl<'s> TreeParser<'s> { let tight = true; let node = self.tree.enter( Node::Container(Container::List { - kind: ListKind { ty, tight }, + kind: ListKind { ty: *ty, tight }, marker: span.of(self.src), }), span, ); self.open_lists.push(OpenList { - ty, + ty: *ty, depth: self.tree.depth().try_into().unwrap(), node, }); } } - let dt = if let ListItem(Description) = c { + let dt = if let ListItem(ListItemKind::Description) = c { let dt = self .tree .enter(Node::Leaf(DescriptionTerm), span.empty_after()); @@ -1016,6 +1029,7 @@ mod test { use super::FenceKind; use super::Kind; use super::Leaf::*; + use super::ListItemKind; use super::ListKind; use super::ListType::*; use super::Node::*; @@ -1575,11 +1589,11 @@ mod test { })), "-" ), - (Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "abc"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(Unordered(b'-')))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), "-"), ( Exit(Container(List { kind: ListKind { @@ -1610,16 +1624,16 @@ mod test { })), "-" ), - (Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "a"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(Unordered(b'-')))), "-"), - (Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), "-"), + (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "b"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(Unordered(b'-')))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), "-"), ( Exit(Container(List { kind: ListKind { @@ -1652,22 +1666,22 @@ mod test { })), "-" ), - (Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "a"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(Unordered(b'-')))), "-"), - (Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), "-"), + (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "b"), (Exit(Leaf(Paragraph)), ""), (Atom(Blankline), "\n"), - (Exit(Container(ListItem(Unordered(b'-')))), "-"), - (Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), "-"), + (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "c"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(Unordered(b'-')))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), "-"), ( Exit(Container(List { kind: ListKind { @@ -1702,12 +1716,12 @@ mod test { })), "-" ), - (Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "a"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(Unordered(b'-')))), "-"), - (Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), "-"), + (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "b"), (Exit(Leaf(Paragraph)), ""), @@ -1722,7 +1736,7 @@ mod test { })), "-" ), - (Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "c"), (Exit(Leaf(Paragraph)), ""), @@ -1730,7 +1744,7 @@ mod test { (Enter(Leaf(Paragraph)), ""), (Inline, "d"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(Unordered(b'-')))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), "-"), ( Exit(Container(List { kind: ListKind { @@ -1741,7 +1755,7 @@ mod test { })), "-" ), - (Exit(Container(ListItem(Unordered(b'-')))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), "-"), ( Exit(Container(List { kind: ListKind { @@ -1776,7 +1790,7 @@ mod test { })), "-" ), - (Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "a"), (Exit(Leaf(Paragraph)), ""), @@ -1791,17 +1805,17 @@ mod test { })), "+", ), - (Enter(Container(ListItem(Unordered(b'+')))), "+"), + (Enter(Container(ListItem(ListItemKind::List))), "+"), (Enter(Leaf(Paragraph)), ""), (Inline, "aa"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(Unordered(b'+')))), "+"), - (Enter(Container(ListItem(Unordered(b'+')))), "+"), + (Exit(Container(ListItem(ListItemKind::List))), "+"), + (Enter(Container(ListItem(ListItemKind::List))), "+"), (Enter(Leaf(Paragraph)), ""), (Inline, "ab"), (Exit(Leaf(Paragraph)), ""), (Atom(Blankline), "\n"), - (Exit(Container(ListItem(Unordered(b'+')))), "+"), + (Exit(Container(ListItem(ListItemKind::List))), "+"), ( Exit(Container(List { kind: ListKind { @@ -1812,12 +1826,12 @@ mod test { })), "+", ), - (Exit(Container(ListItem(Unordered(b'-')))), "-"), - (Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), "-"), + (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "b"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(Unordered(b'-')))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), "-"), ( Exit(Container(List { kind: ListKind { @@ -1847,7 +1861,7 @@ mod test { })), "1.", ), - (Enter(Container(ListItem(Ordered(Decimal, Period)))), "1."), + (Enter(Container(ListItem(ListItemKind::List))), "1."), (Enter(Leaf(Paragraph)), ""), (Inline, "a"), (Exit(Leaf(Paragraph)), ""), @@ -1862,12 +1876,12 @@ mod test { })), "-", ), - (Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "b"), (Exit(Leaf(Paragraph)), ""), (Atom(Blankline), "\n"), - (Exit(Container(ListItem(Unordered(b'-')))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), "-"), ( Exit(Container(List { kind: ListKind { @@ -1881,7 +1895,7 @@ mod test { (Enter(Leaf(Paragraph)), ""), (Inline, "c"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(Ordered(Decimal, Period)))), "1."), + (Exit(Container(ListItem(ListItemKind::List))), "1."), ( Exit(Container(List { kind: ListKind { @@ -1915,7 +1929,7 @@ mod test { })), "-" ), - (Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "a"), (Exit(Leaf(Paragraph)), ""), @@ -1930,7 +1944,7 @@ mod test { })), "+", ), - (Enter(Container(ListItem(Unordered(b'+')))), "+"), + (Enter(Container(ListItem(ListItemKind::List))), "+"), (Enter(Leaf(Paragraph)), ""), (Inline, "b"), (Exit(Leaf(Paragraph)), ""), @@ -1945,11 +1959,11 @@ mod test { })), "*", ), - (Enter(Container(ListItem(Unordered(b'*')))), "*"), + (Enter(Container(ListItem(ListItemKind::List))), "*"), (Enter(Leaf(Paragraph)), ""), (Inline, "c"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(Unordered(b'*')))), "*"), + (Exit(Container(ListItem(ListItemKind::List))), "*"), ( Exit(Container(List { kind: ListKind { @@ -1960,7 +1974,7 @@ mod test { })), "*", ), - (Exit(Container(ListItem(Unordered(b'+')))), "+"), + (Exit(Container(ListItem(ListItemKind::List))), "+"), ( Exit(Container(List { kind: ListKind { @@ -1971,7 +1985,7 @@ mod test { })), "+", ), - (Exit(Container(ListItem(Unordered(b'-')))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), "-"), ( Exit(Container(List { kind: ListKind { @@ -2005,7 +2019,7 @@ mod test { })), "-" ), - (Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "a"), (Exit(Leaf(Paragraph)), ""), @@ -2020,12 +2034,12 @@ mod test { })), "*" ), - (Enter(Container(ListItem(Unordered(b'*')))), "*"), + (Enter(Container(ListItem(ListItemKind::List))), "*"), (Enter(Leaf(Paragraph)), ""), (Inline, "b"), (Exit(Leaf(Paragraph)), ""), (Atom(Blankline), "\n"), - (Exit(Container(ListItem(Unordered(b'*')))), "*"), + (Exit(Container(ListItem(ListItemKind::List))), "*"), ( Exit(Container(List { kind: ListKind { @@ -2036,7 +2050,7 @@ mod test { })), "*" ), - (Exit(Container(ListItem(Unordered(b'-')))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), "-"), ( Exit(Container(List { kind: ListKind { @@ -2071,11 +2085,11 @@ mod test { })), "-" ), - (Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "a"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(Unordered(b'-')))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), "-"), ( Exit(Container(List { kind: ListKind { @@ -2096,16 +2110,16 @@ mod test { })), "+" ), - (Enter(Container(ListItem(Unordered(b'+')))), "+"), + (Enter(Container(ListItem(ListItemKind::List))), "+"), (Enter(Leaf(Paragraph)), ""), (Inline, "b"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(Unordered(b'+')))), "+"), - (Enter(Container(ListItem(Unordered(b'+')))), "+"), + (Exit(Container(ListItem(ListItemKind::List))), "+"), + (Enter(Container(ListItem(ListItemKind::List))), "+"), (Enter(Leaf(Paragraph)), ""), (Inline, "c"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(Unordered(b'+')))), "+"), + (Exit(Container(ListItem(ListItemKind::List))), "+"), ( Exit(Container(List { kind: ListKind { @@ -2140,12 +2154,12 @@ mod test { (Enter(Leaf(DescriptionTerm)), ""), (Inline, "term"), (Exit(Leaf(DescriptionTerm)), ""), - (Enter(Container(ListItem(Description))), ":"), + (Enter(Container(ListItem(ListItemKind::Description))), ":"), (Atom(Blankline), "\n"), (Enter(Leaf(Paragraph)), ""), (Inline, "description"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(Description))), ":"), + (Exit(Container(ListItem(ListItemKind::Description))), ":"), ( Exit(Container(List { kind: ListKind { @@ -2357,7 +2371,7 @@ mod test { })), "-" ), - (Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Enter(Container(ListItem(ListItemKind::List))), "-"), ( Enter(Container(List { kind: ListKind { @@ -2368,16 +2382,16 @@ mod test { })), "-" ), - (Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "a"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(Unordered(b'-')))), "-"), - (Enter(Container(ListItem(Unordered(b'-')))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), "-"), + (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "b"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(Unordered(b'-')))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), "-"), ( Exit(Container(List { kind: ListKind { @@ -2388,7 +2402,7 @@ mod test { })), "-" ), - (Exit(Container(ListItem(Unordered(b'-')))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), "-"), ( Exit(Container(List { kind: ListKind { diff --git a/src/lib.rs b/src/lib.rs index 6b4f002..36b3998 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -943,12 +943,12 @@ impl<'s> Parser<'s> { Container::List { kind, tight } } } - block::Container::ListItem(ty) => match ty { - block::ListType::Task => Container::TaskListItem { - checked: content.as_bytes()[3] != b' ', - }, - block::ListType::Description => Container::DescriptionDetails, - _ => Container::ListItem, + block::Container::ListItem(kind) => match kind { + block::ListItemKind::Task { checked } => { + Container::TaskListItem { checked } + } + block::ListItemKind::Description => Container::DescriptionDetails, + block::ListItemKind::List => Container::ListItem, }, block::Container::Table => Container::Table, block::Container::TableRow { head } => { From 6cebdfcc0c3b5d9d8b3d92fb64c68290614abe47 Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Sat, 29 Apr 2023 19:48:45 +0200 Subject: [PATCH 10/31] block: specify footnote label in event instead of using span --- src/block.rs | 14 ++++++++------ src/lib.rs | 2 +- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/block.rs b/src/block.rs index b9d5f18..0884b0a 100644 --- a/src/block.rs +++ b/src/block.rs @@ -94,7 +94,7 @@ pub enum Container<'s> { ListItem(ListItemKind), /// Span is footnote tag. - Footnote, + Footnote { label: &'s str }, /// Span is empty, before first '|' character. Table, @@ -275,7 +275,9 @@ impl<'s> TreeParser<'s> { } => Block::Leaf(LinkDefinition { label: span.of(self.src), }), - Kind::Definition { footnote: true, .. } => Block::Container(Footnote), + Kind::Definition { footnote: true, .. } => Block::Container(Footnote { + label: span.of(self.src), + }), Kind::Blockquote => Block::Container(Blockquote), Kind::ListItem { ty, .. } => Block::Container(ListItem(match ty { ListType::Task => ListItemKind::Task { @@ -1530,11 +1532,11 @@ mod test { fn parse_footnote() { test_parse!( "[^tag]: description\n", - (Enter(Container(Footnote)), "tag"), + (Enter(Container(Footnote { label: "tag" })), "tag"), (Enter(Leaf(Paragraph)), ""), (Inline, "description"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(Footnote)), "tag"), + (Exit(Container(Footnote { label: "tag" })), "tag"), ); } @@ -1552,12 +1554,12 @@ mod test { (Inline, "[^a]"), (Exit(Leaf(Paragraph)), ""), (Atom(Blankline), "\n"), - (Enter(Container(Footnote)), "a"), + (Enter(Container(Footnote { label: "a" })), "a"), (Enter(Leaf(Paragraph)), ""), (Inline, "note"), (Exit(Leaf(Paragraph)), ""), (Atom(Blankline), "\n"), - (Exit(Container(Footnote)), "a"), + (Exit(Container(Footnote { label: "a" })), "a"), (Enter(Leaf(Paragraph)), ""), (Inline, "para"), (Exit(Leaf(Paragraph)), ""), diff --git a/src/lib.rs b/src/lib.rs index 36b3998..65724c5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -918,7 +918,7 @@ impl<'s> Parser<'s> { block::Node::Container(c) => match c { block::Container::Blockquote => Container::Blockquote, block::Container::Div => Container::Div { class: content }, - block::Container::Footnote => Container::Footnote { label: content }, + block::Container::Footnote { label } => Container::Footnote { label }, block::Container::List { kind: block::ListKind { ty, tight }, marker, From 898ed90a2406894f95c89844eae276545fd64661 Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Sat, 29 Apr 2023 19:53:04 +0200 Subject: [PATCH 11/31] block: specify div class in event instead of using span --- src/block.rs | 14 ++++++++------ src/lib.rs | 2 +- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/block.rs b/src/block.rs index 0884b0a..af1ca7f 100644 --- a/src/block.rs +++ b/src/block.rs @@ -85,7 +85,7 @@ pub enum Container<'s> { Blockquote, /// Span is class specifier, possibly empty. - Div, + Div { class: &'s str }, /// Span is the list marker of the first list item in the list. List { kind: ListKind, marker: &'s str }, @@ -269,7 +269,9 @@ impl<'s> TreeParser<'s> { Kind::Fenced { kind: FenceKind::Div, .. - } => Block::Container(Div), + } => Block::Container(Div { + class: span.of(self.src), + }), Kind::Definition { footnote: false, .. } => Block::Leaf(LinkDefinition { @@ -2336,11 +2338,11 @@ mod test { fn parse_div() { test_parse!( concat!("::: cls\n", "abc\n", ":::\n",), - (Enter(Container(Div)), "cls"), + (Enter(Container(Div { class: "cls" })), "cls"), (Enter(Leaf(Paragraph)), ""), (Inline, "abc"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(Div)), "cls"), + (Exit(Container(Div { class: "cls" })), "cls"), ); } @@ -2348,11 +2350,11 @@ mod test { fn parse_div_no_class() { test_parse!( concat!(":::\n", "abc\n", ":::\n",), - (Enter(Container(Div)), ""), + (Enter(Container(Div { class: "" })), ""), (Enter(Leaf(Paragraph)), ""), (Inline, "abc"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(Div)), ""), + (Exit(Container(Div { class: "" })), ""), ); } diff --git a/src/lib.rs b/src/lib.rs index 65724c5..516ccf0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -917,7 +917,7 @@ impl<'s> Parser<'s> { } block::Node::Container(c) => match c { block::Container::Blockquote => Container::Blockquote, - block::Container::Div => Container::Div { class: content }, + block::Container::Div { class } => Container::Div { class }, block::Container::Footnote { label } => Container::Footnote { label }, block::Container::List { kind: block::ListKind { ty, tight }, From ee9ea2e023e209f2033deb1bbe40786f4727536c Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Sat, 29 Apr 2023 20:02:18 +0200 Subject: [PATCH 12/31] block: specify heading pos in event instead of using span --- src/block.rs | 136 ++++++++++++++++++++++++++++++++------------------- src/lib.rs | 18 ++++--- 2 files changed, 97 insertions(+), 57 deletions(-) diff --git a/src/block.rs b/src/block.rs index af1ca7f..d5b35f3 100644 --- a/src/block.rs +++ b/src/block.rs @@ -58,7 +58,11 @@ pub enum Leaf<'s> { /// Span is `#` characters. /// Each inline is a line. - Heading { level: u16, has_section: bool }, + Heading { + level: u16, + has_section: bool, + pos: u32, + }, /// Span is empty. DescriptionTerm, @@ -103,7 +107,7 @@ pub enum Container<'s> { TableRow { head: bool }, /// Span is '#' characters of heading. - Section, + Section { pos: u32 }, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -259,6 +263,7 @@ impl<'s> TreeParser<'s> { Kind::Heading { level } => Block::Leaf(Heading { level: level.try_into().unwrap(), has_section: top_level, + pos: span.start() as u32, }), Kind::Fenced { kind: FenceKind::CodeBlock(..), @@ -343,7 +348,12 @@ impl<'s> TreeParser<'s> { self.tree.exit(); // section }); self.open_sections.push(*level); - self.tree.enter(Node::Container(Section), span); + self.tree.enter( + Node::Container(Section { + pos: span.start() as u32, + }), + span, + ); } // trim '#' characters @@ -1110,11 +1120,12 @@ mod test { "# a\n", "## b\n", // ), - (Enter(Container(Section)), "#"), + (Enter(Container(Section { pos: 0 })), "#"), ( Enter(Leaf(Heading { level: 1, - has_section: true + has_section: true, + pos: 0 })), "#" ), @@ -1122,15 +1133,17 @@ mod test { ( Exit(Leaf(Heading { level: 1, - has_section: true + has_section: true, + pos: 0 })), "#" ), - (Enter(Container(Section)), "##"), + (Enter(Container(Section { pos: 4 })), "##"), ( Enter(Leaf(Heading { level: 2, - has_section: true + has_section: true, + pos: 4 })), "##" ), @@ -1138,12 +1151,13 @@ mod test { ( Exit(Leaf(Heading { level: 2, - has_section: true + has_section: true, + pos: 4 })), "##" ), - (Exit(Container(Section)), "##"), - (Exit(Container(Section)), "#"), + (Exit(Container(Section { pos: 4 })), "##"), + (Exit(Container(Section { pos: 0 })), "#"), ); } @@ -1154,11 +1168,12 @@ mod test { "#\n", "heading\n", // ), - (Enter(Container(Section)), "#"), + (Enter(Container(Section { pos: 0 })), "#"), ( Enter(Leaf(Heading { level: 1, - has_section: true + has_section: true, + pos: 0 })), "#" ), @@ -1166,11 +1181,12 @@ mod test { ( Exit(Leaf(Heading { level: 1, - has_section: true + has_section: true, + pos: 0 })), "#" ), - (Exit(Container(Section)), "#"), + (Exit(Container(Section { pos: 0 })), "#"), ); } @@ -1184,11 +1200,12 @@ mod test { " 12\n", "15\n", // ), - (Enter(Container(Section)), "#"), + (Enter(Container(Section { pos: 0 })), "#"), ( Enter(Leaf(Heading { level: 1, - has_section: true + has_section: true, + pos: 0, })), "#" ), @@ -1196,17 +1213,19 @@ mod test { ( Exit(Leaf(Heading { level: 1, - has_section: true + has_section: true, + pos: 0, })), "#" ), (Atom(Blankline), "\n"), - (Exit(Container(Section)), "#"), - (Enter(Container(Section)), "#"), + (Exit(Container(Section { pos: 0 })), "#"), + (Enter(Container(Section { pos: 6 })), "#"), ( Enter(Leaf(Heading { level: 1, - has_section: true + has_section: true, + pos: 6, })), "#" ), @@ -1216,11 +1235,12 @@ mod test { ( Exit(Leaf(Heading { level: 1, - has_section: true + has_section: true, + pos: 6, })), "#" ), - (Exit(Container(Section)), "#"), + (Exit(Container(Section { pos: 6 })), "#"), ); } @@ -1232,11 +1252,12 @@ mod test { "# b\n", "c\n", // ), - (Enter(Container(Section)), "#"), + (Enter(Container(Section { pos: 0 })), "#"), ( Enter(Leaf(Heading { level: 1, - has_section: true + has_section: true, + pos: 0 })), "#" ), @@ -1246,11 +1267,12 @@ mod test { ( Exit(Leaf(Heading { level: 1, - has_section: true + has_section: true, + pos: 0 })), "#" ), - (Exit(Container(Section)), "#"), + (Exit(Container(Section { pos: 0 })), "#"), ); } @@ -1270,11 +1292,12 @@ mod test { "\n", "# b\n", ), - (Enter(Container(Section)), "#"), + (Enter(Container(Section { pos: 0 })), "#"), ( Enter(Leaf(Heading { level: 1, - has_section: true + has_section: true, + pos: 0, })), "#" ), @@ -1282,16 +1305,18 @@ mod test { ( Exit(Leaf(Heading { level: 1, - has_section: true + has_section: true, + pos: 0, })), "#" ), (Atom(Blankline), "\n"), - (Enter(Container(Section)), "##"), + (Enter(Container(Section { pos: 5 })), "##"), ( Enter(Leaf(Heading { level: 2, - has_section: true + has_section: true, + pos: 5, })), "##" ), @@ -1299,16 +1324,18 @@ mod test { ( Exit(Leaf(Heading { level: 2, - has_section: true + has_section: true, + pos: 5, })), "##" ), (Atom(Blankline), "\n"), - (Enter(Container(Section)), "####"), + (Enter(Container(Section { pos: 12 })), "####"), ( Enter(Leaf(Heading { level: 4, - has_section: true + has_section: true, + pos: 12, })), "####" ), @@ -1316,18 +1343,20 @@ mod test { ( Exit(Leaf(Heading { level: 4, - has_section: true + has_section: true, + pos: 12, })), "####" ), (Atom(Blankline), "\n"), - (Exit(Container(Section)), "####"), - (Exit(Container(Section)), "##"), - (Enter(Container(Section)), "##"), + (Exit(Container(Section { pos: 12 })), "####"), + (Exit(Container(Section { pos: 5 })), "##"), + (Enter(Container(Section { pos: 23 })), "##"), ( Enter(Leaf(Heading { level: 2, - has_section: true + has_section: true, + pos: 23, })), "##" ), @@ -1335,16 +1364,18 @@ mod test { ( Exit(Leaf(Heading { level: 2, - has_section: true + has_section: true, + pos: 23, })), "##" ), (Atom(Blankline), "\n"), - (Enter(Container(Section)), "###"), + (Enter(Container(Section { pos: 30 })), "###"), ( Enter(Leaf(Heading { level: 3, - has_section: true + has_section: true, + pos: 30, })), "###" ), @@ -1352,19 +1383,21 @@ mod test { ( Exit(Leaf(Heading { level: 3, - has_section: true + has_section: true, + pos: 30, })), "###" ), (Atom(Blankline), "\n"), - (Exit(Container(Section)), "###"), - (Exit(Container(Section)), "##"), - (Exit(Container(Section)), "#"), - (Enter(Container(Section)), "#"), + (Exit(Container(Section { pos: 30 })), "###"), + (Exit(Container(Section { pos: 23 })), "##"), + (Exit(Container(Section { pos: 0 })), "#"), + (Enter(Container(Section { pos: 39 })), "#"), ( Enter(Leaf(Heading { level: 1, - has_section: true + has_section: true, + pos: 39, })), "#" ), @@ -1373,10 +1406,11 @@ mod test { Exit(Leaf(Heading { level: 1, has_section: true, + pos: 39, })), "#" ), - (Exit(Container(Section)), "#"), + (Exit(Container(Section { pos: 39 })), "#"), ); } @@ -1417,6 +1451,7 @@ mod test { Enter(Leaf(Heading { level: 2, has_section: false, + pos: 8, })), "##" ), @@ -1425,6 +1460,7 @@ mod test { Exit(Leaf(Heading { level: 2, has_section: false, + pos: 8, })), "##" ), diff --git a/src/lib.rs b/src/lib.rs index 516ccf0..0051486 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -576,7 +576,7 @@ pub struct Parser<'s> { #[derive(Clone)] struct Heading { /// Location of heading in src. - location: usize, + location: u32, /// Automatically generated id from heading text. id_auto: String, /// Text of heading, formatting stripped. @@ -694,7 +694,7 @@ impl<'s> PrePass<'s> { std::mem::transmute::<&str, &'static str>(id_auto.as_ref()) }); headings.push(Heading { - location: e.span.start(), + location: e.span.start() as u32, id_auto, text, id_override, @@ -728,7 +728,7 @@ impl<'s> PrePass<'s> { h.id_override.as_ref().unwrap_or(&h.id_auto) } - fn heading_id_by_location(&self, location: usize) -> Option<&str> { + fn heading_id_by_location(&self, location: u32) -> Option<&str> { self.headings .binary_search_by_key(&location, |h| h.location) .ok() @@ -886,12 +886,16 @@ impl<'s> Parser<'s> { self.inline_parser.reset(); match l { block::Leaf::Paragraph => Container::Paragraph, - block::Leaf::Heading { level, has_section } => Container::Heading { + block::Leaf::Heading { + level, + has_section, + pos, + } => Container::Heading { level, has_section, id: self .pre_pass - .heading_id_by_location(ev.span.start()) + .heading_id_by_location(pos) .unwrap_or_default() .to_string() .into(), @@ -957,10 +961,10 @@ impl<'s> Parser<'s> { } Container::TableRow { head } } - block::Container::Section => Container::Section { + block::Container::Section { pos } => Container::Section { id: self .pre_pass - .heading_id_by_location(ev.span.start()) + .heading_id_by_location(pos) .unwrap_or_default() .to_string() .into(), From d9804d963cdeea28292e561878326ef265858835 Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Mon, 1 May 2023 20:34:27 +0200 Subject: [PATCH 13/31] block: add parse_table_empty --- src/block.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/block.rs b/src/block.rs index d5b35f3..cc4f6af 100644 --- a/src/block.rs +++ b/src/block.rs @@ -2248,6 +2248,20 @@ mod test { ); } + #[test] + fn parse_table_empty() { + test_parse!( + "||", + (Enter(Container(Table)), ""), + (Enter(Container(TableRow { head: false })), "|"), + (Enter(Leaf(TableCell(Alignment::Unspecified))), "|"), + (Inline, ""), + (Exit(Leaf(TableCell(Alignment::Unspecified))), "|"), + (Exit(Container(TableRow { head: false })), "|"), + (Exit(Container(Table)), ""), + ); + } + #[test] fn parse_table_escaped() { test_parse!( From 4f863f91d50c12c376f31e540a83ca9bd47d49f3 Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Tue, 2 May 2023 23:11:31 +0200 Subject: [PATCH 14/31] block: add test parse_description_list_empty --- src/block.rs | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/block.rs b/src/block.rs index cc4f6af..c4b921c 100644 --- a/src/block.rs +++ b/src/block.rs @@ -2213,6 +2213,38 @@ mod test { ); } + #[test] + fn parse_description_list_empty() { + test_parse!( + ":\n", + ( + Enter(Container(List { + kind: ListKind { + ty: Description, + tight: true, + }, + marker: ":", + })), + ":" + ), + (Enter(Leaf(DescriptionTerm)), ""), + (Exit(Leaf(DescriptionTerm)), ""), + (Enter(Container(ListItem(ListItemKind::Description))), ":"), + (Atom(Blankline), "\n"), + (Exit(Container(ListItem(ListItemKind::Description))), ":"), + ( + Exit(Container(List { + kind: ListKind { + ty: Description, + tight: true, + }, + marker: ":", + })), + ":" + ), + ); + } + #[test] fn parse_table() { test_parse!( From c21138d5b95e48944137a2618dadb4784926e7df Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Wed, 3 May 2023 23:44:48 +0200 Subject: [PATCH 15/31] block: extend parse_table_align --- src/block.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/block.rs b/src/block.rs index c4b921c..edca5f6 100644 --- a/src/block.rs +++ b/src/block.rs @@ -2342,6 +2342,19 @@ mod test { (Exit(Container(TableRow { head: false })), "|"), (Exit(Container(Table)), "") ); + test_parse!( + concat!( + "||\n", // + "|-:|\n", // + ), + (Enter(Container(Table)), ""), + (Enter(Container(TableRow { head: true })), "|"), + (Enter(Leaf(TableCell(Alignment::Right))), "|"), + (Inline, ""), + (Exit(Leaf(TableCell(Alignment::Right))), "|"), + (Exit(Container(TableRow { head: true })), "|"), + (Exit(Container(Table)), ""), + ); } #[test] From 8d1381144e8fe5dee9211c77e41937f13a8d1d68 Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Thu, 4 May 2023 18:03:11 +0200 Subject: [PATCH 16/31] block: extend parse_code_block --- src/block.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/block.rs b/src/block.rs index edca5f6..4e29b94 100644 --- a/src/block.rs +++ b/src/block.rs @@ -1554,6 +1554,11 @@ mod test { (Inline, " block\n"), (Exit(Leaf(CodeBlock { language: "" })), ""), ); + test_parse!( + " ```abc\n", + (Enter(Leaf(CodeBlock { language: "abc" })), "abc"), + (Exit(Leaf(CodeBlock { language: "abc" })), "abc"), + ); } #[test] From 631c9eff42497a11e28d31d1cfd9e23497d44608 Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Wed, 3 May 2023 22:59:32 +0200 Subject: [PATCH 17/31] block: extend parse_description_list --- src/block.rs | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/src/block.rs b/src/block.rs index 4e29b94..bc23437 100644 --- a/src/block.rs +++ b/src/block.rs @@ -2216,6 +2216,92 @@ mod test { ":" ), ); + test_parse!( + concat!( + ": apple\n", + " fruit\n", + "\n", + " Paragraph one\n", + "\n", + " Paragraph two\n", + "\n", + " - sub\n", + " - list\n", + "\n", + ": orange\n", + ), + ( + Enter(Container(List { + kind: ListKind { + ty: Description, + tight: false + }, + marker: ":", + })), + ":", + ), + (Enter(Leaf(DescriptionTerm)), ""), + (Inline, "apple\n"), + (Inline, "fruit"), + (Exit(Leaf(DescriptionTerm)), ""), + (Enter(Container(ListItem(ListItemKind::Description))), ":"), + (Atom(Blankline), "\n"), + (Enter(Leaf(Paragraph)), ""), + (Inline, "Paragraph one"), + (Exit(Leaf(Paragraph)), ""), + (Atom(Blankline), "\n"), + (Enter(Leaf(Paragraph)), ""), + (Inline, "Paragraph two"), + (Exit(Leaf(Paragraph)), ""), + (Atom(Blankline), "\n"), + ( + Enter(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true + }, + marker: "-", + })), + "-", + ), + (Enter(Container(ListItem(ListItemKind::List))), "-"), + (Enter(Leaf(Paragraph)), ""), + (Inline, "sub"), + (Exit(Leaf(Paragraph)), ""), + (Exit(Container(ListItem(ListItemKind::List))), "-"), + (Enter(Container(ListItem(ListItemKind::List))), "-"), + (Enter(Leaf(Paragraph)), ""), + (Inline, "list"), + (Exit(Leaf(Paragraph)), ""), + (Atom(Blankline), "\n"), + (Exit(Container(ListItem(ListItemKind::List))), "-"), + ( + Exit(Container(List { + kind: ListKind { + ty: Unordered(b'-'), + tight: true + }, + marker: "-", + })), + "-", + ), + (Exit(Container(ListItem(ListItemKind::Description))), ":"), + (Enter(Leaf(DescriptionTerm)), ""), + (Inline, "orange"), + (Exit(Leaf(DescriptionTerm)), ""), + (Enter(Container(ListItem(ListItemKind::Description))), ":"), + (Exit(Container(ListItem(ListItemKind::Description))), ":"), + ( + Exit(Container(List { + kind: ListKind { + ty: Description, + tight: false + }, + marker: ":", + })), + ":", + ), + ); } #[test] From 5e99d98f4fad2155332c660478ae7413d1ffca5c Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Sat, 29 Apr 2023 19:37:16 +0200 Subject: [PATCH 18/31] block: use separate spans for start/end events --- src/block.rs | 638 ++++++++++++++++++++++++++------------------------- src/tree.rs | 81 ++++--- 2 files changed, 373 insertions(+), 346 deletions(-) diff --git a/src/block.rs b/src/block.rs index bc23437..1693849 100644 --- a/src/block.rs +++ b/src/block.rs @@ -30,10 +30,8 @@ pub fn parse(src: &str) -> Tree { enum Block<'s> { /// An atomic block, containing no children elements. Atom(Atom), - /// A leaf block, containing only inline elements. Leaf(Leaf<'s>), - /// A container block, containing children blocks. Container(Container<'s>), } @@ -42,71 +40,40 @@ enum Block<'s> { pub enum Atom { /// A line with no non-whitespace characters. Blankline, - /// A list of attributes. Attributes, - /// A thematic break. ThematicBreak, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Leaf<'s> { - /// Span is empty, before first character of paragraph. - /// Each inline is a line. Paragraph, - - /// Span is `#` characters. - /// Each inline is a line. Heading { level: u16, has_section: bool, pos: u32, }, - - /// Span is empty. DescriptionTerm, - - /// Span is '|'. - /// Has zero or one inline for the cell contents. TableCell(Alignment), - - /// Span is '^' character. Caption, - - /// Span is the link tag. - /// Inlines are lines of the URL. - LinkDefinition { label: &'s str }, - - /// Span is language specifier. - /// Each inline is a line. - CodeBlock { language: &'s str }, + LinkDefinition { + label: &'s str, + }, + CodeBlock { + language: &'s str, + }, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Container<'s> { - /// Span is `>`. Blockquote, - - /// Span is class specifier, possibly empty. Div { class: &'s str }, - - /// Span is the list marker of the first list item in the list. List { kind: ListKind, marker: &'s str }, - - /// Span is the list marker. ListItem(ListItemKind), - - /// Span is footnote tag. Footnote { label: &'s str }, - - /// Span is empty, before first '|' character. Table, - - /// Span is first '|' character. TableRow { head: bool }, - - /// Span is '#' characters of heading. Section { pos: u32 }, } @@ -185,10 +152,10 @@ impl<'s> TreeParser<'s> { line_pos += line_count; } while let Some(l) = self.open_lists.pop() { - self.close_list(l); + self.close_list(l, self.src.len()); } for _ in self.open_sections.drain(..) { - self.tree.exit(); // section + self.tree.exit(Span::empty_at(self.src.len())); // section } self.tree.finish() } @@ -197,18 +164,23 @@ impl<'s> TreeParser<'s> { fn parse_block(&mut self, lines: &mut [Span], top_level: bool) -> usize { if let Some(MeteredBlock { kind, - span, + span: span_start, line_count, }) = MeteredBlock::new(lines.iter().map(|sp| sp.of(self.src))) { let lines = &mut lines[..line_count]; - let span = span.translate(lines[0].start()); + let span_start = span_start.translate(lines[0].start()); + let end_line = lines[lines.len() - 1]; + let span_end = match kind { + Kind::Fenced { + has_closing_fence: true, + .. + } => end_line, + _ => end_line.empty_after(), + }; // part of first inline that is from the outer block - let outer = Span::new( - lines[0].start(), - span.end() + "]:".len() * usize::from(matches!(kind, Kind::Definition { .. })), - ); + let outer = Span::new(lines[0].start(), span_start.end()); // skip outer block part for inner content lines[0] = lines[0].skip(outer.len()); @@ -231,7 +203,7 @@ impl<'s> TreeParser<'s> { && !matches!(kind, Kind::ListItem { ty: ty_new, .. } if *ty == ty_new) { let l = self.open_lists.pop().unwrap(); - self.close_list(l); + self.close_list(l, span_start.start()); } } @@ -263,32 +235,32 @@ impl<'s> TreeParser<'s> { Kind::Heading { level } => Block::Leaf(Heading { level: level.try_into().unwrap(), has_section: top_level, - pos: span.start() as u32, + pos: span_start.start() as u32, }), Kind::Fenced { kind: FenceKind::CodeBlock(..), + spec, .. - } => Block::Leaf(CodeBlock { - language: span.of(self.src), - }), + } => Block::Leaf(CodeBlock { language: spec }), Kind::Fenced { kind: FenceKind::Div, + spec, .. - } => Block::Container(Div { - class: span.of(self.src), - }), + } => Block::Container(Div { class: spec }), Kind::Definition { - footnote: false, .. - } => Block::Leaf(LinkDefinition { - label: span.of(self.src), - }), - Kind::Definition { footnote: true, .. } => Block::Container(Footnote { - label: span.of(self.src), - }), + footnote: false, + label, + .. + } => Block::Leaf(LinkDefinition { label }), + Kind::Definition { + footnote: true, + label, + .. + } => Block::Container(Footnote { label }), Kind::Blockquote => Block::Container(Blockquote), Kind::ListItem { ty, .. } => Block::Container(ListItem(match ty { ListType::Task => ListItemKind::Task { - checked: span.of(self.src).as_bytes()[3] != b' ', + checked: span_start.of(self.src).as_bytes()[3] != b' ', }, ListType::Description => ListItemKind::Description, _ => ListItemKind::List, @@ -297,10 +269,12 @@ impl<'s> TreeParser<'s> { }; match block { - Block::Atom(a) => self.tree.atom(a, span), - Block::Leaf(l) => self.parse_leaf(l, &kind, span, lines), - Block::Container(Table) => self.parse_table(lines, span), - Block::Container(c) => self.parse_container(c, &kind, span, outer, lines), + Block::Atom(a) => self.tree.atom(a, span_start), + Block::Leaf(l) => self.parse_leaf(l, &kind, span_start, span_end, lines), + Block::Container(Table) => self.parse_table(lines, span_start, span_end), + Block::Container(c) => { + self.parse_container(c, &kind, span_start, span_end, outer, lines); + } } line_count @@ -309,7 +283,14 @@ impl<'s> TreeParser<'s> { } } - fn parse_leaf(&mut self, leaf: Leaf<'s>, k: &Kind, span: Span, lines: &mut [Span]) { + fn parse_leaf( + &mut self, + leaf: Leaf<'s>, + k: &Kind, + span_start: Span, + span_end: Span, + lines: &mut [Span], + ) { if let Kind::Fenced { indent, .. } = k { for line in lines.iter_mut() { let indent_line = line @@ -345,14 +326,14 @@ impl<'s> TreeParser<'s> { .rposition(|l| l < level) .map_or(0, |i| i + 1); self.open_sections.drain(first_close..).for_each(|_| { - self.tree.exit(); // section + self.tree.exit(Span::empty_at(span_start.start())); // section }); self.open_sections.push(*level); self.tree.enter( Node::Container(Section { - pos: span.start() as u32, + pos: span_start.start() as u32, }), - span, + span_start.empty_before(), ); } @@ -362,19 +343,20 @@ impl<'s> TreeParser<'s> { } } - self.tree.enter(Node::Leaf(leaf), span); + self.tree.enter(Node::Leaf(leaf), span_start); lines .iter() .filter(|l| !matches!(k, Kind::Heading { .. }) || !l.is_empty()) .for_each(|line| self.tree.inline(*line)); - self.tree.exit(); + self.tree.exit(span_end); } fn parse_container( &mut self, c: Container<'s>, k: &Kind, - span: Span, + mut span_start: Span, + span_end: Span, outer: Span, lines: &mut [Span], ) { @@ -417,9 +399,9 @@ impl<'s> TreeParser<'s> { let node = self.tree.enter( Node::Container(Container::List { kind: ListKind { ty: *ty, tight }, - marker: span.of(self.src), + marker: span_start.of(self.src), }), - span, + span_start.empty_before(), ); self.open_lists.push(OpenList { ty: *ty, @@ -430,22 +412,22 @@ impl<'s> TreeParser<'s> { } let dt = if let ListItem(ListItemKind::Description) = c { - let dt = self - .tree - .enter(Node::Leaf(DescriptionTerm), span.empty_after()); - self.tree.exit(); - Some(dt) + let dt = self.tree.enter(Node::Leaf(DescriptionTerm), span_start); + self.tree.exit(span_start.trim_end(self.src).empty_after()); + let span_open = span_start; + span_start = lines[0].empty_before(); + Some((dt, span_open)) } else { None }; - let node = self.tree.enter(Node::Container(c), span); + let node = self.tree.enter(Node::Container(c), span_start); let mut l = 0; while l < lines.len() { l += self.parse_block(&mut lines[l..], false); } - if let Some(node_dt) = dt { + if let Some((node_dt, span_open)) = dt { let node_child = if let Some(node_child) = self.tree.children(node).next() { if let tree::Element::Container(Node::Leaf(l @ Paragraph)) = node_child.elem { *l = DescriptionTerm; @@ -457,7 +439,7 @@ impl<'s> TreeParser<'s> { None }; if let Some(node_child) = node_child { - self.tree.swap_prev(node_child); + self.tree.swap_prev(node_child, span_open); self.tree.remove(node_dt); } } @@ -468,22 +450,22 @@ impl<'s> TreeParser<'s> { self.prev_blankline = false; self.prev_loose = false; let l = self.open_lists.pop().unwrap(); - self.close_list(l); + self.close_list(l, span_end.start()); } } - self.tree.exit(); + self.tree.exit(span_end); } - fn parse_table(&mut self, lines: &mut [Span], span: Span) { + fn parse_table(&mut self, lines: &mut [Span], span_start: Span, span_end: Span) { self.alignments.clear(); - self.tree.enter(Node::Container(Table), span); + self.tree.enter(Node::Container(Table), span_start); let caption_line = lines .iter() .position(|sp| sp.of(self.src).trim_start().starts_with('^')) .map_or(lines.len(), |caption_line| { - self.tree.enter(Node::Leaf(Caption), span); + self.tree.enter(Node::Leaf(Caption), span_start); lines[caption_line] = lines[caption_line] .trim_start(self.src) .skip_chars(2, self.src); @@ -491,7 +473,7 @@ impl<'s> TreeParser<'s> { for line in &lines[caption_line..] { self.tree.inline(*line); } - self.tree.exit(); + self.tree.exit(span_end); caption_line }); @@ -539,10 +521,10 @@ impl<'s> TreeParser<'s> { .copied() .unwrap_or(Alignment::Unspecified), )), - Span::by_len(cell_start - 1, 1), + Span::empty_at(cell_start), ); self.tree.inline(span); - self.tree.exit(); // cell + self.tree.exit(Span::new(pos, pos + 1)); // cell cell_start = pos + len; column_index += 1; } @@ -602,15 +584,15 @@ impl<'s> TreeParser<'s> { } } } else { - self.tree.exit(); // table row + self.tree.exit(Span::empty_at(pos)); // table row last_row_node = Some(row_node); } } - self.tree.exit(); // table + self.tree.exit(span_end); // table } - fn close_list(&mut self, list: OpenList) { + fn close_list(&mut self, list: OpenList, pos: usize) { if self.prev_loose { let mut elem = self.tree.elem(list.node); let ListKind { tight, .. } = elem.list_mut().unwrap(); @@ -618,7 +600,7 @@ impl<'s> TreeParser<'s> { *tight = true; } - self.tree.exit(); // list + self.tree.exit(Span::empty_at(pos)); // list } } @@ -633,15 +615,15 @@ impl<'t, 's> tree::Element<'t, Node<'s>, Atom> { } /// Parser for a single block. -struct MeteredBlock { - kind: Kind, +struct MeteredBlock<'s> { + kind: Kind<'s>, span: Span, line_count: usize, } -impl MeteredBlock { +impl<'s> MeteredBlock<'s> { /// Identify and measure the line length of a single block. - fn new<'s, I: Iterator>(mut lines: I) -> Option { + fn new>(mut lines: I) -> Option { lines.next().map(|l| { let IdentifiedBlock { mut kind, span } = IdentifiedBlock::new(l); let line_count = 1 + lines.take_while(|l| kind.continues(l)).count(); @@ -662,7 +644,7 @@ enum FenceKind { #[cfg_attr(test, derive(PartialEq, Eq))] #[derive(Debug)] -enum Kind { +enum Kind<'s> { Atom(Atom), Paragraph, Heading { @@ -672,12 +654,13 @@ enum Kind { indent: usize, fence_length: usize, kind: FenceKind, - has_spec: bool, + spec: &'s str, has_closing_fence: bool, }, Definition { indent: usize, footnote: bool, + label: &'s str, }, Blockquote, ListItem { @@ -690,13 +673,13 @@ enum Kind { }, } -struct IdentifiedBlock { - kind: Kind, +struct IdentifiedBlock<'s> { + kind: Kind<'s>, span: Span, } -impl IdentifiedBlock { - fn new(line: &str) -> Self { +impl<'s> IdentifiedBlock<'s> { + fn new(line: &'s str) -> Self { let mut chars = line.chars(); let indent = chars .clone() @@ -744,11 +727,15 @@ impl IdentifiedBlock { } } '[' => chars.as_str().find("]:").map(|l| { - let tag = &chars.as_str()[0..l]; - let footnote = tag.starts_with('^'); + let label = &chars.as_str()[0..l]; + let footnote = label.starts_with('^'); ( - Kind::Definition { indent, footnote }, - Span::by_len(indent_bytes + 1, l).skip(usize::from(footnote)), + Kind::Definition { + indent, + footnote, + label: &label[usize::from(footnote)..], + }, + Span::by_len(0, indent_bytes + 3 + l), ) }), '-' | '*' if Self::is_thematic_break(chars.clone()) => { @@ -795,7 +782,6 @@ impl IdentifiedBlock { } else { !spec.chars().any(char::is_whitespace) && !spec.chars().any(|c| c == '`') }; - let skip = line_t.len() - spec.len(); (valid_spec && fence_length >= 3).then(|| { ( Kind::Fenced { @@ -805,10 +791,10 @@ impl IdentifiedBlock { ':' => FenceKind::Div, _ => FenceKind::CodeBlock(f as u8), }, - has_spec: !spec.is_empty(), + spec, has_closing_fence: false, }, - Span::by_len(indent_bytes + skip, spec.len()), + Span::by_len(indent_bytes, line.len()), ) }) } @@ -926,9 +912,9 @@ impl IdentifiedBlock { } } -impl Kind { +impl<'s> Kind<'s> { /// Determine if a line continues the block. - fn continues(&mut self, line: &str) -> bool { + fn continues(&mut self, line: &'s str) -> bool { let IdentifiedBlock { kind: next, .. } = IdentifiedBlock::new(line); match self { Self::Atom(..) @@ -955,7 +941,9 @@ impl Kind { *last_blankline = blankline; blankline || spaces > *indent || para } - Self::Definition { indent, footnote } => { + Self::Definition { + indent, footnote, .. + } => { if *footnote { let spaces = line.chars().take_while(|c| c.is_whitespace()).count(); matches!(next, Self::Atom(Blankline)) || spaces > *indent @@ -972,13 +960,15 @@ impl Kind { if let Kind::Fenced { kind: k, fence_length: l, - has_spec: false, + spec, .. } = next { - *has_closing_fence = k == *kind - && (l == *fence_length - || (matches!(k, FenceKind::Div) && l > *fence_length)); + if spec.is_empty() { + *has_closing_fence = k == *kind + && (l == *fence_length + || (matches!(k, FenceKind::Div) && l > *fence_length)); + } } true } @@ -1120,7 +1110,7 @@ mod test { "# a\n", "## b\n", // ), - (Enter(Container(Section { pos: 0 })), "#"), + (Enter(Container(Section { pos: 0 })), ""), ( Enter(Leaf(Heading { level: 1, @@ -1136,9 +1126,9 @@ mod test { has_section: true, pos: 0 })), - "#" + "" ), - (Enter(Container(Section { pos: 4 })), "##"), + (Enter(Container(Section { pos: 4 })), ""), ( Enter(Leaf(Heading { level: 2, @@ -1154,10 +1144,10 @@ mod test { has_section: true, pos: 4 })), - "##" + "" ), - (Exit(Container(Section { pos: 4 })), "##"), - (Exit(Container(Section { pos: 0 })), "#"), + (Exit(Container(Section { pos: 4 })), ""), + (Exit(Container(Section { pos: 0 })), ""), ); } @@ -1168,7 +1158,7 @@ mod test { "#\n", "heading\n", // ), - (Enter(Container(Section { pos: 0 })), "#"), + (Enter(Container(Section { pos: 0 })), ""), ( Enter(Leaf(Heading { level: 1, @@ -1184,9 +1174,9 @@ mod test { has_section: true, pos: 0 })), - "#" + "" ), - (Exit(Container(Section { pos: 0 })), "#"), + (Exit(Container(Section { pos: 0 })), ""), ); } @@ -1200,7 +1190,7 @@ mod test { " 12\n", "15\n", // ), - (Enter(Container(Section { pos: 0 })), "#"), + (Enter(Container(Section { pos: 0 })), ""), ( Enter(Leaf(Heading { level: 1, @@ -1216,11 +1206,11 @@ mod test { has_section: true, pos: 0, })), - "#" + "" ), (Atom(Blankline), "\n"), - (Exit(Container(Section { pos: 0 })), "#"), - (Enter(Container(Section { pos: 6 })), "#"), + (Exit(Container(Section { pos: 0 })), ""), + (Enter(Container(Section { pos: 6 })), ""), ( Enter(Leaf(Heading { level: 1, @@ -1238,9 +1228,9 @@ mod test { has_section: true, pos: 6, })), - "#" + "" ), - (Exit(Container(Section { pos: 6 })), "#"), + (Exit(Container(Section { pos: 6 })), ""), ); } @@ -1252,7 +1242,7 @@ mod test { "# b\n", "c\n", // ), - (Enter(Container(Section { pos: 0 })), "#"), + (Enter(Container(Section { pos: 0 })), ""), ( Enter(Leaf(Heading { level: 1, @@ -1270,9 +1260,9 @@ mod test { has_section: true, pos: 0 })), - "#" + "", ), - (Exit(Container(Section { pos: 0 })), "#"), + (Exit(Container(Section { pos: 0 })), ""), ); } @@ -1292,7 +1282,7 @@ mod test { "\n", "# b\n", ), - (Enter(Container(Section { pos: 0 })), "#"), + (Enter(Container(Section { pos: 0 })), ""), ( Enter(Leaf(Heading { level: 1, @@ -1308,10 +1298,10 @@ mod test { has_section: true, pos: 0, })), - "#" + "", ), (Atom(Blankline), "\n"), - (Enter(Container(Section { pos: 5 })), "##"), + (Enter(Container(Section { pos: 5 })), ""), ( Enter(Leaf(Heading { level: 2, @@ -1327,10 +1317,10 @@ mod test { has_section: true, pos: 5, })), - "##" + "", ), (Atom(Blankline), "\n"), - (Enter(Container(Section { pos: 12 })), "####"), + (Enter(Container(Section { pos: 12 })), ""), ( Enter(Leaf(Heading { level: 4, @@ -1346,12 +1336,12 @@ mod test { has_section: true, pos: 12, })), - "####" + "", ), (Atom(Blankline), "\n"), - (Exit(Container(Section { pos: 12 })), "####"), - (Exit(Container(Section { pos: 5 })), "##"), - (Enter(Container(Section { pos: 23 })), "##"), + (Exit(Container(Section { pos: 12 })), ""), + (Exit(Container(Section { pos: 5 })), ""), + (Enter(Container(Section { pos: 23 })), ""), ( Enter(Leaf(Heading { level: 2, @@ -1367,10 +1357,10 @@ mod test { has_section: true, pos: 23, })), - "##" + "", ), (Atom(Blankline), "\n"), - (Enter(Container(Section { pos: 30 })), "###"), + (Enter(Container(Section { pos: 30 })), ""), ( Enter(Leaf(Heading { level: 3, @@ -1386,13 +1376,13 @@ mod test { has_section: true, pos: 30, })), - "###" + "", ), (Atom(Blankline), "\n"), - (Exit(Container(Section { pos: 30 })), "###"), - (Exit(Container(Section { pos: 23 })), "##"), - (Exit(Container(Section { pos: 0 })), "#"), - (Enter(Container(Section { pos: 39 })), "#"), + (Exit(Container(Section { pos: 30 })), ""), + (Exit(Container(Section { pos: 23 })), ""), + (Exit(Container(Section { pos: 0 })), ""), + (Enter(Container(Section { pos: 39 })), ""), ( Enter(Leaf(Heading { level: 1, @@ -1408,9 +1398,9 @@ mod test { has_section: true, pos: 39, })), - "#" + "", ), - (Exit(Container(Section { pos: 39 })), "#"), + (Exit(Container(Section { pos: 39 })), ""), ); } @@ -1422,7 +1412,7 @@ mod test { (Enter(Leaf(Paragraph)), ""), (Inline, "a"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(Blockquote)), ">"), + (Exit(Container(Blockquote)), ""), ); test_parse!( "> a\nb\nc\n", @@ -1432,7 +1422,7 @@ mod test { (Inline, "b\n"), (Inline, "c"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(Blockquote)), ">"), + (Exit(Container(Blockquote)), ""), ); test_parse!( concat!( @@ -1462,13 +1452,13 @@ mod test { has_section: false, pos: 8, })), - "##" + "" ), (Atom(Blankline), "\n"), (Enter(Leaf(Paragraph)), ""), (Inline, "para"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(Blockquote)), ">"), + (Exit(Container(Blockquote)), ""), ); } @@ -1478,21 +1468,24 @@ mod test { "> \n", (Enter(Container(Blockquote)), ">"), (Atom(Blankline), "\n"), - (Exit(Container(Blockquote)), ">"), + (Exit(Container(Blockquote)), ""), ); test_parse!( ">", (Enter(Container(Blockquote)), ">"), (Atom(Blankline), ""), - (Exit(Container(Blockquote)), ">"), + (Exit(Container(Blockquote)), ""), ); } #[test] fn parse_code_block() { test_parse!( - concat!("```\n", "l0\n"), - (Enter(Leaf(CodeBlock { language: "" })), "",), + concat!( + "```\n", // + "l0\n" // + ), + (Enter(Leaf(CodeBlock { language: "" })), "```\n",), (Inline, "l0\n"), (Exit(Leaf(CodeBlock { language: "" })), "",), ); @@ -1504,9 +1497,9 @@ mod test { "\n", "para\n", // ), - (Enter(Leaf(CodeBlock { language: "" })), ""), + (Enter(Leaf(CodeBlock { language: "" })), "```\n"), (Inline, "l0\n"), - (Exit(Leaf(CodeBlock { language: "" })), ""), + (Exit(Leaf(CodeBlock { language: "" })), "```\n"), (Atom(Blankline), "\n"), (Enter(Leaf(Paragraph)), ""), (Inline, "para"), @@ -1520,11 +1513,11 @@ mod test { " l1\n", "````", // ), - (Enter(Leaf(CodeBlock { language: "lang" })), "lang"), + (Enter(Leaf(CodeBlock { language: "lang" })), "```` lang\n",), (Inline, "l0\n"), (Inline, "```\n"), (Inline, " l1\n"), - (Exit(Leaf(CodeBlock { language: "lang" })), "lang"), + (Exit(Leaf(CodeBlock { language: "lang" })), "````"), ); test_parse!( concat!( @@ -1535,12 +1528,12 @@ mod test { "bbb\n", // "```\n", // ), - (Enter(Leaf(CodeBlock { language: "" })), ""), + (Enter(Leaf(CodeBlock { language: "" })), "```\n"), (Inline, "a\n"), - (Exit(Leaf(CodeBlock { language: "" })), ""), - (Enter(Leaf(CodeBlock { language: "" })), ""), + (Exit(Leaf(CodeBlock { language: "" })), "```\n"), + (Enter(Leaf(CodeBlock { language: "" })), "```\n"), (Inline, "bbb\n"), - (Exit(Leaf(CodeBlock { language: "" })), ""), + (Exit(Leaf(CodeBlock { language: "" })), "```\n"), ); test_parse!( concat!( @@ -1549,15 +1542,15 @@ mod test { " block\n", "~~~\n", // ), - (Enter(Leaf(CodeBlock { language: "" })), ""), + (Enter(Leaf(CodeBlock { language: "" })), "~~~\n"), (Inline, "code\n"), (Inline, " block\n"), - (Exit(Leaf(CodeBlock { language: "" })), ""), + (Exit(Leaf(CodeBlock { language: "" })), "~~~\n"), ); test_parse!( " ```abc\n", - (Enter(Leaf(CodeBlock { language: "abc" })), "abc"), - (Exit(Leaf(CodeBlock { language: "abc" })), "abc"), + (Enter(Leaf(CodeBlock { language: "abc" })), "```abc\n"), + (Exit(Leaf(CodeBlock { language: "abc" })), ""), ); } @@ -1565,9 +1558,9 @@ mod test { fn parse_link_definition() { test_parse!( "[tag]: url\n", - (Enter(Leaf(LinkDefinition { label: "tag" })), "tag"), + (Enter(Leaf(LinkDefinition { label: "tag" })), "[tag]:"), (Inline, "url"), - (Exit(Leaf(LinkDefinition { label: "tag" })), "tag"), + (Exit(Leaf(LinkDefinition { label: "tag" })), ""), ); } @@ -1575,11 +1568,11 @@ mod test { fn parse_footnote() { test_parse!( "[^tag]: description\n", - (Enter(Container(Footnote { label: "tag" })), "tag"), + (Enter(Container(Footnote { label: "tag" })), "[^tag]:"), (Enter(Leaf(Paragraph)), ""), (Inline, "description"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(Footnote { label: "tag" })), "tag"), + (Exit(Container(Footnote { label: "tag" })), ""), ); } @@ -1597,12 +1590,12 @@ mod test { (Inline, "[^a]"), (Exit(Leaf(Paragraph)), ""), (Atom(Blankline), "\n"), - (Enter(Container(Footnote { label: "a" })), "a"), + (Enter(Container(Footnote { label: "a" })), "[^a]:"), (Enter(Leaf(Paragraph)), ""), (Inline, "note"), (Exit(Leaf(Paragraph)), ""), (Atom(Blankline), "\n"), - (Exit(Container(Footnote { label: "a" })), "a"), + (Exit(Container(Footnote { label: "a" })), ""), (Enter(Leaf(Paragraph)), ""), (Inline, "para"), (Exit(Leaf(Paragraph)), ""), @@ -1632,13 +1625,13 @@ mod test { }, marker: "-", })), - "-" + "" ), (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "abc"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(ListItemKind::List))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), ""), ( Exit(Container(List { kind: ListKind { @@ -1647,7 +1640,7 @@ mod test { }, marker: "-", })), - "-" + "" ), ); } @@ -1667,18 +1660,18 @@ mod test { }, marker: "-", })), - "-" + "" ), (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "a"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(ListItemKind::List))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), ""), (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "b"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(ListItemKind::List))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), ""), ( Exit(Container(List { kind: ListKind { @@ -1687,7 +1680,7 @@ mod test { }, marker: "-", })), - "-" + "" ), ); } @@ -1709,24 +1702,24 @@ mod test { }, marker: "-", })), - "-" + "" ), (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "a"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(ListItemKind::List))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), ""), (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "b"), (Exit(Leaf(Paragraph)), ""), (Atom(Blankline), "\n"), - (Exit(Container(ListItem(ListItemKind::List))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), ""), (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "c"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(ListItemKind::List))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), ""), ( Exit(Container(List { kind: ListKind { @@ -1735,7 +1728,7 @@ mod test { }, marker: "-", })), - "-" + "" ), ); } @@ -1759,13 +1752,13 @@ mod test { }, marker: "-", })), - "-" + "" ), (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "a"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(ListItemKind::List))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), ""), (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "b"), @@ -1779,7 +1772,7 @@ mod test { }, marker: "-", })), - "-" + "" ), (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), @@ -1789,7 +1782,7 @@ mod test { (Enter(Leaf(Paragraph)), ""), (Inline, "d"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(ListItemKind::List))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), ""), ( Exit(Container(List { kind: ListKind { @@ -1798,9 +1791,9 @@ mod test { }, marker: "-", })), - "-" + "" ), - (Exit(Container(ListItem(ListItemKind::List))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), ""), ( Exit(Container(List { kind: ListKind { @@ -1809,7 +1802,7 @@ mod test { }, marker: "-", })), - "-" + "" ), ); } @@ -1833,7 +1826,7 @@ mod test { }, marker: "-", })), - "-" + "" ), (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), @@ -1848,19 +1841,19 @@ mod test { }, marker: "+", })), - "+", + "", ), (Enter(Container(ListItem(ListItemKind::List))), "+"), (Enter(Leaf(Paragraph)), ""), (Inline, "aa"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(ListItemKind::List))), "+"), + (Exit(Container(ListItem(ListItemKind::List))), ""), (Enter(Container(ListItem(ListItemKind::List))), "+"), (Enter(Leaf(Paragraph)), ""), (Inline, "ab"), (Exit(Leaf(Paragraph)), ""), (Atom(Blankline), "\n"), - (Exit(Container(ListItem(ListItemKind::List))), "+"), + (Exit(Container(ListItem(ListItemKind::List))), ""), ( Exit(Container(List { kind: ListKind { @@ -1869,14 +1862,14 @@ mod test { }, marker: "+", })), - "+", + "", ), - (Exit(Container(ListItem(ListItemKind::List))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), ""), (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "b"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(ListItemKind::List))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), ""), ( Exit(Container(List { kind: ListKind { @@ -1885,7 +1878,7 @@ mod test { }, marker: "-", })), - "-" + "" ), ); test_parse!( @@ -1904,7 +1897,7 @@ mod test { }, marker: "1.", })), - "1.", + "", ), (Enter(Container(ListItem(ListItemKind::List))), "1."), (Enter(Leaf(Paragraph)), ""), @@ -1919,14 +1912,14 @@ mod test { }, marker: "-", })), - "-", + "", ), (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "b"), (Exit(Leaf(Paragraph)), ""), (Atom(Blankline), "\n"), - (Exit(Container(ListItem(ListItemKind::List))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), ""), ( Exit(Container(List { kind: ListKind { @@ -1935,12 +1928,12 @@ mod test { }, marker: "-", })), - "-", + "", ), (Enter(Leaf(Paragraph)), ""), (Inline, "c"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(ListItemKind::List))), "1."), + (Exit(Container(ListItem(ListItemKind::List))), ""), ( Exit(Container(List { kind: ListKind { @@ -1949,7 +1942,7 @@ mod test { }, marker: "1.", })), - "1.", + "", ), ); } @@ -1972,7 +1965,7 @@ mod test { }, marker: "-", })), - "-" + "" ), (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), @@ -1987,7 +1980,7 @@ mod test { }, marker: "+", })), - "+", + "", ), (Enter(Container(ListItem(ListItemKind::List))), "+"), (Enter(Leaf(Paragraph)), ""), @@ -2002,13 +1995,13 @@ mod test { }, marker: "*", })), - "*", + "", ), (Enter(Container(ListItem(ListItemKind::List))), "*"), (Enter(Leaf(Paragraph)), ""), (Inline, "c"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(ListItemKind::List))), "*"), + (Exit(Container(ListItem(ListItemKind::List))), ""), ( Exit(Container(List { kind: ListKind { @@ -2017,9 +2010,9 @@ mod test { }, marker: "*", })), - "*", + "", ), - (Exit(Container(ListItem(ListItemKind::List))), "+"), + (Exit(Container(ListItem(ListItemKind::List))), ""), ( Exit(Container(List { kind: ListKind { @@ -2028,9 +2021,9 @@ mod test { }, marker: "+", })), - "+", + "", ), - (Exit(Container(ListItem(ListItemKind::List))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), ""), ( Exit(Container(List { kind: ListKind { @@ -2039,7 +2032,7 @@ mod test { }, marker: "-", })), - "-" + "" ), ); } @@ -2062,7 +2055,7 @@ mod test { }, marker: "-", })), - "-" + "" ), (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), @@ -2077,14 +2070,14 @@ mod test { }, marker: "*", })), - "*" + "" ), (Enter(Container(ListItem(ListItemKind::List))), "*"), (Enter(Leaf(Paragraph)), ""), (Inline, "b"), (Exit(Leaf(Paragraph)), ""), (Atom(Blankline), "\n"), - (Exit(Container(ListItem(ListItemKind::List))), "*"), + (Exit(Container(ListItem(ListItemKind::List))), ""), ( Exit(Container(List { kind: ListKind { @@ -2093,9 +2086,9 @@ mod test { }, marker: "*", })), - "*" + "" ), - (Exit(Container(ListItem(ListItemKind::List))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), ""), ( Exit(Container(List { kind: ListKind { @@ -2104,7 +2097,7 @@ mod test { }, marker: "-", })), - "-" + "" ), (Enter(Leaf(Paragraph)), ""), (Inline, "cd"), @@ -2128,13 +2121,13 @@ mod test { }, marker: "-", })), - "-" + "" ), (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "a"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(ListItemKind::List))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), ""), ( Exit(Container(List { kind: ListKind { @@ -2143,7 +2136,7 @@ mod test { }, marker: "-", })), - "-" + "" ), ( Enter(Container(List { @@ -2153,18 +2146,18 @@ mod test { }, marker: "+", })), - "+" + "" ), (Enter(Container(ListItem(ListItemKind::List))), "+"), (Enter(Leaf(Paragraph)), ""), (Inline, "b"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(ListItemKind::List))), "+"), + (Exit(Container(ListItem(ListItemKind::List))), ""), (Enter(Container(ListItem(ListItemKind::List))), "+"), (Enter(Leaf(Paragraph)), ""), (Inline, "c"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(ListItemKind::List))), "+"), + (Exit(Container(ListItem(ListItemKind::List))), ""), ( Exit(Container(List { kind: ListKind { @@ -2173,7 +2166,7 @@ mod test { }, marker: "+", })), - "+" + "" ), ); } @@ -2194,17 +2187,17 @@ mod test { }, marker: ":", })), - ":" + "" ), - (Enter(Leaf(DescriptionTerm)), ""), + (Enter(Leaf(DescriptionTerm)), ":"), (Inline, "term"), (Exit(Leaf(DescriptionTerm)), ""), - (Enter(Container(ListItem(ListItemKind::Description))), ":"), + (Enter(Container(ListItem(ListItemKind::Description))), ""), (Atom(Blankline), "\n"), (Enter(Leaf(Paragraph)), ""), (Inline, "description"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(ListItemKind::Description))), ":"), + (Exit(Container(ListItem(ListItemKind::Description))), ""), ( Exit(Container(List { kind: ListKind { @@ -2213,7 +2206,7 @@ mod test { }, marker: ":", })), - ":" + "" ), ); test_parse!( @@ -2238,13 +2231,13 @@ mod test { }, marker: ":", })), - ":", + "", ), - (Enter(Leaf(DescriptionTerm)), ""), + (Enter(Leaf(DescriptionTerm)), ":"), (Inline, "apple\n"), (Inline, "fruit"), (Exit(Leaf(DescriptionTerm)), ""), - (Enter(Container(ListItem(ListItemKind::Description))), ":"), + (Enter(Container(ListItem(ListItemKind::Description))), ""), (Atom(Blankline), "\n"), (Enter(Leaf(Paragraph)), ""), (Inline, "Paragraph one"), @@ -2262,19 +2255,19 @@ mod test { }, marker: "-", })), - "-", + "", ), (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "sub"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(ListItemKind::List))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), ""), (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "list"), (Exit(Leaf(Paragraph)), ""), (Atom(Blankline), "\n"), - (Exit(Container(ListItem(ListItemKind::List))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), ""), ( Exit(Container(List { kind: ListKind { @@ -2283,14 +2276,14 @@ mod test { }, marker: "-", })), - "-", + "", ), - (Exit(Container(ListItem(ListItemKind::Description))), ":"), - (Enter(Leaf(DescriptionTerm)), ""), + (Exit(Container(ListItem(ListItemKind::Description))), ""), + (Enter(Leaf(DescriptionTerm)), ":"), (Inline, "orange"), (Exit(Leaf(DescriptionTerm)), ""), - (Enter(Container(ListItem(ListItemKind::Description))), ":"), - (Exit(Container(ListItem(ListItemKind::Description))), ":"), + (Enter(Container(ListItem(ListItemKind::Description))), ""), + (Exit(Container(ListItem(ListItemKind::Description))), ""), ( Exit(Container(List { kind: ListKind { @@ -2299,7 +2292,7 @@ mod test { }, marker: ":", })), - ":", + "", ), ); } @@ -2316,13 +2309,13 @@ mod test { }, marker: ":", })), - ":" + "" ), - (Enter(Leaf(DescriptionTerm)), ""), + (Enter(Leaf(DescriptionTerm)), ":"), (Exit(Leaf(DescriptionTerm)), ""), - (Enter(Container(ListItem(ListItemKind::Description))), ":"), + (Enter(Container(ListItem(ListItemKind::Description))), ""), (Atom(Blankline), "\n"), - (Exit(Container(ListItem(ListItemKind::Description))), ":"), + (Exit(Container(ListItem(ListItemKind::Description))), ""), ( Exit(Container(List { kind: ListKind { @@ -2331,7 +2324,7 @@ mod test { }, marker: ":", })), - ":" + "" ), ); } @@ -2346,27 +2339,27 @@ mod test { ), (Enter(Container(Table)), ""), (Enter(Container(TableRow { head: true })), "|"), - (Enter(Leaf(TableCell(Alignment::Unspecified))), "|"), + (Enter(Leaf(TableCell(Alignment::Unspecified))), ""), (Inline, "a"), (Exit(Leaf(TableCell(Alignment::Unspecified))), "|"), - (Enter(Leaf(TableCell(Alignment::Unspecified))), "|"), + (Enter(Leaf(TableCell(Alignment::Unspecified))), ""), (Inline, "b"), (Exit(Leaf(TableCell(Alignment::Unspecified))), "|"), - (Enter(Leaf(TableCell(Alignment::Unspecified))), "|"), + (Enter(Leaf(TableCell(Alignment::Unspecified))), ""), (Inline, "c"), (Exit(Leaf(TableCell(Alignment::Unspecified))), "|"), - (Exit(Container(TableRow { head: true })), "|"), + (Exit(Container(TableRow { head: true })), ""), (Enter(Container(TableRow { head: false })), "|"), - (Enter(Leaf(TableCell(Alignment::Unspecified))), "|"), + (Enter(Leaf(TableCell(Alignment::Unspecified))), ""), (Inline, "1"), (Exit(Leaf(TableCell(Alignment::Unspecified))), "|"), - (Enter(Leaf(TableCell(Alignment::Unspecified))), "|"), + (Enter(Leaf(TableCell(Alignment::Unspecified))), ""), (Inline, "2"), (Exit(Leaf(TableCell(Alignment::Unspecified))), "|"), - (Enter(Leaf(TableCell(Alignment::Unspecified))), "|"), + (Enter(Leaf(TableCell(Alignment::Unspecified))), ""), (Inline, "3"), (Exit(Leaf(TableCell(Alignment::Unspecified))), "|"), - (Exit(Container(TableRow { head: false })), "|"), + (Exit(Container(TableRow { head: false })), ""), (Exit(Container(Table)), "") ); } @@ -2377,10 +2370,10 @@ mod test { "||", (Enter(Container(Table)), ""), (Enter(Container(TableRow { head: false })), "|"), - (Enter(Leaf(TableCell(Alignment::Unspecified))), "|"), + (Enter(Leaf(TableCell(Alignment::Unspecified))), ""), (Inline, ""), (Exit(Leaf(TableCell(Alignment::Unspecified))), "|"), - (Exit(Container(TableRow { head: false })), "|"), + (Exit(Container(TableRow { head: false })), ""), (Exit(Container(Table)), ""), ); } @@ -2401,10 +2394,10 @@ mod test { "|a|\npara", (Enter(Container(Table)), ""), (Enter(Container(TableRow { head: false })), "|"), - (Enter(Leaf(TableCell(Alignment::Unspecified))), "|"), + (Enter(Leaf(TableCell(Alignment::Unspecified))), ""), (Inline, "a"), (Exit(Leaf(TableCell(Alignment::Unspecified))), "|"), - (Exit(Container(TableRow { head: false })), "|"), + (Exit(Container(TableRow { head: false })), ""), (Exit(Container(Table)), ""), (Enter(Leaf(Paragraph)), ""), (Inline, "para"), @@ -2421,16 +2414,16 @@ mod test { ), (Enter(Container(Table)), ""), (Enter(Container(TableRow { head: false })), "|"), - (Enter(Leaf(TableCell(Alignment::Left))), "|"), + (Enter(Leaf(TableCell(Alignment::Left))), ""), (Inline, "left"), (Exit(Leaf(TableCell(Alignment::Left))), "|"), - (Enter(Leaf(TableCell(Alignment::Center))), "|"), + (Enter(Leaf(TableCell(Alignment::Center))), ""), (Inline, "center"), (Exit(Leaf(TableCell(Alignment::Center))), "|"), - (Enter(Leaf(TableCell(Alignment::Right))), "|"), + (Enter(Leaf(TableCell(Alignment::Right))), ""), (Inline, "right"), (Exit(Leaf(TableCell(Alignment::Right))), "|"), - (Exit(Container(TableRow { head: false })), "|"), + (Exit(Container(TableRow { head: false })), ""), (Exit(Container(Table)), "") ); test_parse!( @@ -2440,10 +2433,10 @@ mod test { ), (Enter(Container(Table)), ""), (Enter(Container(TableRow { head: true })), "|"), - (Enter(Leaf(TableCell(Alignment::Right))), "|"), + (Enter(Leaf(TableCell(Alignment::Right))), ""), (Inline, ""), (Exit(Leaf(TableCell(Alignment::Right))), "|"), - (Exit(Container(TableRow { head: true })), "|"), + (Exit(Container(TableRow { head: true })), ""), (Exit(Container(Table)), ""), ); } @@ -2457,10 +2450,10 @@ mod test { (Inline, "caption"), (Exit(Leaf(Caption)), ""), (Enter(Container(TableRow { head: false })), "|"), - (Enter(Leaf(TableCell(Alignment::Unspecified))), "|"), + (Enter(Leaf(TableCell(Alignment::Unspecified))), ""), (Inline, "a"), (Exit(Leaf(TableCell(Alignment::Unspecified))), "|"), - (Exit(Container(TableRow { head: false })), "|"), + (Exit(Container(TableRow { head: false })), ""), (Exit(Container(Table)), ""), ); } @@ -2482,10 +2475,10 @@ mod test { (Inline, "continued"), (Exit(Leaf(Caption)), ""), (Enter(Container(TableRow { head: false })), "|"), - (Enter(Leaf(TableCell(Alignment::Unspecified))), "|"), + (Enter(Leaf(TableCell(Alignment::Unspecified))), ""), (Inline, "a"), (Exit(Leaf(TableCell(Alignment::Unspecified))), "|"), - (Exit(Container(TableRow { head: false })), "|"), + (Exit(Container(TableRow { head: false })), ""), (Exit(Container(Table)), ""), (Atom(Blankline), "\n"), (Enter(Leaf(Paragraph)), ""), @@ -2500,10 +2493,10 @@ mod test { "|a|\n^ ", (Enter(Container(Table)), ""), (Enter(Container(TableRow { head: false })), "|"), - (Enter(Leaf(TableCell(Alignment::Unspecified))), "|"), + (Enter(Leaf(TableCell(Alignment::Unspecified))), ""), (Inline, "a"), (Exit(Leaf(TableCell(Alignment::Unspecified))), "|"), - (Exit(Container(TableRow { head: false })), "|"), + (Exit(Container(TableRow { head: false })), ""), (Exit(Container(Table)), ""), (Enter(Leaf(Paragraph)), ""), (Inline, "^"), @@ -2523,24 +2516,32 @@ mod test { #[test] fn parse_div() { test_parse!( - concat!("::: cls\n", "abc\n", ":::\n",), - (Enter(Container(Div { class: "cls" })), "cls"), + concat!( + "::: cls\n", // + "abc\n", // + ":::\n", // + ), + (Enter(Container(Div { class: "cls" })), "::: cls\n"), (Enter(Leaf(Paragraph)), ""), (Inline, "abc"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(Div { class: "cls" })), "cls"), + (Exit(Container(Div { class: "cls" })), ":::\n"), ); } #[test] fn parse_div_no_class() { test_parse!( - concat!(":::\n", "abc\n", ":::\n",), - (Enter(Container(Div { class: "" })), ""), + concat!( + ":::\n", // + "abc\n", // + ":::\n", // + ), + (Enter(Container(Div { class: "" })), ":::\n"), (Enter(Leaf(Paragraph)), ""), (Inline, "abc"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(Div { class: "" })), ""), + (Exit(Container(Div { class: "" })), ":::\n"), ); } @@ -2548,7 +2549,7 @@ mod test { fn parse_inner_indent() { test_parse!( concat!( - "- - a\n", + "- - a\n", // " - b\n", // ), ( @@ -2559,7 +2560,7 @@ mod test { }, marker: "-", })), - "-" + "" ), (Enter(Container(ListItem(ListItemKind::List))), "-"), ( @@ -2570,18 +2571,18 @@ mod test { }, marker: "-", })), - "-" + "" ), (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "a"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(ListItemKind::List))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), ""), (Enter(Container(ListItem(ListItemKind::List))), "-"), (Enter(Leaf(Paragraph)), ""), (Inline, "b"), (Exit(Leaf(Paragraph)), ""), - (Exit(Container(ListItem(ListItemKind::List))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), ""), ( Exit(Container(List { kind: ListKind { @@ -2590,9 +2591,9 @@ mod test { }, marker: "-", })), - "-" + "" ), - (Exit(Container(ListItem(ListItemKind::List))), "-"), + (Exit(Container(ListItem(ListItemKind::List))), ""), ( Exit(Container(List { kind: ListKind { @@ -2601,7 +2602,7 @@ mod test { }, marker: "-", })), - "-" + "" ), ); } @@ -2680,10 +2681,10 @@ mod test { indent: 0, kind: FenceKind::CodeBlock(b'`'), fence_length: 4, - has_spec: true, + spec: "lang", has_closing_fence: true, }, - "lang", + "```` lang\n", 5, ); test_block!( @@ -2699,10 +2700,10 @@ mod test { indent: 0, kind: FenceKind::CodeBlock(b'`'), fence_length: 3, - has_spec: false, + spec: "", has_closing_fence: true, }, - "", + "```\n", 3, ); test_block!( @@ -2723,9 +2724,10 @@ mod test { "[tag]: url\n", Kind::Definition { indent: 0, - footnote: false + footnote: false, + label: "tag", }, - "tag", + "[tag]:", 1 ); } @@ -2739,9 +2741,10 @@ mod test { ), Kind::Definition { indent: 0, - footnote: false + footnote: false, + label: "tag", }, - "tag", + "[tag]:", 2, ); test_block!( @@ -2751,9 +2754,10 @@ mod test { ), Kind::Definition { indent: 0, - footnote: false + footnote: false, + label: "tag", }, - "tag", + "[tag]:", 1, ); } @@ -2764,9 +2768,10 @@ mod test { "[^tag]:\n", Kind::Definition { indent: 0, - footnote: true + footnote: true, + label: "tag", }, - "tag", + "[^tag]:", 1 ); } @@ -2777,9 +2782,10 @@ mod test { "[^tag]: a\n", Kind::Definition { indent: 0, - footnote: true + footnote: true, + label: "tag", }, - "tag", + "[^tag]:", 1 ); } @@ -2793,9 +2799,10 @@ mod test { ), Kind::Definition { indent: 0, - footnote: true + footnote: true, + label: "tag", }, - "tag", + "[^tag]:", 2, ); } @@ -2811,9 +2818,10 @@ mod test { ), Kind::Definition { indent: 0, - footnote: true + footnote: true, + label: "tag", }, - "tag", + "[^tag]:", 3, ); } diff --git a/src/tree.rs b/src/tree.rs index 1e32f9f..d441805 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -75,7 +75,7 @@ impl Iterator for Tree { let n = &self.nodes[head.index()]; let kind = match &n.kind { NodeKind::Root => unreachable!(), - NodeKind::Container(c, child) => { + NodeKind::Container(c, child, ..) => { self.branch.push(head); self.head = *child; EventKind::Enter(c.clone()) @@ -91,10 +91,16 @@ impl Iterator for Tree { }; Some(Event { kind, span: n.span }) } else if let Some(block_ni) = self.branch.pop() { - let InternalNode { next, kind, span } = &self.nodes[block_ni.index()]; - let kind = EventKind::Exit(kind.container().unwrap().clone()); - self.head = *next; - Some(Event { kind, span: *span }) + let InternalNode { next, kind, .. } = &self.nodes[block_ni.index()]; + if let NodeKind::Container(c, _, span) = kind { + self.head = *next; + Some(Event { + kind: EventKind::Exit(c.clone()), + span: *span, + }) + } else { + panic!() + } } else { None } @@ -122,7 +128,7 @@ impl NodeIndex { #[derive(Debug, Clone, PartialEq, Eq)] enum NodeKind { Root, - Container(C, Option), + Container(C, Option, Span), Atom(A), Inline, } @@ -144,7 +150,7 @@ pub struct Builder { impl NodeKind { fn child(&self) -> Option { - if let NodeKind::Container(_, child) = self { + if let NodeKind::Container(_, child, _) = self { *child } else { None @@ -152,20 +158,12 @@ impl NodeKind { } fn child_mut(&mut self) -> &mut Option { - if let NodeKind::Container(_, child) = self { + if let NodeKind::Container(_, child, _) = self { child } else { panic!() } } - - fn container(&self) -> Option<&C> { - if let NodeKind::Container(c, _) = self { - Some(c) - } else { - None - } - } } impl<'a, C, A> From<&'a mut NodeKind> for Element<'a, C, A> { @@ -213,26 +211,36 @@ impl Builder { self.depth += 1; self.add_node(InternalNode { span, - kind: NodeKind::Container(c, None), + kind: NodeKind::Container(c, None, Span::new(0, 0)), next: None, }) } - pub(super) fn exit(&mut self) { + pub(super) fn exit(&mut self, span: Span) { self.depth -= 1; if let Some(head) = self.head.take() { - if matches!(self.nodes[head.index()].kind, NodeKind::Container(..)) { + if let NodeKind::Container(_, _, sp) = &mut self.nodes[head.index()].kind { + *sp = span; self.branch.push(head); + return; } } else { let last = self.branch.pop(); debug_assert_ne!(last, None); } + + if let NodeKind::Container(_, _, sp) = + &mut self.nodes[self.branch.last().unwrap().index()].kind + { + *sp = span; + } else { + panic!(); + } } /// Exit and discard all the contents of the current container. pub(super) fn exit_discard(&mut self) { - self.exit(); + self.exit(Span::new(0, (1 << 31) - 1)); let exited = self.branch.pop().unwrap(); self.nodes.drain(exited.index()..); let (prev, has_parent) = self.replace(exited, None); @@ -244,14 +252,25 @@ impl Builder { } /// Swap the node and its children with either its parent or the node before. - pub fn swap_prev(&mut self, node: NodeIndex) { + pub fn swap_prev(&mut self, node: NodeIndex, span: Span) { let next = self.nodes[node.index()].next; - if let Some(n) = next { - self.replace(n, None); - } let (prev, _) = self.replace(node, next); + if let Some(n) = next { + self.nodes[prev.index()].span = self.nodes[n.index()].span.empty_before(); + self.replace(n, None); + } else { + self.nodes[prev.index()].span = self.nodes[self.nodes.len() - 1].span.empty_after(); + } self.replace(prev, Some(node)); self.nodes[node.index()].next = Some(prev); + self.nodes[node.index()].span = span; + + let span = self.nodes[prev.index()].span; + if let NodeKind::Container(_, _, sp) = &mut self.nodes[node.index()].kind { + *sp = span; + } else { + panic!() + } } /// Remove the specified node and its children. @@ -314,7 +333,7 @@ impl Builder { debug_assert_eq!(head.next, None); head.next = Some(ni); } - NodeKind::Container(_, child) => { + NodeKind::Container(_, child, _) => { self.branch.push(*head_ni); // set child pointer of current container debug_assert_eq!(*child, None); @@ -390,20 +409,20 @@ mod test { tree.enter(1, Span::new(0, 1)); tree.atom(11, Span::new(0, 1)); tree.atom(12, Span::new(0, 1)); - tree.exit(); + tree.exit(Span::new(0, 0)); tree.enter(2, Span::new(1, 5)); tree.enter(21, Span::new(2, 5)); tree.enter(211, Span::new(3, 4)); tree.atom(2111, Span::new(3, 4)); - tree.exit(); - tree.exit(); + tree.exit(Span::new(0, 0)); + tree.exit(Span::new(0, 0)); tree.enter(22, Span::new(4, 5)); tree.atom(221, Span::new(4, 5)); - tree.exit(); - tree.exit(); + tree.exit(Span::new(0, 0)); + tree.exit(Span::new(0, 0)); tree.enter(3, Span::new(5, 6)); tree.atom(31, Span::new(5, 6)); - tree.exit(); + tree.exit(Span::new(0, 0)); assert_eq!( format!("{:?}", tree.finish()), concat!( From e84385c2db7fb2ef1cc0eb5c1274d4a3fe4c514d Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Tue, 25 Apr 2023 20:18:31 +0200 Subject: [PATCH 19/31] block: replace tree with event vec a lot simpler to use and reason about should also make it easier to move to streaming --- src/block.rs | 313 +++++++++++++++++++++++------------- src/lib.rs | 92 +++++++---- src/tree.rs | 444 --------------------------------------------------- 3 files changed, 265 insertions(+), 584 deletions(-) delete mode 100644 src/tree.rs diff --git a/src/block.rs b/src/block.rs index 1693849..40485e0 100644 --- a/src/block.rs +++ b/src/block.rs @@ -5,15 +5,26 @@ use crate::Span; use crate::attr; use crate::lex; -use crate::tree; use Atom::*; use Container::*; use Leaf::*; use ListType::*; -pub type Tree<'s> = tree::Tree, Atom>; -pub type TreeBuilder<'s> = tree::Builder, Atom>; +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub struct Event<'s> { + pub kind: EventKind<'s>, + pub span: Span, +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum EventKind<'s> { + Enter(Node<'s>), + Inline, + Exit(Node<'s>), + Atom(Atom), + Stale, +} #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Node<'s> { @@ -22,7 +33,7 @@ pub enum Node<'s> { } #[must_use] -pub fn parse(src: &str) -> Tree { +pub fn parse(src: &str) -> Vec { TreeParser::new(src).parse() } @@ -106,15 +117,13 @@ struct OpenList { /// Depth in the tree where the direct list items of the list are. Needed to determine when to /// close the list. depth: u16, - /// Index to node in tree, required to update tightness. - node: tree::NodeIndex, + /// Index to event in tree, required to update tightness. + event: usize, } /// Parser for block-level tree structure of entire document. struct TreeParser<'s> { src: &'s str, - tree: TreeBuilder<'s>, - /// The previous block element was a blank line. prev_blankline: bool, prev_loose: bool, @@ -124,24 +133,30 @@ struct TreeParser<'s> { open_sections: Vec, /// Alignments for each column in for the current table. alignments: Vec, + /// Current container depth. + open: Vec, + /// Buffer queue for next events. Events are buffered until no modifications due to future + /// characters are needed. + events: Vec>, } impl<'s> TreeParser<'s> { #[must_use] - pub fn new(src: &'s str) -> Self { + fn new(src: &'s str) -> Self { Self { src, - tree: TreeBuilder::new(), prev_blankline: false, prev_loose: false, open_lists: Vec::new(), alignments: Vec::new(), open_sections: Vec::new(), + open: Vec::new(), + events: Vec::new(), } } #[must_use] - pub fn parse(mut self) -> Tree<'s> { + fn parse(mut self) -> Vec> { let mut lines = lines(self.src).collect::>(); let mut line_pos = 0; while line_pos < lines.len() { @@ -154,10 +169,43 @@ impl<'s> TreeParser<'s> { while let Some(l) = self.open_lists.pop() { self.close_list(l, self.src.len()); } - for _ in self.open_sections.drain(..) { - self.tree.exit(Span::empty_at(self.src.len())); // section + + for _ in std::mem::take(&mut self.open_sections).drain(..) { + self.exit(Span::empty_at(self.src.len())); } - self.tree.finish() + debug_assert_eq!(self.open, &[]); + self.events + } + + fn inline(&mut self, span: Span) { + self.events.push(Event { + kind: EventKind::Inline, + span, + }); + } + + fn enter(&mut self, node: Node<'s>, span: Span) -> usize { + let i = self.events.len(); + self.open.push(i); + self.events.push(Event { + kind: EventKind::Enter(node), + span, + }); + i + } + + fn exit(&mut self, span: Span) -> usize { + let i = self.events.len(); + let node = if let EventKind::Enter(node) = self.events[self.open.pop().unwrap()].kind { + node + } else { + panic!(); + }; + self.events.push(Event { + kind: EventKind::Exit(node), + span, + }); + i } /// Recursively parse a block and all of its children. Return number of lines the block uses. @@ -198,8 +246,8 @@ impl<'s> TreeParser<'s> { // close list if a non list item or a list item of new type appeared if let Some(OpenList { ty, depth, .. }) = self.open_lists.last() { - debug_assert!(usize::from(*depth) <= self.tree.depth()); - if self.tree.depth() == (*depth).into() + debug_assert!(usize::from(*depth) <= self.open.len()); + if self.open.len() == (*depth).into() && !matches!(kind, Kind::ListItem { ty: ty_new, .. } if *ty == ty_new) { let l = self.open_lists.pop().unwrap(); @@ -213,15 +261,17 @@ impl<'s> TreeParser<'s> { } else { self.prev_loose = false; if self.prev_blankline { - if let Some(OpenList { node, depth, .. }) = self.open_lists.last() { - if usize::from(*depth) >= self.tree.depth() + if let Some(OpenList { event, depth, .. }) = self.open_lists.last() { + if usize::from(*depth) >= self.open.len() || !matches!(kind, Kind::ListItem { .. }) { - let mut elem = self.tree.elem(*node); - let ListKind { tight, .. } = elem.list_mut().unwrap(); - if *tight { - self.prev_loose = true; - *tight = false; + if let EventKind::Enter(Node::Container(List { kind, .. })) = + &mut self.events[*event].kind + { + if kind.tight { + self.prev_loose = true; + kind.tight = false; + } } } } @@ -269,7 +319,10 @@ impl<'s> TreeParser<'s> { }; match block { - Block::Atom(a) => self.tree.atom(a, span_start), + Block::Atom(a) => self.events.push(Event { + kind: EventKind::Atom(a), + span: span_start, + }), Block::Leaf(l) => self.parse_leaf(l, &kind, span_start, span_end, lines), Block::Container(Table) => self.parse_table(lines, span_start, span_end), Block::Container(c) => { @@ -325,16 +378,13 @@ impl<'s> TreeParser<'s> { .iter() .rposition(|l| l < level) .map_or(0, |i| i + 1); - self.open_sections.drain(first_close..).for_each(|_| { - self.tree.exit(Span::empty_at(span_start.start())); // section - }); + let pos = span_start.start() as u32; + for _ in 0..(self.open_sections.len() - first_close) { + self.exit(Span::empty_at(span_start.start())); // section + } + self.open_sections.drain(first_close..); self.open_sections.push(*level); - self.tree.enter( - Node::Container(Section { - pos: span_start.start() as u32, - }), - span_start.empty_before(), - ); + self.enter(Node::Container(Section { pos }), span_start.empty_before()); } // trim '#' characters @@ -343,12 +393,12 @@ impl<'s> TreeParser<'s> { } } - self.tree.enter(Node::Leaf(leaf), span_start); + self.enter(Node::Leaf(leaf), span_start); lines .iter() .filter(|l| !matches!(k, Kind::Heading { .. }) || !l.is_empty()) - .for_each(|line| self.tree.inline(*line)); - self.tree.exit(span_end); + .for_each(|line| self.inline(*line)); + self.exit(span_end); } fn parse_container( @@ -392,11 +442,11 @@ impl<'s> TreeParser<'s> { .open_lists .last() .map_or(true, |OpenList { depth, .. }| { - usize::from(*depth) < self.tree.depth() + usize::from(*depth) < self.open.len() }); if same_depth { let tight = true; - let node = self.tree.enter( + let event = self.enter( Node::Container(Container::List { kind: ListKind { ty: *ty, tight }, marker: span_start.of(self.src), @@ -405,48 +455,77 @@ impl<'s> TreeParser<'s> { ); self.open_lists.push(OpenList { ty: *ty, - depth: self.tree.depth().try_into().unwrap(), - node, + depth: self.open.len().try_into().unwrap(), + event, }); } } let dt = if let ListItem(ListItemKind::Description) = c { - let dt = self.tree.enter(Node::Leaf(DescriptionTerm), span_start); - self.tree.exit(span_start.trim_end(self.src).empty_after()); - let span_open = span_start; + let dt = self.enter(Node::Leaf(DescriptionTerm), span_start); + self.exit(span_start.trim_end(self.src).empty_after()); span_start = lines[0].empty_before(); - Some((dt, span_open)) + Some((dt, self.events.len(), self.open.len())) } else { None }; - let node = self.tree.enter(Node::Container(c), span_start); + self.enter(Node::Container(c), span_start); let mut l = 0; while l < lines.len() { l += self.parse_block(&mut lines[l..], false); } - if let Some((node_dt, span_open)) = dt { - let node_child = if let Some(node_child) = self.tree.children(node).next() { - if let tree::Element::Container(Node::Leaf(l @ Paragraph)) = node_child.elem { + if let Some((empty_term, enter_detail, open_detail)) = dt { + let enter_term = enter_detail + 1; + if let Some(first_child) = self.events.get_mut(enter_term) { + if let EventKind::Enter(Node::Leaf(l @ Paragraph)) = &mut first_child.kind { + // convert paragraph into description term *l = DescriptionTerm; - Some(node_child.index) - } else { - None + let exit_term = if let Some(i) = self.events[enter_term + 1..] + .iter_mut() + .position(|e| matches!(e.kind, EventKind::Exit(Node::Leaf(Paragraph)))) + { + enter_term + 1 + i + } else { + panic!() + }; + if let EventKind::Exit(Node::Leaf(l)) = &mut self.events[exit_term].kind { + *l = DescriptionTerm; + } else { + panic!() + } + + // remove empty description term + self.events[empty_term].kind = EventKind::Stale; + self.events[empty_term + 1].kind = EventKind::Stale; + + // move out term before detail + self.events[enter_term].span = self.events[empty_term].span; + let first_detail = self.events[exit_term + 1..] + .iter() + .position(|e| !matches!(e.kind, EventKind::Atom(Blankline))) + .map(|i| exit_term + 1 + i) + .unwrap_or(self.events.len()); + let detail_pos = self + .events + .get(first_detail) + .map(|e| e.span.start()) + .unwrap_or_else(|| self.events.last().unwrap().span.end()); + self.events + .copy_within(enter_term..first_detail, enter_detail); + self.events[first_detail - 1] = Event { + kind: EventKind::Enter(Node::Container(c)), + span: Span::empty_at(detail_pos), + }; + self.open[open_detail] = first_detail - 1; } - } else { - None - }; - if let Some(node_child) = node_child { - self.tree.swap_prev(node_child, span_open); - self.tree.remove(node_dt); } } if let Some(OpenList { depth, .. }) = self.open_lists.last() { - debug_assert!(usize::from(*depth) <= self.tree.depth()); - if self.tree.depth() == (*depth).into() { + debug_assert!(usize::from(*depth) <= self.open.len()); + if self.open.len() == (*depth).into() { self.prev_blankline = false; self.prev_loose = false; let l = self.open_lists.pop().unwrap(); @@ -454,38 +533,37 @@ impl<'s> TreeParser<'s> { } } - self.tree.exit(span_end); + self.exit(span_end); } fn parse_table(&mut self, lines: &mut [Span], span_start: Span, span_end: Span) { self.alignments.clear(); - self.tree.enter(Node::Container(Table), span_start); + self.enter(Node::Container(Table), span_start); let caption_line = lines .iter() .position(|sp| sp.of(self.src).trim_start().starts_with('^')) .map_or(lines.len(), |caption_line| { - self.tree.enter(Node::Leaf(Caption), span_start); + self.enter(Node::Leaf(Caption), span_start); lines[caption_line] = lines[caption_line] .trim_start(self.src) .skip_chars(2, self.src); lines[lines.len() - 1] = lines[lines.len() - 1].trim_end(self.src); for line in &lines[caption_line..] { - self.tree.inline(*line); + self.inline(*line); } - self.tree.exit(span_end); + self.exit(span_end); caption_line }); - let mut last_row_node = None; + let mut last_row_event = None; for row in &lines[..caption_line] { let row = row.trim(self.src); if row.is_empty() { break; } - let row_node = self - .tree - .enter(Node::Container(TableRow { head: false }), row.with_len(1)); + let row_event_enter = + self.enter(Node::Container(TableRow { head: false }), row.with_len(1)); let rem = row.skip(1); // | let lex = lex::Lexer::new(rem.of(self.src)); let mut pos = rem.start(); @@ -514,7 +592,7 @@ impl<'s> TreeParser<'s> { } }; separator_row &= separator_cell; - self.tree.enter( + self.enter( Node::Leaf(TableCell( self.alignments .get(column_index) @@ -523,8 +601,8 @@ impl<'s> TreeParser<'s> { )), Span::empty_at(cell_start), ); - self.tree.inline(span); - self.tree.exit(Span::new(pos, pos + 1)); // cell + self.inline(span); + self.exit(Span::new(pos, pos + 1)); cell_start = pos + len; column_index += 1; } @@ -540,11 +618,11 @@ impl<'s> TreeParser<'s> { if separator_row && verbatim.is_none() { self.alignments.clear(); self.alignments.extend( - self.tree - .children(row_node) - .filter(|n| matches!(n.elem, tree::Element::Inline)) - .map(|n| { - let cell = n.span.of(self.src); + self.events[row_event_enter + 1..] + .iter() + .filter(|e| matches!(e.kind, EventKind::Inline)) + .map(|e| { + let cell = e.span.of(self.src); let l = cell.as_bytes()[0] == b':'; let r = cell.as_bytes()[cell.len() - 1] == b':'; match (l, r) { @@ -555,62 +633,67 @@ impl<'s> TreeParser<'s> { } }), ); - self.tree.exit_discard(); // table row - if let Some(head_row) = last_row_node { - self.tree - .children(head_row) - .filter(|n| { - matches!(n.elem, tree::Element::Container(Node::Leaf(TableCell(..)))) + self.open.pop(); + self.events.drain(row_event_enter..); // remove table row + if let Some((head_row_enter, head_row_exit)) = last_row_event { + self.events[head_row_enter + 1..] + .iter_mut() + .filter(|e| { + matches!( + e.kind, + EventKind::Enter(Node::Leaf(TableCell(..))) + | EventKind::Exit(Node::Leaf(TableCell(..))) + ) }) .zip( self.alignments .iter() .copied() - .chain(std::iter::repeat(Alignment::Unspecified)), + .chain(std::iter::repeat(Alignment::Unspecified)) + .flat_map(|a| [a, a].into_iter()), ) - .for_each(|(n, new_align)| { - if let tree::Element::Container(Node::Leaf(TableCell(alignment))) = - n.elem - { + .for_each(|(e, new_align)| match &mut e.kind { + EventKind::Enter(Node::Leaf(TableCell(alignment))) + | EventKind::Exit(Node::Leaf(TableCell(alignment))) => { *alignment = new_align; } + _ => panic!(), }); - if let tree::Element::Container(Node::Container(TableRow { head })) = - self.tree.elem(head_row) - { + let event: &mut Event = &mut self.events[head_row_enter]; + if let EventKind::Enter(Node::Container(TableRow { head })) = &mut event.kind { + *head = true; + } else { + panic!() + } + let event: &mut Event = &mut self.events[head_row_exit]; + if let EventKind::Exit(Node::Container(TableRow { head })) = &mut event.kind { *head = true; } else { panic!() } } } else { - self.tree.exit(Span::empty_at(pos)); // table row - last_row_node = Some(row_node); + let row_event_exit = self.exit(Span::empty_at(pos)); // table row + last_row_event = Some((row_event_enter, row_event_exit)); } } - self.tree.exit(span_end); // table + self.exit(span_end); } fn close_list(&mut self, list: OpenList, pos: usize) { if self.prev_loose { - let mut elem = self.tree.elem(list.node); - let ListKind { tight, .. } = elem.list_mut().unwrap(); - // ignore blankline at end - *tight = true; + if let EventKind::Enter(Node::Container(List { kind, .. })) = + &mut self.events[list.event].kind + { + // ignore blankline at end + kind.tight = true; + } else { + panic!() + } } - self.tree.exit(Span::empty_at(pos)); // list - } -} - -impl<'t, 's> tree::Element<'t, Node<'s>, Atom> { - fn list_mut(&mut self) -> Option<&mut ListKind> { - if let tree::Element::Container(Node::Container(Container::List { kind, .. })) = self { - Some(kind) - } else { - None - } + self.exit(Span::empty_at(pos)); // list } } @@ -1023,13 +1106,13 @@ fn lines(src: &str) -> impl Iterator + '_ { #[cfg(test)] mod test { - use crate::tree::EventKind::*; use crate::Alignment; use crate::OrderedListNumbering::*; use crate::OrderedListStyle::*; use super::Atom::*; use super::Container::*; + use super::EventKind::*; use super::FenceKind; use super::Kind; use super::Leaf::*; @@ -1041,7 +1124,7 @@ mod test { macro_rules! test_parse { ($src:expr $(,$($event:expr),* $(,)?)?) => { let t = super::TreeParser::new($src).parse(); - let actual = t.map(|ev| (ev.kind, ev.span.of($src))).collect::>(); + let actual = t.into_iter().map(|ev| (ev.kind, ev.span.of($src))).collect::>(); let expected = &[$($($event),*,)?]; assert_eq!( actual, @@ -2189,11 +2272,13 @@ mod test { })), "" ), + (Stale, ":"), + (Stale, ""), (Enter(Leaf(DescriptionTerm)), ":"), (Inline, "term"), (Exit(Leaf(DescriptionTerm)), ""), - (Enter(Container(ListItem(ListItemKind::Description))), ""), (Atom(Blankline), "\n"), + (Enter(Container(ListItem(ListItemKind::Description))), ""), (Enter(Leaf(Paragraph)), ""), (Inline, "description"), (Exit(Leaf(Paragraph)), ""), @@ -2233,12 +2318,14 @@ mod test { })), "", ), + (Stale, ":"), + (Stale, ""), (Enter(Leaf(DescriptionTerm)), ":"), (Inline, "apple\n"), (Inline, "fruit"), (Exit(Leaf(DescriptionTerm)), ""), - (Enter(Container(ListItem(ListItemKind::Description))), ""), (Atom(Blankline), "\n"), + (Enter(Container(ListItem(ListItemKind::Description))), ""), (Enter(Leaf(Paragraph)), ""), (Inline, "Paragraph one"), (Exit(Leaf(Paragraph)), ""), @@ -2279,6 +2366,8 @@ mod test { "", ), (Exit(Container(ListItem(ListItemKind::Description))), ""), + (Stale, ":"), + (Stale, ""), (Enter(Leaf(DescriptionTerm)), ":"), (Inline, "orange"), (Exit(Leaf(DescriptionTerm)), ""), diff --git a/src/lib.rs b/src/lib.rs index 0051486..889b6c3 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -60,7 +60,6 @@ mod block; mod inline; mod lex; mod span; -mod tree; use span::Span; @@ -555,7 +554,7 @@ pub struct Parser<'s> { src: &'s str, /// Block tree parsed at first. - tree: block::Tree<'s>, + blocks: std::iter::Peekable>>, /// Contents obtained by the prepass. pre_pass: PrePass<'s>, @@ -600,31 +599,48 @@ impl<'s> PrePass<'s> { #[must_use] fn new( src: &'s str, - mut tree: block::Tree<'s>, + blocks: std::slice::Iter>, inline_parser: &mut inline::Parser<'s>, ) -> Self { let mut link_definitions = Map::new(); let mut headings: Vec = Vec::new(); let mut used_ids: Set<&str> = Set::new(); + let mut blocks = blocks.peekable(); + let mut attr_prev: Option = None; - while let Some(e) = tree.next() { + while let Some(e) = blocks.next() { match e.kind { - tree::EventKind::Enter(block::Node::Leaf(block::Leaf::LinkDefinition { + block::EventKind::Enter(block::Node::Leaf(block::Leaf::LinkDefinition { label, })) => { + fn next_is_inline( + bs: &mut std::iter::Peekable>, + ) -> bool { + matches!(bs.peek().map(|e| &e.kind), Some(block::EventKind::Inline)) + } + // All link definition tags have to be obtained initially, as references can // appear before the definition. let attrs = attr_prev.map_or_else(Attributes::new, |sp| attr::parse(sp.of(src))); - let url = match tree.count_children() { - 0 => "".into(), - 1 => tree.take_inlines().next().unwrap().of(src).trim().into(), - _ => tree.take_inlines().map(|sp| sp.of(src).trim()).collect(), + let url = if !next_is_inline(&mut blocks) { + "".into() + } else { + let start = blocks.next().unwrap().span.of(src).trim(); + if !next_is_inline(&mut blocks) { + start.into() + } else { + let mut url = start.to_string(); + while next_is_inline(&mut blocks) { + url.push_str(blocks.next().unwrap().span.of(src).trim()); + } + url.into() + } }; link_definitions.insert(label, (url, attrs)); } - tree::EventKind::Enter(block::Node::Leaf(block::Leaf::Heading { .. })) => { + block::EventKind::Enter(block::Node::Leaf(block::Leaf::Heading { .. })) => { // All headings ids have to be obtained initially, as references can appear // before the heading. Additionally, determining the id requires inline parsing // as formatting must be removed. @@ -639,10 +655,21 @@ impl<'s> PrePass<'s> { let mut id_auto = String::new(); let mut text = String::new(); let mut last_whitespace = true; - let inlines = tree.take_inlines().collect::>(); inline_parser.reset(); - inlines.iter().enumerate().for_each(|(i, sp)| { - inline_parser.feed_line(*sp, i == inlines.len() - 1); + let mut last_end = 0; + loop { + let span_inline = blocks.next().and_then(|e| { + if matches!(e.kind, block::EventKind::Inline) { + last_end = e.span.end(); + Some(e.span) + } else { + None + } + }); + inline_parser.feed_line( + span_inline.unwrap_or_else(|| Span::empty_at(last_end)), + span_inline.is_none(), + ); inline_parser.for_each(|ev| match ev.kind { inline::EventKind::Str => { text.push_str(ev.span.of(src)); @@ -667,8 +694,11 @@ impl<'s> PrePass<'s> { id_auto.push('-'); } _ => {} - }) - }); + }); + if span_inline.is_none() { + break; + } + } id_auto.drain(id_auto.trim_end_matches('-').len()..); // ensure id unique @@ -700,11 +730,11 @@ impl<'s> PrePass<'s> { id_override, }); } - tree::EventKind::Atom(block::Atom::Attributes) => { + block::EventKind::Atom(block::Atom::Attributes) => { attr_prev = Some(e.span); } - tree::EventKind::Enter(..) - | tree::EventKind::Exit(block::Node::Container(block::Container::Section { + block::EventKind::Enter(..) + | block::EventKind::Exit(block::Node::Container(block::Container::Section { .. })) => {} _ => { @@ -746,13 +776,13 @@ impl<'s> PrePass<'s> { impl<'s> Parser<'s> { #[must_use] pub fn new(src: &'s str) -> Self { - let tree = block::parse(src); + let blocks = block::parse(src); let mut inline_parser = inline::Parser::new(src); - let pre_pass = PrePass::new(src, tree.clone(), &mut inline_parser); + let pre_pass = PrePass::new(src, blocks.iter(), &mut inline_parser); Self { src, - tree, + blocks: blocks.into_iter().peekable(), pre_pass, block_attributes: Attributes::new(), table_head_row: false, @@ -866,10 +896,10 @@ impl<'s> Parser<'s> { } fn block(&mut self) -> Option> { - while let Some(ev) = &mut self.tree.next() { + while let Some(ev) = &mut self.blocks.next() { let content = ev.span.of(self.src); let event = match ev.kind { - tree::EventKind::Atom(a) => match a { + block::EventKind::Atom(a) => match a { block::Atom::Blankline => Event::Blankline, block::Atom::ThematicBreak => { Event::ThematicBreak(self.block_attributes.take()) @@ -879,8 +909,8 @@ impl<'s> Parser<'s> { continue; } }, - tree::EventKind::Enter(c) | tree::EventKind::Exit(c) => { - let enter = matches!(ev.kind, tree::EventKind::Enter(..)); + block::EventKind::Enter(c) | block::EventKind::Exit(c) => { + let enter = matches!(ev.kind, block::EventKind::Enter(..)); let cont = match c { block::Node::Leaf(l) => { self.inline_parser.reset(); @@ -977,15 +1007,21 @@ impl<'s> Parser<'s> { Event::End(cont) } } - tree::EventKind::Inline => { + block::EventKind::Inline => { if self.verbatim { Event::Str(content.into()) } else { - self.inline_parser - .feed_line(ev.span, self.tree.branch_is_empty()); + self.inline_parser.feed_line( + ev.span, + !matches!( + self.blocks.peek().map(|e| &e.kind), + Some(block::EventKind::Inline), + ), + ); return self.next(); } } + block::EventKind::Stale => continue, }; return Some(event); } diff --git a/src/tree.rs b/src/tree.rs deleted file mode 100644 index d441805..0000000 --- a/src/tree.rs +++ /dev/null @@ -1,444 +0,0 @@ -use crate::Span; - -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum EventKind { - Enter(C), - Inline, - Exit(C), - Atom(A), -} - -#[derive(Debug)] -pub struct Node<'a, C, A> { - pub index: NodeIndex, - pub elem: Element<'a, C, A>, - pub span: Span, -} - -#[derive(Debug)] -pub enum Element<'a, C, A> { - Container(&'a mut C), - Atom(&'a mut A), - Inline, -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct Event { - pub kind: EventKind, - pub span: Span, -} - -#[derive(Clone)] -pub struct Tree { - nodes: std::rc::Rc<[InternalNode]>, - branch: Vec, - head: Option, -} - -impl Tree { - /// Count number of direct children nodes. - pub fn count_children(&self) -> usize { - let mut head = self.head; - let mut count = 0; - while let Some(h) = head { - let n = &self.nodes[h.index()]; - head = n.next; - count += 1; - } - count - } - - /// Retrieve all inlines until the end of the current container. Panics if any upcoming node is - /// not an inline node. - pub fn take_inlines(&mut self) -> impl Iterator + '_ { - let mut head = self.head.take(); - std::iter::from_fn(move || { - head.take().map(|h| { - let n = &self.nodes[h.index()]; - debug_assert!(matches!(n.kind, NodeKind::Inline)); - head = n.next; - n.span - }) - }) - } - - pub fn branch_is_empty(&self) -> bool { - matches!(self.head, None) - } -} - -impl Iterator for Tree { - type Item = Event; - - fn next(&mut self) -> Option { - if let Some(head) = self.head { - let n = &self.nodes[head.index()]; - let kind = match &n.kind { - NodeKind::Root => unreachable!(), - NodeKind::Container(c, child, ..) => { - self.branch.push(head); - self.head = *child; - EventKind::Enter(c.clone()) - } - NodeKind::Atom(a) => { - self.head = n.next; - EventKind::Atom(a.clone()) - } - NodeKind::Inline => { - self.head = n.next; - EventKind::Inline - } - }; - Some(Event { kind, span: n.span }) - } else if let Some(block_ni) = self.branch.pop() { - let InternalNode { next, kind, .. } = &self.nodes[block_ni.index()]; - if let NodeKind::Container(c, _, span) = kind { - self.head = *next; - Some(Event { - kind: EventKind::Exit(c.clone()), - span: *span, - }) - } else { - panic!() - } - } else { - None - } - } -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub struct NodeIndex(std::num::NonZeroUsize); - -impl NodeIndex { - fn new(i: usize) -> Self { - debug_assert_ne!(i, usize::MAX); - Self((i + 1).try_into().unwrap()) - } - - fn root() -> Self { - Self::new(0) - } - - fn index(self) -> usize { - usize::from(self.0) - 1 - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -enum NodeKind { - Root, - Container(C, Option, Span), - Atom(A), - Inline, -} - -#[derive(Debug, Clone)] -struct InternalNode { - span: Span, - kind: NodeKind, - next: Option, -} - -#[derive(Clone)] -pub struct Builder { - nodes: Vec>, - branch: Vec, - head: Option, - depth: usize, -} - -impl NodeKind { - fn child(&self) -> Option { - if let NodeKind::Container(_, child, _) = self { - *child - } else { - None - } - } - - fn child_mut(&mut self) -> &mut Option { - if let NodeKind::Container(_, child, _) = self { - child - } else { - panic!() - } - } -} - -impl<'a, C, A> From<&'a mut NodeKind> for Element<'a, C, A> { - fn from(kind: &'a mut NodeKind) -> Self { - match kind { - NodeKind::Root => unreachable!(), - NodeKind::Container(c, ..) => Element::Container(c), - NodeKind::Atom(a) => Element::Atom(a), - NodeKind::Inline => Element::Inline, - } - } -} - -impl Builder { - pub(super) fn new() -> Self { - Builder { - nodes: vec![InternalNode { - span: Span::default(), - kind: NodeKind::Root, - next: None, - }], - branch: vec![], - head: Some(NodeIndex::root()), - depth: 0, - } - } - - pub(super) fn atom(&mut self, a: A, span: Span) { - self.add_node(InternalNode { - span, - kind: NodeKind::Atom(a), - next: None, - }); - } - - pub(super) fn inline(&mut self, span: Span) { - self.add_node(InternalNode { - span, - kind: NodeKind::Inline, - next: None, - }); - } - - pub(super) fn enter(&mut self, c: C, span: Span) -> NodeIndex { - self.depth += 1; - self.add_node(InternalNode { - span, - kind: NodeKind::Container(c, None, Span::new(0, 0)), - next: None, - }) - } - - pub(super) fn exit(&mut self, span: Span) { - self.depth -= 1; - if let Some(head) = self.head.take() { - if let NodeKind::Container(_, _, sp) = &mut self.nodes[head.index()].kind { - *sp = span; - self.branch.push(head); - return; - } - } else { - let last = self.branch.pop(); - debug_assert_ne!(last, None); - } - - if let NodeKind::Container(_, _, sp) = - &mut self.nodes[self.branch.last().unwrap().index()].kind - { - *sp = span; - } else { - panic!(); - } - } - - /// Exit and discard all the contents of the current container. - pub(super) fn exit_discard(&mut self) { - self.exit(Span::new(0, (1 << 31) - 1)); - let exited = self.branch.pop().unwrap(); - self.nodes.drain(exited.index()..); - let (prev, has_parent) = self.replace(exited, None); - if has_parent { - self.head = Some(prev); - } else { - self.branch.push(prev); - } - } - - /// Swap the node and its children with either its parent or the node before. - pub fn swap_prev(&mut self, node: NodeIndex, span: Span) { - let next = self.nodes[node.index()].next; - let (prev, _) = self.replace(node, next); - if let Some(n) = next { - self.nodes[prev.index()].span = self.nodes[n.index()].span.empty_before(); - self.replace(n, None); - } else { - self.nodes[prev.index()].span = self.nodes[self.nodes.len() - 1].span.empty_after(); - } - self.replace(prev, Some(node)); - self.nodes[node.index()].next = Some(prev); - self.nodes[node.index()].span = span; - - let span = self.nodes[prev.index()].span; - if let NodeKind::Container(_, _, sp) = &mut self.nodes[node.index()].kind { - *sp = span; - } else { - panic!() - } - } - - /// Remove the specified node and its children. - pub fn remove(&mut self, node: NodeIndex) { - let next = self.nodes[node.index()].next; - self.replace(node, next); - } - - pub(super) fn depth(&self) -> usize { - self.depth - } - - pub(super) fn elem(&mut self, ni: NodeIndex) -> Element { - match &mut self.nodes[ni.index()].kind { - NodeKind::Root => unreachable!(), - NodeKind::Container(c, ..) => Element::Container(c), - NodeKind::Atom(a) => Element::Atom(a), - NodeKind::Inline => Element::Inline, - } - } - - /// Retrieve all children nodes for the specified node, in the order that they were added. - pub(super) fn children(&mut self, node: NodeIndex) -> impl Iterator> { - // XXX assumes no modifications - let n = &self.nodes[node.index()]; - let range = if let Some(start) = n.kind.child() { - start.index()..n.next.map_or(self.nodes.len(), NodeIndex::index) - } else { - 0..0 - }; - range - .clone() - .map(NodeIndex::new) - .zip(self.nodes[range].iter_mut()) - .map(|(index, n)| Node { - index, - elem: Element::from(&mut n.kind), - span: n.span, - }) - } - - pub(super) fn finish(self) -> Tree { - debug_assert_eq!(self.depth, 0); - let head = self.nodes[NodeIndex::root().index()].next; - Tree { - nodes: self.nodes.into_boxed_slice().into(), - branch: Vec::new(), - head, - } - } - - fn add_node(&mut self, node: InternalNode) -> NodeIndex { - let ni = NodeIndex::new(self.nodes.len()); - self.nodes.push(node); - if let Some(head_ni) = &mut self.head { - let mut head = &mut self.nodes[head_ni.index()]; - match &mut head.kind { - NodeKind::Root | NodeKind::Inline | NodeKind::Atom(_) => { - // set next pointer of previous node - debug_assert_eq!(head.next, None); - head.next = Some(ni); - } - NodeKind::Container(_, child, _) => { - self.branch.push(*head_ni); - // set child pointer of current container - debug_assert_eq!(*child, None); - *child = Some(ni); - } - } - } else if let Some(block) = self.branch.pop() { - let mut block = &mut self.nodes[block.index()]; - debug_assert!(matches!(block.kind, NodeKind::Container(..))); - block.next = Some(ni); - } else { - panic!() - } - self.head = Some(ni); - ni - } - - /// Remove the link from the node that points to the specified node. Optionally replace the - /// node with another node. Return the pointer node and whether it is a container or not. - fn replace(&mut self, node: NodeIndex, next: Option) -> (NodeIndex, bool) { - for (i, n) in self.nodes.iter_mut().enumerate().rev() { - let ni = NodeIndex::new(i); - if n.next == Some(node) { - n.next = next; - return (ni, false); - } else if n.kind.child() == Some(node) { - *n.kind.child_mut() = next; - return (ni, true); - } - } - panic!("node is never linked to") - } -} - -impl std::fmt::Debug for Builder { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - self.clone().finish().fmt(f) - } -} - -impl std::fmt::Debug for Tree { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - const INDENT: &str = " "; - let mut level = 0; - writeln!(f)?; - for e in self.clone() { - let indent = INDENT.repeat(level); - match e.kind { - EventKind::Enter(c) => { - write!(f, "{}{:?}", indent, c)?; - level += 1; - } - EventKind::Inline => write!(f, "{}Inline", indent)?, - EventKind::Exit(..) => { - level -= 1; - continue; - } - EventKind::Atom(a) => write!(f, "{}{:?}", indent, a)?, - } - writeln!(f, " ({}:{})", e.span.start(), e.span.end())?; - } - Ok(()) - } -} - -#[cfg(test)] -mod test { - use crate::Span; - - #[test] - fn fmt() { - let mut tree = super::Builder::new(); - tree.enter(1, Span::new(0, 1)); - tree.atom(11, Span::new(0, 1)); - tree.atom(12, Span::new(0, 1)); - tree.exit(Span::new(0, 0)); - tree.enter(2, Span::new(1, 5)); - tree.enter(21, Span::new(2, 5)); - tree.enter(211, Span::new(3, 4)); - tree.atom(2111, Span::new(3, 4)); - tree.exit(Span::new(0, 0)); - tree.exit(Span::new(0, 0)); - tree.enter(22, Span::new(4, 5)); - tree.atom(221, Span::new(4, 5)); - tree.exit(Span::new(0, 0)); - tree.exit(Span::new(0, 0)); - tree.enter(3, Span::new(5, 6)); - tree.atom(31, Span::new(5, 6)); - tree.exit(Span::new(0, 0)); - assert_eq!( - format!("{:?}", tree.finish()), - concat!( - "\n", - "1 (0:1)\n", - " 11 (0:1)\n", - " 12 (0:1)\n", - "2 (1:5)\n", - " 21 (2:5)\n", - " 211 (3:4)\n", - " 2111 (3:4)\n", - " 22 (4:5)\n", - " 221 (4:5)\n", - "3 (5:6)\n", - " 31 (5:6)\n", - ) - ); - } -} From edd89f2b2076ff4a7a7b1d380077491705058605 Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Thu, 11 May 2023 21:42:47 +0200 Subject: [PATCH 20/31] block: expand parse_attr test --- src/block.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/block.rs b/src/block.rs index 40485e0..77d3639 100644 --- a/src/block.rs +++ b/src/block.rs @@ -1694,6 +1694,22 @@ mod test { (Inline, "para"), (Exit(Leaf(Paragraph)), ""), ); + test_parse!( + concat!( + "{.a}\n", // + "\n", // + "{.b}\n", // + "\n", // + "para\n", // + ), + (Atom(Attributes), "{.a}\n"), + (Atom(Blankline), "\n"), + (Atom(Attributes), "{.b}\n"), + (Atom(Blankline), "\n"), + (Enter(Leaf(Paragraph)), ""), + (Inline, "para"), + (Exit(Leaf(Paragraph)), ""), + ); } #[test] From c377aa999842f6cb12f9f2d64a73bb758f1e76de Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Thu, 11 May 2023 21:40:28 +0200 Subject: [PATCH 21/31] block: ensure section span end before attrs e.g # sec1 <-- end sec1 here {.sec2-class0} {.sec2-class1} <-- instead of here # sec2 --- src/block.rs | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/src/block.rs b/src/block.rs index 77d3639..9057a06 100644 --- a/src/block.rs +++ b/src/block.rs @@ -127,6 +127,7 @@ struct TreeParser<'s> { /// The previous block element was a blank line. prev_blankline: bool, prev_loose: bool, + attr_start: Option, /// Stack of currently open lists. open_lists: Vec, /// Stack of currently open sections. @@ -147,6 +148,7 @@ impl<'s> TreeParser<'s> { src, prev_blankline: false, prev_loose: false, + attr_start: None, open_lists: Vec::new(), alignments: Vec::new(), open_sections: Vec::new(), @@ -330,6 +332,12 @@ impl<'s> TreeParser<'s> { } } + if matches!(kind, Kind::Atom(Attributes)) { + self.attr_start = self.attr_start.or_else(|| Some(self.events.len() - 1)); + } else if !matches!(kind, Kind::Atom(Blankline)) { + self.attr_start = None; + } + line_count } else { 0 @@ -379,8 +387,24 @@ impl<'s> TreeParser<'s> { .rposition(|l| l < level) .map_or(0, |i| i + 1); let pos = span_start.start() as u32; - for _ in 0..(self.open_sections.len() - first_close) { - self.exit(Span::empty_at(span_start.start())); // section + for i in 0..(self.open_sections.len() - first_close) { + let node = if let EventKind::Enter(node) = + self.events[self.open.pop().unwrap()].kind + { + node + } else { + panic!(); + }; + let end = self + .attr_start + .map_or(span_start.start(), |a| self.events[a].span.start()); + self.events.insert( + self.attr_start.map_or(self.events.len(), |a| a + i), + Event { + kind: EventKind::Exit(node), + span: Span::new(end, end), + }, + ); } self.open_sections.drain(first_close..); self.open_sections.push(*level); From b0f88230aa0b6e59563928865b147a2dd1eb0084 Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Thu, 11 May 2023 21:34:28 +0200 Subject: [PATCH 22/31] parse: do not attach block attributes to outer blocks e.g. - {.inner} this paragraph would get the inner class previously --- src/lib.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/lib.rs b/src/lib.rs index 889b6c3..fa21a28 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1004,6 +1004,7 @@ impl<'s> Parser<'s> { if enter { Event::Start(cont, self.block_attributes.take()) } else { + self.block_attributes = Attributes::new(); Event::End(cont) } } From 42ce9a36707533b553de81cdf27689b77b4bb54f Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Thu, 11 May 2023 18:19:41 +0200 Subject: [PATCH 23/31] block: include attributes in open event span --- src/lib.rs | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index fa21a28..f9c2527 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -559,8 +559,9 @@ pub struct Parser<'s> { /// Contents obtained by the prepass. pre_pass: PrePass<'s>, - /// Last parsed block attributes + /// Last parsed block attributes, and its starting offset. block_attributes: Attributes<'s>, + block_attributes_pos: Option, /// Current table row is a head row. table_head_row: bool, @@ -785,6 +786,7 @@ impl<'s> Parser<'s> { blocks: blocks.into_iter().peekable(), pre_pass, block_attributes: Attributes::new(), + block_attributes_pos: None, table_head_row: false, verbatim: false, inline_parser, @@ -896,15 +898,21 @@ impl<'s> Parser<'s> { } fn block(&mut self) -> Option> { - while let Some(ev) = &mut self.blocks.next() { + while let Some(mut ev) = &mut self.blocks.next() { let content = ev.span.of(self.src); let event = match ev.kind { block::EventKind::Atom(a) => match a { block::Atom::Blankline => Event::Blankline, block::Atom::ThematicBreak => { + if let Some(pos) = self.block_attributes_pos.take() { + ev.span = Span::new(pos, ev.span.end()); + } Event::ThematicBreak(self.block_attributes.take()) } block::Atom::Attributes => { + if self.block_attributes_pos.is_none() { + self.block_attributes_pos = Some(ev.span.start()); + } self.block_attributes.parse(content); continue; } @@ -1002,9 +1010,13 @@ impl<'s> Parser<'s> { }, }; if enter { + if let Some(pos) = self.block_attributes_pos.take() { + ev.span = Span::new(pos, ev.span.end()); + } Event::Start(cont, self.block_attributes.take()) } else { self.block_attributes = Attributes::new(); + self.block_attributes_pos = None; Event::End(cont) } } From f5c87f32f9e25215dbf543c56714194c7f0d4a19 Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Thu, 11 May 2023 19:15:52 +0200 Subject: [PATCH 24/31] inline: include attribute in end event span --- src/inline.rs | 36 ++++++++++++++++++++++-------------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/src/inline.rs b/src/inline.rs index 76be8d0..d471ee1 100644 --- a/src/inline.rs +++ b/src/inline.rs @@ -524,7 +524,13 @@ impl<'s> Parser<'s> { self.input.span = Span::new(start_attr, state.end_attr); self.input.lexer = lex::Lexer::new(&self.input.src[state.end_attr..line_end]); - if !attrs.is_empty() { + if attrs.is_empty() { + if matches!(state.elem_ty, AttributesElementType::Container { .. }) { + let last = self.events.len() - 1; + self.events[last].span = + Span::new(self.events[last].span.start(), self.input.span.end()); + } + } else { let attr_index = self.store_attributes.len() as AttributesIndex; self.store_attributes.push(attrs); let attr_event = Event { @@ -537,11 +543,13 @@ impl<'s> Parser<'s> { match state.elem_ty { AttributesElementType::Container { e_placeholder } => { self.events[e_placeholder] = attr_event; + let last = self.events.len() - 1; if matches!(self.events[e_placeholder + 1].kind, EventKind::Str) { self.events[e_placeholder + 1].kind = EventKind::Enter(Span); - let last = self.events.len() - 1; self.events[last].kind = EventKind::Exit(Span); } + self.events[last].span = + Span::new(self.events[last].span.start(), self.input.span.end()); } AttributesElementType::Word => { self.events.push_back(attr_event); @@ -971,7 +979,7 @@ impl<'s> Parser<'s> { let attr = self.events.pop_front().unwrap(); self.events.push_front(Event { kind: EventKind::Exit(Span), - span: span_str.empty_after(), + span: attr.span, }); self.events.push_front(Event { kind: EventKind::Str, @@ -1265,7 +1273,7 @@ mod test { ), (Enter(Verbatim), "`"), (Str, "raw"), - (Exit(Verbatim), "`"), + (Exit(Verbatim), "`{#id}"), (Str, " post"), ); } @@ -1455,7 +1463,7 @@ mod test { ), (Enter(Span), ""), (Str, "[text]("), - (Exit(Span), ""), + (Exit(Span), "{.cls}"), ); } @@ -1519,7 +1527,7 @@ mod test { "{.cls}", ), (Enter(Span), "["), - (Exit(Span), "]") + (Exit(Span), "]{.cls}") ); } @@ -1536,7 +1544,7 @@ mod test { ), (Enter(Span), "["), (Str, "abc"), - (Exit(Span), "]"), + (Exit(Span), "]{.def}"), ); test_parse!("not a [span] {#id}.", (Str, "not a [span] "), (Str, ".")); } @@ -1554,7 +1562,7 @@ mod test { ), (Enter(Span), "["), (Str, "x_y"), - (Exit(Span), "]"), + (Exit(Span), "]{.bar_}"), ); } @@ -1686,7 +1694,7 @@ mod test { ), (Enter(Emphasis), "_"), (Str, "abc def"), - (Exit(Emphasis), "_"), + (Exit(Emphasis), "_{.attr}"), ); } @@ -1696,13 +1704,13 @@ mod test { "_abc def_{}", (Enter(Emphasis), "_"), (Str, "abc def"), - (Exit(Emphasis), "_"), + (Exit(Emphasis), "_{}"), ); test_parse!( "_abc def_{ % comment % } ghi", (Enter(Emphasis), "_"), (Str, "abc def"), - (Exit(Emphasis), "_"), + (Exit(Emphasis), "_{ % comment % }"), (Str, " ghi"), ); } @@ -1720,7 +1728,7 @@ mod test { ), (Enter(Emphasis), "_"), (Str, "abc def"), - (Exit(Emphasis), "_"), + (Exit(Emphasis), "_{.a}{.b}{.c}"), (Str, " "), ); } @@ -1738,7 +1746,7 @@ mod test { ), (Enter(Span), ""), (Str, "word"), - (Exit(Span), ""), + (Exit(Span), "{a=b}"), ); test_parse!( "some word{.a}{.b} with attrs", @@ -1752,7 +1760,7 @@ mod test { ), (Enter(Span), ""), (Str, "word"), - (Exit(Span), ""), + (Exit(Span), "{.a}{.b}"), (Str, " with attrs"), ); } From b6b3a6f40b9294ca2704702ded93e30c35781c29 Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Tue, 2 May 2023 23:19:11 +0200 Subject: [PATCH 25/31] parse: only get block span content when needed --- src/lib.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index f9c2527..ca3d125 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -899,7 +899,6 @@ impl<'s> Parser<'s> { fn block(&mut self) -> Option> { while let Some(mut ev) = &mut self.blocks.next() { - let content = ev.span.of(self.src); let event = match ev.kind { block::EventKind::Atom(a) => match a { block::Atom::Blankline => Event::Blankline, @@ -913,7 +912,7 @@ impl<'s> Parser<'s> { if self.block_attributes_pos.is_none() { self.block_attributes_pos = Some(ev.span.start()); } - self.block_attributes.parse(content); + self.block_attributes.parse(ev.span.of(self.src)); continue; } }, @@ -1022,7 +1021,7 @@ impl<'s> Parser<'s> { } block::EventKind::Inline => { if self.verbatim { - Event::Str(content.into()) + Event::Str(ev.span.of(self.src).into()) } else { self.inline_parser.feed_line( ev.span, From 79b5119209c18f230596805c0274e8a6f03b9c3f Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Sun, 30 Apr 2023 19:44:22 +0200 Subject: [PATCH 26/31] parse: add Parser::into_offset_iter resolves #3 --- src/lib.rs | 146 +++++++++++++++++++++++++++++++++++++++++++++++++--- src/span.rs | 6 +++ 2 files changed, 145 insertions(+), 7 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index ca3d125..f6800e0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -51,6 +51,7 @@ use std::fmt; use std::fmt::Write as FmtWrite; use std::io; +use std::ops::Range; #[cfg(feature = "html")] pub mod html; @@ -793,7 +794,117 @@ impl<'s> Parser<'s> { } } - fn inline(&mut self) -> Option> { + /// Turn the [`Parser`] into an iterator of tuples, each with an [`Event`] and a start/end byte + /// offset for its corresponding input (as a [`Range`]). + /// + /// Generally, the range of each event does not overlap with any other event and the ranges are + /// in same order as the events are emitted, i.e. the start offset of an event must be greater + /// or equal to the (exclusive) end offset of all events that were emitted before that event. + /// However, there are some exceptions to this rule: + /// + /// - Blank lines inbetween block attributes and the block causes the blankline events to + /// overlap with the block start event. + /// - Caption events are emitted before the table rows while the input for the caption content + /// is located after the table rows, causing the ranges to be out of order. + /// + /// Characters between events, that are not part of any event range, are typically whitespace + /// but may also consist of unattached attributes or `>` characters from blockquotes. + /// + /// # Examples + /// + /// Start and end events of containers correspond only to the start and end markers for that + /// container, not its inner content: + /// + /// ``` + /// # use jotdown::*; + /// # use jotdown::Event::*; + /// # use jotdown::Container::*; + /// let input = "> _hello_ [text](url)\n"; + /// assert!(matches!( + /// Parser::new(input) + /// .into_offset_iter() + /// .map(|(e, r)| (&input[r], e)) + /// .collect::>() + /// .as_slice(), + /// &[ + /// (">", Start(Blockquote, ..)), + /// ("", Start(Paragraph, ..)), + /// ("_", Start(Emphasis, ..)), + /// ("hello", Str(..)), + /// ("_", End(Emphasis)), + /// (" ", Str(..)), + /// ("[", Start(Link { .. }, ..)), + /// ("text", Str(..)), + /// ("](url)", End(Link { .. })), + /// ("", End(Paragraph)), + /// ("", End(Blockquote)), + /// ], + /// )); + /// ``` + /// + /// _Block_ attributes that belong to a container are included in the _start_ event. _Inline_ + /// attributes that belong to a container are included in the _end_ event: + /// + /// ``` + /// # use jotdown::*; + /// # use jotdown::Event::*; + /// # use jotdown::Container::*; + /// let input = " + /// {.quote} + /// > [Hello]{lang=en} world!"; + /// assert!(matches!( + /// Parser::new(input) + /// .into_offset_iter() + /// .map(|(e, r)| (&input[r], e)) + /// .collect::>() + /// .as_slice(), + /// &[ + /// ("\n", Blankline), + /// ("{.quote}\n>", Start(Blockquote, ..)), + /// ("", Start(Paragraph, ..)), + /// ("[", Start(Span, ..)), + /// ("Hello", Str(..)), + /// ("]{lang=en}", End(Span)), + /// (" world!", Str(..)), + /// ("", End(Paragraph)), + /// ("", End(Blockquote)), + /// ], + /// )); + /// ``` + /// + /// Inline events that span multiple lines may contain characters from outer block containers + /// (e.g. `>` characters from blockquotes or whitespace from list items): + /// + /// ``` + /// # use jotdown::*; + /// # use jotdown::Event::*; + /// # use jotdown::Container::*; + /// let input = " + /// > [txt](multi + /// > line)"; + /// assert!(matches!( + /// Parser::new(input) + /// .into_offset_iter() + /// .map(|(e, r)| (&input[r], e)) + /// .collect::>() + /// .as_slice(), + /// &[ + /// ("\n", Blankline), + /// (">", Start(Blockquote, ..)), + /// ("", Start(Paragraph, ..)), + /// ("[", Start(Link { .. }, ..)), + /// ("txt", Str(..)), + /// ("](multi\n> line)", End(Link { .. })), + /// ("", End(Paragraph)), + /// ("", End(Blockquote)), + /// ], + /// )); + /// ``` + pub fn into_offset_iter(self) -> OffsetIter<'s> { + OffsetIter { parser: self } + } + + fn inline(&mut self) -> Option<(Event<'s>, Range)> { let next = self.inline_parser.next()?; let (inline, mut attributes) = match next { @@ -809,7 +920,7 @@ impl<'s> Parser<'s> { inline.map(|inline| { let enter = matches!(inline.kind, inline::EventKind::Enter(_)); - match inline.kind { + let event = match inline.kind { inline::EventKind::Enter(c) | inline::EventKind::Exit(c) => { let t = match c { inline::Container::Span => Container::Span, @@ -893,11 +1004,12 @@ impl<'s> Parser<'s> { inline::EventKind::Attributes { .. } | inline::EventKind::Placeholder => { panic!("{:?}", inline) } - } + }; + (event, inline.span.into()) }) } - fn block(&mut self) -> Option> { + fn block(&mut self) -> Option<(Event<'s>, Range)> { while let Some(mut ev) = &mut self.blocks.next() { let event = match ev.kind { block::EventKind::Atom(a) => match a { @@ -1030,22 +1142,42 @@ impl<'s> Parser<'s> { Some(block::EventKind::Inline), ), ); - return self.next(); + return self.next_span(); } } block::EventKind::Stale => continue, }; - return Some(event); + return Some((event, ev.span.into())); } None } + + fn next_span(&mut self) -> Option<(Event<'s>, Range)> { + self.inline().or_else(|| self.block()) + } } impl<'s> Iterator for Parser<'s> { type Item = Event<'s>; fn next(&mut self) -> Option { - self.inline().or_else(|| self.block()) + self.next_span().map(|(e, _)| e) + } +} + +/// An iterator that is identical to a [`Parser`], except that it also emits the location of each +/// event within the input. +/// +/// See the documentation of [`Parser::into_offset_iter`] for more information. +pub struct OffsetIter<'s> { + parser: Parser<'s>, +} + +impl<'s> Iterator for OffsetIter<'s> { + type Item = (Event<'s>, Range); + + fn next(&mut self) -> Option { + self.parser.next_span() } } diff --git a/src/span.rs b/src/span.rs index 6c595a7..722281e 100644 --- a/src/span.rs +++ b/src/span.rs @@ -4,6 +4,12 @@ pub struct Span { end: u32, } +impl From for std::ops::Range { + fn from(span: Span) -> Self { + span.start()..span.end() + } +} + impl Span { pub fn new(start: usize, end: usize) -> Self { Self::by_len(start, end.checked_sub(start).unwrap()) From 2a3973674fa4b03f17589edfd844ff71ff4c524e Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Tue, 2 May 2023 20:16:20 +0200 Subject: [PATCH 27/31] jotdown_wasm: add events with spans format --- examples/jotdown_wasm/demo.html | 6 +++++- examples/jotdown_wasm/src/lib.rs | 14 ++++++++++---- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/examples/jotdown_wasm/demo.html b/examples/jotdown_wasm/demo.html index ba7679b..2a028d2 100644 --- a/examples/jotdown_wasm/demo.html +++ b/examples/jotdown_wasm/demo.html @@ -21,7 +21,10 @@ output.innerText = jotdown_render(input.innerText); } else if (fmt.value == "events") { output.classList.add("verbatim") - output.innerText = jotdown_parse(input.innerText); + output.innerText = jotdown_parse(input.innerText, false); + } else if (fmt.value == "events_spans") { + output.classList.add("verbatim") + output.innerText = jotdown_parse(input.innerText, true); } else if (fmt.value == "events_indent") { output.classList.add("verbatim") output.innerText = jotdown_parse_indent(input.innerText); @@ -50,6 +53,7 @@ + diff --git a/examples/jotdown_wasm/src/lib.rs b/examples/jotdown_wasm/src/lib.rs index 9aeb866..4d6d4e8 100644 --- a/examples/jotdown_wasm/src/lib.rs +++ b/examples/jotdown_wasm/src/lib.rs @@ -22,10 +22,16 @@ pub fn jotdown_render(djot: &str) -> String { #[must_use] #[wasm_bindgen] -pub fn jotdown_parse(djot: &str) -> String { - jotdown::Parser::new(djot) - .map(|e| format!("{:?}\n", e)) - .collect() +pub fn jotdown_parse(djot: &str, spans: bool) -> String { + let mut out = String::new(); + for (e, sp) in jotdown::Parser::new(djot).into_offset_iter() { + write!(out, "{:?}", e).unwrap(); + if spans { + write!(out, " {:?} {:?}", &djot[sp.clone()], sp).unwrap(); + } + writeln!(out).unwrap(); + } + out } #[must_use] From d19e4933c9b341647230d4d9b8fdd6180789d2b7 Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Sun, 30 Apr 2023 19:58:23 +0200 Subject: [PATCH 28/31] afl: merge parse_balance target into parse parse_balance is a superset of parse --- .github/workflows/ci.yml | 1 - tests/afl/Cargo.toml | 4 ---- tests/afl/src/lib.rs | 15 +++++++-------- tests/afl/src/main.rs | 1 - tests/afl/src/parse_balance.rs | 3 --- 5 files changed, 7 insertions(+), 17 deletions(-) delete mode 100644 tests/afl/src/parse_balance.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 55571f3..b332999 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -72,7 +72,6 @@ jobs: matrix: target: - parse - - parse_balance - html runs-on: ubuntu-latest steps: diff --git a/tests/afl/Cargo.toml b/tests/afl/Cargo.toml index 66b25e1..70e156b 100644 --- a/tests/afl/Cargo.toml +++ b/tests/afl/Cargo.toml @@ -17,10 +17,6 @@ path = "src/main.rs" name = "parse" path = "src/parse.rs" -[[bin]] -name = "parse_balance" -path = "src/parse_balance.rs" - [[bin]] name = "html" path = "src/html.rs" diff --git a/tests/afl/src/lib.rs b/tests/afl/src/lib.rs index 0591238..c6a8bb8 100644 --- a/tests/afl/src/lib.rs +++ b/tests/afl/src/lib.rs @@ -5,27 +5,26 @@ use html5ever::tendril::TendrilSink; use html5ever::tokenizer; use html5ever::tree_builder; +/// Perform sanity checks on events. pub fn parse(data: &[u8]) { - if let Ok(s) = std::str::from_utf8(data) { - jotdown::Parser::new(s).last(); - } -} - -/// Ensure containers are always balanced, i.e. opened and closed in correct order. -pub fn parse_balance(data: &[u8]) { if let Ok(s) = std::str::from_utf8(data) { let mut open = Vec::new(); for event in jotdown::Parser::new(s) { match event { jotdown::Event::Start(c, ..) => open.push(c.clone()), - jotdown::Event::End(c) => assert_eq!(open.pop().unwrap(), c), + jotdown::Event::End(c) => { + // closes correct event + assert_eq!(open.pop().unwrap(), c); + } _ => {} } } + // no missing close assert_eq!(open, &[]); } } +/// Validate rendered html output. pub fn html(data: &[u8]) { if data.iter().any(|i| *i == 0) { return; diff --git a/tests/afl/src/main.rs b/tests/afl/src/main.rs index ad09a2d..b0a66c8 100644 --- a/tests/afl/src/main.rs +++ b/tests/afl/src/main.rs @@ -8,7 +8,6 @@ fn main() { let f = match target.as_str() { "parse" => jotdown_afl::parse, - "parse_balance" => jotdown_afl::parse_balance, "html" => jotdown_afl::html, _ => panic!("unknown target '{}'", target), }; diff --git a/tests/afl/src/parse_balance.rs b/tests/afl/src/parse_balance.rs deleted file mode 100644 index 9118fb2..0000000 --- a/tests/afl/src/parse_balance.rs +++ /dev/null @@ -1,3 +0,0 @@ -fn main() { - afl::fuzz!(|data: &[u8]| { jotdown_afl::parse_balance(data) }); -} From e69d61a0d1919772ed5f1bb293db61de5a8bb017 Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Sun, 30 Apr 2023 20:03:35 +0200 Subject: [PATCH 29/31] afl parse: sanity check spans --- tests/afl/src/lib.rs | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/tests/afl/src/lib.rs b/tests/afl/src/lib.rs index c6a8bb8..03e3ae1 100644 --- a/tests/afl/src/lib.rs +++ b/tests/afl/src/lib.rs @@ -8,8 +8,42 @@ use html5ever::tree_builder; /// Perform sanity checks on events. pub fn parse(data: &[u8]) { if let Ok(s) = std::str::from_utf8(data) { + let whitelist_whitespace = s.contains('{') && s.contains('}'); // attributes are outside events let mut open = Vec::new(); - for event in jotdown::Parser::new(s) { + let mut last = (jotdown::Event::Str("".into()), 0..0); + for (event, range) in jotdown::Parser::new(s).into_offset_iter() { + // no overlap, out of order + assert!( + last.1.end <= range.start + // block attributes may overlap with start event + || ( + matches!(last.0, jotdown::Event::Blankline) + && ( + matches!( + event, + jotdown::Event::Start(ref cont, ..) if cont.is_block() + ) + || matches!(event, jotdown::Event::ThematicBreak(..)) + ) + ) + // caption event is before table rows but src is after + || ( + matches!( + last.0, + jotdown::Event::Start(jotdown::Container::Caption, ..) + | jotdown::Event::End(jotdown::Container::Caption) + ) + && range.end <= last.1.start + ), + "{} > {} {:?} {:?}", + last.1.end, + range.start, + last.0, + event + ); + last = (event.clone(), range.clone()); + // range is valid unicode, does not cross char boundary + let _ = &s[range]; match event { jotdown::Event::Start(c, ..) => open.push(c.clone()), jotdown::Event::End(c) => { @@ -21,6 +55,12 @@ pub fn parse(data: &[u8]) { } // no missing close assert_eq!(open, &[]); + // only whitespace after last event + assert!( + whitelist_whitespace || s[last.1.end..].chars().all(char::is_whitespace), + "non whitespace {:?}", + &s[last.1.end..], + ); } } From 7c28a068e957f728bd58035aeeb2803a99d01d9b Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Wed, 3 May 2023 23:40:15 +0200 Subject: [PATCH 30/31] afl: rm whitelisted html error resolved by "block: replace tree with event vec" --- tests/afl/src/lib.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/afl/src/lib.rs b/tests/afl/src/lib.rs index 03e3ae1..697ba74 100644 --- a/tests/afl/src/lib.rs +++ b/tests/afl/src/lib.rs @@ -171,9 +171,6 @@ impl<'a> tree_builder::TreeSink for Dom<'a> { "Found special tag while closing generic tag", "Formatting element not current node", "Formatting element not open", - // FIXME bug caused by empty table at end of list - "No matching tag to close", - "Unexpected open element while closing", ]; if !whitelist.iter().any(|e| msg.starts_with(e)) { #[cfg(feature = "debug")] From bbdb314ae1d020bc8c4dbc081b138d7599dce59f Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Sun, 7 May 2023 11:04:34 +0200 Subject: [PATCH 31/31] parse: do not inline parse link definitions better match the actual url produced, which is verbatim --- src/block.rs | 16 ++++++++++++++-- src/lib.rs | 13 +++++++++---- 2 files changed, 23 insertions(+), 6 deletions(-) diff --git a/src/block.rs b/src/block.rs index 9057a06..2ce775d 100644 --- a/src/block.rs +++ b/src/block.rs @@ -350,7 +350,7 @@ impl<'s> TreeParser<'s> { k: &Kind, span_start: Span, span_end: Span, - lines: &mut [Span], + mut lines: &mut [Span], ) { if let Kind::Fenced { indent, .. } = k { for line in lines.iter_mut() { @@ -367,6 +367,18 @@ impl<'s> TreeParser<'s> { *line = line.trim_start(self.src); } + // skip first inline if empty + if lines.get(0).map_or(false, |l| l.is_empty()) { + lines = &mut lines[1..]; + }; + + if matches!(leaf, LinkDefinition { .. }) { + // trim ending whitespace of each inline + for line in lines.iter_mut() { + *line = line.trim_end(self.src); + } + } + // trim ending whitespace of block let l = lines.len(); if l > 0 { @@ -412,7 +424,7 @@ impl<'s> TreeParser<'s> { } // trim '#' characters - for line in lines[1..].iter_mut() { + for line in lines.iter_mut().skip(1) { *line = line.trim_start_matches(self.src, |c| c == '#' || c.is_whitespace()); } } diff --git a/src/lib.rs b/src/lib.rs index f6800e0..1c2620e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1064,6 +1064,7 @@ impl<'s> Parser<'s> { }, block::Leaf::Caption => Container::Caption, block::Leaf::LinkDefinition { label } => { + self.verbatim = enter; Container::LinkDefinition { label } } } @@ -1709,7 +1710,6 @@ mod test { Blankline, Start(LinkDefinition { label: "tag" }, Attributes::new()), Str("u".into()), - Softbreak, Str("rl".into()), End(LinkDefinition { label: "tag" }), ); @@ -1718,19 +1718,24 @@ mod test { "[text][tag]\n", "\n", "[tag]:\n", - " url\n", // + " url\n", // + " cont\n", // ), Start(Paragraph, Attributes::new()), Start( - Link("url".into(), LinkType::Span(SpanLinkType::Reference)), + Link("urlcont".into(), LinkType::Span(SpanLinkType::Reference)), Attributes::new() ), Str("text".into()), - End(Link("url".into(), LinkType::Span(SpanLinkType::Reference))), + End(Link( + "urlcont".into(), + LinkType::Span(SpanLinkType::Reference) + )), End(Paragraph), Blankline, Start(LinkDefinition { label: "tag" }, Attributes::new()), Str("url".into()), + Str("cont".into()), End(LinkDefinition { label: "tag" }), ); }