lib: emit footnotes as they are encountered

Previously, footnotes and their child events were skipped (stored in the
block tree) and inline-parsed at the end. Now, they are emitted by the
parser immediately, and the responsibility for aggregating them has been
moved to the renderer.

resolves #31
This commit is contained in:
Noah Hellman 2023-04-08 11:52:58 +02:00
parent c4ecd0c677
commit 99f4691e52
3 changed files with 144 additions and 290 deletions

View file

@ -5,6 +5,7 @@ use crate::Container;
use crate::Event; use crate::Event;
use crate::LinkType; use crate::LinkType;
use crate::ListKind; use crate::ListKind;
use crate::Map;
use crate::OrderedListNumbering::*; use crate::OrderedListNumbering::*;
use crate::Render; use crate::Render;
use crate::SpanLinkType; use crate::SpanLinkType;
@ -48,23 +49,29 @@ impl Default for Raw {
} }
#[derive(Default)] #[derive(Default)]
struct Writer { struct Writer<'s> {
raw: Raw, raw: Raw,
img_alt_text: usize, img_alt_text: usize,
list_tightness: Vec<bool>, list_tightness: Vec<bool>,
encountered_footnote: bool,
footnote_number: Option<std::num::NonZeroUsize>,
not_first_line: bool, not_first_line: bool,
close_para: bool,
ignore: bool, ignore: bool,
footnotes: Footnotes<'s>,
} }
impl Writer { impl<'s> Writer<'s> {
fn render_event<'s, W>(&mut self, e: &Event<'s>, mut out: W) -> std::fmt::Result fn render_event<W>(&mut self, e: &Event<'s>, mut out: W) -> std::fmt::Result
where where
W: std::fmt::Write, W: std::fmt::Write,
{ {
if matches!(&e, Event::Blankline | Event::Escape) { if let Event::Start(Container::Footnote { label }, ..) = e {
self.footnotes.start(label, Vec::new());
return Ok(());
} else if let Some(events) = self.footnotes.current() {
if matches!(e, Event::End(Container::Footnote { .. })) {
self.footnotes.end();
} else {
events.push(e.clone());
}
return Ok(()); return Ok(());
} }
@ -82,15 +89,6 @@ impl Writer {
return Ok(()); return Ok(());
} }
let close_para = self.close_para;
if close_para {
self.close_para = false;
if !matches!(&e, Event::End(Container::Footnote { .. })) {
// no need to add href before para close
out.write_str("</p>")?;
}
}
match e { match e {
Event::Start(c, attrs) => { Event::Start(c, attrs) => {
if c.is_block() && self.not_first_line { if c.is_block() && self.not_first_line {
@ -129,16 +127,7 @@ impl Writer {
} }
Container::DescriptionList => out.write_str("<dl")?, Container::DescriptionList => out.write_str("<dl")?,
Container::DescriptionDetails => out.write_str("<dd")?, Container::DescriptionDetails => out.write_str("<dd")?,
Container::Footnote { number, .. } => { Container::Footnote { .. } => unreachable!(),
debug_assert!(self.footnote_number.is_none());
self.footnote_number = Some((*number).try_into().unwrap());
if !self.encountered_footnote {
self.encountered_footnote = true;
out.write_str("<section role=\"doc-endnotes\">\n<hr>\n<ol>\n")?;
}
write!(out, "<li id=\"fn{}\">", number)?;
return Ok(());
}
Container::Table => out.write_str("<table")?, Container::Table => out.write_str("<table")?,
Container::TableRow { .. } => out.write_str("<tr")?, Container::TableRow { .. } => out.write_str("<tr")?,
Container::Section { .. } => out.write_str("<section")?, Container::Section { .. } => out.write_str("<section")?,
@ -298,7 +287,7 @@ impl Writer {
} }
} }
Event::End(c) => { Event::End(c) => {
if c.is_block_container() && !matches!(c, Container::Footnote { .. }) { if c.is_block_container() {
out.write_char('\n')?; out.write_char('\n')?;
} }
if self.img_alt_text > 0 && !matches!(c, Container::Image(..)) { if self.img_alt_text > 0 && !matches!(c, Container::Image(..)) {
@ -322,19 +311,7 @@ impl Writer {
} }
Container::DescriptionList => out.write_str("</dl>")?, Container::DescriptionList => out.write_str("</dl>")?,
Container::DescriptionDetails => out.write_str("</dd>")?, Container::DescriptionDetails => out.write_str("</dd>")?,
Container::Footnote { number, .. } => { Container::Footnote { .. } => unreachable!(),
if !close_para {
// create a new paragraph
out.write_str("\n<p>")?;
}
write!(
out,
r##"<a href="#fnref{}" role="doc-backlink">↩︎︎</a></p>"##,
number,
)?;
out.write_str("\n</li>")?;
self.footnote_number = None;
}
Container::Table => out.write_str("</table>")?, Container::Table => out.write_str("</table>")?,
Container::TableRow { .. } => out.write_str("</tr>")?, Container::TableRow { .. } => out.write_str("</tr>")?,
Container::Section { .. } => out.write_str("</section>")?, Container::Section { .. } => out.write_str("</section>")?,
@ -343,10 +320,8 @@ impl Writer {
if matches!(self.list_tightness.last(), Some(true)) { if matches!(self.list_tightness.last(), Some(true)) {
return Ok(()); return Ok(());
} }
if self.footnote_number.is_none() { if !self.footnotes.in_epilogue() {
out.write_str("</p>")?; out.write_str("</p>")?;
} else {
self.close_para = true;
} }
} }
Container::Heading { level, .. } => write!(out, "</h{}>", level)?, Container::Heading { level, .. } => write!(out, "</h{}>", level)?,
@ -394,7 +369,8 @@ impl Writer {
Raw::Html => out.write_str(s)?, Raw::Html => out.write_str(s)?,
Raw::Other => {} Raw::Other => {}
}, },
Event::FootnoteReference(_tag, number) => { Event::FootnoteReference(label) => {
let number = self.footnotes.reference(label);
if self.img_alt_text == 0 { if self.img_alt_text == 0 {
write!( write!(
out, out,
@ -414,7 +390,7 @@ impl Writer {
Event::NonBreakingSpace => out.write_str("&nbsp;")?, Event::NonBreakingSpace => out.write_str("&nbsp;")?,
Event::Hardbreak => out.write_str("<br>\n")?, Event::Hardbreak => out.write_str("<br>\n")?,
Event::Softbreak => out.write_char('\n')?, Event::Softbreak => out.write_char('\n')?,
Event::Escape | Event::Blankline => unreachable!("filtered out"), Event::Escape | Event::Blankline => {}
Event::ThematicBreak(attrs) => { Event::ThematicBreak(attrs) => {
out.write_str("\n<hr")?; out.write_str("\n<hr")?;
for (a, v) in attrs.iter() { for (a, v) in attrs.iter() {
@ -434,9 +410,41 @@ impl Writer {
where where
W: std::fmt::Write, W: std::fmt::Write,
{ {
if self.encountered_footnote { if self.footnotes.reference_encountered() {
out.write_str("\n<section role=\"doc-endnotes\">\n<hr>\n<ol>")?;
while let Some((number, events)) = self.footnotes.next() {
write!(out, "\n<li id=\"fn{}\">", number)?;
let mut unclosed_para = false;
for e in events.iter().flatten() {
if matches!(&e, Event::Blankline | Event::Escape) {
continue;
}
if unclosed_para {
// not a footnote, so no need to add href before para close
out.write_str("</p>")?;
}
self.render_event(e, &mut out)?;
unclosed_para = matches!(e, Event::End(Container::Paragraph { .. }))
&& !matches!(self.list_tightness.last(), Some(true));
}
if !unclosed_para {
// create a new paragraph
out.write_str("\n<p>")?;
}
write!(
out,
r##"<a href="#fnref{}" role="doc-backlink">↩︎︎</a></p>"##,
number,
)?;
out.write_str("\n</li>")?;
}
out.write_str("\n</ol>\n</section>")?; out.write_str("\n</ol>\n</section>")?;
} }
out.write_char('\n')?; out.write_char('\n')?;
Ok(()) Ok(())
@ -481,3 +489,73 @@ where
} }
out.write_str(s) out.write_str(s)
} }
/// Helper to aggregate footnotes for rendering at the end of the document. It caches the events
/// of each footnote until they are pulled for emission.
///
/// When footnotes should be rendered, they can be pulled with the [`Footnotes::next`] function in
/// the order their labels were first referenced.
#[derive(Default)]
struct Footnotes<'s> {
/// Stack of currently open footnotes, each with its label and a staging buffer of events.
open: Vec<(&'s str, Vec<Event<'s>>)>,
/// Labels of referenced footnotes in the order they were first encountered, without duplicates.
references: Vec<&'s str>,
/// Staged events of each footnote that has been ended, keyed by label.
events: Map<&'s str, Vec<Event<'s>>>,
/// Number of the last footnote that was pulled. Also serves as the index into `references` of
/// the next footnote to pull.
number: usize,
}
impl<'s> Footnotes<'s> {
    /// Tells whether at least one footnote reference has been registered.
    fn reference_encountered(&self) -> bool {
        !self.references.is_empty()
    }

    /// Tells whether the epilogue has begun, i.e. whether at least one footnote has already been
    /// pulled from the iterator.
    fn in_epilogue(&self) -> bool {
        self.number != 0
    }

    /// Register a reference to the footnote `label` and return its 1-based number.
    ///
    /// A label that was referenced before keeps the number it was first given; a new label is
    /// appended and receives the next free number.
    fn reference(&mut self, label: &'s str) -> usize {
        match self.references.iter().position(|known| *known == label) {
            Some(index) => index + 1,
            None => {
                self.references.push(label);
                self.references.len()
            }
        }
    }

    /// Open a new footnote with the given label, using `events` as its staging buffer.
    fn start(&mut self, label: &'s str, events: Vec<Event<'s>>) {
        self.open.push((label, events));
    }

    /// Staging buffer of the innermost open footnote, or `None` if no footnote is open.
    fn current(&mut self) -> Option<&mut Vec<Event<'s>>> {
        let (_, staged) = self.open.last_mut()?;
        Some(staged)
    }

    /// Close the innermost open footnote and store its staged events under its label.
    ///
    /// Panics if no footnote is currently open.
    fn end(&mut self) {
        let (label, staged) = self.open.pop().unwrap();
        self.events.insert(label, staged);
    }
}
/// Yields footnotes in the order their labels were first referenced, as pairs of the 1-based
/// footnote number and the cached events (`None` if no events are stored for the label).
impl<'s> Iterator for Footnotes<'s> {
    type Item = (usize, Option<Vec<Event<'s>>>);

    fn next(&mut self) -> Option<Self::Item> {
        // `number` counts the footnotes pulled so far, so it is also the index of the next label.
        let label = self.references.get(self.number)?;
        self.number += 1;
        let events = self.events.remove(label);
        Some((self.number, events))
    }
}

View file

@ -209,7 +209,7 @@ pub enum Event<'s> {
/// A string object, text only. /// A string object, text only.
Str(CowStr<'s>), Str(CowStr<'s>),
/// A footnote reference. /// A footnote reference.
FootnoteReference(&'s str, usize), FootnoteReference(&'s str),
/// A symbol, by default rendered literally but may be treated specially. /// A symbol, by default rendered literally but may be treated specially.
Symbol(CowStr<'s>), Symbol(CowStr<'s>),
/// Left single quotation mark. /// Left single quotation mark.
@ -262,7 +262,7 @@ pub enum Container<'s> {
/// Details describing a term within a description list. /// Details describing a term within a description list.
DescriptionDetails, DescriptionDetails,
/// A footnote definition. /// A footnote definition.
Footnote { tag: &'s str, number: usize }, Footnote { label: &'s str },
/// A table element. /// A table element.
Table, Table,
/// A row element of a table. /// A row element of a table.
@ -569,15 +569,6 @@ pub struct Parser<'s> {
/// Currently within a verbatim code block. /// Currently within a verbatim code block.
verbatim: bool, verbatim: bool,
/// Footnote references in the order they were encountered, without duplicates.
footnote_references: Vec<&'s str>,
/// Cache of footnotes to emit at the end.
footnotes: Map<&'s str, block::Tree>,
/// Next or current footnote being parsed and emitted.
footnote_index: usize,
/// Currently within a footnote.
footnote_active: bool,
/// Inline parser. /// Inline parser.
inline_parser: inline::Parser<'s>, inline_parser: inline::Parser<'s>,
} }
@ -755,10 +746,6 @@ impl<'s> Parser<'s> {
block_attributes: Attributes::new(), block_attributes: Attributes::new(),
table_head_row: false, table_head_row: false,
verbatim: false, verbatim: false,
footnote_references: Vec::new(),
footnotes: Map::new(),
footnote_index: 0,
footnote_active: false,
inline_parser, inline_parser,
} }
} }
@ -847,19 +834,7 @@ impl<'s> Parser<'s> {
} }
inline::EventKind::Atom(a) => match a { inline::EventKind::Atom(a) => match a {
inline::Atom::FootnoteReference => { inline::Atom::FootnoteReference => {
let tag = inline.span.of(self.src); Event::FootnoteReference(inline.span.of(self.src))
let number = self
.footnote_references
.iter()
.position(|t| *t == tag)
.map_or_else(
|| {
self.footnote_references.push(tag);
self.footnote_references.len()
},
|i| i + 1,
);
Event::FootnoteReference(inline.span.of(self.src), number)
} }
inline::Atom::Symbol => Event::Symbol(inline.span.of(self.src).into()), inline::Atom::Symbol => Event::Symbol(inline.span.of(self.src).into()),
inline::Atom::Quote { ty, left } => match (ty, left) { inline::Atom::Quote { ty, left } => match (ty, left) {
@ -941,12 +916,7 @@ impl<'s> Parser<'s> {
block::Container::Div { .. } => Container::Div { block::Container::Div { .. } => Container::Div {
class: (!ev.span.is_empty()).then(|| content), class: (!ev.span.is_empty()).then(|| content),
}, },
block::Container::Footnote => { block::Container::Footnote => Container::Footnote { label: content },
debug_assert!(enter);
self.footnotes.insert(content, self.tree.take_branch());
self.block_attributes = Attributes::new();
continue;
}
block::Container::List(block::ListKind { ty, tight }) => { block::Container::List(block::ListKind { ty, tight }) => {
if matches!(ty, block::ListType::Description) { if matches!(ty, block::ListType::Description) {
Container::DescriptionList Container::DescriptionList
@ -1013,43 +983,13 @@ impl<'s> Parser<'s> {
} }
None None
} }
fn footnote(&mut self) -> Option<Event<'s>> {
if self.footnote_active {
let tag = self.footnote_references.get(self.footnote_index).unwrap();
self.footnote_index += 1;
self.footnote_active = false;
Some(Event::End(Container::Footnote {
tag,
number: self.footnote_index,
}))
} else if let Some(tag) = self.footnote_references.get(self.footnote_index) {
self.tree = self
.footnotes
.remove(tag)
.unwrap_or_else(block::Tree::empty);
self.footnote_active = true;
Some(Event::Start(
Container::Footnote {
tag,
number: self.footnote_index + 1,
},
Attributes::new(),
))
} else {
None
}
}
} }
impl<'s> Iterator for Parser<'s> { impl<'s> Iterator for Parser<'s> {
type Item = Event<'s>; type Item = Event<'s>;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
self.inline() self.inline().or_else(|| self.block())
.or_else(|| self.block())
.or_else(|| self.footnote())
} }
} }
@ -1563,43 +1503,10 @@ mod test {
test_parse!( test_parse!(
"[^a][^b][^c]", "[^a][^b][^c]",
Start(Paragraph, Attributes::new()), Start(Paragraph, Attributes::new()),
FootnoteReference("a", 1), FootnoteReference("a"),
FootnoteReference("b", 2), FootnoteReference("b"),
FootnoteReference("c", 3), FootnoteReference("c"),
End(Paragraph), End(Paragraph),
Start(
Footnote {
tag: "a",
number: 1
},
Attributes::new()
),
End(Footnote {
tag: "a",
number: 1
}),
Start(
Footnote {
tag: "b",
number: 2
},
Attributes::new()
),
End(Footnote {
tag: "b",
number: 2
}),
Start(
Footnote {
tag: "c",
number: 3
},
Attributes::new()
),
End(Footnote {
tag: "c",
number: 3
}),
); );
} }
@ -1608,23 +1515,14 @@ mod test {
test_parse!( test_parse!(
"[^a]\n\n[^a]: a\n", "[^a]\n\n[^a]: a\n",
Start(Paragraph, Attributes::new()), Start(Paragraph, Attributes::new()),
FootnoteReference("a", 1), FootnoteReference("a"),
End(Paragraph), End(Paragraph),
Blankline, Blankline,
Start( Start(Footnote { label: "a" }, Attributes::new()),
Footnote {
tag: "a",
number: 1
},
Attributes::new()
),
Start(Paragraph, Attributes::new()), Start(Paragraph, Attributes::new()),
Str("a".into()), Str("a".into()),
End(Paragraph), End(Paragraph),
End(Footnote { End(Footnote { label: "a" }),
tag: "a",
number: 1
}),
); );
} }
@ -1639,16 +1537,10 @@ mod test {
" def", // " def", //
), ),
Start(Paragraph, Attributes::new()), Start(Paragraph, Attributes::new()),
FootnoteReference("a", 1), FootnoteReference("a"),
End(Paragraph), End(Paragraph),
Blankline, Blankline,
Start( Start(Footnote { label: "a" }, Attributes::new()),
Footnote {
tag: "a",
number: 1
},
Attributes::new()
),
Start(Paragraph, Attributes::new()), Start(Paragraph, Attributes::new()),
Str("abc".into()), Str("abc".into()),
End(Paragraph), End(Paragraph),
@ -1656,10 +1548,7 @@ mod test {
Start(Paragraph, Attributes::new()), Start(Paragraph, Attributes::new()),
Str("def".into()), Str("def".into()),
End(Paragraph), End(Paragraph),
End(Footnote { End(Footnote { label: "a" }),
tag: "a",
number: 1
}),
); );
} }
@ -1673,26 +1562,17 @@ mod test {
"para\n", // "para\n", //
), ),
Start(Paragraph, Attributes::new()), Start(Paragraph, Attributes::new()),
FootnoteReference("a", 1), FootnoteReference("a"),
End(Paragraph), End(Paragraph),
Blankline, Blankline,
Start(Paragraph, Attributes::new()), Start(Footnote { label: "a" }, Attributes::new()),
Str("para".into()),
End(Paragraph),
Start(
Footnote {
tag: "a",
number: 1
},
Attributes::new()
),
Start(Paragraph, Attributes::new()), Start(Paragraph, Attributes::new()),
Str("note".into()), Str("note".into()),
End(Paragraph), End(Paragraph),
End(Footnote { End(Footnote { label: "a" }),
tag: "a", Start(Paragraph, Attributes::new()),
number: 1 Str("para".into()),
}), End(Paragraph),
); );
} }

View file

@ -36,14 +36,6 @@ pub struct Tree<C: 'static, A: 'static> {
} }
impl<C: Clone, A: Clone> Tree<C, A> { impl<C: Clone, A: Clone> Tree<C, A> {
pub fn empty() -> Self {
Self {
nodes: vec![].into_boxed_slice().into(),
branch: Vec::new(),
head: None,
}
}
/// Count number of direct children nodes. /// Count number of direct children nodes.
pub fn count_children(&self) -> usize { pub fn count_children(&self) -> usize {
let mut head = self.head; let mut head = self.head;
@ -56,22 +48,6 @@ impl<C: Clone, A: Clone> Tree<C, A> {
count count
} }
/// Split off the remaining part of the current branch. The returned [`Tree`] will continue on
/// the branch, this [`Tree`] will skip over the current branch.
pub fn take_branch(&mut self) -> Self {
let head = self.head.take();
self.head = self.branch.pop();
if let Some(h) = self.head {
let n = &self.nodes[h.index()];
self.head = n.next;
}
Self {
nodes: self.nodes.clone(),
branch: Vec::new(),
head,
}
}
/// Retrieve all inlines until the end of the current container. Panics if any upcoming node is /// Retrieve all inlines until the end of the current container. Panics if any upcoming node is
/// not an inline node. /// not an inline node.
pub fn take_inlines(&mut self) -> impl Iterator<Item = Span> + '_ { pub fn take_inlines(&mut self) -> impl Iterator<Item = Span> + '_ {
@ -410,9 +386,6 @@ impl<C: std::fmt::Debug + Clone, A: std::fmt::Debug + Clone> std::fmt::Debug for
mod test { mod test {
use crate::Span; use crate::Span;
use super::Event;
use super::EventKind;
#[test] #[test]
fn fmt() { fn fmt() {
let mut tree = super::Builder::new(); let mut tree = super::Builder::new();
@ -451,81 +424,4 @@ mod test {
) )
); );
} }
#[test]
fn branch_take_branch() {
let mut b = super::Builder::new();
let sp = Span::new(0, 0);
b.enter(1, sp);
b.atom(11, sp);
b.exit();
b.enter(2, sp);
b.enter(21, sp);
b.atom(211, sp);
b.exit();
b.exit();
b.enter(3, sp);
b.atom(31, sp);
b.exit();
let mut tree = b.finish();
assert_eq!(
(&mut tree).take(3).collect::<Vec<_>>(),
&[
Event {
kind: EventKind::Enter(1),
span: sp
},
Event {
kind: EventKind::Atom(11),
span: sp
},
Event {
kind: EventKind::Exit(1),
span: sp
},
]
);
assert_eq!(
tree.next(),
Some(Event {
kind: EventKind::Enter(2),
span: sp
})
);
assert_eq!(
tree.take_branch().collect::<Vec<_>>(),
&[
Event {
kind: EventKind::Enter(21),
span: sp
},
Event {
kind: EventKind::Atom(211),
span: sp
},
Event {
kind: EventKind::Exit(21),
span: sp
},
]
);
assert_eq!(
tree.collect::<Vec<_>>(),
&[
Event {
kind: EventKind::Enter(3),
span: sp
},
Event {
kind: EventKind::Atom(31),
span: sp
},
Event {
kind: EventKind::Exit(3),
span: sp
},
]
);
}
} }