diff --git a/src/block.rs b/src/block.rs index d4e7923..201de9c 100644 --- a/src/block.rs +++ b/src/block.rs @@ -618,6 +618,32 @@ mod test { ); } + #[test] + fn parse_footnote_post() { + test_parse!( + concat!( + "[^a]\n", + "\n", + "[^a]: note\n", + "\n", + "para\n", // + ), + (Enter(Leaf(Paragraph)), ""), + (Inline, "[^a]"), + (Exit(Leaf(Paragraph)), ""), + (Atom(Blankline), "\n"), + (Enter(Container(Footnote)), "a"), + (Enter(Leaf(Paragraph)), ""), + (Inline, "note"), + (Exit(Leaf(Paragraph)), ""), + (Atom(Blankline), "\n"), + (Exit(Container(Footnote)), "a"), + (Enter(Leaf(Paragraph)), ""), + (Inline, "para"), + (Exit(Leaf(Paragraph)), ""), + ); + } + #[test] fn parse_attr() { test_parse!( @@ -754,4 +780,42 @@ mod test { 1, ); } + + #[test] + fn block_footnote_empty() { + test_block!("[^tag]:\n", Block::Container(Footnote), "tag", 1); + } + + #[test] + fn block_footnote_single() { + test_block!("[^tag]: a\n", Block::Container(Footnote), "tag", 1); + } + + #[test] + fn block_footnote_multiline() { + test_block!( + concat!( + "[^tag]: a\n", + " b\n", // + ), + Block::Container(Footnote), + "tag", + 2, + ); + } + + #[test] + fn block_footnote_multiline_post() { + test_block!( + concat!( + "[^tag]: a\n", + " b\n", + "\n", + "para\n", // + ), + Block::Container(Footnote), + "tag", + 3, + ); + } } diff --git a/src/html.rs b/src/html.rs index b033444..96625ea 100644 --- a/src/html.rs +++ b/src/html.rs @@ -48,25 +48,31 @@ enum Raw { Other, } -struct Writer { - events: I, +struct Writer { + events: std::iter::Peekable, out: W, raw: Raw, text_only: bool, + encountered_footnote: bool, + footnote_number: Option, + footnote_backlink_written: bool, } impl<'s, I: Iterator>, W: std::fmt::Write> Writer { fn new(events: I, out: W) -> Self { Self { - events, + events: events.peekable(), out, raw: Raw::None, text_only: false, + encountered_footnote: false, + footnote_number: None, + footnote_backlink_written: false, } } fn write(&mut self) -> std::fmt::Result { - for e in &mut self.events { + while let Some(e) = self.events.next() { match e { Event::Start(c, attrs) => { if c.is_block() { @@ -81,7 +87,18 @@ impl<'s, I: Iterator>, W: std::fmt::Write> Writer { Container::ListItem => self.out.write_str(" self.out.write_str(" self.out.write_str(" todo!(), + Container::Footnote { number, .. } => { + assert!(self.footnote_number.is_none()); + self.footnote_number = Some((*number).try_into().unwrap()); + if !self.encountered_footnote { + self.encountered_footnote = true; + self.out + .write_str("
\n
\n
    \n")?; + } + write!(self.out, "
  1. ", number)?; + self.footnote_backlink_written = false; + continue; + } Container::Table => self.out.write_str(" self.out.write_str(" self.out.write_str(">, W: std::fmt::Write> Writer { Container::ListItem => self.out.write_str("
  2. ")?, Container::DescriptionList => self.out.write_str("")?, Container::DescriptionDetails => self.out.write_str("")?, - Container::Footnote { .. } => todo!(), + Container::Footnote { number, .. } => { + if !self.footnote_backlink_written { + write!( + self.out, + "\n

    ↩︎︎

    ", + number, + )?; + } + self.out.write_str("\n")?; + self.footnote_number = None; + } Container::Table => self.out.write_str("")?, Container::TableRow => self.out.write_str("")?, Container::Div { .. } => self.out.write_str("")?, - Container::Paragraph => self.out.write_str("

    ")?, + Container::Paragraph => { + if let Some(num) = self.footnote_number { + if matches!( + self.events.peek(), + Some(Event::End(Container::Footnote { .. })) + ) { + write!( + self.out, + r##"↩︎︎"##, + num + )?; + self.footnote_backlink_written = true; + } + } + self.out.write_str("

    ")?; + } Container::Heading { level } => write!(self.out, "", level)?, Container::TableCell => self.out.write_str("")?, Container::DescriptionTerm => self.out.write_str("")?, @@ -268,6 +310,13 @@ impl<'s, I: Iterator>, W: std::fmt::Write> Writer { } Event::Atom(a) => match a { + Atom::FootnoteReference(_tag, number) => { + write!( + self.out, + r##"{}"##, + number, number, number + )?; + } Atom::Ellipsis => self.out.write_str("…")?, Atom::EnDash => self.out.write_str("–")?, Atom::EmDash => self.out.write_str("—")?, @@ -279,6 +328,10 @@ impl<'s, I: Iterator>, W: std::fmt::Write> Writer { }, } } + if self.encountered_footnote { + self.out.write_str("\n
\n
")?; + } + self.out.write_char('\n')?; Ok(()) } } diff --git a/src/inline.rs b/src/inline.rs index 34e6c0a..44ff7a7 100644 --- a/src/inline.rs +++ b/src/inline.rs @@ -10,6 +10,7 @@ use Container::*; #[derive(Debug, Clone, PartialEq, Eq)] pub enum Atom { + FootnoteReference, Softbreak, Hardbreak, Escape, @@ -111,6 +112,7 @@ impl + Clone> Parser { self.parse_verbatim(&first) .or_else(|| self.parse_attributes(&first)) .or_else(|| self.parse_autolink(&first)) + .or_else(|| self.parse_footnote_reference(&first)) .or_else(|| self.parse_container(&first)) .or_else(|| self.parse_atom(&first)) .unwrap_or(Event { @@ -341,6 +343,52 @@ impl + Clone> Parser { } } + fn parse_footnote_reference(&mut self, first: &lex::Token) -> Option { + if first.kind == lex::Kind::Open(Delimiter::Bracket) + && matches!( + self.peek(), + Some(lex::Token { + kind: lex::Kind::Sym(Symbol::Caret), + .. + }) + ) + { + let tok = self.eat(); + debug_assert_eq!( + tok, + Some(lex::Token { + kind: lex::Kind::Sym(Symbol::Caret), + len: 1, + }) + ); + let mut ahead = self.lexer.chars(); + let mut end = false; + let len = (&mut ahead) + .take_while(|c| { + if *c == '[' { + return false; + } + if *c == ']' { + end = true; + }; + !end && *c != '\n' + }) + .count(); + end.then(|| { + self.lexer = lex::Lexer::new(ahead); + self.span = Span::by_len(self.span.end(), len); + let ev = Event { + kind: EventKind::Atom(FootnoteReference), + span: self.span, + }; + self.span = Span::by_len(self.span.end(), 1); + ev + }) + } else { + None + } + } + fn parse_container(&mut self, first: &lex::Token) -> Option { Delim::from_token(first.kind).map(|(delim, dir)| { self.openers @@ -633,6 +681,7 @@ impl + Clone> Iterator for Parser { #[cfg(test)] mod test { + use super::Atom::*; use super::Container::*; use super::EventKind::*; use super::Verbatim; @@ -928,6 +977,16 @@ mod test { test_parse!("", (Str, "")); } + #[test] + fn footnote_reference() { + test_parse!( + "text[^footnote]. more text", + (Str, "text"), + (Atom(FootnoteReference), "footnote"), + (Str, ". more text"), + ); + } + #[test] fn container_basic() { test_parse!( diff --git a/src/lib.rs b/src/lib.rs index 6285181..48343a2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -25,7 +25,7 @@ pub enum Event<'s> { /// A string object, text only. Str(CowStr<'s>), /// An atomic element. - Atom(Atom), + Atom(Atom<'s>), } #[derive(Debug, PartialEq, Eq)] @@ -41,7 +41,7 @@ pub enum Container<'s> { /// Details describing a term within a description list. DescriptionDetails, /// A footnote definition. - Footnote { tag: &'s str }, + Footnote { tag: &'s str, number: usize }, /// A table element. Table, /// A row element of a table. @@ -212,7 +212,9 @@ pub enum OrderedListStyle { } #[derive(Debug, PartialEq, Eq)] -pub enum Atom { +pub enum Atom<'s> { + /// A footnote reference. + FootnoteReference(&'s str, usize), /// A horizontal ellipsis, i.e. a set of three periods. Ellipsis, /// An en dash. @@ -257,7 +259,7 @@ impl<'s> Container<'s> { match c { block::Container::Blockquote => Self::Blockquote, block::Container::Div => panic!(), - block::Container::Footnote => Self::Footnote { tag: content }, + block::Container::Footnote => panic!(), block::Container::ListItem => todo!(), } } @@ -272,6 +274,14 @@ pub struct Parser<'s> { tree: block::Branch, inlines: span::InlineSpans<'s>, inline_parser: Option>>, + /// Footnote references in the order they were encountered, without duplicates. + footnote_references: Vec<&'s str>, + /// Cache of footnotes to emit at the end. + footnotes: std::collections::HashMap<&'s str, block::Branch>, + /// Next or current footnote being parsed and emitted. + footnote_index: usize, + /// Currently within a footnote. + footnote_active: bool, } impl<'s> Parser<'s> { @@ -305,6 +315,10 @@ impl<'s> Parser<'s> { _tree_data: tree, link_definitions, tree: branch, + footnote_references: Vec::new(), + footnotes: std::collections::HashMap::new(), + footnote_index: 0, + footnote_active: false, inlines: span::InlineSpans::new(src), inline_parser: None, } @@ -389,6 +403,30 @@ impl<'s> Parser<'s> { } } inline::EventKind::Atom(a) => Event::Atom(match a { + inline::Atom::FootnoteReference => { + let tag = match self.inlines.src(inline.span) { + CowStr::Borrowed(s) => s, + CowStr::Owned(..) => panic!(), + }; + let number = self + .footnote_references + .iter() + .position(|t| *t == tag) + .map_or_else( + || { + self.footnote_references.push(tag); + self.footnote_references.len() + }, + |i| i + 1, + ); + Atom::FootnoteReference( + match self.inlines.src(inline.span) { + CowStr::Borrowed(s) => s, + CowStr::Owned(..) => panic!(), + }, + number, + ) + } inline::Atom::Ellipsis => Atom::Ellipsis, inline::Atom::EnDash => Atom::EnDash, inline::Atom::EmDash => Atom::EmDash, @@ -439,6 +477,10 @@ impl<'s> Parser<'s> { block::Container::Div { .. } => Container::Div { class: (!ev.span.is_empty()).then(|| content), }, + block::Container::Footnote => { + self.footnotes.insert(content, self.tree.take_branch()); + continue; + } _ => Container::from_container_block(content, c), }; Event::Start(container, attributes) @@ -456,13 +498,43 @@ impl<'s> Parser<'s> { } None } + + fn footnote(&mut self) -> Option> { + if self.footnote_active { + let tag = self.footnote_references.get(self.footnote_index).unwrap(); + self.footnote_index += 1; + self.footnote_active = false; + Some(Event::End(Container::Footnote { + tag, + number: self.footnote_index, + })) + } else if let Some(tag) = self.footnote_references.get(self.footnote_index) { + self.tree = self + .footnotes + .remove(tag) + .unwrap_or_else(block::Branch::empty); + self.footnote_active = true; + + Some(Event::Start( + Container::Footnote { + tag, + number: self.footnote_index + 1, + }, + Attributes::new(), + )) + } else { + None + } + } } impl<'s> Iterator for Parser<'s> { type Item = Event<'s>; fn next(&mut self) -> Option { - self.inline().or_else(|| self.block()) + self.inline() + .or_else(|| self.block()) + .or_else(|| self.footnote()) } } @@ -730,6 +802,144 @@ mod test { ); } + #[test] + fn footnote_references() { + test_parse!( + "[^a][^b][^c]", + Start(Paragraph, Attributes::new()), + Atom(FootnoteReference("a", 1)), + Atom(FootnoteReference("b", 2)), + Atom(FootnoteReference("c", 3)), + End(Paragraph), + Start( + Footnote { + tag: "a", + number: 1 + }, + Attributes::new() + ), + End(Footnote { + tag: "a", + number: 1 + }), + Start( + Footnote { + tag: "b", + number: 2 + }, + Attributes::new() + ), + End(Footnote { + tag: "b", + number: 2 + }), + Start( + Footnote { + tag: "c", + number: 3 + }, + Attributes::new() + ), + End(Footnote { + tag: "c", + number: 3 + }), + ); + } + + #[test] + fn footnote() { + test_parse!( + "[^a]\n\n[^a]: a\n", + Start(Paragraph, Attributes::new()), + Atom(FootnoteReference("a", 1)), + End(Paragraph), + Atom(Blankline), + Start( + Footnote { + tag: "a", + number: 1 + }, + Attributes::new() + ), + Start(Paragraph, Attributes::new()), + Str("a".into()), + End(Paragraph), + End(Footnote { + tag: "a", + number: 1 + }), + ); + } + + #[test] + fn footnote_multiblock() { + test_parse!( + concat!( + "[^a]\n", + "\n", + "[^a]: abc\n", + "\n", + " def", // + ), + Start(Paragraph, Attributes::new()), + Atom(FootnoteReference("a", 1)), + End(Paragraph), + Atom(Blankline), + Start( + Footnote { + tag: "a", + number: 1 + }, + Attributes::new() + ), + Start(Paragraph, Attributes::new()), + Str("abc".into()), + End(Paragraph), + Atom(Blankline), + Start(Paragraph, Attributes::new()), + Str("def".into()), + End(Paragraph), + End(Footnote { + tag: "a", + number: 1 + }), + ); + } + + #[test] + fn footnote_post() { + test_parse!( + concat!( + "[^a]\n", + "\n", + "[^a]: note\n", + "para\n", // + ), + Start(Paragraph, Attributes::new()), + Atom(FootnoteReference("a", 1)), + End(Paragraph), + Atom(Blankline), + Start(Paragraph, Attributes::new()), + Str("para".into()), + End(Paragraph), + Start( + Footnote { + tag: "a", + number: 1 + }, + Attributes::new() + ), + Start(Paragraph, Attributes::new()), + Str("note".into()), + End(Paragraph), + End(Footnote { + tag: "a", + number: 1 + }), + ); + } + #[test] fn attr_block() { test_parse!( diff --git a/src/tree.rs b/src/tree.rs index beebacd..50b6628 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -50,6 +50,14 @@ pub struct Branch { } impl Branch { + pub fn empty() -> Self { + Self { + nodes: &[], + branch: Vec::new(), + head: None, + } + } + /// Count number of direct children nodes. pub fn count_children(&self) -> usize { let mut head = self.head; @@ -62,8 +70,6 @@ impl Branch { count } - /// Split off the remaining part of the current branch. The returned [`Branch`] will continue on - /// the branch, this [`Branch`] will skip over the current branch. pub fn take_branch(&mut self) -> Self { let head = self.head.take(); self.head = self.branch.pop();