diff --git a/src/attr.rs b/src/attr.rs
index 8a7bfa6..63079f6 100644
--- a/src/attr.rs
+++ b/src/attr.rs
@@ -100,6 +100,11 @@ impl<'s> Attributes<'s> {
}
}
+ /// Returns the value of the first attribute yielded by [`Self::iter`] whose key equals
+ /// `key`, or `None` if no attribute has that key.
+ #[must_use]
+ pub fn get(&self, key: &str) -> Option<&str> {
+     self.iter()
+         .find_map(|(name, value)| if name == key { Some(value) } else { None })
+ }
+
pub fn iter(&self) -> impl Iterator- + '_ {
self.0
.iter()
diff --git a/src/block.rs b/src/block.rs
index 288364b..0b411eb 100644
--- a/src/block.rs
+++ b/src/block.rs
@@ -59,7 +59,9 @@ pub enum Leaf {
/// Span is `#` characters.
/// Each inline is a line.
- Heading,
+ Heading {
+ has_section: bool,
+ },
/// Span is '|'.
/// Has zero or one inline for the cell contents.
@@ -254,7 +256,7 @@ impl<'s> TreeParser<'s> {
fn parse_leaf(
&mut self,
- leaf: Leaf,
+ mut leaf: Leaf,
k: &Kind,
span: Span,
lines: &mut [Span],
@@ -300,6 +302,10 @@ impl<'s> TreeParser<'s> {
}
}
+ if let Leaf::Heading { has_section } = &mut leaf {
+ *has_section = top_level;
+ }
+
self.tree.enter(Node::Leaf(leaf), span);
lines
.iter()
@@ -573,7 +579,7 @@ impl From<&Kind> for Block {
match kind {
Kind::Atom(a) => Self::Atom(*a),
Kind::Paragraph => Self::Leaf(Paragraph),
- Kind::Heading { .. } => Self::Leaf(Heading),
+ Kind::Heading { .. } => Self::Leaf(Heading { has_section: false }),
Kind::Fenced {
kind: FenceKind::CodeBlock(..),
..
@@ -983,13 +989,13 @@ mod test {
"## b\n", //
),
(Enter(Container(Section)), "#"),
- (Enter(Leaf(Heading)), "#"),
+ (Enter(Leaf(Heading { has_section: true })), "#"),
(Inline, "a"),
- (Exit(Leaf(Heading)), "#"),
+ (Exit(Leaf(Heading { has_section: true })), "#"),
(Enter(Container(Section)), "##"),
- (Enter(Leaf(Heading)), "##"),
+ (Enter(Leaf(Heading { has_section: true })), "##"),
(Inline, "b"),
- (Exit(Leaf(Heading)), "##"),
+ (Exit(Leaf(Heading { has_section: true })), "##"),
(Exit(Container(Section)), "##"),
(Exit(Container(Section)), "#"),
);
@@ -1003,9 +1009,9 @@ mod test {
"heading\n", //
),
(Enter(Container(Section)), "#"),
- (Enter(Leaf(Heading)), "#"),
+ (Enter(Leaf(Heading { has_section: true })), "#"),
(Inline, "heading"),
- (Exit(Leaf(Heading)), "#"),
+ (Exit(Leaf(Heading { has_section: true })), "#"),
(Exit(Container(Section)), "#"),
);
}
@@ -1021,17 +1027,17 @@ mod test {
"15\n", //
),
(Enter(Container(Section)), "#"),
- (Enter(Leaf(Heading)), "#"),
+ (Enter(Leaf(Heading { has_section: true })), "#"),
(Inline, "2"),
- (Exit(Leaf(Heading)), "#"),
+ (Exit(Leaf(Heading { has_section: true })), "#"),
(Atom(Blankline), "\n"),
(Exit(Container(Section)), "#"),
(Enter(Container(Section)), "#"),
- (Enter(Leaf(Heading)), "#"),
+ (Enter(Leaf(Heading { has_section: true })), "#"),
(Inline, "8\n"),
(Inline, "12\n"),
(Inline, "15"),
- (Exit(Leaf(Heading)), "#"),
+ (Exit(Leaf(Heading { has_section: true })), "#"),
(Exit(Container(Section)), "#"),
);
}
@@ -1045,11 +1051,11 @@ mod test {
"c\n", //
),
(Enter(Container(Section)), "#"),
- (Enter(Leaf(Heading)), "#"),
+ (Enter(Leaf(Heading { has_section: true })), "#"),
(Inline, "a\n"),
(Inline, "b\n"),
(Inline, "c"),
- (Exit(Leaf(Heading)), "#"),
+ (Exit(Leaf(Heading { has_section: true })), "#"),
(Exit(Container(Section)), "#"),
);
}
@@ -1071,39 +1077,39 @@ mod test {
"# b\n",
),
(Enter(Container(Section)), "#"),
- (Enter(Leaf(Heading)), "#"),
+ (Enter(Leaf(Heading { has_section: true })), "#"),
(Inline, "a"),
- (Exit(Leaf(Heading)), "#"),
+ (Exit(Leaf(Heading { has_section: true })), "#"),
(Atom(Blankline), "\n"),
(Enter(Container(Section)), "##"),
- (Enter(Leaf(Heading)), "##"),
+ (Enter(Leaf(Heading { has_section: true })), "##"),
(Inline, "aa"),
- (Exit(Leaf(Heading)), "##"),
+ (Exit(Leaf(Heading { has_section: true })), "##"),
(Atom(Blankline), "\n"),
(Enter(Container(Section)), "####"),
- (Enter(Leaf(Heading)), "####"),
+ (Enter(Leaf(Heading { has_section: true })), "####"),
(Inline, "aaaa"),
- (Exit(Leaf(Heading)), "####"),
+ (Exit(Leaf(Heading { has_section: true })), "####"),
(Atom(Blankline), "\n"),
(Exit(Container(Section)), "####"),
(Exit(Container(Section)), "##"),
(Enter(Container(Section)), "##"),
- (Enter(Leaf(Heading)), "##"),
+ (Enter(Leaf(Heading { has_section: true })), "##"),
(Inline, "ab"),
- (Exit(Leaf(Heading)), "##"),
+ (Exit(Leaf(Heading { has_section: true })), "##"),
(Atom(Blankline), "\n"),
(Enter(Container(Section)), "###"),
- (Enter(Leaf(Heading)), "###"),
+ (Enter(Leaf(Heading { has_section: true })), "###"),
(Inline, "aba"),
- (Exit(Leaf(Heading)), "###"),
+ (Exit(Leaf(Heading { has_section: true })), "###"),
(Atom(Blankline), "\n"),
(Exit(Container(Section)), "###"),
(Exit(Container(Section)), "##"),
(Exit(Container(Section)), "#"),
(Enter(Container(Section)), "#"),
- (Enter(Leaf(Heading)), "#"),
+ (Enter(Leaf(Heading { has_section: true })), "#"),
(Inline, "b"),
- (Exit(Leaf(Heading)), "#"),
+ (Exit(Leaf(Heading { has_section: true })), "#"),
(Exit(Container(Section)), "#"),
);
}
@@ -1141,9 +1147,9 @@ mod test {
(Inline, "a"),
(Exit(Leaf(Paragraph)), ""),
(Atom(Blankline), "\n"),
- (Enter(Leaf(Heading)), "##"),
+ (Enter(Leaf(Heading { has_section: false })), "##"),
(Inline, "hl"),
- (Exit(Leaf(Heading)), "##"),
+ (Exit(Leaf(Heading { has_section: false })), "##"),
(Atom(Blankline), "\n"),
(Enter(Leaf(Paragraph)), ""),
(Inline, "para"),
diff --git a/src/html.rs b/src/html.rs
index 37c61d3..8554f1b 100644
--- a/src/html.rs
+++ b/src/html.rs
@@ -148,7 +148,7 @@ impl<'s, I: Iterator
- >, W: std::fmt::Write> Writer<'s, I, W> {
}
Container::Table => self.out.write_str("
self.out.write_str(" self.out.write_str(" self.out.write_str(" self.out.write_str(" {
if matches!(self.list_tightness.last(), Some(true)) {
@@ -156,7 +156,7 @@ impl<'s, I: Iterator
- >, W: std::fmt::Write> Writer<'s, I, W> {
}
self.out.write_str("
write!(self.out, " write!(self.out, " self.out.write_str(" self.out.write_str(" | self.out.write_str(">, W: std::fmt::Write> Writer<'s, I, W> {
write!(self.out, r#" {}="{}""#, a, v)?;
}
+ if let Container::Heading {
+ id,
+ has_section: false,
+ ..
+ }
+ | Container::Section { id } = &c
+ {
+ if !attrs.iter().any(|(a, _)| a == "id") {
+ write!(self.out, r#" id="{}""#, id)?;
+ }
+ }
+
if attrs.iter().any(|(a, _)| a == "class")
|| matches!(
c,
@@ -312,7 +324,7 @@ impl<'s, I: Iterator- >, W: std::fmt::Write> Writer<'s, I, W> {
}
Container::Table => self.out.write_str("
|
")?,
Container::TableRow { .. } => self.out.write_str("")?,
- Container::Section => self.out.write_str("")?,
+ Container::Section { .. } => self.out.write_str("")?,
Container::Div { .. } => self.out.write_str("")?,
Container::Paragraph => {
if matches!(self.list_tightness.last(), Some(true)) {
@@ -333,7 +345,7 @@ impl<'s, I: Iterator- >, W: std::fmt::Write> Writer<'s, I, W> {
}
self.out.write_str("
")?;
}
- Container::Heading { level } => write!(self.out, "", level)?,
+ Container::Heading { level, .. } => write!(self.out, "", level)?,
Container::TableCell { head: false, .. } => self.out.write_str("")?,
Container::TableCell { head: true, .. } => self.out.write_str("")?,
Container::Caption => self.out.write_str("")?,
diff --git a/src/lib.rs b/src/lib.rs
index 52f0265..f0bef23 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,3 +1,5 @@
+use std::fmt::Write;
+
pub mod html;
mod attr;
@@ -49,13 +51,17 @@ pub enum Container<'s> {
/// A row element of a table.
TableRow { head: bool },
/// A section belonging to a top level heading.
- Section,
+ Section { id: CowStr<'s> },
/// A block-level divider element.
Div { class: Option<&'s str> },
/// A paragraph.
Paragraph,
/// A heading.
- Heading { level: u16 },
+ Heading {
+ level: u16,
+ has_section: bool,
+ id: CowStr<'s>,
+ },
/// A cell element of row within a table.
TableCell { alignment: Alignment, head: bool },
/// A caption within a table.
@@ -107,7 +113,7 @@ impl<'s> Container<'s> {
| Self::Footnote { .. }
| Self::Table
| Self::TableRow { .. }
- | Self::Section
+ | Self::Section { .. }
| Self::Div { .. }
| Self::Paragraph
| Self::Heading { .. }
@@ -144,7 +150,7 @@ impl<'s> Container<'s> {
| Self::Footnote { .. }
| Self::Table
| Self::TableRow { .. }
- | Self::Section
+ | Self::Section { .. }
| Self::Div { .. } => true,
Self::Paragraph
| Self::Heading { .. }
@@ -321,15 +327,12 @@ impl OrderedListStyle {
pub struct Parser<'s> {
src: &'s str,
- /// Link definitions encountered during block parse, written once.
- link_definitions: std::collections::HashMap<&'s str, (CowStr<'s>, attr::Attributes<'s>)>,
-
- /// Block tree cursor.
+ /// Block tree parsed at first.
tree: block::Tree,
- /// Spans to the inlines in the block currently being parsed.
- inlines: span::InlineSpans<'s>,
- /// Inline parser, recreated for each new inline.
- inline_parser: Option>>,
+
+ /// Contents obtained by the prepass.
+ pre_pass: PrePass<'s>,
+
/// Last parsed block attributes
block_attributes: Attributes<'s>,
@@ -344,47 +347,168 @@ pub struct Parser<'s> {
footnote_index: usize,
/// Currently within a footnote.
footnote_active: bool,
+
+ /// Spans to the inlines in the leaf block currently being parsed.
+ inlines: span::InlineSpans<'s>,
+ /// Inline parser, recreated for each new inline.
+ inline_parser: Option>>,
+}
+
+/// A heading collected by the pre-pass, together with the ids it can be referred to by.
+struct Heading {
+    /// Location of heading in src.
+    location: usize,
+    /// Automatically generated id from heading text.
+    id_auto: String,
+    /// Overriding id from an explicit `id` attribute on the heading, if one was given.
+    id_override: Option<String>,
+}
+
+/// Because of potential future references, an initial pass is required to obtain all definitions.
+struct PrePass<'s> {
+    /// Link definitions and their attributes, keyed by the definition's tag.
+    link_definitions: std::collections::HashMap<&'s str, (CowStr<'s>, attr::Attributes<'s>)>,
+    /// Cache of all headings, in the order they were encountered in the block tree.
+    headings: Vec<Heading>,
+    /// Indices into `headings`, sorted lexicographically by automatically generated id.
+    headings_lex: Vec<usize>,
+}
+
+impl<'s> PrePass<'s> {
+ /// Walks the whole block tree once and collects everything that may be referenced before
+ /// it is defined: link definitions (with their attributes) and heading ids.
+ ///
+ /// `src` is the text that the spans of `tree` are resolved against. `tree` is consumed by
+ /// the traversal.
+ ///
+ /// An automatic heading id is built from the heading's `Str` inline events: each run of
+ /// whitespace becomes a single `-` (skipped while the id is still empty), ASCII punctuation
+ /// other than `-` and `_` is dropped, a soft break becomes `-`, and trailing `-` are
+ /// removed. An empty id becomes `s`, and a numeric suffix (`-1`, `-2`, ...) is appended
+ /// when the id is empty or already used as the automatic id of an earlier heading.
+ #[must_use]
+ fn new(src: &'s str, mut tree: block::Tree) -> Self {
+     let mut link_definitions = std::collections::HashMap::new();
+     let mut headings: Vec<Heading> = Vec::new();
+
+     let mut inlines = span::InlineSpans::new(src);
+
+     // Span of the attribute block seen just before the current event, if any.
+     let mut attr_prev: Option<span::Span> = None;
+     // The loop body also advances `tree` (`take_inlines`), so a `for` loop cannot be used.
+     while let Some(e) = tree.next() {
+         match e.kind {
+             tree::EventKind::Enter(block::Node::Leaf(block::Leaf::LinkDefinition)) => {
+                 // All link definition tags have to be obtained initially, as references can
+                 // appear before the definition.
+                 let tag = e.span.of(src);
+                 let attrs =
+                     attr_prev.map_or_else(Attributes::new, |sp| attr::parse(sp.of(src)));
+                 let url = match tree.count_children() {
+                     0 => "".into(),
+                     // Exactly one child was counted above, so `next()` yields a span.
+                     1 => tree.take_inlines().next().unwrap().of(src).trim().into(),
+                     _ => tree.take_inlines().map(|sp| sp.of(src).trim()).collect(),
+                 };
+                 link_definitions.insert(tag, (url, attrs));
+             }
+             tree::EventKind::Enter(block::Node::Leaf(block::Leaf::Heading { .. })) => {
+                 // All headings ids have to be obtained initially, as references can appear
+                 // before the heading. Additionally, determining the id requires inline parsing
+                 // as formatting must be removed.
+                 //
+                 // We choose to parse all headers twice instead of caching them.
+                 let attrs = attr_prev.map(|sp| attr::parse(sp.of(src)));
+                 let id_override = attrs
+                     .as_ref()
+                     .and_then(|attrs| attrs.get("id"))
+                     .map(ToString::to_string);
+
+                 inlines.set_spans(tree.take_inlines());
+                 let mut id_auto = String::new();
+                 inline::Parser::new(inlines.chars()).for_each(|ev| match ev.kind {
+                     inline::EventKind::Str => {
+                         let mut chars = inlines.slice(ev.span).chars().peekable();
+                         while let Some(c) = chars.next() {
+                             if c.is_whitespace() {
+                                 // Collapse the rest of the whitespace run into one separator.
+                                 while chars.peek().map_or(false, |c| c.is_whitespace()) {
+                                     chars.next();
+                                 }
+                                 // Never start the id with a separator.
+                                 if !id_auto.is_empty() {
+                                     id_auto.push('-');
+                                 }
+                             } else if !c.is_ascii_punctuation() || matches!(c, '-' | '_') {
+                                 id_auto.push(c);
+                             }
+                         }
+                     }
+                     inline::EventKind::Atom(inline::Atom::Softbreak) => {
+                         id_auto.push('-');
+                     }
+                     _ => {}
+                 });
+                 // Drop trailing separators.
+                 id_auto.truncate(id_auto.trim_end_matches('-').len());
+
+                 // ensure id unique
+                 if headings.iter().any(|h| h.id_auto == id_auto) || id_auto.is_empty() {
+                     if id_auto.is_empty() {
+                         id_auto.push('s');
+                     }
+                     let mut num = 1;
+                     id_auto.push('-');
+                     // Everything from here on is the numeric suffix, rewritten on each retry.
+                     let i_num = id_auto.len();
+                     write!(id_auto, "{}", num).unwrap();
+                     while headings.iter().any(|h| h.id_auto == id_auto) {
+                         num += 1;
+                         id_auto.truncate(i_num);
+                         write!(id_auto, "{}", num).unwrap();
+                     }
+                 }
+
+                 headings.push(Heading {
+                     location: e.span.start(),
+                     id_auto,
+                     id_override,
+                 });
+             }
+             tree::EventKind::Atom(block::Atom::Attributes) => {
+                 attr_prev = Some(e.span);
+             }
+             // Entering any other node, or leaving a section, keeps the pending attributes.
+             tree::EventKind::Enter(..)
+             | tree::EventKind::Exit(block::Node::Container(block::Container::Section {
+                 ..
+             })) => {}
+             _ => {
+                 attr_prev = None;
+             }
+         }
+     }
+
+     // Secondary index so that headings can be found by automatic id with a binary search.
+     let mut headings_lex = (0..headings.len()).collect::<Vec<_>>();
+     headings_lex.sort_by_key(|i| &headings[*i].id_auto);
+
+     Self {
+         link_definitions,
+         headings,
+         headings_lex,
+     }
+ }
+
+ /// Returns the id of the heading at index `i` of `headings`: the explicit override when
+ /// one was given, otherwise the automatically generated id.
+ ///
+ /// Panics if `i` is out of bounds.
+ fn heading_id(&self, i: usize) -> &str {
+     let heading = &self.headings[i];
+     match &heading.id_override {
+         Some(explicit) => explicit,
+         None => &heading.id_auto,
+     }
+ }
+
+ /// Returns the id of the heading whose recorded `location` equals `location`, or `None`
+ /// if there is no such heading.
+ ///
+ /// Uses a binary search over `headings`, so it relies on the headings having been
+ /// collected in ascending `location` order (presumably guaranteed by the tree being
+ /// traversed in source order; verify against the pre-pass).
+ fn heading_id_by_location(&self, location: usize) -> Option<&str> {
+     let found = self
+         .headings
+         .binary_search_by_key(&location, |heading| heading.location);
+     match found {
+         Ok(index) => Some(self.heading_id(index)),
+         Err(_) => None,
+     }
+ }
+
+ /// Looks up a heading by its automatically generated id and returns the id it should be
+ /// referred to by (the explicit override when one was given).
+ ///
+ /// Returns `None` if no heading has `tag` as its automatic id.
+ fn heading_id_by_tag(&self, tag: &str) -> Option<&str> {
+     self.headings_lex
+         .binary_search_by_key(&tag, |i| self.headings[*i].id_auto.as_str())
+         .ok()
+         // The search yields a position in `headings_lex`; it has to be mapped back to an
+         // index into `headings` before the heading can be looked up.
+         .map(|pos| self.heading_id(self.headings_lex[pos]))
+ }
}
impl<'s> Parser<'s> {
#[must_use]
pub fn new(src: &'s str) -> Self {
let tree = block::parse(src);
-
- // All link definition tags have to be obtained initially, as references can appear before
- // the definition.
- let link_definitions = {
- let mut branch = tree.clone();
- let mut defs = std::collections::HashMap::new();
- let mut attr_prev: Option = None;
- while let Some(e) = branch.next() {
- if let tree::EventKind::Enter(block::Node::Leaf(block::Leaf::LinkDefinition)) =
- e.kind
- {
- let tag = e.span.of(src);
- let attrs =
- attr_prev.map_or_else(Attributes::new, |sp| attr::parse(sp.of(src)));
- let url = match branch.count_children() {
- 0 => "".into(),
- 1 => branch.take_inlines().next().unwrap().of(src).trim().into(),
- _ => branch.take_inlines().map(|sp| sp.of(src).trim()).collect(),
- };
- defs.insert(tag, (url, attrs));
- } else if let tree::EventKind::Atom(block::Atom::Attributes) = e.kind {
- attr_prev = Some(e.span);
- } else {
- attr_prev = None;
- }
- }
- defs
- };
-
- let branch = tree.clone();
+ let pre_pass = PrePass::new(src, tree.clone());
Self {
src,
- link_definitions,
- tree: branch,
+ tree,
+ pre_pass,
block_attributes: Attributes::new(),
table_head_row: false,
footnote_references: Vec::new(),
@@ -453,12 +577,18 @@ impl<'s> Parser<'s> {
CowStr::Owned(s) => s.replace('\n', " ").into(),
s @ CowStr::Borrowed(_) => s,
};
- let (url, attrs_def) = self
- .link_definitions
- .get(tag.as_ref())
- .cloned()
- .unwrap_or_else(|| ("".into(), Attributes::new()));
- attributes.union(attrs_def);
+ let link_def =
+ self.pre_pass.link_definitions.get(tag.as_ref()).cloned();
+
+ let url = if let Some((url, attrs_def)) = link_def {
+ attributes.union(attrs_def);
+ url
+ } else {
+ self.pre_pass
+ .heading_id_by_tag(tag.as_ref())
+ .map_or_else(|| "".into(), |id| format!("#{}", id).into())
+ };
+
if matches!(c, inline::Container::ReferenceLink) {
Container::Link(url, LinkType::Span(SpanLinkType::Reference))
} else {
@@ -561,8 +691,15 @@ impl<'s> Parser<'s> {
}
match l {
block::Leaf::Paragraph => Container::Paragraph,
- block::Leaf::Heading => Container::Heading {
+ block::Leaf::Heading { has_section } => Container::Heading {
level: content.len().try_into().unwrap(),
+ has_section,
+ id: self
+ .pre_pass
+ .heading_id_by_location(ev.span.start())
+ .unwrap_or_default()
+ .to_string()
+ .into(),
},
block::Leaf::CodeBlock => {
if let Some(format) = content.strip_prefix('=') {
@@ -631,7 +768,14 @@ impl<'s> Parser<'s> {
}
Container::TableRow { head }
}
- block::Container::Section => Container::Section,
+ block::Container::Section => Container::Section {
+ id: self
+ .pre_pass
+ .heading_id_by_location(ev.span.start())
+ .unwrap_or_default()
+ .to_string()
+ .into(),
+ },
},
};
if enter {
@@ -751,20 +895,49 @@ mod test {
fn heading() {
test_parse!(
"#\n",
- Start(Section, Attributes::new()),
- Start(Heading { level: 1 }, Attributes::new()),
- End(Heading { level: 1 }),
- End(Section),
+ Start(Section { id: "s-1".into() }, Attributes::new()),
+ Start(
+ Heading {
+ level: 1,
+ has_section: true,
+ id: "s-1".into()
+ },
+ Attributes::new()
+ ),
+ End(Heading {
+ level: 1,
+ has_section: true,
+ id: "s-1".into()
+ }),
+ End(Section { id: "s-1".into() }),
);
test_parse!(
"# abc\ndef\n",
- Start(Section, Attributes::new()),
- Start(Heading { level: 1 }, Attributes::new()),
+ Start(
+ Section {
+ id: "abc-def".into()
+ },
+ Attributes::new()
+ ),
+ Start(
+ Heading {
+ level: 1,
+ has_section: true,
+ id: "abc-def".into()
+ },
+ Attributes::new()
+ ),
Str("abc".into()),
Atom(Softbreak),
Str("def".into()),
- End(Heading { level: 1 }),
- End(Section),
+ End(Heading {
+ level: 1,
+ has_section: true,
+ id: "abc-def".into(),
+ }),
+ End(Section {
+ id: "abc-def".into()
+ }),
);
}
@@ -776,16 +949,41 @@ mod test {
"{a=b}\n",
"# def\n", //
),
- Start(Section, Attributes::new()),
- Start(Heading { level: 1 }, Attributes::new()),
+ Start(Section { id: "abc".into() }, Attributes::new()),
+ Start(
+ Heading {
+ level: 1,
+ has_section: true,
+ id: "abc".into()
+ },
+ Attributes::new()
+ ),
Str("abc".into()),
- End(Heading { level: 1 }),
- End(Section),
- Start(Section, [("a", "b")].into_iter().collect(),),
- Start(Heading { level: 1 }, Attributes::new(),),
+ End(Heading {
+ level: 1,
+ has_section: true,
+ id: "abc".into(),
+ }),
+ End(Section { id: "abc".into() }),
+ Start(
+ Section { id: "def".into() },
+ [("a", "b")].into_iter().collect(),
+ ),
+ Start(
+ Heading {
+ level: 1,
+ has_section: true,
+ id: "def".into()
+ },
+ Attributes::new(),
+ ),
Str("def".into()),
- End(Heading { level: 1 }),
- End(Section),
+ End(Heading {
+ level: 1,
+ has_section: true,
+ id: "def".into(),
+ }),
+ End(Section { id: "def".into() }),
);
}
diff --git a/src/span.rs b/src/span.rs
index 3874063..126f9b4 100644
--- a/src/span.rs
+++ b/src/span.rs
@@ -302,7 +302,7 @@ impl<'s, 'i> DiscontinuousString<'s> for InlineSpansSlice<'s, 'i> {
}
}
-type InlineSpansSliceIter<'i> = std::iter::Chain<
+pub type InlineSpansSliceIter<'i> = std::iter::Chain<
std::iter::Chain, std::iter::Copied>>,
std::iter::Once,
>;