PR #39 add source map

Merge branch 'spans2'
This commit is contained in:
Noah Hellman 2023-05-13 23:11:52 +02:00
commit 70303e7e4b
12 changed files with 1644 additions and 1201 deletions

View file

@ -72,7 +72,6 @@ jobs:
matrix: matrix:
target: target:
- parse - parse
- parse_balance
- html - html
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:

View file

@ -21,7 +21,10 @@
output.innerText = jotdown_render(input.innerText); output.innerText = jotdown_render(input.innerText);
} else if (fmt.value == "events") { } else if (fmt.value == "events") {
output.classList.add("verbatim") output.classList.add("verbatim")
output.innerText = jotdown_parse(input.innerText); output.innerText = jotdown_parse(input.innerText, false);
} else if (fmt.value == "events_spans") {
output.classList.add("verbatim")
output.innerText = jotdown_parse(input.innerText, true);
} else if (fmt.value == "events_indent") { } else if (fmt.value == "events_indent") {
output.classList.add("verbatim") output.classList.add("verbatim")
output.innerText = jotdown_parse_indent(input.innerText); output.innerText = jotdown_parse_indent(input.innerText);
@ -50,6 +53,7 @@
<option value="preview">preview</option> <option value="preview">preview</option>
<option value="html">html</option> <option value="html">html</option>
<option value="events">events</option> <option value="events">events</option>
<option value="events_spans">events (with offsets)</option>
<option value="events_indent">events (indented)</option> <option value="events_indent">events (indented)</option>
</select> </select>
</div> </div>

View file

@ -22,10 +22,16 @@ pub fn jotdown_render(djot: &str) -> String {
#[must_use] #[must_use]
#[wasm_bindgen] #[wasm_bindgen]
pub fn jotdown_parse(djot: &str) -> String { pub fn jotdown_parse(djot: &str, spans: bool) -> String {
jotdown::Parser::new(djot) let mut out = String::new();
.map(|e| format!("{:?}\n", e)) for (e, sp) in jotdown::Parser::new(djot).into_offset_iter() {
.collect() write!(out, "{:?}", e).unwrap();
if spans {
write!(out, " {:?} {:?}", &djot[sp.clone()], sp).unwrap();
}
writeln!(out).unwrap();
}
out
} }
#[must_use] #[must_use]

File diff suppressed because it is too large Load diff

View file

@ -12,9 +12,9 @@ use Container::*;
use ControlFlow::*; use ControlFlow::*;
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub enum Atom { pub enum Atom<'s> {
FootnoteReference, FootnoteReference { label: &'s str },
Symbol, Symbol(&'s str),
Softbreak, Softbreak,
Hardbreak, Hardbreak,
Escape, Escape,
@ -26,7 +26,7 @@ pub enum Atom {
} }
#[derive(Debug, Copy, Clone, PartialEq, Eq)] #[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum Container { pub enum Container<'s> {
Span, Span,
Subscript, Subscript,
Superscript, Superscript,
@ -36,16 +36,14 @@ pub enum Container {
Strong, Strong,
Mark, Mark,
Verbatim, Verbatim,
/// Span is the format. RawFormat { format: &'s str },
RawFormat,
InlineMath, InlineMath,
DisplayMath, DisplayMath,
ReferenceLink(CowStrIndex), ReferenceLink(CowStrIndex),
ReferenceImage(CowStrIndex), ReferenceImage(CowStrIndex),
InlineLink(CowStrIndex), InlineLink(CowStrIndex),
InlineImage(CowStrIndex), InlineImage(CowStrIndex),
/// Open delimiter span is URL, closing is '>'. Autolink(&'s str),
Autolink,
} }
type CowStrIndex = u32; type CowStrIndex = u32;
@ -57,10 +55,10 @@ pub enum QuoteType {
} }
#[derive(Clone, Debug, PartialEq, Eq)] #[derive(Clone, Debug, PartialEq, Eq)]
pub enum EventKind { pub enum EventKind<'s> {
Enter(Container), Enter(Container<'s>),
Exit(Container), Exit(Container<'s>),
Atom(Atom), Atom(Atom<'s>),
Str, Str,
Attributes { Attributes {
container: bool, container: bool,
@ -72,8 +70,8 @@ pub enum EventKind {
type AttributesIndex = u32; type AttributesIndex = u32;
#[derive(Clone, Debug, PartialEq, Eq)] #[derive(Clone, Debug, PartialEq, Eq)]
pub struct Event { pub struct Event<'s> {
pub kind: EventKind, pub kind: EventKind<'s>,
pub span: Span, pub span: Span,
} }
@ -218,7 +216,7 @@ pub struct Parser<'s> {
openers: Vec<(Opener, usize)>, openers: Vec<(Opener, usize)>,
/// Buffer queue for next events. Events are buffered until no modifications due to future /// Buffer queue for next events. Events are buffered until no modifications due to future
/// characters are needed. /// characters are needed.
events: std::collections::VecDeque<Event>, events: std::collections::VecDeque<Event<'s>>,
/// State if inside a verbatim container. /// State if inside a verbatim container.
verbatim: Option<VerbatimState>, verbatim: Option<VerbatimState>,
/// State if currently parsing potential attributes. /// State if currently parsing potential attributes.
@ -268,12 +266,12 @@ impl<'s> Parser<'s> {
self.store_attributes.clear(); self.store_attributes.clear();
} }
fn push_sp(&mut self, kind: EventKind, span: Span) -> Option<ControlFlow> { fn push_sp(&mut self, kind: EventKind<'s>, span: Span) -> Option<ControlFlow> {
self.events.push_back(Event { kind, span }); self.events.push_back(Event { kind, span });
Some(Continue) Some(Continue)
} }
fn push(&mut self, kind: EventKind) -> Option<ControlFlow> { fn push(&mut self, kind: EventKind<'s>) -> Option<ControlFlow> {
self.push_sp(kind, self.input.span) self.push_sp(kind, self.input.span)
} }
@ -310,17 +308,16 @@ impl<'s> Parser<'s> {
&& matches!(first.kind, lex::Kind::Seq(Sequence::Backtick)) && matches!(first.kind, lex::Kind::Seq(Sequence::Backtick))
{ {
let raw_format = self.input.ahead_raw_format(); let raw_format = self.input.ahead_raw_format();
let mut span_closer = self.input.span;
if let Some(span_format) = raw_format { if let Some(span_format) = raw_format {
self.events[event_opener].kind = EventKind::Enter(RawFormat); self.events[event_opener].kind = EventKind::Enter(RawFormat {
self.events[event_opener].span = span_format; format: span_format.of(self.input.src),
self.input.span = span_format.translate(1); });
span_closer = span_format; self.input.span = Span::new(self.input.span.start(), span_format.end() + 1);
}; };
let ty_opener = if let EventKind::Enter(ty) = self.events[event_opener].kind { let ty_opener = if let EventKind::Enter(ty) = self.events[event_opener].kind {
debug_assert!(matches!( debug_assert!(matches!(
ty, ty,
Verbatim | RawFormat | InlineMath | DisplayMath Verbatim | RawFormat { .. } | InlineMath | DisplayMath
)); ));
ty ty
} else { } else {
@ -330,7 +327,7 @@ impl<'s> Parser<'s> {
{ {
self.events.drain(*event_skip..); self.events.drain(*event_skip..);
} }
self.push_sp(EventKind::Exit(ty_opener), span_closer); self.push(EventKind::Exit(ty_opener));
self.verbatim = None; self.verbatim = None;
if raw_format.is_none() if raw_format.is_none()
&& self.input.peek().map_or(false, |t| { && self.input.peek().map_or(false, |t| {
@ -527,7 +524,13 @@ impl<'s> Parser<'s> {
self.input.span = Span::new(start_attr, state.end_attr); self.input.span = Span::new(start_attr, state.end_attr);
self.input.lexer = lex::Lexer::new(&self.input.src[state.end_attr..line_end]); self.input.lexer = lex::Lexer::new(&self.input.src[state.end_attr..line_end]);
if !attrs.is_empty() { if attrs.is_empty() {
if matches!(state.elem_ty, AttributesElementType::Container { .. }) {
let last = self.events.len() - 1;
self.events[last].span =
Span::new(self.events[last].span.start(), self.input.span.end());
}
} else {
let attr_index = self.store_attributes.len() as AttributesIndex; let attr_index = self.store_attributes.len() as AttributesIndex;
self.store_attributes.push(attrs); self.store_attributes.push(attrs);
let attr_event = Event { let attr_event = Event {
@ -540,11 +543,13 @@ impl<'s> Parser<'s> {
match state.elem_ty { match state.elem_ty {
AttributesElementType::Container { e_placeholder } => { AttributesElementType::Container { e_placeholder } => {
self.events[e_placeholder] = attr_event; self.events[e_placeholder] = attr_event;
let last = self.events.len() - 1;
if matches!(self.events[e_placeholder + 1].kind, EventKind::Str) { if matches!(self.events[e_placeholder + 1].kind, EventKind::Str) {
self.events[e_placeholder + 1].kind = EventKind::Enter(Span); self.events[e_placeholder + 1].kind = EventKind::Enter(Span);
let last = self.events.len() - 1;
self.events[last].kind = EventKind::Exit(Span); self.events[last].kind = EventKind::Exit(Span);
} }
self.events[last].span =
Span::new(self.events[last].span.start(), self.input.span.end());
} }
AttributesElementType::Word => { AttributesElementType::Word => {
self.events.push_back(attr_event); self.events.push_back(attr_event);
@ -577,12 +582,13 @@ impl<'s> Parser<'s> {
.sum(); .sum();
if end && is_url { if end && is_url {
self.input.lexer = lex::Lexer::new(ahead.as_str()); self.input.lexer = lex::Lexer::new(ahead.as_str());
self.input.span = self.input.span.after(len); let span_url = self.input.span.after(len);
self.push(EventKind::Enter(Autolink)); let url = span_url.of(self.input.src);
self.push(EventKind::Enter(Autolink(url)));
self.input.span = span_url;
self.push(EventKind::Str); self.push(EventKind::Str);
self.push(EventKind::Exit(Autolink));
self.input.span = self.input.span.after(1); self.input.span = self.input.span.after(1);
return Some(Continue); return self.push(EventKind::Exit(Autolink(url)));
} }
} }
None None
@ -606,10 +612,11 @@ impl<'s> Parser<'s> {
.sum(); .sum();
if end && valid { if end && valid {
self.input.lexer = lex::Lexer::new(ahead.as_str()); self.input.lexer = lex::Lexer::new(ahead.as_str());
self.input.span = self.input.span.after(len); let span_symbol = self.input.span.after(len);
self.push(EventKind::Atom(Symbol)); self.input.span = Span::new(self.input.span.start(), span_symbol.end() + 1);
self.input.span = self.input.span.after(1); return self.push(EventKind::Atom(Atom::Symbol(
return Some(Continue); span_symbol.of(self.input.src),
)));
} }
} }
None None
@ -649,10 +656,10 @@ impl<'s> Parser<'s> {
.sum(); .sum();
if end { if end {
self.input.lexer = lex::Lexer::new(ahead.as_str()); self.input.lexer = lex::Lexer::new(ahead.as_str());
self.input.span = self.input.span.after(len); let span_label = self.input.span.after(len);
self.push(EventKind::Atom(FootnoteReference)); let label = span_label.of(self.input.src);
self.input.span = self.input.span.after(1); self.input.span = Span::new(self.input.span.start(), span_label.end() + 1);
return Some(Continue); return self.push(EventKind::Atom(FootnoteReference { label }));
} }
} }
None None
@ -925,7 +932,7 @@ impl<'s> Parser<'s> {
self.push(EventKind::Atom(atom)) self.push(EventKind::Atom(atom))
} }
fn merge_str_events(&mut self, span_str: Span) -> Event { fn merge_str_events(&mut self, span_str: Span) -> Event<'s> {
let mut span = span_str; let mut span = span_str;
let should_merge = |e: &Event, span: Span| { let should_merge = |e: &Event, span: Span| {
matches!(e.kind, EventKind::Str | EventKind::Placeholder) matches!(e.kind, EventKind::Str | EventKind::Placeholder)
@ -952,7 +959,7 @@ impl<'s> Parser<'s> {
} }
} }
fn apply_word_attributes(&mut self, span_str: Span) -> Event { fn apply_word_attributes(&mut self, span_str: Span) -> Event<'s> {
if let Some(i) = span_str if let Some(i) = span_str
.of(self.input.src) .of(self.input.src)
.bytes() .bytes()
@ -972,7 +979,7 @@ impl<'s> Parser<'s> {
let attr = self.events.pop_front().unwrap(); let attr = self.events.pop_front().unwrap();
self.events.push_front(Event { self.events.push_front(Event {
kind: EventKind::Exit(Span), kind: EventKind::Exit(Span),
span: span_str.empty_after(), span: attr.span,
}); });
self.events.push_front(Event { self.events.push_front(Event {
kind: EventKind::Str, kind: EventKind::Str,
@ -1089,8 +1096,8 @@ impl Opener {
} }
} }
enum DelimEventKind { enum DelimEventKind<'s> {
Container(Container), Container(Container<'s>),
Span(SpanType), Span(SpanType),
Quote(QuoteType), Quote(QuoteType),
Link { Link {
@ -1100,7 +1107,7 @@ enum DelimEventKind {
}, },
} }
impl From<Opener> for DelimEventKind { impl<'s> From<Opener> for DelimEventKind<'s> {
fn from(d: Opener) -> Self { fn from(d: Opener) -> Self {
match d { match d {
Opener::Span(ty) => Self::Span(ty), Opener::Span(ty) => Self::Span(ty),
@ -1127,7 +1134,7 @@ impl From<Opener> for DelimEventKind {
} }
impl<'s> Iterator for Parser<'s> { impl<'s> Iterator for Parser<'s> {
type Item = Event; type Item = Event<'s>;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
while self.events.is_empty() while self.events.is_empty()
@ -1158,7 +1165,7 @@ impl<'s> Iterator for Parser<'s> {
let ty_opener = if let EventKind::Enter(ty) = self.events[event_opener].kind { let ty_opener = if let EventKind::Enter(ty) = self.events[event_opener].kind {
debug_assert!(matches!( debug_assert!(matches!(
ty, ty,
Verbatim | RawFormat | InlineMath | DisplayMath Verbatim | RawFormat { .. } | InlineMath | DisplayMath
)); ));
ty ty
} else { } else {
@ -1266,7 +1273,7 @@ mod test {
), ),
(Enter(Verbatim), "`"), (Enter(Verbatim), "`"),
(Str, "raw"), (Str, "raw"),
(Exit(Verbatim), "`"), (Exit(Verbatim), "`{#id}"),
(Str, " post"), (Str, " post"),
); );
} }
@ -1336,16 +1343,16 @@ mod test {
fn raw_format() { fn raw_format() {
test_parse!( test_parse!(
"`raw`{=format}", "`raw`{=format}",
(Enter(RawFormat), "format"), (Enter(RawFormat { format: "format" }), "`"),
(Str, "raw"), (Str, "raw"),
(Exit(RawFormat), "format"), (Exit(RawFormat { format: "format" }), "`{=format}"),
); );
test_parse!( test_parse!(
"before `raw`{=format} after", "before `raw`{=format} after",
(Str, "before "), (Str, "before "),
(Enter(RawFormat), "format"), (Enter(RawFormat { format: "format" }), "`"),
(Str, "raw"), (Str, "raw"),
(Exit(RawFormat), "format"), (Exit(RawFormat { format: "format" }), "`{=format}"),
(Str, " after"), (Str, " after"),
); );
} }
@ -1456,7 +1463,7 @@ mod test {
), ),
(Enter(Span), ""), (Enter(Span), ""),
(Str, "[text]("), (Str, "[text]("),
(Exit(Span), ""), (Exit(Span), "{.cls}"),
); );
} }
@ -1520,7 +1527,7 @@ mod test {
"{.cls}", "{.cls}",
), ),
(Enter(Span), "["), (Enter(Span), "["),
(Exit(Span), "]") (Exit(Span), "]{.cls}")
); );
} }
@ -1537,7 +1544,7 @@ mod test {
), ),
(Enter(Span), "["), (Enter(Span), "["),
(Str, "abc"), (Str, "abc"),
(Exit(Span), "]"), (Exit(Span), "]{.def}"),
); );
test_parse!("not a [span] {#id}.", (Str, "not a [span] "), (Str, ".")); test_parse!("not a [span] {#id}.", (Str, "not a [span] "), (Str, "."));
} }
@ -1555,7 +1562,7 @@ mod test {
), ),
(Enter(Span), "["), (Enter(Span), "["),
(Str, "x_y"), (Str, "x_y"),
(Exit(Span), "]"), (Exit(Span), "]{.bar_}"),
); );
} }
@ -1563,24 +1570,24 @@ mod test {
fn autolink() { fn autolink() {
test_parse!( test_parse!(
"<https://example.com>", "<https://example.com>",
(Enter(Autolink), "https://example.com"), (Enter(Autolink("https://example.com",)), "<"),
(Str, "https://example.com"), (Str, "https://example.com"),
(Exit(Autolink), "https://example.com") (Exit(Autolink("https://example.com",)), ">")
); );
test_parse!( test_parse!(
"<a@b.c>", "<a@b.c>",
(Enter(Autolink), "a@b.c"), (Enter(Autolink("a@b.c")), "<"),
(Str, "a@b.c"), (Str, "a@b.c"),
(Exit(Autolink), "a@b.c"), (Exit(Autolink("a@b.c")), ">"),
); );
test_parse!( test_parse!(
"<http://a.b><http://c.d>", "<http://a.b><http://c.d>",
(Enter(Autolink), "http://a.b"), (Enter(Autolink("http://a.b")), "<"),
(Str, "http://a.b"), (Str, "http://a.b"),
(Exit(Autolink), "http://a.b"), (Exit(Autolink("http://a.b")), ">"),
(Enter(Autolink), "http://c.d"), (Enter(Autolink("http://c.d")), "<"),
(Str, "http://c.d"), (Str, "http://c.d"),
(Exit(Autolink), "http://c.d"), (Exit(Autolink("http://c.d")), ">"),
); );
test_parse!("<not-a-url>", (Str, "<not-a-url>")); test_parse!("<not-a-url>", (Str, "<not-a-url>"));
} }
@ -1590,7 +1597,7 @@ mod test {
test_parse!( test_parse!(
"text[^footnote]. more text", "text[^footnote]. more text",
(Str, "text"), (Str, "text"),
(Atom(FootnoteReference), "footnote"), (Atom(FootnoteReference { label: "footnote" }), "[^footnote]"),
(Str, ". more text"), (Str, ". more text"),
); );
} }
@ -1687,7 +1694,7 @@ mod test {
), ),
(Enter(Emphasis), "_"), (Enter(Emphasis), "_"),
(Str, "abc def"), (Str, "abc def"),
(Exit(Emphasis), "_"), (Exit(Emphasis), "_{.attr}"),
); );
} }
@ -1697,13 +1704,13 @@ mod test {
"_abc def_{}", "_abc def_{}",
(Enter(Emphasis), "_"), (Enter(Emphasis), "_"),
(Str, "abc def"), (Str, "abc def"),
(Exit(Emphasis), "_"), (Exit(Emphasis), "_{}"),
); );
test_parse!( test_parse!(
"_abc def_{ % comment % } ghi", "_abc def_{ % comment % } ghi",
(Enter(Emphasis), "_"), (Enter(Emphasis), "_"),
(Str, "abc def"), (Str, "abc def"),
(Exit(Emphasis), "_"), (Exit(Emphasis), "_{ % comment % }"),
(Str, " ghi"), (Str, " ghi"),
); );
} }
@ -1721,7 +1728,7 @@ mod test {
), ),
(Enter(Emphasis), "_"), (Enter(Emphasis), "_"),
(Str, "abc def"), (Str, "abc def"),
(Exit(Emphasis), "_"), (Exit(Emphasis), "_{.a}{.b}{.c}"),
(Str, " "), (Str, " "),
); );
} }
@ -1739,7 +1746,7 @@ mod test {
), ),
(Enter(Span), ""), (Enter(Span), ""),
(Str, "word"), (Str, "word"),
(Exit(Span), ""), (Exit(Span), "{a=b}"),
); );
test_parse!( test_parse!(
"some word{.a}{.b} with attrs", "some word{.a}{.b} with attrs",
@ -1753,7 +1760,7 @@ mod test {
), ),
(Enter(Span), ""), (Enter(Span), ""),
(Str, "word"), (Str, "word"),
(Exit(Span), ""), (Exit(Span), "{.a}{.b}"),
(Str, " with attrs"), (Str, " with attrs"),
); );
} }

View file

@ -51,6 +51,7 @@
use std::fmt; use std::fmt;
use std::fmt::Write as FmtWrite; use std::fmt::Write as FmtWrite;
use std::io; use std::io;
use std::ops::Range;
#[cfg(feature = "html")] #[cfg(feature = "html")]
pub mod html; pub mod html;
@ -60,7 +61,6 @@ mod block;
mod inline; mod inline;
mod lex; mod lex;
mod span; mod span;
mod tree;
use span::Span; use span::Span;
@ -555,13 +555,14 @@ pub struct Parser<'s> {
src: &'s str, src: &'s str,
/// Block tree parsed at first. /// Block tree parsed at first.
tree: block::Tree, blocks: std::iter::Peekable<std::vec::IntoIter<block::Event<'s>>>,
/// Contents obtained by the prepass. /// Contents obtained by the prepass.
pre_pass: PrePass<'s>, pre_pass: PrePass<'s>,
/// Last parsed block attributes /// Last parsed block attributes, and its starting offset.
block_attributes: Attributes<'s>, block_attributes: Attributes<'s>,
block_attributes_pos: Option<usize>,
/// Current table row is a head row. /// Current table row is a head row.
table_head_row: bool, table_head_row: bool,
@ -576,7 +577,7 @@ pub struct Parser<'s> {
#[derive(Clone)] #[derive(Clone)]
struct Heading { struct Heading {
/// Location of heading in src. /// Location of heading in src.
location: usize, location: u32,
/// Automatically generated id from heading text. /// Automatically generated id from heading text.
id_auto: String, id_auto: String,
/// Text of heading, formatting stripped. /// Text of heading, formatting stripped.
@ -598,28 +599,50 @@ struct PrePass<'s> {
impl<'s> PrePass<'s> { impl<'s> PrePass<'s> {
#[must_use] #[must_use]
fn new(src: &'s str, mut tree: block::Tree, inline_parser: &mut inline::Parser<'s>) -> Self { fn new(
src: &'s str,
blocks: std::slice::Iter<block::Event<'s>>,
inline_parser: &mut inline::Parser<'s>,
) -> Self {
let mut link_definitions = Map::new(); let mut link_definitions = Map::new();
let mut headings: Vec<Heading> = Vec::new(); let mut headings: Vec<Heading> = Vec::new();
let mut used_ids: Set<&str> = Set::new(); let mut used_ids: Set<&str> = Set::new();
let mut blocks = blocks.peekable();
let mut attr_prev: Option<Span> = None; let mut attr_prev: Option<Span> = None;
while let Some(e) = tree.next() { while let Some(e) = blocks.next() {
match e.kind { match e.kind {
tree::EventKind::Enter(block::Node::Leaf(block::Leaf::LinkDefinition)) => { block::EventKind::Enter(block::Node::Leaf(block::Leaf::LinkDefinition {
label,
})) => {
fn next_is_inline(
bs: &mut std::iter::Peekable<std::slice::Iter<block::Event>>,
) -> bool {
matches!(bs.peek().map(|e| &e.kind), Some(block::EventKind::Inline))
}
// All link definition tags have to be obtained initially, as references can // All link definition tags have to be obtained initially, as references can
// appear before the definition. // appear before the definition.
let tag = e.span.of(src);
let attrs = let attrs =
attr_prev.map_or_else(Attributes::new, |sp| attr::parse(sp.of(src))); attr_prev.map_or_else(Attributes::new, |sp| attr::parse(sp.of(src)));
let url = match tree.count_children() { let url = if !next_is_inline(&mut blocks) {
0 => "".into(), "".into()
1 => tree.take_inlines().next().unwrap().of(src).trim().into(), } else {
_ => tree.take_inlines().map(|sp| sp.of(src).trim()).collect(), let start = blocks.next().unwrap().span.of(src).trim();
if !next_is_inline(&mut blocks) {
start.into()
} else {
let mut url = start.to_string();
while next_is_inline(&mut blocks) {
url.push_str(blocks.next().unwrap().span.of(src).trim());
}
url.into()
}
}; };
link_definitions.insert(tag, (url, attrs)); link_definitions.insert(label, (url, attrs));
} }
tree::EventKind::Enter(block::Node::Leaf(block::Leaf::Heading { .. })) => { block::EventKind::Enter(block::Node::Leaf(block::Leaf::Heading { .. })) => {
// All headings ids have to be obtained initially, as references can appear // All headings ids have to be obtained initially, as references can appear
// before the heading. Additionally, determining the id requires inline parsing // before the heading. Additionally, determining the id requires inline parsing
// as formatting must be removed. // as formatting must be removed.
@ -634,10 +657,21 @@ impl<'s> PrePass<'s> {
let mut id_auto = String::new(); let mut id_auto = String::new();
let mut text = String::new(); let mut text = String::new();
let mut last_whitespace = true; let mut last_whitespace = true;
let inlines = tree.take_inlines().collect::<Vec<_>>();
inline_parser.reset(); inline_parser.reset();
inlines.iter().enumerate().for_each(|(i, sp)| { let mut last_end = 0;
inline_parser.feed_line(*sp, i == inlines.len() - 1); loop {
let span_inline = blocks.next().and_then(|e| {
if matches!(e.kind, block::EventKind::Inline) {
last_end = e.span.end();
Some(e.span)
} else {
None
}
});
inline_parser.feed_line(
span_inline.unwrap_or_else(|| Span::empty_at(last_end)),
span_inline.is_none(),
);
inline_parser.for_each(|ev| match ev.kind { inline_parser.for_each(|ev| match ev.kind {
inline::EventKind::Str => { inline::EventKind::Str => {
text.push_str(ev.span.of(src)); text.push_str(ev.span.of(src));
@ -662,8 +696,11 @@ impl<'s> PrePass<'s> {
id_auto.push('-'); id_auto.push('-');
} }
_ => {} _ => {}
}) });
}); if span_inline.is_none() {
break;
}
}
id_auto.drain(id_auto.trim_end_matches('-').len()..); id_auto.drain(id_auto.trim_end_matches('-').len()..);
// ensure id unique // ensure id unique
@ -689,17 +726,17 @@ impl<'s> PrePass<'s> {
std::mem::transmute::<&str, &'static str>(id_auto.as_ref()) std::mem::transmute::<&str, &'static str>(id_auto.as_ref())
}); });
headings.push(Heading { headings.push(Heading {
location: e.span.start(), location: e.span.start() as u32,
id_auto, id_auto,
text, text,
id_override, id_override,
}); });
} }
tree::EventKind::Atom(block::Atom::Attributes) => { block::EventKind::Atom(block::Atom::Attributes) => {
attr_prev = Some(e.span); attr_prev = Some(e.span);
} }
tree::EventKind::Enter(..) block::EventKind::Enter(..)
| tree::EventKind::Exit(block::Node::Container(block::Container::Section { | block::EventKind::Exit(block::Node::Container(block::Container::Section {
.. ..
})) => {} })) => {}
_ => { _ => {
@ -723,7 +760,7 @@ impl<'s> PrePass<'s> {
h.id_override.as_ref().unwrap_or(&h.id_auto) h.id_override.as_ref().unwrap_or(&h.id_auto)
} }
fn heading_id_by_location(&self, location: usize) -> Option<&str> { fn heading_id_by_location(&self, location: u32) -> Option<&str> {
self.headings self.headings
.binary_search_by_key(&location, |h| h.location) .binary_search_by_key(&location, |h| h.location)
.ok() .ok()
@ -741,22 +778,133 @@ impl<'s> PrePass<'s> {
impl<'s> Parser<'s> { impl<'s> Parser<'s> {
#[must_use] #[must_use]
pub fn new(src: &'s str) -> Self { pub fn new(src: &'s str) -> Self {
let tree = block::parse(src); let blocks = block::parse(src);
let mut inline_parser = inline::Parser::new(src); let mut inline_parser = inline::Parser::new(src);
let pre_pass = PrePass::new(src, tree.clone(), &mut inline_parser); let pre_pass = PrePass::new(src, blocks.iter(), &mut inline_parser);
Self { Self {
src, src,
tree, blocks: blocks.into_iter().peekable(),
pre_pass, pre_pass,
block_attributes: Attributes::new(), block_attributes: Attributes::new(),
block_attributes_pos: None,
table_head_row: false, table_head_row: false,
verbatim: false, verbatim: false,
inline_parser, inline_parser,
} }
} }
fn inline(&mut self) -> Option<Event<'s>> { /// Turn the [`Parser`] into an iterator of tuples, each with an [`Event`] and a start/end byte
/// offset for its corresponding input (as a [`Range<usize>`]).
///
/// Generally, the range of each event does not overlap with any other event and the ranges are
/// in the same order as the events are emitted, i.e. the start offset of an event must be
/// greater than or equal to the (exclusive) end offset of all events that were emitted before
/// that event.
/// However, there are some exceptions to this rule:
///
/// - Blank lines between block attributes and the block cause the blankline events to
/// overlap with the block start event.
/// - Caption events are emitted before the table rows while the input for the caption content
/// is located after the table rows, causing the ranges to be out of order.
///
/// Characters between events, that are not part of any event range, are typically whitespace
/// but may also consist of unattached attributes or `>` characters from blockquotes.
///
/// # Examples
///
/// Start and end events of containers correspond only to the start and end markers for that
/// container, not its inner content:
///
/// ```
/// # use jotdown::*;
/// # use jotdown::Event::*;
/// # use jotdown::Container::*;
/// let input = "> _hello_ [text](url)\n";
/// assert!(matches!(
/// Parser::new(input)
/// .into_offset_iter()
/// .map(|(e, r)| (&input[r], e))
/// .collect::<Vec<_>>()
/// .as_slice(),
/// &[
/// (">", Start(Blockquote, ..)),
/// ("", Start(Paragraph, ..)),
/// ("_", Start(Emphasis, ..)),
/// ("hello", Str(..)),
/// ("_", End(Emphasis)),
/// (" ", Str(..)),
/// ("[", Start(Link { .. }, ..)),
/// ("text", Str(..)),
/// ("](url)", End(Link { .. })),
/// ("", End(Paragraph)),
/// ("", End(Blockquote)),
/// ],
/// ));
/// ```
///
/// _Block_ attributes that belong to a container are included in the _start_ event. _Inline_
/// attributes that belong to a container are included in the _end_ event:
///
/// ```
/// # use jotdown::*;
/// # use jotdown::Event::*;
/// # use jotdown::Container::*;
/// let input = "
/// {.quote}
/// > [Hello]{lang=en} world!";
/// assert!(matches!(
/// Parser::new(input)
/// .into_offset_iter()
/// .map(|(e, r)| (&input[r], e))
/// .collect::<Vec<_>>()
/// .as_slice(),
/// &[
/// ("\n", Blankline),
/// ("{.quote}\n>", Start(Blockquote, ..)),
/// ("", Start(Paragraph, ..)),
/// ("[", Start(Span, ..)),
/// ("Hello", Str(..)),
/// ("]{lang=en}", End(Span)),
/// (" world!", Str(..)),
/// ("", End(Paragraph)),
/// ("", End(Blockquote)),
/// ],
/// ));
/// ```
///
/// Inline events that span multiple lines may contain characters from outer block containers
/// (e.g. `>` characters from blockquotes or whitespace from list items):
///
/// ```
/// # use jotdown::*;
/// # use jotdown::Event::*;
/// # use jotdown::Container::*;
/// let input = "
/// > [txt](multi
/// > line)";
/// assert!(matches!(
/// Parser::new(input)
/// .into_offset_iter()
/// .map(|(e, r)| (&input[r], e))
/// .collect::<Vec<_>>()
/// .as_slice(),
/// &[
/// ("\n", Blankline),
/// (">", Start(Blockquote, ..)),
/// ("", Start(Paragraph, ..)),
/// ("[", Start(Link { .. }, ..)),
/// ("txt", Str(..)),
/// ("](multi\n> line)", End(Link { .. })),
/// ("", End(Paragraph)),
/// ("", End(Blockquote)),
/// ],
/// ));
/// ```
pub fn into_offset_iter(self) -> OffsetIter<'s> {
OffsetIter { parser: self }
}
fn inline(&mut self) -> Option<(Event<'s>, Range<usize>)> {
let next = self.inline_parser.next()?; let next = self.inline_parser.next()?;
let (inline, mut attributes) = match next { let (inline, mut attributes) = match next {
@ -772,16 +920,14 @@ impl<'s> Parser<'s> {
inline.map(|inline| { inline.map(|inline| {
let enter = matches!(inline.kind, inline::EventKind::Enter(_)); let enter = matches!(inline.kind, inline::EventKind::Enter(_));
match inline.kind { let event = match inline.kind {
inline::EventKind::Enter(c) | inline::EventKind::Exit(c) => { inline::EventKind::Enter(c) | inline::EventKind::Exit(c) => {
let t = match c { let t = match c {
inline::Container::Span => Container::Span, inline::Container::Span => Container::Span,
inline::Container::Verbatim => Container::Verbatim, inline::Container::Verbatim => Container::Verbatim,
inline::Container::InlineMath => Container::Math { display: false }, inline::Container::InlineMath => Container::Math { display: false },
inline::Container::DisplayMath => Container::Math { display: true }, inline::Container::DisplayMath => Container::Math { display: true },
inline::Container::RawFormat => Container::RawInline { inline::Container::RawFormat { format } => Container::RawInline { format },
format: inline.span.of(self.src),
},
inline::Container::Subscript => Container::Subscript, inline::Container::Subscript => Container::Subscript,
inline::Container::Superscript => Container::Superscript, inline::Container::Superscript => Container::Superscript,
inline::Container::Insert => Container::Insert, inline::Container::Insert => Container::Insert,
@ -822,14 +968,13 @@ impl<'s> Parser<'s> {
Container::Image(url_or_tag, ty) Container::Image(url_or_tag, ty)
} }
} }
inline::Container::Autolink => { inline::Container::Autolink(url) => {
let url: CowStr = inline.span.of(self.src).into();
let ty = if url.contains('@') { let ty = if url.contains('@') {
LinkType::Email LinkType::Email
} else { } else {
LinkType::AutoLink LinkType::AutoLink
}; };
Container::Link(url, ty) Container::Link(url.into(), ty)
} }
}; };
if enter { if enter {
@ -839,10 +984,8 @@ impl<'s> Parser<'s> {
} }
} }
inline::EventKind::Atom(a) => match a { inline::EventKind::Atom(a) => match a {
inline::Atom::FootnoteReference => { inline::Atom::FootnoteReference { label } => Event::FootnoteReference(label),
Event::FootnoteReference(inline.span.of(self.src)) inline::Atom::Symbol(sym) => Event::Symbol(sym.into()),
}
inline::Atom::Symbol => Event::Symbol(inline.span.of(self.src).into()),
inline::Atom::Quote { ty, left } => match (ty, left) { inline::Atom::Quote { ty, left } => match (ty, left) {
(inline::QuoteType::Single, true) => Event::LeftSingleQuote, (inline::QuoteType::Single, true) => Event::LeftSingleQuote,
(inline::QuoteType::Single, false) => Event::RightSingleQuote, (inline::QuoteType::Single, false) => Event::RightSingleQuote,
@ -861,48 +1004,58 @@ impl<'s> Parser<'s> {
inline::EventKind::Attributes { .. } | inline::EventKind::Placeholder => { inline::EventKind::Attributes { .. } | inline::EventKind::Placeholder => {
panic!("{:?}", inline) panic!("{:?}", inline)
} }
} };
(event, inline.span.into())
}) })
} }
fn block(&mut self) -> Option<Event<'s>> { fn block(&mut self) -> Option<(Event<'s>, Range<usize>)> {
while let Some(ev) = &mut self.tree.next() { while let Some(mut ev) = &mut self.blocks.next() {
let content = ev.span.of(self.src);
let event = match ev.kind { let event = match ev.kind {
tree::EventKind::Atom(a) => match a { block::EventKind::Atom(a) => match a {
block::Atom::Blankline => Event::Blankline, block::Atom::Blankline => Event::Blankline,
block::Atom::ThematicBreak => { block::Atom::ThematicBreak => {
if let Some(pos) = self.block_attributes_pos.take() {
ev.span = Span::new(pos, ev.span.end());
}
Event::ThematicBreak(self.block_attributes.take()) Event::ThematicBreak(self.block_attributes.take())
} }
block::Atom::Attributes => { block::Atom::Attributes => {
self.block_attributes.parse(content); if self.block_attributes_pos.is_none() {
self.block_attributes_pos = Some(ev.span.start());
}
self.block_attributes.parse(ev.span.of(self.src));
continue; continue;
} }
}, },
tree::EventKind::Enter(c) | tree::EventKind::Exit(c) => { block::EventKind::Enter(c) | block::EventKind::Exit(c) => {
let enter = matches!(ev.kind, tree::EventKind::Enter(..)); let enter = matches!(ev.kind, block::EventKind::Enter(..));
let cont = match c { let cont = match c {
block::Node::Leaf(l) => { block::Node::Leaf(l) => {
self.inline_parser.reset(); self.inline_parser.reset();
match l { match l {
block::Leaf::Paragraph => Container::Paragraph, block::Leaf::Paragraph => Container::Paragraph,
block::Leaf::Heading { has_section } => Container::Heading { block::Leaf::Heading {
level: content.len().try_into().unwrap(), level,
has_section,
pos,
} => Container::Heading {
level,
has_section, has_section,
id: self id: self
.pre_pass .pre_pass
.heading_id_by_location(ev.span.start()) .heading_id_by_location(pos)
.unwrap_or_default() .unwrap_or_default()
.to_string() .to_string()
.into(), .into(),
}, },
block::Leaf::DescriptionTerm => Container::DescriptionTerm, block::Leaf::DescriptionTerm => Container::DescriptionTerm,
block::Leaf::CodeBlock => { block::Leaf::CodeBlock { language } => {
self.verbatim = enter; self.verbatim = enter;
if let Some(format) = content.strip_prefix('=') { if let Some(format) = language.strip_prefix('=') {
Container::RawBlock { format } Container::RawBlock { format }
} else { } else {
Container::CodeBlock { language: content } Container::CodeBlock { language }
} }
} }
block::Leaf::TableCell(alignment) => Container::TableCell { block::Leaf::TableCell(alignment) => Container::TableCell {
@ -910,16 +1063,20 @@ impl<'s> Parser<'s> {
head: self.table_head_row, head: self.table_head_row,
}, },
block::Leaf::Caption => Container::Caption, block::Leaf::Caption => Container::Caption,
block::Leaf::LinkDefinition => { block::Leaf::LinkDefinition { label } => {
Container::LinkDefinition { label: content } self.verbatim = enter;
Container::LinkDefinition { label }
} }
} }
} }
block::Node::Container(c) => match c { block::Node::Container(c) => match c {
block::Container::Blockquote => Container::Blockquote, block::Container::Blockquote => Container::Blockquote,
block::Container::Div => Container::Div { class: content }, block::Container::Div { class } => Container::Div { class },
block::Container::Footnote => Container::Footnote { label: content }, block::Container::Footnote { label } => Container::Footnote { label },
block::Container::List(block::ListKind { ty, tight }) => { block::Container::List {
kind: block::ListKind { ty, tight },
marker,
} => {
if matches!(ty, block::ListType::Description) { if matches!(ty, block::ListType::Description) {
Container::DescriptionList Container::DescriptionList
} else { } else {
@ -927,9 +1084,8 @@ impl<'s> Parser<'s> {
block::ListType::Unordered(..) => ListKind::Unordered, block::ListType::Unordered(..) => ListKind::Unordered,
block::ListType::Task => ListKind::Task, block::ListType::Task => ListKind::Task,
block::ListType::Ordered(numbering, style) => { block::ListType::Ordered(numbering, style) => {
let start = numbering let start =
.parse_number(style.number(content)) numbering.parse_number(style.number(marker)).max(1);
.max(1);
ListKind::Ordered { ListKind::Ordered {
numbering, numbering,
style, style,
@ -941,12 +1097,12 @@ impl<'s> Parser<'s> {
Container::List { kind, tight } Container::List { kind, tight }
} }
} }
block::Container::ListItem(ty) => match ty { block::Container::ListItem(kind) => match kind {
block::ListType::Task => Container::TaskListItem { block::ListItemKind::Task { checked } => {
checked: content.as_bytes()[3] != b' ', Container::TaskListItem { checked }
}, }
block::ListType::Description => Container::DescriptionDetails, block::ListItemKind::Description => Container::DescriptionDetails,
_ => Container::ListItem, block::ListItemKind::List => Container::ListItem,
}, },
block::Container::Table => Container::Table, block::Container::Table => Container::Table,
block::Container::TableRow { head } => { block::Container::TableRow { head } => {
@ -955,10 +1111,10 @@ impl<'s> Parser<'s> {
} }
Container::TableRow { head } Container::TableRow { head }
} }
block::Container::Section => Container::Section { block::Container::Section { pos } => Container::Section {
id: self id: self
.pre_pass .pre_pass
.heading_id_by_location(ev.span.start()) .heading_id_by_location(pos)
.unwrap_or_default() .unwrap_or_default()
.to_string() .to_string()
.into(), .into(),
@ -966,32 +1122,63 @@ impl<'s> Parser<'s> {
}, },
}; };
if enter { if enter {
if let Some(pos) = self.block_attributes_pos.take() {
ev.span = Span::new(pos, ev.span.end());
}
Event::Start(cont, self.block_attributes.take()) Event::Start(cont, self.block_attributes.take())
} else { } else {
self.block_attributes = Attributes::new();
self.block_attributes_pos = None;
Event::End(cont) Event::End(cont)
} }
} }
tree::EventKind::Inline => { block::EventKind::Inline => {
if self.verbatim { if self.verbatim {
Event::Str(content.into()) Event::Str(ev.span.of(self.src).into())
} else { } else {
self.inline_parser self.inline_parser.feed_line(
.feed_line(ev.span, self.tree.branch_is_empty()); ev.span,
return self.next(); !matches!(
self.blocks.peek().map(|e| &e.kind),
Some(block::EventKind::Inline),
),
);
return self.next_span();
} }
} }
block::EventKind::Stale => continue,
}; };
return Some(event); return Some((event, ev.span.into()));
} }
None None
} }
fn next_span(&mut self) -> Option<(Event<'s>, Range<usize>)> {
self.inline().or_else(|| self.block())
}
} }
impl<'s> Iterator for Parser<'s> { impl<'s> Iterator for Parser<'s> {
type Item = Event<'s>; type Item = Event<'s>;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
self.inline().or_else(|| self.block()) self.next_span().map(|(e, _)| e)
}
}
/// An iterator that is identical to a [`Parser`], except that it also emits the location of each
/// event within the input.
///
/// See the documentation of [`Parser::into_offset_iter`] for more information.
pub struct OffsetIter<'s> {
// The wrapped parser; each item is obtained from its `next_span` method.
parser: Parser<'s>,
}
impl<'s> Iterator for OffsetIter<'s> {
type Item = (Event<'s>, Range<usize>);
fn next(&mut self) -> Option<Self::Item> {
self.parser.next_span()
} }
} }
@ -1523,7 +1710,6 @@ mod test {
Blankline, Blankline,
Start(LinkDefinition { label: "tag" }, Attributes::new()), Start(LinkDefinition { label: "tag" }, Attributes::new()),
Str("u".into()), Str("u".into()),
Softbreak,
Str("rl".into()), Str("rl".into()),
End(LinkDefinition { label: "tag" }), End(LinkDefinition { label: "tag" }),
); );
@ -1532,19 +1718,24 @@ mod test {
"[text][tag]\n", "[text][tag]\n",
"\n", "\n",
"[tag]:\n", "[tag]:\n",
" url\n", // " url\n", //
" cont\n", //
), ),
Start(Paragraph, Attributes::new()), Start(Paragraph, Attributes::new()),
Start( Start(
Link("url".into(), LinkType::Span(SpanLinkType::Reference)), Link("urlcont".into(), LinkType::Span(SpanLinkType::Reference)),
Attributes::new() Attributes::new()
), ),
Str("text".into()), Str("text".into()),
End(Link("url".into(), LinkType::Span(SpanLinkType::Reference))), End(Link(
"urlcont".into(),
LinkType::Span(SpanLinkType::Reference)
)),
End(Paragraph), End(Paragraph),
Blankline, Blankline,
Start(LinkDefinition { label: "tag" }, Attributes::new()), Start(LinkDefinition { label: "tag" }, Attributes::new()),
Str("url".into()), Str("url".into()),
Str("cont".into()),
End(LinkDefinition { label: "tag" }), End(LinkDefinition { label: "tag" }),
); );
} }

View file

@ -4,6 +4,12 @@ pub struct Span {
end: u32, end: u32,
} }
impl From<Span> for std::ops::Range<usize> {
    /// Convert a span into the half-open range `start..end` of its offsets.
    fn from(span: Span) -> Self {
        let (start, end) = (span.start(), span.end());
        Self { start, end }
    }
}
impl Span { impl Span {
pub fn new(start: usize, end: usize) -> Self { pub fn new(start: usize, end: usize) -> Self {
Self::by_len(start, end.checked_sub(start).unwrap()) Self::by_len(start, end.checked_sub(start).unwrap())

View file

@ -1,427 +0,0 @@
use crate::Span;
/// The kind of an [`Event`] emitted while iterating over a [`Tree`].
///
/// `C` is the payload type of container nodes, `A` the payload type of atom nodes.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EventKind<C, A> {
/// A container node is reached; its children are emitted next.
Enter(C),
/// An inline node.
Inline,
/// Emitted for a container once there are no more nodes to visit inside it.
Exit(C),
/// An atom node.
Atom(A),
}
/// A view of a single stored node, as yielded by [`Builder::children`].
#[derive(Debug)]
pub struct Node<'a, C, A> {
/// Index of the node within the builder's node storage.
pub index: NodeIndex,
/// Mutable access to the payload of the node.
pub elem: Element<'a, C, A>,
/// Span that was registered for the node when it was added.
pub span: Span,
}
/// Mutable access to the payload of a node, depending on what kind of node it is.
#[derive(Debug)]
pub enum Element<'a, C, A> {
/// Payload of a container node.
Container(&'a mut C),
/// Payload of an atom node.
Atom(&'a mut A),
/// An inline node, which carries no payload.
Inline,
}
/// An item produced by iterating over a [`Tree`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Event<C, A> {
/// What happened: a container was entered or exited, or an inline/atom node was visited.
pub kind: EventKind<C, A>,
/// Span of the node that the event belongs to.
pub span: Span,
}
/// A finished tree of nodes that is traversed by using it as an iterator of [`Event`]s.
///
/// Created by [`Builder::finish`].
#[derive(Clone)]
pub struct Tree<C: 'static, A: 'static> {
// All nodes of the tree; the storage is reference counted so clones share it.
nodes: std::rc::Rc<[InternalNode<C, A>]>,
// Stack of containers that have been entered but not yet exited by the iteration.
branch: Vec<NodeIndex>,
// The node to visit next, or `None` when the current container has no more nodes.
head: Option<NodeIndex>,
}
impl<C: Clone, A: Clone> Tree<C, A> {
/// Count number of direct children nodes.
pub fn count_children(&self) -> usize {
let mut head = self.head;
let mut count = 0;
while let Some(h) = head {
let n = &self.nodes[h.index()];
head = n.next;
count += 1;
}
count
}
/// Retrieve all inlines until the end of the current container. Panics if any upcoming node is
/// not an inline node.
pub fn take_inlines(&mut self) -> impl Iterator<Item = Span> + '_ {
let mut head = self.head.take();
std::iter::from_fn(move || {
head.take().map(|h| {
let n = &self.nodes[h.index()];
debug_assert!(matches!(n.kind, NodeKind::Inline));
head = n.next;
n.span
})
})
}
pub fn branch_is_empty(&self) -> bool {
matches!(self.head, None)
}
}
impl<C: Clone, A: Clone> Iterator for Tree<C, A> {
    type Item = Event<C, A>;

    /// Advance the traversal by one step.
    ///
    /// A container yields an `Enter` event and moves the cursor to its first child, atoms and
    /// inlines yield their event and move the cursor to the next sibling. When the cursor is
    /// empty, the innermost entered container is exited instead. `None` is returned once no
    /// entered container remains.
    fn next(&mut self) -> Option<Self::Item> {
        match self.head {
            Some(current) => {
                let node = &self.nodes[current.index()];
                let (kind, next_head) = match &node.kind {
                    NodeKind::Root => unreachable!(),
                    NodeKind::Container(c, first_child) => {
                        // remember the container so that it can be exited later
                        self.branch.push(current);
                        (EventKind::Enter(c.clone()), *first_child)
                    }
                    NodeKind::Atom(a) => (EventKind::Atom(a.clone()), node.next),
                    NodeKind::Inline => (EventKind::Inline, node.next),
                };
                self.head = next_head;
                Some(Event {
                    kind,
                    span: node.span,
                })
            }
            None => {
                let parent = self.branch.pop()?;
                let node = &self.nodes[parent.index()];
                let container = node.kind.container().unwrap().clone();
                // continue with whatever follows the exited container
                self.head = node.next;
                Some(Event {
                    kind: EventKind::Exit(container),
                    span: node.span,
                })
            }
        }
    }
}
/// Index of a node within the node storage.
///
/// The index is stored incremented by one in a `NonZeroUsize`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct NodeIndex(std::num::NonZeroUsize);

impl NodeIndex {
    /// Wrap the zero-based storage index `i`.
    fn new(i: usize) -> Self {
        debug_assert_ne!(i, usize::MAX);
        let shifted: std::num::NonZeroUsize = (i + 1).try_into().unwrap();
        Self(shifted)
    }

    /// Index of the root node, which is always the first node in the storage.
    fn root() -> Self {
        Self::new(0)
    }

    /// The zero-based storage index.
    fn index(self) -> usize {
        self.0.get() - 1
    }
}
/// Internal representation of what a stored node is.
#[derive(Debug, Clone, PartialEq, Eq)]
enum NodeKind<C, A> {
// The single node that every builder starts with; it is never exposed as an element.
Root,
// A container with its payload and the index of its first child, if it has any.
Container(C, Option<NodeIndex>),
// An atom with its payload.
Atom(A),
// An inline node, without payload.
Inline,
}
/// A node as it is stored by [`Builder`] and [`Tree`].
#[derive(Debug, Clone)]
struct InternalNode<C, A> {
// Span registered for the node when it was added.
span: Span,
// What the node is, including its payload.
kind: NodeKind<C, A>,
// Index of the node that follows this one on the same level, if any.
next: Option<NodeIndex>,
}
/// Incrementally constructs a [`Tree`] from enter/exit/atom/inline calls.
#[derive(Clone)]
pub struct Builder<C, A> {
// Node storage; the first element is always the root node.
nodes: Vec<InternalNode<C, A>>,
// Stack of container nodes used to find the node that the next added node is linked from.
branch: Vec<NodeIndex>,
// The most recently added node, or `None` directly after a container has been exited.
head: Option<NodeIndex>,
// Number of containers that have been entered but not yet exited.
depth: usize,
}
impl<C, A> NodeKind<C, A> {
    /// Index of the first child if this is a container that has one, otherwise `None`.
    fn child(&self) -> Option<NodeIndex> {
        match self {
            NodeKind::Container(_, child) => *child,
            _ => None,
        }
    }

    /// Mutable access to the first-child link of a container. Panics if this is not a container.
    fn child_mut(&mut self) -> &mut Option<NodeIndex> {
        match self {
            NodeKind::Container(_, child) => child,
            _ => panic!(),
        }
    }

    /// The payload if this is a container, otherwise `None`.
    fn container(&self) -> Option<&C> {
        match self {
            NodeKind::Container(c, _) => Some(c),
            _ => None,
        }
    }
}
impl<'a, C, A> From<&'a mut NodeKind<C, A>> for Element<'a, C, A> {
    /// Borrow the payload of a stored node as an [`Element`].
    ///
    /// The root node has no element representation and must never be passed in.
    fn from(kind: &'a mut NodeKind<C, A>) -> Self {
        match kind {
            NodeKind::Container(container, ..) => Self::Container(container),
            NodeKind::Atom(atom) => Self::Atom(atom),
            NodeKind::Inline => Self::Inline,
            NodeKind::Root => unreachable!(),
        }
    }
}
impl<C, A> Builder<C, A> {
/// Create a builder that contains only the root node, with the root as the current head.
pub(super) fn new() -> Self {
Builder {
nodes: vec![InternalNode {
span: Span::default(),
kind: NodeKind::Root,
next: None,
}],
branch: vec![],
head: Some(NodeIndex::root()),
depth: 0,
}
}
/// Append an atom node with payload `a` at the current position.
pub(super) fn atom(&mut self, a: A, span: Span) {
self.add_node(InternalNode {
span,
kind: NodeKind::Atom(a),
next: None,
});
}
/// Append an inline node at the current position.
pub(super) fn inline(&mut self, span: Span) {
self.add_node(InternalNode {
span,
kind: NodeKind::Inline,
next: None,
});
}
/// Append a container node with payload `c` and make it the current container. Nodes added
/// afterwards become its children until [`Self::exit`] is called. Returns the index of the new
/// container.
pub(super) fn enter(&mut self, c: C, span: Span) -> NodeIndex {
self.depth += 1;
self.add_node(InternalNode {
span,
kind: NodeKind::Container(c, None),
next: None,
})
}
/// Leave the current container, so that nodes added afterwards follow it instead of being
/// placed inside it.
pub(super) fn exit(&mut self) {
self.depth -= 1;
if let Some(head) = self.head.take() {
// If the head is still the container itself, it has no children and has not been pushed
// to the branch by `add_node` yet.
if matches!(self.nodes[head.index()].kind, NodeKind::Container(..)) {
self.branch.push(head);
}
} else {
// The head was already cleared by a previous exit, so the top of the branch is dropped.
let last = self.branch.pop();
debug_assert_ne!(last, None);
}
}
/// Exit and discard all the contents of the current container.
pub(super) fn exit_discard(&mut self) {
self.exit();
// Take the top of the branch as the container that was just exited.
let exited = self.branch.pop().unwrap();
// Drop the container and every node stored after it.
self.nodes.drain(exited.index()..);
// Unlink the container from the node that pointed to it.
let (prev, has_parent) = self.replace(exited, None);
if has_parent {
// It was linked as a first child; the pointing container becomes the head again.
self.head = Some(prev);
} else {
// It was linked through a `next` link; the pointing node goes back on the branch.
self.branch.push(prev);
}
}
/// Swap the node and its children with either its parent or the node before.
pub fn swap_prev(&mut self, node: NodeIndex) {
let next = self.nodes[node.index()].next;
if let Some(n) = next {
// Clear the link to the node that follows, it is re-attached below.
self.replace(n, None);
}
// Whatever pointed to `node` now points to the node that followed it.
let (prev, _) = self.replace(node, next);
// Whatever pointed to `prev` now points to `node`, and `node` is followed by `prev`.
self.replace(prev, Some(node));
self.nodes[node.index()].next = Some(prev);
}
/// Remove the specified node and its children.
pub fn remove(&mut self, node: NodeIndex) {
// Only the links are changed, the node storage is left as it is.
let next = self.nodes[node.index()].next;
self.replace(node, next);
}
/// Number of containers that are currently entered.
pub(super) fn depth(&self) -> usize {
self.depth
}
/// Mutable access to the payload of the specified node. Must not be called with the root node.
pub(super) fn elem(&mut self, ni: NodeIndex) -> Element<C, A> {
match &mut self.nodes[ni.index()].kind {
NodeKind::Root => unreachable!(),
NodeKind::Container(c, ..) => Element::Container(c),
NodeKind::Atom(a) => Element::Atom(a),
NodeKind::Inline => Element::Inline,
}
}
/// Retrieve all children nodes for the specified node, in the order that they were added.
pub(super) fn children(&mut self, node: NodeIndex) -> impl Iterator<Item = Node<C, A>> {
// XXX assumes no modifications
let n = &self.nodes[node.index()];
// The nodes are taken from the storage range that starts at the first child and ends before
// the node's `next` link, or at the end of the storage if there is no such link.
let range = if let Some(start) = n.kind.child() {
start.index()..n.next.map_or(self.nodes.len(), NodeIndex::index)
} else {
0..0
};
range
.clone()
.map(NodeIndex::new)
.zip(self.nodes[range].iter_mut())
.map(|(index, n)| Node {
index,
elem: Element::from(&mut n.kind),
span: n.span,
})
}
/// Turn the builder into a [`Tree`] whose iteration starts at the node that follows the root.
/// All entered containers are expected to have been exited.
pub(super) fn finish(self) -> Tree<C, A> {
debug_assert_eq!(self.depth, 0);
let head = self.nodes[NodeIndex::root().index()].next;
Tree {
nodes: self.nodes.into_boxed_slice().into(),
branch: Vec::new(),
head,
}
}
/// Store the node, link it from the current position and make it the new head. Returns the
/// index of the stored node. Panics if there is neither a head nor a node on the branch.
fn add_node(&mut self, node: InternalNode<C, A>) -> NodeIndex {
let ni = NodeIndex::new(self.nodes.len());
self.nodes.push(node);
if let Some(head_ni) = &mut self.head {
let mut head = &mut self.nodes[head_ni.index()];
match &mut head.kind {
NodeKind::Root | NodeKind::Inline | NodeKind::Atom(_) => {
// set next pointer of previous node
debug_assert_eq!(head.next, None);
head.next = Some(ni);
}
NodeKind::Container(_, child) => {
self.branch.push(*head_ni);
// set child pointer of current container
debug_assert_eq!(*child, None);
*child = Some(ni);
}
}
} else if let Some(block) = self.branch.pop() {
// No head, the new node follows the container on top of the branch.
let mut block = &mut self.nodes[block.index()];
debug_assert!(matches!(block.kind, NodeKind::Container(..)));
block.next = Some(ni);
} else {
panic!()
}
self.head = Some(ni);
ni
}
/// Remove the link from the node that points to the specified node. Optionally replace the
/// node with another node. Return the pointer node and whether it is a container or not.
fn replace(&mut self, node: NodeIndex, next: Option<NodeIndex>) -> (NodeIndex, bool) {
// The storage is searched from the back; the first link found is the one that is replaced.
for (i, n) in self.nodes.iter_mut().enumerate().rev() {
let ni = NodeIndex::new(i);
if n.next == Some(node) {
n.next = next;
return (ni, false);
} else if n.kind.child() == Some(node) {
*n.kind.child_mut() = next;
return (ni, true);
}
}
panic!("node is never linked to")
}
}
impl<C: std::fmt::Debug + Clone + 'static, A: std::fmt::Debug + Clone + 'static> std::fmt::Debug
    for Builder<C, A>
{
    /// Format the builder like the tree that it would produce if it was finished right now.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // finishing consumes the builder, so a copy is finished instead
        let tree = self.clone().finish();
        std::fmt::Debug::fmt(&tree, f)
    }
}
impl<C: std::fmt::Debug + Clone, A: std::fmt::Debug + Clone> std::fmt::Debug for Tree<C, A> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
const INDENT: &str = " ";
let mut level = 0;
writeln!(f)?;
for e in self.clone() {
let indent = INDENT.repeat(level);
match e.kind {
EventKind::Enter(c) => {
write!(f, "{}{:?}", indent, c)?;
level += 1;
}
EventKind::Inline => write!(f, "{}Inline", indent)?,
EventKind::Exit(..) => {
level -= 1;
continue;
}
EventKind::Atom(a) => write!(f, "{}{:?}", indent, a)?,
}
writeln!(f, " ({}:{})", e.span.start(), e.span.end())?;
}
Ok(())
}
}
#[cfg(test)]
mod test {
    use crate::Span;

    /// Build a small tree with three levels of nesting and check its debug output.
    ///
    /// The output starts with an empty line, followed by one line per node. Each node is indented
    /// by one indentation unit (a single space) per enclosing container, so the expected
    /// indentation must grow with the nesting depth.
    #[test]
    fn fmt() {
        let mut tree = super::Builder::new();
        tree.enter(1, Span::new(0, 1));
        tree.atom(11, Span::new(0, 1));
        tree.atom(12, Span::new(0, 1));
        tree.exit();
        tree.enter(2, Span::new(1, 5));
        tree.enter(21, Span::new(2, 5));
        tree.enter(211, Span::new(3, 4));
        tree.atom(2111, Span::new(3, 4));
        tree.exit();
        tree.exit();
        tree.enter(22, Span::new(4, 5));
        tree.atom(221, Span::new(4, 5));
        tree.exit();
        tree.exit();
        tree.enter(3, Span::new(5, 6));
        tree.atom(31, Span::new(5, 6));
        tree.exit();
        assert_eq!(
            format!("{:?}", tree.finish()),
            concat!(
                "\n",
                "1 (0:1)\n",
                " 11 (0:1)\n",
                " 12 (0:1)\n",
                "2 (1:5)\n",
                " 21 (2:5)\n",
                "  211 (3:4)\n",
                "   2111 (3:4)\n",
                " 22 (4:5)\n",
                "  221 (4:5)\n",
                "3 (5:6)\n",
                " 31 (5:6)\n",
            )
        );
    }
}

View file

@ -17,10 +17,6 @@ path = "src/main.rs"
name = "parse" name = "parse"
path = "src/parse.rs" path = "src/parse.rs"
[[bin]]
name = "parse_balance"
path = "src/parse_balance.rs"
[[bin]] [[bin]]
name = "html" name = "html"
path = "src/html.rs" path = "src/html.rs"

View file

@ -5,27 +5,66 @@ use html5ever::tendril::TendrilSink;
use html5ever::tokenizer; use html5ever::tokenizer;
use html5ever::tree_builder; use html5ever::tree_builder;
/// Perform sanity checks on events.
pub fn parse(data: &[u8]) { pub fn parse(data: &[u8]) {
if let Ok(s) = std::str::from_utf8(data) { if let Ok(s) = std::str::from_utf8(data) {
jotdown::Parser::new(s).last(); let whitelist_whitespace = s.contains('{') && s.contains('}'); // attributes are outside events
}
}
/// Ensure containers are always balanced, i.e. opened and closed in correct order.
pub fn parse_balance(data: &[u8]) {
if let Ok(s) = std::str::from_utf8(data) {
let mut open = Vec::new(); let mut open = Vec::new();
for event in jotdown::Parser::new(s) { let mut last = (jotdown::Event::Str("".into()), 0..0);
for (event, range) in jotdown::Parser::new(s).into_offset_iter() {
// no overlap, out of order
assert!(
last.1.end <= range.start
// block attributes may overlap with start event
|| (
matches!(last.0, jotdown::Event::Blankline)
&& (
matches!(
event,
jotdown::Event::Start(ref cont, ..) if cont.is_block()
)
|| matches!(event, jotdown::Event::ThematicBreak(..))
)
)
// caption event is before table rows but src is after
|| (
matches!(
last.0,
jotdown::Event::Start(jotdown::Container::Caption, ..)
| jotdown::Event::End(jotdown::Container::Caption)
)
&& range.end <= last.1.start
),
"{} > {} {:?} {:?}",
last.1.end,
range.start,
last.0,
event
);
last = (event.clone(), range.clone());
// range is valid unicode, does not cross char boundary
let _ = &s[range];
match event { match event {
jotdown::Event::Start(c, ..) => open.push(c.clone()), jotdown::Event::Start(c, ..) => open.push(c.clone()),
jotdown::Event::End(c) => assert_eq!(open.pop().unwrap(), c), jotdown::Event::End(c) => {
// closes correct event
assert_eq!(open.pop().unwrap(), c);
}
_ => {} _ => {}
} }
} }
// no missing close
assert_eq!(open, &[]); assert_eq!(open, &[]);
// only whitespace after last event
assert!(
whitelist_whitespace || s[last.1.end..].chars().all(char::is_whitespace),
"non whitespace {:?}",
&s[last.1.end..],
);
} }
} }
/// Validate rendered html output.
pub fn html(data: &[u8]) { pub fn html(data: &[u8]) {
if data.iter().any(|i| *i == 0) { if data.iter().any(|i| *i == 0) {
return; return;
@ -132,9 +171,6 @@ impl<'a> tree_builder::TreeSink for Dom<'a> {
"Found special tag while closing generic tag", "Found special tag while closing generic tag",
"Formatting element not current node", "Formatting element not current node",
"Formatting element not open", "Formatting element not open",
// FIXME bug caused by empty table at end of list
"No matching tag to close",
"Unexpected open element while closing",
]; ];
if !whitelist.iter().any(|e| msg.starts_with(e)) { if !whitelist.iter().any(|e| msg.starts_with(e)) {
#[cfg(feature = "debug")] #[cfg(feature = "debug")]

View file

@ -8,7 +8,6 @@ fn main() {
let f = match target.as_str() { let f = match target.as_str() {
"parse" => jotdown_afl::parse, "parse" => jotdown_afl::parse,
"parse_balance" => jotdown_afl::parse_balance,
"html" => jotdown_afl::html, "html" => jotdown_afl::html,
_ => panic!("unknown target '{}'", target), _ => panic!("unknown target '{}'", target),
}; };

View file

@ -1,3 +0,0 @@
// Entry point of the fuzz target: each input generated by the fuzzer is passed to
// `jotdown_afl::parse_balance`.
fn main() {
afl::fuzz!(|data: &[u8]| { jotdown_afl::parse_balance(data) });
}