commit
70303e7e4b
12 changed files with 1644 additions and 1201 deletions
1
.github/workflows/ci.yml
vendored
1
.github/workflows/ci.yml
vendored
|
@ -72,7 +72,6 @@ jobs:
|
|||
matrix:
|
||||
target:
|
||||
- parse
|
||||
- parse_balance
|
||||
- html
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
|
|
|
@ -21,7 +21,10 @@
|
|||
output.innerText = jotdown_render(input.innerText);
|
||||
} else if (fmt.value == "events") {
|
||||
output.classList.add("verbatim")
|
||||
output.innerText = jotdown_parse(input.innerText);
|
||||
output.innerText = jotdown_parse(input.innerText, false);
|
||||
} else if (fmt.value == "events_spans") {
|
||||
output.classList.add("verbatim")
|
||||
output.innerText = jotdown_parse(input.innerText, true);
|
||||
} else if (fmt.value == "events_indent") {
|
||||
output.classList.add("verbatim")
|
||||
output.innerText = jotdown_parse_indent(input.innerText);
|
||||
|
@ -50,6 +53,7 @@
|
|||
<option value="preview">preview</option>
|
||||
<option value="html">html</option>
|
||||
<option value="events">events</option>
|
||||
<option value="events_spans">events (with offsets)</option>
|
||||
<option value="events_indent">events (indented)</option>
|
||||
</select>
|
||||
</div>
|
||||
|
|
|
@ -22,10 +22,16 @@ pub fn jotdown_render(djot: &str) -> String {
|
|||
|
||||
#[must_use]
|
||||
#[wasm_bindgen]
|
||||
pub fn jotdown_parse(djot: &str) -> String {
|
||||
jotdown::Parser::new(djot)
|
||||
.map(|e| format!("{:?}\n", e))
|
||||
.collect()
|
||||
pub fn jotdown_parse(djot: &str, spans: bool) -> String {
|
||||
let mut out = String::new();
|
||||
for (e, sp) in jotdown::Parser::new(djot).into_offset_iter() {
|
||||
write!(out, "{:?}", e).unwrap();
|
||||
if spans {
|
||||
write!(out, " {:?} {:?}", &djot[sp.clone()], sp).unwrap();
|
||||
}
|
||||
writeln!(out).unwrap();
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
|
|
1825
src/block.rs
1825
src/block.rs
File diff suppressed because it is too large
Load diff
147
src/inline.rs
147
src/inline.rs
|
@ -12,9 +12,9 @@ use Container::*;
|
|||
use ControlFlow::*;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum Atom {
|
||||
FootnoteReference,
|
||||
Symbol,
|
||||
pub enum Atom<'s> {
|
||||
FootnoteReference { label: &'s str },
|
||||
Symbol(&'s str),
|
||||
Softbreak,
|
||||
Hardbreak,
|
||||
Escape,
|
||||
|
@ -26,7 +26,7 @@ pub enum Atom {
|
|||
}
|
||||
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
|
||||
pub enum Container {
|
||||
pub enum Container<'s> {
|
||||
Span,
|
||||
Subscript,
|
||||
Superscript,
|
||||
|
@ -36,16 +36,14 @@ pub enum Container {
|
|||
Strong,
|
||||
Mark,
|
||||
Verbatim,
|
||||
/// Span is the format.
|
||||
RawFormat,
|
||||
RawFormat { format: &'s str },
|
||||
InlineMath,
|
||||
DisplayMath,
|
||||
ReferenceLink(CowStrIndex),
|
||||
ReferenceImage(CowStrIndex),
|
||||
InlineLink(CowStrIndex),
|
||||
InlineImage(CowStrIndex),
|
||||
/// Open delimiter span is URL, closing is '>'.
|
||||
Autolink,
|
||||
Autolink(&'s str),
|
||||
}
|
||||
|
||||
type CowStrIndex = u32;
|
||||
|
@ -57,10 +55,10 @@ pub enum QuoteType {
|
|||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub enum EventKind {
|
||||
Enter(Container),
|
||||
Exit(Container),
|
||||
Atom(Atom),
|
||||
pub enum EventKind<'s> {
|
||||
Enter(Container<'s>),
|
||||
Exit(Container<'s>),
|
||||
Atom(Atom<'s>),
|
||||
Str,
|
||||
Attributes {
|
||||
container: bool,
|
||||
|
@ -72,8 +70,8 @@ pub enum EventKind {
|
|||
type AttributesIndex = u32;
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
pub struct Event {
|
||||
pub kind: EventKind,
|
||||
pub struct Event<'s> {
|
||||
pub kind: EventKind<'s>,
|
||||
pub span: Span,
|
||||
}
|
||||
|
||||
|
@ -218,7 +216,7 @@ pub struct Parser<'s> {
|
|||
openers: Vec<(Opener, usize)>,
|
||||
/// Buffer queue for next events. Events are buffered until no modifications due to future
|
||||
/// characters are needed.
|
||||
events: std::collections::VecDeque<Event>,
|
||||
events: std::collections::VecDeque<Event<'s>>,
|
||||
/// State if inside a verbatim container.
|
||||
verbatim: Option<VerbatimState>,
|
||||
/// State if currently parsing potential attributes.
|
||||
|
@ -268,12 +266,12 @@ impl<'s> Parser<'s> {
|
|||
self.store_attributes.clear();
|
||||
}
|
||||
|
||||
fn push_sp(&mut self, kind: EventKind, span: Span) -> Option<ControlFlow> {
|
||||
fn push_sp(&mut self, kind: EventKind<'s>, span: Span) -> Option<ControlFlow> {
|
||||
self.events.push_back(Event { kind, span });
|
||||
Some(Continue)
|
||||
}
|
||||
|
||||
fn push(&mut self, kind: EventKind) -> Option<ControlFlow> {
|
||||
fn push(&mut self, kind: EventKind<'s>) -> Option<ControlFlow> {
|
||||
self.push_sp(kind, self.input.span)
|
||||
}
|
||||
|
||||
|
@ -310,17 +308,16 @@ impl<'s> Parser<'s> {
|
|||
&& matches!(first.kind, lex::Kind::Seq(Sequence::Backtick))
|
||||
{
|
||||
let raw_format = self.input.ahead_raw_format();
|
||||
let mut span_closer = self.input.span;
|
||||
if let Some(span_format) = raw_format {
|
||||
self.events[event_opener].kind = EventKind::Enter(RawFormat);
|
||||
self.events[event_opener].span = span_format;
|
||||
self.input.span = span_format.translate(1);
|
||||
span_closer = span_format;
|
||||
self.events[event_opener].kind = EventKind::Enter(RawFormat {
|
||||
format: span_format.of(self.input.src),
|
||||
});
|
||||
self.input.span = Span::new(self.input.span.start(), span_format.end() + 1);
|
||||
};
|
||||
let ty_opener = if let EventKind::Enter(ty) = self.events[event_opener].kind {
|
||||
debug_assert!(matches!(
|
||||
ty,
|
||||
Verbatim | RawFormat | InlineMath | DisplayMath
|
||||
Verbatim | RawFormat { .. } | InlineMath | DisplayMath
|
||||
));
|
||||
ty
|
||||
} else {
|
||||
|
@ -330,7 +327,7 @@ impl<'s> Parser<'s> {
|
|||
{
|
||||
self.events.drain(*event_skip..);
|
||||
}
|
||||
self.push_sp(EventKind::Exit(ty_opener), span_closer);
|
||||
self.push(EventKind::Exit(ty_opener));
|
||||
self.verbatim = None;
|
||||
if raw_format.is_none()
|
||||
&& self.input.peek().map_or(false, |t| {
|
||||
|
@ -527,7 +524,13 @@ impl<'s> Parser<'s> {
|
|||
self.input.span = Span::new(start_attr, state.end_attr);
|
||||
self.input.lexer = lex::Lexer::new(&self.input.src[state.end_attr..line_end]);
|
||||
|
||||
if !attrs.is_empty() {
|
||||
if attrs.is_empty() {
|
||||
if matches!(state.elem_ty, AttributesElementType::Container { .. }) {
|
||||
let last = self.events.len() - 1;
|
||||
self.events[last].span =
|
||||
Span::new(self.events[last].span.start(), self.input.span.end());
|
||||
}
|
||||
} else {
|
||||
let attr_index = self.store_attributes.len() as AttributesIndex;
|
||||
self.store_attributes.push(attrs);
|
||||
let attr_event = Event {
|
||||
|
@ -540,11 +543,13 @@ impl<'s> Parser<'s> {
|
|||
match state.elem_ty {
|
||||
AttributesElementType::Container { e_placeholder } => {
|
||||
self.events[e_placeholder] = attr_event;
|
||||
let last = self.events.len() - 1;
|
||||
if matches!(self.events[e_placeholder + 1].kind, EventKind::Str) {
|
||||
self.events[e_placeholder + 1].kind = EventKind::Enter(Span);
|
||||
let last = self.events.len() - 1;
|
||||
self.events[last].kind = EventKind::Exit(Span);
|
||||
}
|
||||
self.events[last].span =
|
||||
Span::new(self.events[last].span.start(), self.input.span.end());
|
||||
}
|
||||
AttributesElementType::Word => {
|
||||
self.events.push_back(attr_event);
|
||||
|
@ -577,12 +582,13 @@ impl<'s> Parser<'s> {
|
|||
.sum();
|
||||
if end && is_url {
|
||||
self.input.lexer = lex::Lexer::new(ahead.as_str());
|
||||
self.input.span = self.input.span.after(len);
|
||||
self.push(EventKind::Enter(Autolink));
|
||||
let span_url = self.input.span.after(len);
|
||||
let url = span_url.of(self.input.src);
|
||||
self.push(EventKind::Enter(Autolink(url)));
|
||||
self.input.span = span_url;
|
||||
self.push(EventKind::Str);
|
||||
self.push(EventKind::Exit(Autolink));
|
||||
self.input.span = self.input.span.after(1);
|
||||
return Some(Continue);
|
||||
return self.push(EventKind::Exit(Autolink(url)));
|
||||
}
|
||||
}
|
||||
None
|
||||
|
@ -606,10 +612,11 @@ impl<'s> Parser<'s> {
|
|||
.sum();
|
||||
if end && valid {
|
||||
self.input.lexer = lex::Lexer::new(ahead.as_str());
|
||||
self.input.span = self.input.span.after(len);
|
||||
self.push(EventKind::Atom(Symbol));
|
||||
self.input.span = self.input.span.after(1);
|
||||
return Some(Continue);
|
||||
let span_symbol = self.input.span.after(len);
|
||||
self.input.span = Span::new(self.input.span.start(), span_symbol.end() + 1);
|
||||
return self.push(EventKind::Atom(Atom::Symbol(
|
||||
span_symbol.of(self.input.src),
|
||||
)));
|
||||
}
|
||||
}
|
||||
None
|
||||
|
@ -649,10 +656,10 @@ impl<'s> Parser<'s> {
|
|||
.sum();
|
||||
if end {
|
||||
self.input.lexer = lex::Lexer::new(ahead.as_str());
|
||||
self.input.span = self.input.span.after(len);
|
||||
self.push(EventKind::Atom(FootnoteReference));
|
||||
self.input.span = self.input.span.after(1);
|
||||
return Some(Continue);
|
||||
let span_label = self.input.span.after(len);
|
||||
let label = span_label.of(self.input.src);
|
||||
self.input.span = Span::new(self.input.span.start(), span_label.end() + 1);
|
||||
return self.push(EventKind::Atom(FootnoteReference { label }));
|
||||
}
|
||||
}
|
||||
None
|
||||
|
@ -925,7 +932,7 @@ impl<'s> Parser<'s> {
|
|||
self.push(EventKind::Atom(atom))
|
||||
}
|
||||
|
||||
fn merge_str_events(&mut self, span_str: Span) -> Event {
|
||||
fn merge_str_events(&mut self, span_str: Span) -> Event<'s> {
|
||||
let mut span = span_str;
|
||||
let should_merge = |e: &Event, span: Span| {
|
||||
matches!(e.kind, EventKind::Str | EventKind::Placeholder)
|
||||
|
@ -952,7 +959,7 @@ impl<'s> Parser<'s> {
|
|||
}
|
||||
}
|
||||
|
||||
fn apply_word_attributes(&mut self, span_str: Span) -> Event {
|
||||
fn apply_word_attributes(&mut self, span_str: Span) -> Event<'s> {
|
||||
if let Some(i) = span_str
|
||||
.of(self.input.src)
|
||||
.bytes()
|
||||
|
@ -972,7 +979,7 @@ impl<'s> Parser<'s> {
|
|||
let attr = self.events.pop_front().unwrap();
|
||||
self.events.push_front(Event {
|
||||
kind: EventKind::Exit(Span),
|
||||
span: span_str.empty_after(),
|
||||
span: attr.span,
|
||||
});
|
||||
self.events.push_front(Event {
|
||||
kind: EventKind::Str,
|
||||
|
@ -1089,8 +1096,8 @@ impl Opener {
|
|||
}
|
||||
}
|
||||
|
||||
enum DelimEventKind {
|
||||
Container(Container),
|
||||
enum DelimEventKind<'s> {
|
||||
Container(Container<'s>),
|
||||
Span(SpanType),
|
||||
Quote(QuoteType),
|
||||
Link {
|
||||
|
@ -1100,7 +1107,7 @@ enum DelimEventKind {
|
|||
},
|
||||
}
|
||||
|
||||
impl From<Opener> for DelimEventKind {
|
||||
impl<'s> From<Opener> for DelimEventKind<'s> {
|
||||
fn from(d: Opener) -> Self {
|
||||
match d {
|
||||
Opener::Span(ty) => Self::Span(ty),
|
||||
|
@ -1127,7 +1134,7 @@ impl From<Opener> for DelimEventKind {
|
|||
}
|
||||
|
||||
impl<'s> Iterator for Parser<'s> {
|
||||
type Item = Event;
|
||||
type Item = Event<'s>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
while self.events.is_empty()
|
||||
|
@ -1158,7 +1165,7 @@ impl<'s> Iterator for Parser<'s> {
|
|||
let ty_opener = if let EventKind::Enter(ty) = self.events[event_opener].kind {
|
||||
debug_assert!(matches!(
|
||||
ty,
|
||||
Verbatim | RawFormat | InlineMath | DisplayMath
|
||||
Verbatim | RawFormat { .. } | InlineMath | DisplayMath
|
||||
));
|
||||
ty
|
||||
} else {
|
||||
|
@ -1266,7 +1273,7 @@ mod test {
|
|||
),
|
||||
(Enter(Verbatim), "`"),
|
||||
(Str, "raw"),
|
||||
(Exit(Verbatim), "`"),
|
||||
(Exit(Verbatim), "`{#id}"),
|
||||
(Str, " post"),
|
||||
);
|
||||
}
|
||||
|
@ -1336,16 +1343,16 @@ mod test {
|
|||
fn raw_format() {
|
||||
test_parse!(
|
||||
"`raw`{=format}",
|
||||
(Enter(RawFormat), "format"),
|
||||
(Enter(RawFormat { format: "format" }), "`"),
|
||||
(Str, "raw"),
|
||||
(Exit(RawFormat), "format"),
|
||||
(Exit(RawFormat { format: "format" }), "`{=format}"),
|
||||
);
|
||||
test_parse!(
|
||||
"before `raw`{=format} after",
|
||||
(Str, "before "),
|
||||
(Enter(RawFormat), "format"),
|
||||
(Enter(RawFormat { format: "format" }), "`"),
|
||||
(Str, "raw"),
|
||||
(Exit(RawFormat), "format"),
|
||||
(Exit(RawFormat { format: "format" }), "`{=format}"),
|
||||
(Str, " after"),
|
||||
);
|
||||
}
|
||||
|
@ -1456,7 +1463,7 @@ mod test {
|
|||
),
|
||||
(Enter(Span), ""),
|
||||
(Str, "[text]("),
|
||||
(Exit(Span), ""),
|
||||
(Exit(Span), "{.cls}"),
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -1520,7 +1527,7 @@ mod test {
|
|||
"{.cls}",
|
||||
),
|
||||
(Enter(Span), "["),
|
||||
(Exit(Span), "]")
|
||||
(Exit(Span), "]{.cls}")
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -1537,7 +1544,7 @@ mod test {
|
|||
),
|
||||
(Enter(Span), "["),
|
||||
(Str, "abc"),
|
||||
(Exit(Span), "]"),
|
||||
(Exit(Span), "]{.def}"),
|
||||
);
|
||||
test_parse!("not a [span] {#id}.", (Str, "not a [span] "), (Str, "."));
|
||||
}
|
||||
|
@ -1555,7 +1562,7 @@ mod test {
|
|||
),
|
||||
(Enter(Span), "["),
|
||||
(Str, "x_y"),
|
||||
(Exit(Span), "]"),
|
||||
(Exit(Span), "]{.bar_}"),
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -1563,24 +1570,24 @@ mod test {
|
|||
fn autolink() {
|
||||
test_parse!(
|
||||
"<https://example.com>",
|
||||
(Enter(Autolink), "https://example.com"),
|
||||
(Enter(Autolink("https://example.com",)), "<"),
|
||||
(Str, "https://example.com"),
|
||||
(Exit(Autolink), "https://example.com")
|
||||
(Exit(Autolink("https://example.com",)), ">")
|
||||
);
|
||||
test_parse!(
|
||||
"<a@b.c>",
|
||||
(Enter(Autolink), "a@b.c"),
|
||||
(Enter(Autolink("a@b.c")), "<"),
|
||||
(Str, "a@b.c"),
|
||||
(Exit(Autolink), "a@b.c"),
|
||||
(Exit(Autolink("a@b.c")), ">"),
|
||||
);
|
||||
test_parse!(
|
||||
"<http://a.b><http://c.d>",
|
||||
(Enter(Autolink), "http://a.b"),
|
||||
(Enter(Autolink("http://a.b")), "<"),
|
||||
(Str, "http://a.b"),
|
||||
(Exit(Autolink), "http://a.b"),
|
||||
(Enter(Autolink), "http://c.d"),
|
||||
(Exit(Autolink("http://a.b")), ">"),
|
||||
(Enter(Autolink("http://c.d")), "<"),
|
||||
(Str, "http://c.d"),
|
||||
(Exit(Autolink), "http://c.d"),
|
||||
(Exit(Autolink("http://c.d")), ">"),
|
||||
);
|
||||
test_parse!("<not-a-url>", (Str, "<not-a-url>"));
|
||||
}
|
||||
|
@ -1590,7 +1597,7 @@ mod test {
|
|||
test_parse!(
|
||||
"text[^footnote]. more text",
|
||||
(Str, "text"),
|
||||
(Atom(FootnoteReference), "footnote"),
|
||||
(Atom(FootnoteReference { label: "footnote" }), "[^footnote]"),
|
||||
(Str, ". more text"),
|
||||
);
|
||||
}
|
||||
|
@ -1687,7 +1694,7 @@ mod test {
|
|||
),
|
||||
(Enter(Emphasis), "_"),
|
||||
(Str, "abc def"),
|
||||
(Exit(Emphasis), "_"),
|
||||
(Exit(Emphasis), "_{.attr}"),
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -1697,13 +1704,13 @@ mod test {
|
|||
"_abc def_{}",
|
||||
(Enter(Emphasis), "_"),
|
||||
(Str, "abc def"),
|
||||
(Exit(Emphasis), "_"),
|
||||
(Exit(Emphasis), "_{}"),
|
||||
);
|
||||
test_parse!(
|
||||
"_abc def_{ % comment % } ghi",
|
||||
(Enter(Emphasis), "_"),
|
||||
(Str, "abc def"),
|
||||
(Exit(Emphasis), "_"),
|
||||
(Exit(Emphasis), "_{ % comment % }"),
|
||||
(Str, " ghi"),
|
||||
);
|
||||
}
|
||||
|
@ -1721,7 +1728,7 @@ mod test {
|
|||
),
|
||||
(Enter(Emphasis), "_"),
|
||||
(Str, "abc def"),
|
||||
(Exit(Emphasis), "_"),
|
||||
(Exit(Emphasis), "_{.a}{.b}{.c}"),
|
||||
(Str, " "),
|
||||
);
|
||||
}
|
||||
|
@ -1739,7 +1746,7 @@ mod test {
|
|||
),
|
||||
(Enter(Span), ""),
|
||||
(Str, "word"),
|
||||
(Exit(Span), ""),
|
||||
(Exit(Span), "{a=b}"),
|
||||
);
|
||||
test_parse!(
|
||||
"some word{.a}{.b} with attrs",
|
||||
|
@ -1753,7 +1760,7 @@ mod test {
|
|||
),
|
||||
(Enter(Span), ""),
|
||||
(Str, "word"),
|
||||
(Exit(Span), ""),
|
||||
(Exit(Span), "{.a}{.b}"),
|
||||
(Str, " with attrs"),
|
||||
);
|
||||
}
|
||||
|
|
351
src/lib.rs
351
src/lib.rs
|
@ -51,6 +51,7 @@
|
|||
use std::fmt;
|
||||
use std::fmt::Write as FmtWrite;
|
||||
use std::io;
|
||||
use std::ops::Range;
|
||||
|
||||
#[cfg(feature = "html")]
|
||||
pub mod html;
|
||||
|
@ -60,7 +61,6 @@ mod block;
|
|||
mod inline;
|
||||
mod lex;
|
||||
mod span;
|
||||
mod tree;
|
||||
|
||||
use span::Span;
|
||||
|
||||
|
@ -555,13 +555,14 @@ pub struct Parser<'s> {
|
|||
src: &'s str,
|
||||
|
||||
/// Block tree parsed at first.
|
||||
tree: block::Tree,
|
||||
blocks: std::iter::Peekable<std::vec::IntoIter<block::Event<'s>>>,
|
||||
|
||||
/// Contents obtained by the prepass.
|
||||
pre_pass: PrePass<'s>,
|
||||
|
||||
/// Last parsed block attributes
|
||||
/// Last parsed block attributes, and their starting offset.
|
||||
block_attributes: Attributes<'s>,
|
||||
block_attributes_pos: Option<usize>,
|
||||
|
||||
/// Current table row is a head row.
|
||||
table_head_row: bool,
|
||||
|
@ -576,7 +577,7 @@ pub struct Parser<'s> {
|
|||
#[derive(Clone)]
|
||||
struct Heading {
|
||||
/// Location of heading in src.
|
||||
location: usize,
|
||||
location: u32,
|
||||
/// Automatically generated id from heading text.
|
||||
id_auto: String,
|
||||
/// Text of heading, formatting stripped.
|
||||
|
@ -598,28 +599,50 @@ struct PrePass<'s> {
|
|||
|
||||
impl<'s> PrePass<'s> {
|
||||
#[must_use]
|
||||
fn new(src: &'s str, mut tree: block::Tree, inline_parser: &mut inline::Parser<'s>) -> Self {
|
||||
fn new(
|
||||
src: &'s str,
|
||||
blocks: std::slice::Iter<block::Event<'s>>,
|
||||
inline_parser: &mut inline::Parser<'s>,
|
||||
) -> Self {
|
||||
let mut link_definitions = Map::new();
|
||||
let mut headings: Vec<Heading> = Vec::new();
|
||||
let mut used_ids: Set<&str> = Set::new();
|
||||
|
||||
let mut blocks = blocks.peekable();
|
||||
|
||||
let mut attr_prev: Option<Span> = None;
|
||||
while let Some(e) = tree.next() {
|
||||
while let Some(e) = blocks.next() {
|
||||
match e.kind {
|
||||
tree::EventKind::Enter(block::Node::Leaf(block::Leaf::LinkDefinition)) => {
|
||||
block::EventKind::Enter(block::Node::Leaf(block::Leaf::LinkDefinition {
|
||||
label,
|
||||
})) => {
|
||||
fn next_is_inline(
|
||||
bs: &mut std::iter::Peekable<std::slice::Iter<block::Event>>,
|
||||
) -> bool {
|
||||
matches!(bs.peek().map(|e| &e.kind), Some(block::EventKind::Inline))
|
||||
}
|
||||
|
||||
// All link definition tags have to be obtained initially, as references can
|
||||
// appear before the definition.
|
||||
let tag = e.span.of(src);
|
||||
let attrs =
|
||||
attr_prev.map_or_else(Attributes::new, |sp| attr::parse(sp.of(src)));
|
||||
let url = match tree.count_children() {
|
||||
0 => "".into(),
|
||||
1 => tree.take_inlines().next().unwrap().of(src).trim().into(),
|
||||
_ => tree.take_inlines().map(|sp| sp.of(src).trim()).collect(),
|
||||
let url = if !next_is_inline(&mut blocks) {
|
||||
"".into()
|
||||
} else {
|
||||
let start = blocks.next().unwrap().span.of(src).trim();
|
||||
if !next_is_inline(&mut blocks) {
|
||||
start.into()
|
||||
} else {
|
||||
let mut url = start.to_string();
|
||||
while next_is_inline(&mut blocks) {
|
||||
url.push_str(blocks.next().unwrap().span.of(src).trim());
|
||||
}
|
||||
url.into()
|
||||
}
|
||||
};
|
||||
link_definitions.insert(tag, (url, attrs));
|
||||
link_definitions.insert(label, (url, attrs));
|
||||
}
|
||||
tree::EventKind::Enter(block::Node::Leaf(block::Leaf::Heading { .. })) => {
|
||||
block::EventKind::Enter(block::Node::Leaf(block::Leaf::Heading { .. })) => {
|
||||
// All headings ids have to be obtained initially, as references can appear
|
||||
// before the heading. Additionally, determining the id requires inline parsing
|
||||
// as formatting must be removed.
|
||||
|
@ -634,10 +657,21 @@ impl<'s> PrePass<'s> {
|
|||
let mut id_auto = String::new();
|
||||
let mut text = String::new();
|
||||
let mut last_whitespace = true;
|
||||
let inlines = tree.take_inlines().collect::<Vec<_>>();
|
||||
inline_parser.reset();
|
||||
inlines.iter().enumerate().for_each(|(i, sp)| {
|
||||
inline_parser.feed_line(*sp, i == inlines.len() - 1);
|
||||
let mut last_end = 0;
|
||||
loop {
|
||||
let span_inline = blocks.next().and_then(|e| {
|
||||
if matches!(e.kind, block::EventKind::Inline) {
|
||||
last_end = e.span.end();
|
||||
Some(e.span)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
});
|
||||
inline_parser.feed_line(
|
||||
span_inline.unwrap_or_else(|| Span::empty_at(last_end)),
|
||||
span_inline.is_none(),
|
||||
);
|
||||
inline_parser.for_each(|ev| match ev.kind {
|
||||
inline::EventKind::Str => {
|
||||
text.push_str(ev.span.of(src));
|
||||
|
@ -662,8 +696,11 @@ impl<'s> PrePass<'s> {
|
|||
id_auto.push('-');
|
||||
}
|
||||
_ => {}
|
||||
})
|
||||
});
|
||||
});
|
||||
if span_inline.is_none() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
id_auto.drain(id_auto.trim_end_matches('-').len()..);
|
||||
|
||||
// ensure id unique
|
||||
|
@ -689,17 +726,17 @@ impl<'s> PrePass<'s> {
|
|||
std::mem::transmute::<&str, &'static str>(id_auto.as_ref())
|
||||
});
|
||||
headings.push(Heading {
|
||||
location: e.span.start(),
|
||||
location: e.span.start() as u32,
|
||||
id_auto,
|
||||
text,
|
||||
id_override,
|
||||
});
|
||||
}
|
||||
tree::EventKind::Atom(block::Atom::Attributes) => {
|
||||
block::EventKind::Atom(block::Atom::Attributes) => {
|
||||
attr_prev = Some(e.span);
|
||||
}
|
||||
tree::EventKind::Enter(..)
|
||||
| tree::EventKind::Exit(block::Node::Container(block::Container::Section {
|
||||
block::EventKind::Enter(..)
|
||||
| block::EventKind::Exit(block::Node::Container(block::Container::Section {
|
||||
..
|
||||
})) => {}
|
||||
_ => {
|
||||
|
@ -723,7 +760,7 @@ impl<'s> PrePass<'s> {
|
|||
h.id_override.as_ref().unwrap_or(&h.id_auto)
|
||||
}
|
||||
|
||||
fn heading_id_by_location(&self, location: usize) -> Option<&str> {
|
||||
fn heading_id_by_location(&self, location: u32) -> Option<&str> {
|
||||
self.headings
|
||||
.binary_search_by_key(&location, |h| h.location)
|
||||
.ok()
|
||||
|
@ -741,22 +778,133 @@ impl<'s> PrePass<'s> {
|
|||
impl<'s> Parser<'s> {
|
||||
#[must_use]
|
||||
pub fn new(src: &'s str) -> Self {
|
||||
let tree = block::parse(src);
|
||||
let blocks = block::parse(src);
|
||||
let mut inline_parser = inline::Parser::new(src);
|
||||
let pre_pass = PrePass::new(src, tree.clone(), &mut inline_parser);
|
||||
let pre_pass = PrePass::new(src, blocks.iter(), &mut inline_parser);
|
||||
|
||||
Self {
|
||||
src,
|
||||
tree,
|
||||
blocks: blocks.into_iter().peekable(),
|
||||
pre_pass,
|
||||
block_attributes: Attributes::new(),
|
||||
block_attributes_pos: None,
|
||||
table_head_row: false,
|
||||
verbatim: false,
|
||||
inline_parser,
|
||||
}
|
||||
}
|
||||
|
||||
fn inline(&mut self) -> Option<Event<'s>> {
|
||||
/// Turn the [`Parser`] into an iterator of tuples, each with an [`Event`] and a start/end byte
|
||||
/// offset for its corresponding input (as a [`Range<usize>`]).
|
||||
///
|
||||
/// Generally, the range of each event does not overlap with any other event and the ranges are
|
||||
/// in the same order as the events are emitted, i.e. the start offset of an event must be greater than
|
||||
/// or equal to the (exclusive) end offset of all events that were emitted before that event.
|
||||
/// However, there are some exceptions to this rule:
|
||||
///
|
||||
/// - Blank lines in between block attributes and the block cause the blankline events to
|
||||
/// overlap with the block start event.
|
||||
/// - Caption events are emitted before the table rows while the input for the caption content
|
||||
/// is located after the table rows, causing the ranges to be out of order.
|
||||
///
|
||||
/// Characters between events, that are not part of any event range, are typically whitespace
|
||||
/// but may also consist of unattached attributes or `>` characters from blockquotes.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// Start and end events of containers correspond only to the start and end markers for that
|
||||
/// container, not its inner content:
|
||||
///
|
||||
/// ```
|
||||
/// # use jotdown::*;
|
||||
/// # use jotdown::Event::*;
|
||||
/// # use jotdown::Container::*;
|
||||
/// let input = "> _hello_ [text](url)\n";
|
||||
/// assert!(matches!(
|
||||
/// Parser::new(input)
|
||||
/// .into_offset_iter()
|
||||
/// .map(|(e, r)| (&input[r], e))
|
||||
/// .collect::<Vec<_>>()
|
||||
/// .as_slice(),
|
||||
/// &[
|
||||
/// (">", Start(Blockquote, ..)),
|
||||
/// ("", Start(Paragraph, ..)),
|
||||
/// ("_", Start(Emphasis, ..)),
|
||||
/// ("hello", Str(..)),
|
||||
/// ("_", End(Emphasis)),
|
||||
/// (" ", Str(..)),
|
||||
/// ("[", Start(Link { .. }, ..)),
|
||||
/// ("text", Str(..)),
|
||||
/// ("](url)", End(Link { .. })),
|
||||
/// ("", End(Paragraph)),
|
||||
/// ("", End(Blockquote)),
|
||||
/// ],
|
||||
/// ));
|
||||
/// ```
|
||||
///
|
||||
/// _Block_ attributes that belong to a container are included in the _start_ event. _Inline_
|
||||
/// attributes that belong to a container are included in the _end_ event:
|
||||
///
|
||||
/// ```
|
||||
/// # use jotdown::*;
|
||||
/// # use jotdown::Event::*;
|
||||
/// # use jotdown::Container::*;
|
||||
/// let input = "
|
||||
/// {.quote}
|
||||
/// > [Hello]{lang=en} world!";
|
||||
/// assert!(matches!(
|
||||
/// Parser::new(input)
|
||||
/// .into_offset_iter()
|
||||
/// .map(|(e, r)| (&input[r], e))
|
||||
/// .collect::<Vec<_>>()
|
||||
/// .as_slice(),
|
||||
/// &[
|
||||
/// ("\n", Blankline),
|
||||
/// ("{.quote}\n>", Start(Blockquote, ..)),
|
||||
/// ("", Start(Paragraph, ..)),
|
||||
/// ("[", Start(Span, ..)),
|
||||
/// ("Hello", Str(..)),
|
||||
/// ("]{lang=en}", End(Span)),
|
||||
/// (" world!", Str(..)),
|
||||
/// ("", End(Paragraph)),
|
||||
/// ("", End(Blockquote)),
|
||||
/// ],
|
||||
/// ));
|
||||
/// ```
|
||||
///
|
||||
/// Inline events that span multiple lines may contain characters from outer block containers
|
||||
/// (e.g. `>` characters from blockquotes or whitespace from list items):
|
||||
///
|
||||
/// ```
|
||||
/// # use jotdown::*;
|
||||
/// # use jotdown::Event::*;
|
||||
/// # use jotdown::Container::*;
|
||||
/// let input = "
|
||||
/// > [txt](multi
|
||||
/// > line)";
|
||||
/// assert!(matches!(
|
||||
/// Parser::new(input)
|
||||
/// .into_offset_iter()
|
||||
/// .map(|(e, r)| (&input[r], e))
|
||||
/// .collect::<Vec<_>>()
|
||||
/// .as_slice(),
|
||||
/// &[
|
||||
/// ("\n", Blankline),
|
||||
/// (">", Start(Blockquote, ..)),
|
||||
/// ("", Start(Paragraph, ..)),
|
||||
/// ("[", Start(Link { .. }, ..)),
|
||||
/// ("txt", Str(..)),
|
||||
/// ("](multi\n> line)", End(Link { .. })),
|
||||
/// ("", End(Paragraph)),
|
||||
/// ("", End(Blockquote)),
|
||||
/// ],
|
||||
/// ));
|
||||
/// ```
|
||||
pub fn into_offset_iter(self) -> OffsetIter<'s> {
|
||||
OffsetIter { parser: self }
|
||||
}
|
||||
|
||||
fn inline(&mut self) -> Option<(Event<'s>, Range<usize>)> {
|
||||
let next = self.inline_parser.next()?;
|
||||
|
||||
let (inline, mut attributes) = match next {
|
||||
|
@ -772,16 +920,14 @@ impl<'s> Parser<'s> {
|
|||
|
||||
inline.map(|inline| {
|
||||
let enter = matches!(inline.kind, inline::EventKind::Enter(_));
|
||||
match inline.kind {
|
||||
let event = match inline.kind {
|
||||
inline::EventKind::Enter(c) | inline::EventKind::Exit(c) => {
|
||||
let t = match c {
|
||||
inline::Container::Span => Container::Span,
|
||||
inline::Container::Verbatim => Container::Verbatim,
|
||||
inline::Container::InlineMath => Container::Math { display: false },
|
||||
inline::Container::DisplayMath => Container::Math { display: true },
|
||||
inline::Container::RawFormat => Container::RawInline {
|
||||
format: inline.span.of(self.src),
|
||||
},
|
||||
inline::Container::RawFormat { format } => Container::RawInline { format },
|
||||
inline::Container::Subscript => Container::Subscript,
|
||||
inline::Container::Superscript => Container::Superscript,
|
||||
inline::Container::Insert => Container::Insert,
|
||||
|
@ -822,14 +968,13 @@ impl<'s> Parser<'s> {
|
|||
Container::Image(url_or_tag, ty)
|
||||
}
|
||||
}
|
||||
inline::Container::Autolink => {
|
||||
let url: CowStr = inline.span.of(self.src).into();
|
||||
inline::Container::Autolink(url) => {
|
||||
let ty = if url.contains('@') {
|
||||
LinkType::Email
|
||||
} else {
|
||||
LinkType::AutoLink
|
||||
};
|
||||
Container::Link(url, ty)
|
||||
Container::Link(url.into(), ty)
|
||||
}
|
||||
};
|
||||
if enter {
|
||||
|
@ -839,10 +984,8 @@ impl<'s> Parser<'s> {
|
|||
}
|
||||
}
|
||||
inline::EventKind::Atom(a) => match a {
|
||||
inline::Atom::FootnoteReference => {
|
||||
Event::FootnoteReference(inline.span.of(self.src))
|
||||
}
|
||||
inline::Atom::Symbol => Event::Symbol(inline.span.of(self.src).into()),
|
||||
inline::Atom::FootnoteReference { label } => Event::FootnoteReference(label),
|
||||
inline::Atom::Symbol(sym) => Event::Symbol(sym.into()),
|
||||
inline::Atom::Quote { ty, left } => match (ty, left) {
|
||||
(inline::QuoteType::Single, true) => Event::LeftSingleQuote,
|
||||
(inline::QuoteType::Single, false) => Event::RightSingleQuote,
|
||||
|
@ -861,48 +1004,58 @@ impl<'s> Parser<'s> {
|
|||
inline::EventKind::Attributes { .. } | inline::EventKind::Placeholder => {
|
||||
panic!("{:?}", inline)
|
||||
}
|
||||
}
|
||||
};
|
||||
(event, inline.span.into())
|
||||
})
|
||||
}
|
||||
|
||||
fn block(&mut self) -> Option<Event<'s>> {
|
||||
while let Some(ev) = &mut self.tree.next() {
|
||||
let content = ev.span.of(self.src);
|
||||
fn block(&mut self) -> Option<(Event<'s>, Range<usize>)> {
|
||||
while let Some(mut ev) = &mut self.blocks.next() {
|
||||
let event = match ev.kind {
|
||||
tree::EventKind::Atom(a) => match a {
|
||||
block::EventKind::Atom(a) => match a {
|
||||
block::Atom::Blankline => Event::Blankline,
|
||||
block::Atom::ThematicBreak => {
|
||||
if let Some(pos) = self.block_attributes_pos.take() {
|
||||
ev.span = Span::new(pos, ev.span.end());
|
||||
}
|
||||
Event::ThematicBreak(self.block_attributes.take())
|
||||
}
|
||||
block::Atom::Attributes => {
|
||||
self.block_attributes.parse(content);
|
||||
if self.block_attributes_pos.is_none() {
|
||||
self.block_attributes_pos = Some(ev.span.start());
|
||||
}
|
||||
self.block_attributes.parse(ev.span.of(self.src));
|
||||
continue;
|
||||
}
|
||||
},
|
||||
tree::EventKind::Enter(c) | tree::EventKind::Exit(c) => {
|
||||
let enter = matches!(ev.kind, tree::EventKind::Enter(..));
|
||||
block::EventKind::Enter(c) | block::EventKind::Exit(c) => {
|
||||
let enter = matches!(ev.kind, block::EventKind::Enter(..));
|
||||
let cont = match c {
|
||||
block::Node::Leaf(l) => {
|
||||
self.inline_parser.reset();
|
||||
match l {
|
||||
block::Leaf::Paragraph => Container::Paragraph,
|
||||
block::Leaf::Heading { has_section } => Container::Heading {
|
||||
level: content.len().try_into().unwrap(),
|
||||
block::Leaf::Heading {
|
||||
level,
|
||||
has_section,
|
||||
pos,
|
||||
} => Container::Heading {
|
||||
level,
|
||||
has_section,
|
||||
id: self
|
||||
.pre_pass
|
||||
.heading_id_by_location(ev.span.start())
|
||||
.heading_id_by_location(pos)
|
||||
.unwrap_or_default()
|
||||
.to_string()
|
||||
.into(),
|
||||
},
|
||||
block::Leaf::DescriptionTerm => Container::DescriptionTerm,
|
||||
block::Leaf::CodeBlock => {
|
||||
block::Leaf::CodeBlock { language } => {
|
||||
self.verbatim = enter;
|
||||
if let Some(format) = content.strip_prefix('=') {
|
||||
if let Some(format) = language.strip_prefix('=') {
|
||||
Container::RawBlock { format }
|
||||
} else {
|
||||
Container::CodeBlock { language: content }
|
||||
Container::CodeBlock { language }
|
||||
}
|
||||
}
|
||||
block::Leaf::TableCell(alignment) => Container::TableCell {
|
||||
|
@ -910,16 +1063,20 @@ impl<'s> Parser<'s> {
|
|||
head: self.table_head_row,
|
||||
},
|
||||
block::Leaf::Caption => Container::Caption,
|
||||
block::Leaf::LinkDefinition => {
|
||||
Container::LinkDefinition { label: content }
|
||||
block::Leaf::LinkDefinition { label } => {
|
||||
self.verbatim = enter;
|
||||
Container::LinkDefinition { label }
|
||||
}
|
||||
}
|
||||
}
|
||||
block::Node::Container(c) => match c {
|
||||
block::Container::Blockquote => Container::Blockquote,
|
||||
block::Container::Div => Container::Div { class: content },
|
||||
block::Container::Footnote => Container::Footnote { label: content },
|
||||
block::Container::List(block::ListKind { ty, tight }) => {
|
||||
block::Container::Div { class } => Container::Div { class },
|
||||
block::Container::Footnote { label } => Container::Footnote { label },
|
||||
block::Container::List {
|
||||
kind: block::ListKind { ty, tight },
|
||||
marker,
|
||||
} => {
|
||||
if matches!(ty, block::ListType::Description) {
|
||||
Container::DescriptionList
|
||||
} else {
|
||||
|
@ -927,9 +1084,8 @@ impl<'s> Parser<'s> {
|
|||
block::ListType::Unordered(..) => ListKind::Unordered,
|
||||
block::ListType::Task => ListKind::Task,
|
||||
block::ListType::Ordered(numbering, style) => {
|
||||
let start = numbering
|
||||
.parse_number(style.number(content))
|
||||
.max(1);
|
||||
let start =
|
||||
numbering.parse_number(style.number(marker)).max(1);
|
||||
ListKind::Ordered {
|
||||
numbering,
|
||||
style,
|
||||
|
@ -941,12 +1097,12 @@ impl<'s> Parser<'s> {
|
|||
Container::List { kind, tight }
|
||||
}
|
||||
}
|
||||
block::Container::ListItem(ty) => match ty {
|
||||
block::ListType::Task => Container::TaskListItem {
|
||||
checked: content.as_bytes()[3] != b' ',
|
||||
},
|
||||
block::ListType::Description => Container::DescriptionDetails,
|
||||
_ => Container::ListItem,
|
||||
block::Container::ListItem(kind) => match kind {
|
||||
block::ListItemKind::Task { checked } => {
|
||||
Container::TaskListItem { checked }
|
||||
}
|
||||
block::ListItemKind::Description => Container::DescriptionDetails,
|
||||
block::ListItemKind::List => Container::ListItem,
|
||||
},
|
||||
block::Container::Table => Container::Table,
|
||||
block::Container::TableRow { head } => {
|
||||
|
@ -955,10 +1111,10 @@ impl<'s> Parser<'s> {
|
|||
}
|
||||
Container::TableRow { head }
|
||||
}
|
||||
block::Container::Section => Container::Section {
|
||||
block::Container::Section { pos } => Container::Section {
|
||||
id: self
|
||||
.pre_pass
|
||||
.heading_id_by_location(ev.span.start())
|
||||
.heading_id_by_location(pos)
|
||||
.unwrap_or_default()
|
||||
.to_string()
|
||||
.into(),
|
||||
|
@ -966,32 +1122,63 @@ impl<'s> Parser<'s> {
|
|||
},
|
||||
};
|
||||
if enter {
|
||||
if let Some(pos) = self.block_attributes_pos.take() {
|
||||
ev.span = Span::new(pos, ev.span.end());
|
||||
}
|
||||
Event::Start(cont, self.block_attributes.take())
|
||||
} else {
|
||||
self.block_attributes = Attributes::new();
|
||||
self.block_attributes_pos = None;
|
||||
Event::End(cont)
|
||||
}
|
||||
}
|
||||
tree::EventKind::Inline => {
|
||||
block::EventKind::Inline => {
|
||||
if self.verbatim {
|
||||
Event::Str(content.into())
|
||||
Event::Str(ev.span.of(self.src).into())
|
||||
} else {
|
||||
self.inline_parser
|
||||
.feed_line(ev.span, self.tree.branch_is_empty());
|
||||
return self.next();
|
||||
self.inline_parser.feed_line(
|
||||
ev.span,
|
||||
!matches!(
|
||||
self.blocks.peek().map(|e| &e.kind),
|
||||
Some(block::EventKind::Inline),
|
||||
),
|
||||
);
|
||||
return self.next_span();
|
||||
}
|
||||
}
|
||||
block::EventKind::Stale => continue,
|
||||
};
|
||||
return Some(event);
|
||||
return Some((event, ev.span.into()));
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
fn next_span(&mut self) -> Option<(Event<'s>, Range<usize>)> {
|
||||
self.inline().or_else(|| self.block())
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> Iterator for Parser<'s> {
|
||||
type Item = Event<'s>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.inline().or_else(|| self.block())
|
||||
self.next_span().map(|(e, _)| e)
|
||||
}
|
||||
}
|
||||
|
||||
/// An iterator that is identical to a [`Parser`], except that it also emits the location of each
|
||||
/// event within the input.
|
||||
///
|
||||
/// See the documentation of [`Parser::into_offset_iter`] for more information.
|
||||
pub struct OffsetIter<'s> {
|
||||
parser: Parser<'s>,
|
||||
}
|
||||
|
||||
impl<'s> Iterator for OffsetIter<'s> {
|
||||
type Item = (Event<'s>, Range<usize>);
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.parser.next_span()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1523,7 +1710,6 @@ mod test {
|
|||
Blankline,
|
||||
Start(LinkDefinition { label: "tag" }, Attributes::new()),
|
||||
Str("u".into()),
|
||||
Softbreak,
|
||||
Str("rl".into()),
|
||||
End(LinkDefinition { label: "tag" }),
|
||||
);
|
||||
|
@ -1532,19 +1718,24 @@ mod test {
|
|||
"[text][tag]\n",
|
||||
"\n",
|
||||
"[tag]:\n",
|
||||
" url\n", //
|
||||
" url\n", //
|
||||
" cont\n", //
|
||||
),
|
||||
Start(Paragraph, Attributes::new()),
|
||||
Start(
|
||||
Link("url".into(), LinkType::Span(SpanLinkType::Reference)),
|
||||
Link("urlcont".into(), LinkType::Span(SpanLinkType::Reference)),
|
||||
Attributes::new()
|
||||
),
|
||||
Str("text".into()),
|
||||
End(Link("url".into(), LinkType::Span(SpanLinkType::Reference))),
|
||||
End(Link(
|
||||
"urlcont".into(),
|
||||
LinkType::Span(SpanLinkType::Reference)
|
||||
)),
|
||||
End(Paragraph),
|
||||
Blankline,
|
||||
Start(LinkDefinition { label: "tag" }, Attributes::new()),
|
||||
Str("url".into()),
|
||||
Str("cont".into()),
|
||||
End(LinkDefinition { label: "tag" }),
|
||||
);
|
||||
}
|
||||
|
|
|
@ -4,6 +4,12 @@ pub struct Span {
|
|||
end: u32,
|
||||
}
|
||||
|
||||
impl From<Span> for std::ops::Range<usize> {
|
||||
fn from(span: Span) -> Self {
|
||||
span.start()..span.end()
|
||||
}
|
||||
}
|
||||
|
||||
impl Span {
|
||||
pub fn new(start: usize, end: usize) -> Self {
|
||||
Self::by_len(start, end.checked_sub(start).unwrap())
|
||||
|
|
427
src/tree.rs
427
src/tree.rs
|
@ -1,427 +0,0 @@
|
|||
use crate::Span;
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum EventKind<C, A> {
|
||||
Enter(C),
|
||||
Inline,
|
||||
Exit(C),
|
||||
Atom(A),
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Node<'a, C, A> {
|
||||
pub index: NodeIndex,
|
||||
pub elem: Element<'a, C, A>,
|
||||
pub span: Span,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Element<'a, C, A> {
|
||||
Container(&'a mut C),
|
||||
Atom(&'a mut A),
|
||||
Inline,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct Event<C, A> {
|
||||
pub kind: EventKind<C, A>,
|
||||
pub span: Span,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Tree<C: 'static, A: 'static> {
|
||||
nodes: std::rc::Rc<[InternalNode<C, A>]>,
|
||||
branch: Vec<NodeIndex>,
|
||||
head: Option<NodeIndex>,
|
||||
}
|
||||
|
||||
impl<C: Clone, A: Clone> Tree<C, A> {
|
||||
/// Count number of direct children nodes.
|
||||
pub fn count_children(&self) -> usize {
|
||||
let mut head = self.head;
|
||||
let mut count = 0;
|
||||
while let Some(h) = head {
|
||||
let n = &self.nodes[h.index()];
|
||||
head = n.next;
|
||||
count += 1;
|
||||
}
|
||||
count
|
||||
}
|
||||
|
||||
/// Retrieve all inlines until the end of the current container. Panics if any upcoming node is
|
||||
/// not an inline node.
|
||||
pub fn take_inlines(&mut self) -> impl Iterator<Item = Span> + '_ {
|
||||
let mut head = self.head.take();
|
||||
std::iter::from_fn(move || {
|
||||
head.take().map(|h| {
|
||||
let n = &self.nodes[h.index()];
|
||||
debug_assert!(matches!(n.kind, NodeKind::Inline));
|
||||
head = n.next;
|
||||
n.span
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
pub fn branch_is_empty(&self) -> bool {
|
||||
matches!(self.head, None)
|
||||
}
|
||||
}
|
||||
|
||||
impl<C: Clone, A: Clone> Iterator for Tree<C, A> {
|
||||
type Item = Event<C, A>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if let Some(head) = self.head {
|
||||
let n = &self.nodes[head.index()];
|
||||
let kind = match &n.kind {
|
||||
NodeKind::Root => unreachable!(),
|
||||
NodeKind::Container(c, child) => {
|
||||
self.branch.push(head);
|
||||
self.head = *child;
|
||||
EventKind::Enter(c.clone())
|
||||
}
|
||||
NodeKind::Atom(a) => {
|
||||
self.head = n.next;
|
||||
EventKind::Atom(a.clone())
|
||||
}
|
||||
NodeKind::Inline => {
|
||||
self.head = n.next;
|
||||
EventKind::Inline
|
||||
}
|
||||
};
|
||||
Some(Event { kind, span: n.span })
|
||||
} else if let Some(block_ni) = self.branch.pop() {
|
||||
let InternalNode { next, kind, span } = &self.nodes[block_ni.index()];
|
||||
let kind = EventKind::Exit(kind.container().unwrap().clone());
|
||||
self.head = *next;
|
||||
Some(Event { kind, span: *span })
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub struct NodeIndex(std::num::NonZeroUsize);
|
||||
|
||||
impl NodeIndex {
|
||||
fn new(i: usize) -> Self {
|
||||
debug_assert_ne!(i, usize::MAX);
|
||||
Self((i + 1).try_into().unwrap())
|
||||
}
|
||||
|
||||
fn root() -> Self {
|
||||
Self::new(0)
|
||||
}
|
||||
|
||||
fn index(self) -> usize {
|
||||
usize::from(self.0) - 1
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
enum NodeKind<C, A> {
|
||||
Root,
|
||||
Container(C, Option<NodeIndex>),
|
||||
Atom(A),
|
||||
Inline,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
struct InternalNode<C, A> {
|
||||
span: Span,
|
||||
kind: NodeKind<C, A>,
|
||||
next: Option<NodeIndex>,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Builder<C, A> {
|
||||
nodes: Vec<InternalNode<C, A>>,
|
||||
branch: Vec<NodeIndex>,
|
||||
head: Option<NodeIndex>,
|
||||
depth: usize,
|
||||
}
|
||||
|
||||
impl<C, A> NodeKind<C, A> {
|
||||
fn child(&self) -> Option<NodeIndex> {
|
||||
if let NodeKind::Container(_, child) = self {
|
||||
*child
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn child_mut(&mut self) -> &mut Option<NodeIndex> {
|
||||
if let NodeKind::Container(_, child) = self {
|
||||
child
|
||||
} else {
|
||||
panic!()
|
||||
}
|
||||
}
|
||||
|
||||
fn container(&self) -> Option<&C> {
|
||||
if let NodeKind::Container(c, _) = self {
|
||||
Some(c)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, C, A> From<&'a mut NodeKind<C, A>> for Element<'a, C, A> {
|
||||
fn from(kind: &'a mut NodeKind<C, A>) -> Self {
|
||||
match kind {
|
||||
NodeKind::Root => unreachable!(),
|
||||
NodeKind::Container(c, ..) => Element::Container(c),
|
||||
NodeKind::Atom(a) => Element::Atom(a),
|
||||
NodeKind::Inline => Element::Inline,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<C, A> Builder<C, A> {
|
||||
pub(super) fn new() -> Self {
|
||||
Builder {
|
||||
nodes: vec![InternalNode {
|
||||
span: Span::default(),
|
||||
kind: NodeKind::Root,
|
||||
next: None,
|
||||
}],
|
||||
branch: vec![],
|
||||
head: Some(NodeIndex::root()),
|
||||
depth: 0,
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn atom(&mut self, a: A, span: Span) {
|
||||
self.add_node(InternalNode {
|
||||
span,
|
||||
kind: NodeKind::Atom(a),
|
||||
next: None,
|
||||
});
|
||||
}
|
||||
|
||||
pub(super) fn inline(&mut self, span: Span) {
|
||||
self.add_node(InternalNode {
|
||||
span,
|
||||
kind: NodeKind::Inline,
|
||||
next: None,
|
||||
});
|
||||
}
|
||||
|
||||
pub(super) fn enter(&mut self, c: C, span: Span) -> NodeIndex {
|
||||
self.depth += 1;
|
||||
self.add_node(InternalNode {
|
||||
span,
|
||||
kind: NodeKind::Container(c, None),
|
||||
next: None,
|
||||
})
|
||||
}
|
||||
|
||||
pub(super) fn exit(&mut self) {
|
||||
self.depth -= 1;
|
||||
if let Some(head) = self.head.take() {
|
||||
if matches!(self.nodes[head.index()].kind, NodeKind::Container(..)) {
|
||||
self.branch.push(head);
|
||||
}
|
||||
} else {
|
||||
let last = self.branch.pop();
|
||||
debug_assert_ne!(last, None);
|
||||
}
|
||||
}
|
||||
|
||||
/// Exit and discard all the contents of the current container.
|
||||
pub(super) fn exit_discard(&mut self) {
|
||||
self.exit();
|
||||
let exited = self.branch.pop().unwrap();
|
||||
self.nodes.drain(exited.index()..);
|
||||
let (prev, has_parent) = self.replace(exited, None);
|
||||
if has_parent {
|
||||
self.head = Some(prev);
|
||||
} else {
|
||||
self.branch.push(prev);
|
||||
}
|
||||
}
|
||||
|
||||
/// Swap the node and its children with either its parent or the node before.
|
||||
pub fn swap_prev(&mut self, node: NodeIndex) {
|
||||
let next = self.nodes[node.index()].next;
|
||||
if let Some(n) = next {
|
||||
self.replace(n, None);
|
||||
}
|
||||
let (prev, _) = self.replace(node, next);
|
||||
self.replace(prev, Some(node));
|
||||
self.nodes[node.index()].next = Some(prev);
|
||||
}
|
||||
|
||||
/// Remove the specified node and its children.
|
||||
pub fn remove(&mut self, node: NodeIndex) {
|
||||
let next = self.nodes[node.index()].next;
|
||||
self.replace(node, next);
|
||||
}
|
||||
|
||||
pub(super) fn depth(&self) -> usize {
|
||||
self.depth
|
||||
}
|
||||
|
||||
pub(super) fn elem(&mut self, ni: NodeIndex) -> Element<C, A> {
|
||||
match &mut self.nodes[ni.index()].kind {
|
||||
NodeKind::Root => unreachable!(),
|
||||
NodeKind::Container(c, ..) => Element::Container(c),
|
||||
NodeKind::Atom(a) => Element::Atom(a),
|
||||
NodeKind::Inline => Element::Inline,
|
||||
}
|
||||
}
|
||||
|
||||
/// Retrieve all children nodes for the specified node, in the order that they were added.
|
||||
pub(super) fn children(&mut self, node: NodeIndex) -> impl Iterator<Item = Node<C, A>> {
|
||||
// XXX assumes no modifications
|
||||
let n = &self.nodes[node.index()];
|
||||
let range = if let Some(start) = n.kind.child() {
|
||||
start.index()..n.next.map_or(self.nodes.len(), NodeIndex::index)
|
||||
} else {
|
||||
0..0
|
||||
};
|
||||
range
|
||||
.clone()
|
||||
.map(NodeIndex::new)
|
||||
.zip(self.nodes[range].iter_mut())
|
||||
.map(|(index, n)| Node {
|
||||
index,
|
||||
elem: Element::from(&mut n.kind),
|
||||
span: n.span,
|
||||
})
|
||||
}
|
||||
|
||||
pub(super) fn finish(self) -> Tree<C, A> {
|
||||
debug_assert_eq!(self.depth, 0);
|
||||
let head = self.nodes[NodeIndex::root().index()].next;
|
||||
Tree {
|
||||
nodes: self.nodes.into_boxed_slice().into(),
|
||||
branch: Vec::new(),
|
||||
head,
|
||||
}
|
||||
}
|
||||
|
||||
fn add_node(&mut self, node: InternalNode<C, A>) -> NodeIndex {
|
||||
let ni = NodeIndex::new(self.nodes.len());
|
||||
self.nodes.push(node);
|
||||
if let Some(head_ni) = &mut self.head {
|
||||
let mut head = &mut self.nodes[head_ni.index()];
|
||||
match &mut head.kind {
|
||||
NodeKind::Root | NodeKind::Inline | NodeKind::Atom(_) => {
|
||||
// set next pointer of previous node
|
||||
debug_assert_eq!(head.next, None);
|
||||
head.next = Some(ni);
|
||||
}
|
||||
NodeKind::Container(_, child) => {
|
||||
self.branch.push(*head_ni);
|
||||
// set child pointer of current container
|
||||
debug_assert_eq!(*child, None);
|
||||
*child = Some(ni);
|
||||
}
|
||||
}
|
||||
} else if let Some(block) = self.branch.pop() {
|
||||
let mut block = &mut self.nodes[block.index()];
|
||||
debug_assert!(matches!(block.kind, NodeKind::Container(..)));
|
||||
block.next = Some(ni);
|
||||
} else {
|
||||
panic!()
|
||||
}
|
||||
self.head = Some(ni);
|
||||
ni
|
||||
}
|
||||
|
||||
/// Remove the link from the node that points to the specified node. Optionally replace the
|
||||
/// node with another node. Return the pointer node and whether it is a container or not.
|
||||
fn replace(&mut self, node: NodeIndex, next: Option<NodeIndex>) -> (NodeIndex, bool) {
|
||||
for (i, n) in self.nodes.iter_mut().enumerate().rev() {
|
||||
let ni = NodeIndex::new(i);
|
||||
if n.next == Some(node) {
|
||||
n.next = next;
|
||||
return (ni, false);
|
||||
} else if n.kind.child() == Some(node) {
|
||||
*n.kind.child_mut() = next;
|
||||
return (ni, true);
|
||||
}
|
||||
}
|
||||
panic!("node is never linked to")
|
||||
}
|
||||
}
|
||||
|
||||
impl<C: std::fmt::Debug + Clone + 'static, A: std::fmt::Debug + Clone + 'static> std::fmt::Debug
|
||||
for Builder<C, A>
|
||||
{
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
self.clone().finish().fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
impl<C: std::fmt::Debug + Clone, A: std::fmt::Debug + Clone> std::fmt::Debug for Tree<C, A> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
const INDENT: &str = " ";
|
||||
let mut level = 0;
|
||||
writeln!(f)?;
|
||||
for e in self.clone() {
|
||||
let indent = INDENT.repeat(level);
|
||||
match e.kind {
|
||||
EventKind::Enter(c) => {
|
||||
write!(f, "{}{:?}", indent, c)?;
|
||||
level += 1;
|
||||
}
|
||||
EventKind::Inline => write!(f, "{}Inline", indent)?,
|
||||
EventKind::Exit(..) => {
|
||||
level -= 1;
|
||||
continue;
|
||||
}
|
||||
EventKind::Atom(a) => write!(f, "{}{:?}", indent, a)?,
|
||||
}
|
||||
writeln!(f, " ({}:{})", e.span.start(), e.span.end())?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::Span;
|
||||
|
||||
#[test]
|
||||
fn fmt() {
|
||||
let mut tree = super::Builder::new();
|
||||
tree.enter(1, Span::new(0, 1));
|
||||
tree.atom(11, Span::new(0, 1));
|
||||
tree.atom(12, Span::new(0, 1));
|
||||
tree.exit();
|
||||
tree.enter(2, Span::new(1, 5));
|
||||
tree.enter(21, Span::new(2, 5));
|
||||
tree.enter(211, Span::new(3, 4));
|
||||
tree.atom(2111, Span::new(3, 4));
|
||||
tree.exit();
|
||||
tree.exit();
|
||||
tree.enter(22, Span::new(4, 5));
|
||||
tree.atom(221, Span::new(4, 5));
|
||||
tree.exit();
|
||||
tree.exit();
|
||||
tree.enter(3, Span::new(5, 6));
|
||||
tree.atom(31, Span::new(5, 6));
|
||||
tree.exit();
|
||||
assert_eq!(
|
||||
format!("{:?}", tree.finish()),
|
||||
concat!(
|
||||
"\n",
|
||||
"1 (0:1)\n",
|
||||
" 11 (0:1)\n",
|
||||
" 12 (0:1)\n",
|
||||
"2 (1:5)\n",
|
||||
" 21 (2:5)\n",
|
||||
" 211 (3:4)\n",
|
||||
" 2111 (3:4)\n",
|
||||
" 22 (4:5)\n",
|
||||
" 221 (4:5)\n",
|
||||
"3 (5:6)\n",
|
||||
" 31 (5:6)\n",
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
|
@ -17,10 +17,6 @@ path = "src/main.rs"
|
|||
name = "parse"
|
||||
path = "src/parse.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "parse_balance"
|
||||
path = "src/parse_balance.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "html"
|
||||
path = "src/html.rs"
|
||||
|
|
|
@ -5,27 +5,66 @@ use html5ever::tendril::TendrilSink;
|
|||
use html5ever::tokenizer;
|
||||
use html5ever::tree_builder;
|
||||
|
||||
/// Perform sanity checks on events.
|
||||
pub fn parse(data: &[u8]) {
|
||||
if let Ok(s) = std::str::from_utf8(data) {
|
||||
jotdown::Parser::new(s).last();
|
||||
}
|
||||
}
|
||||
|
||||
/// Ensure containers are always balanced, i.e. opened and closed in correct order.
|
||||
pub fn parse_balance(data: &[u8]) {
|
||||
if let Ok(s) = std::str::from_utf8(data) {
|
||||
let whitelist_whitespace = s.contains('{') && s.contains('}'); // attributes are outside events
|
||||
let mut open = Vec::new();
|
||||
for event in jotdown::Parser::new(s) {
|
||||
let mut last = (jotdown::Event::Str("".into()), 0..0);
|
||||
for (event, range) in jotdown::Parser::new(s).into_offset_iter() {
|
||||
// no overlap, out of order
|
||||
assert!(
|
||||
last.1.end <= range.start
|
||||
// block attributes may overlap with start event
|
||||
|| (
|
||||
matches!(last.0, jotdown::Event::Blankline)
|
||||
&& (
|
||||
matches!(
|
||||
event,
|
||||
jotdown::Event::Start(ref cont, ..) if cont.is_block()
|
||||
)
|
||||
|| matches!(event, jotdown::Event::ThematicBreak(..))
|
||||
)
|
||||
)
|
||||
// caption event is before table rows but src is after
|
||||
|| (
|
||||
matches!(
|
||||
last.0,
|
||||
jotdown::Event::Start(jotdown::Container::Caption, ..)
|
||||
| jotdown::Event::End(jotdown::Container::Caption)
|
||||
)
|
||||
&& range.end <= last.1.start
|
||||
),
|
||||
"{} > {} {:?} {:?}",
|
||||
last.1.end,
|
||||
range.start,
|
||||
last.0,
|
||||
event
|
||||
);
|
||||
last = (event.clone(), range.clone());
|
||||
// range is valid unicode, does not cross char boundary
|
||||
let _ = &s[range];
|
||||
match event {
|
||||
jotdown::Event::Start(c, ..) => open.push(c.clone()),
|
||||
jotdown::Event::End(c) => assert_eq!(open.pop().unwrap(), c),
|
||||
jotdown::Event::End(c) => {
|
||||
// closes correct event
|
||||
assert_eq!(open.pop().unwrap(), c);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
// no missing close
|
||||
assert_eq!(open, &[]);
|
||||
// only whitespace after last event
|
||||
assert!(
|
||||
whitelist_whitespace || s[last.1.end..].chars().all(char::is_whitespace),
|
||||
"non whitespace {:?}",
|
||||
&s[last.1.end..],
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// Validate rendered html output.
|
||||
pub fn html(data: &[u8]) {
|
||||
if data.iter().any(|i| *i == 0) {
|
||||
return;
|
||||
|
@ -132,9 +171,6 @@ impl<'a> tree_builder::TreeSink for Dom<'a> {
|
|||
"Found special tag while closing generic tag",
|
||||
"Formatting element not current node",
|
||||
"Formatting element not open",
|
||||
// FIXME bug caused by empty table at end of list
|
||||
"No matching tag to close",
|
||||
"Unexpected open element while closing",
|
||||
];
|
||||
if !whitelist.iter().any(|e| msg.starts_with(e)) {
|
||||
#[cfg(feature = "debug")]
|
||||
|
|
|
@ -8,7 +8,6 @@ fn main() {
|
|||
|
||||
let f = match target.as_str() {
|
||||
"parse" => jotdown_afl::parse,
|
||||
"parse_balance" => jotdown_afl::parse_balance,
|
||||
"html" => jotdown_afl::html,
|
||||
_ => panic!("unknown target '{}'", target),
|
||||
};
|
||||
|
|
|
@ -1,3 +0,0 @@
|
|||
fn main() {
|
||||
afl::fuzz!(|data: &[u8]| { jotdown_afl::parse_balance(data) });
|
||||
}
|
Loading…
Reference in a new issue