commit
70303e7e4b
12 changed files with 1644 additions and 1201 deletions
1
.github/workflows/ci.yml
vendored
1
.github/workflows/ci.yml
vendored
|
@ -72,7 +72,6 @@ jobs:
|
||||||
matrix:
|
matrix:
|
||||||
target:
|
target:
|
||||||
- parse
|
- parse
|
||||||
- parse_balance
|
|
||||||
- html
|
- html
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
|
|
|
@ -21,7 +21,10 @@
|
||||||
output.innerText = jotdown_render(input.innerText);
|
output.innerText = jotdown_render(input.innerText);
|
||||||
} else if (fmt.value == "events") {
|
} else if (fmt.value == "events") {
|
||||||
output.classList.add("verbatim")
|
output.classList.add("verbatim")
|
||||||
output.innerText = jotdown_parse(input.innerText);
|
output.innerText = jotdown_parse(input.innerText, false);
|
||||||
|
} else if (fmt.value == "events_spans") {
|
||||||
|
output.classList.add("verbatim")
|
||||||
|
output.innerText = jotdown_parse(input.innerText, true);
|
||||||
} else if (fmt.value == "events_indent") {
|
} else if (fmt.value == "events_indent") {
|
||||||
output.classList.add("verbatim")
|
output.classList.add("verbatim")
|
||||||
output.innerText = jotdown_parse_indent(input.innerText);
|
output.innerText = jotdown_parse_indent(input.innerText);
|
||||||
|
@ -50,6 +53,7 @@
|
||||||
<option value="preview">preview</option>
|
<option value="preview">preview</option>
|
||||||
<option value="html">html</option>
|
<option value="html">html</option>
|
||||||
<option value="events">events</option>
|
<option value="events">events</option>
|
||||||
|
<option value="events_spans">events (with offsets)</option>
|
||||||
<option value="events_indent">events (indented)</option>
|
<option value="events_indent">events (indented)</option>
|
||||||
</select>
|
</select>
|
||||||
</div>
|
</div>
|
||||||
|
|
|
@ -22,10 +22,16 @@ pub fn jotdown_render(djot: &str) -> String {
|
||||||
|
|
||||||
#[must_use]
|
#[must_use]
|
||||||
#[wasm_bindgen]
|
#[wasm_bindgen]
|
||||||
pub fn jotdown_parse(djot: &str) -> String {
|
pub fn jotdown_parse(djot: &str, spans: bool) -> String {
|
||||||
jotdown::Parser::new(djot)
|
let mut out = String::new();
|
||||||
.map(|e| format!("{:?}\n", e))
|
for (e, sp) in jotdown::Parser::new(djot).into_offset_iter() {
|
||||||
.collect()
|
write!(out, "{:?}", e).unwrap();
|
||||||
|
if spans {
|
||||||
|
write!(out, " {:?} {:?}", &djot[sp.clone()], sp).unwrap();
|
||||||
|
}
|
||||||
|
writeln!(out).unwrap();
|
||||||
|
}
|
||||||
|
out
|
||||||
}
|
}
|
||||||
|
|
||||||
#[must_use]
|
#[must_use]
|
||||||
|
|
1825
src/block.rs
1825
src/block.rs
File diff suppressed because it is too large
Load diff
147
src/inline.rs
147
src/inline.rs
|
@ -12,9 +12,9 @@ use Container::*;
|
||||||
use ControlFlow::*;
|
use ControlFlow::*;
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
pub enum Atom {
|
pub enum Atom<'s> {
|
||||||
FootnoteReference,
|
FootnoteReference { label: &'s str },
|
||||||
Symbol,
|
Symbol(&'s str),
|
||||||
Softbreak,
|
Softbreak,
|
||||||
Hardbreak,
|
Hardbreak,
|
||||||
Escape,
|
Escape,
|
||||||
|
@ -26,7 +26,7 @@ pub enum Atom {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
|
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
|
||||||
pub enum Container {
|
pub enum Container<'s> {
|
||||||
Span,
|
Span,
|
||||||
Subscript,
|
Subscript,
|
||||||
Superscript,
|
Superscript,
|
||||||
|
@ -36,16 +36,14 @@ pub enum Container {
|
||||||
Strong,
|
Strong,
|
||||||
Mark,
|
Mark,
|
||||||
Verbatim,
|
Verbatim,
|
||||||
/// Span is the format.
|
RawFormat { format: &'s str },
|
||||||
RawFormat,
|
|
||||||
InlineMath,
|
InlineMath,
|
||||||
DisplayMath,
|
DisplayMath,
|
||||||
ReferenceLink(CowStrIndex),
|
ReferenceLink(CowStrIndex),
|
||||||
ReferenceImage(CowStrIndex),
|
ReferenceImage(CowStrIndex),
|
||||||
InlineLink(CowStrIndex),
|
InlineLink(CowStrIndex),
|
||||||
InlineImage(CowStrIndex),
|
InlineImage(CowStrIndex),
|
||||||
/// Open delimiter span is URL, closing is '>'.
|
Autolink(&'s str),
|
||||||
Autolink,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type CowStrIndex = u32;
|
type CowStrIndex = u32;
|
||||||
|
@ -57,10 +55,10 @@ pub enum QuoteType {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||||
pub enum EventKind {
|
pub enum EventKind<'s> {
|
||||||
Enter(Container),
|
Enter(Container<'s>),
|
||||||
Exit(Container),
|
Exit(Container<'s>),
|
||||||
Atom(Atom),
|
Atom(Atom<'s>),
|
||||||
Str,
|
Str,
|
||||||
Attributes {
|
Attributes {
|
||||||
container: bool,
|
container: bool,
|
||||||
|
@ -72,8 +70,8 @@ pub enum EventKind {
|
||||||
type AttributesIndex = u32;
|
type AttributesIndex = u32;
|
||||||
|
|
||||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||||
pub struct Event {
|
pub struct Event<'s> {
|
||||||
pub kind: EventKind,
|
pub kind: EventKind<'s>,
|
||||||
pub span: Span,
|
pub span: Span,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -218,7 +216,7 @@ pub struct Parser<'s> {
|
||||||
openers: Vec<(Opener, usize)>,
|
openers: Vec<(Opener, usize)>,
|
||||||
/// Buffer queue for next events. Events are buffered until no modifications due to future
|
/// Buffer queue for next events. Events are buffered until no modifications due to future
|
||||||
/// characters are needed.
|
/// characters are needed.
|
||||||
events: std::collections::VecDeque<Event>,
|
events: std::collections::VecDeque<Event<'s>>,
|
||||||
/// State if inside a verbatim container.
|
/// State if inside a verbatim container.
|
||||||
verbatim: Option<VerbatimState>,
|
verbatim: Option<VerbatimState>,
|
||||||
/// State if currently parsing potential attributes.
|
/// State if currently parsing potential attributes.
|
||||||
|
@ -268,12 +266,12 @@ impl<'s> Parser<'s> {
|
||||||
self.store_attributes.clear();
|
self.store_attributes.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
fn push_sp(&mut self, kind: EventKind, span: Span) -> Option<ControlFlow> {
|
fn push_sp(&mut self, kind: EventKind<'s>, span: Span) -> Option<ControlFlow> {
|
||||||
self.events.push_back(Event { kind, span });
|
self.events.push_back(Event { kind, span });
|
||||||
Some(Continue)
|
Some(Continue)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn push(&mut self, kind: EventKind) -> Option<ControlFlow> {
|
fn push(&mut self, kind: EventKind<'s>) -> Option<ControlFlow> {
|
||||||
self.push_sp(kind, self.input.span)
|
self.push_sp(kind, self.input.span)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -310,17 +308,16 @@ impl<'s> Parser<'s> {
|
||||||
&& matches!(first.kind, lex::Kind::Seq(Sequence::Backtick))
|
&& matches!(first.kind, lex::Kind::Seq(Sequence::Backtick))
|
||||||
{
|
{
|
||||||
let raw_format = self.input.ahead_raw_format();
|
let raw_format = self.input.ahead_raw_format();
|
||||||
let mut span_closer = self.input.span;
|
|
||||||
if let Some(span_format) = raw_format {
|
if let Some(span_format) = raw_format {
|
||||||
self.events[event_opener].kind = EventKind::Enter(RawFormat);
|
self.events[event_opener].kind = EventKind::Enter(RawFormat {
|
||||||
self.events[event_opener].span = span_format;
|
format: span_format.of(self.input.src),
|
||||||
self.input.span = span_format.translate(1);
|
});
|
||||||
span_closer = span_format;
|
self.input.span = Span::new(self.input.span.start(), span_format.end() + 1);
|
||||||
};
|
};
|
||||||
let ty_opener = if let EventKind::Enter(ty) = self.events[event_opener].kind {
|
let ty_opener = if let EventKind::Enter(ty) = self.events[event_opener].kind {
|
||||||
debug_assert!(matches!(
|
debug_assert!(matches!(
|
||||||
ty,
|
ty,
|
||||||
Verbatim | RawFormat | InlineMath | DisplayMath
|
Verbatim | RawFormat { .. } | InlineMath | DisplayMath
|
||||||
));
|
));
|
||||||
ty
|
ty
|
||||||
} else {
|
} else {
|
||||||
|
@ -330,7 +327,7 @@ impl<'s> Parser<'s> {
|
||||||
{
|
{
|
||||||
self.events.drain(*event_skip..);
|
self.events.drain(*event_skip..);
|
||||||
}
|
}
|
||||||
self.push_sp(EventKind::Exit(ty_opener), span_closer);
|
self.push(EventKind::Exit(ty_opener));
|
||||||
self.verbatim = None;
|
self.verbatim = None;
|
||||||
if raw_format.is_none()
|
if raw_format.is_none()
|
||||||
&& self.input.peek().map_or(false, |t| {
|
&& self.input.peek().map_or(false, |t| {
|
||||||
|
@ -527,7 +524,13 @@ impl<'s> Parser<'s> {
|
||||||
self.input.span = Span::new(start_attr, state.end_attr);
|
self.input.span = Span::new(start_attr, state.end_attr);
|
||||||
self.input.lexer = lex::Lexer::new(&self.input.src[state.end_attr..line_end]);
|
self.input.lexer = lex::Lexer::new(&self.input.src[state.end_attr..line_end]);
|
||||||
|
|
||||||
if !attrs.is_empty() {
|
if attrs.is_empty() {
|
||||||
|
if matches!(state.elem_ty, AttributesElementType::Container { .. }) {
|
||||||
|
let last = self.events.len() - 1;
|
||||||
|
self.events[last].span =
|
||||||
|
Span::new(self.events[last].span.start(), self.input.span.end());
|
||||||
|
}
|
||||||
|
} else {
|
||||||
let attr_index = self.store_attributes.len() as AttributesIndex;
|
let attr_index = self.store_attributes.len() as AttributesIndex;
|
||||||
self.store_attributes.push(attrs);
|
self.store_attributes.push(attrs);
|
||||||
let attr_event = Event {
|
let attr_event = Event {
|
||||||
|
@ -540,11 +543,13 @@ impl<'s> Parser<'s> {
|
||||||
match state.elem_ty {
|
match state.elem_ty {
|
||||||
AttributesElementType::Container { e_placeholder } => {
|
AttributesElementType::Container { e_placeholder } => {
|
||||||
self.events[e_placeholder] = attr_event;
|
self.events[e_placeholder] = attr_event;
|
||||||
|
let last = self.events.len() - 1;
|
||||||
if matches!(self.events[e_placeholder + 1].kind, EventKind::Str) {
|
if matches!(self.events[e_placeholder + 1].kind, EventKind::Str) {
|
||||||
self.events[e_placeholder + 1].kind = EventKind::Enter(Span);
|
self.events[e_placeholder + 1].kind = EventKind::Enter(Span);
|
||||||
let last = self.events.len() - 1;
|
|
||||||
self.events[last].kind = EventKind::Exit(Span);
|
self.events[last].kind = EventKind::Exit(Span);
|
||||||
}
|
}
|
||||||
|
self.events[last].span =
|
||||||
|
Span::new(self.events[last].span.start(), self.input.span.end());
|
||||||
}
|
}
|
||||||
AttributesElementType::Word => {
|
AttributesElementType::Word => {
|
||||||
self.events.push_back(attr_event);
|
self.events.push_back(attr_event);
|
||||||
|
@ -577,12 +582,13 @@ impl<'s> Parser<'s> {
|
||||||
.sum();
|
.sum();
|
||||||
if end && is_url {
|
if end && is_url {
|
||||||
self.input.lexer = lex::Lexer::new(ahead.as_str());
|
self.input.lexer = lex::Lexer::new(ahead.as_str());
|
||||||
self.input.span = self.input.span.after(len);
|
let span_url = self.input.span.after(len);
|
||||||
self.push(EventKind::Enter(Autolink));
|
let url = span_url.of(self.input.src);
|
||||||
|
self.push(EventKind::Enter(Autolink(url)));
|
||||||
|
self.input.span = span_url;
|
||||||
self.push(EventKind::Str);
|
self.push(EventKind::Str);
|
||||||
self.push(EventKind::Exit(Autolink));
|
|
||||||
self.input.span = self.input.span.after(1);
|
self.input.span = self.input.span.after(1);
|
||||||
return Some(Continue);
|
return self.push(EventKind::Exit(Autolink(url)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
None
|
None
|
||||||
|
@ -606,10 +612,11 @@ impl<'s> Parser<'s> {
|
||||||
.sum();
|
.sum();
|
||||||
if end && valid {
|
if end && valid {
|
||||||
self.input.lexer = lex::Lexer::new(ahead.as_str());
|
self.input.lexer = lex::Lexer::new(ahead.as_str());
|
||||||
self.input.span = self.input.span.after(len);
|
let span_symbol = self.input.span.after(len);
|
||||||
self.push(EventKind::Atom(Symbol));
|
self.input.span = Span::new(self.input.span.start(), span_symbol.end() + 1);
|
||||||
self.input.span = self.input.span.after(1);
|
return self.push(EventKind::Atom(Atom::Symbol(
|
||||||
return Some(Continue);
|
span_symbol.of(self.input.src),
|
||||||
|
)));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
None
|
None
|
||||||
|
@ -649,10 +656,10 @@ impl<'s> Parser<'s> {
|
||||||
.sum();
|
.sum();
|
||||||
if end {
|
if end {
|
||||||
self.input.lexer = lex::Lexer::new(ahead.as_str());
|
self.input.lexer = lex::Lexer::new(ahead.as_str());
|
||||||
self.input.span = self.input.span.after(len);
|
let span_label = self.input.span.after(len);
|
||||||
self.push(EventKind::Atom(FootnoteReference));
|
let label = span_label.of(self.input.src);
|
||||||
self.input.span = self.input.span.after(1);
|
self.input.span = Span::new(self.input.span.start(), span_label.end() + 1);
|
||||||
return Some(Continue);
|
return self.push(EventKind::Atom(FootnoteReference { label }));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
None
|
None
|
||||||
|
@ -925,7 +932,7 @@ impl<'s> Parser<'s> {
|
||||||
self.push(EventKind::Atom(atom))
|
self.push(EventKind::Atom(atom))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn merge_str_events(&mut self, span_str: Span) -> Event {
|
fn merge_str_events(&mut self, span_str: Span) -> Event<'s> {
|
||||||
let mut span = span_str;
|
let mut span = span_str;
|
||||||
let should_merge = |e: &Event, span: Span| {
|
let should_merge = |e: &Event, span: Span| {
|
||||||
matches!(e.kind, EventKind::Str | EventKind::Placeholder)
|
matches!(e.kind, EventKind::Str | EventKind::Placeholder)
|
||||||
|
@ -952,7 +959,7 @@ impl<'s> Parser<'s> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn apply_word_attributes(&mut self, span_str: Span) -> Event {
|
fn apply_word_attributes(&mut self, span_str: Span) -> Event<'s> {
|
||||||
if let Some(i) = span_str
|
if let Some(i) = span_str
|
||||||
.of(self.input.src)
|
.of(self.input.src)
|
||||||
.bytes()
|
.bytes()
|
||||||
|
@ -972,7 +979,7 @@ impl<'s> Parser<'s> {
|
||||||
let attr = self.events.pop_front().unwrap();
|
let attr = self.events.pop_front().unwrap();
|
||||||
self.events.push_front(Event {
|
self.events.push_front(Event {
|
||||||
kind: EventKind::Exit(Span),
|
kind: EventKind::Exit(Span),
|
||||||
span: span_str.empty_after(),
|
span: attr.span,
|
||||||
});
|
});
|
||||||
self.events.push_front(Event {
|
self.events.push_front(Event {
|
||||||
kind: EventKind::Str,
|
kind: EventKind::Str,
|
||||||
|
@ -1089,8 +1096,8 @@ impl Opener {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
enum DelimEventKind {
|
enum DelimEventKind<'s> {
|
||||||
Container(Container),
|
Container(Container<'s>),
|
||||||
Span(SpanType),
|
Span(SpanType),
|
||||||
Quote(QuoteType),
|
Quote(QuoteType),
|
||||||
Link {
|
Link {
|
||||||
|
@ -1100,7 +1107,7 @@ enum DelimEventKind {
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<Opener> for DelimEventKind {
|
impl<'s> From<Opener> for DelimEventKind<'s> {
|
||||||
fn from(d: Opener) -> Self {
|
fn from(d: Opener) -> Self {
|
||||||
match d {
|
match d {
|
||||||
Opener::Span(ty) => Self::Span(ty),
|
Opener::Span(ty) => Self::Span(ty),
|
||||||
|
@ -1127,7 +1134,7 @@ impl From<Opener> for DelimEventKind {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'s> Iterator for Parser<'s> {
|
impl<'s> Iterator for Parser<'s> {
|
||||||
type Item = Event;
|
type Item = Event<'s>;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
while self.events.is_empty()
|
while self.events.is_empty()
|
||||||
|
@ -1158,7 +1165,7 @@ impl<'s> Iterator for Parser<'s> {
|
||||||
let ty_opener = if let EventKind::Enter(ty) = self.events[event_opener].kind {
|
let ty_opener = if let EventKind::Enter(ty) = self.events[event_opener].kind {
|
||||||
debug_assert!(matches!(
|
debug_assert!(matches!(
|
||||||
ty,
|
ty,
|
||||||
Verbatim | RawFormat | InlineMath | DisplayMath
|
Verbatim | RawFormat { .. } | InlineMath | DisplayMath
|
||||||
));
|
));
|
||||||
ty
|
ty
|
||||||
} else {
|
} else {
|
||||||
|
@ -1266,7 +1273,7 @@ mod test {
|
||||||
),
|
),
|
||||||
(Enter(Verbatim), "`"),
|
(Enter(Verbatim), "`"),
|
||||||
(Str, "raw"),
|
(Str, "raw"),
|
||||||
(Exit(Verbatim), "`"),
|
(Exit(Verbatim), "`{#id}"),
|
||||||
(Str, " post"),
|
(Str, " post"),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@ -1336,16 +1343,16 @@ mod test {
|
||||||
fn raw_format() {
|
fn raw_format() {
|
||||||
test_parse!(
|
test_parse!(
|
||||||
"`raw`{=format}",
|
"`raw`{=format}",
|
||||||
(Enter(RawFormat), "format"),
|
(Enter(RawFormat { format: "format" }), "`"),
|
||||||
(Str, "raw"),
|
(Str, "raw"),
|
||||||
(Exit(RawFormat), "format"),
|
(Exit(RawFormat { format: "format" }), "`{=format}"),
|
||||||
);
|
);
|
||||||
test_parse!(
|
test_parse!(
|
||||||
"before `raw`{=format} after",
|
"before `raw`{=format} after",
|
||||||
(Str, "before "),
|
(Str, "before "),
|
||||||
(Enter(RawFormat), "format"),
|
(Enter(RawFormat { format: "format" }), "`"),
|
||||||
(Str, "raw"),
|
(Str, "raw"),
|
||||||
(Exit(RawFormat), "format"),
|
(Exit(RawFormat { format: "format" }), "`{=format}"),
|
||||||
(Str, " after"),
|
(Str, " after"),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@ -1456,7 +1463,7 @@ mod test {
|
||||||
),
|
),
|
||||||
(Enter(Span), ""),
|
(Enter(Span), ""),
|
||||||
(Str, "[text]("),
|
(Str, "[text]("),
|
||||||
(Exit(Span), ""),
|
(Exit(Span), "{.cls}"),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1520,7 +1527,7 @@ mod test {
|
||||||
"{.cls}",
|
"{.cls}",
|
||||||
),
|
),
|
||||||
(Enter(Span), "["),
|
(Enter(Span), "["),
|
||||||
(Exit(Span), "]")
|
(Exit(Span), "]{.cls}")
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1537,7 +1544,7 @@ mod test {
|
||||||
),
|
),
|
||||||
(Enter(Span), "["),
|
(Enter(Span), "["),
|
||||||
(Str, "abc"),
|
(Str, "abc"),
|
||||||
(Exit(Span), "]"),
|
(Exit(Span), "]{.def}"),
|
||||||
);
|
);
|
||||||
test_parse!("not a [span] {#id}.", (Str, "not a [span] "), (Str, "."));
|
test_parse!("not a [span] {#id}.", (Str, "not a [span] "), (Str, "."));
|
||||||
}
|
}
|
||||||
|
@ -1555,7 +1562,7 @@ mod test {
|
||||||
),
|
),
|
||||||
(Enter(Span), "["),
|
(Enter(Span), "["),
|
||||||
(Str, "x_y"),
|
(Str, "x_y"),
|
||||||
(Exit(Span), "]"),
|
(Exit(Span), "]{.bar_}"),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1563,24 +1570,24 @@ mod test {
|
||||||
fn autolink() {
|
fn autolink() {
|
||||||
test_parse!(
|
test_parse!(
|
||||||
"<https://example.com>",
|
"<https://example.com>",
|
||||||
(Enter(Autolink), "https://example.com"),
|
(Enter(Autolink("https://example.com",)), "<"),
|
||||||
(Str, "https://example.com"),
|
(Str, "https://example.com"),
|
||||||
(Exit(Autolink), "https://example.com")
|
(Exit(Autolink("https://example.com",)), ">")
|
||||||
);
|
);
|
||||||
test_parse!(
|
test_parse!(
|
||||||
"<a@b.c>",
|
"<a@b.c>",
|
||||||
(Enter(Autolink), "a@b.c"),
|
(Enter(Autolink("a@b.c")), "<"),
|
||||||
(Str, "a@b.c"),
|
(Str, "a@b.c"),
|
||||||
(Exit(Autolink), "a@b.c"),
|
(Exit(Autolink("a@b.c")), ">"),
|
||||||
);
|
);
|
||||||
test_parse!(
|
test_parse!(
|
||||||
"<http://a.b><http://c.d>",
|
"<http://a.b><http://c.d>",
|
||||||
(Enter(Autolink), "http://a.b"),
|
(Enter(Autolink("http://a.b")), "<"),
|
||||||
(Str, "http://a.b"),
|
(Str, "http://a.b"),
|
||||||
(Exit(Autolink), "http://a.b"),
|
(Exit(Autolink("http://a.b")), ">"),
|
||||||
(Enter(Autolink), "http://c.d"),
|
(Enter(Autolink("http://c.d")), "<"),
|
||||||
(Str, "http://c.d"),
|
(Str, "http://c.d"),
|
||||||
(Exit(Autolink), "http://c.d"),
|
(Exit(Autolink("http://c.d")), ">"),
|
||||||
);
|
);
|
||||||
test_parse!("<not-a-url>", (Str, "<not-a-url>"));
|
test_parse!("<not-a-url>", (Str, "<not-a-url>"));
|
||||||
}
|
}
|
||||||
|
@ -1590,7 +1597,7 @@ mod test {
|
||||||
test_parse!(
|
test_parse!(
|
||||||
"text[^footnote]. more text",
|
"text[^footnote]. more text",
|
||||||
(Str, "text"),
|
(Str, "text"),
|
||||||
(Atom(FootnoteReference), "footnote"),
|
(Atom(FootnoteReference { label: "footnote" }), "[^footnote]"),
|
||||||
(Str, ". more text"),
|
(Str, ". more text"),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@ -1687,7 +1694,7 @@ mod test {
|
||||||
),
|
),
|
||||||
(Enter(Emphasis), "_"),
|
(Enter(Emphasis), "_"),
|
||||||
(Str, "abc def"),
|
(Str, "abc def"),
|
||||||
(Exit(Emphasis), "_"),
|
(Exit(Emphasis), "_{.attr}"),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1697,13 +1704,13 @@ mod test {
|
||||||
"_abc def_{}",
|
"_abc def_{}",
|
||||||
(Enter(Emphasis), "_"),
|
(Enter(Emphasis), "_"),
|
||||||
(Str, "abc def"),
|
(Str, "abc def"),
|
||||||
(Exit(Emphasis), "_"),
|
(Exit(Emphasis), "_{}"),
|
||||||
);
|
);
|
||||||
test_parse!(
|
test_parse!(
|
||||||
"_abc def_{ % comment % } ghi",
|
"_abc def_{ % comment % } ghi",
|
||||||
(Enter(Emphasis), "_"),
|
(Enter(Emphasis), "_"),
|
||||||
(Str, "abc def"),
|
(Str, "abc def"),
|
||||||
(Exit(Emphasis), "_"),
|
(Exit(Emphasis), "_{ % comment % }"),
|
||||||
(Str, " ghi"),
|
(Str, " ghi"),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@ -1721,7 +1728,7 @@ mod test {
|
||||||
),
|
),
|
||||||
(Enter(Emphasis), "_"),
|
(Enter(Emphasis), "_"),
|
||||||
(Str, "abc def"),
|
(Str, "abc def"),
|
||||||
(Exit(Emphasis), "_"),
|
(Exit(Emphasis), "_{.a}{.b}{.c}"),
|
||||||
(Str, " "),
|
(Str, " "),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@ -1739,7 +1746,7 @@ mod test {
|
||||||
),
|
),
|
||||||
(Enter(Span), ""),
|
(Enter(Span), ""),
|
||||||
(Str, "word"),
|
(Str, "word"),
|
||||||
(Exit(Span), ""),
|
(Exit(Span), "{a=b}"),
|
||||||
);
|
);
|
||||||
test_parse!(
|
test_parse!(
|
||||||
"some word{.a}{.b} with attrs",
|
"some word{.a}{.b} with attrs",
|
||||||
|
@ -1753,7 +1760,7 @@ mod test {
|
||||||
),
|
),
|
||||||
(Enter(Span), ""),
|
(Enter(Span), ""),
|
||||||
(Str, "word"),
|
(Str, "word"),
|
||||||
(Exit(Span), ""),
|
(Exit(Span), "{.a}{.b}"),
|
||||||
(Str, " with attrs"),
|
(Str, " with attrs"),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
351
src/lib.rs
351
src/lib.rs
|
@ -51,6 +51,7 @@
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::fmt::Write as FmtWrite;
|
use std::fmt::Write as FmtWrite;
|
||||||
use std::io;
|
use std::io;
|
||||||
|
use std::ops::Range;
|
||||||
|
|
||||||
#[cfg(feature = "html")]
|
#[cfg(feature = "html")]
|
||||||
pub mod html;
|
pub mod html;
|
||||||
|
@ -60,7 +61,6 @@ mod block;
|
||||||
mod inline;
|
mod inline;
|
||||||
mod lex;
|
mod lex;
|
||||||
mod span;
|
mod span;
|
||||||
mod tree;
|
|
||||||
|
|
||||||
use span::Span;
|
use span::Span;
|
||||||
|
|
||||||
|
@ -555,13 +555,14 @@ pub struct Parser<'s> {
|
||||||
src: &'s str,
|
src: &'s str,
|
||||||
|
|
||||||
/// Block tree parsed at first.
|
/// Block tree parsed at first.
|
||||||
tree: block::Tree,
|
blocks: std::iter::Peekable<std::vec::IntoIter<block::Event<'s>>>,
|
||||||
|
|
||||||
/// Contents obtained by the prepass.
|
/// Contents obtained by the prepass.
|
||||||
pre_pass: PrePass<'s>,
|
pre_pass: PrePass<'s>,
|
||||||
|
|
||||||
/// Last parsed block attributes
|
/// Last parsed block attributes, and its starting offset.
|
||||||
block_attributes: Attributes<'s>,
|
block_attributes: Attributes<'s>,
|
||||||
|
block_attributes_pos: Option<usize>,
|
||||||
|
|
||||||
/// Current table row is a head row.
|
/// Current table row is a head row.
|
||||||
table_head_row: bool,
|
table_head_row: bool,
|
||||||
|
@ -576,7 +577,7 @@ pub struct Parser<'s> {
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
struct Heading {
|
struct Heading {
|
||||||
/// Location of heading in src.
|
/// Location of heading in src.
|
||||||
location: usize,
|
location: u32,
|
||||||
/// Automatically generated id from heading text.
|
/// Automatically generated id from heading text.
|
||||||
id_auto: String,
|
id_auto: String,
|
||||||
/// Text of heading, formatting stripped.
|
/// Text of heading, formatting stripped.
|
||||||
|
@ -598,28 +599,50 @@ struct PrePass<'s> {
|
||||||
|
|
||||||
impl<'s> PrePass<'s> {
|
impl<'s> PrePass<'s> {
|
||||||
#[must_use]
|
#[must_use]
|
||||||
fn new(src: &'s str, mut tree: block::Tree, inline_parser: &mut inline::Parser<'s>) -> Self {
|
fn new(
|
||||||
|
src: &'s str,
|
||||||
|
blocks: std::slice::Iter<block::Event<'s>>,
|
||||||
|
inline_parser: &mut inline::Parser<'s>,
|
||||||
|
) -> Self {
|
||||||
let mut link_definitions = Map::new();
|
let mut link_definitions = Map::new();
|
||||||
let mut headings: Vec<Heading> = Vec::new();
|
let mut headings: Vec<Heading> = Vec::new();
|
||||||
let mut used_ids: Set<&str> = Set::new();
|
let mut used_ids: Set<&str> = Set::new();
|
||||||
|
|
||||||
|
let mut blocks = blocks.peekable();
|
||||||
|
|
||||||
let mut attr_prev: Option<Span> = None;
|
let mut attr_prev: Option<Span> = None;
|
||||||
while let Some(e) = tree.next() {
|
while let Some(e) = blocks.next() {
|
||||||
match e.kind {
|
match e.kind {
|
||||||
tree::EventKind::Enter(block::Node::Leaf(block::Leaf::LinkDefinition)) => {
|
block::EventKind::Enter(block::Node::Leaf(block::Leaf::LinkDefinition {
|
||||||
|
label,
|
||||||
|
})) => {
|
||||||
|
fn next_is_inline(
|
||||||
|
bs: &mut std::iter::Peekable<std::slice::Iter<block::Event>>,
|
||||||
|
) -> bool {
|
||||||
|
matches!(bs.peek().map(|e| &e.kind), Some(block::EventKind::Inline))
|
||||||
|
}
|
||||||
|
|
||||||
// All link definition tags have to be obtained initially, as references can
|
// All link definition tags have to be obtained initially, as references can
|
||||||
// appear before the definition.
|
// appear before the definition.
|
||||||
let tag = e.span.of(src);
|
|
||||||
let attrs =
|
let attrs =
|
||||||
attr_prev.map_or_else(Attributes::new, |sp| attr::parse(sp.of(src)));
|
attr_prev.map_or_else(Attributes::new, |sp| attr::parse(sp.of(src)));
|
||||||
let url = match tree.count_children() {
|
let url = if !next_is_inline(&mut blocks) {
|
||||||
0 => "".into(),
|
"".into()
|
||||||
1 => tree.take_inlines().next().unwrap().of(src).trim().into(),
|
} else {
|
||||||
_ => tree.take_inlines().map(|sp| sp.of(src).trim()).collect(),
|
let start = blocks.next().unwrap().span.of(src).trim();
|
||||||
|
if !next_is_inline(&mut blocks) {
|
||||||
|
start.into()
|
||||||
|
} else {
|
||||||
|
let mut url = start.to_string();
|
||||||
|
while next_is_inline(&mut blocks) {
|
||||||
|
url.push_str(blocks.next().unwrap().span.of(src).trim());
|
||||||
|
}
|
||||||
|
url.into()
|
||||||
|
}
|
||||||
};
|
};
|
||||||
link_definitions.insert(tag, (url, attrs));
|
link_definitions.insert(label, (url, attrs));
|
||||||
}
|
}
|
||||||
tree::EventKind::Enter(block::Node::Leaf(block::Leaf::Heading { .. })) => {
|
block::EventKind::Enter(block::Node::Leaf(block::Leaf::Heading { .. })) => {
|
||||||
// All headings ids have to be obtained initially, as references can appear
|
// All headings ids have to be obtained initially, as references can appear
|
||||||
// before the heading. Additionally, determining the id requires inline parsing
|
// before the heading. Additionally, determining the id requires inline parsing
|
||||||
// as formatting must be removed.
|
// as formatting must be removed.
|
||||||
|
@ -634,10 +657,21 @@ impl<'s> PrePass<'s> {
|
||||||
let mut id_auto = String::new();
|
let mut id_auto = String::new();
|
||||||
let mut text = String::new();
|
let mut text = String::new();
|
||||||
let mut last_whitespace = true;
|
let mut last_whitespace = true;
|
||||||
let inlines = tree.take_inlines().collect::<Vec<_>>();
|
|
||||||
inline_parser.reset();
|
inline_parser.reset();
|
||||||
inlines.iter().enumerate().for_each(|(i, sp)| {
|
let mut last_end = 0;
|
||||||
inline_parser.feed_line(*sp, i == inlines.len() - 1);
|
loop {
|
||||||
|
let span_inline = blocks.next().and_then(|e| {
|
||||||
|
if matches!(e.kind, block::EventKind::Inline) {
|
||||||
|
last_end = e.span.end();
|
||||||
|
Some(e.span)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
});
|
||||||
|
inline_parser.feed_line(
|
||||||
|
span_inline.unwrap_or_else(|| Span::empty_at(last_end)),
|
||||||
|
span_inline.is_none(),
|
||||||
|
);
|
||||||
inline_parser.for_each(|ev| match ev.kind {
|
inline_parser.for_each(|ev| match ev.kind {
|
||||||
inline::EventKind::Str => {
|
inline::EventKind::Str => {
|
||||||
text.push_str(ev.span.of(src));
|
text.push_str(ev.span.of(src));
|
||||||
|
@ -662,8 +696,11 @@ impl<'s> PrePass<'s> {
|
||||||
id_auto.push('-');
|
id_auto.push('-');
|
||||||
}
|
}
|
||||||
_ => {}
|
_ => {}
|
||||||
})
|
});
|
||||||
});
|
if span_inline.is_none() {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
id_auto.drain(id_auto.trim_end_matches('-').len()..);
|
id_auto.drain(id_auto.trim_end_matches('-').len()..);
|
||||||
|
|
||||||
// ensure id unique
|
// ensure id unique
|
||||||
|
@ -689,17 +726,17 @@ impl<'s> PrePass<'s> {
|
||||||
std::mem::transmute::<&str, &'static str>(id_auto.as_ref())
|
std::mem::transmute::<&str, &'static str>(id_auto.as_ref())
|
||||||
});
|
});
|
||||||
headings.push(Heading {
|
headings.push(Heading {
|
||||||
location: e.span.start(),
|
location: e.span.start() as u32,
|
||||||
id_auto,
|
id_auto,
|
||||||
text,
|
text,
|
||||||
id_override,
|
id_override,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
tree::EventKind::Atom(block::Atom::Attributes) => {
|
block::EventKind::Atom(block::Atom::Attributes) => {
|
||||||
attr_prev = Some(e.span);
|
attr_prev = Some(e.span);
|
||||||
}
|
}
|
||||||
tree::EventKind::Enter(..)
|
block::EventKind::Enter(..)
|
||||||
| tree::EventKind::Exit(block::Node::Container(block::Container::Section {
|
| block::EventKind::Exit(block::Node::Container(block::Container::Section {
|
||||||
..
|
..
|
||||||
})) => {}
|
})) => {}
|
||||||
_ => {
|
_ => {
|
||||||
|
@ -723,7 +760,7 @@ impl<'s> PrePass<'s> {
|
||||||
h.id_override.as_ref().unwrap_or(&h.id_auto)
|
h.id_override.as_ref().unwrap_or(&h.id_auto)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn heading_id_by_location(&self, location: usize) -> Option<&str> {
|
fn heading_id_by_location(&self, location: u32) -> Option<&str> {
|
||||||
self.headings
|
self.headings
|
||||||
.binary_search_by_key(&location, |h| h.location)
|
.binary_search_by_key(&location, |h| h.location)
|
||||||
.ok()
|
.ok()
|
||||||
|
@ -741,22 +778,133 @@ impl<'s> PrePass<'s> {
|
||||||
impl<'s> Parser<'s> {
|
impl<'s> Parser<'s> {
|
||||||
#[must_use]
|
#[must_use]
|
||||||
pub fn new(src: &'s str) -> Self {
|
pub fn new(src: &'s str) -> Self {
|
||||||
let tree = block::parse(src);
|
let blocks = block::parse(src);
|
||||||
let mut inline_parser = inline::Parser::new(src);
|
let mut inline_parser = inline::Parser::new(src);
|
||||||
let pre_pass = PrePass::new(src, tree.clone(), &mut inline_parser);
|
let pre_pass = PrePass::new(src, blocks.iter(), &mut inline_parser);
|
||||||
|
|
||||||
Self {
|
Self {
|
||||||
src,
|
src,
|
||||||
tree,
|
blocks: blocks.into_iter().peekable(),
|
||||||
pre_pass,
|
pre_pass,
|
||||||
block_attributes: Attributes::new(),
|
block_attributes: Attributes::new(),
|
||||||
|
block_attributes_pos: None,
|
||||||
table_head_row: false,
|
table_head_row: false,
|
||||||
verbatim: false,
|
verbatim: false,
|
||||||
inline_parser,
|
inline_parser,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn inline(&mut self) -> Option<Event<'s>> {
|
/// Turn the [`Parser`] into an iterator of tuples, each with an [`Event`] and a start/end byte
|
||||||
|
/// offset for its corresponding input (as a [`Range<usize>`]).
|
||||||
|
///
|
||||||
|
/// Generally, the range of each event does not overlap with any other event and the ranges are
|
||||||
|
/// in the same order as the events are emitted, i.e. the start offset of an event must be greater
|
||||||
|
/// or equal to the (exclusive) end offset of all events that were emitted before that event.
|
||||||
|
/// However, there are some exceptions to this rule:
|
||||||
|
///
|
||||||
|
/// - Blank lines in between block attributes and the block cause the blankline events to
|
||||||
|
/// overlap with the block start event.
|
||||||
|
/// - Caption events are emitted before the table rows while the input for the caption content
|
||||||
|
/// is located after the table rows, causing the ranges to be out of order.
|
||||||
|
///
|
||||||
|
/// Characters between events, that are not part of any event range, are typically whitespace
|
||||||
|
/// but may also consist of unattached attributes or `>` characters from blockquotes.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// Start and end events of containers correspond only to the start and end markers for that
|
||||||
|
/// container, not its inner content:
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// # use jotdown::*;
|
||||||
|
/// # use jotdown::Event::*;
|
||||||
|
/// # use jotdown::Container::*;
|
||||||
|
/// let input = "> _hello_ [text](url)\n";
|
||||||
|
/// assert!(matches!(
|
||||||
|
/// Parser::new(input)
|
||||||
|
/// .into_offset_iter()
|
||||||
|
/// .map(|(e, r)| (&input[r], e))
|
||||||
|
/// .collect::<Vec<_>>()
|
||||||
|
/// .as_slice(),
|
||||||
|
/// &[
|
||||||
|
/// (">", Start(Blockquote, ..)),
|
||||||
|
/// ("", Start(Paragraph, ..)),
|
||||||
|
/// ("_", Start(Emphasis, ..)),
|
||||||
|
/// ("hello", Str(..)),
|
||||||
|
/// ("_", End(Emphasis)),
|
||||||
|
/// (" ", Str(..)),
|
||||||
|
/// ("[", Start(Link { .. }, ..)),
|
||||||
|
/// ("text", Str(..)),
|
||||||
|
/// ("](url)", End(Link { .. })),
|
||||||
|
/// ("", End(Paragraph)),
|
||||||
|
/// ("", End(Blockquote)),
|
||||||
|
/// ],
|
||||||
|
/// ));
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// _Block_ attributes that belong to a container are included in the _start_ event. _Inline_
|
||||||
|
/// attributes that belong to a container are included in the _end_ event:
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// # use jotdown::*;
|
||||||
|
/// # use jotdown::Event::*;
|
||||||
|
/// # use jotdown::Container::*;
|
||||||
|
/// let input = "
|
||||||
|
/// {.quote}
|
||||||
|
/// > [Hello]{lang=en} world!";
|
||||||
|
/// assert!(matches!(
|
||||||
|
/// Parser::new(input)
|
||||||
|
/// .into_offset_iter()
|
||||||
|
/// .map(|(e, r)| (&input[r], e))
|
||||||
|
/// .collect::<Vec<_>>()
|
||||||
|
/// .as_slice(),
|
||||||
|
/// &[
|
||||||
|
/// ("\n", Blankline),
|
||||||
|
/// ("{.quote}\n>", Start(Blockquote, ..)),
|
||||||
|
/// ("", Start(Paragraph, ..)),
|
||||||
|
/// ("[", Start(Span, ..)),
|
||||||
|
/// ("Hello", Str(..)),
|
||||||
|
/// ("]{lang=en}", End(Span)),
|
||||||
|
/// (" world!", Str(..)),
|
||||||
|
/// ("", End(Paragraph)),
|
||||||
|
/// ("", End(Blockquote)),
|
||||||
|
/// ],
|
||||||
|
/// ));
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// Inline events that span multiple lines may contain characters from outer block containers
|
||||||
|
/// (e.g. `>` characters from blockquotes or whitespace from list items):
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// # use jotdown::*;
|
||||||
|
/// # use jotdown::Event::*;
|
||||||
|
/// # use jotdown::Container::*;
|
||||||
|
/// let input = "
|
||||||
|
/// > [txt](multi
|
||||||
|
/// > line)";
|
||||||
|
/// assert!(matches!(
|
||||||
|
/// Parser::new(input)
|
||||||
|
/// .into_offset_iter()
|
||||||
|
/// .map(|(e, r)| (&input[r], e))
|
||||||
|
/// .collect::<Vec<_>>()
|
||||||
|
/// .as_slice(),
|
||||||
|
/// &[
|
||||||
|
/// ("\n", Blankline),
|
||||||
|
/// (">", Start(Blockquote, ..)),
|
||||||
|
/// ("", Start(Paragraph, ..)),
|
||||||
|
/// ("[", Start(Link { .. }, ..)),
|
||||||
|
/// ("txt", Str(..)),
|
||||||
|
/// ("](multi\n> line)", End(Link { .. })),
|
||||||
|
/// ("", End(Paragraph)),
|
||||||
|
/// ("", End(Blockquote)),
|
||||||
|
/// ],
|
||||||
|
/// ));
|
||||||
|
/// ```
|
||||||
|
pub fn into_offset_iter(self) -> OffsetIter<'s> {
|
||||||
|
OffsetIter { parser: self }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn inline(&mut self) -> Option<(Event<'s>, Range<usize>)> {
|
||||||
let next = self.inline_parser.next()?;
|
let next = self.inline_parser.next()?;
|
||||||
|
|
||||||
let (inline, mut attributes) = match next {
|
let (inline, mut attributes) = match next {
|
||||||
|
@ -772,16 +920,14 @@ impl<'s> Parser<'s> {
|
||||||
|
|
||||||
inline.map(|inline| {
|
inline.map(|inline| {
|
||||||
let enter = matches!(inline.kind, inline::EventKind::Enter(_));
|
let enter = matches!(inline.kind, inline::EventKind::Enter(_));
|
||||||
match inline.kind {
|
let event = match inline.kind {
|
||||||
inline::EventKind::Enter(c) | inline::EventKind::Exit(c) => {
|
inline::EventKind::Enter(c) | inline::EventKind::Exit(c) => {
|
||||||
let t = match c {
|
let t = match c {
|
||||||
inline::Container::Span => Container::Span,
|
inline::Container::Span => Container::Span,
|
||||||
inline::Container::Verbatim => Container::Verbatim,
|
inline::Container::Verbatim => Container::Verbatim,
|
||||||
inline::Container::InlineMath => Container::Math { display: false },
|
inline::Container::InlineMath => Container::Math { display: false },
|
||||||
inline::Container::DisplayMath => Container::Math { display: true },
|
inline::Container::DisplayMath => Container::Math { display: true },
|
||||||
inline::Container::RawFormat => Container::RawInline {
|
inline::Container::RawFormat { format } => Container::RawInline { format },
|
||||||
format: inline.span.of(self.src),
|
|
||||||
},
|
|
||||||
inline::Container::Subscript => Container::Subscript,
|
inline::Container::Subscript => Container::Subscript,
|
||||||
inline::Container::Superscript => Container::Superscript,
|
inline::Container::Superscript => Container::Superscript,
|
||||||
inline::Container::Insert => Container::Insert,
|
inline::Container::Insert => Container::Insert,
|
||||||
|
@ -822,14 +968,13 @@ impl<'s> Parser<'s> {
|
||||||
Container::Image(url_or_tag, ty)
|
Container::Image(url_or_tag, ty)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
inline::Container::Autolink => {
|
inline::Container::Autolink(url) => {
|
||||||
let url: CowStr = inline.span.of(self.src).into();
|
|
||||||
let ty = if url.contains('@') {
|
let ty = if url.contains('@') {
|
||||||
LinkType::Email
|
LinkType::Email
|
||||||
} else {
|
} else {
|
||||||
LinkType::AutoLink
|
LinkType::AutoLink
|
||||||
};
|
};
|
||||||
Container::Link(url, ty)
|
Container::Link(url.into(), ty)
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
if enter {
|
if enter {
|
||||||
|
@ -839,10 +984,8 @@ impl<'s> Parser<'s> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
inline::EventKind::Atom(a) => match a {
|
inline::EventKind::Atom(a) => match a {
|
||||||
inline::Atom::FootnoteReference => {
|
inline::Atom::FootnoteReference { label } => Event::FootnoteReference(label),
|
||||||
Event::FootnoteReference(inline.span.of(self.src))
|
inline::Atom::Symbol(sym) => Event::Symbol(sym.into()),
|
||||||
}
|
|
||||||
inline::Atom::Symbol => Event::Symbol(inline.span.of(self.src).into()),
|
|
||||||
inline::Atom::Quote { ty, left } => match (ty, left) {
|
inline::Atom::Quote { ty, left } => match (ty, left) {
|
||||||
(inline::QuoteType::Single, true) => Event::LeftSingleQuote,
|
(inline::QuoteType::Single, true) => Event::LeftSingleQuote,
|
||||||
(inline::QuoteType::Single, false) => Event::RightSingleQuote,
|
(inline::QuoteType::Single, false) => Event::RightSingleQuote,
|
||||||
|
@ -861,48 +1004,58 @@ impl<'s> Parser<'s> {
|
||||||
inline::EventKind::Attributes { .. } | inline::EventKind::Placeholder => {
|
inline::EventKind::Attributes { .. } | inline::EventKind::Placeholder => {
|
||||||
panic!("{:?}", inline)
|
panic!("{:?}", inline)
|
||||||
}
|
}
|
||||||
}
|
};
|
||||||
|
(event, inline.span.into())
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn block(&mut self) -> Option<Event<'s>> {
|
fn block(&mut self) -> Option<(Event<'s>, Range<usize>)> {
|
||||||
while let Some(ev) = &mut self.tree.next() {
|
while let Some(mut ev) = &mut self.blocks.next() {
|
||||||
let content = ev.span.of(self.src);
|
|
||||||
let event = match ev.kind {
|
let event = match ev.kind {
|
||||||
tree::EventKind::Atom(a) => match a {
|
block::EventKind::Atom(a) => match a {
|
||||||
block::Atom::Blankline => Event::Blankline,
|
block::Atom::Blankline => Event::Blankline,
|
||||||
block::Atom::ThematicBreak => {
|
block::Atom::ThematicBreak => {
|
||||||
|
if let Some(pos) = self.block_attributes_pos.take() {
|
||||||
|
ev.span = Span::new(pos, ev.span.end());
|
||||||
|
}
|
||||||
Event::ThematicBreak(self.block_attributes.take())
|
Event::ThematicBreak(self.block_attributes.take())
|
||||||
}
|
}
|
||||||
block::Atom::Attributes => {
|
block::Atom::Attributes => {
|
||||||
self.block_attributes.parse(content);
|
if self.block_attributes_pos.is_none() {
|
||||||
|
self.block_attributes_pos = Some(ev.span.start());
|
||||||
|
}
|
||||||
|
self.block_attributes.parse(ev.span.of(self.src));
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
tree::EventKind::Enter(c) | tree::EventKind::Exit(c) => {
|
block::EventKind::Enter(c) | block::EventKind::Exit(c) => {
|
||||||
let enter = matches!(ev.kind, tree::EventKind::Enter(..));
|
let enter = matches!(ev.kind, block::EventKind::Enter(..));
|
||||||
let cont = match c {
|
let cont = match c {
|
||||||
block::Node::Leaf(l) => {
|
block::Node::Leaf(l) => {
|
||||||
self.inline_parser.reset();
|
self.inline_parser.reset();
|
||||||
match l {
|
match l {
|
||||||
block::Leaf::Paragraph => Container::Paragraph,
|
block::Leaf::Paragraph => Container::Paragraph,
|
||||||
block::Leaf::Heading { has_section } => Container::Heading {
|
block::Leaf::Heading {
|
||||||
level: content.len().try_into().unwrap(),
|
level,
|
||||||
|
has_section,
|
||||||
|
pos,
|
||||||
|
} => Container::Heading {
|
||||||
|
level,
|
||||||
has_section,
|
has_section,
|
||||||
id: self
|
id: self
|
||||||
.pre_pass
|
.pre_pass
|
||||||
.heading_id_by_location(ev.span.start())
|
.heading_id_by_location(pos)
|
||||||
.unwrap_or_default()
|
.unwrap_or_default()
|
||||||
.to_string()
|
.to_string()
|
||||||
.into(),
|
.into(),
|
||||||
},
|
},
|
||||||
block::Leaf::DescriptionTerm => Container::DescriptionTerm,
|
block::Leaf::DescriptionTerm => Container::DescriptionTerm,
|
||||||
block::Leaf::CodeBlock => {
|
block::Leaf::CodeBlock { language } => {
|
||||||
self.verbatim = enter;
|
self.verbatim = enter;
|
||||||
if let Some(format) = content.strip_prefix('=') {
|
if let Some(format) = language.strip_prefix('=') {
|
||||||
Container::RawBlock { format }
|
Container::RawBlock { format }
|
||||||
} else {
|
} else {
|
||||||
Container::CodeBlock { language: content }
|
Container::CodeBlock { language }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
block::Leaf::TableCell(alignment) => Container::TableCell {
|
block::Leaf::TableCell(alignment) => Container::TableCell {
|
||||||
|
@ -910,16 +1063,20 @@ impl<'s> Parser<'s> {
|
||||||
head: self.table_head_row,
|
head: self.table_head_row,
|
||||||
},
|
},
|
||||||
block::Leaf::Caption => Container::Caption,
|
block::Leaf::Caption => Container::Caption,
|
||||||
block::Leaf::LinkDefinition => {
|
block::Leaf::LinkDefinition { label } => {
|
||||||
Container::LinkDefinition { label: content }
|
self.verbatim = enter;
|
||||||
|
Container::LinkDefinition { label }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
block::Node::Container(c) => match c {
|
block::Node::Container(c) => match c {
|
||||||
block::Container::Blockquote => Container::Blockquote,
|
block::Container::Blockquote => Container::Blockquote,
|
||||||
block::Container::Div => Container::Div { class: content },
|
block::Container::Div { class } => Container::Div { class },
|
||||||
block::Container::Footnote => Container::Footnote { label: content },
|
block::Container::Footnote { label } => Container::Footnote { label },
|
||||||
block::Container::List(block::ListKind { ty, tight }) => {
|
block::Container::List {
|
||||||
|
kind: block::ListKind { ty, tight },
|
||||||
|
marker,
|
||||||
|
} => {
|
||||||
if matches!(ty, block::ListType::Description) {
|
if matches!(ty, block::ListType::Description) {
|
||||||
Container::DescriptionList
|
Container::DescriptionList
|
||||||
} else {
|
} else {
|
||||||
|
@ -927,9 +1084,8 @@ impl<'s> Parser<'s> {
|
||||||
block::ListType::Unordered(..) => ListKind::Unordered,
|
block::ListType::Unordered(..) => ListKind::Unordered,
|
||||||
block::ListType::Task => ListKind::Task,
|
block::ListType::Task => ListKind::Task,
|
||||||
block::ListType::Ordered(numbering, style) => {
|
block::ListType::Ordered(numbering, style) => {
|
||||||
let start = numbering
|
let start =
|
||||||
.parse_number(style.number(content))
|
numbering.parse_number(style.number(marker)).max(1);
|
||||||
.max(1);
|
|
||||||
ListKind::Ordered {
|
ListKind::Ordered {
|
||||||
numbering,
|
numbering,
|
||||||
style,
|
style,
|
||||||
|
@ -941,12 +1097,12 @@ impl<'s> Parser<'s> {
|
||||||
Container::List { kind, tight }
|
Container::List { kind, tight }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
block::Container::ListItem(ty) => match ty {
|
block::Container::ListItem(kind) => match kind {
|
||||||
block::ListType::Task => Container::TaskListItem {
|
block::ListItemKind::Task { checked } => {
|
||||||
checked: content.as_bytes()[3] != b' ',
|
Container::TaskListItem { checked }
|
||||||
},
|
}
|
||||||
block::ListType::Description => Container::DescriptionDetails,
|
block::ListItemKind::Description => Container::DescriptionDetails,
|
||||||
_ => Container::ListItem,
|
block::ListItemKind::List => Container::ListItem,
|
||||||
},
|
},
|
||||||
block::Container::Table => Container::Table,
|
block::Container::Table => Container::Table,
|
||||||
block::Container::TableRow { head } => {
|
block::Container::TableRow { head } => {
|
||||||
|
@ -955,10 +1111,10 @@ impl<'s> Parser<'s> {
|
||||||
}
|
}
|
||||||
Container::TableRow { head }
|
Container::TableRow { head }
|
||||||
}
|
}
|
||||||
block::Container::Section => Container::Section {
|
block::Container::Section { pos } => Container::Section {
|
||||||
id: self
|
id: self
|
||||||
.pre_pass
|
.pre_pass
|
||||||
.heading_id_by_location(ev.span.start())
|
.heading_id_by_location(pos)
|
||||||
.unwrap_or_default()
|
.unwrap_or_default()
|
||||||
.to_string()
|
.to_string()
|
||||||
.into(),
|
.into(),
|
||||||
|
@ -966,32 +1122,63 @@ impl<'s> Parser<'s> {
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
if enter {
|
if enter {
|
||||||
|
if let Some(pos) = self.block_attributes_pos.take() {
|
||||||
|
ev.span = Span::new(pos, ev.span.end());
|
||||||
|
}
|
||||||
Event::Start(cont, self.block_attributes.take())
|
Event::Start(cont, self.block_attributes.take())
|
||||||
} else {
|
} else {
|
||||||
|
self.block_attributes = Attributes::new();
|
||||||
|
self.block_attributes_pos = None;
|
||||||
Event::End(cont)
|
Event::End(cont)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
tree::EventKind::Inline => {
|
block::EventKind::Inline => {
|
||||||
if self.verbatim {
|
if self.verbatim {
|
||||||
Event::Str(content.into())
|
Event::Str(ev.span.of(self.src).into())
|
||||||
} else {
|
} else {
|
||||||
self.inline_parser
|
self.inline_parser.feed_line(
|
||||||
.feed_line(ev.span, self.tree.branch_is_empty());
|
ev.span,
|
||||||
return self.next();
|
!matches!(
|
||||||
|
self.blocks.peek().map(|e| &e.kind),
|
||||||
|
Some(block::EventKind::Inline),
|
||||||
|
),
|
||||||
|
);
|
||||||
|
return self.next_span();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
block::EventKind::Stale => continue,
|
||||||
};
|
};
|
||||||
return Some(event);
|
return Some((event, ev.span.into()));
|
||||||
}
|
}
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn next_span(&mut self) -> Option<(Event<'s>, Range<usize>)> {
|
||||||
|
self.inline().or_else(|| self.block())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'s> Iterator for Parser<'s> {
|
impl<'s> Iterator for Parser<'s> {
|
||||||
type Item = Event<'s>;
|
type Item = Event<'s>;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
self.inline().or_else(|| self.block())
|
self.next_span().map(|(e, _)| e)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An iterator that is identical to a [`Parser`], except that it also emits the location of each
|
||||||
|
/// event within the input.
|
||||||
|
///
|
||||||
|
/// See the documentation of [`Parser::into_offset_iter`] for more information.
|
||||||
|
pub struct OffsetIter<'s> {
|
||||||
|
parser: Parser<'s>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'s> Iterator for OffsetIter<'s> {
|
||||||
|
type Item = (Event<'s>, Range<usize>);
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
self.parser.next_span()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1523,7 +1710,6 @@ mod test {
|
||||||
Blankline,
|
Blankline,
|
||||||
Start(LinkDefinition { label: "tag" }, Attributes::new()),
|
Start(LinkDefinition { label: "tag" }, Attributes::new()),
|
||||||
Str("u".into()),
|
Str("u".into()),
|
||||||
Softbreak,
|
|
||||||
Str("rl".into()),
|
Str("rl".into()),
|
||||||
End(LinkDefinition { label: "tag" }),
|
End(LinkDefinition { label: "tag" }),
|
||||||
);
|
);
|
||||||
|
@ -1532,19 +1718,24 @@ mod test {
|
||||||
"[text][tag]\n",
|
"[text][tag]\n",
|
||||||
"\n",
|
"\n",
|
||||||
"[tag]:\n",
|
"[tag]:\n",
|
||||||
" url\n", //
|
" url\n", //
|
||||||
|
" cont\n", //
|
||||||
),
|
),
|
||||||
Start(Paragraph, Attributes::new()),
|
Start(Paragraph, Attributes::new()),
|
||||||
Start(
|
Start(
|
||||||
Link("url".into(), LinkType::Span(SpanLinkType::Reference)),
|
Link("urlcont".into(), LinkType::Span(SpanLinkType::Reference)),
|
||||||
Attributes::new()
|
Attributes::new()
|
||||||
),
|
),
|
||||||
Str("text".into()),
|
Str("text".into()),
|
||||||
End(Link("url".into(), LinkType::Span(SpanLinkType::Reference))),
|
End(Link(
|
||||||
|
"urlcont".into(),
|
||||||
|
LinkType::Span(SpanLinkType::Reference)
|
||||||
|
)),
|
||||||
End(Paragraph),
|
End(Paragraph),
|
||||||
Blankline,
|
Blankline,
|
||||||
Start(LinkDefinition { label: "tag" }, Attributes::new()),
|
Start(LinkDefinition { label: "tag" }, Attributes::new()),
|
||||||
Str("url".into()),
|
Str("url".into()),
|
||||||
|
Str("cont".into()),
|
||||||
End(LinkDefinition { label: "tag" }),
|
End(LinkDefinition { label: "tag" }),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,6 +4,12 @@ pub struct Span {
|
||||||
end: u32,
|
end: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl From<Span> for std::ops::Range<usize> {
|
||||||
|
fn from(span: Span) -> Self {
|
||||||
|
span.start()..span.end()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl Span {
|
impl Span {
|
||||||
pub fn new(start: usize, end: usize) -> Self {
|
pub fn new(start: usize, end: usize) -> Self {
|
||||||
Self::by_len(start, end.checked_sub(start).unwrap())
|
Self::by_len(start, end.checked_sub(start).unwrap())
|
||||||
|
|
427
src/tree.rs
427
src/tree.rs
|
@ -1,427 +0,0 @@
|
||||||
use crate::Span;
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
|
||||||
pub enum EventKind<C, A> {
|
|
||||||
Enter(C),
|
|
||||||
Inline,
|
|
||||||
Exit(C),
|
|
||||||
Atom(A),
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub struct Node<'a, C, A> {
|
|
||||||
pub index: NodeIndex,
|
|
||||||
pub elem: Element<'a, C, A>,
|
|
||||||
pub span: Span,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub enum Element<'a, C, A> {
|
|
||||||
Container(&'a mut C),
|
|
||||||
Atom(&'a mut A),
|
|
||||||
Inline,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
|
||||||
pub struct Event<C, A> {
|
|
||||||
pub kind: EventKind<C, A>,
|
|
||||||
pub span: Span,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone)]
|
|
||||||
pub struct Tree<C: 'static, A: 'static> {
|
|
||||||
nodes: std::rc::Rc<[InternalNode<C, A>]>,
|
|
||||||
branch: Vec<NodeIndex>,
|
|
||||||
head: Option<NodeIndex>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<C: Clone, A: Clone> Tree<C, A> {
|
|
||||||
/// Count number of direct children nodes.
|
|
||||||
pub fn count_children(&self) -> usize {
|
|
||||||
let mut head = self.head;
|
|
||||||
let mut count = 0;
|
|
||||||
while let Some(h) = head {
|
|
||||||
let n = &self.nodes[h.index()];
|
|
||||||
head = n.next;
|
|
||||||
count += 1;
|
|
||||||
}
|
|
||||||
count
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Retrieve all inlines until the end of the current container. Panics if any upcoming node is
|
|
||||||
/// not an inline node.
|
|
||||||
pub fn take_inlines(&mut self) -> impl Iterator<Item = Span> + '_ {
|
|
||||||
let mut head = self.head.take();
|
|
||||||
std::iter::from_fn(move || {
|
|
||||||
head.take().map(|h| {
|
|
||||||
let n = &self.nodes[h.index()];
|
|
||||||
debug_assert!(matches!(n.kind, NodeKind::Inline));
|
|
||||||
head = n.next;
|
|
||||||
n.span
|
|
||||||
})
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn branch_is_empty(&self) -> bool {
|
|
||||||
matches!(self.head, None)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<C: Clone, A: Clone> Iterator for Tree<C, A> {
|
|
||||||
type Item = Event<C, A>;
|
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
|
||||||
if let Some(head) = self.head {
|
|
||||||
let n = &self.nodes[head.index()];
|
|
||||||
let kind = match &n.kind {
|
|
||||||
NodeKind::Root => unreachable!(),
|
|
||||||
NodeKind::Container(c, child) => {
|
|
||||||
self.branch.push(head);
|
|
||||||
self.head = *child;
|
|
||||||
EventKind::Enter(c.clone())
|
|
||||||
}
|
|
||||||
NodeKind::Atom(a) => {
|
|
||||||
self.head = n.next;
|
|
||||||
EventKind::Atom(a.clone())
|
|
||||||
}
|
|
||||||
NodeKind::Inline => {
|
|
||||||
self.head = n.next;
|
|
||||||
EventKind::Inline
|
|
||||||
}
|
|
||||||
};
|
|
||||||
Some(Event { kind, span: n.span })
|
|
||||||
} else if let Some(block_ni) = self.branch.pop() {
|
|
||||||
let InternalNode { next, kind, span } = &self.nodes[block_ni.index()];
|
|
||||||
let kind = EventKind::Exit(kind.container().unwrap().clone());
|
|
||||||
self.head = *next;
|
|
||||||
Some(Event { kind, span: *span })
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
|
||||||
pub struct NodeIndex(std::num::NonZeroUsize);
|
|
||||||
|
|
||||||
impl NodeIndex {
|
|
||||||
fn new(i: usize) -> Self {
|
|
||||||
debug_assert_ne!(i, usize::MAX);
|
|
||||||
Self((i + 1).try_into().unwrap())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn root() -> Self {
|
|
||||||
Self::new(0)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn index(self) -> usize {
|
|
||||||
usize::from(self.0) - 1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
|
||||||
enum NodeKind<C, A> {
|
|
||||||
Root,
|
|
||||||
Container(C, Option<NodeIndex>),
|
|
||||||
Atom(A),
|
|
||||||
Inline,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
struct InternalNode<C, A> {
|
|
||||||
span: Span,
|
|
||||||
kind: NodeKind<C, A>,
|
|
||||||
next: Option<NodeIndex>,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Clone)]
|
|
||||||
pub struct Builder<C, A> {
|
|
||||||
nodes: Vec<InternalNode<C, A>>,
|
|
||||||
branch: Vec<NodeIndex>,
|
|
||||||
head: Option<NodeIndex>,
|
|
||||||
depth: usize,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<C, A> NodeKind<C, A> {
|
|
||||||
fn child(&self) -> Option<NodeIndex> {
|
|
||||||
if let NodeKind::Container(_, child) = self {
|
|
||||||
*child
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn child_mut(&mut self) -> &mut Option<NodeIndex> {
|
|
||||||
if let NodeKind::Container(_, child) = self {
|
|
||||||
child
|
|
||||||
} else {
|
|
||||||
panic!()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn container(&self) -> Option<&C> {
|
|
||||||
if let NodeKind::Container(c, _) = self {
|
|
||||||
Some(c)
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a, C, A> From<&'a mut NodeKind<C, A>> for Element<'a, C, A> {
|
|
||||||
fn from(kind: &'a mut NodeKind<C, A>) -> Self {
|
|
||||||
match kind {
|
|
||||||
NodeKind::Root => unreachable!(),
|
|
||||||
NodeKind::Container(c, ..) => Element::Container(c),
|
|
||||||
NodeKind::Atom(a) => Element::Atom(a),
|
|
||||||
NodeKind::Inline => Element::Inline,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<C, A> Builder<C, A> {
|
|
||||||
pub(super) fn new() -> Self {
|
|
||||||
Builder {
|
|
||||||
nodes: vec![InternalNode {
|
|
||||||
span: Span::default(),
|
|
||||||
kind: NodeKind::Root,
|
|
||||||
next: None,
|
|
||||||
}],
|
|
||||||
branch: vec![],
|
|
||||||
head: Some(NodeIndex::root()),
|
|
||||||
depth: 0,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(super) fn atom(&mut self, a: A, span: Span) {
|
|
||||||
self.add_node(InternalNode {
|
|
||||||
span,
|
|
||||||
kind: NodeKind::Atom(a),
|
|
||||||
next: None,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(super) fn inline(&mut self, span: Span) {
|
|
||||||
self.add_node(InternalNode {
|
|
||||||
span,
|
|
||||||
kind: NodeKind::Inline,
|
|
||||||
next: None,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(super) fn enter(&mut self, c: C, span: Span) -> NodeIndex {
|
|
||||||
self.depth += 1;
|
|
||||||
self.add_node(InternalNode {
|
|
||||||
span,
|
|
||||||
kind: NodeKind::Container(c, None),
|
|
||||||
next: None,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(super) fn exit(&mut self) {
|
|
||||||
self.depth -= 1;
|
|
||||||
if let Some(head) = self.head.take() {
|
|
||||||
if matches!(self.nodes[head.index()].kind, NodeKind::Container(..)) {
|
|
||||||
self.branch.push(head);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
let last = self.branch.pop();
|
|
||||||
debug_assert_ne!(last, None);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Exit and discard all the contents of the current container.
|
|
||||||
pub(super) fn exit_discard(&mut self) {
|
|
||||||
self.exit();
|
|
||||||
let exited = self.branch.pop().unwrap();
|
|
||||||
self.nodes.drain(exited.index()..);
|
|
||||||
let (prev, has_parent) = self.replace(exited, None);
|
|
||||||
if has_parent {
|
|
||||||
self.head = Some(prev);
|
|
||||||
} else {
|
|
||||||
self.branch.push(prev);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Swap the node and its children with either its parent or the node before.
|
|
||||||
pub fn swap_prev(&mut self, node: NodeIndex) {
|
|
||||||
let next = self.nodes[node.index()].next;
|
|
||||||
if let Some(n) = next {
|
|
||||||
self.replace(n, None);
|
|
||||||
}
|
|
||||||
let (prev, _) = self.replace(node, next);
|
|
||||||
self.replace(prev, Some(node));
|
|
||||||
self.nodes[node.index()].next = Some(prev);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Remove the specified node and its children.
|
|
||||||
pub fn remove(&mut self, node: NodeIndex) {
|
|
||||||
let next = self.nodes[node.index()].next;
|
|
||||||
self.replace(node, next);
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(super) fn depth(&self) -> usize {
|
|
||||||
self.depth
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(super) fn elem(&mut self, ni: NodeIndex) -> Element<C, A> {
|
|
||||||
match &mut self.nodes[ni.index()].kind {
|
|
||||||
NodeKind::Root => unreachable!(),
|
|
||||||
NodeKind::Container(c, ..) => Element::Container(c),
|
|
||||||
NodeKind::Atom(a) => Element::Atom(a),
|
|
||||||
NodeKind::Inline => Element::Inline,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Retrieve all children nodes for the specified node, in the order that they were added.
|
|
||||||
pub(super) fn children(&mut self, node: NodeIndex) -> impl Iterator<Item = Node<C, A>> {
|
|
||||||
// XXX assumes no modifications
|
|
||||||
let n = &self.nodes[node.index()];
|
|
||||||
let range = if let Some(start) = n.kind.child() {
|
|
||||||
start.index()..n.next.map_or(self.nodes.len(), NodeIndex::index)
|
|
||||||
} else {
|
|
||||||
0..0
|
|
||||||
};
|
|
||||||
range
|
|
||||||
.clone()
|
|
||||||
.map(NodeIndex::new)
|
|
||||||
.zip(self.nodes[range].iter_mut())
|
|
||||||
.map(|(index, n)| Node {
|
|
||||||
index,
|
|
||||||
elem: Element::from(&mut n.kind),
|
|
||||||
span: n.span,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(super) fn finish(self) -> Tree<C, A> {
|
|
||||||
debug_assert_eq!(self.depth, 0);
|
|
||||||
let head = self.nodes[NodeIndex::root().index()].next;
|
|
||||||
Tree {
|
|
||||||
nodes: self.nodes.into_boxed_slice().into(),
|
|
||||||
branch: Vec::new(),
|
|
||||||
head,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn add_node(&mut self, node: InternalNode<C, A>) -> NodeIndex {
|
|
||||||
let ni = NodeIndex::new(self.nodes.len());
|
|
||||||
self.nodes.push(node);
|
|
||||||
if let Some(head_ni) = &mut self.head {
|
|
||||||
let mut head = &mut self.nodes[head_ni.index()];
|
|
||||||
match &mut head.kind {
|
|
||||||
NodeKind::Root | NodeKind::Inline | NodeKind::Atom(_) => {
|
|
||||||
// set next pointer of previous node
|
|
||||||
debug_assert_eq!(head.next, None);
|
|
||||||
head.next = Some(ni);
|
|
||||||
}
|
|
||||||
NodeKind::Container(_, child) => {
|
|
||||||
self.branch.push(*head_ni);
|
|
||||||
// set child pointer of current container
|
|
||||||
debug_assert_eq!(*child, None);
|
|
||||||
*child = Some(ni);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else if let Some(block) = self.branch.pop() {
|
|
||||||
let mut block = &mut self.nodes[block.index()];
|
|
||||||
debug_assert!(matches!(block.kind, NodeKind::Container(..)));
|
|
||||||
block.next = Some(ni);
|
|
||||||
} else {
|
|
||||||
panic!()
|
|
||||||
}
|
|
||||||
self.head = Some(ni);
|
|
||||||
ni
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Remove the link from the node that points to the specified node. Optionally replace the
|
|
||||||
/// node with another node. Return the pointer node and whether it is a container or not.
|
|
||||||
fn replace(&mut self, node: NodeIndex, next: Option<NodeIndex>) -> (NodeIndex, bool) {
|
|
||||||
for (i, n) in self.nodes.iter_mut().enumerate().rev() {
|
|
||||||
let ni = NodeIndex::new(i);
|
|
||||||
if n.next == Some(node) {
|
|
||||||
n.next = next;
|
|
||||||
return (ni, false);
|
|
||||||
} else if n.kind.child() == Some(node) {
|
|
||||||
*n.kind.child_mut() = next;
|
|
||||||
return (ni, true);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
panic!("node is never linked to")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<C: std::fmt::Debug + Clone + 'static, A: std::fmt::Debug + Clone + 'static> std::fmt::Debug
|
|
||||||
for Builder<C, A>
|
|
||||||
{
|
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
||||||
self.clone().finish().fmt(f)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<C: std::fmt::Debug + Clone, A: std::fmt::Debug + Clone> std::fmt::Debug for Tree<C, A> {
|
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
||||||
const INDENT: &str = " ";
|
|
||||||
let mut level = 0;
|
|
||||||
writeln!(f)?;
|
|
||||||
for e in self.clone() {
|
|
||||||
let indent = INDENT.repeat(level);
|
|
||||||
match e.kind {
|
|
||||||
EventKind::Enter(c) => {
|
|
||||||
write!(f, "{}{:?}", indent, c)?;
|
|
||||||
level += 1;
|
|
||||||
}
|
|
||||||
EventKind::Inline => write!(f, "{}Inline", indent)?,
|
|
||||||
EventKind::Exit(..) => {
|
|
||||||
level -= 1;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
EventKind::Atom(a) => write!(f, "{}{:?}", indent, a)?,
|
|
||||||
}
|
|
||||||
writeln!(f, " ({}:{})", e.span.start(), e.span.end())?;
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod test {
|
|
||||||
use crate::Span;
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn fmt() {
|
|
||||||
let mut tree = super::Builder::new();
|
|
||||||
tree.enter(1, Span::new(0, 1));
|
|
||||||
tree.atom(11, Span::new(0, 1));
|
|
||||||
tree.atom(12, Span::new(0, 1));
|
|
||||||
tree.exit();
|
|
||||||
tree.enter(2, Span::new(1, 5));
|
|
||||||
tree.enter(21, Span::new(2, 5));
|
|
||||||
tree.enter(211, Span::new(3, 4));
|
|
||||||
tree.atom(2111, Span::new(3, 4));
|
|
||||||
tree.exit();
|
|
||||||
tree.exit();
|
|
||||||
tree.enter(22, Span::new(4, 5));
|
|
||||||
tree.atom(221, Span::new(4, 5));
|
|
||||||
tree.exit();
|
|
||||||
tree.exit();
|
|
||||||
tree.enter(3, Span::new(5, 6));
|
|
||||||
tree.atom(31, Span::new(5, 6));
|
|
||||||
tree.exit();
|
|
||||||
assert_eq!(
|
|
||||||
format!("{:?}", tree.finish()),
|
|
||||||
concat!(
|
|
||||||
"\n",
|
|
||||||
"1 (0:1)\n",
|
|
||||||
" 11 (0:1)\n",
|
|
||||||
" 12 (0:1)\n",
|
|
||||||
"2 (1:5)\n",
|
|
||||||
" 21 (2:5)\n",
|
|
||||||
" 211 (3:4)\n",
|
|
||||||
" 2111 (3:4)\n",
|
|
||||||
" 22 (4:5)\n",
|
|
||||||
" 221 (4:5)\n",
|
|
||||||
"3 (5:6)\n",
|
|
||||||
" 31 (5:6)\n",
|
|
||||||
)
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -17,10 +17,6 @@ path = "src/main.rs"
|
||||||
name = "parse"
|
name = "parse"
|
||||||
path = "src/parse.rs"
|
path = "src/parse.rs"
|
||||||
|
|
||||||
[[bin]]
|
|
||||||
name = "parse_balance"
|
|
||||||
path = "src/parse_balance.rs"
|
|
||||||
|
|
||||||
[[bin]]
|
[[bin]]
|
||||||
name = "html"
|
name = "html"
|
||||||
path = "src/html.rs"
|
path = "src/html.rs"
|
||||||
|
|
|
@ -5,27 +5,66 @@ use html5ever::tendril::TendrilSink;
|
||||||
use html5ever::tokenizer;
|
use html5ever::tokenizer;
|
||||||
use html5ever::tree_builder;
|
use html5ever::tree_builder;
|
||||||
|
|
||||||
|
/// Perform sanity checks on events.
|
||||||
pub fn parse(data: &[u8]) {
|
pub fn parse(data: &[u8]) {
|
||||||
if let Ok(s) = std::str::from_utf8(data) {
|
if let Ok(s) = std::str::from_utf8(data) {
|
||||||
jotdown::Parser::new(s).last();
|
let whitelist_whitespace = s.contains('{') && s.contains('}'); // attributes are outside events
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Ensure containers are always balanced, i.e. opened and closed in correct order.
|
|
||||||
pub fn parse_balance(data: &[u8]) {
|
|
||||||
if let Ok(s) = std::str::from_utf8(data) {
|
|
||||||
let mut open = Vec::new();
|
let mut open = Vec::new();
|
||||||
for event in jotdown::Parser::new(s) {
|
let mut last = (jotdown::Event::Str("".into()), 0..0);
|
||||||
|
for (event, range) in jotdown::Parser::new(s).into_offset_iter() {
|
||||||
|
// no overlap, out of order
|
||||||
|
assert!(
|
||||||
|
last.1.end <= range.start
|
||||||
|
// block attributes may overlap with start event
|
||||||
|
|| (
|
||||||
|
matches!(last.0, jotdown::Event::Blankline)
|
||||||
|
&& (
|
||||||
|
matches!(
|
||||||
|
event,
|
||||||
|
jotdown::Event::Start(ref cont, ..) if cont.is_block()
|
||||||
|
)
|
||||||
|
|| matches!(event, jotdown::Event::ThematicBreak(..))
|
||||||
|
)
|
||||||
|
)
|
||||||
|
// caption event is before table rows but src is after
|
||||||
|
|| (
|
||||||
|
matches!(
|
||||||
|
last.0,
|
||||||
|
jotdown::Event::Start(jotdown::Container::Caption, ..)
|
||||||
|
| jotdown::Event::End(jotdown::Container::Caption)
|
||||||
|
)
|
||||||
|
&& range.end <= last.1.start
|
||||||
|
),
|
||||||
|
"{} > {} {:?} {:?}",
|
||||||
|
last.1.end,
|
||||||
|
range.start,
|
||||||
|
last.0,
|
||||||
|
event
|
||||||
|
);
|
||||||
|
last = (event.clone(), range.clone());
|
||||||
|
// range is valid unicode, does not cross char boundary
|
||||||
|
let _ = &s[range];
|
||||||
match event {
|
match event {
|
||||||
jotdown::Event::Start(c, ..) => open.push(c.clone()),
|
jotdown::Event::Start(c, ..) => open.push(c.clone()),
|
||||||
jotdown::Event::End(c) => assert_eq!(open.pop().unwrap(), c),
|
jotdown::Event::End(c) => {
|
||||||
|
// closes correct event
|
||||||
|
assert_eq!(open.pop().unwrap(), c);
|
||||||
|
}
|
||||||
_ => {}
|
_ => {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// no missing close
|
||||||
assert_eq!(open, &[]);
|
assert_eq!(open, &[]);
|
||||||
|
// only whitespace after last event
|
||||||
|
assert!(
|
||||||
|
whitelist_whitespace || s[last.1.end..].chars().all(char::is_whitespace),
|
||||||
|
"non whitespace {:?}",
|
||||||
|
&s[last.1.end..],
|
||||||
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Validate rendered html output.
|
||||||
pub fn html(data: &[u8]) {
|
pub fn html(data: &[u8]) {
|
||||||
if data.iter().any(|i| *i == 0) {
|
if data.iter().any(|i| *i == 0) {
|
||||||
return;
|
return;
|
||||||
|
@ -132,9 +171,6 @@ impl<'a> tree_builder::TreeSink for Dom<'a> {
|
||||||
"Found special tag while closing generic tag",
|
"Found special tag while closing generic tag",
|
||||||
"Formatting element not current node",
|
"Formatting element not current node",
|
||||||
"Formatting element not open",
|
"Formatting element not open",
|
||||||
// FIXME bug caused by empty table at end of list
|
|
||||||
"No matching tag to close",
|
|
||||||
"Unexpected open element while closing",
|
|
||||||
];
|
];
|
||||||
if !whitelist.iter().any(|e| msg.starts_with(e)) {
|
if !whitelist.iter().any(|e| msg.starts_with(e)) {
|
||||||
#[cfg(feature = "debug")]
|
#[cfg(feature = "debug")]
|
||||||
|
|
|
@ -8,7 +8,6 @@ fn main() {
|
||||||
|
|
||||||
let f = match target.as_str() {
|
let f = match target.as_str() {
|
||||||
"parse" => jotdown_afl::parse,
|
"parse" => jotdown_afl::parse,
|
||||||
"parse_balance" => jotdown_afl::parse_balance,
|
|
||||||
"html" => jotdown_afl::html,
|
"html" => jotdown_afl::html,
|
||||||
_ => panic!("unknown target '{}'", target),
|
_ => panic!("unknown target '{}'", target),
|
||||||
};
|
};
|
||||||
|
|
|
@ -1,3 +0,0 @@
|
||||||
fn main() {
|
|
||||||
afl::fuzz!(|data: &[u8]| { jotdown_afl::parse_balance(data) });
|
|
||||||
}
|
|
Loading…
Reference in a new issue