commit
91b6649fa8
11 changed files with 1698 additions and 1195 deletions
2
Makefile
2
Makefile
|
@ -104,6 +104,6 @@ clean:
|
||||||
(cd tests/suite && make clean)
|
(cd tests/suite && make clean)
|
||||||
rm -f tests/bench/*.dj
|
rm -f tests/bench/*.dj
|
||||||
(cd tests/bench && make clean)
|
(cd tests/bench && make clean)
|
||||||
rm -f bench/*.dj
|
find bench -type l -path 'bench/*.dj' -print0 | xargs -0 rm -f
|
||||||
rm -rf tests/afl/out
|
rm -rf tests/afl/out
|
||||||
(cd examples/jotdown_wasm && make clean)
|
(cd examples/jotdown_wasm && make clean)
|
||||||
|
|
82
bench/inline-attrs.dj
Normal file
82
bench/inline-attrs.dj
Normal file
|
@ -0,0 +1,82 @@
|
||||||
|
Inline{.a} attributes{#b} attached{c=d} to{.e} words{#f} probably{g=h}
|
||||||
|
only{i=j} contain{.m} a{#n} single{o=pqrstuv} attribute{.wxyz} in{#å-ä-ö}
|
||||||
|
the{абв=где} vast{.ёжз} majority{#ийк} of{мно=прс} cases{.туф}.
|
||||||
|
|
||||||
|
However{.they #could potentially=contain .any #number of="attributes," .also
|
||||||
|
#spanning multiple="lines," .even #the values="may span multiple lines
|
||||||
|
like this,
|
||||||
|
or even this"}. Attributes{.may}{#also}{be=concatenated}{.like}{#this}.
|
||||||
|
|
||||||
|
[Attributes]{.abc} *may also*{ghi=jkl} _be attached to containers_{mno=pqr},
|
||||||
|
{=they will most likely also contain a single attribute=}{.stu}.
|
||||||
|
|
||||||
|
[{^Containers [_may *also*{.first}_{#second} be]{.third}^}{.fourth} nested]{#fifth}
|
||||||
|
|
||||||
|
Attribute{values="can
|
||||||
|
span
|
||||||
|
any
|
||||||
|
number
|
||||||
|
of
|
||||||
|
lines
|
||||||
|
like
|
||||||
|
a
|
||||||
|
lot
|
||||||
|
of
|
||||||
|
lineee
|
||||||
|
e
|
||||||
|
ee
|
||||||
|
e
|
||||||
|
e
|
||||||
|
e
|
||||||
|
e
|
||||||
|
s"
|
||||||
|
%same
|
||||||
|
with
|
||||||
|
comments
|
||||||
|
they
|
||||||
|
can
|
||||||
|
be
|
||||||
|
lo
|
||||||
|
o
|
||||||
|
o
|
||||||
|
o
|
||||||
|
o
|
||||||
|
ng%}
|
||||||
|
|
||||||
|
Attribute{values="can
|
||||||
|
span
|
||||||
|
any
|
||||||
|
number
|
||||||
|
of
|
||||||
|
lines
|
||||||
|
like
|
||||||
|
a
|
||||||
|
lot
|
||||||
|
of
|
||||||
|
lineee
|
||||||
|
e
|
||||||
|
ee
|
||||||
|
e
|
||||||
|
e
|
||||||
|
e
|
||||||
|
e
|
||||||
|
s"
|
||||||
|
%even
|
||||||
|
though
|
||||||
|
they
|
||||||
|
are
|
||||||
|
long
|
||||||
|
they
|
||||||
|
could
|
||||||
|
_*turn*_
|
||||||
|
out
|
||||||
|
to
|
||||||
|
not
|
||||||
|
be
|
||||||
|
"attributes"
|
||||||
|
at
|
||||||
|
all
|
||||||
|
in
|
||||||
|
the
|
||||||
|
end
|
||||||
|
}
|
404
src/attr.rs
404
src/attr.rs
|
@ -1,29 +1,34 @@
|
||||||
use crate::CowStr;
|
use crate::CowStr;
|
||||||
use crate::DiscontinuousString;
|
|
||||||
use crate::Span;
|
|
||||||
use std::borrow::Cow;
|
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
|
|
||||||
use State::*;
|
/// Parse attributes, assumed to be valid.
|
||||||
|
pub(crate) fn parse(src: &str) -> Attributes {
|
||||||
pub(crate) fn parse<'s, S: DiscontinuousString<'s>>(chars: S) -> Attributes<'s> {
|
|
||||||
let mut a = Attributes::new();
|
let mut a = Attributes::new();
|
||||||
a.parse(chars);
|
a.parse(src);
|
||||||
a
|
a
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn valid<I: Iterator<Item = char>>(chars: I) -> (usize, bool) {
|
pub fn valid<I: Iterator<Item = char>>(chars: I) -> (usize, bool) {
|
||||||
|
use State::*;
|
||||||
|
|
||||||
let mut has_attr = false;
|
let mut has_attr = false;
|
||||||
let mut p = Parser::new(chars);
|
let mut n = 0;
|
||||||
for e in &mut p {
|
let mut state = Start;
|
||||||
match e {
|
for c in chars {
|
||||||
Element::Class(..) | Element::Identifier(..) | Element::Attribute(..) => {
|
n += 1;
|
||||||
has_attr = true;
|
state = state.step(c);
|
||||||
}
|
match state {
|
||||||
Element::Invalid => return (0, false),
|
Class | Identifier | Value | ValueQuoted => has_attr = true,
|
||||||
|
Done | Invalid => break,
|
||||||
|
_ => {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
(p.pos, has_attr)
|
|
||||||
|
if matches!(state, Done) {
|
||||||
|
(n, has_attr)
|
||||||
|
} else {
|
||||||
|
(0, false)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Stores an attribute value that supports backslash escapes of ASCII punctuation upon displaying,
|
/// Stores an attribute value that supports backslash escapes of ASCII punctuation upon displaying,
|
||||||
|
@ -39,6 +44,23 @@ impl<'s> AttributeValue<'s> {
|
||||||
pub fn parts(&'s self) -> AttributeValueParts<'s> {
|
pub fn parts(&'s self) -> AttributeValueParts<'s> {
|
||||||
AttributeValueParts { ahead: &self.raw }
|
AttributeValueParts { ahead: &self.raw }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// lifetime is 's to avoid allocation if empty value is concatenated with single value
|
||||||
|
fn extend(&mut self, s: &'s str) {
|
||||||
|
match &mut self.raw {
|
||||||
|
CowStr::Borrowed(prev) => {
|
||||||
|
if prev.is_empty() {
|
||||||
|
*prev = s;
|
||||||
|
} else {
|
||||||
|
self.raw = format!("{} {}", prev, s).into();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
CowStr::Owned(ref mut prev) => {
|
||||||
|
prev.push(' ');
|
||||||
|
prev.push_str(s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'s> From<&'s str> for AttributeValue<'s> {
|
impl<'s> From<&'s str> for AttributeValue<'s> {
|
||||||
|
@ -113,24 +135,11 @@ impl<'s> Attributes<'s> {
|
||||||
Self(self.0.take())
|
Self(self.0.take())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn parse<S: DiscontinuousString<'s>>(&mut self, input: S) -> bool {
|
/// Parse and append attributes, assumed to be valid.
|
||||||
#[inline]
|
pub(crate) fn parse(&mut self, input: &'s str) {
|
||||||
fn borrow(cow: CowStr) -> &str {
|
let mut parser = Parser::new(self.take());
|
||||||
match cow {
|
parser.parse(input);
|
||||||
Cow::Owned(_) => panic!(),
|
*self = parser.finish();
|
||||||
Cow::Borrowed(s) => s,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for elem in Parser::new(input.chars()) {
|
|
||||||
match elem {
|
|
||||||
Element::Class(c) => self.insert("class", input.src(c).into()),
|
|
||||||
Element::Identifier(i) => self.insert("id", input.src(i).into()),
|
|
||||||
Element::Attribute(a, v) => self.insert(borrow(input.src(a)), input.src(v).into()),
|
|
||||||
Element::Invalid => return false,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
true
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Combine all attributes from both objects, prioritizing self on conflicts.
|
/// Combine all attributes from both objects, prioritizing self on conflicts.
|
||||||
|
@ -152,6 +161,11 @@ impl<'s> Attributes<'s> {
|
||||||
/// overwritten, unless it is a "class" attribute. In that case the provided value will be
|
/// overwritten, unless it is a "class" attribute. In that case the provided value will be
|
||||||
/// appended to the existing value.
|
/// appended to the existing value.
|
||||||
pub fn insert(&mut self, key: &'s str, val: AttributeValue<'s>) {
|
pub fn insert(&mut self, key: &'s str, val: AttributeValue<'s>) {
|
||||||
|
self.insert_pos(key, val);
|
||||||
|
}
|
||||||
|
|
||||||
|
// duplicate of insert but returns position of inserted value
|
||||||
|
fn insert_pos(&mut self, key: &'s str, val: AttributeValue<'s>) -> usize {
|
||||||
if self.0.is_none() {
|
if self.0.is_none() {
|
||||||
self.0 = Some(Vec::new().into());
|
self.0 = Some(Vec::new().into());
|
||||||
};
|
};
|
||||||
|
@ -160,12 +174,20 @@ impl<'s> Attributes<'s> {
|
||||||
if let Some(i) = attrs.iter().position(|(k, _)| *k == key) {
|
if let Some(i) = attrs.iter().position(|(k, _)| *k == key) {
|
||||||
let prev = &mut attrs[i].1;
|
let prev = &mut attrs[i].1;
|
||||||
if key == "class" {
|
if key == "class" {
|
||||||
*prev = format!("{} {}", prev, val).into();
|
match val.raw {
|
||||||
|
CowStr::Borrowed(s) => prev.extend(s),
|
||||||
|
CowStr::Owned(s) => {
|
||||||
|
*prev = format!("{} {}", prev, s).into();
|
||||||
|
}
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
*prev = val;
|
*prev = val;
|
||||||
}
|
}
|
||||||
|
i
|
||||||
} else {
|
} else {
|
||||||
|
let i = attrs.len();
|
||||||
attrs.push((key, val));
|
attrs.push((key, val));
|
||||||
|
i
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -217,6 +239,106 @@ impl<'s> std::fmt::Debug for Attributes<'s> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct Validator {
|
||||||
|
state: State,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Validator {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
state: State::Start,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn restart(&mut self) {
|
||||||
|
self.state = State::Start;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns number of valid bytes parsed (0 means invalid) if finished, otherwise more input is
|
||||||
|
/// needed.
|
||||||
|
pub fn parse(&mut self, input: &str) -> Option<usize> {
|
||||||
|
let mut chars = input.chars();
|
||||||
|
for c in &mut chars {
|
||||||
|
self.state = self.state.step(c);
|
||||||
|
match self.state {
|
||||||
|
State::Done => return Some(input.len() - chars.as_str().len()),
|
||||||
|
State::Invalid => return Some(0),
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Attributes parser, take input of one or more consecutive attributes and create an `Attributes`
|
||||||
|
/// object.
|
||||||
|
///
|
||||||
|
/// Input is assumed to contain a valid series of attribute sets, the attributes are added as they
|
||||||
|
/// are encountered.
|
||||||
|
pub struct Parser<'s> {
|
||||||
|
attrs: Attributes<'s>,
|
||||||
|
i_prev: usize,
|
||||||
|
state: State,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'s> Parser<'s> {
|
||||||
|
pub fn new(attrs: Attributes<'s>) -> Self {
|
||||||
|
Self {
|
||||||
|
attrs,
|
||||||
|
i_prev: usize::MAX,
|
||||||
|
state: State::Start,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse a chunk of input, assumed to be valid, inserting attributes as they are completed.
|
||||||
|
/// Nothing is returned; parsing stops early once a closing `}` is not followed by another `{`.
|
||||||
|
pub fn parse(&mut self, input: &'s str) {
|
||||||
|
use State::*;
|
||||||
|
|
||||||
|
let mut pos = 0;
|
||||||
|
let mut pos_prev = 0;
|
||||||
|
|
||||||
|
for c in input.chars() {
|
||||||
|
let state_next = self.state.step(c);
|
||||||
|
let st = std::mem::replace(&mut self.state, state_next);
|
||||||
|
|
||||||
|
if st != self.state && !matches!((st, self.state), (ValueEscape, _) | (_, ValueEscape))
|
||||||
|
{
|
||||||
|
let content = &input[pos_prev..pos];
|
||||||
|
pos_prev = pos;
|
||||||
|
match st {
|
||||||
|
Class => self.attrs.insert("class", content.into()),
|
||||||
|
Identifier => self.attrs.insert("id", content.into()),
|
||||||
|
Key => self.i_prev = self.attrs.insert_pos(content, "".into()),
|
||||||
|
Value | ValueQuoted | ValueContinued => {
|
||||||
|
self.attrs.0.as_mut().unwrap()[self.i_prev]
|
||||||
|
.1
|
||||||
|
.extend(&content[usize::from(matches!(st, ValueQuoted))..]);
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
pos += c.len_utf8();
|
||||||
|
|
||||||
|
debug_assert!(!matches!(self.state, Invalid));
|
||||||
|
|
||||||
|
if matches!(self.state, Done) {
|
||||||
|
if input[pos..].starts_with('{') {
|
||||||
|
self.state = Start;
|
||||||
|
} else {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn finish(self) -> Attributes<'s> {
|
||||||
|
self.attrs
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
enum State {
|
enum State {
|
||||||
Start,
|
Start,
|
||||||
|
@ -226,190 +348,63 @@ enum State {
|
||||||
Class,
|
Class,
|
||||||
IdentifierFirst,
|
IdentifierFirst,
|
||||||
Identifier,
|
Identifier,
|
||||||
Attribute,
|
Key,
|
||||||
ValueFirst,
|
ValueFirst,
|
||||||
Value,
|
Value,
|
||||||
ValueQuoted,
|
ValueQuoted,
|
||||||
|
ValueEscape,
|
||||||
|
ValueNewline,
|
||||||
|
ValueContinued,
|
||||||
Done,
|
Done,
|
||||||
Invalid,
|
Invalid,
|
||||||
}
|
}
|
||||||
|
|
||||||
struct Parser<I> {
|
impl State {
|
||||||
chars: I,
|
fn step(self, c: char) -> State {
|
||||||
pos: usize,
|
use State::*;
|
||||||
pos_prev: usize,
|
|
||||||
state: State,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<I: Iterator<Item = char>> Parser<I> {
|
match self {
|
||||||
fn new(chars: I) -> Self {
|
Start if c == '{' => Whitespace,
|
||||||
Parser {
|
Start => Invalid,
|
||||||
chars,
|
Whitespace => match c {
|
||||||
pos: 0,
|
'}' => Done,
|
||||||
pos_prev: 0,
|
'.' => ClassFirst,
|
||||||
state: Start,
|
'#' => IdentifierFirst,
|
||||||
|
'%' => Comment,
|
||||||
|
c if is_name(c) => Key,
|
||||||
|
c if c.is_whitespace() => Whitespace,
|
||||||
|
_ => Invalid,
|
||||||
|
},
|
||||||
|
Comment if c == '%' => Whitespace,
|
||||||
|
Comment => Comment,
|
||||||
|
ClassFirst if is_name(c) => Class,
|
||||||
|
ClassFirst => Invalid,
|
||||||
|
IdentifierFirst if is_name(c) => Identifier,
|
||||||
|
IdentifierFirst => Invalid,
|
||||||
|
s @ (Class | Identifier | Value) if is_name(c) => s,
|
||||||
|
Class | Identifier | Value if c.is_whitespace() => Whitespace,
|
||||||
|
Class | Identifier | Value if c == '}' => Done,
|
||||||
|
Class | Identifier | Value => Invalid,
|
||||||
|
Key if is_name(c) => Key,
|
||||||
|
Key if c == '=' => ValueFirst,
|
||||||
|
Key => Invalid,
|
||||||
|
ValueFirst if is_name(c) => Value,
|
||||||
|
ValueFirst if c == '"' => ValueQuoted,
|
||||||
|
ValueFirst => Invalid,
|
||||||
|
ValueQuoted | ValueNewline | ValueContinued if c == '"' => Whitespace,
|
||||||
|
ValueQuoted | ValueNewline | ValueContinued | ValueEscape if c == '\n' => ValueNewline,
|
||||||
|
ValueQuoted if c == '\\' => ValueEscape,
|
||||||
|
ValueQuoted | ValueEscape => ValueQuoted,
|
||||||
|
ValueNewline | ValueContinued => ValueContinued,
|
||||||
|
Invalid | Done => panic!("{:?}", self),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn step_char(&mut self) -> Option<State> {
|
|
||||||
self.chars.next().map(|c| {
|
|
||||||
self.pos_prev = self.pos;
|
|
||||||
self.pos += c.len_utf8();
|
|
||||||
match self.state {
|
|
||||||
Start => match c {
|
|
||||||
'{' => Whitespace,
|
|
||||||
_ => Invalid,
|
|
||||||
},
|
|
||||||
Whitespace => match c {
|
|
||||||
'}' => Done,
|
|
||||||
'.' => ClassFirst,
|
|
||||||
'#' => IdentifierFirst,
|
|
||||||
'%' => Comment,
|
|
||||||
c if c.is_ascii_alphanumeric() || matches!(c, '_' | ':' | '-') => Attribute,
|
|
||||||
c if c.is_whitespace() => Whitespace,
|
|
||||||
_ => Invalid,
|
|
||||||
},
|
|
||||||
Comment => {
|
|
||||||
if c == '%' {
|
|
||||||
Whitespace
|
|
||||||
} else {
|
|
||||||
Comment
|
|
||||||
}
|
|
||||||
}
|
|
||||||
s @ (ClassFirst | IdentifierFirst) => {
|
|
||||||
if is_name(c) {
|
|
||||||
match s {
|
|
||||||
ClassFirst => Class,
|
|
||||||
IdentifierFirst => Identifier,
|
|
||||||
_ => panic!(),
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
Invalid
|
|
||||||
}
|
|
||||||
}
|
|
||||||
s @ (Class | Identifier | Value) => {
|
|
||||||
if is_name(c) {
|
|
||||||
s
|
|
||||||
} else if c.is_whitespace() {
|
|
||||||
Whitespace
|
|
||||||
} else if c == '}' {
|
|
||||||
Done
|
|
||||||
} else {
|
|
||||||
Invalid
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Attribute => {
|
|
||||||
if is_name(c) {
|
|
||||||
Attribute
|
|
||||||
} else if c == '=' {
|
|
||||||
ValueFirst
|
|
||||||
} else {
|
|
||||||
Invalid
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ValueFirst => {
|
|
||||||
if is_name(c) {
|
|
||||||
Value
|
|
||||||
} else if c == '"' {
|
|
||||||
ValueQuoted
|
|
||||||
} else {
|
|
||||||
Invalid
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ValueQuoted => match c {
|
|
||||||
'\\' => {
|
|
||||||
if let Some(c) = self.chars.next() {
|
|
||||||
self.pos_prev = self.pos;
|
|
||||||
self.pos += c.len_utf8();
|
|
||||||
}
|
|
||||||
ValueQuoted
|
|
||||||
}
|
|
||||||
'"' => Whitespace,
|
|
||||||
_ => ValueQuoted,
|
|
||||||
},
|
|
||||||
Invalid | Done => panic!("{:?}", self.state),
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn step(&mut self) -> (State, Span) {
|
|
||||||
let start = self.pos_prev;
|
|
||||||
|
|
||||||
if self.state == Done {
|
|
||||||
return (Done, Span::empty_at(start));
|
|
||||||
}
|
|
||||||
|
|
||||||
if self.state == Invalid {
|
|
||||||
return (Invalid, Span::empty_at(start));
|
|
||||||
}
|
|
||||||
|
|
||||||
while let Some(state_next) = self.step_char() {
|
|
||||||
if self.state != state_next {
|
|
||||||
return (
|
|
||||||
std::mem::replace(&mut self.state, state_next),
|
|
||||||
Span::new(start, self.pos_prev),
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
(
|
|
||||||
if self.state == Done { Done } else { Invalid },
|
|
||||||
Span::new(start, self.pos_prev),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn is_name(c: char) -> bool {
|
pub fn is_name(c: char) -> bool {
|
||||||
c.is_ascii_alphanumeric() || matches!(c, ':' | '_' | '-')
|
c.is_ascii_alphanumeric() || matches!(c, ':' | '_' | '-')
|
||||||
}
|
}
|
||||||
|
|
||||||
enum Element {
|
|
||||||
Class(Span),
|
|
||||||
Identifier(Span),
|
|
||||||
Attribute(Span, Span),
|
|
||||||
Invalid,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<I: Iterator<Item = char>> Iterator for Parser<I> {
|
|
||||||
type Item = Element;
|
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
|
||||||
loop {
|
|
||||||
let (st, span0) = self.step();
|
|
||||||
return match st {
|
|
||||||
ClassFirst | IdentifierFirst => {
|
|
||||||
let (st, span1) = self.step();
|
|
||||||
Some(match st {
|
|
||||||
Class => Element::Class(span1),
|
|
||||||
Identifier => Element::Identifier(span1),
|
|
||||||
_ => return Some(Element::Invalid),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
Attribute => {
|
|
||||||
let (st, _span1) = self.step();
|
|
||||||
match st {
|
|
||||||
ValueFirst => {
|
|
||||||
let (st, span2) = self.step();
|
|
||||||
match st {
|
|
||||||
Value => Some(Element::Attribute(span0, span2)),
|
|
||||||
ValueQuoted => Some(Element::Attribute(span0, span2.skip(1))),
|
|
||||||
Invalid => Some(Element::Invalid),
|
|
||||||
_ => panic!("{:?}", st),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Invalid => Some(Element::Invalid),
|
|
||||||
_ => panic!("{:?}", st),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Comment | Start | Whitespace => continue,
|
|
||||||
Done => None,
|
|
||||||
Invalid => Some(Element::Invalid),
|
|
||||||
_ => panic!("{:?}", st),
|
|
||||||
};
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test {
|
mod test {
|
||||||
macro_rules! test_attr {
|
macro_rules! test_attr {
|
||||||
|
@ -471,6 +466,11 @@ mod test {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn value_newline() {
|
||||||
|
test_attr!("{attr0=\"abc\ndef\"}", ("attr0", "abc def"));
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn comment() {
|
fn comment() {
|
||||||
test_attr!("{%%}");
|
test_attr!("{%%}");
|
||||||
|
|
|
@ -215,7 +215,7 @@ impl<'s> TreeParser<'s> {
|
||||||
|
|
||||||
// close list if a non list item or a list item of new type appeared
|
// close list if a non list item or a list item of new type appeared
|
||||||
if let Some(OpenList { ty, depth, .. }) = self.open_lists.last() {
|
if let Some(OpenList { ty, depth, .. }) = self.open_lists.last() {
|
||||||
assert!(usize::from(*depth) <= self.tree.depth());
|
debug_assert!(usize::from(*depth) <= self.tree.depth());
|
||||||
if self.tree.depth() == (*depth).into()
|
if self.tree.depth() == (*depth).into()
|
||||||
&& !matches!(kind, Kind::ListItem { ty: ty_new, .. } if *ty == ty_new)
|
&& !matches!(kind, Kind::ListItem { ty: ty_new, .. } if *ty == ty_new)
|
||||||
{
|
{
|
||||||
|
@ -405,7 +405,7 @@ impl<'s> TreeParser<'s> {
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(OpenList { depth, .. }) = self.open_lists.last() {
|
if let Some(OpenList { depth, .. }) = self.open_lists.last() {
|
||||||
assert!(usize::from(*depth) <= self.tree.depth());
|
debug_assert!(usize::from(*depth) <= self.tree.depth());
|
||||||
if self.tree.depth() == (*depth).into() {
|
if self.tree.depth() == (*depth).into() {
|
||||||
self.prev_blankline = false;
|
self.prev_blankline = false;
|
||||||
self.prev_loose = false;
|
self.prev_loose = false;
|
||||||
|
@ -447,7 +447,7 @@ impl<'s> TreeParser<'s> {
|
||||||
.tree
|
.tree
|
||||||
.enter(Node::Container(TableRow { head: false }), row.with_len(1));
|
.enter(Node::Container(TableRow { head: false }), row.with_len(1));
|
||||||
let rem = row.skip(1); // |
|
let rem = row.skip(1); // |
|
||||||
let lex = lex::Lexer::new(rem.of(self.src).chars());
|
let lex = lex::Lexer::new(rem.of(self.src));
|
||||||
let mut pos = rem.start();
|
let mut pos = rem.start();
|
||||||
let mut cell_start = pos;
|
let mut cell_start = pos;
|
||||||
let mut separator_row = true;
|
let mut separator_row = true;
|
||||||
|
|
|
@ -96,7 +96,7 @@ impl Render for Renderer {
|
||||||
Container::DescriptionList => out.write_str("<dl")?,
|
Container::DescriptionList => out.write_str("<dl")?,
|
||||||
Container::DescriptionDetails => out.write_str("<dd")?,
|
Container::DescriptionDetails => out.write_str("<dd")?,
|
||||||
Container::Footnote { number, .. } => {
|
Container::Footnote { number, .. } => {
|
||||||
assert!(self.footnote_number.is_none());
|
debug_assert!(self.footnote_number.is_none());
|
||||||
self.footnote_number = Some((*number).try_into().unwrap());
|
self.footnote_number = Some((*number).try_into().unwrap());
|
||||||
if !self.encountered_footnote {
|
if !self.encountered_footnote {
|
||||||
self.encountered_footnote = true;
|
self.encountered_footnote = true;
|
||||||
|
|
1689
src/inline.rs
1689
src/inline.rs
File diff suppressed because it is too large
Load diff
86
src/lex.rs
86
src/lex.rs
|
@ -13,7 +13,6 @@ pub(crate) struct Token {
|
||||||
pub enum Kind {
|
pub enum Kind {
|
||||||
Text,
|
Text,
|
||||||
Newline,
|
Newline,
|
||||||
Whitespace,
|
|
||||||
Nbsp,
|
Nbsp,
|
||||||
Hardbreak,
|
Hardbreak,
|
||||||
Escape,
|
Escape,
|
||||||
|
@ -21,6 +20,7 @@ pub enum Kind {
|
||||||
Close(Delimiter),
|
Close(Delimiter),
|
||||||
Sym(Symbol),
|
Sym(Symbol),
|
||||||
Seq(Sequence),
|
Seq(Sequence),
|
||||||
|
DollarBacktick(u8),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
|
@ -36,6 +36,7 @@ pub enum Delimiter {
|
||||||
Bracket,
|
Bracket,
|
||||||
BraceQuote1,
|
BraceQuote1,
|
||||||
BraceQuote2,
|
BraceQuote2,
|
||||||
|
Paren,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
|
@ -55,7 +56,6 @@ pub enum Symbol {
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
pub enum Sequence {
|
pub enum Sequence {
|
||||||
Backtick,
|
Backtick,
|
||||||
Dollar,
|
|
||||||
Hyphen,
|
Hyphen,
|
||||||
Period,
|
Period,
|
||||||
}
|
}
|
||||||
|
@ -64,7 +64,6 @@ impl Sequence {
|
||||||
fn ch(self) -> char {
|
fn ch(self) -> char {
|
||||||
match self {
|
match self {
|
||||||
Self::Backtick => '`',
|
Self::Backtick => '`',
|
||||||
Self::Dollar => '$',
|
|
||||||
Self::Period => '.',
|
Self::Period => '.',
|
||||||
Self::Hyphen => '-',
|
Self::Hyphen => '-',
|
||||||
}
|
}
|
||||||
|
@ -72,9 +71,9 @@ impl Sequence {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub(crate) struct Lexer<I: Iterator + Clone> {
|
pub(crate) struct Lexer<'s> {
|
||||||
chars: I,
|
src: &'s str,
|
||||||
chars_non_peeked: I,
|
chars: std::str::Chars<'s>,
|
||||||
/// Next character should be escaped.
|
/// Next character should be escaped.
|
||||||
escape: bool,
|
escape: bool,
|
||||||
/// Token to be peeked or next'ed.
|
/// Token to be peeked or next'ed.
|
||||||
|
@ -83,11 +82,11 @@ pub(crate) struct Lexer<I: Iterator + Clone> {
|
||||||
len: usize,
|
len: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<I: Iterator<Item = char> + Clone> Lexer<I> {
|
impl<'s> Lexer<'s> {
|
||||||
pub fn new(chars: I) -> Lexer<I> {
|
pub fn new(src: &'s str) -> Self {
|
||||||
Lexer {
|
Lexer {
|
||||||
chars: chars.clone(),
|
src,
|
||||||
chars_non_peeked: chars,
|
chars: src.chars(),
|
||||||
escape: false,
|
escape: false,
|
||||||
next: None,
|
next: None,
|
||||||
len: 0,
|
len: 0,
|
||||||
|
@ -103,13 +102,14 @@ impl<I: Iterator<Item = char> + Clone> Lexer<I> {
|
||||||
self.next.as_ref()
|
self.next.as_ref()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn chars(&self) -> I {
|
pub fn ahead(&self) -> &'s str {
|
||||||
self.chars_non_peeked.clone()
|
let pos =
|
||||||
|
self.src.len() - self.chars.as_str().len() - self.next.as_ref().map_or(0, |t| t.len);
|
||||||
|
&self.src[pos..]
|
||||||
}
|
}
|
||||||
|
|
||||||
fn next_token(&mut self) -> Option<Token> {
|
fn next_token(&mut self) -> Option<Token> {
|
||||||
let mut current = self.token();
|
let mut current = self.token();
|
||||||
self.chars_non_peeked = self.chars.clone();
|
|
||||||
|
|
||||||
// concatenate text tokens
|
// concatenate text tokens
|
||||||
if let Some(Token { kind: Text, len }) = &mut current {
|
if let Some(Token { kind: Text, len }) = &mut current {
|
||||||
|
@ -148,7 +148,6 @@ impl<I: Iterator<Item = char> + Clone> Lexer<I> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn token(&mut self) -> Option<Token> {
|
fn token(&mut self) -> Option<Token> {
|
||||||
self.chars_non_peeked = self.chars.clone();
|
|
||||||
self.len = 0;
|
self.len = 0;
|
||||||
|
|
||||||
let first = self.eat_char()?;
|
let first = self.eat_char()?;
|
||||||
|
@ -167,6 +166,8 @@ impl<I: Iterator<Item = char> + Clone> Lexer<I> {
|
||||||
_ if escape && first == ' ' => Nbsp,
|
_ if escape && first == ' ' => Nbsp,
|
||||||
_ if escape => Text,
|
_ if escape => Text,
|
||||||
|
|
||||||
|
'\n' => Newline,
|
||||||
|
|
||||||
'\\' => {
|
'\\' => {
|
||||||
if self
|
if self
|
||||||
.peek_char()
|
.peek_char()
|
||||||
|
@ -179,14 +180,10 @@ impl<I: Iterator<Item = char> + Clone> Lexer<I> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
'\n' => Newline,
|
|
||||||
_ if first.is_whitespace() => {
|
|
||||||
self.eat_while(char::is_whitespace);
|
|
||||||
Whitespace
|
|
||||||
}
|
|
||||||
|
|
||||||
'[' => Open(Bracket),
|
'[' => Open(Bracket),
|
||||||
']' => Close(Bracket),
|
']' => Close(Bracket),
|
||||||
|
'(' => Open(Paren),
|
||||||
|
')' => Close(Paren),
|
||||||
'{' => {
|
'{' => {
|
||||||
let explicit = match self.peek_char() {
|
let explicit = match self.peek_char() {
|
||||||
Some('*') => Some(Open(BraceAsterisk)),
|
Some('*') => Some(Open(BraceAsterisk)),
|
||||||
|
@ -207,6 +204,7 @@ impl<I: Iterator<Item = char> + Clone> Lexer<I> {
|
||||||
Open(Brace)
|
Open(Brace)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
'}' => Close(Brace),
|
||||||
'*' => self.maybe_eat_close_brace(Sym(Asterisk), BraceAsterisk),
|
'*' => self.maybe_eat_close_brace(Sym(Asterisk), BraceAsterisk),
|
||||||
'^' => self.maybe_eat_close_brace(Sym(Caret), BraceCaret),
|
'^' => self.maybe_eat_close_brace(Sym(Caret), BraceCaret),
|
||||||
'=' => self.maybe_eat_close_brace(Text, BraceEqual),
|
'=' => self.maybe_eat_close_brace(Text, BraceEqual),
|
||||||
|
@ -236,8 +234,21 @@ impl<I: Iterator<Item = char> + Clone> Lexer<I> {
|
||||||
':' => Sym(Colon),
|
':' => Sym(Colon),
|
||||||
|
|
||||||
'`' => self.eat_seq(Backtick),
|
'`' => self.eat_seq(Backtick),
|
||||||
'$' => self.eat_seq(Dollar),
|
|
||||||
'.' => self.eat_seq(Period),
|
'.' => self.eat_seq(Period),
|
||||||
|
'$' => {
|
||||||
|
self.eat_while(|c| c == '$');
|
||||||
|
let mut n_ticks: u8 = 0;
|
||||||
|
self.eat_while(|c| {
|
||||||
|
if c == '`' {
|
||||||
|
if let Some(l) = n_ticks.checked_add(1) {
|
||||||
|
n_ticks = l;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
false
|
||||||
|
});
|
||||||
|
DollarBacktick(n_ticks)
|
||||||
|
}
|
||||||
|
|
||||||
_ => Text,
|
_ => Text,
|
||||||
};
|
};
|
||||||
|
@ -267,17 +278,11 @@ impl<I: Iterator<Item = char> + Clone> Lexer<I> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<I: Iterator<Item = char> + Clone> Iterator for Lexer<I> {
|
impl<'s> Iterator for Lexer<'s> {
|
||||||
type Item = Token;
|
type Item = Token;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
self.next
|
self.next.take().or_else(|| self.next_token())
|
||||||
.take()
|
|
||||||
.map(|x| {
|
|
||||||
self.chars_non_peeked = self.chars.clone();
|
|
||||||
x
|
|
||||||
})
|
|
||||||
.or_else(|| self.next_token())
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -291,7 +296,7 @@ mod test {
|
||||||
macro_rules! test_lex {
|
macro_rules! test_lex {
|
||||||
($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => {
|
($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => {
|
||||||
#[allow(unused)]
|
#[allow(unused)]
|
||||||
let actual = super::Lexer::new($src.chars()).collect::<Vec<_>>();
|
let actual = super::Lexer::new($src).collect::<Vec<_>>();
|
||||||
let expected = vec![$($($token),*,)?];
|
let expected = vec![$($($token),*,)?];
|
||||||
assert_eq!(actual, expected, "{}", $src);
|
assert_eq!(actual, expected, "{}", $src);
|
||||||
};
|
};
|
||||||
|
@ -313,18 +318,11 @@ mod test {
|
||||||
test_lex!("abc", Text.l(3));
|
test_lex!("abc", Text.l(3));
|
||||||
test_lex!(
|
test_lex!(
|
||||||
"para w/ some _emphasis_ and *strong*.",
|
"para w/ some _emphasis_ and *strong*.",
|
||||||
Text.l(4),
|
Text.l(13),
|
||||||
Whitespace.l(1),
|
|
||||||
Text.l(2),
|
|
||||||
Whitespace.l(1),
|
|
||||||
Text.l(4),
|
|
||||||
Whitespace.l(1),
|
|
||||||
Sym(Underscore).l(1),
|
Sym(Underscore).l(1),
|
||||||
Text.l(8),
|
Text.l(8),
|
||||||
Sym(Underscore).l(1),
|
Sym(Underscore).l(1),
|
||||||
Whitespace.l(1),
|
Text.l(5),
|
||||||
Text.l(3),
|
|
||||||
Whitespace.l(1),
|
|
||||||
Sym(Asterisk).l(1),
|
Sym(Asterisk).l(1),
|
||||||
Text.l(6),
|
Text.l(6),
|
||||||
Sym(Asterisk).l(1),
|
Sym(Asterisk).l(1),
|
||||||
|
@ -383,11 +381,17 @@ mod test {
|
||||||
test_lex!("`", Seq(Backtick).l(1));
|
test_lex!("`", Seq(Backtick).l(1));
|
||||||
test_lex!("```", Seq(Backtick).l(3));
|
test_lex!("```", Seq(Backtick).l(3));
|
||||||
test_lex!(
|
test_lex!(
|
||||||
"`$-.",
|
"`-.",
|
||||||
Seq(Backtick).l(1),
|
Seq(Backtick).l(1),
|
||||||
Seq(Dollar).l(1),
|
|
||||||
Seq(Hyphen).l(1),
|
Seq(Hyphen).l(1),
|
||||||
Seq(Period).l(1),
|
Seq(Period).l(1),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn dollar_backtick() {
|
||||||
|
test_lex!("$`", DollarBacktick(1).l(2));
|
||||||
|
test_lex!("$$$`", DollarBacktick(1).l(4));
|
||||||
|
test_lex!("$$````", DollarBacktick(4).l(6));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
392
src/lib.rs
392
src/lib.rs
|
@ -46,6 +46,8 @@
|
||||||
//! # }
|
//! # }
|
||||||
//! ```
|
//! ```
|
||||||
|
|
||||||
|
#![allow(clippy::blocks_in_if_conditions)]
|
||||||
|
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::fmt::Write as FmtWrite;
|
use std::fmt::Write as FmtWrite;
|
||||||
use std::io;
|
use std::io;
|
||||||
|
@ -60,7 +62,6 @@ mod lex;
|
||||||
mod span;
|
mod span;
|
||||||
mod tree;
|
mod tree;
|
||||||
|
|
||||||
use span::DiscontinuousString;
|
|
||||||
use span::Span;
|
use span::Span;
|
||||||
|
|
||||||
pub use attr::{AttributeValue, AttributeValueParts, Attributes};
|
pub use attr::{AttributeValue, AttributeValueParts, Attributes};
|
||||||
|
@ -576,6 +577,9 @@ type Set<T> = std::collections::BTreeSet<T>;
|
||||||
/// structure that will be kept for the duration of the parser's lifetime. Then, when the iterator
|
/// structure that will be kept for the duration of the parser's lifetime. Then, when the iterator
|
||||||
/// is advanced, the parser will start from the beginning of the document and parse inline elements
|
/// is advanced, the parser will start from the beginning of the document and parse inline elements
|
||||||
/// and emit [`Event`]s.
|
/// and emit [`Event`]s.
|
||||||
|
///
|
||||||
|
/// It is possible to clone the parser to e.g. avoid performing the block parsing multiple times.
|
||||||
|
#[derive(Clone)]
|
||||||
pub struct Parser<'s> {
|
pub struct Parser<'s> {
|
||||||
src: &'s str,
|
src: &'s str,
|
||||||
|
|
||||||
|
@ -591,6 +595,9 @@ pub struct Parser<'s> {
|
||||||
/// Current table row is a head row.
|
/// Current table row is a head row.
|
||||||
table_head_row: bool,
|
table_head_row: bool,
|
||||||
|
|
||||||
|
/// Currently within a verbatim code block.
|
||||||
|
verbatim: bool,
|
||||||
|
|
||||||
/// Footnote references in the order they were encountered, without duplicates.
|
/// Footnote references in the order they were encountered, without duplicates.
|
||||||
footnote_references: Vec<&'s str>,
|
footnote_references: Vec<&'s str>,
|
||||||
/// Cache of footnotes to emit at the end.
|
/// Cache of footnotes to emit at the end.
|
||||||
|
@ -600,12 +607,11 @@ pub struct Parser<'s> {
|
||||||
/// Currently within a footnote.
|
/// Currently within a footnote.
|
||||||
footnote_active: bool,
|
footnote_active: bool,
|
||||||
|
|
||||||
/// Spans to the inlines in the leaf block currently being parsed.
|
/// Inline parser.
|
||||||
inlines: span::InlineSpans<'s>,
|
inline_parser: inline::Parser<'s>,
|
||||||
/// Inline parser, recreated for each new inline.
|
|
||||||
inline_parser: Option<inline::Parser<span::InlineCharsIter<'s>>>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
struct Heading {
|
struct Heading {
|
||||||
/// Location of heading in src.
|
/// Location of heading in src.
|
||||||
location: usize,
|
location: usize,
|
||||||
|
@ -616,6 +622,7 @@ struct Heading {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Because of potential future references, an initial pass is required to obtain all definitions.
|
/// Because of potential future references, an initial pass is required to obtain all definitions.
|
||||||
|
#[derive(Clone)]
|
||||||
struct PrePass<'s> {
|
struct PrePass<'s> {
|
||||||
/// Link definitions and their attributes.
|
/// Link definitions and their attributes.
|
||||||
link_definitions: Map<&'s str, (CowStr<'s>, attr::Attributes<'s>)>,
|
link_definitions: Map<&'s str, (CowStr<'s>, attr::Attributes<'s>)>,
|
||||||
|
@ -627,13 +634,11 @@ struct PrePass<'s> {
|
||||||
|
|
||||||
impl<'s> PrePass<'s> {
|
impl<'s> PrePass<'s> {
|
||||||
#[must_use]
|
#[must_use]
|
||||||
fn new(src: &'s str, mut tree: block::Tree) -> Self {
|
fn new(src: &'s str, mut tree: block::Tree, inline_parser: &mut inline::Parser<'s>) -> Self {
|
||||||
let mut link_definitions = Map::new();
|
let mut link_definitions = Map::new();
|
||||||
let mut headings: Vec<Heading> = Vec::new();
|
let mut headings: Vec<Heading> = Vec::new();
|
||||||
let mut used_ids: Set<&str> = Set::new();
|
let mut used_ids: Set<&str> = Set::new();
|
||||||
|
|
||||||
let mut inlines = span::InlineSpans::new(src);
|
|
||||||
|
|
||||||
let mut attr_prev: Option<Span> = None;
|
let mut attr_prev: Option<Span> = None;
|
||||||
while let Some(e) = tree.next() {
|
while let Some(e) = tree.next() {
|
||||||
match e.kind {
|
match e.kind {
|
||||||
|
@ -662,31 +667,35 @@ impl<'s> PrePass<'s> {
|
||||||
.and_then(|attrs| attrs.get("id"))
|
.and_then(|attrs| attrs.get("id"))
|
||||||
.map(ToString::to_string);
|
.map(ToString::to_string);
|
||||||
|
|
||||||
inlines.set_spans(tree.take_inlines());
|
|
||||||
let mut id_auto = String::new();
|
let mut id_auto = String::new();
|
||||||
let mut last_whitespace = true;
|
let mut last_whitespace = true;
|
||||||
inline::Parser::new(inlines.chars()).for_each(|ev| match ev.kind {
|
let inlines = tree.take_inlines().collect::<Vec<_>>();
|
||||||
inline::EventKind::Str => {
|
inline_parser.reset();
|
||||||
let mut chars = inlines.slice(ev.span).chars().peekable();
|
inlines.iter().enumerate().for_each(|(i, sp)| {
|
||||||
while let Some(c) = chars.next() {
|
inline_parser.feed_line(*sp, i == inlines.len() - 1);
|
||||||
if c.is_whitespace() {
|
inline_parser.for_each(|ev| match ev.kind {
|
||||||
while chars.peek().map_or(false, |c| c.is_whitespace()) {
|
inline::EventKind::Str => {
|
||||||
chars.next();
|
let mut chars = ev.span.of(src).chars().peekable();
|
||||||
|
while let Some(c) = chars.next() {
|
||||||
|
if c.is_whitespace() {
|
||||||
|
while chars.peek().map_or(false, |c| c.is_whitespace()) {
|
||||||
|
chars.next();
|
||||||
|
}
|
||||||
|
if !last_whitespace {
|
||||||
|
last_whitespace = true;
|
||||||
|
id_auto.push('-');
|
||||||
|
}
|
||||||
|
} else if !c.is_ascii_punctuation() || matches!(c, '-' | '_') {
|
||||||
|
id_auto.push(c);
|
||||||
|
last_whitespace = false;
|
||||||
}
|
}
|
||||||
if !last_whitespace {
|
|
||||||
last_whitespace = true;
|
|
||||||
id_auto.push('-');
|
|
||||||
}
|
|
||||||
} else if !c.is_ascii_punctuation() || matches!(c, '-' | '_') {
|
|
||||||
id_auto.push(c);
|
|
||||||
last_whitespace = false;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
inline::EventKind::Atom(inline::Atom::Softbreak) => {
|
||||||
inline::EventKind::Atom(inline::Atom::Softbreak) => {
|
id_auto.push('-');
|
||||||
id_auto.push('-');
|
}
|
||||||
}
|
_ => {}
|
||||||
_ => {}
|
})
|
||||||
});
|
});
|
||||||
id_auto.drain(id_auto.trim_end_matches('-').len()..);
|
id_auto.drain(id_auto.trim_end_matches('-').len()..);
|
||||||
|
|
||||||
|
@ -765,7 +774,8 @@ impl<'s> Parser<'s> {
|
||||||
#[must_use]
|
#[must_use]
|
||||||
pub fn new(src: &'s str) -> Self {
|
pub fn new(src: &'s str) -> Self {
|
||||||
let tree = block::parse(src);
|
let tree = block::parse(src);
|
||||||
let pre_pass = PrePass::new(src, tree.clone());
|
let mut inline_parser = inline::Parser::new(src);
|
||||||
|
let pre_pass = PrePass::new(src, tree.clone(), &mut inline_parser);
|
||||||
|
|
||||||
Self {
|
Self {
|
||||||
src,
|
src,
|
||||||
|
@ -773,34 +783,32 @@ impl<'s> Parser<'s> {
|
||||||
pre_pass,
|
pre_pass,
|
||||||
block_attributes: Attributes::new(),
|
block_attributes: Attributes::new(),
|
||||||
table_head_row: false,
|
table_head_row: false,
|
||||||
|
verbatim: false,
|
||||||
footnote_references: Vec::new(),
|
footnote_references: Vec::new(),
|
||||||
footnotes: Map::new(),
|
footnotes: Map::new(),
|
||||||
footnote_index: 0,
|
footnote_index: 0,
|
||||||
footnote_active: false,
|
footnote_active: false,
|
||||||
inlines: span::InlineSpans::new(src),
|
inline_parser,
|
||||||
inline_parser: None,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn inline(&mut self) -> Option<Event<'s>> {
|
fn inline(&mut self) -> Option<Event<'s>> {
|
||||||
self.inline_parser.as_mut().and_then(|parser| {
|
let next = self.inline_parser.next()?;
|
||||||
let mut inline = parser.next();
|
|
||||||
|
|
||||||
let mut first_is_attr = false;
|
let (inline, mut attributes) = match next {
|
||||||
let mut attributes = inline.as_ref().map_or_else(Attributes::new, |inl| {
|
inline::Event {
|
||||||
if let inline::EventKind::Attributes = inl.kind {
|
kind: inline::EventKind::Attributes { attrs, .. },
|
||||||
first_is_attr = true;
|
..
|
||||||
attr::parse(self.inlines.slice(inl.span))
|
} => (
|
||||||
} else {
|
self.inline_parser.next(),
|
||||||
Attributes::new()
|
self.inline_parser.store_attributes[attrs as usize].clone(),
|
||||||
}
|
),
|
||||||
});
|
inline => (Some(inline), Attributes::new()),
|
||||||
|
};
|
||||||
|
|
||||||
if first_is_attr {
|
inline.map(|inline| {
|
||||||
inline = parser.next();
|
let enter = matches!(inline.kind, inline::EventKind::Enter(_));
|
||||||
}
|
match inline.kind {
|
||||||
|
|
||||||
inline.map(|inline| match inline.kind {
|
|
||||||
inline::EventKind::Enter(c) | inline::EventKind::Exit(c) => {
|
inline::EventKind::Enter(c) | inline::EventKind::Exit(c) => {
|
||||||
let t = match c {
|
let t = match c {
|
||||||
inline::Container::Span => Container::Span,
|
inline::Container::Span => Container::Span,
|
||||||
|
@ -808,10 +816,7 @@ impl<'s> Parser<'s> {
|
||||||
inline::Container::InlineMath => Container::Math { display: false },
|
inline::Container::InlineMath => Container::Math { display: false },
|
||||||
inline::Container::DisplayMath => Container::Math { display: true },
|
inline::Container::DisplayMath => Container::Math { display: true },
|
||||||
inline::Container::RawFormat => Container::RawInline {
|
inline::Container::RawFormat => Container::RawInline {
|
||||||
format: match self.inlines.src(inline.span) {
|
format: inline.span.of(self.src),
|
||||||
CowStr::Owned(_) => panic!(),
|
|
||||||
CowStr::Borrowed(s) => s,
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
inline::Container::Subscript => Container::Subscript,
|
inline::Container::Subscript => Container::Subscript,
|
||||||
inline::Container::Superscript => Container::Superscript,
|
inline::Container::Superscript => Container::Superscript,
|
||||||
|
@ -820,46 +825,41 @@ impl<'s> Parser<'s> {
|
||||||
inline::Container::Emphasis => Container::Emphasis,
|
inline::Container::Emphasis => Container::Emphasis,
|
||||||
inline::Container::Strong => Container::Strong,
|
inline::Container::Strong => Container::Strong,
|
||||||
inline::Container::Mark => Container::Mark,
|
inline::Container::Mark => Container::Mark,
|
||||||
inline::Container::InlineLink => Container::Link(
|
inline::Container::InlineLink(url) => Container::Link(
|
||||||
match self.inlines.src(inline.span) {
|
self.inline_parser.store_cowstrs[url as usize].clone(),
|
||||||
CowStr::Owned(s) => s.replace('\n', "").into(),
|
|
||||||
s @ CowStr::Borrowed(_) => s,
|
|
||||||
},
|
|
||||||
LinkType::Span(SpanLinkType::Inline),
|
LinkType::Span(SpanLinkType::Inline),
|
||||||
),
|
),
|
||||||
inline::Container::InlineImage => Container::Image(
|
inline::Container::InlineImage(url) => Container::Image(
|
||||||
match self.inlines.src(inline.span) {
|
self.inline_parser.store_cowstrs[url as usize].clone(),
|
||||||
CowStr::Owned(s) => s.replace('\n', "").into(),
|
|
||||||
s @ CowStr::Borrowed(_) => s,
|
|
||||||
},
|
|
||||||
SpanLinkType::Inline,
|
SpanLinkType::Inline,
|
||||||
),
|
),
|
||||||
inline::Container::ReferenceLink | inline::Container::ReferenceImage => {
|
inline::Container::ReferenceLink(tag)
|
||||||
let tag = match self.inlines.src(inline.span) {
|
| inline::Container::ReferenceImage(tag) => {
|
||||||
CowStr::Owned(s) => s.replace('\n', " ").into(),
|
let tag = &self.inline_parser.store_cowstrs[tag as usize];
|
||||||
s @ CowStr::Borrowed(_) => s,
|
let link_def = self
|
||||||
};
|
.pre_pass
|
||||||
let link_def =
|
.link_definitions
|
||||||
self.pre_pass.link_definitions.get(tag.as_ref()).cloned();
|
.get::<str>(tag.as_ref())
|
||||||
|
.cloned();
|
||||||
|
|
||||||
let (url_or_tag, ty) = if let Some((url, attrs_def)) = link_def {
|
let (url_or_tag, ty) = if let Some((url, attrs_def)) = link_def {
|
||||||
attributes.union(attrs_def);
|
attributes.union(attrs_def);
|
||||||
(url, SpanLinkType::Reference)
|
(url, SpanLinkType::Reference)
|
||||||
} else {
|
} else {
|
||||||
self.pre_pass.heading_id_by_tag(tag.as_ref()).map_or_else(
|
self.pre_pass.heading_id_by_tag(tag.as_ref()).map_or_else(
|
||||||
|| (tag, SpanLinkType::Unresolved),
|
|| (tag.clone(), SpanLinkType::Unresolved),
|
||||||
|id| (format!("#{}", id).into(), SpanLinkType::Reference),
|
|id| (format!("#{}", id).into(), SpanLinkType::Reference),
|
||||||
)
|
)
|
||||||
};
|
};
|
||||||
|
|
||||||
if matches!(c, inline::Container::ReferenceLink) {
|
if matches!(c, inline::Container::ReferenceLink(..)) {
|
||||||
Container::Link(url_or_tag, LinkType::Span(ty))
|
Container::Link(url_or_tag, LinkType::Span(ty))
|
||||||
} else {
|
} else {
|
||||||
Container::Image(url_or_tag, ty)
|
Container::Image(url_or_tag, ty)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
inline::Container::Autolink => {
|
inline::Container::Autolink => {
|
||||||
let url = self.inlines.src(inline.span);
|
let url: CowStr = inline.span.of(self.src).into();
|
||||||
let ty = if url.contains('@') {
|
let ty = if url.contains('@') {
|
||||||
LinkType::Email
|
LinkType::Email
|
||||||
} else {
|
} else {
|
||||||
|
@ -868,7 +868,7 @@ impl<'s> Parser<'s> {
|
||||||
Container::Link(url, ty)
|
Container::Link(url, ty)
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
if matches!(inline.kind, inline::EventKind::Enter(_)) {
|
if enter {
|
||||||
Event::Start(t, attributes)
|
Event::Start(t, attributes)
|
||||||
} else {
|
} else {
|
||||||
Event::End(t)
|
Event::End(t)
|
||||||
|
@ -876,10 +876,7 @@ impl<'s> Parser<'s> {
|
||||||
}
|
}
|
||||||
inline::EventKind::Atom(a) => match a {
|
inline::EventKind::Atom(a) => match a {
|
||||||
inline::Atom::FootnoteReference => {
|
inline::Atom::FootnoteReference => {
|
||||||
let tag = match self.inlines.src(inline.span) {
|
let tag = inline.span.of(self.src);
|
||||||
CowStr::Borrowed(s) => s,
|
|
||||||
CowStr::Owned(..) => panic!(),
|
|
||||||
};
|
|
||||||
let number = self
|
let number = self
|
||||||
.footnote_references
|
.footnote_references
|
||||||
.iter()
|
.iter()
|
||||||
|
@ -891,15 +888,9 @@ impl<'s> Parser<'s> {
|
||||||
},
|
},
|
||||||
|i| i + 1,
|
|i| i + 1,
|
||||||
);
|
);
|
||||||
Event::FootnoteReference(
|
Event::FootnoteReference(inline.span.of(self.src), number)
|
||||||
match self.inlines.src(inline.span) {
|
|
||||||
CowStr::Borrowed(s) => s,
|
|
||||||
CowStr::Owned(..) => panic!(),
|
|
||||||
},
|
|
||||||
number,
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
inline::Atom::Symbol => Event::Symbol(self.inlines.src(inline.span)),
|
inline::Atom::Symbol => Event::Symbol(inline.span.of(self.src).into()),
|
||||||
inline::Atom::Quote { ty, left } => match (ty, left) {
|
inline::Atom::Quote { ty, left } => match (ty, left) {
|
||||||
(inline::QuoteType::Single, true) => Event::LeftSingleQuote,
|
(inline::QuoteType::Single, true) => Event::LeftSingleQuote,
|
||||||
(inline::QuoteType::Single, false) => Event::RightSingleQuote,
|
(inline::QuoteType::Single, false) => Event::RightSingleQuote,
|
||||||
|
@ -914,13 +905,11 @@ impl<'s> Parser<'s> {
|
||||||
inline::Atom::Hardbreak => Event::Hardbreak,
|
inline::Atom::Hardbreak => Event::Hardbreak,
|
||||||
inline::Atom::Escape => Event::Escape,
|
inline::Atom::Escape => Event::Escape,
|
||||||
},
|
},
|
||||||
inline::EventKind::Str => Event::Str(self.inlines.src(inline.span)),
|
inline::EventKind::Str => Event::Str(inline.span.of(self.src).into()),
|
||||||
inline::EventKind::Whitespace
|
inline::EventKind::Attributes { .. } | inline::EventKind::Placeholder => {
|
||||||
| inline::EventKind::Attributes
|
|
||||||
| inline::EventKind::Placeholder => {
|
|
||||||
panic!("{:?}", inline)
|
panic!("{:?}", inline)
|
||||||
}
|
}
|
||||||
})
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -942,6 +931,7 @@ impl<'s> Parser<'s> {
|
||||||
let enter = matches!(ev.kind, tree::EventKind::Enter(..));
|
let enter = matches!(ev.kind, tree::EventKind::Enter(..));
|
||||||
let cont = match c {
|
let cont = match c {
|
||||||
block::Node::Leaf(l) => {
|
block::Node::Leaf(l) => {
|
||||||
|
self.inline_parser.reset();
|
||||||
if matches!(l, block::Leaf::LinkDefinition) {
|
if matches!(l, block::Leaf::LinkDefinition) {
|
||||||
// ignore link definitions
|
// ignore link definitions
|
||||||
if enter {
|
if enter {
|
||||||
|
@ -950,11 +940,6 @@ impl<'s> Parser<'s> {
|
||||||
self.block_attributes = Attributes::new();
|
self.block_attributes = Attributes::new();
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if enter && !matches!(l, block::Leaf::CodeBlock) {
|
|
||||||
self.inlines.set_spans(self.tree.take_inlines());
|
|
||||||
self.inline_parser =
|
|
||||||
Some(inline::Parser::new(self.inlines.chars()));
|
|
||||||
}
|
|
||||||
match l {
|
match l {
|
||||||
block::Leaf::Paragraph => Container::Paragraph,
|
block::Leaf::Paragraph => Container::Paragraph,
|
||||||
block::Leaf::Heading { has_section } => Container::Heading {
|
block::Leaf::Heading { has_section } => Container::Heading {
|
||||||
|
@ -969,6 +954,7 @@ impl<'s> Parser<'s> {
|
||||||
},
|
},
|
||||||
block::Leaf::DescriptionTerm => Container::DescriptionTerm,
|
block::Leaf::DescriptionTerm => Container::DescriptionTerm,
|
||||||
block::Leaf::CodeBlock => {
|
block::Leaf::CodeBlock => {
|
||||||
|
self.verbatim = enter;
|
||||||
if let Some(format) = content.strip_prefix('=') {
|
if let Some(format) = content.strip_prefix('=') {
|
||||||
Container::RawBlock { format }
|
Container::RawBlock { format }
|
||||||
} else {
|
} else {
|
||||||
|
@ -991,7 +977,7 @@ impl<'s> Parser<'s> {
|
||||||
class: (!ev.span.is_empty()).then(|| content),
|
class: (!ev.span.is_empty()).then(|| content),
|
||||||
},
|
},
|
||||||
block::Container::Footnote => {
|
block::Container::Footnote => {
|
||||||
assert!(enter);
|
debug_assert!(enter);
|
||||||
self.footnotes.insert(content, self.tree.take_branch());
|
self.footnotes.insert(content, self.tree.take_branch());
|
||||||
self.block_attributes = Attributes::new();
|
self.block_attributes = Attributes::new();
|
||||||
continue;
|
continue;
|
||||||
|
@ -1048,7 +1034,15 @@ impl<'s> Parser<'s> {
|
||||||
Event::End(cont)
|
Event::End(cont)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
tree::EventKind::Inline => Event::Str(content.into()), // verbatim
|
tree::EventKind::Inline => {
|
||||||
|
if self.verbatim {
|
||||||
|
Event::Str(content.into())
|
||||||
|
} else {
|
||||||
|
self.inline_parser
|
||||||
|
.feed_line(ev.span, self.tree.branch_is_empty());
|
||||||
|
return self.next();
|
||||||
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
return Some(event);
|
return Some(event);
|
||||||
}
|
}
|
||||||
|
@ -1304,7 +1298,8 @@ mod test {
|
||||||
Start(Blockquote, Attributes::new()),
|
Start(Blockquote, Attributes::new()),
|
||||||
Start(Paragraph, Attributes::new()),
|
Start(Paragraph, Attributes::new()),
|
||||||
Start(Verbatim, Attributes::new()),
|
Start(Verbatim, Attributes::new()),
|
||||||
Str("abc\ndef".into()),
|
Str("abc\n".into()),
|
||||||
|
Str("def".into()),
|
||||||
End(Verbatim),
|
End(Verbatim),
|
||||||
End(Paragraph),
|
End(Paragraph),
|
||||||
End(Blockquote),
|
End(Blockquote),
|
||||||
|
@ -1358,6 +1353,10 @@ mod test {
|
||||||
End(Link("url".into(), LinkType::Span(SpanLinkType::Inline))),
|
End(Link("url".into(), LinkType::Span(SpanLinkType::Inline))),
|
||||||
End(Paragraph),
|
End(Paragraph),
|
||||||
);
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn link_inline_multi_line() {
|
||||||
test_parse!(
|
test_parse!(
|
||||||
concat!(
|
concat!(
|
||||||
"> [text](url\n",
|
"> [text](url\n",
|
||||||
|
@ -1374,6 +1373,23 @@ mod test {
|
||||||
End(Paragraph),
|
End(Paragraph),
|
||||||
End(Blockquote),
|
End(Blockquote),
|
||||||
);
|
);
|
||||||
|
test_parse!(
|
||||||
|
concat!(
|
||||||
|
"> [text](a\n", //
|
||||||
|
"> bc\n", //
|
||||||
|
"> def)\n", //
|
||||||
|
),
|
||||||
|
Start(Blockquote, Attributes::new()),
|
||||||
|
Start(Paragraph, Attributes::new()),
|
||||||
|
Start(
|
||||||
|
Link("abcdef".into(), LinkType::Span(SpanLinkType::Inline)),
|
||||||
|
Attributes::new()
|
||||||
|
),
|
||||||
|
Str("text".into()),
|
||||||
|
End(Link("abcdef".into(), LinkType::Span(SpanLinkType::Inline))),
|
||||||
|
End(Paragraph),
|
||||||
|
End(Blockquote),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -1440,6 +1456,29 @@ mod test {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn link_reference_multiline() {
|
fn link_reference_multiline() {
|
||||||
|
test_parse!(
|
||||||
|
concat!(
|
||||||
|
"> [text][a\n", //
|
||||||
|
"> b]\n", //
|
||||||
|
"\n", //
|
||||||
|
"[a b]: url\n", //
|
||||||
|
),
|
||||||
|
Start(Blockquote, Attributes::new()),
|
||||||
|
Start(Paragraph, Attributes::new()),
|
||||||
|
Start(
|
||||||
|
Link("url".into(), LinkType::Span(SpanLinkType::Reference)),
|
||||||
|
Attributes::new()
|
||||||
|
),
|
||||||
|
Str("text".into()),
|
||||||
|
End(Link("url".into(), LinkType::Span(SpanLinkType::Reference))),
|
||||||
|
End(Paragraph),
|
||||||
|
End(Blockquote),
|
||||||
|
Blankline,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn link_definition_multiline() {
|
||||||
test_parse!(
|
test_parse!(
|
||||||
concat!(
|
concat!(
|
||||||
"[text][tag]\n",
|
"[text][tag]\n",
|
||||||
|
@ -1662,6 +1701,165 @@ mod test {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn attr_inline_consecutive() {
|
||||||
|
test_parse!(
|
||||||
|
"_abc def_{.a}{.b #i}",
|
||||||
|
Start(Paragraph, Attributes::new()),
|
||||||
|
Start(
|
||||||
|
Emphasis,
|
||||||
|
[("class", "a b"), ("id", "i")].into_iter().collect(),
|
||||||
|
),
|
||||||
|
Str("abc def".into()),
|
||||||
|
End(Emphasis),
|
||||||
|
End(Paragraph),
|
||||||
|
);
|
||||||
|
test_parse!(
|
||||||
|
"_abc def_{.a}{%%}{.b #i}",
|
||||||
|
Start(Paragraph, Attributes::new()),
|
||||||
|
Start(
|
||||||
|
Emphasis,
|
||||||
|
[("class", "a b"), ("id", "i")].into_iter().collect(),
|
||||||
|
),
|
||||||
|
Str("abc def".into()),
|
||||||
|
End(Emphasis),
|
||||||
|
End(Paragraph),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn attr_inline_consecutive_invalid() {
|
||||||
|
test_parse!(
|
||||||
|
"_abc def_{.a}{.b #i}{.c invalid}",
|
||||||
|
Start(Paragraph, Attributes::new()),
|
||||||
|
Start(
|
||||||
|
Emphasis,
|
||||||
|
[("class", "a b"), ("id", "i")].into_iter().collect(),
|
||||||
|
),
|
||||||
|
Str("abc def".into()),
|
||||||
|
End(Emphasis),
|
||||||
|
Str("{.c invalid}".into()),
|
||||||
|
End(Paragraph),
|
||||||
|
);
|
||||||
|
test_parse!(
|
||||||
|
"_abc def_{.a}{.b #i}{%%}{.c invalid}",
|
||||||
|
Start(Paragraph, Attributes::new()),
|
||||||
|
Start(
|
||||||
|
Emphasis,
|
||||||
|
[("class", "a b"), ("id", "i")].into_iter().collect(),
|
||||||
|
),
|
||||||
|
Str("abc def".into()),
|
||||||
|
End(Emphasis),
|
||||||
|
Str("{.c invalid}".into()),
|
||||||
|
End(Paragraph),
|
||||||
|
);
|
||||||
|
test_parse!(
|
||||||
|
concat!("_abc def_{.a}{.b #i}{%%}{.c\n", "invalid}\n"),
|
||||||
|
Start(Paragraph, Attributes::new()),
|
||||||
|
Start(
|
||||||
|
Emphasis,
|
||||||
|
[("class", "a b"), ("id", "i")].into_iter().collect(),
|
||||||
|
),
|
||||||
|
Str("abc def".into()),
|
||||||
|
End(Emphasis),
|
||||||
|
Str("{.c".into()),
|
||||||
|
Softbreak,
|
||||||
|
Str("invalid}".into()),
|
||||||
|
End(Paragraph),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn attr_inline_multiline() {
|
||||||
|
test_parse!(
|
||||||
|
concat!(
|
||||||
|
"> _abc_{a=b\n", //
|
||||||
|
"> c=d}\n", //
|
||||||
|
),
|
||||||
|
Start(Blockquote, Attributes::new()),
|
||||||
|
Start(Paragraph, Attributes::new()),
|
||||||
|
Start(Emphasis, [("a", "b"), ("c", "d")].into_iter().collect()),
|
||||||
|
Str("abc".into()),
|
||||||
|
End(Emphasis),
|
||||||
|
End(Paragraph),
|
||||||
|
End(Blockquote),
|
||||||
|
);
|
||||||
|
test_parse!(
|
||||||
|
concat!(
|
||||||
|
"> a{\n", //
|
||||||
|
"> %%\n", //
|
||||||
|
"> a=a}\n", //
|
||||||
|
),
|
||||||
|
Start(Blockquote, Attributes::new()),
|
||||||
|
Start(Paragraph, Attributes::new()),
|
||||||
|
Start(Span, [("a", "a")].into_iter().collect()),
|
||||||
|
Str("a".into()),
|
||||||
|
End(Span),
|
||||||
|
End(Paragraph),
|
||||||
|
End(Blockquote),
|
||||||
|
);
|
||||||
|
test_parse!(
|
||||||
|
concat!(
|
||||||
|
"> a{a=\"a\n", //
|
||||||
|
"> b\n", //
|
||||||
|
"> c\"}\n", //
|
||||||
|
),
|
||||||
|
Start(Blockquote, Attributes::new()),
|
||||||
|
Start(Paragraph, Attributes::new()),
|
||||||
|
Start(Span, [("a", "a b c")].into_iter().collect()),
|
||||||
|
Str("a".into()),
|
||||||
|
End(Span),
|
||||||
|
End(Paragraph),
|
||||||
|
End(Blockquote),
|
||||||
|
);
|
||||||
|
test_parse!(
|
||||||
|
concat!(
|
||||||
|
"> a{a=\"\n", //
|
||||||
|
"> b\"}\n", //
|
||||||
|
),
|
||||||
|
Start(Blockquote, Attributes::new()),
|
||||||
|
Start(Paragraph, Attributes::new()),
|
||||||
|
Start(Span, [("a", "b")].into_iter().collect()),
|
||||||
|
Str("a".into()),
|
||||||
|
End(Span),
|
||||||
|
End(Paragraph),
|
||||||
|
End(Blockquote),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn attr_inline_multiline_unclosed() {
|
||||||
|
test_parse!(
|
||||||
|
concat!(
|
||||||
|
"a{\n", //
|
||||||
|
" b\n", //
|
||||||
|
),
|
||||||
|
Start(Paragraph, Attributes::new()),
|
||||||
|
Str("a{".into()),
|
||||||
|
Softbreak,
|
||||||
|
Str("b".into()),
|
||||||
|
End(Paragraph),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn attr_inline_multiline_invalid() {
|
||||||
|
test_parse!(
|
||||||
|
concat!(
|
||||||
|
"a{a=b\n", //
|
||||||
|
" b\n", //
|
||||||
|
"}", //
|
||||||
|
),
|
||||||
|
Start(Paragraph, Attributes::new()),
|
||||||
|
Str("a{a=b".into()),
|
||||||
|
Softbreak,
|
||||||
|
Str("b".into()),
|
||||||
|
Softbreak,
|
||||||
|
Str("}".into()),
|
||||||
|
End(Paragraph),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn list_item_unordered() {
|
fn list_item_unordered() {
|
||||||
test_parse!(
|
test_parse!(
|
||||||
|
|
209
src/span.rs
209
src/span.rs
|
@ -1,5 +1,3 @@
|
||||||
use crate::CowStr;
|
|
||||||
|
|
||||||
#[derive(Clone, Copy, Default, Debug, PartialEq, Eq)]
|
#[derive(Clone, Copy, Default, Debug, PartialEq, Eq)]
|
||||||
pub struct Span {
|
pub struct Span {
|
||||||
start: u32,
|
start: u32,
|
||||||
|
@ -30,14 +28,6 @@ impl Span {
|
||||||
Self::empty_at(self.end())
|
Self::empty_at(self.end())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn with_start(self, start: usize) -> Self {
|
|
||||||
Self::new(start, self.end())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn with_end(self, end: usize) -> Self {
|
|
||||||
Self::new(self.start(), end)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn with_len(self, len: usize) -> Self {
|
pub fn with_len(self, len: usize) -> Self {
|
||||||
Self::by_len(self.start(), len)
|
Self::by_len(self.start(), len)
|
||||||
}
|
}
|
||||||
|
@ -115,205 +105,6 @@ impl Span {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub trait DiscontinuousString<'s> {
|
|
||||||
type Chars: Iterator<Item = char>;
|
|
||||||
|
|
||||||
fn src(&self, span: Span) -> CowStr<'s>;
|
|
||||||
|
|
||||||
fn chars(&self) -> Self::Chars;
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'s> DiscontinuousString<'s> for &'s str {
|
|
||||||
type Chars = std::str::Chars<'s>;
|
|
||||||
|
|
||||||
fn src(&self, span: Span) -> CowStr<'s> {
|
|
||||||
span.of(self).into()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn chars(&self) -> Self::Chars {
|
|
||||||
str::chars(self)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Multiple discontinuous [`std::str::Chars`] objects concatenated.
|
|
||||||
#[derive(Clone)]
|
|
||||||
pub struct InlineChars<'s, I> {
|
|
||||||
src: &'s str,
|
|
||||||
inlines: I,
|
|
||||||
next: std::str::Chars<'s>,
|
|
||||||
}
|
|
||||||
|
|
||||||
// Implement inlines.flat_map(|sp| sp.of(self.src).chars())
|
|
||||||
impl<'s, I: Iterator<Item = Span>> InlineChars<'s, I> {
|
|
||||||
fn new(src: &'s str, inlines: I) -> Self {
|
|
||||||
Self {
|
|
||||||
src,
|
|
||||||
inlines,
|
|
||||||
next: "".chars(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'s, I: Iterator<Item = Span>> Iterator for InlineChars<'s, I> {
|
|
||||||
type Item = char;
|
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
|
||||||
if self.next.as_str().is_empty() {
|
|
||||||
self.next = self
|
|
||||||
.inlines
|
|
||||||
.next()
|
|
||||||
.map_or_else(|| "".chars(), |sp| sp.of(self.src).chars());
|
|
||||||
}
|
|
||||||
self.next.next()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub type InlineCharsIter<'s> = InlineChars<'s, std::iter::Copied<std::slice::Iter<'static, Span>>>;
|
|
||||||
|
|
||||||
/// Discontinuous slices of a [`&str`].
|
|
||||||
#[derive(Default, Debug)]
|
|
||||||
pub struct InlineSpans<'s> {
|
|
||||||
src: &'s str,
|
|
||||||
spans: Vec<Span>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'s> InlineSpans<'s> {
|
|
||||||
pub fn new(src: &'s str) -> Self {
|
|
||||||
Self {
|
|
||||||
src,
|
|
||||||
spans: Vec::new(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn set_spans(&mut self, spans: impl Iterator<Item = Span>) {
|
|
||||||
self.spans.clear();
|
|
||||||
self.spans.extend(spans);
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn slice<'i>(&'i self, span: Span) -> InlineSpansSlice<'s, 'i> {
|
|
||||||
let mut first = 0;
|
|
||||||
let mut last = 0;
|
|
||||||
let mut first_skip = 0;
|
|
||||||
let mut last_len = 0;
|
|
||||||
|
|
||||||
let mut a = 0;
|
|
||||||
for (i, sp) in self.spans.iter().enumerate() {
|
|
||||||
let b = a + sp.len();
|
|
||||||
if span.start() < b {
|
|
||||||
if a <= span.start() {
|
|
||||||
first = i;
|
|
||||||
first_skip = span.start() - a;
|
|
||||||
if span.end() <= b {
|
|
||||||
// continuous
|
|
||||||
last = i;
|
|
||||||
last_len = span.len();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
last = i;
|
|
||||||
last_len = sp.len().min(span.end() - a);
|
|
||||||
break;
|
|
||||||
};
|
|
||||||
}
|
|
||||||
a = b;
|
|
||||||
}
|
|
||||||
|
|
||||||
assert_ne!(last_len, 0);
|
|
||||||
|
|
||||||
InlineSpansSlice {
|
|
||||||
src: self.src,
|
|
||||||
first_skip,
|
|
||||||
last_len,
|
|
||||||
spans: &self.spans[first..=last],
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Borrow if continuous, copy if discontiunous.
|
|
||||||
fn borrow_or_copy<I: Iterator<Item = Span>>(src: &str, spans: I, span: Span) -> CowStr {
|
|
||||||
let mut a = 0;
|
|
||||||
let mut s = String::new();
|
|
||||||
for sp in spans {
|
|
||||||
let b = a + sp.len();
|
|
||||||
if span.start() < b {
|
|
||||||
let r = if a <= span.start() {
|
|
||||||
if span.end() <= b {
|
|
||||||
// continuous
|
|
||||||
return CowStr::Borrowed(&sp.of(src)[span.start() - a..span.end() - a]);
|
|
||||||
}
|
|
||||||
(span.start() - a)..sp.len()
|
|
||||||
} else if a <= span.end() {
|
|
||||||
0..sp.len().min(span.end() - a)
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
};
|
|
||||||
s.push_str(&sp.of(src)[r]);
|
|
||||||
}
|
|
||||||
a = b;
|
|
||||||
}
|
|
||||||
assert_eq!(span.len(), s.len());
|
|
||||||
CowStr::Owned(s)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'s> DiscontinuousString<'s> for InlineSpans<'s> {
|
|
||||||
type Chars = InlineCharsIter<'s>;
|
|
||||||
|
|
||||||
fn src(&self, span: Span) -> CowStr<'s> {
|
|
||||||
Self::borrow_or_copy(self.src, self.spans.iter().copied(), span)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn chars(&self) -> Self::Chars {
|
|
||||||
// SAFETY: do not call set_spans while chars is in use
|
|
||||||
unsafe { std::mem::transmute(InlineChars::new(self.src, self.spans.iter().copied())) }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// A read-only slice of an [`InlineSpans`] object.
|
|
||||||
pub struct InlineSpansSlice<'s, 'i> {
|
|
||||||
src: &'s str,
|
|
||||||
first_skip: usize,
|
|
||||||
last_len: usize,
|
|
||||||
spans: &'i [Span],
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'s, 'i> InlineSpansSlice<'s, 'i> {
|
|
||||||
fn spans(&self) -> InlineSpansSliceIter<'i> {
|
|
||||||
let (span_start, r_middle, span_end) = if self.spans.len() == 1 {
|
|
||||||
(
|
|
||||||
Span::by_len(self.spans[0].start() + self.first_skip, self.last_len),
|
|
||||||
0..0,
|
|
||||||
Span::by_len(self.spans[self.spans.len() - 1].start(), 0),
|
|
||||||
)
|
|
||||||
} else {
|
|
||||||
(
|
|
||||||
Span::new(self.spans[0].start() + self.first_skip, self.spans[0].end()),
|
|
||||||
1..1 + self.spans.len().saturating_sub(2),
|
|
||||||
Span::by_len(self.spans[self.spans.len() - 1].start(), self.last_len),
|
|
||||||
)
|
|
||||||
};
|
|
||||||
std::iter::once(span_start)
|
|
||||||
.chain(self.spans[r_middle].iter().copied())
|
|
||||||
.chain(std::iter::once(span_end))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'s, 'i> DiscontinuousString<'s> for InlineSpansSlice<'s, 'i> {
|
|
||||||
type Chars = InlineChars<'s, InlineSpansSliceIter<'i>>;
|
|
||||||
|
|
||||||
fn src(&self, span: Span) -> CowStr<'s> {
|
|
||||||
InlineSpans::borrow_or_copy(self.src, self.spans(), span)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn chars(&self) -> Self::Chars {
|
|
||||||
InlineChars::new(self.src, self.spans())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub type InlineSpansSliceIter<'i> = std::iter::Chain<
|
|
||||||
std::iter::Chain<std::iter::Once<Span>, std::iter::Copied<std::slice::Iter<'i, Span>>>,
|
|
||||||
std::iter::Once<Span>,
|
|
||||||
>;
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test {
|
mod test {
|
||||||
use super::Span;
|
use super::Span;
|
||||||
|
|
18
src/tree.rs
18
src/tree.rs
|
@ -79,12 +79,16 @@ impl<C: Clone, A: Clone> Tree<C, A> {
|
||||||
std::iter::from_fn(move || {
|
std::iter::from_fn(move || {
|
||||||
head.take().map(|h| {
|
head.take().map(|h| {
|
||||||
let n = &self.nodes[h.index()];
|
let n = &self.nodes[h.index()];
|
||||||
assert!(matches!(n.kind, NodeKind::Inline));
|
debug_assert!(matches!(n.kind, NodeKind::Inline));
|
||||||
head = n.next;
|
head = n.next;
|
||||||
n.span
|
n.span
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn branch_is_empty(&self) -> bool {
|
||||||
|
matches!(self.head, None)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<C: Clone, A: Clone> Iterator for Tree<C, A> {
|
impl<C: Clone, A: Clone> Iterator for Tree<C, A> {
|
||||||
|
@ -126,7 +130,7 @@ pub struct NodeIndex(std::num::NonZeroUsize);
|
||||||
|
|
||||||
impl NodeIndex {
|
impl NodeIndex {
|
||||||
fn new(i: usize) -> Self {
|
fn new(i: usize) -> Self {
|
||||||
assert_ne!(i, usize::MAX);
|
debug_assert_ne!(i, usize::MAX);
|
||||||
Self((i + 1).try_into().unwrap())
|
Self((i + 1).try_into().unwrap())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -246,7 +250,7 @@ impl<C, A> Builder<C, A> {
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
let last = self.branch.pop();
|
let last = self.branch.pop();
|
||||||
assert_ne!(last, None);
|
debug_assert_ne!(last, None);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -314,7 +318,7 @@ impl<C, A> Builder<C, A> {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(super) fn finish(self) -> Tree<C, A> {
|
pub(super) fn finish(self) -> Tree<C, A> {
|
||||||
assert_eq!(self.depth, 0);
|
debug_assert_eq!(self.depth, 0);
|
||||||
let head = self.nodes[NodeIndex::root().index()].next;
|
let head = self.nodes[NodeIndex::root().index()].next;
|
||||||
Tree {
|
Tree {
|
||||||
nodes: self.nodes.into_boxed_slice().into(),
|
nodes: self.nodes.into_boxed_slice().into(),
|
||||||
|
@ -331,19 +335,19 @@ impl<C, A> Builder<C, A> {
|
||||||
match &mut head.kind {
|
match &mut head.kind {
|
||||||
NodeKind::Root | NodeKind::Inline | NodeKind::Atom(_) => {
|
NodeKind::Root | NodeKind::Inline | NodeKind::Atom(_) => {
|
||||||
// set next pointer of previous node
|
// set next pointer of previous node
|
||||||
assert_eq!(head.next, None);
|
debug_assert_eq!(head.next, None);
|
||||||
head.next = Some(ni);
|
head.next = Some(ni);
|
||||||
}
|
}
|
||||||
NodeKind::Container(_, child) => {
|
NodeKind::Container(_, child) => {
|
||||||
self.branch.push(*head_ni);
|
self.branch.push(*head_ni);
|
||||||
// set child pointer of current container
|
// set child pointer of current container
|
||||||
assert_eq!(*child, None);
|
debug_assert_eq!(*child, None);
|
||||||
*child = Some(ni);
|
*child = Some(ni);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if let Some(block) = self.branch.pop() {
|
} else if let Some(block) = self.branch.pop() {
|
||||||
let mut block = &mut self.nodes[block.index()];
|
let mut block = &mut self.nodes[block.index()];
|
||||||
assert!(matches!(block.kind, NodeKind::Container(..)));
|
debug_assert!(matches!(block.kind, NodeKind::Container(..)));
|
||||||
block.next = Some(ni);
|
block.next = Some(ni);
|
||||||
} else {
|
} else {
|
||||||
panic!()
|
panic!()
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
38d85f9:multi-line block attributes
|
38d85f9:multi-line block attributes
|
||||||
6c14561:multi-line block attributes
|
6c14561:multi-line block attributes
|
||||||
613a9d6:attribute container precedence
|
|
||||||
f4f22fc:attribute key class order
|
f4f22fc:attribute key class order
|
||||||
ae6fc15:bugged left/right quote
|
ae6fc15:bugged left/right quote
|
||||||
168469a:bugged left/right quote
|
168469a:bugged left/right quote
|
||||||
|
@ -9,9 +8,7 @@ ae6fc15:bugged left/right quote
|
||||||
e1f5b5e:untrimmed whitespace before linebreak
|
e1f5b5e:untrimmed whitespace before linebreak
|
||||||
07888f3:div close within raw block
|
07888f3:div close within raw block
|
||||||
8423412:heading id conflict with existing id
|
8423412:heading id conflict with existing id
|
||||||
00a46ed:clear inline formatting from link tags
|
|
||||||
c0a3dec:escape in url
|
c0a3dec:escape in url
|
||||||
e66af00:url container precedence
|
|
||||||
61876cf:roman alpha ambiguity
|
61876cf:roman alpha ambiguity
|
||||||
f31b357:roman alpha ambiguity
|
f31b357:roman alpha ambiguity
|
||||||
642d380:table end in verbatim inline
|
642d380:table end in verbatim inline
|
||||||
|
|
Loading…
Reference in a new issue