parse inline attrs
This commit is contained in:
parent
1586c2947f
commit
8d676e1431
3 changed files with 231 additions and 89 deletions
10
src/attr.rs
10
src/attr.rs
|
@ -4,6 +4,12 @@ use crate::Span;
|
|||
|
||||
use State::*;
|
||||
|
||||
pub(crate) fn parse<'s, S: DiscontinuousString<'s>>(chars: S) -> Attributes<'s> {
|
||||
let mut a = Attributes::new();
|
||||
a.parse(chars);
|
||||
a
|
||||
}
|
||||
|
||||
pub fn valid<I: Iterator<Item = char>>(chars: I) -> usize {
|
||||
let mut p = Parser::new(chars);
|
||||
if p.any(|e| matches!(e, Element::Invalid)) {
|
||||
|
@ -29,7 +35,7 @@ impl<'s> Attributes<'s> {
|
|||
Self(self.0.take())
|
||||
}
|
||||
|
||||
pub(crate) fn parse<S: DiscontinuousString<'s>>(&mut self, input: &S) -> bool {
|
||||
pub(crate) fn parse<S: DiscontinuousString<'s>>(&mut self, input: S) -> bool {
|
||||
for elem in Parser::new(input.chars()) {
|
||||
match elem {
|
||||
Element::Class(c) => self.add("class", input.src(c)),
|
||||
|
@ -275,7 +281,7 @@ mod test {
|
|||
($src:expr $(,$($av:expr),* $(,)?)?) => {
|
||||
#[allow(unused)]
|
||||
let mut attr =super::Attributes::new();
|
||||
attr.parse(&$src);
|
||||
attr.parse($src);
|
||||
let actual = attr.iter().collect::<Vec<_>>();
|
||||
let expected = &[$($($av),*,)?];
|
||||
assert_eq!(actual, expected, "\n\n{}\n\n", $src);
|
||||
|
|
307
src/lib.rs
307
src/lib.rs
|
@ -255,24 +255,24 @@ impl<'s> Container<'s> {
|
|||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct InlineChars<'s, 't> {
|
||||
struct InlineChars<'s, I> {
|
||||
src: &'s str,
|
||||
inlines: std::slice::Iter<'t, Span>,
|
||||
inlines: I,
|
||||
next: std::str::Chars<'s>,
|
||||
}
|
||||
|
||||
// Implement inlines.flat_map(|sp| sp.of(self.src).chars())
|
||||
impl<'s, 't> InlineChars<'s, 't> {
|
||||
fn new(src: &'s str, inlines: &'t [Span]) -> Self {
|
||||
impl<'s, 't, I: Iterator<Item = Span>> InlineChars<'s, I> {
|
||||
fn new(src: &'s str, inlines: I) -> Self {
|
||||
Self {
|
||||
src,
|
||||
inlines: inlines.iter(),
|
||||
inlines,
|
||||
next: "".chars(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s, 't> Iterator for InlineChars<'s, 't> {
|
||||
impl<'s, 't, I: Iterator<Item = Span>> Iterator for InlineChars<'s, I> {
|
||||
type Item = char;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
|
@ -303,9 +303,8 @@ impl<'s> DiscontinuousString<'s> for &'s str {
|
|||
}
|
||||
|
||||
impl<'s> DiscontinuousString<'s> for InlineSpans<'s> {
|
||||
type Chars = InlineChars<'s, 'static>;
|
||||
type Chars = InlineCharsIter<'s>;
|
||||
|
||||
/// Borrow if continuous, copy if discontinuous.
|
||||
fn src(&self, span: Span) -> CowStr<'s> {
|
||||
let mut a = 0;
|
||||
let mut s = String::new();
|
||||
|
@ -333,11 +332,75 @@ impl<'s> DiscontinuousString<'s> for InlineSpans<'s> {
|
|||
|
||||
fn chars(&self) -> Self::Chars {
|
||||
// SAFETY: do not call set_spans while chars is in use
|
||||
unsafe { std::mem::transmute(InlineChars::new(self.src, &self.spans)) }
|
||||
unsafe { std::mem::transmute(InlineChars::new(self.src, self.spans.iter().copied())) }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
impl<'s, 'i> DiscontinuousString<'s> for InlineSpansSlice<'s, 'i> {
|
||||
type Chars = InlineChars<
|
||||
's,
|
||||
std::iter::Chain<
|
||||
std::iter::Chain<std::iter::Once<Span>, std::iter::Copied<std::slice::Iter<'i, Span>>>,
|
||||
std::iter::Once<Span>,
|
||||
>,
|
||||
>;
|
||||
|
||||
/// Borrow if continuous, copy if discontiunous.
|
||||
fn src(&self, span: Span) -> CowStr<'s> {
|
||||
let mut a = 0;
|
||||
let mut s = String::new();
|
||||
for (i, mut sp) in self.spans.iter().copied().enumerate() {
|
||||
if i == 0 {
|
||||
sp = sp.skip(self.first_skip);
|
||||
}
|
||||
if i == self.spans.len() - 1 {
|
||||
sp = Span::by_len(sp.start(), self.last_len);
|
||||
}
|
||||
let b = a + sp.len();
|
||||
if span.start() < b {
|
||||
let r = if a <= span.start() {
|
||||
if span.end() <= b {
|
||||
// continuous
|
||||
return CowStr::Borrowed(
|
||||
&sp.of(self.src)[span.start() - a..span.end() - a],
|
||||
);
|
||||
}
|
||||
(span.start() - a)..sp.len()
|
||||
} else {
|
||||
0..sp.len().min(span.end() - a)
|
||||
};
|
||||
s.push_str(&sp.of(self.src)[r]);
|
||||
}
|
||||
a = b;
|
||||
}
|
||||
assert_eq!(span.len(), s.len());
|
||||
CowStr::Owned(s)
|
||||
}
|
||||
|
||||
fn chars(&self) -> Self::Chars {
|
||||
let (span_start, r_middle, span_end) = if self.spans.len() == 1 {
|
||||
(
|
||||
Span::by_len(self.spans[0].start() + self.first_skip, self.last_len),
|
||||
0..0,
|
||||
Span::by_len(self.spans[self.spans.len() - 1].start(), 0),
|
||||
)
|
||||
} else {
|
||||
(
|
||||
Span::new(self.spans[0].start() + self.first_skip, self.spans[0].end()),
|
||||
1..self.spans.len().saturating_sub(2),
|
||||
Span::by_len(self.spans[self.spans.len() - 1].start(), self.last_len),
|
||||
)
|
||||
};
|
||||
InlineChars::new(
|
||||
self.src,
|
||||
std::iter::once(span_start)
|
||||
.chain(self.spans[r_middle].iter().copied())
|
||||
.chain(std::iter::once(span_end)),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default, Debug)]
|
||||
struct InlineSpans<'s> {
|
||||
src: &'s str,
|
||||
spans: Vec<Span>,
|
||||
|
@ -355,15 +418,61 @@ impl<'s> InlineSpans<'s> {
|
|||
self.spans.clear();
|
||||
self.spans.extend(spans);
|
||||
}
|
||||
|
||||
fn slice<'i>(&'i self, span: Span) -> InlineSpansSlice<'s, 'i> {
|
||||
let mut first = 0;
|
||||
let mut last = 0;
|
||||
let mut first_skip = 0;
|
||||
let mut last_len = 0;
|
||||
|
||||
let mut a = 0;
|
||||
for (i, sp) in self.spans.iter().enumerate() {
|
||||
let b = a + sp.len();
|
||||
if span.start() < b {
|
||||
if a <= span.start() {
|
||||
first = i;
|
||||
first_skip = span.start() - a;
|
||||
if span.end() <= b {
|
||||
// continuous
|
||||
last = i;
|
||||
last_len = span.len();
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
last = i;
|
||||
last_len = sp.len().min(span.end() - a);
|
||||
break;
|
||||
};
|
||||
}
|
||||
a = b;
|
||||
}
|
||||
|
||||
assert_ne!(last_len, 0);
|
||||
|
||||
InlineSpansSlice {
|
||||
src: self.src,
|
||||
first_skip,
|
||||
last_len,
|
||||
spans: &self.spans[first..=last],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A view of a sub-range of the text described by an `InlineSpans`, as created by
/// `InlineSpans::slice`. It borrows the span list instead of copying it.
struct InlineSpansSlice<'s, 'i> {
    /// Source text that the spans index into.
    src: &'s str,
    /// Number of bytes to skip at the start of the first span.
    first_skip: usize,
    /// Number of bytes of the last span that belong to the slice. When the slice consists
    /// of a single span, this is counted from the position after `first_skip`.
    last_len: usize,
    /// The spans touched by the slice, from the first to the last one (inclusive).
    spans: &'i [Span],
}
|
||||
|
||||
/// Character iterator type used as `InlineSpans`'s `Chars` and stored inside `Parser`.
///
/// NOTE(review): the `'static` lifetime of the span iterator is not a real guarantee.
/// `InlineSpans::chars` produces this type through `transmute`, and its SAFETY comment
/// requires that `set_spans` is not called while the iterator is in use.
type InlineCharsIter<'s> = InlineChars<'s, std::iter::Copied<std::slice::Iter<'static, Span>>>;
|
||||
|
||||
pub struct Parser<'s> {
|
||||
src: &'s str,
|
||||
tree: block::Tree,
|
||||
inlines: InlineSpans<'s>,
|
||||
inline_parser: Option<inline::Parser<InlineChars<'s, 'static>>>,
|
||||
inline_parser: Option<inline::Parser<InlineCharsIter<'s>>>,
|
||||
inline_start: usize,
|
||||
block_attributes: Attributes<'s>,
|
||||
}
|
||||
|
||||
impl<'s> Parser<'s> {
|
||||
|
@ -375,84 +484,94 @@ impl<'s> Parser<'s> {
|
|||
inlines: InlineSpans::new(src),
|
||||
inline_parser: None,
|
||||
inline_start: 0,
|
||||
block_attributes: Attributes::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> Parser<'s> {
|
||||
fn inline(&self, inline: inline::Event) -> Event<'s> {
|
||||
match inline.kind {
|
||||
inline::EventKind::Enter(c) | inline::EventKind::Exit(c) => {
|
||||
let t = match c {
|
||||
inline::Container::Span => Container::Span,
|
||||
inline::Container::Verbatim => Container::Verbatim,
|
||||
inline::Container::InlineMath => Container::Math { display: false },
|
||||
inline::Container::DisplayMath => Container::Math { display: true },
|
||||
inline::Container::RawFormat => Container::RawInline {
|
||||
format: match self.inlines.src(inline.span) {
|
||||
CowStr::Owned(_) => panic!(),
|
||||
CowStr::Borrowed(s) => s,
|
||||
},
|
||||
},
|
||||
inline::Container::Subscript => Container::Subscript,
|
||||
inline::Container::Superscript => Container::Superscript,
|
||||
inline::Container::Insert => Container::Insert,
|
||||
inline::Container::Delete => Container::Delete,
|
||||
inline::Container::Emphasis => Container::Emphasis,
|
||||
inline::Container::Strong => Container::Strong,
|
||||
inline::Container::Mark => Container::Mark,
|
||||
inline::Container::SingleQuoted => Container::SingleQuoted,
|
||||
inline::Container::DoubleQuoted => Container::DoubleQuoted,
|
||||
inline::Container::InlineLink => Container::Link(
|
||||
match self.inlines.src(inline.span) {
|
||||
CowStr::Owned(s) => s.replace('\n', "").into(),
|
||||
s @ CowStr::Borrowed(_) => s,
|
||||
},
|
||||
LinkType::Span(SpanLinkType::Inline),
|
||||
),
|
||||
inline::Container::InlineImage => Container::Image(
|
||||
match self.inlines.src(inline.span) {
|
||||
CowStr::Owned(s) => s.replace('\n', "").into(),
|
||||
s @ CowStr::Borrowed(_) => s,
|
||||
},
|
||||
SpanLinkType::Inline,
|
||||
),
|
||||
_ => todo!("{:?}", c),
|
||||
};
|
||||
if matches!(inline.kind, inline::EventKind::Enter(_)) {
|
||||
Event::Start(t, Attributes::new())
|
||||
fn inline(&mut self) -> Option<Event<'s>> {
|
||||
self.inline_parser.as_mut().and_then(|parser| {
|
||||
let mut inline = parser.next();
|
||||
|
||||
let mut first_is_attr = false;
|
||||
let attributes = inline.as_ref().map_or_else(Attributes::new, |inl| {
|
||||
if let inline::EventKind::Attributes = inl.kind {
|
||||
first_is_attr = true;
|
||||
attr::parse(self.inlines.slice(inl.span))
|
||||
} else {
|
||||
Event::End(t)
|
||||
Attributes::new()
|
||||
}
|
||||
});
|
||||
|
||||
if first_is_attr {
|
||||
inline = parser.next();
|
||||
}
|
||||
inline::EventKind::Atom(a) => match a {
|
||||
inline::Atom::Ellipsis => Event::Atom(Atom::Ellipsis),
|
||||
inline::Atom::EnDash => Event::Atom(Atom::EnDash),
|
||||
inline::Atom::EmDash => Event::Atom(Atom::EmDash),
|
||||
inline::Atom::Nbsp => Event::Atom(Atom::NonBreakingSpace),
|
||||
inline::Atom::Softbreak => Event::Atom(Atom::Softbreak),
|
||||
inline::Atom::Hardbreak => Event::Atom(Atom::Hardbreak),
|
||||
inline::Atom::Escape => Event::Atom(Atom::Escape),
|
||||
},
|
||||
inline::EventKind::Str => Event::Str(self.inlines.src(inline.span)),
|
||||
inline::EventKind::Attributes => todo!(),
|
||||
inline::EventKind::AttributesDummy => panic!(),
|
||||
}
|
||||
|
||||
inline.map(|inline| match inline.kind {
|
||||
inline::EventKind::Enter(c) | inline::EventKind::Exit(c) => {
|
||||
let t = match c {
|
||||
inline::Container::Span => Container::Span,
|
||||
inline::Container::Verbatim => Container::Verbatim,
|
||||
inline::Container::InlineMath => Container::Math { display: false },
|
||||
inline::Container::DisplayMath => Container::Math { display: true },
|
||||
inline::Container::RawFormat => Container::RawInline {
|
||||
format: match self.inlines.src(inline.span) {
|
||||
CowStr::Owned(_) => panic!(),
|
||||
CowStr::Borrowed(s) => s,
|
||||
},
|
||||
},
|
||||
inline::Container::Subscript => Container::Subscript,
|
||||
inline::Container::Superscript => Container::Superscript,
|
||||
inline::Container::Insert => Container::Insert,
|
||||
inline::Container::Delete => Container::Delete,
|
||||
inline::Container::Emphasis => Container::Emphasis,
|
||||
inline::Container::Strong => Container::Strong,
|
||||
inline::Container::Mark => Container::Mark,
|
||||
inline::Container::SingleQuoted => Container::SingleQuoted,
|
||||
inline::Container::DoubleQuoted => Container::DoubleQuoted,
|
||||
inline::Container::InlineLink => Container::Link(
|
||||
match self.inlines.src(inline.span) {
|
||||
CowStr::Owned(s) => s.replace('\n', "").into(),
|
||||
s @ CowStr::Borrowed(_) => s,
|
||||
},
|
||||
LinkType::Span(SpanLinkType::Inline),
|
||||
),
|
||||
inline::Container::InlineImage => Container::Image(
|
||||
match self.inlines.src(inline.span) {
|
||||
CowStr::Owned(s) => s.replace('\n', "").into(),
|
||||
s @ CowStr::Borrowed(_) => s,
|
||||
},
|
||||
SpanLinkType::Inline,
|
||||
),
|
||||
inline::Container::ReferenceLink => todo!("{:?}", c),
|
||||
inline::Container::ReferenceImage => todo!("{:?}", c),
|
||||
inline::Container::Autolink => todo!("{:?}", c),
|
||||
};
|
||||
if matches!(inline.kind, inline::EventKind::Enter(_)) {
|
||||
Event::Start(t, attributes)
|
||||
} else {
|
||||
Event::End(t)
|
||||
}
|
||||
}
|
||||
inline::EventKind::Atom(a) => match a {
|
||||
inline::Atom::Ellipsis => Event::Atom(Atom::Ellipsis),
|
||||
inline::Atom::EnDash => Event::Atom(Atom::EnDash),
|
||||
inline::Atom::EmDash => Event::Atom(Atom::EmDash),
|
||||
inline::Atom::Nbsp => Event::Atom(Atom::NonBreakingSpace),
|
||||
inline::Atom::Softbreak => Event::Atom(Atom::Softbreak),
|
||||
inline::Atom::Hardbreak => Event::Atom(Atom::Hardbreak),
|
||||
inline::Atom::Escape => Event::Atom(Atom::Escape),
|
||||
},
|
||||
inline::EventKind::Str => Event::Str(self.inlines.src(inline.span)),
|
||||
inline::EventKind::Attributes | inline::EventKind::AttributesDummy => {
|
||||
panic!("{:?}", inline)
|
||||
}
|
||||
})
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> Iterator for Parser<'s> {
|
||||
type Item = Event<'s>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
if let Some(parser) = &mut self.inline_parser {
|
||||
if let Some(inline) = parser.next() {
|
||||
return Some(self.inline(inline));
|
||||
}
|
||||
self.inline_parser = None;
|
||||
}
|
||||
|
||||
fn block(&mut self) -> Option<Event<'s>> {
|
||||
let mut attributes = Attributes::new();
|
||||
for ev in &mut self.tree {
|
||||
let content = ev.span.of(self.src);
|
||||
let event = match ev.kind {
|
||||
|
@ -460,7 +579,7 @@ impl<'s> Iterator for Parser<'s> {
|
|||
block::Atom::Blankline => Event::Atom(Atom::Blankline),
|
||||
block::Atom::ThematicBreak => Event::Atom(Atom::ThematicBreak),
|
||||
block::Atom::Attributes => {
|
||||
self.block_attributes.parse(&content);
|
||||
attributes.parse(content);
|
||||
continue;
|
||||
}
|
||||
},
|
||||
|
@ -478,7 +597,7 @@ impl<'s> Iterator for Parser<'s> {
|
|||
}
|
||||
_ => Container::from_leaf_block(content, l),
|
||||
};
|
||||
Event::Start(container, self.block_attributes.take())
|
||||
Event::Start(container, attributes)
|
||||
}
|
||||
block::Node::Container(c) => {
|
||||
let container = match c {
|
||||
|
@ -487,7 +606,7 @@ impl<'s> Iterator for Parser<'s> {
|
|||
},
|
||||
_ => Container::from_container_block(content, c),
|
||||
};
|
||||
Event::Start(container, self.block_attributes.take())
|
||||
Event::Start(container, attributes)
|
||||
}
|
||||
},
|
||||
tree::EventKind::Exit(c) => match c {
|
||||
|
@ -500,11 +619,18 @@ impl<'s> Iterator for Parser<'s> {
|
|||
};
|
||||
return Some(event);
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> Iterator for Parser<'s> {
|
||||
type Item = Event<'s>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.inline().or_else(|| self.block())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::Atom::*;
|
||||
|
@ -694,4 +820,17 @@ mod test {
|
|||
End(Paragraph),
|
||||
);
|
||||
}
|
||||
|
||||
// An attribute set written directly after an inline container (`_def_{.ghi}`) is expected
// to be delivered with that container's `Start` event, even though it follows the
// container in the source. No separate event is expected for the attributes themselves.
#[test]
fn attr_inline() {
    test_parse!(
        "abc _def_{.ghi}",
        Start(Paragraph, Attributes::new()),
        Str("abc ".into()),
        Start(Emphasis, [("class", "ghi")].into_iter().collect()),
        Str("def".into()),
        End(Emphasis),
        End(Paragraph),
    );
}
|
||||
}
|
||||
|
|
|
@ -6,9 +6,6 @@ fn main() {
|
|||
.read_to_string(&mut src)
|
||||
.expect("failed to read utf-8 file");
|
||||
|
||||
let p = jotdown::Parser::new(&src);
|
||||
println!("{:#?}", p.collect::<Vec<_>>());
|
||||
|
||||
let p = jotdown::Parser::new(&src);
|
||||
let mut out = std::io::BufWriter::new(std::io::stdout());
|
||||
jotdown::html::write(&mut out, p).unwrap();
|
||||
|
|
Loading…
Reference in a new issue