parse inline attrs

This commit is contained in:
Noah Hellman 2023-01-15 15:47:28 +01:00
parent 1586c2947f
commit 8d676e1431
3 changed files with 231 additions and 89 deletions

View file

@ -4,6 +4,12 @@ use crate::Span;
use State::*; use State::*;
/// Build a fresh [`Attributes`] set from the attribute source `chars`.
///
/// Convenience wrapper for callers that do not already hold an `Attributes`
/// value: it creates an empty set, feeds `chars` to [`Attributes::parse`] and
/// returns the result. The boolean returned by `Attributes::parse` is
/// discarded here.
pub(crate) fn parse<'s, S: DiscontinuousString<'s>>(chars: S) -> Attributes<'s> {
    let mut attributes = Attributes::new();
    attributes.parse(chars);
    attributes
}
pub fn valid<I: Iterator<Item = char>>(chars: I) -> usize { pub fn valid<I: Iterator<Item = char>>(chars: I) -> usize {
let mut p = Parser::new(chars); let mut p = Parser::new(chars);
if p.any(|e| matches!(e, Element::Invalid)) { if p.any(|e| matches!(e, Element::Invalid)) {
@ -29,7 +35,7 @@ impl<'s> Attributes<'s> {
Self(self.0.take()) Self(self.0.take())
} }
pub(crate) fn parse<S: DiscontinuousString<'s>>(&mut self, input: &S) -> bool { pub(crate) fn parse<S: DiscontinuousString<'s>>(&mut self, input: S) -> bool {
for elem in Parser::new(input.chars()) { for elem in Parser::new(input.chars()) {
match elem { match elem {
Element::Class(c) => self.add("class", input.src(c)), Element::Class(c) => self.add("class", input.src(c)),
@ -275,7 +281,7 @@ mod test {
($src:expr $(,$($av:expr),* $(,)?)?) => { ($src:expr $(,$($av:expr),* $(,)?)?) => {
#[allow(unused)] #[allow(unused)]
let mut attr =super::Attributes::new(); let mut attr =super::Attributes::new();
attr.parse(&$src); attr.parse($src);
let actual = attr.iter().collect::<Vec<_>>(); let actual = attr.iter().collect::<Vec<_>>();
let expected = &[$($($av),*,)?]; let expected = &[$($($av),*,)?];
assert_eq!(actual, expected, "\n\n{}\n\n", $src); assert_eq!(actual, expected, "\n\n{}\n\n", $src);

View file

@ -255,24 +255,24 @@ impl<'s> Container<'s> {
} }
#[derive(Clone)] #[derive(Clone)]
struct InlineChars<'s, 't> { struct InlineChars<'s, I> {
src: &'s str, src: &'s str,
inlines: std::slice::Iter<'t, Span>, inlines: I,
next: std::str::Chars<'s>, next: std::str::Chars<'s>,
} }
// Implement inlines.flat_map(|sp| sp.of(self.src).chars()) // Implement inlines.flat_map(|sp| sp.of(self.src).chars())
impl<'s, 't> InlineChars<'s, 't> { impl<'s, 't, I: Iterator<Item = Span>> InlineChars<'s, I> {
fn new(src: &'s str, inlines: &'t [Span]) -> Self { fn new(src: &'s str, inlines: I) -> Self {
Self { Self {
src, src,
inlines: inlines.iter(), inlines,
next: "".chars(), next: "".chars(),
} }
} }
} }
impl<'s, 't> Iterator for InlineChars<'s, 't> { impl<'s, 't, I: Iterator<Item = Span>> Iterator for InlineChars<'s, I> {
type Item = char; type Item = char;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
@ -303,9 +303,8 @@ impl<'s> DiscontinuousString<'s> for &'s str {
} }
impl<'s> DiscontinuousString<'s> for InlineSpans<'s> { impl<'s> DiscontinuousString<'s> for InlineSpans<'s> {
type Chars = InlineChars<'s, 'static>; type Chars = InlineCharsIter<'s>;
/// Borrow if continuous, copy if discontiunous.
fn src(&self, span: Span) -> CowStr<'s> { fn src(&self, span: Span) -> CowStr<'s> {
let mut a = 0; let mut a = 0;
let mut s = String::new(); let mut s = String::new();
@ -333,11 +332,75 @@ impl<'s> DiscontinuousString<'s> for InlineSpans<'s> {
fn chars(&self) -> Self::Chars { fn chars(&self) -> Self::Chars {
// SAFETY: do not call set_spans while chars is in use // SAFETY: do not call set_spans while chars is in use
unsafe { std::mem::transmute(InlineChars::new(self.src, &self.spans)) } unsafe { std::mem::transmute(InlineChars::new(self.src, self.spans.iter().copied())) }
} }
} }
#[derive(Default)] impl<'s, 'i> DiscontinuousString<'s> for InlineSpansSlice<'s, 'i> {
type Chars = InlineChars<
's,
std::iter::Chain<
std::iter::Chain<std::iter::Once<Span>, std::iter::Copied<std::slice::Iter<'i, Span>>>,
std::iter::Once<Span>,
>,
>;
/// Borrow if continuous, copy if discontinuous.
///
/// `span` is interpreted as a byte range into the concatenation of this
/// slice's spans, after the first span has been advanced by `first_skip` and
/// the last span has been cut to `last_len`. If the range lies within a
/// single span the text is borrowed from `src`; otherwise the pieces are
/// copied into an owned string.
///
/// # Panics
///
/// Panics if the collected text does not have the same length as `span`,
/// i.e. if `span` reaches outside the contents of this slice.
fn src(&self, span: Span) -> CowStr<'s> {
    // Offset of the current span within the concatenated slice contents.
    let mut a = 0;
    let mut s = String::new();
    for (i, mut sp) in self.spans.iter().copied().enumerate() {
        // Trim the outermost spans down to the part covered by this slice.
        if i == 0 {
            sp = sp.skip(self.first_skip);
        }
        if i == self.spans.len() - 1 {
            sp = Span::by_len(sp.start(), self.last_len);
        }
        let b = a + sp.len();
        if span.start() < b {
            let r = if a <= span.start() {
                if span.end() <= b {
                    // continuous
                    return CowStr::Borrowed(
                        &sp.of(self.src)[span.start() - a..span.end() - a],
                    );
                }
                (span.start() - a)..sp.len()
            } else if span.end() <= a {
                // The requested range already ended in an earlier span.
                // Stop here: for every later span `span.end() - a` would
                // underflow, and there is nothing left to copy anyway.
                break;
            } else {
                0..sp.len().min(span.end() - a)
            };
            s.push_str(&sp.of(self.src)[r]);
        }
        a = b;
    }
    assert_eq!(span.len(), s.len());
    CowStr::Owned(s)
}
/// Iterate over the characters covered by this slice.
///
/// The iterator is always built as `first span + interior spans + last span`
/// so that both the single-span and the multi-span case share one concrete
/// iterator type:
///
/// * one span: the first element is that span advanced by `first_skip` and
///   limited to `last_len`; the interior is empty and the last element is a
///   zero-length placeholder.
/// * several spans: the first span starts `first_skip` bytes in, every span
///   strictly between the first and the last is yielded unchanged, and the
///   last span is limited to `last_len`.
///
/// # Panics
///
/// Panics if the slice holds no spans.
fn chars(&self) -> Self::Chars {
    let n = self.spans.len();
    let (span_start, r_middle, span_end) = if n == 1 {
        (
            Span::by_len(self.spans[0].start() + self.first_skip, self.last_len),
            0..0,
            // Empty placeholder, contributes no characters.
            Span::by_len(self.spans[n - 1].start(), 0),
        )
    } else {
        (
            Span::new(self.spans[0].start() + self.first_skip, self.spans[0].end()),
            // All interior spans, i.e. indices 1 up to (excluding) the last.
            // The upper bound must be `n - 1`: `n - 2` yields the invalid
            // range `1..0` for two spans and drops the final interior span
            // for three or more.
            1..n - 1,
            Span::by_len(self.spans[n - 1].start(), self.last_len),
        )
    };
    InlineChars::new(
        self.src,
        std::iter::once(span_start)
            .chain(self.spans[r_middle].iter().copied())
            .chain(std::iter::once(span_end)),
    )
}
}
#[derive(Default, Debug)]
struct InlineSpans<'s> { struct InlineSpans<'s> {
src: &'s str, src: &'s str,
spans: Vec<Span>, spans: Vec<Span>,
@ -355,15 +418,61 @@ impl<'s> InlineSpans<'s> {
self.spans.clear(); self.spans.clear();
self.spans.extend(spans); self.spans.extend(spans);
} }
/// Create a view of the part of the inline content covered by `span`.
///
/// `span` is interpreted as a byte range into the concatenation of
/// `self.spans`. The returned slice borrows the sub-list of spans that
/// `span` touches and records how many bytes to skip in the first of them
/// (`first_skip`) and how many bytes of the last of them are included
/// (`last_len`).
///
/// If `span` reaches past the end of the stored spans, the slice is clipped
/// to the end of the last span.
///
/// # Panics
///
/// Panics if no non-empty final piece was found, e.g. when `span` is empty
/// or starts beyond the stored spans.
fn slice<'i>(&'i self, span: Span) -> InlineSpansSlice<'s, 'i> {
    let mut first = 0;
    let mut last = 0;
    let mut first_skip = 0;
    let mut last_len = 0;

    // Offset of the current span within the concatenated contents.
    let mut a = 0;
    for (i, sp) in self.spans.iter().enumerate() {
        let b = a + sp.len();
        if span.start() < b {
            if a <= span.start() {
                // This span contains the start of the requested range.
                first = i;
                first_skip = span.start() - a;
                if span.end() <= b {
                    // continuous
                    last = i;
                    last_len = span.len();
                    break;
                }
            } else {
                // A span after the first one. Remember it as the last span
                // seen so far, but only stop once it actually contains the
                // end of the range; otherwise ranges covering three or more
                // spans would be cut off after the second one.
                last = i;
                last_len = sp.len().min(span.end() - a);
                if span.end() <= b {
                    break;
                }
            }
        }
        a = b;
    }

    assert_ne!(last_len, 0);

    InlineSpansSlice {
        src: self.src,
        first_skip,
        last_len,
        spans: &self.spans[first..=last],
    }
}
} }
/// A borrowed sub-range of an `InlineSpans`, as produced by
/// `InlineSpans::slice`.
///
/// The first and last entries of `spans` may only be partially covered; the
/// covered part is described by `first_skip` and `last_len`.
struct InlineSpansSlice<'s, 'i> {
// Source text that the spans index into.
src: &'s str,
// Number of bytes to skip at the start of the first span.
first_skip: usize,
// Number of bytes of the last span that belong to the slice. When `spans`
// holds a single span, this is counted from the position after `first_skip`.
last_len: usize,
// The spans touched by the slice, borrowed from the parent `InlineSpans`.
spans: &'i [Span],
}
// Character iterator type returned by `InlineSpans::chars`.
//
// NOTE: the `'static` lifetime on the slice iterator is a placeholder, not a
// real borrow: `InlineSpans::chars` obtains this type through `transmute`, so
// the iterator must not outlive or overlap a modification of the span list.
type InlineCharsIter<'s> = InlineChars<'s, std::iter::Copied<std::slice::Iter<'static, Span>>>;
pub struct Parser<'s> { pub struct Parser<'s> {
src: &'s str, src: &'s str,
tree: block::Tree, tree: block::Tree,
inlines: InlineSpans<'s>, inlines: InlineSpans<'s>,
inline_parser: Option<inline::Parser<InlineChars<'s, 'static>>>, inline_parser: Option<inline::Parser<InlineCharsIter<'s>>>,
inline_start: usize, inline_start: usize,
block_attributes: Attributes<'s>,
} }
impl<'s> Parser<'s> { impl<'s> Parser<'s> {
@ -375,84 +484,94 @@ impl<'s> Parser<'s> {
inlines: InlineSpans::new(src), inlines: InlineSpans::new(src),
inline_parser: None, inline_parser: None,
inline_start: 0, inline_start: 0,
block_attributes: Attributes::new(),
} }
} }
} }
impl<'s> Parser<'s> { impl<'s> Parser<'s> {
fn inline(&self, inline: inline::Event) -> Event<'s> { fn inline(&mut self) -> Option<Event<'s>> {
match inline.kind { self.inline_parser.as_mut().and_then(|parser| {
inline::EventKind::Enter(c) | inline::EventKind::Exit(c) => { let mut inline = parser.next();
let t = match c {
inline::Container::Span => Container::Span, let mut first_is_attr = false;
inline::Container::Verbatim => Container::Verbatim, let attributes = inline.as_ref().map_or_else(Attributes::new, |inl| {
inline::Container::InlineMath => Container::Math { display: false }, if let inline::EventKind::Attributes = inl.kind {
inline::Container::DisplayMath => Container::Math { display: true }, first_is_attr = true;
inline::Container::RawFormat => Container::RawInline { attr::parse(self.inlines.slice(inl.span))
format: match self.inlines.src(inline.span) {
CowStr::Owned(_) => panic!(),
CowStr::Borrowed(s) => s,
},
},
inline::Container::Subscript => Container::Subscript,
inline::Container::Superscript => Container::Superscript,
inline::Container::Insert => Container::Insert,
inline::Container::Delete => Container::Delete,
inline::Container::Emphasis => Container::Emphasis,
inline::Container::Strong => Container::Strong,
inline::Container::Mark => Container::Mark,
inline::Container::SingleQuoted => Container::SingleQuoted,
inline::Container::DoubleQuoted => Container::DoubleQuoted,
inline::Container::InlineLink => Container::Link(
match self.inlines.src(inline.span) {
CowStr::Owned(s) => s.replace('\n', "").into(),
s @ CowStr::Borrowed(_) => s,
},
LinkType::Span(SpanLinkType::Inline),
),
inline::Container::InlineImage => Container::Image(
match self.inlines.src(inline.span) {
CowStr::Owned(s) => s.replace('\n', "").into(),
s @ CowStr::Borrowed(_) => s,
},
SpanLinkType::Inline,
),
_ => todo!("{:?}", c),
};
if matches!(inline.kind, inline::EventKind::Enter(_)) {
Event::Start(t, Attributes::new())
} else { } else {
Event::End(t) Attributes::new()
} }
});
if first_is_attr {
inline = parser.next();
} }
inline::EventKind::Atom(a) => match a {
inline::Atom::Ellipsis => Event::Atom(Atom::Ellipsis), inline.map(|inline| match inline.kind {
inline::Atom::EnDash => Event::Atom(Atom::EnDash), inline::EventKind::Enter(c) | inline::EventKind::Exit(c) => {
inline::Atom::EmDash => Event::Atom(Atom::EmDash), let t = match c {
inline::Atom::Nbsp => Event::Atom(Atom::NonBreakingSpace), inline::Container::Span => Container::Span,
inline::Atom::Softbreak => Event::Atom(Atom::Softbreak), inline::Container::Verbatim => Container::Verbatim,
inline::Atom::Hardbreak => Event::Atom(Atom::Hardbreak), inline::Container::InlineMath => Container::Math { display: false },
inline::Atom::Escape => Event::Atom(Atom::Escape), inline::Container::DisplayMath => Container::Math { display: true },
}, inline::Container::RawFormat => Container::RawInline {
inline::EventKind::Str => Event::Str(self.inlines.src(inline.span)), format: match self.inlines.src(inline.span) {
inline::EventKind::Attributes => todo!(), CowStr::Owned(_) => panic!(),
inline::EventKind::AttributesDummy => panic!(), CowStr::Borrowed(s) => s,
} },
},
inline::Container::Subscript => Container::Subscript,
inline::Container::Superscript => Container::Superscript,
inline::Container::Insert => Container::Insert,
inline::Container::Delete => Container::Delete,
inline::Container::Emphasis => Container::Emphasis,
inline::Container::Strong => Container::Strong,
inline::Container::Mark => Container::Mark,
inline::Container::SingleQuoted => Container::SingleQuoted,
inline::Container::DoubleQuoted => Container::DoubleQuoted,
inline::Container::InlineLink => Container::Link(
match self.inlines.src(inline.span) {
CowStr::Owned(s) => s.replace('\n', "").into(),
s @ CowStr::Borrowed(_) => s,
},
LinkType::Span(SpanLinkType::Inline),
),
inline::Container::InlineImage => Container::Image(
match self.inlines.src(inline.span) {
CowStr::Owned(s) => s.replace('\n', "").into(),
s @ CowStr::Borrowed(_) => s,
},
SpanLinkType::Inline,
),
inline::Container::ReferenceLink => todo!("{:?}", c),
inline::Container::ReferenceImage => todo!("{:?}", c),
inline::Container::Autolink => todo!("{:?}", c),
};
if matches!(inline.kind, inline::EventKind::Enter(_)) {
Event::Start(t, attributes)
} else {
Event::End(t)
}
}
inline::EventKind::Atom(a) => match a {
inline::Atom::Ellipsis => Event::Atom(Atom::Ellipsis),
inline::Atom::EnDash => Event::Atom(Atom::EnDash),
inline::Atom::EmDash => Event::Atom(Atom::EmDash),
inline::Atom::Nbsp => Event::Atom(Atom::NonBreakingSpace),
inline::Atom::Softbreak => Event::Atom(Atom::Softbreak),
inline::Atom::Hardbreak => Event::Atom(Atom::Hardbreak),
inline::Atom::Escape => Event::Atom(Atom::Escape),
},
inline::EventKind::Str => Event::Str(self.inlines.src(inline.span)),
inline::EventKind::Attributes | inline::EventKind::AttributesDummy => {
panic!("{:?}", inline)
}
})
})
} }
}
impl<'s> Iterator for Parser<'s> {
type Item = Event<'s>;
fn next(&mut self) -> Option<Self::Item> {
if let Some(parser) = &mut self.inline_parser {
if let Some(inline) = parser.next() {
return Some(self.inline(inline));
}
self.inline_parser = None;
}
fn block(&mut self) -> Option<Event<'s>> {
let mut attributes = Attributes::new();
for ev in &mut self.tree { for ev in &mut self.tree {
let content = ev.span.of(self.src); let content = ev.span.of(self.src);
let event = match ev.kind { let event = match ev.kind {
@ -460,7 +579,7 @@ impl<'s> Iterator for Parser<'s> {
block::Atom::Blankline => Event::Atom(Atom::Blankline), block::Atom::Blankline => Event::Atom(Atom::Blankline),
block::Atom::ThematicBreak => Event::Atom(Atom::ThematicBreak), block::Atom::ThematicBreak => Event::Atom(Atom::ThematicBreak),
block::Atom::Attributes => { block::Atom::Attributes => {
self.block_attributes.parse(&content); attributes.parse(content);
continue; continue;
} }
}, },
@ -478,7 +597,7 @@ impl<'s> Iterator for Parser<'s> {
} }
_ => Container::from_leaf_block(content, l), _ => Container::from_leaf_block(content, l),
}; };
Event::Start(container, self.block_attributes.take()) Event::Start(container, attributes)
} }
block::Node::Container(c) => { block::Node::Container(c) => {
let container = match c { let container = match c {
@ -487,7 +606,7 @@ impl<'s> Iterator for Parser<'s> {
}, },
_ => Container::from_container_block(content, c), _ => Container::from_container_block(content, c),
}; };
Event::Start(container, self.block_attributes.take()) Event::Start(container, attributes)
} }
}, },
tree::EventKind::Exit(c) => match c { tree::EventKind::Exit(c) => match c {
@ -500,11 +619,18 @@ impl<'s> Iterator for Parser<'s> {
}; };
return Some(event); return Some(event);
} }
None None
} }
} }
/// Event stream over the parsed document.
///
/// Inline events take priority: block events are only requested once the
/// inline step has nothing to yield.
impl<'s> Iterator for Parser<'s> {
    type Item = Event<'s>;

    fn next(&mut self) -> Option<Self::Item> {
        match self.inline() {
            Some(event) => Some(event),
            // No inline event available, continue with the block tree.
            None => self.block(),
        }
    }
}
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::Atom::*; use super::Atom::*;
@ -694,4 +820,17 @@ mod test {
End(Paragraph), End(Paragraph),
); );
} }
// An attribute set directly following an inline container (`_def_{.ghi}`)
// must end up on the `Start` event of that container and must not produce
// any event of its own.
#[test]
fn attr_inline() {
test_parse!(
"abc _def_{.ghi}",
Start(Paragraph, Attributes::new()),
Str("abc ".into()),
// The `{.ghi}` suffix is reported as a `class` attribute of the emphasis.
Start(Emphasis, [("class", "ghi")].into_iter().collect()),
Str("def".into()),
End(Emphasis),
End(Paragraph),
);
}
} }

View file

@ -6,9 +6,6 @@ fn main() {
.read_to_string(&mut src) .read_to_string(&mut src)
.expect("failed to read utf-8 file"); .expect("failed to read utf-8 file");
let p = jotdown::Parser::new(&src);
println!("{:#?}", p.collect::<Vec<_>>());
let p = jotdown::Parser::new(&src); let p = jotdown::Parser::new(&src);
let mut out = std::io::BufWriter::new(std::io::stdout()); let mut out = std::io::BufWriter::new(std::io::stdout());
jotdown::html::write(&mut out, p).unwrap(); jotdown::html::write(&mut out, p).unwrap();