diff --git a/src/attr.rs b/src/attr.rs index 10a5436..7b8b9e7 100644 --- a/src/attr.rs +++ b/src/attr.rs @@ -4,6 +4,12 @@ use crate::Span; use State::*; +pub(crate) fn parse<'s, S: DiscontinuousString<'s>>(chars: S) -> Attributes<'s> { + let mut a = Attributes::new(); + a.parse(chars); + a +} + pub fn valid>(chars: I) -> usize { let mut p = Parser::new(chars); if p.any(|e| matches!(e, Element::Invalid)) { @@ -29,7 +35,7 @@ impl<'s> Attributes<'s> { Self(self.0.take()) } - pub(crate) fn parse>(&mut self, input: &S) -> bool { + pub(crate) fn parse>(&mut self, input: S) -> bool { for elem in Parser::new(input.chars()) { match elem { Element::Class(c) => self.add("class", input.src(c)), @@ -275,7 +281,7 @@ mod test { ($src:expr $(,$($av:expr),* $(,)?)?) => { #[allow(unused)] let mut attr =super::Attributes::new(); - attr.parse(&$src); + attr.parse($src); let actual = attr.iter().collect::>(); let expected = &[$($($av),*,)?]; assert_eq!(actual, expected, "\n\n{}\n\n", $src); diff --git a/src/lib.rs b/src/lib.rs index 687f1e0..3455f05 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -255,24 +255,24 @@ impl<'s> Container<'s> { } #[derive(Clone)] -struct InlineChars<'s, 't> { +struct InlineChars<'s, I> { src: &'s str, - inlines: std::slice::Iter<'t, Span>, + inlines: I, next: std::str::Chars<'s>, } // Implement inlines.flat_map(|sp| sp.of(self.src).chars()) -impl<'s, 't> InlineChars<'s, 't> { - fn new(src: &'s str, inlines: &'t [Span]) -> Self { +impl<'s, 't, I: Iterator> InlineChars<'s, I> { + fn new(src: &'s str, inlines: I) -> Self { Self { src, - inlines: inlines.iter(), + inlines, next: "".chars(), } } } -impl<'s, 't> Iterator for InlineChars<'s, 't> { +impl<'s, 't, I: Iterator> Iterator for InlineChars<'s, I> { type Item = char; fn next(&mut self) -> Option { @@ -303,9 +303,8 @@ impl<'s> DiscontinuousString<'s> for &'s str { } impl<'s> DiscontinuousString<'s> for InlineSpans<'s> { - type Chars = InlineChars<'s, 'static>; + type Chars = InlineCharsIter<'s>; - /// Borrow if continuous, copy if discontiunous. fn src(&self, span: Span) -> CowStr<'s> { let mut a = 0; let mut s = String::new(); @@ -333,11 +332,75 @@ impl<'s> DiscontinuousString<'s> for InlineSpans<'s> { fn chars(&self) -> Self::Chars { // SAFETY: do not call set_spans while chars is in use - unsafe { std::mem::transmute(InlineChars::new(self.src, &self.spans)) } + unsafe { std::mem::transmute(InlineChars::new(self.src, self.spans.iter().copied())) } } } -#[derive(Default)] +impl<'s, 'i> DiscontinuousString<'s> for InlineSpansSlice<'s, 'i> { + type Chars = InlineChars< + 's, + std::iter::Chain< + std::iter::Chain, std::iter::Copied>>, + std::iter::Once, + >, + >; + + /// Borrow if continuous, copy if discontiunous. + fn src(&self, span: Span) -> CowStr<'s> { + let mut a = 0; + let mut s = String::new(); + for (i, mut sp) in self.spans.iter().copied().enumerate() { + if i == 0 { + sp = sp.skip(self.first_skip); + } + if i == self.spans.len() - 1 { + sp = Span::by_len(sp.start(), self.last_len); + } + let b = a + sp.len(); + if span.start() < b { + let r = if a <= span.start() { + if span.end() <= b { + // continuous + return CowStr::Borrowed( + &sp.of(self.src)[span.start() - a..span.end() - a], + ); + } + (span.start() - a)..sp.len() + } else { + 0..sp.len().min(span.end() - a) + }; + s.push_str(&sp.of(self.src)[r]); + } + a = b; + } + assert_eq!(span.len(), s.len()); + CowStr::Owned(s) + } + + fn chars(&self) -> Self::Chars { + let (span_start, r_middle, span_end) = if self.spans.len() == 1 { + ( + Span::by_len(self.spans[0].start() + self.first_skip, self.last_len), + 0..0, + Span::by_len(self.spans[self.spans.len() - 1].start(), 0), + ) + } else { + ( + Span::new(self.spans[0].start() + self.first_skip, self.spans[0].end()), + 1..self.spans.len().saturating_sub(2), + Span::by_len(self.spans[self.spans.len() - 1].start(), self.last_len), + ) + }; + InlineChars::new( + self.src, + std::iter::once(span_start) + .chain(self.spans[r_middle].iter().copied()) + .chain(std::iter::once(span_end)), + ) + } +} + +#[derive(Default, Debug)] struct InlineSpans<'s> { src: &'s str, spans: Vec, @@ -355,15 +418,61 @@ impl<'s> InlineSpans<'s> { self.spans.clear(); self.spans.extend(spans); } + + fn slice<'i>(&'i self, span: Span) -> InlineSpansSlice<'s, 'i> { + let mut first = 0; + let mut last = 0; + let mut first_skip = 0; + let mut last_len = 0; + + let mut a = 0; + for (i, sp) in self.spans.iter().enumerate() { + let b = a + sp.len(); + if span.start() < b { + if a <= span.start() { + first = i; + first_skip = span.start() - a; + if span.end() <= b { + // continuous + last = i; + last_len = span.len(); + break; + } + } else { + last = i; + last_len = sp.len().min(span.end() - a); + break; + }; + } + a = b; + } + + assert_ne!(last_len, 0); + + InlineSpansSlice { + src: self.src, + first_skip, + last_len, + spans: &self.spans[first..=last], + } + } } +struct InlineSpansSlice<'s, 'i> { + src: &'s str, + first_skip: usize, + last_len: usize, + spans: &'i [Span], +} + +type InlineCharsIter<'s> = InlineChars<'s, std::iter::Copied>>; + pub struct Parser<'s> { src: &'s str, tree: block::Tree, inlines: InlineSpans<'s>, - inline_parser: Option>>, + inline_parser: Option>>, inline_start: usize, - block_attributes: Attributes<'s>, } impl<'s> Parser<'s> { @@ -375,84 +484,94 @@ impl<'s> Parser<'s> { inlines: InlineSpans::new(src), inline_parser: None, inline_start: 0, - block_attributes: Attributes::new(), } } } impl<'s> Parser<'s> { - fn inline(&self, inline: inline::Event) -> Event<'s> { - match inline.kind { - inline::EventKind::Enter(c) | inline::EventKind::Exit(c) => { - let t = match c { - inline::Container::Span => Container::Span, - inline::Container::Verbatim => Container::Verbatim, - inline::Container::InlineMath => Container::Math { display: false }, - inline::Container::DisplayMath => Container::Math { display: true }, - inline::Container::RawFormat => Container::RawInline { - format: match self.inlines.src(inline.span) { - CowStr::Owned(_) => panic!(), - CowStr::Borrowed(s) => s, - }, - }, - inline::Container::Subscript => Container::Subscript, - inline::Container::Superscript => Container::Superscript, - inline::Container::Insert => Container::Insert, - inline::Container::Delete => Container::Delete, - inline::Container::Emphasis => Container::Emphasis, - inline::Container::Strong => Container::Strong, - inline::Container::Mark => Container::Mark, - inline::Container::SingleQuoted => Container::SingleQuoted, - inline::Container::DoubleQuoted => Container::DoubleQuoted, - inline::Container::InlineLink => Container::Link( - match self.inlines.src(inline.span) { - CowStr::Owned(s) => s.replace('\n', "").into(), - s @ CowStr::Borrowed(_) => s, - }, - LinkType::Span(SpanLinkType::Inline), - ), - inline::Container::InlineImage => Container::Image( - match self.inlines.src(inline.span) { - CowStr::Owned(s) => s.replace('\n', "").into(), - s @ CowStr::Borrowed(_) => s, - }, - SpanLinkType::Inline, - ), - _ => todo!("{:?}", c), - }; - if matches!(inline.kind, inline::EventKind::Enter(_)) { - Event::Start(t, Attributes::new()) + fn inline(&mut self) -> Option> { + self.inline_parser.as_mut().and_then(|parser| { + let mut inline = parser.next(); + + let mut first_is_attr = false; + let attributes = inline.as_ref().map_or_else(Attributes::new, |inl| { + if let inline::EventKind::Attributes = inl.kind { + first_is_attr = true; + attr::parse(self.inlines.slice(inl.span)) } else { - Event::End(t) + Attributes::new() } + }); + + if first_is_attr { + inline = parser.next(); } - inline::EventKind::Atom(a) => match a { - inline::Atom::Ellipsis => Event::Atom(Atom::Ellipsis), - inline::Atom::EnDash => Event::Atom(Atom::EnDash), - inline::Atom::EmDash => Event::Atom(Atom::EmDash), - inline::Atom::Nbsp => Event::Atom(Atom::NonBreakingSpace), - inline::Atom::Softbreak => Event::Atom(Atom::Softbreak), - inline::Atom::Hardbreak => Event::Atom(Atom::Hardbreak), - inline::Atom::Escape => Event::Atom(Atom::Escape), - }, - inline::EventKind::Str => Event::Str(self.inlines.src(inline.span)), - inline::EventKind::Attributes => todo!(), - inline::EventKind::AttributesDummy => panic!(), - } + + inline.map(|inline| match inline.kind { + inline::EventKind::Enter(c) | inline::EventKind::Exit(c) => { + let t = match c { + inline::Container::Span => Container::Span, + inline::Container::Verbatim => Container::Verbatim, + inline::Container::InlineMath => Container::Math { display: false }, + inline::Container::DisplayMath => Container::Math { display: true }, + inline::Container::RawFormat => Container::RawInline { + format: match self.inlines.src(inline.span) { + CowStr::Owned(_) => panic!(), + CowStr::Borrowed(s) => s, + }, + }, + inline::Container::Subscript => Container::Subscript, + inline::Container::Superscript => Container::Superscript, + inline::Container::Insert => Container::Insert, + inline::Container::Delete => Container::Delete, + inline::Container::Emphasis => Container::Emphasis, + inline::Container::Strong => Container::Strong, + inline::Container::Mark => Container::Mark, + inline::Container::SingleQuoted => Container::SingleQuoted, + inline::Container::DoubleQuoted => Container::DoubleQuoted, + inline::Container::InlineLink => Container::Link( + match self.inlines.src(inline.span) { + CowStr::Owned(s) => s.replace('\n', "").into(), + s @ CowStr::Borrowed(_) => s, + }, + LinkType::Span(SpanLinkType::Inline), + ), + inline::Container::InlineImage => Container::Image( + match self.inlines.src(inline.span) { + CowStr::Owned(s) => s.replace('\n', "").into(), + s @ CowStr::Borrowed(_) => s, + }, + SpanLinkType::Inline, + ), + inline::Container::ReferenceLink => todo!("{:?}", c), + inline::Container::ReferenceImage => todo!("{:?}", c), + inline::Container::Autolink => todo!("{:?}", c), + }; + if matches!(inline.kind, inline::EventKind::Enter(_)) { + Event::Start(t, attributes) + } else { + Event::End(t) + } + } + inline::EventKind::Atom(a) => match a { + inline::Atom::Ellipsis => Event::Atom(Atom::Ellipsis), + inline::Atom::EnDash => Event::Atom(Atom::EnDash), + inline::Atom::EmDash => Event::Atom(Atom::EmDash), + inline::Atom::Nbsp => Event::Atom(Atom::NonBreakingSpace), + inline::Atom::Softbreak => Event::Atom(Atom::Softbreak), + inline::Atom::Hardbreak => Event::Atom(Atom::Hardbreak), + inline::Atom::Escape => Event::Atom(Atom::Escape), + }, + inline::EventKind::Str => Event::Str(self.inlines.src(inline.span)), + inline::EventKind::Attributes | inline::EventKind::AttributesDummy => { + panic!("{:?}", inline) + } + }) + }) } -} - -impl<'s> Iterator for Parser<'s> { - type Item = Event<'s>; - - fn next(&mut self) -> Option { - if let Some(parser) = &mut self.inline_parser { - if let Some(inline) = parser.next() { - return Some(self.inline(inline)); - } - self.inline_parser = None; - } + fn block(&mut self) -> Option> { + let mut attributes = Attributes::new(); for ev in &mut self.tree { let content = ev.span.of(self.src); let event = match ev.kind { @@ -460,7 +579,7 @@ impl<'s> Iterator for Parser<'s> { block::Atom::Blankline => Event::Atom(Atom::Blankline), block::Atom::ThematicBreak => Event::Atom(Atom::ThematicBreak), block::Atom::Attributes => { - self.block_attributes.parse(&content); + attributes.parse(content); continue; } }, @@ -478,7 +597,7 @@ impl<'s> Iterator for Parser<'s> { } _ => Container::from_leaf_block(content, l), }; - Event::Start(container, self.block_attributes.take()) + Event::Start(container, attributes) } block::Node::Container(c) => { let container = match c { @@ -487,7 +606,7 @@ impl<'s> Iterator for Parser<'s> { }, _ => Container::from_container_block(content, c), }; - Event::Start(container, self.block_attributes.take()) + Event::Start(container, attributes) } }, tree::EventKind::Exit(c) => match c { @@ -500,11 +619,18 @@ impl<'s> Iterator for Parser<'s> { }; return Some(event); } - None } } +impl<'s> Iterator for Parser<'s> { + type Item = Event<'s>; + + fn next(&mut self) -> Option { + self.inline().or_else(|| self.block()) + } +} + #[cfg(test)] mod test { use super::Atom::*; @@ -694,4 +820,17 @@ mod test { End(Paragraph), ); } + + #[test] + fn attr_inline() { + test_parse!( + "abc _def_{.ghi}", + Start(Paragraph, Attributes::new()), + Str("abc ".into()), + Start(Emphasis, [("class", "ghi")].into_iter().collect()), + Str("def".into()), + End(Emphasis), + End(Paragraph), + ); + } } diff --git a/src/main.rs b/src/main.rs index e9fea46..c6b4c57 100644 --- a/src/main.rs +++ b/src/main.rs @@ -6,9 +6,6 @@ fn main() { .read_to_string(&mut src) .expect("failed to read utf-8 file"); - let p = jotdown::Parser::new(&src); - println!("{:#?}", p.collect::>()); - let p = jotdown::Parser::new(&src); let mut out = std::io::BufWriter::new(std::io::stdout()); jotdown::html::write(&mut out, p).unwrap();