discontinuous spans
This commit is contained in:
parent
903578b04d
commit
6fdbd09cd1
1 changed files with 154 additions and 63 deletions
217
src/lib.rs
217
src/lib.rs
|
@ -224,52 +224,6 @@ pub enum Atom {
|
||||||
Blankline,
|
Blankline,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'s> Event<'s> {
|
|
||||||
fn from_inline(src: &'s str, inline: inline::Event) -> Self {
|
|
||||||
let content = inline.span.of(src);
|
|
||||||
match inline.kind {
|
|
||||||
inline::EventKind::Enter(c) | inline::EventKind::Exit(c) => {
|
|
||||||
let t = match c {
|
|
||||||
inline::Container::Span => Container::Span,
|
|
||||||
inline::Container::Verbatim => Container::Verbatim,
|
|
||||||
inline::Container::InlineMath => Container::Math { display: false },
|
|
||||||
inline::Container::DisplayMath => Container::Math { display: true },
|
|
||||||
inline::Container::RawFormat => Container::RawInline { format: content },
|
|
||||||
inline::Container::Subscript => Container::Subscript,
|
|
||||||
inline::Container::Superscript => Container::Superscript,
|
|
||||||
inline::Container::Insert => Container::Insert,
|
|
||||||
inline::Container::Delete => Container::Delete,
|
|
||||||
inline::Container::Emphasis => Container::Emphasis,
|
|
||||||
inline::Container::Strong => Container::Strong,
|
|
||||||
inline::Container::Mark => Container::Mark,
|
|
||||||
inline::Container::SingleQuoted => Container::SingleQuoted,
|
|
||||||
inline::Container::DoubleQuoted => Container::DoubleQuoted,
|
|
||||||
inline::Container::InlineLink => {
|
|
||||||
Container::Link(content.into(), LinkType::Inline)
|
|
||||||
}
|
|
||||||
_ => todo!(),
|
|
||||||
};
|
|
||||||
if matches!(inline.kind, inline::EventKind::Enter(_)) {
|
|
||||||
Self::Start(t, Attributes::none())
|
|
||||||
} else {
|
|
||||||
Self::End(t)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
inline::EventKind::Atom(a) => Event::Atom(match a {
|
|
||||||
inline::Atom::Ellipsis => Atom::Ellipsis,
|
|
||||||
inline::Atom::EnDash => Atom::EnDash,
|
|
||||||
inline::Atom::EmDash => Atom::EmDash,
|
|
||||||
inline::Atom::Nbsp => Atom::NonBreakingSpace,
|
|
||||||
inline::Atom::Softbreak => Atom::Softbreak,
|
|
||||||
inline::Atom::Hardbreak => Atom::Hardbreak,
|
|
||||||
inline::Atom::Escape => Atom::Escape,
|
|
||||||
}),
|
|
||||||
inline::EventKind::Str => Self::Str(content.into()),
|
|
||||||
inline::EventKind::Attributes => todo!(),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'s> Container<'s> {
|
impl<'s> Container<'s> {
|
||||||
fn from_leaf_block(content: &str, l: block::Leaf) -> Self {
|
fn from_leaf_block(content: &str, l: block::Leaf) -> Self {
|
||||||
match l {
|
match l {
|
||||||
|
@ -277,7 +231,7 @@ impl<'s> Container<'s> {
|
||||||
block::Leaf::Heading => Self::Heading {
|
block::Leaf::Heading => Self::Heading {
|
||||||
level: content.len(),
|
level: content.len(),
|
||||||
},
|
},
|
||||||
block::Leaf::CodeBlock => Self::CodeBlock { lang: None },
|
block::Leaf::CodeBlock => panic!(),
|
||||||
_ => todo!(),
|
_ => todo!(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -285,7 +239,7 @@ impl<'s> Container<'s> {
|
||||||
fn from_container_block(content: &'s str, c: block::Container) -> Self {
|
fn from_container_block(content: &'s str, c: block::Container) -> Self {
|
||||||
match c {
|
match c {
|
||||||
block::Container::Blockquote => Self::Blockquote,
|
block::Container::Blockquote => Self::Blockquote,
|
||||||
block::Container::Div => Self::Div { class: None },
|
block::Container::Div => panic!(),
|
||||||
block::Container::Footnote => Self::Footnote { tag: content },
|
block::Container::Footnote => Self::Footnote { tag: content },
|
||||||
block::Container::ListItem => todo!(),
|
block::Container::ListItem => todo!(),
|
||||||
}
|
}
|
||||||
|
@ -303,6 +257,7 @@ impl<'s> Attributes<'s> {
|
||||||
Self(None)
|
Self(None)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[must_use]
|
||||||
pub fn take(&mut self) -> Self {
|
pub fn take(&mut self) -> Self {
|
||||||
Self(self.0.take())
|
Self(self.0.take())
|
||||||
}
|
}
|
||||||
|
@ -313,12 +268,25 @@ impl<'s> Attributes<'s> {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
struct InlineChars<'t, 's> {
|
struct InlineChars<'s, 't> {
|
||||||
src: &'s str,
|
src: &'s str,
|
||||||
inlines: tree::Inlines<'t, block::Node, block::Atom>,
|
inlines: std::slice::Iter<'t, Span>,
|
||||||
|
next: std::str::Chars<'s>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'t, 's> Iterator for InlineChars<'t, 's> {
|
// Implement inlines.flat_map(|sp| sp.of(self.src).chars())
|
||||||
|
// Is there a better way to do this?
|
||||||
|
impl<'s, 't> InlineChars<'s, 't> {
|
||||||
|
fn new(src: &'s str, inlines: &'t [Span]) -> Self {
|
||||||
|
Self {
|
||||||
|
src,
|
||||||
|
inlines: inlines.iter(),
|
||||||
|
next: "".chars(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'s, 't> Iterator for InlineChars<'s, 't> {
|
||||||
type Item = char;
|
type Item = char;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
@ -331,7 +299,8 @@ impl<'t, 's> Iterator for InlineChars<'t, 's> {
|
||||||
pub struct Parser<'s> {
|
pub struct Parser<'s> {
|
||||||
src: &'s str,
|
src: &'s str,
|
||||||
tree: block::Tree,
|
tree: block::Tree,
|
||||||
inline_parser: Option<inline::Parser<InlineChars<'static, 's>>>,
|
inline_spans: Vec<Span>,
|
||||||
|
inline_parser: Option<inline::Parser<InlineChars<'s, 'static>>>,
|
||||||
inline_start: usize,
|
inline_start: usize,
|
||||||
block_attributes: Attributes<'s>,
|
block_attributes: Attributes<'s>,
|
||||||
}
|
}
|
||||||
|
@ -342,6 +311,7 @@ impl<'s> Parser<'s> {
|
||||||
Self {
|
Self {
|
||||||
src,
|
src,
|
||||||
tree: block::parse(src),
|
tree: block::parse(src),
|
||||||
|
inline_spans: Vec::new(),
|
||||||
inline_parser: None,
|
inline_parser: None,
|
||||||
inline_start: 0,
|
inline_start: 0,
|
||||||
block_attributes: Attributes::none(),
|
block_attributes: Attributes::none(),
|
||||||
|
@ -349,14 +319,90 @@ impl<'s> Parser<'s> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl<'s> Parser<'s> {
|
||||||
|
fn inline(&self, inline: inline::Event) -> Event<'s> {
|
||||||
|
//let content = inline.span.of(self.src);
|
||||||
|
match inline.kind {
|
||||||
|
inline::EventKind::Enter(c) | inline::EventKind::Exit(c) => {
|
||||||
|
let t = match c {
|
||||||
|
inline::Container::Span => Container::Span,
|
||||||
|
inline::Container::Verbatim => Container::Verbatim,
|
||||||
|
inline::Container::InlineMath => Container::Math { display: false },
|
||||||
|
inline::Container::DisplayMath => Container::Math { display: true },
|
||||||
|
inline::Container::RawFormat => Container::RawInline {
|
||||||
|
format: self.inline_str_cont(inline.span),
|
||||||
|
},
|
||||||
|
inline::Container::Subscript => Container::Subscript,
|
||||||
|
inline::Container::Superscript => Container::Superscript,
|
||||||
|
inline::Container::Insert => Container::Insert,
|
||||||
|
inline::Container::Delete => Container::Delete,
|
||||||
|
inline::Container::Emphasis => Container::Emphasis,
|
||||||
|
inline::Container::Strong => Container::Strong,
|
||||||
|
inline::Container::Mark => Container::Mark,
|
||||||
|
inline::Container::SingleQuoted => Container::SingleQuoted,
|
||||||
|
inline::Container::DoubleQuoted => Container::DoubleQuoted,
|
||||||
|
inline::Container::InlineLink => Container::Link(
|
||||||
|
self.inline_str(inline.span).replace('\n', "").into(),
|
||||||
|
LinkType::Inline,
|
||||||
|
),
|
||||||
|
_ => todo!(),
|
||||||
|
};
|
||||||
|
if matches!(inline.kind, inline::EventKind::Enter(_)) {
|
||||||
|
Event::Start(t, Attributes::none())
|
||||||
|
} else {
|
||||||
|
Event::End(t)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
inline::EventKind::Atom(a) => match a {
|
||||||
|
inline::Atom::Ellipsis => Event::Atom(Atom::Ellipsis),
|
||||||
|
inline::Atom::EnDash => Event::Atom(Atom::EnDash),
|
||||||
|
inline::Atom::EmDash => Event::Atom(Atom::EmDash),
|
||||||
|
inline::Atom::Nbsp => Event::Atom(Atom::NonBreakingSpace),
|
||||||
|
inline::Atom::Softbreak => Event::Atom(Atom::Softbreak),
|
||||||
|
inline::Atom::Hardbreak => Event::Atom(Atom::Hardbreak),
|
||||||
|
inline::Atom::Escape => Event::Atom(Atom::Escape),
|
||||||
|
},
|
||||||
|
inline::EventKind::Str => Event::Str(self.inline_str(inline.span)),
|
||||||
|
inline::EventKind::Attributes => todo!(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn inline_str_cont(&self, span: Span) -> &'s str {
|
||||||
|
span.translate(self.inline_spans[0].start()).of(self.src)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Copy string if discontinuous.
|
||||||
|
fn inline_str(&self, span: Span) -> CowStr<'s> {
|
||||||
|
let mut a = 0;
|
||||||
|
let mut s = String::new();
|
||||||
|
for sp in &self.inline_spans {
|
||||||
|
let b = a + sp.len();
|
||||||
|
if span.start() < b {
|
||||||
|
let r = if a <= span.start() {
|
||||||
|
if span.end() <= b {
|
||||||
|
// continuous
|
||||||
|
return CowStr::Borrowed(self.inline_str_cont(span));
|
||||||
|
}
|
||||||
|
(span.start() - a)..sp.len()
|
||||||
|
} else {
|
||||||
|
0..sp.len().min(span.end() - a)
|
||||||
|
};
|
||||||
|
s.push_str(&sp.of(self.src)[r]);
|
||||||
|
}
|
||||||
|
a = b;
|
||||||
|
}
|
||||||
|
assert_eq!(span.len(), s.len());
|
||||||
|
CowStr::Owned(s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl<'s> Iterator for Parser<'s> {
|
impl<'s> Iterator for Parser<'s> {
|
||||||
type Item = Event<'s>;
|
type Item = Event<'s>;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
if let Some(parser) = &mut self.inline_parser {
|
if let Some(parser) = &mut self.inline_parser {
|
||||||
if let Some(mut inline) = parser.next() {
|
if let Some(inline) = parser.next() {
|
||||||
inline.span = inline.span.translate(self.inline_start);
|
return Some(self.inline(inline));
|
||||||
return Some(Event::from_inline(self.src, inline));
|
|
||||||
}
|
}
|
||||||
self.inline_parser = None;
|
self.inline_parser = None;
|
||||||
}
|
}
|
||||||
|
@ -374,13 +420,11 @@ impl<'s> Iterator for Parser<'s> {
|
||||||
},
|
},
|
||||||
tree::EventKind::Enter(c) => match c {
|
tree::EventKind::Enter(c) => match c {
|
||||||
block::Node::Leaf(l) => {
|
block::Node::Leaf(l) => {
|
||||||
let inlines = self.tree.inlines();
|
self.inline_spans = self.tree.inlines().collect();
|
||||||
let chars = InlineChars {
|
let chars = InlineChars::new(self.src, unsafe {
|
||||||
src: self.src,
|
std::mem::transmute(self.inline_spans.as_slice())
|
||||||
inlines,
|
});
|
||||||
};
|
self.inline_parser = Some(inline::Parser::new(chars));
|
||||||
self.inline_parser =
|
|
||||||
unsafe { Some(std::mem::transmute(inline::Parser::new(chars))) };
|
|
||||||
self.inline_start = ev.span.end();
|
self.inline_start = ev.span.end();
|
||||||
let container = match l {
|
let container = match l {
|
||||||
block::Leaf::CodeBlock { .. } => {
|
block::Leaf::CodeBlock { .. } => {
|
||||||
|
@ -471,6 +515,38 @@ mod test {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn empty() {
|
||||||
|
test_parse!("");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn heading() {
|
||||||
|
test_parse!(
|
||||||
|
"#\n",
|
||||||
|
Start(Heading { level: 1 }, Attributes::none()),
|
||||||
|
End(Heading { level: 1 }),
|
||||||
|
);
|
||||||
|
test_parse!(
|
||||||
|
"# abc\ndef\n",
|
||||||
|
Start(Heading { level: 1 }, Attributes::none()),
|
||||||
|
Str(CowStr::Borrowed("abc")),
|
||||||
|
Atom(Softbreak),
|
||||||
|
Str(CowStr::Borrowed("def")),
|
||||||
|
End(Heading { level: 1 }),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn blockquote() {
|
||||||
|
test_parse!(
|
||||||
|
">\n",
|
||||||
|
Start(Blockquote, Attributes::none()),
|
||||||
|
Atom(Blankline),
|
||||||
|
End(Blockquote),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn para() {
|
fn para() {
|
||||||
test_parse!(
|
test_parse!(
|
||||||
|
@ -507,6 +583,19 @@ mod test {
|
||||||
End(Verbatim),
|
End(Verbatim),
|
||||||
End(Paragraph),
|
End(Paragraph),
|
||||||
);
|
);
|
||||||
|
test_parse!(
|
||||||
|
concat!(
|
||||||
|
"> `abc\n",
|
||||||
|
"> def\n", //
|
||||||
|
),
|
||||||
|
Start(Blockquote, Attributes::none()),
|
||||||
|
Start(Paragraph, Attributes::none()),
|
||||||
|
Start(Verbatim, Attributes::none()),
|
||||||
|
Str(CowStr::Owned("abc\ndef".to_string())),
|
||||||
|
End(Verbatim),
|
||||||
|
End(Paragraph),
|
||||||
|
End(Blockquote),
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -539,14 +628,16 @@ mod test {
|
||||||
"> [text](url\n",
|
"> [text](url\n",
|
||||||
"> url)\n", //
|
"> url)\n", //
|
||||||
),
|
),
|
||||||
|
Start(Blockquote, Attributes::none()),
|
||||||
Start(Paragraph, Attributes::none()),
|
Start(Paragraph, Attributes::none()),
|
||||||
Start(
|
Start(
|
||||||
Link(CowStr::Borrowed("urlurl"), LinkType::Inline),
|
Link(CowStr::Owned("urlurl".to_string()), LinkType::Inline),
|
||||||
Attributes::none()
|
Attributes::none()
|
||||||
),
|
),
|
||||||
Str(CowStr::Borrowed("text")),
|
Str(CowStr::Borrowed("text")),
|
||||||
End(Link(CowStr::Borrowed("urlurl"), LinkType::Inline)),
|
End(Link(CowStr::Borrowed("urlurl"), LinkType::Inline)),
|
||||||
End(Paragraph),
|
End(Paragraph),
|
||||||
|
End(Blockquote),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue