inline: parse multi-line link tags/urls

reimplement after being broken by the "take str per line instead of full
inline iter" commit

this also resolves #22
This commit is contained in:
Noah Hellman 2023-03-14 22:01:57 +01:00
parent 98f3fe5c7c
commit a846477cea
3 changed files with 149 additions and 83 deletions

View file

@ -1,5 +1,6 @@
use crate::attr; use crate::attr;
use crate::lex; use crate::lex;
use crate::CowStr;
use crate::Span; use crate::Span;
use lex::Delimiter; use lex::Delimiter;
@ -23,8 +24,8 @@ pub enum Atom {
Quote { ty: QuoteType, left: bool }, Quote { ty: QuoteType, left: bool },
} }
#[derive(Debug, Copy, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub enum Container { pub enum Container<'s> {
Span, Span,
Subscript, Subscript,
Superscript, Superscript,
@ -38,14 +39,10 @@ pub enum Container {
RawFormat, RawFormat,
InlineMath, InlineMath,
DisplayMath, DisplayMath,
/// Span is the reference link tag. ReferenceLink(CowStr<'s>),
ReferenceLink, ReferenceImage(CowStr<'s>),
/// Span is the reference link tag. InlineLink(CowStr<'s>),
ReferenceImage, InlineImage(CowStr<'s>),
/// Span is the URL.
InlineLink,
/// Span is the URL.
InlineImage,
/// Open delimiter span is URL, closing is '>'. /// Open delimiter span is URL, closing is '>'.
Autolink, Autolink,
} }
@ -57,9 +54,9 @@ pub enum QuoteType {
} }
#[derive(Clone, Debug, PartialEq, Eq)] #[derive(Clone, Debug, PartialEq, Eq)]
pub enum EventKind { pub enum EventKind<'s> {
Enter(Container), Enter(Container<'s>),
Exit(Container), Exit(Container<'s>),
Atom(Atom), Atom(Atom),
Str, Str,
Attributes { container: bool }, Attributes { container: bool },
@ -67,8 +64,8 @@ pub enum EventKind {
} }
#[derive(Clone, Debug, PartialEq, Eq)] #[derive(Clone, Debug, PartialEq, Eq)]
pub struct Event { pub struct Event<'s> {
pub kind: EventKind, pub kind: EventKind<'s>,
pub span: Span, pub span: Span,
} }
@ -79,6 +76,8 @@ struct Input<'s> {
lexer: lex::Lexer<'s>, lexer: lex::Lexer<'s>,
/// The block is complete, the final line has been provided. /// The block is complete, the final line has been provided.
complete: bool, complete: bool,
/// Span of current line.
span_line: Span,
/// Span of current event. /// Span of current event.
span: Span, span: Span,
} }
@ -89,6 +88,7 @@ impl<'s> Input<'s> {
src, src,
lexer: lex::Lexer::new(""), lexer: lex::Lexer::new(""),
complete: false, complete: false,
span_line: Span::new(0, 0),
span: Span::empty_at(0), span: Span::empty_at(0),
} }
} }
@ -97,6 +97,7 @@ impl<'s> Input<'s> {
debug_assert!(!self.complete); debug_assert!(!self.complete);
self.lexer = lex::Lexer::new(line.of(self.src)); self.lexer = lex::Lexer::new(line.of(self.src));
self.complete = last; self.complete = last;
self.span_line = line;
self.span = line.empty_before(); self.span = line.empty_before();
} }
@ -192,7 +193,7 @@ pub struct Parser<'s> {
openers: Vec<(Opener, usize)>, openers: Vec<(Opener, usize)>,
/// Buffer queue for next events. Events are buffered until no modifications due to future /// Buffer queue for next events. Events are buffered until no modifications due to future
/// characters are needed. /// characters are needed.
events: std::collections::VecDeque<Event>, events: std::collections::VecDeque<Event<'s>>,
/// State if inside a verbatim container. /// State if inside a verbatim container.
verbatim: Option<VerbatimState>, verbatim: Option<VerbatimState>,
} }
@ -219,12 +220,12 @@ impl<'s> Parser<'s> {
debug_assert!(self.verbatim.is_none()); debug_assert!(self.verbatim.is_none());
} }
fn push_sp(&mut self, kind: EventKind, span: Span) -> Option<()> { fn push_sp(&mut self, kind: EventKind<'s>, span: Span) -> Option<()> {
self.events.push_back(Event { kind, span }); self.events.push_back(Event { kind, span });
Some(()) Some(())
} }
fn push(&mut self, kind: EventKind) -> Option<()> { fn push(&mut self, kind: EventKind<'s>) -> Option<()> {
self.push_sp(kind, self.input.span) self.push_sp(kind, self.input.span)
} }
@ -274,12 +275,12 @@ impl<'s> Parser<'s> {
} }
self.input.span = span_attr; self.input.span = span_attr;
}; };
let ty_opener = if let EventKind::Enter(ty) = self.events[event_opener].kind { let ty_opener = if let EventKind::Enter(ty) = &self.events[event_opener].kind {
debug_assert!(matches!( debug_assert!(matches!(
ty, ty,
Verbatim | RawFormat | InlineMath | DisplayMath Verbatim | RawFormat | InlineMath | DisplayMath
)); ));
ty ty.clone()
} else { } else {
panic!() panic!()
}; };
@ -504,7 +505,7 @@ impl<'s> Parser<'s> {
self.openers.drain(o..); self.openers.drain(o..);
let mut closed = match DelimEventKind::from(opener) { let mut closed = match DelimEventKind::from(opener) {
DelimEventKind::Container(cont) => { DelimEventKind::Container(cont) => {
self.events[e_opener].kind = EventKind::Enter(cont); self.events[e_opener].kind = EventKind::Enter(cont.clone());
self.push(EventKind::Exit(cont)) self.push(EventKind::Exit(cont))
} }
DelimEventKind::Quote(ty) => { DelimEventKind::Quote(ty) => {
@ -536,26 +537,79 @@ impl<'s> Parser<'s> {
image, image,
} => { } => {
let span_spec = self.events[e_opener].span.between(self.input.span); let span_spec = self.events[e_opener].span.between(self.input.span);
let span_spec = if !inline && span_spec.is_empty() { let multiline =
self.events[event_span] self.events[e_opener].span.start() < self.input.span_line.start();
let spec: CowStr = if span_spec.is_empty() && !inline {
let span_spec = self.events[event_span]
.span .span
.between(self.events[e_opener - 1].span) .between(self.events[e_opener - 1].span);
let events_text = self
.events
.iter()
.skip(event_span + 1)
.take(e_opener - event_span - 2);
if multiline
|| events_text.clone().any(|ev| {
!matches!(ev.kind, EventKind::Str | EventKind::Atom(..))
})
{
events_text
.filter(|ev| {
matches!(ev.kind, EventKind::Str | EventKind::Atom(..))
})
.map(|ev| ev.span.of(self.input.src))
.collect::<String>()
.into()
} else { } else {
span_spec span_spec.of(self.input.src).into()
}
} else if multiline {
let mut spec = String::new();
let mut first_part = true;
let mut span = self.events[e_opener].span.empty_after();
let mut append = |span: Span| {
span.of(self.input.src).split('\n').for_each(|s| {
if !s.is_empty() {
if !inline && !first_part {
spec.push(' ');
}
spec.push_str(s);
first_part = false;
}
})
}; };
for ev in self.events.iter().skip(e_opener + 1) {
if span.end() == ev.span.start() {
span = Span::new(span.start(), ev.span.end());
} else {
append(span);
span = ev.span;
}
}
append(span);
spec.into()
} else {
span_spec.of(self.input.src).into()
};
let container = match (image, inline) { let container = match (image, inline) {
(false, false) => ReferenceLink, (false, false) => ReferenceLink(spec.into()),
(false, true) => InlineLink, (false, true) => InlineLink(spec.into()),
(true, false) => ReferenceImage, (true, false) => ReferenceImage(spec.into()),
(true, true) => InlineImage, (true, true) => InlineImage(spec.into()),
};
self.events[event_span] = Event {
kind: EventKind::Enter(container),
span: span_spec,
}; };
self.events[event_span].kind = EventKind::Enter(container.clone());
self.events[e_opener - 1] = Event { self.events[e_opener - 1] = Event {
kind: EventKind::Exit(container), kind: EventKind::Exit(container),
span: span_spec, span: Span::new(
self.events[e_opener - 1].span.start(),
span_spec.end() + 1,
),
}; };
self.events.drain(e_opener..); self.events.drain(e_opener..);
Some(()) Some(())
@ -687,7 +741,7 @@ impl<'s> Parser<'s> {
self.push(EventKind::Atom(atom)) self.push(EventKind::Atom(atom))
} }
fn merge_str_events(&mut self, span_str: Span) -> Event { fn merge_str_events(&mut self, span_str: Span) -> Event<'s> {
let mut span = span_str; let mut span = span_str;
let should_merge = |e: &Event, span: Span| { let should_merge = |e: &Event, span: Span| {
matches!(e.kind, EventKind::Str | EventKind::Placeholder) matches!(e.kind, EventKind::Str | EventKind::Placeholder)
@ -711,7 +765,7 @@ impl<'s> Parser<'s> {
} }
} }
fn apply_word_attributes(&mut self, span_str: Span) -> Event { fn apply_word_attributes(&mut self, span_str: Span) -> Event<'s> {
if let Some(i) = span_str if let Some(i) = span_str
.of(self.input.src) .of(self.input.src)
.bytes() .bytes()
@ -848,8 +902,8 @@ impl Opener {
} }
} }
enum DelimEventKind { enum DelimEventKind<'s> {
Container(Container), Container(Container<'s>),
Span(SpanType), Span(SpanType),
Quote(QuoteType), Quote(QuoteType),
Link { Link {
@ -859,7 +913,7 @@ enum DelimEventKind {
}, },
} }
impl From<Opener> for DelimEventKind { impl<'s> From<Opener> for DelimEventKind<'s> {
fn from(d: Opener) -> Self { fn from(d: Opener) -> Self {
match d { match d {
Opener::Span(ty) => Self::Span(ty), Opener::Span(ty) => Self::Span(ty),
@ -886,7 +940,7 @@ impl From<Opener> for DelimEventKind {
} }
impl<'s> Iterator for Parser<'s> { impl<'s> Iterator for Parser<'s> {
type Item = Event; type Item = Event<'s>;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
while self.events.is_empty() while self.events.is_empty()
@ -908,12 +962,12 @@ impl<'s> Iterator for Parser<'s> {
// automatically close unclosed verbatim // automatically close unclosed verbatim
if let Some(VerbatimState { event_opener, .. }) = self.verbatim.take() { if let Some(VerbatimState { event_opener, .. }) = self.verbatim.take() {
let ty_opener = if let EventKind::Enter(ty) = self.events[event_opener].kind { let ty_opener = if let EventKind::Enter(ty) = &self.events[event_opener].kind {
debug_assert!(matches!( debug_assert!(matches!(
ty, ty,
Verbatim | RawFormat | InlineMath | DisplayMath Verbatim | RawFormat | InlineMath | DisplayMath
)); ));
ty ty.clone()
} else { } else {
panic!() panic!()
}; };
@ -1109,31 +1163,31 @@ mod test {
fn span_tag() { fn span_tag() {
test_parse!( test_parse!(
"[text][tag]", "[text][tag]",
(Enter(ReferenceLink), "tag"), (Enter(ReferenceLink("tag".into())), "["),
(Str, "text"), (Str, "text"),
(Exit(ReferenceLink), "tag"), (Exit(ReferenceLink("tag".into())), "][tag]"),
); );
test_parse!( test_parse!(
"![text][tag]", "![text][tag]",
(Enter(ReferenceImage), "tag"), (Enter(ReferenceImage("tag".into())), "!["),
(Str, "text"), (Str, "text"),
(Exit(ReferenceImage), "tag"), (Exit(ReferenceImage("tag".into())), "][tag]"),
); );
test_parse!( test_parse!(
"before [text][tag] after", "before [text][tag] after",
(Str, "before "), (Str, "before "),
(Enter(ReferenceLink), "tag"), (Enter(ReferenceLink("tag".into())), "["),
(Str, "text"), (Str, "text"),
(Exit(ReferenceLink), "tag"), (Exit(ReferenceLink("tag".into())), "][tag]"),
(Str, " after"), (Str, " after"),
); );
test_parse!( test_parse!(
"[[inner][i]][o]", "[[inner][i]][o]",
(Enter(ReferenceLink), "o"), (Enter(ReferenceLink("o".into())), "["),
(Enter(ReferenceLink), "i"), (Enter(ReferenceLink("i".into())), "["),
(Str, "inner"), (Str, "inner"),
(Exit(ReferenceLink), "i"), (Exit(ReferenceLink("i".into())), "][i]"),
(Exit(ReferenceLink), "o"), (Exit(ReferenceLink("o".into())), "][o]"),
); );
} }
@ -1141,15 +1195,15 @@ mod test {
fn span_tag_empty() { fn span_tag_empty() {
test_parse!( test_parse!(
"[text][]", "[text][]",
(Enter(ReferenceLink), "text"), (Enter(ReferenceLink("text".into())), "["),
(Str, "text"), (Str, "text"),
(Exit(ReferenceLink), "text"), (Exit(ReferenceLink("text".into())), "][]"),
); );
test_parse!( test_parse!(
"![text][]", "![text][]",
(Enter(ReferenceImage), "text"), (Enter(ReferenceImage("text".into())), "!["),
(Str, "text"), (Str, "text"),
(Exit(ReferenceImage), "text"), (Exit(ReferenceImage("text".into())), "][]"),
); );
} }
@ -1158,12 +1212,12 @@ mod test {
// TODO strip non str from tag? // TODO strip non str from tag?
test_parse!( test_parse!(
"[some _text_][]", "[some _text_][]",
(Enter(ReferenceLink), "some _text_"), (Enter(ReferenceLink("some text".into())), "["),
(Str, "some "), (Str, "some "),
(Enter(Emphasis), "_"), (Enter(Emphasis), "_"),
(Str, "text"), (Str, "text"),
(Exit(Emphasis), "_"), (Exit(Emphasis), "_"),
(Exit(ReferenceLink), "some _text_"), (Exit(ReferenceLink("some text".into())), "][]"),
); );
} }
@ -1172,19 +1226,19 @@ mod test {
test_parse!( test_parse!(
"before [text](url) after", "before [text](url) after",
(Str, "before "), (Str, "before "),
(Enter(InlineLink), "url"), (Enter(InlineLink("url".into())), "["),
(Str, "text"), (Str, "text"),
(Exit(InlineLink), "url"), (Exit(InlineLink("url".into())), "](url)"),
(Str, " after"), (Str, " after"),
); );
test_parse!( test_parse!(
"[outer [inner](i)](o)", "[outer [inner](i)](o)",
(Enter(InlineLink), "o"), (Enter(InlineLink("o".into())), "["),
(Str, "outer "), (Str, "outer "),
(Enter(InlineLink), "i"), (Enter(InlineLink("i".into())), "["),
(Str, "inner"), (Str, "inner"),
(Exit(InlineLink), "i"), (Exit(InlineLink("i".into())), "](i)"),
(Exit(InlineLink), "o"), (Exit(InlineLink("o".into())), "](o)"),
); );
} }
@ -1203,9 +1257,9 @@ mod test {
fn span_url_attr_closed() { fn span_url_attr_closed() {
test_parse!( test_parse!(
"[text]({.cls})", "[text]({.cls})",
(Enter(InlineLink), "{.cls}"), (Enter(InlineLink("{.cls}".into())), "["),
(Str, "text"), (Str, "text"),
(Exit(InlineLink), "{.cls}"), (Exit(InlineLink("{.cls}".into())), "]({.cls})"),
); );
} }
@ -1214,9 +1268,9 @@ mod test {
test_parse!( test_parse!(
"before [text]() after", "before [text]() after",
(Str, "before "), (Str, "before "),
(Enter(InlineLink), ""), (Enter(InlineLink("".into())), "["),
(Str, "text"), (Str, "text"),
(Exit(InlineLink), ""), (Exit(InlineLink("".into())), "]()"),
(Str, " after"), (Str, " after"),
); );
} }

View file

@ -830,16 +830,14 @@ impl<'s> Parser<'s> {
inline::Container::Emphasis => Container::Emphasis, inline::Container::Emphasis => Container::Emphasis,
inline::Container::Strong => Container::Strong, inline::Container::Strong => Container::Strong,
inline::Container::Mark => Container::Mark, inline::Container::Mark => Container::Mark,
inline::Container::InlineLink => Container::Link( inline::Container::InlineLink(url) => {
inline.span.of(self.src).replace('\n', "").into(), Container::Link(url, LinkType::Span(SpanLinkType::Inline))
LinkType::Span(SpanLinkType::Inline), }
), inline::Container::InlineImage(url) => {
inline::Container::InlineImage => Container::Image( Container::Image(url, SpanLinkType::Inline)
inline.span.of(self.src).replace('\n', "").into(), }
SpanLinkType::Inline, inline::Container::ReferenceLink(ref tag)
), | inline::Container::ReferenceImage(ref tag) => {
inline::Container::ReferenceLink | inline::Container::ReferenceImage => {
let tag = inline.span.of(self.src).replace('\n', " ");
let link_def = self let link_def = self
.pre_pass .pre_pass
.link_definitions .link_definitions
@ -851,12 +849,12 @@ impl<'s> Parser<'s> {
(url, SpanLinkType::Reference) (url, SpanLinkType::Reference)
} else { } else {
self.pre_pass.heading_id_by_tag(tag.as_ref()).map_or_else( self.pre_pass.heading_id_by_tag(tag.as_ref()).map_or_else(
|| (tag.into(), SpanLinkType::Unresolved), || (tag.clone(), SpanLinkType::Unresolved),
|id| (format!("#{}", id).into(), SpanLinkType::Reference), |id| (format!("#{}", id).into(), SpanLinkType::Reference),
) )
}; };
if matches!(c, inline::Container::ReferenceLink) { if matches!(c, inline::Container::ReferenceLink(..)) {
Container::Link(url_or_tag, LinkType::Span(ty)) Container::Link(url_or_tag, LinkType::Span(ty))
} else { } else {
Container::Image(url_or_tag, ty) Container::Image(url_or_tag, ty)
@ -1359,7 +1357,6 @@ mod test {
); );
} }
#[ignore = "broken"]
#[test] #[test]
fn link_inline_multi_line() { fn link_inline_multi_line() {
test_parse!( test_parse!(
@ -1378,6 +1375,23 @@ mod test {
End(Paragraph), End(Paragraph),
End(Blockquote), End(Blockquote),
); );
test_parse!(
concat!(
"> [text](a\n", //
"> bc\n", //
"> def)\n", //
),
Start(Blockquote, Attributes::new()),
Start(Paragraph, Attributes::new()),
Start(
Link("abcdef".into(), LinkType::Span(SpanLinkType::Inline)),
Attributes::new()
),
Str("text".into()),
End(Link("abcdef".into(), LinkType::Span(SpanLinkType::Inline))),
End(Paragraph),
End(Blockquote),
);
} }
#[test] #[test]
@ -1442,7 +1456,6 @@ mod test {
); );
} }
#[ignore = "multiline links broken"]
#[test] #[test]
fn link_reference_multiline() { fn link_reference_multiline() {
test_parse!( test_parse!(

View file

@ -9,7 +9,6 @@ ae6fc15:bugged left/right quote
e1f5b5e:untrimmed whitespace before linebreak e1f5b5e:untrimmed whitespace before linebreak
07888f3:div close within raw block 07888f3:div close within raw block
8423412:heading id conflict with existing id 8423412:heading id conflict with existing id
00a46ed:clear inline formatting from link tags
c0a3dec:escape in url c0a3dec:escape in url
61876cf:roman alpha ambiguity 61876cf:roman alpha ambiguity
f31b357:roman alpha ambiguity f31b357:roman alpha ambiguity