inline: impl smart quotation

This commit is contained in:
Noah Hellman 2023-01-27 19:04:01 +01:00
parent 8237384077
commit 56718ef887
4 changed files with 118 additions and 58 deletions

View file

@ -132,6 +132,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
Container::ListItem | Container::TaskListItem { .. } => { Container::ListItem | Container::TaskListItem { .. } => {
self.out.write_str("<li")?; self.out.write_str("<li")?;
} }
Container::DescriptionList => self.out.write_str("<dl")?,
Container::DescriptionDetails => self.out.write_str("<dd")?, Container::DescriptionDetails => self.out.write_str("<dd")?,
Container::Footnote { number, .. } => { Container::Footnote { number, .. } => {
assert!(self.footnote_number.is_none()); assert!(self.footnote_number.is_none());
@ -188,13 +189,6 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
Container::Strong => self.out.write_str("<strong")?, Container::Strong => self.out.write_str("<strong")?,
Container::Emphasis => self.out.write_str("<em")?, Container::Emphasis => self.out.write_str("<em")?,
Container::Mark => self.out.write_str("<mark")?, Container::Mark => self.out.write_str("<mark")?,
Container::SingleQuoted => self.out.write_str("&lsquo;")?,
Container::DoubleQuoted => self.out.write_str("&ldquo;")?,
_ => panic!(),
}
if matches!(c, Container::SingleQuoted | Container::DoubleQuoted) {
continue; // TODO add span to allow attributes?
} }
for (a, v) in attrs.iter().filter(|(a, _)| *a != "class") { for (a, v) in attrs.iter().filter(|(a, _)| *a != "class") {
@ -302,6 +296,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
Container::ListItem | Container::TaskListItem { .. } => { Container::ListItem | Container::TaskListItem { .. } => {
self.out.write_str("</li>")?; self.out.write_str("</li>")?;
} }
Container::DescriptionList => self.out.write_str("</dl>")?,
Container::DescriptionDetails => self.out.write_str("</dd>")?, Container::DescriptionDetails => self.out.write_str("</dd>")?,
Container::Footnote { number, .. } => { Container::Footnote { number, .. } => {
if !self.footnote_backlink_written { if !self.footnote_backlink_written {
@ -370,9 +365,6 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
Container::Strong => self.out.write_str("</strong>")?, Container::Strong => self.out.write_str("</strong>")?,
Container::Emphasis => self.out.write_str("</em>")?, Container::Emphasis => self.out.write_str("</em>")?,
Container::Mark => self.out.write_str("</mark>")?, Container::Mark => self.out.write_str("</mark>")?,
Container::SingleQuoted => self.out.write_str("&rsquo;")?,
Container::DoubleQuoted => self.out.write_str("&rdquo;")?,
_ => panic!(),
} }
} }
Event::Str(s) => { Event::Str(s) => {
@ -385,7 +377,6 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
'<' => Some("&lt;"), '<' => Some("&lt;"),
'>' => Some("&gt;"), '>' => Some("&gt;"),
'&' => Some("&amp;"), '&' => Some("&amp;"),
'"' => Some("&quot;"),
_ => None, _ => None,
} { } {
ent = s; ent = s;
@ -415,6 +406,10 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
number, number, number number, number, number
)?; )?;
} }
Atom::LeftSingleQuote => self.out.write_str("&lsquo;")?,
Atom::RightSingleQuote => self.out.write_str("&rsquo;")?,
Atom::LeftDoubleQuote => self.out.write_str("&ldquo;")?,
Atom::RightDoubleQuote => self.out.write_str("&rdquo;")?,
Atom::Ellipsis => self.out.write_str("&hellip;")?, Atom::Ellipsis => self.out.write_str("&hellip;")?,
Atom::EnDash => self.out.write_str("&ndash;")?, Atom::EnDash => self.out.write_str("&ndash;")?,
Atom::EmDash => self.out.write_str("&mdash;")?, Atom::EmDash => self.out.write_str("&mdash;")?,

View file

@ -18,6 +18,7 @@ pub enum Atom {
Ellipsis, Ellipsis,
EnDash, EnDash,
EmDash, EmDash,
Quote { ty: QuoteType, left: bool },
} }
#[derive(Debug, Copy, Clone, PartialEq, Eq)] #[derive(Debug, Copy, Clone, PartialEq, Eq)]
@ -30,9 +31,6 @@ pub enum Container {
Emphasis, Emphasis,
Strong, Strong,
Mark, Mark,
// smart quoting
SingleQuoted,
DoubleQuoted,
// Verbatim // Verbatim
Verbatim, Verbatim,
/// Span is the format. /// Span is the format.
@ -51,6 +49,12 @@ pub enum Container {
Autolink, Autolink,
} }
/// Kind of quotation mark carried by a quote atom.
///
/// The direction (left or right) is stored separately, next to this value, in
/// the atom that holds it.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum QuoteType {
    /// A single quotation mark.
    Single,
    /// A double quotation mark.
    Double,
}
#[derive(Debug, PartialEq, Eq)] #[derive(Debug, PartialEq, Eq)]
pub enum EventKind { pub enum EventKind {
Enter(Container), Enter(Container),
@ -417,15 +421,23 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
} }
let inner_span = self.events[e_opener].span.between(self.span); let inner_span = self.events[e_opener].span.between(self.span);
let mut event_closer = match Container::try_from(d) { let mut event_closer = match DelimEventKind::from(d) {
Ok(cont) => { DelimEventKind::Container(cont) => {
self.events[e_opener].kind = EventKind::Enter(cont); self.events[e_opener].kind = EventKind::Enter(cont);
Some(Event { Some(Event {
kind: EventKind::Exit(cont), kind: EventKind::Exit(cont),
span: self.span, span: self.span,
}) })
} }
Err(ty) => self.post_span(ty, e_opener), DelimEventKind::Quote(ty) => {
self.events[e_opener].kind =
EventKind::Atom(Atom::Quote { ty, left: true });
Some(Event {
kind: EventKind::Atom(Atom::Quote { ty, left: false }),
span: self.span,
})
}
DelimEventKind::Span(ty) => self.post_span(ty, e_opener),
}; };
self.openers.drain(o..); self.openers.drain(o..);
@ -491,15 +503,33 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
{ {
return None; return None;
} }
if matches!(delim, Delim::SingleQuoted | Delim::DoubleQuoted)
&& self
.events
.back()
.map_or(false, |ev| matches!(ev.kind, EventKind::Str))
{
return None;
}
self.openers.push((delim, self.events.len())); self.openers.push((delim, self.events.len()));
// push dummy event in case attributes are encountered after closing delimiter // push dummy event in case attributes are encountered after closing delimiter
self.events.push_back(Event { self.events.push_back(Event {
kind: EventKind::Placeholder, kind: EventKind::Placeholder,
span: Span::empty_at(self.span.start()), span: Span::empty_at(self.span.start()),
}); });
// use str for now, replace if closed later // use non-opener for now, replace if closed later
Some(Event { Some(Event {
kind: EventKind::Str, kind: match delim {
Delim::SingleQuoted => EventKind::Atom(Quote {
ty: QuoteType::Single,
left: false,
}),
Delim::DoubleQuoted => EventKind::Atom(Quote {
ty: QuoteType::Double,
left: true,
}),
_ => EventKind::Str,
},
span: self.span, span: self.span,
}) })
}) })
@ -548,16 +578,35 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
} }
fn parse_atom(&mut self, first: &lex::Token) -> Option<Event> { fn parse_atom(&mut self, first: &lex::Token) -> Option<Event> {
let atom = match first.kind { let atom =
lex::Kind::Newline => Softbreak, match first.kind {
lex::Kind::Hardbreak => Hardbreak, lex::Kind::Newline => Softbreak,
lex::Kind::Escape => Escape, lex::Kind::Hardbreak => Hardbreak,
lex::Kind::Nbsp => Nbsp, lex::Kind::Escape => Escape,
lex::Kind::Seq(lex::Sequence::Period) if first.len == 3 => Ellipsis, lex::Kind::Nbsp => Nbsp,
lex::Kind::Seq(lex::Sequence::Hyphen) if first.len == 2 => EnDash, lex::Kind::Seq(lex::Sequence::Period) if first.len == 3 => Ellipsis,
lex::Kind::Seq(lex::Sequence::Hyphen) if first.len == 3 => EmDash, lex::Kind::Seq(lex::Sequence::Hyphen) if first.len == 2 => EnDash,
_ => return None, lex::Kind::Seq(lex::Sequence::Hyphen) if first.len == 3 => EmDash,
}; lex::Kind::Open(lex::Delimiter::BraceQuote1) => Quote {
ty: QuoteType::Single,
left: true,
},
lex::Kind::Sym(lex::Symbol::Quote1)
| lex::Kind::Close(lex::Delimiter::BraceQuote1) => Quote {
ty: QuoteType::Single,
left: false,
},
lex::Kind::Open(lex::Delimiter::BraceQuote2) => Quote {
ty: QuoteType::Double,
left: true,
},
lex::Kind::Sym(lex::Symbol::Quote2)
| lex::Kind::Close(lex::Delimiter::BraceQuote2) => Quote {
ty: QuoteType::Double,
left: false,
},
_ => return None,
};
Some(Event { Some(Event {
kind: EventKind::Atom(atom), kind: EventKind::Atom(atom),
@ -585,11 +634,11 @@ enum Delim {
Emphasis(Directionality), Emphasis(Directionality),
Superscript(Directionality), Superscript(Directionality),
Subscript(Directionality), Subscript(Directionality),
SingleQuoted,
DoubleQuoted,
Mark, Mark,
Delete, Delete,
Insert, Insert,
SingleQuoted,
DoubleQuoted,
} }
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone, Copy)]
@ -630,26 +679,34 @@ impl Delim {
lex::Kind::Close(Delimiter::BraceHyphen) => Some((Delete, Close)), lex::Kind::Close(Delimiter::BraceHyphen) => Some((Delete, Close)),
lex::Kind::Open(Delimiter::BracePlus) => Some((Insert, Open)), lex::Kind::Open(Delimiter::BracePlus) => Some((Insert, Open)),
lex::Kind::Close(Delimiter::BracePlus) => Some((Insert, Close)), lex::Kind::Close(Delimiter::BracePlus) => Some((Insert, Close)),
lex::Kind::Open(Delimiter::BraceQuote1) => Some((SingleQuoted, Open)),
lex::Kind::Close(Delimiter::BraceQuote1) => Some((SingleQuoted, Close)),
lex::Kind::Open(Delimiter::BraceQuote2) => Some((DoubleQuoted, Open)),
lex::Kind::Close(Delimiter::BraceQuote2) => Some((DoubleQuoted, Close)),
_ => None, _ => None,
} }
} }
} }
impl TryFrom<Delim> for Container { enum DelimEventKind {
type Error = SpanType; Container(Container),
Span(SpanType),
Quote(QuoteType),
}
fn try_from(d: Delim) -> Result<Self, Self::Error> { impl From<Delim> for DelimEventKind {
fn from(d: Delim) -> Self {
match d { match d {
Delim::Span(ty) => Err(ty), Delim::Span(ty) => Self::Span(ty),
Delim::Strong(..) => Ok(Self::Strong), Delim::Strong(..) => Self::Container(Strong),
Delim::Emphasis(..) => Ok(Self::Emphasis), Delim::Emphasis(..) => Self::Container(Emphasis),
Delim::Superscript(..) => Ok(Self::Superscript), Delim::Superscript(..) => Self::Container(Superscript),
Delim::Subscript(..) => Ok(Self::Subscript), Delim::Subscript(..) => Self::Container(Subscript),
Delim::SingleQuoted => Ok(Self::SingleQuoted), Delim::Mark => Self::Container(Mark),
Delim::DoubleQuoted => Ok(Self::DoubleQuoted), Delim::Delete => Self::Container(Delete),
Delim::Mark => Ok(Self::Mark), Delim::Insert => Self::Container(Insert),
Delim::Delete => Ok(Self::Delete), Delim::SingleQuoted => Self::Quote(QuoteType::Single),
Delim::Insert => Ok(Self::Insert), Delim::DoubleQuoted => Self::Quote(QuoteType::Double),
} }
} }
} }

View file

@ -36,6 +36,8 @@ pub enum Delimiter {
BraceTilde, BraceTilde,
BraceUnderscore, BraceUnderscore,
Bracket, Bracket,
BraceQuote1,
BraceQuote2,
} }
#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]
@ -189,6 +191,8 @@ impl<I: Iterator<Item = char> + Clone> Lexer<I> {
'+' => Some(Open(BracePlus)), '+' => Some(Open(BracePlus)),
'~' => Some(Open(BraceTilde)), '~' => Some(Open(BraceTilde)),
'_' => Some(Open(BraceUnderscore)), '_' => Some(Open(BraceUnderscore)),
'\'' => Some(Open(BraceQuote1)),
'"' => Some(Open(BraceQuote2)),
_ => None, _ => None,
}; };
if let Some(exp) = explicit { if let Some(exp) = explicit {
@ -204,6 +208,8 @@ impl<I: Iterator<Item = char> + Clone> Lexer<I> {
'+' => self.maybe_eat_close_brace(Text, BracePlus), '+' => self.maybe_eat_close_brace(Text, BracePlus),
'~' => self.maybe_eat_close_brace(Sym(Tilde), BraceTilde), '~' => self.maybe_eat_close_brace(Sym(Tilde), BraceTilde),
'_' => self.maybe_eat_close_brace(Sym(Underscore), BraceUnderscore), '_' => self.maybe_eat_close_brace(Sym(Underscore), BraceUnderscore),
'\'' => self.maybe_eat_close_brace(Sym(Quote1), BraceQuote1),
'"' => self.maybe_eat_close_brace(Sym(Quote2), BraceQuote2),
'-' => { '-' => {
if self.peek_char() == '}' { if self.peek_char() == '}' {
self.eat_char(); self.eat_char();
@ -222,8 +228,6 @@ impl<I: Iterator<Item = char> + Clone> Lexer<I> {
} }
'<' => Sym(Lt), '<' => Sym(Lt),
'|' => Sym(Pipe), '|' => Sym(Pipe),
'\'' => Sym(Quote1),
'"' => Sym(Quote2),
'`' => self.eat_seq(Backtick), '`' => self.eat_seq(Backtick),
'$' => self.eat_seq(Dollar), '$' => self.eat_seq(Dollar),

View file

@ -90,10 +90,6 @@ pub enum Container<'s> {
Emphasis, Emphasis,
/// A highlighted inline element. /// A highlighted inline element.
Mark, Mark,
/// An quoted inline element, using single quotes.
SingleQuoted,
/// A quoted inline element, using double quotes.
DoubleQuoted,
} }
impl<'s> Container<'s> { impl<'s> Container<'s> {
@ -129,9 +125,7 @@ impl<'s> Container<'s> {
| Self::Delete | Self::Delete
| Self::Strong | Self::Strong
| Self::Emphasis | Self::Emphasis
| Self::Mark | Self::Mark => false,
| Self::SingleQuoted
| Self::DoubleQuoted => false,
} }
} }
@ -167,9 +161,7 @@ impl<'s> Container<'s> {
| Self::Delete | Self::Delete
| Self::Strong | Self::Strong
| Self::Emphasis | Self::Emphasis
| Self::Mark | Self::Mark => false,
| Self::SingleQuoted
| Self::DoubleQuoted => false,
} }
} }
} }
@ -234,6 +226,14 @@ pub enum OrderedListStyle {
pub enum Atom<'s> { pub enum Atom<'s> {
/// A footnote reference. /// A footnote reference.
FootnoteReference(&'s str, usize), FootnoteReference(&'s str, usize),
/// Left single quotation mark.
LeftSingleQuote,
/// Right single quotation mark.
RightSingleQuote,
/// Left double quotation mark.
LeftDoubleQuote,
/// Right double quotation mark.
RightDoubleQuote,
/// A horizontal ellipsis, i.e. a set of three periods. /// A horizontal ellipsis, i.e. a set of three periods.
Ellipsis, Ellipsis,
/// An en dash. /// An en dash.
@ -419,8 +419,6 @@ impl<'s> Parser<'s> {
inline::Container::Emphasis => Container::Emphasis, inline::Container::Emphasis => Container::Emphasis,
inline::Container::Strong => Container::Strong, inline::Container::Strong => Container::Strong,
inline::Container::Mark => Container::Mark, inline::Container::Mark => Container::Mark,
inline::Container::SingleQuoted => Container::SingleQuoted,
inline::Container::DoubleQuoted => Container::DoubleQuoted,
inline::Container::InlineLink => Container::Link( inline::Container::InlineLink => Container::Link(
match self.inlines.src(inline.span) { match self.inlines.src(inline.span) {
CowStr::Owned(s) => s.replace('\n', "").into(), CowStr::Owned(s) => s.replace('\n', "").into(),
@ -482,6 +480,12 @@ impl<'s> Parser<'s> {
number, number,
) )
} }
inline::Atom::Quote { ty, left } => match (ty, left) {
(inline::QuoteType::Single, true) => Atom::LeftSingleQuote,
(inline::QuoteType::Single, false) => Atom::RightSingleQuote,
(inline::QuoteType::Double, true) => Atom::LeftDoubleQuote,
(inline::QuoteType::Double, false) => Atom::RightDoubleQuote,
},
inline::Atom::Ellipsis => Atom::Ellipsis, inline::Atom::Ellipsis => Atom::Ellipsis,
inline::Atom::EnDash => Atom::EnDash, inline::Atom::EnDash => Atom::EnDash,
inline::Atom::EmDash => Atom::EmDash, inline::Atom::EmDash => Atom::EmDash,