inline: impl smart quotation

This commit is contained in:
Noah Hellman 2023-01-27 19:04:01 +01:00
parent 8237384077
commit 56718ef887
4 changed files with 118 additions and 58 deletions

View file

@ -132,6 +132,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
Container::ListItem | Container::TaskListItem { .. } => {
self.out.write_str("<li")?;
}
Container::DescriptionList => self.out.write_str("<dl")?,
Container::DescriptionDetails => self.out.write_str("<dd")?,
Container::Footnote { number, .. } => {
assert!(self.footnote_number.is_none());
@ -188,13 +189,6 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
Container::Strong => self.out.write_str("<strong")?,
Container::Emphasis => self.out.write_str("<em")?,
Container::Mark => self.out.write_str("<mark")?,
Container::SingleQuoted => self.out.write_str("&lsquo;")?,
Container::DoubleQuoted => self.out.write_str("&ldquo;")?,
_ => panic!(),
}
if matches!(c, Container::SingleQuoted | Container::DoubleQuoted) {
continue; // TODO add span to allow attributes?
}
for (a, v) in attrs.iter().filter(|(a, _)| *a != "class") {
@ -302,6 +296,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
Container::ListItem | Container::TaskListItem { .. } => {
self.out.write_str("</li>")?;
}
Container::DescriptionList => self.out.write_str("</dl>")?,
Container::DescriptionDetails => self.out.write_str("</dd>")?,
Container::Footnote { number, .. } => {
if !self.footnote_backlink_written {
@ -370,9 +365,6 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
Container::Strong => self.out.write_str("</strong>")?,
Container::Emphasis => self.out.write_str("</em>")?,
Container::Mark => self.out.write_str("</mark>")?,
Container::SingleQuoted => self.out.write_str("&rsquo;")?,
Container::DoubleQuoted => self.out.write_str("&rdquo;")?,
_ => panic!(),
}
}
Event::Str(s) => {
@ -385,7 +377,6 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
'<' => Some("&lt;"),
'>' => Some("&gt;"),
'&' => Some("&amp;"),
'"' => Some("&quot;"),
_ => None,
} {
ent = s;
@ -415,6 +406,10 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
number, number, number
)?;
}
Atom::LeftSingleQuote => self.out.write_str("&lsquo;")?,
Atom::RightSingleQuote => self.out.write_str("&rsquo;")?,
Atom::LeftDoubleQuote => self.out.write_str("&ldquo;")?,
Atom::RightDoubleQuote => self.out.write_str("&rdquo;")?,
Atom::Ellipsis => self.out.write_str("&hellip;")?,
Atom::EnDash => self.out.write_str("&ndash;")?,
Atom::EmDash => self.out.write_str("&mdash;")?,

View file

@ -18,6 +18,7 @@ pub enum Atom {
Ellipsis,
EnDash,
EmDash,
Quote { ty: QuoteType, left: bool },
}
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
@ -30,9 +31,6 @@ pub enum Container {
Emphasis,
Strong,
Mark,
// smart quoting
SingleQuoted,
DoubleQuoted,
// Verbatim
Verbatim,
/// Span is the format.
@ -51,6 +49,12 @@ pub enum Container {
Autolink,
}
/// The kind of a smart quotation mark.
///
/// Carried by `Atom::Quote` together with a flag telling whether the mark is
/// the left (opening) or right (closing) one.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum QuoteType {
    /// A single quotation mark.
    Single,
    /// A double quotation mark.
    Double,
}
#[derive(Debug, PartialEq, Eq)]
pub enum EventKind {
Enter(Container),
@ -417,15 +421,23 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
}
let inner_span = self.events[e_opener].span.between(self.span);
let mut event_closer = match Container::try_from(d) {
Ok(cont) => {
let mut event_closer = match DelimEventKind::from(d) {
DelimEventKind::Container(cont) => {
self.events[e_opener].kind = EventKind::Enter(cont);
Some(Event {
kind: EventKind::Exit(cont),
span: self.span,
})
}
Err(ty) => self.post_span(ty, e_opener),
DelimEventKind::Quote(ty) => {
self.events[e_opener].kind =
EventKind::Atom(Atom::Quote { ty, left: true });
Some(Event {
kind: EventKind::Atom(Atom::Quote { ty, left: false }),
span: self.span,
})
}
DelimEventKind::Span(ty) => self.post_span(ty, e_opener),
};
self.openers.drain(o..);
@ -491,15 +503,33 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
{
return None;
}
if matches!(delim, Delim::SingleQuoted | Delim::DoubleQuoted)
&& self
.events
.back()
.map_or(false, |ev| matches!(ev.kind, EventKind::Str))
{
return None;
}
self.openers.push((delim, self.events.len()));
// push dummy event in case attributes are encountered after closing delimiter
self.events.push_back(Event {
kind: EventKind::Placeholder,
span: Span::empty_at(self.span.start()),
});
// use str for now, replace if closed later
// use non-opener for now, replace if closed later
Some(Event {
kind: EventKind::Str,
kind: match delim {
Delim::SingleQuoted => EventKind::Atom(Quote {
ty: QuoteType::Single,
left: false,
}),
Delim::DoubleQuoted => EventKind::Atom(Quote {
ty: QuoteType::Double,
left: true,
}),
_ => EventKind::Str,
},
span: self.span,
})
})
@ -548,16 +578,35 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
}
fn parse_atom(&mut self, first: &lex::Token) -> Option<Event> {
let atom = match first.kind {
lex::Kind::Newline => Softbreak,
lex::Kind::Hardbreak => Hardbreak,
lex::Kind::Escape => Escape,
lex::Kind::Nbsp => Nbsp,
lex::Kind::Seq(lex::Sequence::Period) if first.len == 3 => Ellipsis,
lex::Kind::Seq(lex::Sequence::Hyphen) if first.len == 2 => EnDash,
lex::Kind::Seq(lex::Sequence::Hyphen) if first.len == 3 => EmDash,
_ => return None,
};
let atom =
match first.kind {
lex::Kind::Newline => Softbreak,
lex::Kind::Hardbreak => Hardbreak,
lex::Kind::Escape => Escape,
lex::Kind::Nbsp => Nbsp,
lex::Kind::Seq(lex::Sequence::Period) if first.len == 3 => Ellipsis,
lex::Kind::Seq(lex::Sequence::Hyphen) if first.len == 2 => EnDash,
lex::Kind::Seq(lex::Sequence::Hyphen) if first.len == 3 => EmDash,
lex::Kind::Open(lex::Delimiter::BraceQuote1) => Quote {
ty: QuoteType::Single,
left: true,
},
lex::Kind::Sym(lex::Symbol::Quote1)
| lex::Kind::Close(lex::Delimiter::BraceQuote1) => Quote {
ty: QuoteType::Single,
left: false,
},
lex::Kind::Open(lex::Delimiter::BraceQuote2) => Quote {
ty: QuoteType::Double,
left: true,
},
lex::Kind::Sym(lex::Symbol::Quote2)
| lex::Kind::Close(lex::Delimiter::BraceQuote2) => Quote {
ty: QuoteType::Double,
left: false,
},
_ => return None,
};
Some(Event {
kind: EventKind::Atom(atom),
@ -585,11 +634,11 @@ enum Delim {
Emphasis(Directionality),
Superscript(Directionality),
Subscript(Directionality),
SingleQuoted,
DoubleQuoted,
Mark,
Delete,
Insert,
SingleQuoted,
DoubleQuoted,
}
#[derive(Debug, Clone, Copy)]
@ -630,26 +679,34 @@ impl Delim {
lex::Kind::Close(Delimiter::BraceHyphen) => Some((Delete, Close)),
lex::Kind::Open(Delimiter::BracePlus) => Some((Insert, Open)),
lex::Kind::Close(Delimiter::BracePlus) => Some((Insert, Close)),
lex::Kind::Open(Delimiter::BraceQuote1) => Some((SingleQuoted, Open)),
lex::Kind::Close(Delimiter::BraceQuote1) => Some((SingleQuoted, Close)),
lex::Kind::Open(Delimiter::BraceQuote2) => Some((DoubleQuoted, Open)),
lex::Kind::Close(Delimiter::BraceQuote2) => Some((DoubleQuoted, Close)),
_ => None,
}
}
}
impl TryFrom<Delim> for Container {
type Error = SpanType;
/// Classification of a delimiter by the kind of event it gives rise to.
///
/// Obtained from a `Delim` through the `From<Delim>` implementation.
enum DelimEventKind {
/// The delimiter corresponds to an inline container (e.g. strong, emphasis).
Container(Container),
/// The delimiter is a span delimiter of the given span type.
Span(SpanType),
/// The delimiter is a quotation mark of the given quote type.
Quote(QuoteType),
}
fn try_from(d: Delim) -> Result<Self, Self::Error> {
impl From<Delim> for DelimEventKind {
fn from(d: Delim) -> Self {
match d {
Delim::Span(ty) => Err(ty),
Delim::Strong(..) => Ok(Self::Strong),
Delim::Emphasis(..) => Ok(Self::Emphasis),
Delim::Superscript(..) => Ok(Self::Superscript),
Delim::Subscript(..) => Ok(Self::Subscript),
Delim::SingleQuoted => Ok(Self::SingleQuoted),
Delim::DoubleQuoted => Ok(Self::DoubleQuoted),
Delim::Mark => Ok(Self::Mark),
Delim::Delete => Ok(Self::Delete),
Delim::Insert => Ok(Self::Insert),
Delim::Span(ty) => Self::Span(ty),
Delim::Strong(..) => Self::Container(Strong),
Delim::Emphasis(..) => Self::Container(Emphasis),
Delim::Superscript(..) => Self::Container(Superscript),
Delim::Subscript(..) => Self::Container(Subscript),
Delim::Mark => Self::Container(Mark),
Delim::Delete => Self::Container(Delete),
Delim::Insert => Self::Container(Insert),
Delim::SingleQuoted => Self::Quote(QuoteType::Single),
Delim::DoubleQuoted => Self::Quote(QuoteType::Double),
}
}
}

View file

@ -36,6 +36,8 @@ pub enum Delimiter {
BraceTilde,
BraceUnderscore,
Bracket,
BraceQuote1,
BraceQuote2,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
@ -189,6 +191,8 @@ impl<I: Iterator<Item = char> + Clone> Lexer<I> {
'+' => Some(Open(BracePlus)),
'~' => Some(Open(BraceTilde)),
'_' => Some(Open(BraceUnderscore)),
'\'' => Some(Open(BraceQuote1)),
'"' => Some(Open(BraceQuote2)),
_ => None,
};
if let Some(exp) = explicit {
@ -204,6 +208,8 @@ impl<I: Iterator<Item = char> + Clone> Lexer<I> {
'+' => self.maybe_eat_close_brace(Text, BracePlus),
'~' => self.maybe_eat_close_brace(Sym(Tilde), BraceTilde),
'_' => self.maybe_eat_close_brace(Sym(Underscore), BraceUnderscore),
'\'' => self.maybe_eat_close_brace(Sym(Quote1), BraceQuote1),
'"' => self.maybe_eat_close_brace(Sym(Quote2), BraceQuote2),
'-' => {
if self.peek_char() == '}' {
self.eat_char();
@ -222,8 +228,6 @@ impl<I: Iterator<Item = char> + Clone> Lexer<I> {
}
'<' => Sym(Lt),
'|' => Sym(Pipe),
'\'' => Sym(Quote1),
'"' => Sym(Quote2),
'`' => self.eat_seq(Backtick),
'$' => self.eat_seq(Dollar),

View file

@ -90,10 +90,6 @@ pub enum Container<'s> {
Emphasis,
/// A highlighted inline element.
Mark,
/// A quoted inline element, using single quotes.
SingleQuoted,
/// A quoted inline element, using double quotes.
DoubleQuoted,
}
impl<'s> Container<'s> {
@ -129,9 +125,7 @@ impl<'s> Container<'s> {
| Self::Delete
| Self::Strong
| Self::Emphasis
| Self::Mark
| Self::SingleQuoted
| Self::DoubleQuoted => false,
| Self::Mark => false,
}
}
@ -167,9 +161,7 @@ impl<'s> Container<'s> {
| Self::Delete
| Self::Strong
| Self::Emphasis
| Self::Mark
| Self::SingleQuoted
| Self::DoubleQuoted => false,
| Self::Mark => false,
}
}
}
@ -234,6 +226,14 @@ pub enum OrderedListStyle {
pub enum Atom<'s> {
/// A footnote reference.
FootnoteReference(&'s str, usize),
/// Left single quotation mark.
LeftSingleQuote,
/// Right single quotation mark.
RightSingleQuote,
/// Left double quotation mark.
LeftDoubleQuote,
/// Right double quotation mark.
RightDoubleQuote,
/// A horizontal ellipsis, i.e. a set of three periods.
Ellipsis,
/// An en dash.
@ -419,8 +419,6 @@ impl<'s> Parser<'s> {
inline::Container::Emphasis => Container::Emphasis,
inline::Container::Strong => Container::Strong,
inline::Container::Mark => Container::Mark,
inline::Container::SingleQuoted => Container::SingleQuoted,
inline::Container::DoubleQuoted => Container::DoubleQuoted,
inline::Container::InlineLink => Container::Link(
match self.inlines.src(inline.span) {
CowStr::Owned(s) => s.replace('\n', "").into(),
@ -482,6 +480,12 @@ impl<'s> Parser<'s> {
number,
)
}
inline::Atom::Quote { ty, left } => match (ty, left) {
(inline::QuoteType::Single, true) => Atom::LeftSingleQuote,
(inline::QuoteType::Single, false) => Atom::RightSingleQuote,
(inline::QuoteType::Double, true) => Atom::LeftDoubleQuote,
(inline::QuoteType::Double, false) => Atom::RightDoubleQuote,
},
inline::Atom::Ellipsis => Atom::Ellipsis,
inline::Atom::EnDash => Atom::EnDash,
inline::Atom::EmDash => Atom::EmDash,