From 56718ef8877ba20347531d647ccfd617af68ac0b Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Fri, 27 Jan 2023 19:04:01 +0100 Subject: [PATCH] inline: impl smart quotation --- src/html.rs | 17 +++---- src/inline.rs | 123 ++++++++++++++++++++++++++++++++++++-------------- src/lex.rs | 8 +++- src/lib.rs | 28 +++++++----- 4 files changed, 118 insertions(+), 58 deletions(-) diff --git a/src/html.rs b/src/html.rs index 2e8c407..19e374d 100644 --- a/src/html.rs +++ b/src/html.rs @@ -132,6 +132,7 @@ impl<'s, I: Iterator>, W: std::fmt::Write> Writer<'s, I, W> { Container::ListItem | Container::TaskListItem { .. } => { self.out.write_str(" self.out.write_str(" self.out.write_str(" { assert!(self.footnote_number.is_none()); @@ -188,13 +189,6 @@ impl<'s, I: Iterator>, W: std::fmt::Write> Writer<'s, I, W> { Container::Strong => self.out.write_str(" self.out.write_str(" self.out.write_str(" self.out.write_str("‘")?, - Container::DoubleQuoted => self.out.write_str("“")?, - _ => panic!(), - } - - if matches!(c, Container::SingleQuoted | Container::DoubleQuoted) { - continue; // TODO add span to allow attributes? } for (a, v) in attrs.iter().filter(|(a, _)| *a != "class") { @@ -302,6 +296,7 @@ impl<'s, I: Iterator>, W: std::fmt::Write> Writer<'s, I, W> { Container::ListItem | Container::TaskListItem { .. } => { self.out.write_str("")?; } + Container::DescriptionList => self.out.write_str("")?, Container::DescriptionDetails => self.out.write_str("")?, Container::Footnote { number, .. } => { if !self.footnote_backlink_written { @@ -370,9 +365,6 @@ impl<'s, I: Iterator>, W: std::fmt::Write> Writer<'s, I, W> { Container::Strong => self.out.write_str("")?, Container::Emphasis => self.out.write_str("")?, Container::Mark => self.out.write_str("")?, - Container::SingleQuoted => self.out.write_str("’")?, - Container::DoubleQuoted => self.out.write_str("”")?, - _ => panic!(), } } Event::Str(s) => { @@ -385,7 +377,6 @@ impl<'s, I: Iterator>, W: std::fmt::Write> Writer<'s, I, W> { '<' => Some("<"), '>' => Some(">"), '&' => Some("&"), - '"' => Some("""), _ => None, } { ent = s; @@ -415,6 +406,10 @@ impl<'s, I: Iterator>, W: std::fmt::Write> Writer<'s, I, W> { number, number, number )?; } + Atom::LeftSingleQuote => self.out.write_str("‘")?, + Atom::RightSingleQuote => self.out.write_str("’")?, + Atom::LeftDoubleQuote => self.out.write_str("“")?, + Atom::RightDoubleQuote => self.out.write_str("”")?, Atom::Ellipsis => self.out.write_str("…")?, Atom::EnDash => self.out.write_str("–")?, Atom::EmDash => self.out.write_str("—")?, diff --git a/src/inline.rs b/src/inline.rs index 8bcd924..7037e67 100644 --- a/src/inline.rs +++ b/src/inline.rs @@ -18,6 +18,7 @@ pub enum Atom { Ellipsis, EnDash, EmDash, + Quote { ty: QuoteType, left: bool }, } #[derive(Debug, Copy, Clone, PartialEq, Eq)] @@ -30,9 +31,6 @@ pub enum Container { Emphasis, Strong, Mark, - // smart quoting - SingleQuoted, - DoubleQuoted, // Verbatim Verbatim, /// Span is the format. @@ -51,6 +49,12 @@ pub enum Container { Autolink, } +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum QuoteType { + Single, + Double, +} + #[derive(Debug, PartialEq, Eq)] pub enum EventKind { Enter(Container), @@ -417,15 +421,23 @@ impl + Clone> Parser { } let inner_span = self.events[e_opener].span.between(self.span); - let mut event_closer = match Container::try_from(d) { - Ok(cont) => { + let mut event_closer = match DelimEventKind::from(d) { + DelimEventKind::Container(cont) => { self.events[e_opener].kind = EventKind::Enter(cont); Some(Event { kind: EventKind::Exit(cont), span: self.span, }) } - Err(ty) => self.post_span(ty, e_opener), + DelimEventKind::Quote(ty) => { + self.events[e_opener].kind = + EventKind::Atom(Atom::Quote { ty, left: true }); + Some(Event { + kind: EventKind::Atom(Atom::Quote { ty, left: false }), + span: self.span, + }) + } + DelimEventKind::Span(ty) => self.post_span(ty, e_opener), }; self.openers.drain(o..); @@ -491,15 +503,33 @@ impl + Clone> Parser { { return None; } + if matches!(delim, Delim::SingleQuoted | Delim::DoubleQuoted) + && self + .events + .back() + .map_or(false, |ev| matches!(ev.kind, EventKind::Str)) + { + return None; + } self.openers.push((delim, self.events.len())); // push dummy event in case attributes are encountered after closing delimiter self.events.push_back(Event { kind: EventKind::Placeholder, span: Span::empty_at(self.span.start()), }); - // use str for now, replace if closed later + // use non-opener for now, replace if closed later Some(Event { - kind: EventKind::Str, + kind: match delim { + Delim::SingleQuoted => EventKind::Atom(Quote { + ty: QuoteType::Single, + left: false, + }), + Delim::DoubleQuoted => EventKind::Atom(Quote { + ty: QuoteType::Double, + left: true, + }), + _ => EventKind::Str, + }, span: self.span, }) }) @@ -548,16 +578,35 @@ impl + Clone> Parser { } fn parse_atom(&mut self, first: &lex::Token) -> Option { - let atom = match first.kind { - lex::Kind::Newline => Softbreak, - lex::Kind::Hardbreak => Hardbreak, - lex::Kind::Escape => Escape, - lex::Kind::Nbsp => Nbsp, - lex::Kind::Seq(lex::Sequence::Period) if first.len == 3 => Ellipsis, - lex::Kind::Seq(lex::Sequence::Hyphen) if first.len == 2 => EnDash, - lex::Kind::Seq(lex::Sequence::Hyphen) if first.len == 3 => EmDash, - _ => return None, - }; + let atom = + match first.kind { + lex::Kind::Newline => Softbreak, + lex::Kind::Hardbreak => Hardbreak, + lex::Kind::Escape => Escape, + lex::Kind::Nbsp => Nbsp, + lex::Kind::Seq(lex::Sequence::Period) if first.len == 3 => Ellipsis, + lex::Kind::Seq(lex::Sequence::Hyphen) if first.len == 2 => EnDash, + lex::Kind::Seq(lex::Sequence::Hyphen) if first.len == 3 => EmDash, + lex::Kind::Open(lex::Delimiter::BraceQuote1) => Quote { + ty: QuoteType::Single, + left: true, + }, + lex::Kind::Sym(lex::Symbol::Quote1) + | lex::Kind::Close(lex::Delimiter::BraceQuote1) => Quote { + ty: QuoteType::Single, + left: false, + }, + lex::Kind::Open(lex::Delimiter::BraceQuote2) => Quote { + ty: QuoteType::Double, + left: true, + }, + lex::Kind::Sym(lex::Symbol::Quote2) + | lex::Kind::Close(lex::Delimiter::BraceQuote2) => Quote { + ty: QuoteType::Double, + left: false, + }, + _ => return None, + }; Some(Event { kind: EventKind::Atom(atom), @@ -585,11 +634,11 @@ enum Delim { Emphasis(Directionality), Superscript(Directionality), Subscript(Directionality), - SingleQuoted, - DoubleQuoted, Mark, Delete, Insert, + SingleQuoted, + DoubleQuoted, } #[derive(Debug, Clone, Copy)] @@ -630,26 +679,34 @@ impl Delim { lex::Kind::Close(Delimiter::BraceHyphen) => Some((Delete, Close)), lex::Kind::Open(Delimiter::BracePlus) => Some((Insert, Open)), lex::Kind::Close(Delimiter::BracePlus) => Some((Insert, Close)), + lex::Kind::Open(Delimiter::BraceQuote1) => Some((SingleQuoted, Open)), + lex::Kind::Close(Delimiter::BraceQuote1) => Some((SingleQuoted, Close)), + lex::Kind::Open(Delimiter::BraceQuote2) => Some((DoubleQuoted, Open)), + lex::Kind::Close(Delimiter::BraceQuote2) => Some((DoubleQuoted, Close)), _ => None, } } } -impl TryFrom for Container { - type Error = SpanType; +enum DelimEventKind { + Container(Container), + Span(SpanType), + Quote(QuoteType), +} - fn try_from(d: Delim) -> Result { +impl From for DelimEventKind { + fn from(d: Delim) -> Self { match d { - Delim::Span(ty) => Err(ty), - Delim::Strong(..) => Ok(Self::Strong), - Delim::Emphasis(..) => Ok(Self::Emphasis), - Delim::Superscript(..) => Ok(Self::Superscript), - Delim::Subscript(..) => Ok(Self::Subscript), - Delim::SingleQuoted => Ok(Self::SingleQuoted), - Delim::DoubleQuoted => Ok(Self::DoubleQuoted), - Delim::Mark => Ok(Self::Mark), - Delim::Delete => Ok(Self::Delete), - Delim::Insert => Ok(Self::Insert), + Delim::Span(ty) => Self::Span(ty), + Delim::Strong(..) => Self::Container(Strong), + Delim::Emphasis(..) => Self::Container(Emphasis), + Delim::Superscript(..) => Self::Container(Superscript), + Delim::Subscript(..) => Self::Container(Subscript), + Delim::Mark => Self::Container(Mark), + Delim::Delete => Self::Container(Delete), + Delim::Insert => Self::Container(Insert), + Delim::SingleQuoted => Self::Quote(QuoteType::Single), + Delim::DoubleQuoted => Self::Quote(QuoteType::Double), } } } diff --git a/src/lex.rs b/src/lex.rs index 660e68e..3beb047 100644 --- a/src/lex.rs +++ b/src/lex.rs @@ -36,6 +36,8 @@ pub enum Delimiter { BraceTilde, BraceUnderscore, Bracket, + BraceQuote1, + BraceQuote2, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -189,6 +191,8 @@ impl + Clone> Lexer { '+' => Some(Open(BracePlus)), '~' => Some(Open(BraceTilde)), '_' => Some(Open(BraceUnderscore)), + '\'' => Some(Open(BraceQuote1)), + '"' => Some(Open(BraceQuote2)), _ => None, }; if let Some(exp) = explicit { @@ -204,6 +208,8 @@ impl + Clone> Lexer { '+' => self.maybe_eat_close_brace(Text, BracePlus), '~' => self.maybe_eat_close_brace(Sym(Tilde), BraceTilde), '_' => self.maybe_eat_close_brace(Sym(Underscore), BraceUnderscore), + '\'' => self.maybe_eat_close_brace(Sym(Quote1), BraceQuote1), + '"' => self.maybe_eat_close_brace(Sym(Quote2), BraceQuote2), '-' => { if self.peek_char() == '}' { self.eat_char(); @@ -222,8 +228,6 @@ impl + Clone> Lexer { } '<' => Sym(Lt), '|' => Sym(Pipe), - '\'' => Sym(Quote1), - '"' => Sym(Quote2), '`' => self.eat_seq(Backtick), '$' => self.eat_seq(Dollar), diff --git a/src/lib.rs b/src/lib.rs index b4698f7..4ef3e26 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -90,10 +90,6 @@ pub enum Container<'s> { Emphasis, /// A highlighted inline element. Mark, - /// An quoted inline element, using single quotes. - SingleQuoted, - /// A quoted inline element, using double quotes. - DoubleQuoted, } impl<'s> Container<'s> { @@ -129,9 +125,7 @@ impl<'s> Container<'s> { | Self::Delete | Self::Strong | Self::Emphasis - | Self::Mark - | Self::SingleQuoted - | Self::DoubleQuoted => false, + | Self::Mark => false, } } @@ -167,9 +161,7 @@ impl<'s> Container<'s> { | Self::Delete | Self::Strong | Self::Emphasis - | Self::Mark - | Self::SingleQuoted - | Self::DoubleQuoted => false, + | Self::Mark => false, } } } @@ -234,6 +226,14 @@ pub enum OrderedListStyle { pub enum Atom<'s> { /// A footnote reference. FootnoteReference(&'s str, usize), + /// Left single quotation mark. + LeftSingleQuote, + /// Right double quotation mark. + RightSingleQuote, + /// Left single quotation mark. + LeftDoubleQuote, + /// Right double quotation mark. + RightDoubleQuote, /// A horizontal ellipsis, i.e. a set of three periods. Ellipsis, /// An en dash. @@ -419,8 +419,6 @@ impl<'s> Parser<'s> { inline::Container::Emphasis => Container::Emphasis, inline::Container::Strong => Container::Strong, inline::Container::Mark => Container::Mark, - inline::Container::SingleQuoted => Container::SingleQuoted, - inline::Container::DoubleQuoted => Container::DoubleQuoted, inline::Container::InlineLink => Container::Link( match self.inlines.src(inline.span) { CowStr::Owned(s) => s.replace('\n', "").into(), @@ -482,6 +480,12 @@ impl<'s> Parser<'s> { number, ) } + inline::Atom::Quote { ty, left } => match (ty, left) { + (inline::QuoteType::Single, true) => Atom::LeftSingleQuote, + (inline::QuoteType::Single, false) => Atom::RightSingleQuote, + (inline::QuoteType::Double, true) => Atom::LeftDoubleQuote, + (inline::QuoteType::Double, false) => Atom::RightDoubleQuote, + }, inline::Atom::Ellipsis => Atom::Ellipsis, inline::Atom::EnDash => Atom::EnDash, inline::Atom::EmDash => Atom::EmDash,