From 2303cf3574d8989399def1e964f85b07bf3383bd Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Thu, 8 Dec 2022 17:42:54 +0100 Subject: [PATCH] verbatim fix --- src/block.rs | 3 +- src/html.rs | 34 +++--- src/inline.rs | 319 ++++++++++++++++++++++++++++++-------------------- src/lib.rs | 59 ++++++---- src/tree.rs | 37 +++++- 5 files changed, 278 insertions(+), 174 deletions(-) diff --git a/src/block.rs b/src/block.rs index c99fd2f..6593f2c 100644 --- a/src/block.rs +++ b/src/block.rs @@ -309,7 +309,8 @@ impl Block { f @ ('`' | ':' | '~') => { let fence_length = (&mut chars).take_while(|c| *c == f).count() + 1; let lang = line_t[fence_length..].trim(); - let valid_spec = !lang.chars().any(char::is_whitespace); + let valid_spec = + !lang.chars().any(char::is_whitespace) && !lang.chars().any(|c| c == '`'); (valid_spec && fence_length >= 3) .then(|| { u8::try_from(fence_length).ok().map(|fence_length| { diff --git a/src/html.rs b/src/html.rs index 89d4689..cd8580e 100644 --- a/src/html.rs +++ b/src/html.rs @@ -75,11 +75,8 @@ impl<'s, I: Iterator>, W: std::fmt::Write> Writer { self.out.write_str("
")?; } } - Container::Span => self.out.write_str("")?, Container::Paragraph => self.out.write_str("

")?, Container::Heading { level } => write!(self.out, "", level)?, - Container::Link(..) => todo!(), - Container::Image(..) => todo!(), Container::TableCell => self.out.write_str("")?, Container::DescriptionTerm => self.out.write_str("

")?, Container::RawBlock { .. } => todo!(), @@ -90,6 +87,16 @@ impl<'s, I: Iterator>, W: std::fmt::Write> Writer { self.out.write_str("
")?;
                             }
                         }
+                        Container::Span => self.out.write_str("")?,
+                        Container::Link(..) => todo!(),
+                        Container::Image(..) => todo!(),
+                        Container::Verbatim => self.out.write_str("")?,
+                        Container::Math { display } => self.out.write_str(if display {
+                            r#"\["#
+                        } else {
+                            r#"\("#
+                        })?,
+                        Container::RawInline { .. } => todo!(),
                         Container::Subscript => self.out.write_str("")?,
                         Container::Superscript => self.out.write_str("")?,
                         Container::Insert => self.out.write_str("")?,
@@ -119,11 +126,14 @@ impl<'s, I: Iterator>, W: std::fmt::Write> Writer {
                         Container::Heading { level } => write!(self.out, "", level)?,
                         Container::TableCell => self.out.write_str("")?,
                         Container::DescriptionTerm => self.out.write_str("
")?, - Container::RawBlock { .. } => self.out.write_str("")?, + Container::RawBlock { .. } => todo!(), Container::CodeBlock { .. } => self.out.write_str("")?, Container::Span => self.out.write_str("
")?, Container::Link(..) => todo!(), Container::Image(..) => todo!(), + Container::Verbatim => self.out.write_str("")?, + Container::Math { .. } => self.out.write_str("")?, + Container::RawInline { .. } => todo!(), Container::Subscript => self.out.write_str("")?, Container::Superscript => self.out.write_str("")?, Container::Insert => self.out.write_str("")?, @@ -136,22 +146,6 @@ impl<'s, I: Iterator>, W: std::fmt::Write> Writer { } } Event::Str(s) => self.out.write_str(s)?, - Event::Verbatim(s) => write!(self.out, "{}", s)?, - Event::Math { content, display } => { - if display { - write!( - self.out, - r#"\[{}\]"#, - content, - )?; - } else { - write!( - self.out, - r#"\({}\)"#, - content, - )?; - } - } Event::Atom(a) => match a { Atom::Ellipsis => self.out.write_str("…")?, Atom::EnDash => self.out.write_str("–")?, diff --git a/src/inline.rs b/src/inline.rs index 61b57b6..5fd5ec9 100644 --- a/src/inline.rs +++ b/src/inline.rs @@ -6,7 +6,6 @@ use lex::Symbol; use Atom::*; use Container::*; -use Node::*; #[derive(Debug, Clone, PartialEq, Eq)] pub enum Atom { @@ -17,24 +16,6 @@ pub enum Atom { Ellipsis, EnDash, EmDash, - Lt, - Gt, - Ampersand, - Quote, -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum Node { - Str, - // link - //Url, - //ImageSource, - //LinkReference, - //FootnoteReference, - Verbatim, - RawFormat { format: Span }, - InlineMath, - DisplayMath, } #[derive(Debug, Copy, Clone, PartialEq, Eq)] @@ -52,6 +33,11 @@ pub enum Container { // smart quoting SingleQuoted, DoubleQuoted, + // Verbatim + Verbatim, + RawFormat, + InlineMath, + DisplayMath, } #[derive(Debug, PartialEq, Eq)] @@ -59,7 +45,7 @@ pub enum EventKind { Enter(Container), Exit(Container), Atom(Atom), - Node(Node), + Str, } #[derive(Debug, PartialEq, Eq)] @@ -81,6 +67,9 @@ pub struct Parser<'s> { span: Span, lexer: std::iter::Peekable>, + + verbatim: Option<(Container, usize)>, + last: bool, } impl<'s> Parser<'s> { @@ -91,11 +80,18 @@ impl<'s> Parser<'s> { span: Span::new(0, 0), lexer: lex::Lexer::new("").peekable(), + + verbatim: None, + last: false, } } - pub fn parse(&mut self, src: &'s str) { + pub fn parse(&mut self, src: &'s str, last: bool) { self.lexer = lex::Lexer::new(src).peekable(); + if last { + assert!(!self.last); + } + self.last = last; } fn eat(&mut self) -> Option { @@ -114,20 +110,16 @@ impl<'s> Parser<'s> { self.span = Span::empty_at(self.span.end()); } - fn node(&self, kind: Node) -> Event { - Event { - kind: EventKind::Node(kind), - span: self.span, - } - } - fn parse_event(&mut self) -> Option { self.reset_span(); self.eat().map(|first| { self.parse_verbatim(&first) .or_else(|| self.parse_container(&first)) .or_else(|| self.parse_atom(&first)) - .unwrap_or_else(|| self.node(Str)) + .unwrap_or(Event { + kind: EventKind::Str, + span: self.span, + }) }) } @@ -138,9 +130,6 @@ impl<'s> Parser<'s> { lex::Kind::Seq(lex::Sequence::Period) if first.len == 3 => Ellipsis, lex::Kind::Seq(lex::Sequence::Hyphen) if first.len == 2 => EnDash, lex::Kind::Seq(lex::Sequence::Hyphen) if first.len == 3 => EmDash, - lex::Kind::Sym(lex::Symbol::Lt) => Lt, - lex::Kind::Sym(lex::Symbol::Gt) => Gt, - lex::Kind::Sym(lex::Symbol::Quote2) => Quote, _ => return None, }; @@ -151,51 +140,62 @@ impl<'s> Parser<'s> { } fn parse_verbatim(&mut self, first: &lex::Token) -> Option { - match first.kind { - lex::Kind::Seq(lex::Sequence::Dollar) => { - let math_opt = (first.len <= 2) - .then(|| { - if let Some(lex::Token { - kind: lex::Kind::Seq(lex::Sequence::Backtick), - len, - }) = self.peek() - { - Some(( - if first.len == 2 { - DisplayMath - } else { - InlineMath - }, - *len, - )) - } else { - None - } - }) - .flatten(); - if math_opt.is_some() { - self.eat(); // backticks - } - math_opt - } - lex::Kind::Seq(lex::Sequence::Backtick) => Some((Verbatim, first.len)), - _ => None, - } - .map(|(kind, opener_len)| { - let mut span = Span::empty_at(self.span.end()); - while let Some(tok) = self.eat() { - if matches!(tok.kind, lex::Kind::Seq(lex::Sequence::Backtick)) - && tok.len == opener_len + self.verbatim + .map(|(kind, opener_len)| { + let kind = if matches!(first.kind, lex::Kind::Seq(lex::Sequence::Backtick)) + && first.len == opener_len { - break; + self.verbatim = None; + EventKind::Exit(kind) + } else { + EventKind::Str + }; + Event { + kind, + span: self.span, } - span = span.extend(tok.len); - } - Event { - kind: EventKind::Node(kind), - span, - } - }) + }) + .or_else(|| { + match first.kind { + lex::Kind::Seq(lex::Sequence::Dollar) => { + let math_opt = (first.len <= 2) + .then(|| { + if let Some(lex::Token { + kind: lex::Kind::Seq(lex::Sequence::Backtick), + len, + }) = self.peek() + { + Some(( + if first.len == 2 { + Container::DisplayMath + } else { + Container::InlineMath + }, + *len, + )) + } else { + None + } + }) + .flatten(); + if math_opt.is_some() { + self.eat(); // backticks + } + math_opt + } + lex::Kind::Seq(lex::Sequence::Backtick) => { + Some((Container::Verbatim, first.len)) + } + _ => None, + } + .map(|(kind, opener_len)| { + self.verbatim = Some((kind, opener_len)); + Event { + kind: EventKind::Enter(kind), + span: self.span, + } + }) + }) } fn parse_container(&mut self, first: &lex::Token) -> Option { @@ -239,7 +239,7 @@ impl<'s> Parser<'s> { .unwrap_or_else(|| { self.openers.push((cont, self.events.len())); // use str for now, replace if closed later - EventKind::Node(Str) + EventKind::Str }) }) .map(|kind| Event { @@ -258,7 +258,7 @@ impl<'s> Iterator for Parser<'s> { || self .events .back() - .map_or(false, |ev| matches!(ev.kind, EventKind::Node(Str))) + .map_or(false, |ev| matches!(ev.kind, EventKind::Str)) { if let Some(ev) = self.parse_event() { self.events.push_back(ev); @@ -267,25 +267,39 @@ impl<'s> Iterator for Parser<'s> { } } - self.events.pop_front().map(|e| { - if matches!(e.kind, EventKind::Node(Str)) { - // merge str events - let mut span = e.span; - while self - .events - .front() - .map_or(false, |ev| matches!(ev.kind, EventKind::Node(Str))) - { - span = span.union(self.events.pop_front().unwrap().span); + self.events + .pop_front() + .map(|e| { + if matches!(e.kind, EventKind::Str) { + // merge str events + let mut span = e.span; + while self + .events + .front() + .map_or(false, |ev| matches!(ev.kind, EventKind::Str)) + { + let ev = self.events.pop_front().unwrap(); + assert_eq!(span.end(), ev.span.start()); + span = span.union(ev.span); + } + Event { + kind: EventKind::Str, + span, + } + } else { + e } - Event { - kind: EventKind::Node(Str), - span, + }) + .or_else(|| { + if self.last { + self.verbatim.take().map(|(kind, _)| Event { + kind: EventKind::Exit(kind), + span: self.span, + }) + } else { + None } - } else { - e - } - }) + }) } } @@ -296,49 +310,106 @@ mod test { use super::Atom::*; use super::Container::*; use super::EventKind::*; - use super::Node::*; + use super::Verbatim; macro_rules! test_parse { ($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => { #[allow(unused)] let mut p = super::Parser::new(); - p.parse($src); + p.parse($src, true); let actual = p.map(|ev| (ev.kind, ev.span.of($src))).collect::>(); let expected = &[$($($token),*,)?]; assert_eq!(actual, expected, "\n\n{}\n\n", $src); }; } - impl super::EventKind { - pub fn span(self, start: usize, end: usize) -> super::Event { - super::Event { - span: Span::new(start, end), - kind: self, - } - } - } - #[test] fn str() { - test_parse!("abc", (Node(Str), "abc")); - test_parse!("abc def", (Node(Str), "abc def")); + test_parse!("abc", (Str, "abc")); + test_parse!("abc def", (Str, "abc def")); } #[test] fn verbatim() { - test_parse!("`abc`", (Node(Verbatim), "abc")); - test_parse!("`abc", (Node(Verbatim), "abc")); - test_parse!("``abc``", (Node(Verbatim), "abc")); - test_parse!("abc `def`", (Node(Str), "abc "), (Node(Verbatim), "def")); + test_parse!( + "`abc`", + (Enter(Verbatim), "`"), + (Str, "abc"), + (Exit(Verbatim), "`"), + ); + test_parse!( + "`abc\ndef`", + (Enter(Verbatim), "`"), + (Str, "abc\ndef"), + (Exit(Verbatim), "`"), + ); + test_parse!( + "`abc&def`", + (Enter(Verbatim), "`"), + (Str, "abc&def"), + (Exit(Verbatim), "`"), + ); + test_parse!( + "`abc", + (Enter(Verbatim), "`"), + (Str, "abc"), + (Exit(Verbatim), ""), + ); + test_parse!( + "``abc``", + (Enter(Verbatim), "``"), + (Str, "abc"), + (Exit(Verbatim), "``"), + ); + test_parse!( + "abc `def`", + (Str, "abc "), + (Enter(Verbatim), "`"), + (Str, "def"), + (Exit(Verbatim), "`"), + ); + test_parse!( + "abc`def`", + (Str, "abc"), + (Enter(Verbatim), "`"), + (Str, "def"), + (Exit(Verbatim), "`"), + ); } #[test] fn math() { - test_parse!("$`abc`", (Node(InlineMath), "abc")); - test_parse!("$`abc` str", (Node(InlineMath), "abc"), (Node(Str), " str")); - test_parse!("$$`abc`", (Node(DisplayMath), "abc")); - test_parse!("$`abc", (Node(InlineMath), "abc")); - test_parse!("$```abc```", (Node(InlineMath), "abc"),); + test_parse!( + "$`abc`", + (Enter(InlineMath), "$`"), + (Str, "abc"), + (Exit(InlineMath), "`"), + ); + test_parse!( + "$`abc` str", + (Enter(InlineMath), "$`"), + (Str, "abc"), + (Exit(InlineMath), "`"), + (Str, " str"), + ); + test_parse!( + "$$`abc`", + (Enter(DisplayMath), "$$`"), + (Str, "abc"), + (Exit(DisplayMath), "`"), + ); + test_parse!( + "$`abc", + (Enter(InlineMath), "$`"), + (Str, "abc"), + (Exit(InlineMath), ""), + ); + test_parse!( + "$```abc```", + (Enter(InlineMath), "$```"), + (Str, "abc"), + (Exit(InlineMath), "```"), + ); } #[test] @@ -346,13 +417,13 @@ mod test { test_parse!( "_abc_", (Enter(Emphasis), "_"), - (Node(Str), "abc"), + (Str, "abc"), (Exit(Emphasis), "_"), ); test_parse!( "{_abc_}", (Enter(Emphasis), "{_"), - (Node(Str), "abc"), + (Str, "abc"), (Exit(Emphasis), "_}"), ); } @@ -363,7 +434,7 @@ mod test { "{_{_abc_}_}", (Enter(Emphasis), "{_"), (Enter(Emphasis), "{_"), - (Node(Str), "abc"), + (Str, "abc"), (Exit(Emphasis), "_}"), (Exit(Emphasis), "_}"), ); @@ -371,7 +442,7 @@ mod test { "*_abc_*", (Enter(Strong), "*"), (Enter(Emphasis), "_"), - (Node(Str), "abc"), + (Str, "abc"), (Exit(Emphasis), "_"), (Exit(Strong), "*"), ); @@ -379,7 +450,7 @@ mod test { #[test] fn container_unopened() { - test_parse!("*}abc", (Node(Str), "*}abc")); + test_parse!("*}abc", (Str, "*}abc")); } #[test] @@ -387,14 +458,14 @@ mod test { test_parse!( "{*{_abc*}", (Enter(Strong), "{*"), - (Node(Str), "{_abc"), + (Str, "{_abc"), (Exit(Strong), "*}"), ); } #[test] fn container_close_block() { - test_parse!("{_abc", (Node(Str), "{_abc")); - test_parse!("{_{*{_abc", (Node(Str), "{_{*{_abc")); + test_parse!("{_abc", (Str, "{_abc")); + test_parse!("{_{*{_abc", (Str, "{_{*{_abc")); } } diff --git a/src/lib.rs b/src/lib.rs index b1beaf4..aa58888 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -22,10 +22,6 @@ pub enum Event<'s> { Str(&'s str), /// An atomic element. Atom(Atom), - /// A verbatim string. - Verbatim(&'s str), - /// An inline or display math element. - Math { content: &'s str, display: bool }, } #[derive(Debug, PartialEq, Eq)] @@ -66,6 +62,12 @@ pub enum Container<'s> { Link(&'s str, LinkType), /// An inline image. Image(&'s str), + /// An inline verbatim string. + Verbatim, + /// An inline or display math element. + Math { display: bool }, + /// Inline raw markup for a specific output format. + RawInline { format: &'s str }, /// A subscripted element. Subscript, /// A superscripted element. @@ -108,6 +110,9 @@ impl<'s> Container<'s> { Self::Span | Self::Link(..) | Self::Image(..) + | Self::Verbatim + | Self::Math { .. } + | Self::RawInline { .. } | Self::Subscript | Self::Superscript | Self::Insert @@ -141,6 +146,9 @@ impl<'s> Container<'s> { | Self::Span | Self::Link(..) | Self::Image(..) + | Self::Verbatim + | Self::Math { .. } + | Self::RawInline { .. } | Self::Subscript | Self::Superscript | Self::Insert @@ -223,6 +231,10 @@ impl<'s> Event<'s> { inline::EventKind::Enter(c) | inline::EventKind::Exit(c) => { let t = match c { inline::Container::Span => Container::Span, + inline::Container::Verbatim => Container::Verbatim, + inline::Container::InlineMath => Container::Math { display: false }, + inline::Container::DisplayMath => Container::Math { display: true }, + inline::Container::RawFormat => Container::RawInline { format: todo!() }, inline::Container::Subscript => Container::Subscript, inline::Container::Superscript => Container::Superscript, inline::Container::Insert => Container::Insert, @@ -248,21 +260,8 @@ impl<'s> Event<'s> { inline::Atom::Softbreak => Atom::Softbreak, inline::Atom::Hardbreak => Atom::Hardbreak, inline::Atom::Escape => Atom::Escape, - _ => todo!(), }), - inline::EventKind::Node(n) => match n { - inline::Node::Str => Self::Str(content), - inline::Node::Verbatim => Self::Verbatim(content), - inline::Node::InlineMath => Self::Math { - content, - display: false, - }, - inline::Node::DisplayMath => Self::Math { - content, - display: true, - }, - _ => todo!(), - }, + inline::EventKind::Str => Self::Str(content), } } } @@ -316,7 +315,7 @@ pub struct Parser<'s> { tree: block::Tree, parser: Option>, inline_start: usize, - attributes: Attributes<'s>, + block_attributes: Attributes<'s>, } impl<'s> Parser<'s> { @@ -327,7 +326,7 @@ impl<'s> Parser<'s> { tree: block::parse(src), parser: None, inline_start: 0, - attributes: Attributes::none(), + block_attributes: Attributes::none(), } } } @@ -345,7 +344,8 @@ impl<'s> Iterator for Parser<'s> { match ev.kind { tree::EventKind::Element(atom) => { assert_eq!(atom, block::Atom::Inline); - parser.parse(ev.span.of(self.src)); + let last_inline = self.tree.neighbors().next().is_none(); + parser.parse(ev.span.of(self.src), last_inline); } tree::EventKind::Exit(block) => { self.parser = None; @@ -363,7 +363,7 @@ impl<'s> Iterator for Parser<'s> { block::Atom::Inline => panic!("inline outside leaf block"), block::Atom::Blankline => Event::Atom(Atom::Blankline), block::Atom::Attributes => { - self.attributes.parse(content); + self.block_attributes.parse(content); continue; } }, @@ -384,7 +384,7 @@ impl<'s> Iterator for Parser<'s> { }, b => Container::from_block(self.src, b), }; - Event::Start(container, self.attributes.take()) + Event::Start(container, self.block_attributes.take()) } tree::EventKind::Exit(block) => Event::End(Container::from_block(self.src, block)), }; @@ -470,4 +470,17 @@ mod test { End(Paragraph), ); } + + #[test] + fn verbatim() { + test_parse!( + "`abc\ndef", + Start(Paragraph, Attributes::none()), + Start(Verbatim, Attributes::none()), + Str("abc\n"), + Str("def"), + End(Verbatim), + End(Paragraph), + ); + } } diff --git a/src/tree.rs b/src/tree.rs index 0c90f45..c5be38e 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -13,6 +13,16 @@ pub struct Event { pub span: Span, } +pub struct Object { + kind: ObjectKind, + span: Span, +} + +pub enum ObjectKind { + Container(C), + Element(E), +} + #[derive(Debug, Clone)] pub struct Tree { nodes: Vec>, @@ -20,14 +30,32 @@ pub struct Tree { head: Option, } -impl Tree { +impl Tree { fn new(nodes: Vec>) -> Self { + let head = nodes[NodeIndex::root().index()].next; Self { nodes, branch: Vec::new(), - head: Some(NodeIndex::root()), + head, } } + + pub fn neighbors(&self) -> impl Iterator> + '_ { + let mut head = self.head; + std::iter::from_fn(move || { + head.take().map(|h| { + let n = &self.nodes[h.index()]; + let kind = match &n.kind { + NodeKind::Root => unreachable!(), + NodeKind::Container(c, _) => ObjectKind::Container(*c), + NodeKind::Element(e) => ObjectKind::Element(*e), + }; + let span = n.span; + head = n.next; + Object { kind, span } + }) + }) + } } impl Iterator for Tree { @@ -37,10 +65,7 @@ impl Iterator for Tree { if let Some(head) = self.head { let n = &self.nodes[head.index()]; let kind = match &n.kind { - NodeKind::Root => { - self.head = n.next; - return self.next(); - } + NodeKind::Root => unreachable!(), NodeKind::Container(c, child) => { self.branch.push(head); self.head = *child;