verbatim fix

This commit is contained in:
Noah Hellman 2022-12-08 17:42:54 +01:00
parent a994228bb5
commit 2303cf3574
5 changed files with 278 additions and 174 deletions

View file

@ -309,7 +309,8 @@ impl Block {
f @ ('`' | ':' | '~') => {
let fence_length = (&mut chars).take_while(|c| *c == f).count() + 1;
let lang = line_t[fence_length..].trim();
let valid_spec = !lang.chars().any(char::is_whitespace);
let valid_spec =
!lang.chars().any(char::is_whitespace) && !lang.chars().any(|c| c == '`');
(valid_spec && fence_length >= 3)
.then(|| {
u8::try_from(fence_length).ok().map(|fence_length| {

View file

@ -75,11 +75,8 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<I, W> {
self.out.write_str("<div>")?;
}
}
Container::Span => self.out.write_str("<span>")?,
Container::Paragraph => self.out.write_str("<p>")?,
Container::Heading { level } => write!(self.out, "<h{}>", level)?,
Container::Link(..) => todo!(),
Container::Image(..) => todo!(),
Container::TableCell => self.out.write_str("<td>")?,
Container::DescriptionTerm => self.out.write_str("<dt>")?,
Container::RawBlock { .. } => todo!(),
@ -90,6 +87,16 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<I, W> {
self.out.write_str("<pre><code>")?;
}
}
Container::Span => self.out.write_str("<span>")?,
Container::Link(..) => todo!(),
Container::Image(..) => todo!(),
Container::Verbatim => self.out.write_str("<code>")?,
Container::Math { display } => self.out.write_str(if display {
r#"<span class="math display">\["#
} else {
r#"<span class="math inline">\("#
})?,
Container::RawInline { .. } => todo!(),
Container::Subscript => self.out.write_str("<sub>")?,
Container::Superscript => self.out.write_str("<sup>")?,
Container::Insert => self.out.write_str("<ins>")?,
@ -119,11 +126,14 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<I, W> {
Container::Heading { level } => write!(self.out, "</h{}>", level)?,
Container::TableCell => self.out.write_str("</td>")?,
Container::DescriptionTerm => self.out.write_str("</dt>")?,
Container::RawBlock { .. } => self.out.write_str("</code></pre>")?,
Container::RawBlock { .. } => todo!(),
Container::CodeBlock { .. } => self.out.write_str("</code></pre>")?,
Container::Span => self.out.write_str("</span>")?,
Container::Link(..) => todo!(),
Container::Image(..) => todo!(),
Container::Verbatim => self.out.write_str("</code>")?,
Container::Math { .. } => self.out.write_str("</span>")?,
Container::RawInline { .. } => todo!(),
Container::Subscript => self.out.write_str("</sub>")?,
Container::Superscript => self.out.write_str("</sup>")?,
Container::Insert => self.out.write_str("</ins>")?,
@ -136,22 +146,6 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<I, W> {
}
}
Event::Str(s) => self.out.write_str(s)?,
Event::Verbatim(s) => write!(self.out, "<code>{}</code>", s)?,
Event::Math { content, display } => {
if display {
write!(
self.out,
r#"<span class="math display">\[{}\]</span>"#,
content,
)?;
} else {
write!(
self.out,
r#"<span class="math inline">\({}\)</span>"#,
content,
)?;
}
}
Event::Atom(a) => match a {
Atom::Ellipsis => self.out.write_str("&hellip;")?,
Atom::EnDash => self.out.write_str("&ndash;")?,

View file

@ -6,7 +6,6 @@ use lex::Symbol;
use Atom::*;
use Container::*;
use Node::*;
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Atom {
@ -17,24 +16,6 @@ pub enum Atom {
Ellipsis,
EnDash,
EmDash,
Lt,
Gt,
Ampersand,
Quote,
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Node {
Str,
// link
//Url,
//ImageSource,
//LinkReference,
//FootnoteReference,
Verbatim,
RawFormat { format: Span },
InlineMath,
DisplayMath,
}
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
@ -52,6 +33,11 @@ pub enum Container {
// smart quoting
SingleQuoted,
DoubleQuoted,
// Verbatim
Verbatim,
RawFormat,
InlineMath,
DisplayMath,
}
#[derive(Debug, PartialEq, Eq)]
@ -59,7 +45,7 @@ pub enum EventKind {
Enter(Container),
Exit(Container),
Atom(Atom),
Node(Node),
Str,
}
#[derive(Debug, PartialEq, Eq)]
@ -81,6 +67,9 @@ pub struct Parser<'s> {
span: Span,
lexer: std::iter::Peekable<lex::Lexer<'s>>,
verbatim: Option<(Container, usize)>,
last: bool,
}
impl<'s> Parser<'s> {
@ -91,11 +80,18 @@ impl<'s> Parser<'s> {
span: Span::new(0, 0),
lexer: lex::Lexer::new("").peekable(),
verbatim: None,
last: false,
}
}
pub fn parse(&mut self, src: &'s str) {
pub fn parse(&mut self, src: &'s str, last: bool) {
self.lexer = lex::Lexer::new(src).peekable();
if last {
assert!(!self.last);
}
self.last = last;
}
fn eat(&mut self) -> Option<lex::Token> {
@ -114,20 +110,16 @@ impl<'s> Parser<'s> {
self.span = Span::empty_at(self.span.end());
}
fn node(&self, kind: Node) -> Event {
Event {
kind: EventKind::Node(kind),
span: self.span,
}
}
fn parse_event(&mut self) -> Option<Event> {
self.reset_span();
self.eat().map(|first| {
self.parse_verbatim(&first)
.or_else(|| self.parse_container(&first))
.or_else(|| self.parse_atom(&first))
.unwrap_or_else(|| self.node(Str))
.unwrap_or(Event {
kind: EventKind::Str,
span: self.span,
})
})
}
@ -138,9 +130,6 @@ impl<'s> Parser<'s> {
lex::Kind::Seq(lex::Sequence::Period) if first.len == 3 => Ellipsis,
lex::Kind::Seq(lex::Sequence::Hyphen) if first.len == 2 => EnDash,
lex::Kind::Seq(lex::Sequence::Hyphen) if first.len == 3 => EmDash,
lex::Kind::Sym(lex::Symbol::Lt) => Lt,
lex::Kind::Sym(lex::Symbol::Gt) => Gt,
lex::Kind::Sym(lex::Symbol::Quote2) => Quote,
_ => return None,
};
@ -151,6 +140,22 @@ impl<'s> Parser<'s> {
}
fn parse_verbatim(&mut self, first: &lex::Token) -> Option<Event> {
self.verbatim
.map(|(kind, opener_len)| {
let kind = if matches!(first.kind, lex::Kind::Seq(lex::Sequence::Backtick))
&& first.len == opener_len
{
self.verbatim = None;
EventKind::Exit(kind)
} else {
EventKind::Str
};
Event {
kind,
span: self.span,
}
})
.or_else(|| {
match first.kind {
lex::Kind::Seq(lex::Sequence::Dollar) => {
let math_opt = (first.len <= 2)
@ -162,9 +167,9 @@ impl<'s> Parser<'s> {
{
Some((
if first.len == 2 {
DisplayMath
Container::DisplayMath
} else {
InlineMath
Container::InlineMath
},
*len,
))
@ -178,24 +183,19 @@ impl<'s> Parser<'s> {
}
math_opt
}
lex::Kind::Seq(lex::Sequence::Backtick) => Some((Verbatim, first.len)),
lex::Kind::Seq(lex::Sequence::Backtick) => {
Some((Container::Verbatim, first.len))
}
_ => None,
}
.map(|(kind, opener_len)| {
let mut span = Span::empty_at(self.span.end());
while let Some(tok) = self.eat() {
if matches!(tok.kind, lex::Kind::Seq(lex::Sequence::Backtick))
&& tok.len == opener_len
{
break;
}
span = span.extend(tok.len);
}
self.verbatim = Some((kind, opener_len));
Event {
kind: EventKind::Node(kind),
span,
kind: EventKind::Enter(kind),
span: self.span,
}
})
})
}
fn parse_container(&mut self, first: &lex::Token) -> Option<Event> {
@ -239,7 +239,7 @@ impl<'s> Parser<'s> {
.unwrap_or_else(|| {
self.openers.push((cont, self.events.len()));
// use str for now, replace if closed later
EventKind::Node(Str)
EventKind::Str
})
})
.map(|kind| Event {
@ -258,7 +258,7 @@ impl<'s> Iterator for Parser<'s> {
|| self
.events
.back()
.map_or(false, |ev| matches!(ev.kind, EventKind::Node(Str)))
.map_or(false, |ev| matches!(ev.kind, EventKind::Str))
{
if let Some(ev) = self.parse_event() {
self.events.push_back(ev);
@ -267,25 +267,39 @@ impl<'s> Iterator for Parser<'s> {
}
}
self.events.pop_front().map(|e| {
if matches!(e.kind, EventKind::Node(Str)) {
self.events
.pop_front()
.map(|e| {
if matches!(e.kind, EventKind::Str) {
// merge str events
let mut span = e.span;
while self
.events
.front()
.map_or(false, |ev| matches!(ev.kind, EventKind::Node(Str)))
.map_or(false, |ev| matches!(ev.kind, EventKind::Str))
{
span = span.union(self.events.pop_front().unwrap().span);
let ev = self.events.pop_front().unwrap();
assert_eq!(span.end(), ev.span.start());
span = span.union(ev.span);
}
Event {
kind: EventKind::Node(Str),
kind: EventKind::Str,
span,
}
} else {
e
}
})
.or_else(|| {
if self.last {
self.verbatim.take().map(|(kind, _)| Event {
kind: EventKind::Exit(kind),
span: self.span,
})
} else {
None
}
})
}
}
@ -296,49 +310,106 @@ mod test {
use super::Atom::*;
use super::Container::*;
use super::EventKind::*;
use super::Node::*;
use super::Verbatim;
macro_rules! test_parse {
($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => {
#[allow(unused)]
let mut p = super::Parser::new();
p.parse($src);
p.parse($src, true);
let actual = p.map(|ev| (ev.kind, ev.span.of($src))).collect::<Vec<_>>();
let expected = &[$($($token),*,)?];
assert_eq!(actual, expected, "\n\n{}\n\n", $src);
};
}
impl super::EventKind {
pub fn span(self, start: usize, end: usize) -> super::Event {
super::Event {
span: Span::new(start, end),
kind: self,
}
}
}
#[test]
fn str() {
test_parse!("abc", (Node(Str), "abc"));
test_parse!("abc def", (Node(Str), "abc def"));
test_parse!("abc", (Str, "abc"));
test_parse!("abc def", (Str, "abc def"));
}
#[test]
fn verbatim() {
test_parse!("`abc`", (Node(Verbatim), "abc"));
test_parse!("`abc", (Node(Verbatim), "abc"));
test_parse!("``abc``", (Node(Verbatim), "abc"));
test_parse!("abc `def`", (Node(Str), "abc "), (Node(Verbatim), "def"));
test_parse!(
"`abc`",
(Enter(Verbatim), "`"),
(Str, "abc"),
(Exit(Verbatim), "`"),
);
test_parse!(
"`abc\ndef`",
(Enter(Verbatim), "`"),
(Str, "abc\ndef"),
(Exit(Verbatim), "`"),
);
test_parse!(
"`abc&def`",
(Enter(Verbatim), "`"),
(Str, "abc&def"),
(Exit(Verbatim), "`"),
);
test_parse!(
"`abc",
(Enter(Verbatim), "`"),
(Str, "abc"),
(Exit(Verbatim), ""),
);
test_parse!(
"``abc``",
(Enter(Verbatim), "``"),
(Str, "abc"),
(Exit(Verbatim), "``"),
);
test_parse!(
"abc `def`",
(Str, "abc "),
(Enter(Verbatim), "`"),
(Str, "def"),
(Exit(Verbatim), "`"),
);
test_parse!(
"abc`def`",
(Str, "abc"),
(Enter(Verbatim), "`"),
(Str, "def"),
(Exit(Verbatim), "`"),
);
}
#[test]
fn math() {
test_parse!("$`abc`", (Node(InlineMath), "abc"));
test_parse!("$`abc` str", (Node(InlineMath), "abc"), (Node(Str), " str"));
test_parse!("$$`abc`", (Node(DisplayMath), "abc"));
test_parse!("$`abc", (Node(InlineMath), "abc"));
test_parse!("$```abc```", (Node(InlineMath), "abc"),);
test_parse!(
"$`abc`",
(Enter(InlineMath), "$`"),
(Str, "abc"),
(Exit(InlineMath), "`"),
);
test_parse!(
"$`abc` str",
(Enter(InlineMath), "$`"),
(Str, "abc"),
(Exit(InlineMath), "`"),
(Str, " str"),
);
test_parse!(
"$$`abc`",
(Enter(DisplayMath), "$$`"),
(Str, "abc"),
(Exit(DisplayMath), "`"),
);
test_parse!(
"$`abc",
(Enter(InlineMath), "$`"),
(Str, "abc"),
(Exit(InlineMath), ""),
);
test_parse!(
"$```abc```",
(Enter(InlineMath), "$```"),
(Str, "abc"),
(Exit(InlineMath), "```"),
);
}
#[test]
@ -346,13 +417,13 @@ mod test {
test_parse!(
"_abc_",
(Enter(Emphasis), "_"),
(Node(Str), "abc"),
(Str, "abc"),
(Exit(Emphasis), "_"),
);
test_parse!(
"{_abc_}",
(Enter(Emphasis), "{_"),
(Node(Str), "abc"),
(Str, "abc"),
(Exit(Emphasis), "_}"),
);
}
@ -363,7 +434,7 @@ mod test {
"{_{_abc_}_}",
(Enter(Emphasis), "{_"),
(Enter(Emphasis), "{_"),
(Node(Str), "abc"),
(Str, "abc"),
(Exit(Emphasis), "_}"),
(Exit(Emphasis), "_}"),
);
@ -371,7 +442,7 @@ mod test {
"*_abc_*",
(Enter(Strong), "*"),
(Enter(Emphasis), "_"),
(Node(Str), "abc"),
(Str, "abc"),
(Exit(Emphasis), "_"),
(Exit(Strong), "*"),
);
@ -379,7 +450,7 @@ mod test {
#[test]
fn container_unopened() {
test_parse!("*}abc", (Node(Str), "*}abc"));
test_parse!("*}abc", (Str, "*}abc"));
}
#[test]
@ -387,14 +458,14 @@ mod test {
test_parse!(
"{*{_abc*}",
(Enter(Strong), "{*"),
(Node(Str), "{_abc"),
(Str, "{_abc"),
(Exit(Strong), "*}"),
);
}
#[test]
fn container_close_block() {
test_parse!("{_abc", (Node(Str), "{_abc"));
test_parse!("{_{*{_abc", (Node(Str), "{_{*{_abc"));
test_parse!("{_abc", (Str, "{_abc"));
test_parse!("{_{*{_abc", (Str, "{_{*{_abc"));
}
}

View file

@ -22,10 +22,6 @@ pub enum Event<'s> {
Str(&'s str),
/// An atomic element.
Atom(Atom),
/// A verbatim string.
Verbatim(&'s str),
/// An inline or display math element.
Math { content: &'s str, display: bool },
}
#[derive(Debug, PartialEq, Eq)]
@ -66,6 +62,12 @@ pub enum Container<'s> {
Link(&'s str, LinkType),
/// An inline image.
Image(&'s str),
/// An inline verbatim string.
Verbatim,
/// An inline or display math element.
Math { display: bool },
/// Inline raw markup for a specific output format.
RawInline { format: &'s str },
/// A subscripted element.
Subscript,
/// A superscripted element.
@ -108,6 +110,9 @@ impl<'s> Container<'s> {
Self::Span
| Self::Link(..)
| Self::Image(..)
| Self::Verbatim
| Self::Math { .. }
| Self::RawInline { .. }
| Self::Subscript
| Self::Superscript
| Self::Insert
@ -141,6 +146,9 @@ impl<'s> Container<'s> {
| Self::Span
| Self::Link(..)
| Self::Image(..)
| Self::Verbatim
| Self::Math { .. }
| Self::RawInline { .. }
| Self::Subscript
| Self::Superscript
| Self::Insert
@ -223,6 +231,10 @@ impl<'s> Event<'s> {
inline::EventKind::Enter(c) | inline::EventKind::Exit(c) => {
let t = match c {
inline::Container::Span => Container::Span,
inline::Container::Verbatim => Container::Verbatim,
inline::Container::InlineMath => Container::Math { display: false },
inline::Container::DisplayMath => Container::Math { display: true },
inline::Container::RawFormat => Container::RawInline { format: todo!() },
inline::Container::Subscript => Container::Subscript,
inline::Container::Superscript => Container::Superscript,
inline::Container::Insert => Container::Insert,
@ -248,21 +260,8 @@ impl<'s> Event<'s> {
inline::Atom::Softbreak => Atom::Softbreak,
inline::Atom::Hardbreak => Atom::Hardbreak,
inline::Atom::Escape => Atom::Escape,
_ => todo!(),
}),
inline::EventKind::Node(n) => match n {
inline::Node::Str => Self::Str(content),
inline::Node::Verbatim => Self::Verbatim(content),
inline::Node::InlineMath => Self::Math {
content,
display: false,
},
inline::Node::DisplayMath => Self::Math {
content,
display: true,
},
_ => todo!(),
},
inline::EventKind::Str => Self::Str(content),
}
}
}
@ -316,7 +315,7 @@ pub struct Parser<'s> {
tree: block::Tree,
parser: Option<inline::Parser<'s>>,
inline_start: usize,
attributes: Attributes<'s>,
block_attributes: Attributes<'s>,
}
impl<'s> Parser<'s> {
@ -327,7 +326,7 @@ impl<'s> Parser<'s> {
tree: block::parse(src),
parser: None,
inline_start: 0,
attributes: Attributes::none(),
block_attributes: Attributes::none(),
}
}
}
@ -345,7 +344,8 @@ impl<'s> Iterator for Parser<'s> {
match ev.kind {
tree::EventKind::Element(atom) => {
assert_eq!(atom, block::Atom::Inline);
parser.parse(ev.span.of(self.src));
let last_inline = self.tree.neighbors().next().is_none();
parser.parse(ev.span.of(self.src), last_inline);
}
tree::EventKind::Exit(block) => {
self.parser = None;
@ -363,7 +363,7 @@ impl<'s> Iterator for Parser<'s> {
block::Atom::Inline => panic!("inline outside leaf block"),
block::Atom::Blankline => Event::Atom(Atom::Blankline),
block::Atom::Attributes => {
self.attributes.parse(content);
self.block_attributes.parse(content);
continue;
}
},
@ -384,7 +384,7 @@ impl<'s> Iterator for Parser<'s> {
},
b => Container::from_block(self.src, b),
};
Event::Start(container, self.attributes.take())
Event::Start(container, self.block_attributes.take())
}
tree::EventKind::Exit(block) => Event::End(Container::from_block(self.src, block)),
};
@ -470,4 +470,17 @@ mod test {
End(Paragraph),
);
}
// Unterminated inline verbatim spanning two lines of a paragraph: the input
// has an opening backtick but no closing one. The expected events contain one
// `Str` per source line and an `End(Verbatim)` emitted just before the
// paragraph itself ends.
#[test]
fn verbatim() {
test_parse!(
"`abc\ndef",
Start(Paragraph, Attributes::none()),
Start(Verbatim, Attributes::none()),
Str("abc\n"),
Str("def"),
End(Verbatim),
End(Paragraph),
);
}
}

View file

@ -13,6 +13,16 @@ pub struct Event<C, A> {
pub span: Span,
}
/// A tree node as yielded by [`Tree::neighbors`]: the node's kind paired with
/// the span recorded on that node.
pub struct Object<C, E> {
// Whether the node is a container or an element, with its payload.
kind: ObjectKind<C, E>,
// Copied from the node's `span` field.
span: Span,
}
/// The kind of an [`Object`]: either a container payload or an element
/// payload. Unlike the tree's internal node kind, a container here carries no
/// link to its children.
pub enum ObjectKind<C, E> {
/// A container node, holding the container value `C`.
Container(C),
/// An element node, holding the element value `E`.
Element(E),
}
#[derive(Debug, Clone)]
pub struct Tree<C, E> {
nodes: Vec<Node<C, E>>,
@ -20,14 +30,32 @@ pub struct Tree<C, E> {
head: Option<NodeIndex>,
}
impl<C, E> Tree<C, E> {
impl<C: Copy, E: Copy> Tree<C, E> {
fn new(nodes: Vec<Node<C, E>>) -> Self {
let head = nodes[NodeIndex::root().index()].next;
Self {
nodes,
branch: Vec::new(),
head: Some(NodeIndex::root()),
head,
}
}
/// Returns an iterator over the node at the current `head` and the nodes that
/// follow it through their `next` links, yielding an [`Object`] (kind and
/// span) for each.
///
/// The iteration works on a local copy of `head`, so the tree's own position
/// is left untouched. Children of containers are not visited; only the `next`
/// chain is followed.
///
/// # Panics
///
/// Hits `unreachable!()` if a visited node is the root node.
pub fn neighbors(&self) -> impl Iterator<Item = Object<C, E>> + '_ {
// Local cursor; `self.head` itself is never modified here.
let mut head = self.head;
std::iter::from_fn(move || {
// `take` empties the cursor; it is refilled below with the next link,
// so the iterator ends once a node has no `next`.
head.take().map(|h| {
let n = &self.nodes[h.index()];
let kind = match &n.kind {
NodeKind::Root => unreachable!(),
// The child link of a container is dropped on purpose: only the
// payload is exposed.
NodeKind::Container(c, _) => ObjectKind::Container(*c),
NodeKind::Element(e) => ObjectKind::Element(*e),
};
let span = n.span;
// Advance the cursor to the following sibling, if any.
head = n.next;
Object { kind, span }
})
})
}
}
impl<C: Copy, E: Copy> Iterator for Tree<C, E> {
@ -37,10 +65,7 @@ impl<C: Copy, E: Copy> Iterator for Tree<C, E> {
if let Some(head) = self.head {
let n = &self.nodes[head.index()];
let kind = match &n.kind {
NodeKind::Root => {
self.head = n.next;
return self.next();
}
NodeKind::Root => unreachable!(),
NodeKind::Container(c, child) => {
self.branch.push(head);
self.head = *child;