This commit is contained in:
Noah Hellman 2022-11-28 20:12:49 +01:00
parent 977cabd450
commit 660e8041b0
5 changed files with 155 additions and 139 deletions

View file

@ -13,13 +13,13 @@ pub fn parse(src: &str) -> Tree {
Parser::new(src).parse() Parser::new(src).parse()
} }
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Block { pub enum Block {
Leaf(Leaf), Leaf(Leaf),
Container(Container), Container(Container),
} }
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Leaf { pub enum Leaf {
Paragraph, Paragraph,
Heading { level: u8 }, Heading { level: u8 },
@ -27,9 +27,10 @@ pub enum Leaf {
Table, Table,
LinkDefinition, LinkDefinition,
CodeBlock { fence_length: u8 }, CodeBlock { fence_length: u8 },
ThematicBreak,
} }
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Container { pub enum Container {
Blockquote, Blockquote,
Div { fence_length: u8 }, Div { fence_length: u8 },
@ -37,14 +38,14 @@ pub enum Container {
Footnote { indent: u8 }, Footnote { indent: u8 },
} }
#[derive(Debug, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Atom { pub enum Atom {
/// Inline content with unparsed inline elements. /// Inline content with unparsed inline elements.
Inline, Inline,
/// A line with no non-whitespace characters. /// A line with no non-whitespace characters.
Blankline, Blankline,
/// Thematic break. ///// Thematic break.
ThematicBreak, //ThematicBreak,
} }
struct Parser<'s> { struct Parser<'s> {
@ -65,7 +66,7 @@ impl<'s> Parser<'s> {
pub fn parse(mut self) -> Tree { pub fn parse(mut self) -> Tree {
let mut lines = lines(self.src).collect::<Vec<_>>(); let mut lines = lines(self.src).collect::<Vec<_>>();
let mut line_pos = 0; let mut line_pos = 0;
loop { while line_pos < lines.len() {
let line_count = self.parse_block(&mut lines[line_pos..]); let line_count = self.parse_block(&mut lines[line_pos..]);
if line_count == 0 { if line_count == 0 {
break; break;
@ -202,7 +203,6 @@ impl Block {
.flatten() .flatten()
} }
_ => { _ => {
/*
let thematic_break = || { let thematic_break = || {
let mut without_whitespace = line.chars().filter(|c| !c.is_whitespace()); let mut without_whitespace = line.chars().filter(|c| !c.is_whitespace());
let length = without_whitespace.clone().count(); let length = without_whitespace.clone().count();
@ -211,9 +211,8 @@ impl Block {
|| without_whitespace.all(|c| c == '*'))) || without_whitespace.all(|c| c == '*')))
.then(|| (Self::Leaf(ThematicBreak), Span::by_len(start, line.len()))) .then(|| (Self::Leaf(ThematicBreak), Span::by_len(start, line.len())))
}; };
*/
//thematic_break() thematic_break()
None
} }
} }
.unwrap_or((Self::Leaf(Paragraph), Span::new(0, 0))) .unwrap_or((Self::Leaf(Paragraph), Span::new(0, 0)))
@ -225,7 +224,7 @@ impl Block {
Self::Leaf(Paragraph | Heading { .. } | Table | LinkDefinition) => { Self::Leaf(Paragraph | Heading { .. } | Table | LinkDefinition) => {
!line.trim().is_empty() !line.trim().is_empty()
} }
Self::Leaf(Attributes) => false, Self::Leaf(Attributes | ThematicBreak) => false,
Self::Container(Blockquote) => line.trim().starts_with('>'), Self::Container(Blockquote) => line.trim().starts_with('>'),
Self::Container(Footnote { indent } | ListItem { indent }) => { Self::Container(Footnote { indent } | ListItem { indent }) => {
let spaces = line.chars().take_while(|c| c.is_whitespace()).count(); let spaces = line.chars().take_while(|c| c.is_whitespace()).count();
@ -276,7 +275,7 @@ fn lines(src: &str) -> impl Iterator<Item = Span> + '_ {
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use crate::tree::Event; use crate::tree::EventKind::*;
use crate::Span; use crate::Span;
use super::Atom::*; use super::Atom::*;
@ -288,81 +287,82 @@ mod test {
macro_rules! test_parse { macro_rules! test_parse {
($src:expr $(,$($event:expr),* $(,)?)?) => { ($src:expr $(,$($event:expr),* $(,)?)?) => {
let t = super::Parser::new($src).parse(); let t = super::Parser::new($src).parse();
let actual = t.iter().collect::<Vec<_>>(); let actual = t.iter().map(|ev| (ev.kind, ev.span.of($src))).collect::<Vec<_>>();
let expected = &[$($($event),*,)?]; let expected = &[$($($event),*,)?];
assert_eq!(actual, expected, "\n\n{}\n\n", $src); assert_eq!(actual, expected, "\n\n{}\n\n", $src);
}; };
} }
#[test] #[test]
fn parse_elem_oneline() { fn parse_para_oneline() {
test_parse!( test_parse!(
"para\n", "para\n",
Event::Enter(&Leaf(Paragraph), Span::new(0, 0)), (Enter(Leaf(Paragraph)), ""),
Event::Element(&Inline, Span::new(0, 5)), (Element(Inline), "para\n"),
Event::Exit, (Exit, ""),
); );
} }
#[test] #[test]
fn parse_elem_multiline() { fn parse_para_multiline() {
test_parse!( test_parse!(
"para\npara\n", "para0\npara1\n",
Event::Enter(&Leaf(Paragraph), Span::new(0, 0)), (Enter(Leaf(Paragraph)), ""),
Event::Element(&Inline, Span::new(0, 5)), (Element(Inline), "para0\n"),
Event::Element(&Inline, Span::new(5, 10)), (Element(Inline), "para1\n"),
Event::Exit, (Exit, ""),
); );
} }
#[test] #[test]
fn parse_elem_multi() { fn parse_heading_multi() {
test_parse!( test_parse!(
concat!( concat!(
"# 2\n", "# 2\n",
"\n", "\n",
" # 8\n", " # 8\n",
" 12\n", " 12\n",
"15\n", // "15\n", //
), ),
Event::Enter(&Leaf(Heading { level: 1 }), Span::new(0, 1)), (Enter(Leaf(Heading { level: 1 })), "#"),
Event::Element(&Inline, Span::new(1, 4)), (Element(Inline), " 2\n"),
Event::Exit, (Exit, "#"),
Event::Element(&Blankline, Span::new(4, 5)), (Element(Blankline), "\n"),
Event::Enter(&Leaf(Heading { level: 1 }), Span::new(6, 7)), (Enter(Leaf(Heading { level: 1 })), "#"),
Event::Element(&Inline, Span::new(7, 10)), (Element(Inline), " 8\n"),
Event::Element(&Inline, Span::new(10, 15)), (Element(Inline), " 12\n"),
Event::Element(&Inline, Span::new(15, 18)), (Element(Inline), "15\n"),
Event::Exit, (Exit, "#"),
); );
} }
#[test] #[test]
fn parse_container() { fn parse_blockquote() {
test_parse!( test_parse!(
concat!( concat!(
"> a\n", "> a\n",
">\n", ">\n",
"> ## hl\n", "> ## hl\n",
">\n", ">\n",
"> para\n", // "> para\n", //
), ),
Event::Enter(&Container(Blockquote), Span::new(0, 1)), (Enter(Container(Blockquote)), ">"),
Event::Enter(&Leaf(Paragraph), Span::new(1, 1)), (Enter(Leaf(Paragraph)), ""),
Event::Element(&Inline, Span::new(1, 4)), (Element(Inline), " a\n"),
Event::Exit, (Exit, ""),
Event::Element(&Blankline, Span::new(5, 6)), (Element(Blankline), "\n"),
Event::Enter(&Leaf(Heading { level: 2 }), Span::new(8, 10)), (Enter(Leaf(Heading { level: 2 })), "##"),
Event::Element(&Inline, Span::new(10, 14)), (Element(Inline), " hl\n"),
Event::Exit, (Exit, "##"),
Event::Element(&Blankline, Span::new(15, 16)), (Element(Blankline), "\n"),
Event::Enter(&Leaf(Paragraph), Span::new(17, 17)), (Enter(Leaf(Paragraph)), ""),
Event::Element(&Inline, Span::new(17, 23)), (Element(Inline), " para\n"),
Event::Exit, (Exit, ""),
Event::Exit, (Exit, ">"),
); );
} }
/*
#[test] #[test]
fn parse_code_block() { fn parse_code_block() {
test_parse!( test_parse!(
@ -372,12 +372,13 @@ mod test {
"l1\n", "l1\n",
"```", // "```", //
), ),
Event::Enter(&Leaf(CodeBlock { fence_length: 3 }), Span::new(0, 8)), (Event::Enter(Leaf(CodeBlock { fence_length: 3 })), "```lang\n"),
Event::Element(&Inline, Span::new(8, 11)), (Event::Element(Inline), "l0\n"),
Event::Element(&Inline, Span::new(11, 14)), (Event::Element(Inline), "l1\n"),
Event::Exit (Event::Exit, "```lang\n"),
); );
} }
*/
macro_rules! test_block { macro_rules! test_block {
($src:expr, $kind:expr, $str:expr, $len:expr $(,)?) => { ($src:expr, $kind:expr, $str:expr, $len:expr $(,)?) => {

View file

@ -303,7 +303,7 @@ mod test {
#[allow(unused)] #[allow(unused)]
let mut p = super::Parser::new(); let mut p = super::Parser::new();
p.parse($src); p.parse($src);
let actual = p.collect::<Vec<_>>(); let actual = p.map(|ev| (ev.kind, ev.span.of($src))).collect::<Vec<_>>();
let expected = &[$($($token),*,)?]; let expected = &[$($($token),*,)?];
assert_eq!(actual, expected, "\n\n{}\n\n", $src); assert_eq!(actual, expected, "\n\n{}\n\n", $src);
}; };
@ -320,37 +320,40 @@ mod test {
#[test] #[test]
fn str() { fn str() {
test_parse!("abc", Node(Str).span(0, 3)); test_parse!("abc", (Node(Str), "abc"));
test_parse!("abc def", Node(Str).span(0, 7)); test_parse!("abc def", (Node(Str), "abc def"));
} }
#[test] #[test]
fn verbatim() { fn verbatim() {
test_parse!("`abc`", Node(Verbatim).span(1, 4)); test_parse!("`abc`", (Node(Verbatim), "abc"));
test_parse!("`abc", Node(Verbatim).span(1, 4)); test_parse!("`abc", (Node(Verbatim), "abc"));
test_parse!("``abc``", Node(Verbatim).span(2, 5)); test_parse!("``abc``", (Node(Verbatim), "abc"));
test_parse!("abc `def`", Node(Str).span(0, 4), Node(Verbatim).span(5, 8)); test_parse!("abc `def`", (Node(Str), "abc "), (Node(Verbatim), "def"));
} }
#[test] #[test]
fn math() { fn math() {
test_parse!("$`abc`", Node(InlineMath).span(2, 5)); test_parse!("$`abc`", (Node(InlineMath), "abc"));
test_parse!("$$```abc", Node(DisplayMath).span(5, 8)); test_parse!("$`abc` str", (Node(InlineMath), "abc"), (Node(Str), " str"));
test_parse!("$$`abc`", (Node(DisplayMath), "abc"));
test_parse!("$`abc", (Node(InlineMath), "abc"));
test_parse!("$```abc```", (Node(InlineMath), "abc"),);
} }
#[test] #[test]
fn container_basic() { fn container_basic() {
test_parse!( test_parse!(
"_abc_", "_abc_",
Enter(Emphasis).span(0, 1), (Enter(Emphasis), "_"),
Node(Str).span(1, 4), (Node(Str), "abc"),
Exit(Emphasis).span(4, 5), (Exit(Emphasis), "_"),
); );
test_parse!( test_parse!(
"{_abc_}", "{_abc_}",
Enter(Emphasis).span(0, 2), (Enter(Emphasis), "{_"),
Node(Str).span(2, 5), (Node(Str), "abc"),
Exit(Emphasis).span(5, 7), (Exit(Emphasis), "_}"),
); );
} }
@ -358,40 +361,40 @@ mod test {
fn container_nest() { fn container_nest() {
test_parse!( test_parse!(
"{_{_abc_}_}", "{_{_abc_}_}",
Enter(Emphasis).span(0, 2), (Enter(Emphasis), "{_"),
Enter(Emphasis).span(2, 4), (Enter(Emphasis), "{_"),
Node(Str).span(4, 7), (Node(Str), "abc"),
Exit(Emphasis).span(7, 9), (Exit(Emphasis), "_}"),
Exit(Emphasis).span(9, 11), (Exit(Emphasis), "_}"),
); );
test_parse!( test_parse!(
"*_abc_*", "*_abc_*",
Enter(Strong).span(0, 1), (Enter(Strong), "*"),
Enter(Emphasis).span(1, 2), (Enter(Emphasis), "_"),
Node(Str).span(2, 5), (Node(Str), "abc"),
Exit(Emphasis).span(5, 6), (Exit(Emphasis), "_"),
Exit(Strong).span(6, 7), (Exit(Strong), "*"),
); );
} }
#[test] #[test]
fn container_unopened() { fn container_unopened() {
test_parse!("*}abc", Node(Str).span(0, 5)); test_parse!("*}abc", (Node(Str), "*}abc"));
} }
#[test] #[test]
fn container_close_parent() { fn container_close_parent() {
test_parse!( test_parse!(
"{*{_abc*}", "{*{_abc*}",
Enter(Strong).span(0, 2), (Enter(Strong), "{*"),
Node(Str).span(2, 7), (Node(Str), "{_abc"),
Exit(Strong).span(7, 9), (Exit(Strong), "*}"),
); );
} }
#[test] #[test]
fn container_close_block() { fn container_close_block() {
test_parse!("{_abc", Node(Str).span(0, 5),); test_parse!("{_abc", (Node(Str), "{_abc"));
test_parse!("{_{*{_abc", Node(Str).span(0, 9),); test_parse!("{_{*{_abc", (Node(Str), "{_{*{_abc"));
} }
} }

View file

@ -22,7 +22,6 @@ pub enum Kind {
Close(Delimiter), Close(Delimiter),
Sym(Symbol), Sym(Symbol),
Seq(Sequence), Seq(Sequence),
Eof,
} }
#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]

View file

@ -1,15 +1,16 @@
mod block; mod block;
mod html;
mod inline; mod inline;
mod lex; mod lex;
mod span; mod span;
mod tree; mod tree;
use span::Span;
pub struct Block; pub struct Block;
const EOF: char = '\0'; const EOF: char = '\0';
use span::Span;
pub struct Parser<'s> { pub struct Parser<'s> {
src: &'s str, src: &'s str,
tree: block::Tree, tree: block::Tree,
@ -35,11 +36,13 @@ impl<'s> Parser<'s> {
} }
} }
#[derive(Debug, PartialEq, Eq)]
pub enum ListType { pub enum ListType {
Unordered, Unordered,
Ordered, Ordered,
} }
#[derive(Debug, PartialEq, Eq)]
pub enum TagKind<'s> { pub enum TagKind<'s> {
Paragraph, Paragraph,
Heading { level: u8 }, Heading { level: u8 },
@ -58,6 +61,13 @@ pub enum TagKind<'s> {
Footnote { tag: &'s str }, Footnote { tag: &'s str },
} }
#[derive(Debug, PartialEq, Eq)]
pub enum Event2<'s> {
Start(TagKind<'s>),
End(TagKind<'s>),
Blankline,
}
#[derive(Debug, PartialEq, Eq)] #[derive(Debug, PartialEq, Eq)]
pub enum Event { pub enum Event {
Start(block::Block), Start(block::Block),
@ -83,33 +93,33 @@ impl<'s> Iterator for Iter<'s> {
inline.span = inline.span.translate(self.inline_start); inline.span = inline.span.translate(self.inline_start);
return Some(Event::Inline(inline)); return Some(Event::Inline(inline));
} else if let Some(ev) = self.tree.next() { } else if let Some(ev) = self.tree.next() {
match ev { match ev.kind {
tree::Event::Element(atom, sp) => { tree::EventKind::Element(atom) => {
assert_eq!(*atom, block::Atom::Inline); assert_eq!(atom, block::Atom::Inline);
parser.parse(sp.of(self.src)); parser.parse(ev.span.of(self.src));
self.inline_start = sp.start(); self.inline_start = ev.span.start();
} }
tree::Event::Exit => { tree::EventKind::Exit => {
self.parser = None; self.parser = None;
return Some(Event::End); return Some(Event::End);
} }
tree::Event::Enter(..) => unreachable!(), tree::EventKind::Enter(..) => unreachable!(),
} }
} }
} }
self.tree.next().map(|ev| match ev { self.tree.next().map(|ev| match ev.kind {
tree::Event::Element(atom, _sp) => { tree::EventKind::Element(atom) => {
assert_eq!(*atom, block::Atom::Blankline); assert_eq!(atom, block::Atom::Blankline);
Event::Blankline Event::Blankline
} }
tree::Event::Enter(block, ..) => { tree::EventKind::Enter(block) => {
if matches!(block, block::Block::Leaf(..)) { if matches!(block, block::Block::Leaf(..)) {
self.parser = Some(inline::Parser::new()); self.parser = Some(inline::Parser::new());
} }
Event::Start(block.clone()) Event::Start(block)
} }
tree::Event::Exit => Event::End, tree::EventKind::Exit => Event::End,
}) })
} }
} }

View file

@ -1,11 +1,24 @@
use crate::Span; use crate::Span;
#[derive(Debug)] #[derive(Debug, Clone, PartialEq, Eq)]
pub enum EventKind<C, E> {
Enter(C),
Element(E),
Exit,
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Event<C, A> {
pub kind: EventKind<C, A>,
pub span: Span,
}
#[derive(Debug, Clone)]
pub struct Tree<C, E> { pub struct Tree<C, E> {
nodes: Vec<Node<C, E>>, nodes: Vec<Node<C, E>>,
} }
impl<C, E> Tree<C, E> { impl<C: Copy, E: Copy> Tree<C, E> {
fn new(nodes: Vec<Node<C, E>>) -> Self { fn new(nodes: Vec<Node<C, E>>) -> Self {
Self { nodes } Self { nodes }
} }
@ -15,53 +28,41 @@ impl<C, E> Tree<C, E> {
} }
} }
#[derive(Debug, PartialEq, Eq)]
pub enum Event<'a, C, E> {
Enter(&'a C, Span),
Element(&'a E, Span),
Exit,
}
impl<'a, C, E> Event<'a, C, E> {
pub fn span(&self) -> Span {
match self {
Self::Enter(_, sp) | Self::Element(_, sp) => *sp,
Self::Exit => panic!(),
}
}
}
pub struct Iter<'a, C, E> { pub struct Iter<'a, C, E> {
nodes: &'a [Node<C, E>], nodes: &'a [Node<C, E>],
branch: Vec<NodeIndex>, branch: Vec<NodeIndex>,
head: Option<NodeIndex>, head: Option<NodeIndex>,
} }
impl<'a, C, E> Iterator for Iter<'a, C, E> { impl<'a, C: Copy, E: Copy> Iterator for Iter<'a, C, E> {
type Item = Event<'a, C, E>; type Item = Event<C, E>;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
if let Some(head) = self.head { if let Some(head) = self.head {
let n = &self.nodes[head.index()]; let n = &self.nodes[head.index()];
match &n.kind { let kind = match &n.kind {
NodeKind::Root => { NodeKind::Root => {
self.head = n.next; self.head = n.next;
self.next() return self.next();
} }
NodeKind::Container(c, child) => { NodeKind::Container(c, child) => {
self.branch.push(head); self.branch.push(head);
self.head = *child; self.head = *child;
Some(Event::Enter(c, n.span)) EventKind::Enter(*c)
} }
NodeKind::Element(e) => { NodeKind::Element(e) => {
self.head = n.next; self.head = n.next;
Some(Event::Element(e, n.span)) EventKind::Element(*e)
} }
} };
Some(Event { kind, span: n.span })
} else if let Some(block_ni) = self.branch.pop() { } else if let Some(block_ni) = self.branch.pop() {
let Node { next, .. } = &self.nodes[block_ni.index()]; let Node { next, span, .. } = &self.nodes[block_ni.index()];
self.head = *next; self.head = *next;
Some(Event::Exit) Some(Event {
kind: EventKind::Exit,
span: *span,
})
} else { } else {
None None
} }
@ -117,7 +118,7 @@ pub struct Builder<C, E> {
head: Option<NodeIndex>, head: Option<NodeIndex>,
} }
impl<C, E> Builder<C, E> { impl<C: Copy, E: Copy> Builder<C, E> {
pub(super) fn new() -> Self { pub(super) fn new() -> Self {
Builder { Builder {
nodes: vec![Node { nodes: vec![Node {
@ -188,30 +189,32 @@ impl<C, E> Builder<C, E> {
} }
} }
impl<C: std::fmt::Display + Clone, E: std::fmt::Display + Clone> std::fmt::Display impl<C: Copy + std::fmt::Display, E: Copy + std::fmt::Display> std::fmt::Display for Builder<C, E> {
for Builder<C, E>
{
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.clone().finish().fmt(f) self.clone().finish().fmt(f)
} }
} }
impl<C: std::fmt::Display, E: std::fmt::Display> std::fmt::Display for Tree<C, E> { impl<C: Copy + std::fmt::Display, E: Copy + std::fmt::Display> std::fmt::Display for Tree<C, E> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
const INDENT: &str = " "; const INDENT: &str = " ";
let mut level = 0; let mut level = 0;
for e in self.iter() { for e in self.iter() {
let indent = INDENT.repeat(level); let indent = INDENT.repeat(level);
match e { match e.kind {
Event::Enter(container, sp) => { EventKind::Enter(container) => {
writeln!(f, "{}{} ({}:{})", indent, container, sp.start(), sp.end())?; write!(f, "{}{}", indent, container)?;
level += 1; level += 1;
} }
Event::Exit => level -= 1, EventKind::Exit => {
Event::Element(element, sp) => { level -= 1;
writeln!(f, "{}{} ({}:{})", indent, element, sp.start(), sp.end())?; continue;
}
EventKind::Element(element) => {
write!(f, "{}{}", indent, element)?;
} }
} }
writeln!(f, " ({}:{})", e.span.start(), e.span.end())?;
} }
Ok(()) Ok(())
} }