This commit is contained in:
Noah Hellman 2022-11-28 20:12:49 +01:00
parent 977cabd450
commit 660e8041b0
5 changed files with 155 additions and 139 deletions

View file

@ -13,13 +13,13 @@ pub fn parse(src: &str) -> Tree {
Parser::new(src).parse()
}
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
/// A block element: either a leaf or a container.
pub enum Block {
/// A leaf block. The top-level event iterator starts an inline parser when it enters one.
Leaf(Leaf),
/// A container block.
Container(Container),
}
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Leaf {
Paragraph,
Heading { level: u8 },
@ -27,9 +27,10 @@ pub enum Leaf {
Table,
LinkDefinition,
CodeBlock { fence_length: u8 },
ThematicBreak,
}
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Container {
Blockquote,
Div { fence_length: u8 },
@ -37,14 +38,14 @@ pub enum Container {
Footnote { indent: u8 },
}
#[derive(Debug, PartialEq, Eq)]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Atom {
/// Inline content with unparsed inline elements.
Inline,
/// A line with no non-whitespace characters.
Blankline,
/// Thematic break.
ThematicBreak,
///// Thematic break.
//ThematicBreak,
}
struct Parser<'s> {
@ -65,7 +66,7 @@ impl<'s> Parser<'s> {
pub fn parse(mut self) -> Tree {
let mut lines = lines(self.src).collect::<Vec<_>>();
let mut line_pos = 0;
loop {
while line_pos < lines.len() {
let line_count = self.parse_block(&mut lines[line_pos..]);
if line_count == 0 {
break;
@ -202,7 +203,6 @@ impl Block {
.flatten()
}
_ => {
/*
let thematic_break = || {
let mut without_whitespace = line.chars().filter(|c| !c.is_whitespace());
let length = without_whitespace.clone().count();
@ -211,9 +211,8 @@ impl Block {
|| without_whitespace.all(|c| c == '*')))
.then(|| (Self::Leaf(ThematicBreak), Span::by_len(start, line.len())))
};
*/
//thematic_break()
None
thematic_break()
}
}
.unwrap_or((Self::Leaf(Paragraph), Span::new(0, 0)))
@ -225,7 +224,7 @@ impl Block {
Self::Leaf(Paragraph | Heading { .. } | Table | LinkDefinition) => {
!line.trim().is_empty()
}
Self::Leaf(Attributes) => false,
Self::Leaf(Attributes | ThematicBreak) => false,
Self::Container(Blockquote) => line.trim().starts_with('>'),
Self::Container(Footnote { indent } | ListItem { indent }) => {
let spaces = line.chars().take_while(|c| c.is_whitespace()).count();
@ -276,7 +275,7 @@ fn lines(src: &str) -> impl Iterator<Item = Span> + '_ {
#[cfg(test)]
mod test {
use crate::tree::Event;
use crate::tree::EventKind::*;
use crate::Span;
use super::Atom::*;
@ -288,81 +287,82 @@ mod test {
macro_rules! test_parse {
($src:expr $(,$($event:expr),* $(,)?)?) => {
let t = super::Parser::new($src).parse();
let actual = t.iter().collect::<Vec<_>>();
let actual = t.iter().map(|ev| (ev.kind, ev.span.of($src))).collect::<Vec<_>>();
let expected = &[$($($event),*,)?];
assert_eq!(actual, expected, "\n\n{}\n\n", $src);
};
}
#[test]
fn parse_elem_oneline() {
fn parse_para_oneline() {
test_parse!(
"para\n",
Event::Enter(&Leaf(Paragraph), Span::new(0, 0)),
Event::Element(&Inline, Span::new(0, 5)),
Event::Exit,
(Enter(Leaf(Paragraph)), ""),
(Element(Inline), "para\n"),
(Exit, ""),
);
}
#[test]
fn parse_elem_multiline() {
fn parse_para_multiline() {
test_parse!(
"para\npara\n",
Event::Enter(&Leaf(Paragraph), Span::new(0, 0)),
Event::Element(&Inline, Span::new(0, 5)),
Event::Element(&Inline, Span::new(5, 10)),
Event::Exit,
"para0\npara1\n",
(Enter(Leaf(Paragraph)), ""),
(Element(Inline), "para0\n"),
(Element(Inline), "para1\n"),
(Exit, ""),
);
}
#[test]
fn parse_elem_multi() {
fn parse_heading_multi() {
test_parse!(
concat!(
"# 2\n",
"\n",
" # 8\n",
" # 8\n",
" 12\n",
"15\n", //
),
Event::Enter(&Leaf(Heading { level: 1 }), Span::new(0, 1)),
Event::Element(&Inline, Span::new(1, 4)),
Event::Exit,
Event::Element(&Blankline, Span::new(4, 5)),
Event::Enter(&Leaf(Heading { level: 1 }), Span::new(6, 7)),
Event::Element(&Inline, Span::new(7, 10)),
Event::Element(&Inline, Span::new(10, 15)),
Event::Element(&Inline, Span::new(15, 18)),
Event::Exit,
(Enter(Leaf(Heading { level: 1 })), "#"),
(Element(Inline), " 2\n"),
(Exit, "#"),
(Element(Blankline), "\n"),
(Enter(Leaf(Heading { level: 1 })), "#"),
(Element(Inline), " 8\n"),
(Element(Inline), " 12\n"),
(Element(Inline), "15\n"),
(Exit, "#"),
);
}
#[test]
fn parse_container() {
fn parse_blockquote() {
test_parse!(
concat!(
"> a\n",
">\n",
"> ## hl\n",
">\n",
"> para\n", //
"> para\n", //
),
Event::Enter(&Container(Blockquote), Span::new(0, 1)),
Event::Enter(&Leaf(Paragraph), Span::new(1, 1)),
Event::Element(&Inline, Span::new(1, 4)),
Event::Exit,
Event::Element(&Blankline, Span::new(5, 6)),
Event::Enter(&Leaf(Heading { level: 2 }), Span::new(8, 10)),
Event::Element(&Inline, Span::new(10, 14)),
Event::Exit,
Event::Element(&Blankline, Span::new(15, 16)),
Event::Enter(&Leaf(Paragraph), Span::new(17, 17)),
Event::Element(&Inline, Span::new(17, 23)),
Event::Exit,
Event::Exit,
(Enter(Container(Blockquote)), ">"),
(Enter(Leaf(Paragraph)), ""),
(Element(Inline), " a\n"),
(Exit, ""),
(Element(Blankline), "\n"),
(Enter(Leaf(Heading { level: 2 })), "##"),
(Element(Inline), " hl\n"),
(Exit, "##"),
(Element(Blankline), "\n"),
(Enter(Leaf(Paragraph)), ""),
(Element(Inline), " para\n"),
(Exit, ""),
(Exit, ">"),
);
}
/*
#[test]
fn parse_code_block() {
test_parse!(
@ -372,12 +372,13 @@ mod test {
"l1\n",
"```", //
),
Event::Enter(&Leaf(CodeBlock { fence_length: 3 }), Span::new(0, 8)),
Event::Element(&Inline, Span::new(8, 11)),
Event::Element(&Inline, Span::new(11, 14)),
Event::Exit
(Event::Enter(Leaf(CodeBlock { fence_length: 3 })), "```lang\n"),
(Event::Element(Inline), "l0\n"),
(Event::Element(Inline), "l1\n"),
(Event::Exit, "```lang\n"),
);
}
*/
macro_rules! test_block {
($src:expr, $kind:expr, $str:expr, $len:expr $(,)?) => {

View file

@ -303,7 +303,7 @@ mod test {
#[allow(unused)]
let mut p = super::Parser::new();
p.parse($src);
let actual = p.collect::<Vec<_>>();
let actual = p.map(|ev| (ev.kind, ev.span.of($src))).collect::<Vec<_>>();
let expected = &[$($($token),*,)?];
assert_eq!(actual, expected, "\n\n{}\n\n", $src);
};
@ -320,37 +320,40 @@ mod test {
#[test]
fn str() {
test_parse!("abc", Node(Str).span(0, 3));
test_parse!("abc def", Node(Str).span(0, 7));
test_parse!("abc", (Node(Str), "abc"));
test_parse!("abc def", (Node(Str), "abc def"));
}
#[test]
fn verbatim() {
test_parse!("`abc`", Node(Verbatim).span(1, 4));
test_parse!("`abc", Node(Verbatim).span(1, 4));
test_parse!("``abc``", Node(Verbatim).span(2, 5));
test_parse!("abc `def`", Node(Str).span(0, 4), Node(Verbatim).span(5, 8));
test_parse!("`abc`", (Node(Verbatim), "abc"));
test_parse!("`abc", (Node(Verbatim), "abc"));
test_parse!("``abc``", (Node(Verbatim), "abc"));
test_parse!("abc `def`", (Node(Str), "abc "), (Node(Verbatim), "def"));
}
#[test]
fn math() {
test_parse!("$`abc`", Node(InlineMath).span(2, 5));
test_parse!("$$```abc", Node(DisplayMath).span(5, 8));
test_parse!("$`abc`", (Node(InlineMath), "abc"));
test_parse!("$`abc` str", (Node(InlineMath), "abc"), (Node(Str), " str"));
test_parse!("$$`abc`", (Node(DisplayMath), "abc"));
test_parse!("$`abc", (Node(InlineMath), "abc"));
test_parse!("$```abc```", (Node(InlineMath), "abc"),);
}
#[test]
fn container_basic() {
test_parse!(
"_abc_",
Enter(Emphasis).span(0, 1),
Node(Str).span(1, 4),
Exit(Emphasis).span(4, 5),
(Enter(Emphasis), "_"),
(Node(Str), "abc"),
(Exit(Emphasis), "_"),
);
test_parse!(
"{_abc_}",
Enter(Emphasis).span(0, 2),
Node(Str).span(2, 5),
Exit(Emphasis).span(5, 7),
(Enter(Emphasis), "{_"),
(Node(Str), "abc"),
(Exit(Emphasis), "_}"),
);
}
@ -358,40 +361,40 @@ mod test {
fn container_nest() {
test_parse!(
"{_{_abc_}_}",
Enter(Emphasis).span(0, 2),
Enter(Emphasis).span(2, 4),
Node(Str).span(4, 7),
Exit(Emphasis).span(7, 9),
Exit(Emphasis).span(9, 11),
(Enter(Emphasis), "{_"),
(Enter(Emphasis), "{_"),
(Node(Str), "abc"),
(Exit(Emphasis), "_}"),
(Exit(Emphasis), "_}"),
);
test_parse!(
"*_abc_*",
Enter(Strong).span(0, 1),
Enter(Emphasis).span(1, 2),
Node(Str).span(2, 5),
Exit(Emphasis).span(5, 6),
Exit(Strong).span(6, 7),
(Enter(Strong), "*"),
(Enter(Emphasis), "_"),
(Node(Str), "abc"),
(Exit(Emphasis), "_"),
(Exit(Strong), "*"),
);
}
#[test]
fn container_unopened() {
test_parse!("*}abc", Node(Str).span(0, 5));
test_parse!("*}abc", (Node(Str), "*}abc"));
}
#[test]
fn container_close_parent() {
test_parse!(
"{*{_abc*}",
Enter(Strong).span(0, 2),
Node(Str).span(2, 7),
Exit(Strong).span(7, 9),
(Enter(Strong), "{*"),
(Node(Str), "{_abc"),
(Exit(Strong), "*}"),
);
}
#[test]
fn container_close_block() {
test_parse!("{_abc", Node(Str).span(0, 5),);
test_parse!("{_{*{_abc", Node(Str).span(0, 9),);
test_parse!("{_abc", (Node(Str), "{_abc"));
test_parse!("{_{*{_abc", (Node(Str), "{_{*{_abc"));
}
}

View file

@ -22,7 +22,6 @@ pub enum Kind {
Close(Delimiter),
Sym(Symbol),
Seq(Sequence),
Eof,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]

View file

@ -1,15 +1,16 @@
mod block;
mod html;
mod inline;
mod lex;
mod span;
mod tree;
use span::Span;
pub struct Block;
const EOF: char = '\0';
use span::Span;
pub struct Parser<'s> {
src: &'s str,
tree: block::Tree,
@ -35,11 +36,13 @@ impl<'s> Parser<'s> {
}
}
#[derive(Debug, PartialEq, Eq)]
/// Distinguishes unordered lists from ordered ones.
// NOTE(review): no use of this type is visible in this excerpt — confirm where it is consumed.
pub enum ListType {
Unordered,
Ordered,
}
#[derive(Debug, PartialEq, Eq)]
pub enum TagKind<'s> {
Paragraph,
Heading { level: u8 },
@ -58,6 +61,13 @@ pub enum TagKind<'s> {
Footnote { tag: &'s str },
}
#[derive(Debug, PartialEq, Eq)]
/// Event that carries a `TagKind` on both its start and its end.
// NOTE(review): not referenced anywhere in the visible code; presumably a work-in-progress
// replacement for `Event` — confirm before relying on it.
pub enum Event2<'s> {
/// Start of an element of the given kind.
Start(TagKind<'s>),
/// End of an element of the given kind.
End(TagKind<'s>),
/// NOTE(review): presumably a blank line in the source, mirroring `Event::Blankline` — confirm.
Blankline,
}
#[derive(Debug, PartialEq, Eq)]
pub enum Event {
Start(block::Block),
@ -83,33 +93,33 @@ impl<'s> Iterator for Iter<'s> {
inline.span = inline.span.translate(self.inline_start);
return Some(Event::Inline(inline));
} else if let Some(ev) = self.tree.next() {
match ev {
tree::Event::Element(atom, sp) => {
assert_eq!(*atom, block::Atom::Inline);
parser.parse(sp.of(self.src));
self.inline_start = sp.start();
match ev.kind {
tree::EventKind::Element(atom) => {
assert_eq!(atom, block::Atom::Inline);
parser.parse(ev.span.of(self.src));
self.inline_start = ev.span.start();
}
tree::Event::Exit => {
tree::EventKind::Exit => {
self.parser = None;
return Some(Event::End);
}
tree::Event::Enter(..) => unreachable!(),
tree::EventKind::Enter(..) => unreachable!(),
}
}
}
self.tree.next().map(|ev| match ev {
tree::Event::Element(atom, _sp) => {
assert_eq!(*atom, block::Atom::Blankline);
self.tree.next().map(|ev| match ev.kind {
tree::EventKind::Element(atom) => {
assert_eq!(atom, block::Atom::Blankline);
Event::Blankline
}
tree::Event::Enter(block, ..) => {
tree::EventKind::Enter(block) => {
if matches!(block, block::Block::Leaf(..)) {
self.parser = Some(inline::Parser::new());
}
Event::Start(block.clone())
Event::Start(block)
}
tree::Event::Exit => Event::End,
tree::EventKind::Exit => Event::End,
})
}
}

View file

@ -1,11 +1,24 @@
use crate::Span;
#[derive(Debug)]
#[derive(Debug, Clone, PartialEq, Eq)]
/// The kind of a tree traversal event, generic over the container payload `C` and the element
/// payload `E`.
pub enum EventKind<C, E> {
/// A container node is entered; events for its children follow until the matching `Exit`.
Enter(C),
/// An element node, carrying its payload.
Element(E),
/// The most recently entered container is left.
Exit,
}
#[derive(Debug, Clone, PartialEq, Eq)]
/// A single traversal event: what happened (`kind`) and the span of the node it refers to.
pub struct Event<C, A> {
/// Whether a container was entered or exited, or an element was visited.
pub kind: EventKind<C, A>,
/// Span of the node this event refers to. For `Exit`, the tree iterator reports the span of
/// the container being left.
pub span: Span,
}
#[derive(Debug, Clone)]
/// A tree stored as a flat list of nodes.
pub struct Tree<C, E> {
/// All nodes of the tree. The iterator looks nodes up by index and follows each node's
/// `next` / child link to move between them.
nodes: Vec<Node<C, E>>,
}
impl<C, E> Tree<C, E> {
impl<C: Copy, E: Copy> Tree<C, E> {
fn new(nodes: Vec<Node<C, E>>) -> Self {
Self { nodes }
}
@ -15,53 +28,41 @@ impl<C, E> Tree<C, E> {
}
}
#[derive(Debug, PartialEq, Eq)]
pub enum Event<'a, C, E> {
Enter(&'a C, Span),
Element(&'a E, Span),
Exit,
}
impl<'a, C, E> Event<'a, C, E> {
pub fn span(&self) -> Span {
match self {
Self::Enter(_, sp) | Self::Element(_, sp) => *sp,
Self::Exit => panic!(),
}
}
}
/// Cursor over a tree's nodes that yields traversal events, descending into a container's
/// children before moving on to its next sibling.
pub struct Iter<'a, C, E> {
/// Node storage being walked.
nodes: &'a [Node<C, E>],
/// Stack of containers that have been entered but not yet exited.
branch: Vec<NodeIndex>,
/// Next node to visit, or `None` when the current sibling chain is exhausted.
head: Option<NodeIndex>,
}
impl<'a, C, E> Iterator for Iter<'a, C, E> {
type Item = Event<'a, C, E>;
impl<'a, C: Copy, E: Copy> Iterator for Iter<'a, C, E> {
type Item = Event<C, E>;
fn next(&mut self) -> Option<Self::Item> {
if let Some(head) = self.head {
let n = &self.nodes[head.index()];
match &n.kind {
let kind = match &n.kind {
NodeKind::Root => {
self.head = n.next;
self.next()
return self.next();
}
NodeKind::Container(c, child) => {
self.branch.push(head);
self.head = *child;
Some(Event::Enter(c, n.span))
EventKind::Enter(*c)
}
NodeKind::Element(e) => {
self.head = n.next;
Some(Event::Element(e, n.span))
EventKind::Element(*e)
}
}
};
Some(Event { kind, span: n.span })
} else if let Some(block_ni) = self.branch.pop() {
let Node { next, .. } = &self.nodes[block_ni.index()];
let Node { next, span, .. } = &self.nodes[block_ni.index()];
self.head = *next;
Some(Event::Exit)
Some(Event {
kind: EventKind::Exit,
span: *span,
})
} else {
None
}
@ -117,7 +118,7 @@ pub struct Builder<C, E> {
head: Option<NodeIndex>,
}
impl<C, E> Builder<C, E> {
impl<C: Copy, E: Copy> Builder<C, E> {
pub(super) fn new() -> Self {
Builder {
nodes: vec![Node {
@ -188,30 +189,32 @@ impl<C, E> Builder<C, E> {
}
}
impl<C: std::fmt::Display + Clone, E: std::fmt::Display + Clone> std::fmt::Display
for Builder<C, E>
{
impl<C: Copy + std::fmt::Display, E: Copy + std::fmt::Display> std::fmt::Display for Builder<C, E> {
/// Formats the builder by finishing a clone of it and rendering the resulting value.
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let finished = self.clone().finish();
finished.fmt(f)
}
}
impl<C: std::fmt::Display, E: std::fmt::Display> std::fmt::Display for Tree<C, E> {
impl<C: Copy + std::fmt::Display, E: Copy + std::fmt::Display> std::fmt::Display for Tree<C, E> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
const INDENT: &str = " ";
let mut level = 0;
for e in self.iter() {
let indent = INDENT.repeat(level);
match e {
Event::Enter(container, sp) => {
writeln!(f, "{}{} ({}:{})", indent, container, sp.start(), sp.end())?;
match e.kind {
EventKind::Enter(container) => {
write!(f, "{}{}", indent, container)?;
level += 1;
}
Event::Exit => level -= 1,
Event::Element(element, sp) => {
writeln!(f, "{}{} ({}:{})", indent, element, sp.start(), sp.end())?;
EventKind::Exit => {
level -= 1;
continue;
}
EventKind::Element(element) => {
write!(f, "{}{}", indent, element)?;
}
}
writeln!(f, " ({}:{})", e.span.start(), e.span.end())?;
}
Ok(())
}