This commit is contained in:
Noah Hellman 2022-11-27 21:59:54 +01:00
parent 0d0183e75f
commit 3ca0002df8
4 changed files with 166 additions and 117 deletions

View file

@ -22,25 +22,19 @@ pub enum Block {
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub enum Leaf { pub enum Leaf {
Paragraph, Paragraph,
Heading { Heading { level: u8 },
level: usize,
},
Attributes, Attributes,
Table, Table,
ThematicBreak,
LinkDefinition, LinkDefinition,
CodeBlock { CodeBlock { fence_length: u8 },
fence_char: char,
fence_length: usize,
},
} }
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub enum Container { pub enum Container {
Blockquote, Blockquote,
Div { fence_length: usize }, Div { fence_length: u8 },
ListItem { indent: usize }, ListItem { indent: u8 },
Footnote { indent: usize }, Footnote { indent: u8 },
} }
#[derive(Debug, PartialEq, Eq)] #[derive(Debug, PartialEq, Eq)]
@ -49,6 +43,8 @@ pub enum Atom {
Inline, Inline,
/// A line with no non-whitespace characters. /// A line with no non-whitespace characters.
Blankline, Blankline,
/// Thematic break.
ThematicBreak,
} }
struct Parser<'s> { struct Parser<'s> {
@ -89,9 +85,9 @@ impl<'s> Parser<'s> {
}) })
.count(); .count();
let lines = &mut lines[blanklines..]; let lines = &mut lines[blanklines..];
Block::parse(lines.iter().map(|sp| (sp.of(self.src), sp.start()))).map_or( Block::parse(lines.iter().map(|sp| sp.of(self.src))).map_or(0, |(kind, span, len)| {
0, let start = lines.get(0).map(|sp| sp.start()).unwrap();
|(kind, span, len)| { let span = span.translate(start);
match &kind { match &kind {
Block::Leaf(_) => { Block::Leaf(_) => {
self.tree.enter(kind, span); self.tree.enter(kind, span);
@ -116,7 +112,7 @@ impl<'s> Parser<'s> {
.chars() .chars()
.take_while(|c| c.is_whitespace()) .take_while(|c| c.is_whitespace())
.count() .count()
+ skip_chars) + usize::from(skip_chars))
.min(sp.len()); .min(sp.len());
*sp = sp.trim_start(skip); *sp = sp.trim_start(skip);
}); });
@ -130,27 +126,27 @@ impl<'s> Parser<'s> {
} }
self.tree.exit(); self.tree.exit();
blanklines + len blanklines + len
}, })
)
} }
} }
impl Block { impl Block {
/// Parse a single block. Return number of lines the block uses. /// Parse a single block. Return number of lines the block uses.
fn parse<'b, I: Iterator<Item = (&'b str, usize)>>( fn parse<'b, I: Iterator<Item = &'b str>>(mut lines: I) -> Option<(Block, Span, usize)> {
mut lines: I, lines.next().map(|l| {
) -> Option<(Block, Span, usize)> {
if let Some((l, start)) = lines.next() {
let (kind, sp) = Block::start(l); let (kind, sp) = Block::start(l);
let line_count = 1 + lines.take_while(|(l, _)| kind.continues(l)).count(); let line_count = 1
Some((kind, sp.translate(start), line_count)) + lines.take_while(|l| kind.continues(l)).count()
} else { + usize::from(matches!(
None kind,
} Self::Leaf(CodeBlock { .. }) | Self::Container(Div { .. })
));
(kind, sp, line_count)
})
} }
/// Determine what type of block a line can start. /// Determine what type of block a line can start.
fn start(line: &str) -> (Block, Span) { fn start(line: &str) -> (Self, Span) {
let start = line.chars().take_while(|c| c.is_whitespace()).count(); let start = line.chars().take_while(|c| c.is_whitespace()).count();
let line = &line[start..]; let line = &line[start..];
let mut chars = line.chars(); let mut chars = line.chars();
@ -159,37 +155,41 @@ impl Block {
.find(|c| *c != '#') .find(|c| *c != '#')
.map_or(true, char::is_whitespace) .map_or(true, char::is_whitespace)
.then(|| { .then(|| {
let span = Span::by_len(start, line.len() - chars.as_str().len() - 1); u8::try_from(line.len() - chars.as_str().len() - 1)
(Self::Leaf(Heading { level: span.len() }), span) .ok()
}), .map(|level| {
(
Self::Leaf(Heading { level }),
Span::by_len(start, level.into()),
)
})
})
.flatten(),
'>' => chars.next().map_or(true, |c| c == ' ').then(|| { '>' => chars.next().map_or(true, |c| c == ' ').then(|| {
( (
Self::Container(Blockquote), Self::Container(Blockquote),
Span::by_len(start, line.len() - chars.as_str().len() - 1), Span::by_len(start, line.len() - chars.as_str().len() - 1),
) )
}), }),
f @ ':' => { f @ ('`' | ':') => {
let fence_length = chars.take_while(|c| *c == f).count() + 1; let fence_length = chars.take_while(|c| *c == f).count() + 1;
(fence_length >= 3).then(|| { (fence_length >= 3)
.then(|| {
u8::try_from(fence_length).ok().map(|fence_length| {
( (
Self::Container(Div { fence_length }), match f {
'`' => Self::Leaf(CodeBlock { fence_length }),
':' => Self::Container(Div { fence_length }),
_ => unreachable!(),
},
Span::by_len(start, line.len()), Span::by_len(start, line.len()),
) )
}) })
}
fence_char @ ('`' | '~') => {
let fence_length = chars.take_while(|c| *c == fence_char).count() + 1;
(fence_length >= 3).then(|| {
(
Self::Leaf(CodeBlock {
fence_char,
fence_length,
}),
Span::by_len(start, line.len()),
)
}) })
.flatten()
} }
_ => { _ => {
/*
let thematic_break = || { let thematic_break = || {
let mut without_whitespace = line.chars().filter(|c| !c.is_whitespace()); let mut without_whitespace = line.chars().filter(|c| !c.is_whitespace());
let length = without_whitespace.clone().count(); let length = without_whitespace.clone().count();
@ -198,7 +198,9 @@ impl Block {
|| without_whitespace.all(|c| c == '*'))) || without_whitespace.all(|c| c == '*')))
.then(|| (Self::Leaf(ThematicBreak), Span::by_len(start, line.len()))) .then(|| (Self::Leaf(ThematicBreak), Span::by_len(start, line.len())))
}; };
thematic_break() */
//thematic_break()
None
} }
} }
.unwrap_or((Self::Leaf(Paragraph), Span::new(0, 0))) .unwrap_or((Self::Leaf(Paragraph), Span::new(0, 0)))
@ -210,18 +212,16 @@ impl Block {
Self::Leaf(Paragraph | Heading { .. } | Table | LinkDefinition) => { Self::Leaf(Paragraph | Heading { .. } | Table | LinkDefinition) => {
!line.trim().is_empty() !line.trim().is_empty()
} }
Self::Leaf(Attributes | ThematicBreak) => false, Self::Leaf(Attributes) => false,
Self::Leaf(CodeBlock {
fence_char,
fence_length,
}) => !line.chars().take(*fence_length).all(|c| c == *fence_char),
Self::Container(Blockquote) => line.trim().starts_with('>'), Self::Container(Blockquote) => line.trim().starts_with('>'),
Self::Container(Footnote { indent } | ListItem { indent }) => { Self::Container(Footnote { indent } | ListItem { indent }) => {
let spaces = line.chars().take_while(|c| c.is_whitespace()).count(); let spaces = line.chars().take_while(|c| c.is_whitespace()).count();
!line.trim().is_empty() && spaces >= *indent !line.trim().is_empty() && spaces >= (*indent).into()
} }
Self::Container(Div { fence_length }) => { Self::Container(Div { fence_length }) | Self::Leaf(CodeBlock { fence_length }) => {
line.chars().take(*fence_length).all(|c| c == ':') let mut c = line.chars();
!((&mut c).take((*fence_length).into()).all(|c| c == ':')
&& c.next().map_or(false, char::is_whitespace))
} }
} }
} }
@ -357,10 +357,30 @@ mod test {
); );
} }
/// A fenced code block parses as a leaf whose body lines are inline elements.
#[test]
fn parse_code_block() {
    // Opening fence with an info string, two body lines, and a closing fence
    // that is not followed by a newline.
    let src = "```lang\nl0\nl1\n```";
    assert_eq!(
        super::Parser::new(src).parse().iter().collect::<Vec<_>>(),
        &[
            // Bytes 0..8 are the opening fence line "```lang\n".
            Event::Enter(&Leaf(CodeBlock { fence_length: 3 }), Span::new(0, 8)),
            // Bytes 8..11 are "l0\n".
            Event::Element(&Inline, Span::new(8, 11)),
            // Bytes 11..14 are "l1\n".
            Event::Element(&Inline, Span::new(11, 14)),
            // No separate event is expected for the closing fence line.
            Event::Exit
        ]
    );
}
#[test] #[test]
fn block_multiline() { fn block_multiline() {
let src = "# heading\n spanning two lines\n"; let src = "# heading\n spanning two lines\n";
let lines = super::lines(src).map(|sp| (sp.of(src), sp.start())); let lines = super::lines(src).map(|sp| sp.of(src));
let (kind, sp, len) = Block::parse(lines).unwrap(); let (kind, sp, len) = Block::parse(lines).unwrap();
assert_eq!(kind, Block::Leaf(Heading { level: 1 })); assert_eq!(kind, Block::Leaf(Heading { level: 1 }));
assert_eq!(sp.of(src), "#"); assert_eq!(sp.of(src), "#");
@ -376,7 +396,7 @@ mod test {
">\n", ">\n",
"> c\n", // "> c\n", //
); );
let lines = super::lines(src).map(|sp| (sp.of(src), sp.start())); let lines = super::lines(src).map(|sp| sp.of(src));
let (kind, sp, len) = Block::parse(lines).unwrap(); let (kind, sp, len) = Block::parse(lines).unwrap();
assert_eq!(kind, Block::Container(Blockquote)); assert_eq!(kind, Block::Container(Blockquote));
assert_eq!(sp.of(src), ">"); assert_eq!(sp.of(src), ">");

View file

@ -35,22 +35,21 @@ pub struct Node {
pub enum NodeKind { pub enum NodeKind {
Str, Str,
// link // link
Url,
ImageSource,
LinkReference,
FootnoteReference, FootnoteReference,
ReferenceLink,
Link,
Emoji,
// verbatim // verbatim
Verbatim, Verbatim,
RawFormat, RawFormat,
DisplayMath,
InlineMath, InlineMath,
DisplayMath,
} }
#[derive(Debug, Copy, Clone, PartialEq, Eq)] #[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum Container { pub enum Container {
// attributes
Attributes,
Span, Span,
Attributes,
// typesetting // typesetting
Subscript, Subscript,
Superscript, Superscript,
@ -58,17 +57,10 @@ pub enum Container {
Delete, Delete,
Emphasis, Emphasis,
Strong, Strong,
Mark, //Mark,
// smart quoting // smart quoting
SingleQuoted, SingleQuoted,
DoubleQuoted, DoubleQuoted,
// URLs
AutoUrl,
Url,
ImageText,
LinkText,
Reference,
Destination,
} }
#[derive(Debug, PartialEq, Eq)] #[derive(Debug, PartialEq, Eq)]
@ -165,7 +157,14 @@ impl<'s> Parser<'s> {
len, len,
}) = self.peek() }) = self.peek()
{ {
Some((DisplayMath, first.len)) Some((
if first.len == 2 {
DisplayMath
} else {
InlineMath
},
*len,
))
} else { } else {
None None
} }
@ -201,14 +200,14 @@ impl<'s> Parser<'s> {
lex::Kind::Sym(Symbol::Tilde) => Some((Subscript, Dir::Both)), lex::Kind::Sym(Symbol::Tilde) => Some((Subscript, Dir::Both)),
lex::Kind::Sym(Symbol::Quote1) => Some((SingleQuoted, Dir::Both)), lex::Kind::Sym(Symbol::Quote1) => Some((SingleQuoted, Dir::Both)),
lex::Kind::Sym(Symbol::Quote2) => Some((DoubleQuoted, Dir::Both)), lex::Kind::Sym(Symbol::Quote2) => Some((DoubleQuoted, Dir::Both)),
lex::Kind::Open(Delimiter::Bracket) => Some((LinkText, Dir::Open)), lex::Kind::Open(Delimiter::Bracket) => Some((Span, Dir::Open)),
lex::Kind::Close(Delimiter::Bracket) => Some((LinkText, Dir::Close)), lex::Kind::Close(Delimiter::Bracket) => Some((Span, Dir::Close)),
lex::Kind::Open(Delimiter::BraceAsterisk) => Some((Strong, Dir::Open)), lex::Kind::Open(Delimiter::BraceAsterisk) => Some((Strong, Dir::Open)),
lex::Kind::Close(Delimiter::BraceAsterisk) => Some((Strong, Dir::Close)), lex::Kind::Close(Delimiter::BraceAsterisk) => Some((Strong, Dir::Close)),
lex::Kind::Open(Delimiter::BraceCaret) => Some((Superscript, Dir::Open)), lex::Kind::Open(Delimiter::BraceCaret) => Some((Superscript, Dir::Open)),
lex::Kind::Close(Delimiter::BraceCaret) => Some((Superscript, Dir::Close)), lex::Kind::Close(Delimiter::BraceCaret) => Some((Superscript, Dir::Close)),
lex::Kind::Open(Delimiter::BraceEqual) => Some((Mark, Dir::Open)), //lex::Kind::Open(Delimiter::BraceEqual) => Some((Mark, Dir::Open)),
lex::Kind::Close(Delimiter::BraceEqual) => Some((Mark, Dir::Close)), //lex::Kind::Close(Delimiter::BraceEqual) => Some((Mark, Dir::Close)),
lex::Kind::Open(Delimiter::BraceHyphen) => Some((Delete, Dir::Open)), lex::Kind::Open(Delimiter::BraceHyphen) => Some((Delete, Dir::Open)),
lex::Kind::Close(Delimiter::BraceHyphen) => Some((Delete, Dir::Close)), lex::Kind::Close(Delimiter::BraceHyphen) => Some((Delete, Dir::Close)),
lex::Kind::Open(Delimiter::BracePlus) => Some((Insert, Dir::Open)), lex::Kind::Open(Delimiter::BracePlus) => Some((Insert, Dir::Open)),
@ -308,6 +307,12 @@ mod test {
test_parse!("abc `def`", Node(Str.span(0, 4)), Node(Verbatim.span(5, 8))); test_parse!("abc `def`", Node(Str.span(0, 4)), Node(Verbatim.span(5, 8)));
} }
/// Checks that a `$`-prefixed verbatim span is reported as a math node.
#[test]
fn math() {
// One `$` before a single-backtick span: expects `InlineMath` over bytes 2..5 ("abc").
test_parse!("$`abc`", Node(InlineMath.span(2, 5)));
// Two `$` before a triple-backtick opener with no closer in the input:
// expects `DisplayMath` over bytes 5..8 ("abc").
test_parse!("$$```abc", Node(DisplayMath.span(5, 8)));
}
#[test] #[test]
fn container_basic() { fn container_basic() {
test_parse!( test_parse!(

View file

@ -30,7 +30,7 @@ pub enum Delimiter {
Brace, Brace,
BraceAsterisk, BraceAsterisk,
BraceCaret, BraceCaret,
BraceEqual, //BraceEqual,
BraceHyphen, BraceHyphen,
BracePlus, BracePlus,
BraceTilde, BraceTilde,
@ -149,7 +149,7 @@ impl<'s> Lexer<'s> {
let explicit = match self.peek() { let explicit = match self.peek() {
'*' => Some(Open(BraceAsterisk)), '*' => Some(Open(BraceAsterisk)),
'^' => Some(Open(BraceCaret)), '^' => Some(Open(BraceCaret)),
'=' => Some(Open(BraceEqual)), //'=' => Some(Open(BraceEqual)),
'-' => Some(Open(BraceHyphen)), '-' => Some(Open(BraceHyphen)),
'+' => Some(Open(BracePlus)), '+' => Some(Open(BracePlus)),
'~' => Some(Open(BraceTilde)), '~' => Some(Open(BraceTilde)),
@ -165,7 +165,7 @@ impl<'s> Lexer<'s> {
} }
'*' => self.maybe_eat_close_brace(Asterisk, BraceAsterisk), '*' => self.maybe_eat_close_brace(Asterisk, BraceAsterisk),
'^' => self.maybe_eat_close_brace(Caret, BraceCaret), '^' => self.maybe_eat_close_brace(Caret, BraceCaret),
'=' => self.maybe_eat_close_brace(Equal, BraceEqual), //'=' => self.maybe_eat_close_brace(Equal, BraceEqual),
'+' => self.maybe_eat_close_brace(Plus, BracePlus), '+' => self.maybe_eat_close_brace(Plus, BracePlus),
'~' => self.maybe_eat_close_brace(Tilde, BraceTilde), '~' => self.maybe_eat_close_brace(Tilde, BraceTilde),
'_' => self.maybe_eat_close_brace(Underscore, BraceUnderscore), '_' => self.maybe_eat_close_brace(Underscore, BraceUnderscore),
@ -178,6 +178,7 @@ impl<'s> Lexer<'s> {
} }
} }
'=' => Sym(Equal),
'!' => Sym(Exclaim), '!' => Sym(Exclaim),
'%' => Sym(Percentage), '%' => Sym(Percentage),
'<' => Sym(Lt), '<' => Sym(Lt),

View file

@ -35,6 +35,29 @@ impl<'s> Parser<'s> {
} }
} }
/// The two kinds of list distinguished by this module.
///
/// Derives the usual value-type traits so the enum can be printed, copied and
/// compared, in line with the other enums of this crate.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ListType {
    /// A list whose items are not numbered.
    Unordered,
    /// A list whose items are numbered.
    Ordered,
}
/// Kind of a block-level element, together with the data attached to it.
///
/// String payloads are borrowed for the lifetime `'s` (presumably slices of
/// the parsed source text -- confirm against the code that constructs them).
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so values can be printed and
/// compared in tests, in line with the other enums of this crate.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TagKind<'s> {
    /// A paragraph.
    Paragraph,
    /// A heading with its nesting `level`.
    Heading { level: u8 },
    /// A table.
    Table,
    /// A row of a table.
    TableRow,
    /// A cell of a table row.
    TableCell,
    /// A raw block; `format` presumably names the target output format
    /// (TODO confirm).
    RawBlock { format: &'s str },
    /// A code block annotated with its `language` string.
    CodeBlock { language: &'s str },
    /// A block quote.
    Blockquote,
    /// A div container.
    Div,
    /// A list with unnumbered items.
    UnorderedList,
    /// A list with numbered items; `start` is presumably the number of the
    /// first item (TODO confirm).
    OrderedList { start: usize },
    /// A single item of a list.
    ListItem,
    /// A description list.
    DescriptionList,
    /// A single item of a description list.
    DescriptionItem,
    /// A footnote identified by `tag`.
    Footnote { tag: &'s str },
}
#[derive(Debug, PartialEq, Eq)] #[derive(Debug, PartialEq, Eq)]
pub enum Event { pub enum Event {
Start(block::Block), Start(block::Block),