wip
This commit is contained in:
parent
0d0183e75f
commit
3ca0002df8
4 changed files with 166 additions and 117 deletions
210
src/block.rs
210
src/block.rs
|
@ -22,25 +22,19 @@ pub enum Block {
|
|||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum Leaf {
|
||||
Paragraph,
|
||||
Heading {
|
||||
level: usize,
|
||||
},
|
||||
Heading { level: u8 },
|
||||
Attributes,
|
||||
Table,
|
||||
ThematicBreak,
|
||||
LinkDefinition,
|
||||
CodeBlock {
|
||||
fence_char: char,
|
||||
fence_length: usize,
|
||||
},
|
||||
CodeBlock { fence_length: u8 },
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum Container {
|
||||
Blockquote,
|
||||
Div { fence_length: usize },
|
||||
ListItem { indent: usize },
|
||||
Footnote { indent: usize },
|
||||
Div { fence_length: u8 },
|
||||
ListItem { indent: u8 },
|
||||
Footnote { indent: u8 },
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
|
@ -49,6 +43,8 @@ pub enum Atom {
|
|||
Inline,
|
||||
/// A line with no non-whitespace characters.
|
||||
Blankline,
|
||||
/// Thematic break.
|
||||
ThematicBreak,
|
||||
}
|
||||
|
||||
struct Parser<'s> {
|
||||
|
@ -89,68 +85,68 @@ impl<'s> Parser<'s> {
|
|||
})
|
||||
.count();
|
||||
let lines = &mut lines[blanklines..];
|
||||
Block::parse(lines.iter().map(|sp| (sp.of(self.src), sp.start()))).map_or(
|
||||
0,
|
||||
|(kind, span, len)| {
|
||||
match &kind {
|
||||
Block::Leaf(_) => {
|
||||
self.tree.enter(kind, span);
|
||||
lines[0] = lines[0].with_start(span.end());
|
||||
for line in lines.iter().take(len) {
|
||||
self.tree.elem(Atom::Inline, *line);
|
||||
}
|
||||
}
|
||||
Block::Container(c) => {
|
||||
let (skip_chars, skip_lines_suffix) = match &c {
|
||||
Blockquote => (1, 0),
|
||||
ListItem { indent } | Footnote { indent } => (*indent, 0),
|
||||
Div { .. } => (0, 1),
|
||||
};
|
||||
let line_count = lines.len() - skip_lines_suffix;
|
||||
|
||||
// update spans, remove indentation / container prefix
|
||||
lines[0] = lines[0].with_start(span.end());
|
||||
lines.iter_mut().skip(1).take(line_count).for_each(|sp| {
|
||||
let skip = (sp
|
||||
.of(self.src)
|
||||
.chars()
|
||||
.take_while(|c| c.is_whitespace())
|
||||
.count()
|
||||
+ skip_chars)
|
||||
.min(sp.len());
|
||||
*sp = sp.trim_start(skip);
|
||||
});
|
||||
|
||||
self.tree.enter(kind, span);
|
||||
let mut l = 0;
|
||||
while l < line_count {
|
||||
l += self.parse_block(&mut lines[l..line_count]);
|
||||
}
|
||||
Block::parse(lines.iter().map(|sp| sp.of(self.src))).map_or(0, |(kind, span, len)| {
|
||||
let start = lines.get(0).map(|sp| sp.start()).unwrap();
|
||||
let span = span.translate(start);
|
||||
match &kind {
|
||||
Block::Leaf(_) => {
|
||||
self.tree.enter(kind, span);
|
||||
lines[0] = lines[0].with_start(span.end());
|
||||
for line in lines.iter().take(len) {
|
||||
self.tree.elem(Atom::Inline, *line);
|
||||
}
|
||||
}
|
||||
self.tree.exit();
|
||||
blanklines + len
|
||||
},
|
||||
)
|
||||
Block::Container(c) => {
|
||||
let (skip_chars, skip_lines_suffix) = match &c {
|
||||
Blockquote => (1, 0),
|
||||
ListItem { indent } | Footnote { indent } => (*indent, 0),
|
||||
Div { .. } => (0, 1),
|
||||
};
|
||||
let line_count = lines.len() - skip_lines_suffix;
|
||||
|
||||
// update spans, remove indentation / container prefix
|
||||
lines[0] = lines[0].with_start(span.end());
|
||||
lines.iter_mut().skip(1).take(line_count).for_each(|sp| {
|
||||
let skip = (sp
|
||||
.of(self.src)
|
||||
.chars()
|
||||
.take_while(|c| c.is_whitespace())
|
||||
.count()
|
||||
+ usize::from(skip_chars))
|
||||
.min(sp.len());
|
||||
*sp = sp.trim_start(skip);
|
||||
});
|
||||
|
||||
self.tree.enter(kind, span);
|
||||
let mut l = 0;
|
||||
while l < line_count {
|
||||
l += self.parse_block(&mut lines[l..line_count]);
|
||||
}
|
||||
}
|
||||
}
|
||||
self.tree.exit();
|
||||
blanklines + len
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl Block {
|
||||
/// Parse a single block. Return number of lines the block uses.
|
||||
fn parse<'b, I: Iterator<Item = (&'b str, usize)>>(
|
||||
mut lines: I,
|
||||
) -> Option<(Block, Span, usize)> {
|
||||
if let Some((l, start)) = lines.next() {
|
||||
fn parse<'b, I: Iterator<Item = &'b str>>(mut lines: I) -> Option<(Block, Span, usize)> {
|
||||
lines.next().map(|l| {
|
||||
let (kind, sp) = Block::start(l);
|
||||
let line_count = 1 + lines.take_while(|(l, _)| kind.continues(l)).count();
|
||||
Some((kind, sp.translate(start), line_count))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
let line_count = 1
|
||||
+ lines.take_while(|l| kind.continues(l)).count()
|
||||
+ usize::from(matches!(
|
||||
kind,
|
||||
Self::Leaf(CodeBlock { .. }) | Self::Container(Div { .. })
|
||||
));
|
||||
(kind, sp, line_count)
|
||||
})
|
||||
}
|
||||
|
||||
/// Determine what type of block a line can start.
|
||||
fn start(line: &str) -> (Block, Span) {
|
||||
fn start(line: &str) -> (Self, Span) {
|
||||
let start = line.chars().take_while(|c| c.is_whitespace()).count();
|
||||
let line = &line[start..];
|
||||
let mut chars = line.chars();
|
||||
|
@ -159,37 +155,41 @@ impl Block {
|
|||
.find(|c| *c != '#')
|
||||
.map_or(true, char::is_whitespace)
|
||||
.then(|| {
|
||||
let span = Span::by_len(start, line.len() - chars.as_str().len() - 1);
|
||||
(Self::Leaf(Heading { level: span.len() }), span)
|
||||
}),
|
||||
u8::try_from(line.len() - chars.as_str().len() - 1)
|
||||
.ok()
|
||||
.map(|level| {
|
||||
(
|
||||
Self::Leaf(Heading { level }),
|
||||
Span::by_len(start, level.into()),
|
||||
)
|
||||
})
|
||||
})
|
||||
.flatten(),
|
||||
'>' => chars.next().map_or(true, |c| c == ' ').then(|| {
|
||||
(
|
||||
Self::Container(Blockquote),
|
||||
Span::by_len(start, line.len() - chars.as_str().len() - 1),
|
||||
)
|
||||
}),
|
||||
f @ ':' => {
|
||||
f @ ('`' | ':') => {
|
||||
let fence_length = chars.take_while(|c| *c == f).count() + 1;
|
||||
(fence_length >= 3).then(|| {
|
||||
(
|
||||
Self::Container(Div { fence_length }),
|
||||
Span::by_len(start, line.len()),
|
||||
)
|
||||
})
|
||||
}
|
||||
fence_char @ ('`' | '~') => {
|
||||
let fence_length = chars.take_while(|c| *c == fence_char).count() + 1;
|
||||
(fence_length >= 3).then(|| {
|
||||
(
|
||||
Self::Leaf(CodeBlock {
|
||||
fence_char,
|
||||
fence_length,
|
||||
}),
|
||||
Span::by_len(start, line.len()),
|
||||
)
|
||||
})
|
||||
(fence_length >= 3)
|
||||
.then(|| {
|
||||
u8::try_from(fence_length).ok().map(|fence_length| {
|
||||
(
|
||||
match f {
|
||||
'`' => Self::Leaf(CodeBlock { fence_length }),
|
||||
':' => Self::Container(Div { fence_length }),
|
||||
_ => unreachable!(),
|
||||
},
|
||||
Span::by_len(start, line.len()),
|
||||
)
|
||||
})
|
||||
})
|
||||
.flatten()
|
||||
}
|
||||
_ => {
|
||||
/*
|
||||
let thematic_break = || {
|
||||
let mut without_whitespace = line.chars().filter(|c| !c.is_whitespace());
|
||||
let length = without_whitespace.clone().count();
|
||||
|
@ -198,7 +198,9 @@ impl Block {
|
|||
|| without_whitespace.all(|c| c == '*')))
|
||||
.then(|| (Self::Leaf(ThematicBreak), Span::by_len(start, line.len())))
|
||||
};
|
||||
thematic_break()
|
||||
*/
|
||||
//thematic_break()
|
||||
None
|
||||
}
|
||||
}
|
||||
.unwrap_or((Self::Leaf(Paragraph), Span::new(0, 0)))
|
||||
|
@ -210,18 +212,16 @@ impl Block {
|
|||
Self::Leaf(Paragraph | Heading { .. } | Table | LinkDefinition) => {
|
||||
!line.trim().is_empty()
|
||||
}
|
||||
Self::Leaf(Attributes | ThematicBreak) => false,
|
||||
Self::Leaf(CodeBlock {
|
||||
fence_char,
|
||||
fence_length,
|
||||
}) => !line.chars().take(*fence_length).all(|c| c == *fence_char),
|
||||
Self::Leaf(Attributes) => false,
|
||||
Self::Container(Blockquote) => line.trim().starts_with('>'),
|
||||
Self::Container(Footnote { indent } | ListItem { indent }) => {
|
||||
let spaces = line.chars().take_while(|c| c.is_whitespace()).count();
|
||||
!line.trim().is_empty() && spaces >= *indent
|
||||
!line.trim().is_empty() && spaces >= (*indent).into()
|
||||
}
|
||||
Self::Container(Div { fence_length }) => {
|
||||
line.chars().take(*fence_length).all(|c| c == ':')
|
||||
Self::Container(Div { fence_length }) | Self::Leaf(CodeBlock { fence_length }) => {
|
||||
let mut c = line.chars();
|
||||
!((&mut c).take((*fence_length).into()).all(|c| c == ':')
|
||||
&& c.next().map_or(false, char::is_whitespace))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -357,10 +357,30 @@ mod test {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_code_block() {
|
||||
let src = concat!(
|
||||
"```lang\n",
|
||||
"l0\n",
|
||||
"l1\n",
|
||||
"```", //
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
super::Parser::new(src).parse().iter().collect::<Vec<_>>(),
|
||||
&[
|
||||
Event::Enter(&Leaf(CodeBlock { fence_length: 3 }), Span::new(0, 8)),
|
||||
Event::Element(&Inline, Span::new(8, 11)),
|
||||
Event::Element(&Inline, Span::new(11, 14)),
|
||||
Event::Exit
|
||||
]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn block_multiline() {
|
||||
let src = "# heading\n spanning two lines\n";
|
||||
let lines = super::lines(src).map(|sp| (sp.of(src), sp.start()));
|
||||
let lines = super::lines(src).map(|sp| sp.of(src));
|
||||
let (kind, sp, len) = Block::parse(lines).unwrap();
|
||||
assert_eq!(kind, Block::Leaf(Heading { level: 1 }));
|
||||
assert_eq!(sp.of(src), "#");
|
||||
|
@ -376,7 +396,7 @@ mod test {
|
|||
">\n",
|
||||
"> c\n", //
|
||||
);
|
||||
let lines = super::lines(src).map(|sp| (sp.of(src), sp.start()));
|
||||
let lines = super::lines(src).map(|sp| sp.of(src));
|
||||
let (kind, sp, len) = Block::parse(lines).unwrap();
|
||||
assert_eq!(kind, Block::Container(Blockquote));
|
||||
assert_eq!(sp.of(src), ">");
|
||||
|
|
|
@ -35,22 +35,21 @@ pub struct Node {
|
|||
pub enum NodeKind {
|
||||
Str,
|
||||
// link
|
||||
Url,
|
||||
ImageSource,
|
||||
LinkReference,
|
||||
FootnoteReference,
|
||||
ReferenceLink,
|
||||
Link,
|
||||
Emoji,
|
||||
// verbatim
|
||||
Verbatim,
|
||||
RawFormat,
|
||||
DisplayMath,
|
||||
InlineMath,
|
||||
DisplayMath,
|
||||
}
|
||||
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
|
||||
pub enum Container {
|
||||
// attributes
|
||||
Attributes,
|
||||
Span,
|
||||
Attributes,
|
||||
// typesetting
|
||||
Subscript,
|
||||
Superscript,
|
||||
|
@ -58,17 +57,10 @@ pub enum Container {
|
|||
Delete,
|
||||
Emphasis,
|
||||
Strong,
|
||||
Mark,
|
||||
//Mark,
|
||||
// smart quoting
|
||||
SingleQuoted,
|
||||
DoubleQuoted,
|
||||
// URLs
|
||||
AutoUrl,
|
||||
Url,
|
||||
ImageText,
|
||||
LinkText,
|
||||
Reference,
|
||||
Destination,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
|
@ -165,7 +157,14 @@ impl<'s> Parser<'s> {
|
|||
len,
|
||||
}) = self.peek()
|
||||
{
|
||||
Some((DisplayMath, first.len))
|
||||
Some((
|
||||
if first.len == 2 {
|
||||
DisplayMath
|
||||
} else {
|
||||
InlineMath
|
||||
},
|
||||
*len,
|
||||
))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
|
@ -201,14 +200,14 @@ impl<'s> Parser<'s> {
|
|||
lex::Kind::Sym(Symbol::Tilde) => Some((Subscript, Dir::Both)),
|
||||
lex::Kind::Sym(Symbol::Quote1) => Some((SingleQuoted, Dir::Both)),
|
||||
lex::Kind::Sym(Symbol::Quote2) => Some((DoubleQuoted, Dir::Both)),
|
||||
lex::Kind::Open(Delimiter::Bracket) => Some((LinkText, Dir::Open)),
|
||||
lex::Kind::Close(Delimiter::Bracket) => Some((LinkText, Dir::Close)),
|
||||
lex::Kind::Open(Delimiter::Bracket) => Some((Span, Dir::Open)),
|
||||
lex::Kind::Close(Delimiter::Bracket) => Some((Span, Dir::Close)),
|
||||
lex::Kind::Open(Delimiter::BraceAsterisk) => Some((Strong, Dir::Open)),
|
||||
lex::Kind::Close(Delimiter::BraceAsterisk) => Some((Strong, Dir::Close)),
|
||||
lex::Kind::Open(Delimiter::BraceCaret) => Some((Superscript, Dir::Open)),
|
||||
lex::Kind::Close(Delimiter::BraceCaret) => Some((Superscript, Dir::Close)),
|
||||
lex::Kind::Open(Delimiter::BraceEqual) => Some((Mark, Dir::Open)),
|
||||
lex::Kind::Close(Delimiter::BraceEqual) => Some((Mark, Dir::Close)),
|
||||
//lex::Kind::Open(Delimiter::BraceEqual) => Some((Mark, Dir::Open)),
|
||||
//lex::Kind::Close(Delimiter::BraceEqual) => Some((Mark, Dir::Close)),
|
||||
lex::Kind::Open(Delimiter::BraceHyphen) => Some((Delete, Dir::Open)),
|
||||
lex::Kind::Close(Delimiter::BraceHyphen) => Some((Delete, Dir::Close)),
|
||||
lex::Kind::Open(Delimiter::BracePlus) => Some((Insert, Dir::Open)),
|
||||
|
@ -308,6 +307,12 @@ mod test {
|
|||
test_parse!("abc `def`", Node(Str.span(0, 4)), Node(Verbatim.span(5, 8)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn math() {
|
||||
test_parse!("$`abc`", Node(InlineMath.span(2, 5)));
|
||||
test_parse!("$$```abc", Node(DisplayMath.span(5, 8)));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn container_basic() {
|
||||
test_parse!(
|
||||
|
|
|
@ -30,7 +30,7 @@ pub enum Delimiter {
|
|||
Brace,
|
||||
BraceAsterisk,
|
||||
BraceCaret,
|
||||
BraceEqual,
|
||||
//BraceEqual,
|
||||
BraceHyphen,
|
||||
BracePlus,
|
||||
BraceTilde,
|
||||
|
@ -149,7 +149,7 @@ impl<'s> Lexer<'s> {
|
|||
let explicit = match self.peek() {
|
||||
'*' => Some(Open(BraceAsterisk)),
|
||||
'^' => Some(Open(BraceCaret)),
|
||||
'=' => Some(Open(BraceEqual)),
|
||||
//'=' => Some(Open(BraceEqual)),
|
||||
'-' => Some(Open(BraceHyphen)),
|
||||
'+' => Some(Open(BracePlus)),
|
||||
'~' => Some(Open(BraceTilde)),
|
||||
|
@ -165,7 +165,7 @@ impl<'s> Lexer<'s> {
|
|||
}
|
||||
'*' => self.maybe_eat_close_brace(Asterisk, BraceAsterisk),
|
||||
'^' => self.maybe_eat_close_brace(Caret, BraceCaret),
|
||||
'=' => self.maybe_eat_close_brace(Equal, BraceEqual),
|
||||
//'=' => self.maybe_eat_close_brace(Equal, BraceEqual),
|
||||
'+' => self.maybe_eat_close_brace(Plus, BracePlus),
|
||||
'~' => self.maybe_eat_close_brace(Tilde, BraceTilde),
|
||||
'_' => self.maybe_eat_close_brace(Underscore, BraceUnderscore),
|
||||
|
@ -178,6 +178,7 @@ impl<'s> Lexer<'s> {
|
|||
}
|
||||
}
|
||||
|
||||
'=' => Sym(Equal),
|
||||
'!' => Sym(Exclaim),
|
||||
'%' => Sym(Percentage),
|
||||
'<' => Sym(Lt),
|
||||
|
|
23
src/lib.rs
23
src/lib.rs
|
@ -35,6 +35,29 @@ impl<'s> Parser<'s> {
|
|||
}
|
||||
}
|
||||
|
||||
pub enum ListType {
|
||||
Unordered,
|
||||
Ordered,
|
||||
}
|
||||
|
||||
pub enum TagKind<'s> {
|
||||
Paragraph,
|
||||
Heading { level: u8 },
|
||||
Table,
|
||||
TableRow,
|
||||
TableCell,
|
||||
RawBlock { format: &'s str },
|
||||
CodeBlock { language: &'s str },
|
||||
Blockquote,
|
||||
Div,
|
||||
UnorderedList,
|
||||
OrderedList { start: usize },
|
||||
ListItem,
|
||||
DescriptionList,
|
||||
DescriptionItem,
|
||||
Footnote { tag: &'s str },
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub enum Event {
|
||||
Start(block::Block),
|
||||
|
|
Loading…
Reference in a new issue