2022-11-12 12:45:17 -05:00
|
|
|
mod block;
|
2022-11-28 14:12:49 -05:00
|
|
|
mod html;
|
2022-11-16 16:11:55 -05:00
|
|
|
mod inline;
|
2022-11-20 13:13:48 -05:00
|
|
|
mod lex;
|
2022-11-12 12:45:17 -05:00
|
|
|
mod span;
|
|
|
|
mod tree;
|
|
|
|
|
2022-11-28 14:12:49 -05:00
|
|
|
use span::Span;
|
|
|
|
|
2022-11-20 13:13:48 -05:00
|
|
|
/// Unit type exported from the crate root.
///
/// NOTE(review): nothing in the visible part of this file refers to this type (the parser uses
/// `block::Block` instead); it looks like a placeholder — confirm before relying on it.
pub struct Block;
|
2022-11-12 12:45:17 -05:00
|
|
|
|
|
|
|
/// The NUL character.
///
/// NOTE(review): presumably used as an end-of-input sentinel by the sub-modules; it is not
/// referenced in the visible part of this file — confirm.
const EOF: char = '\0';
|
|
|
|
|
2022-11-28 14:12:49 -05:00
|
|
|
#[derive(Debug, PartialEq, Eq)]
|
2022-11-28 15:52:09 -05:00
|
|
|
pub enum Event2<'s> {
|
|
|
|
/// Start of a tag.
|
|
|
|
Start(TagKind<'s>, Attributes<'s>),
|
|
|
|
/// End of a tag.
|
|
|
|
End(TagKind<'s>),
|
|
|
|
/// A string object, text only.
|
|
|
|
Str(&'s str),
|
|
|
|
/// A verbatim string.
|
|
|
|
Verbatim(&'s str),
|
|
|
|
/// An inline or display math element.
|
|
|
|
Math { content: &'s str, display: bool },
|
|
|
|
/// An ellipsis, i.e. a set of three periods.
|
|
|
|
Ellipsis,
|
|
|
|
/// An en dash.
|
|
|
|
EnDash,
|
|
|
|
/// An em dash.
|
|
|
|
EmDash,
|
|
|
|
/// A thematic break, typically a horizontal rule.
|
|
|
|
ThematicBreak,
|
|
|
|
/// A blank line.
|
|
|
|
Blankline,
|
|
|
|
/// A space that may not break a line.
|
|
|
|
NonBreakingSpace,
|
|
|
|
/// A newline that may or may not break a line in the output format.
|
|
|
|
Softbreak,
|
|
|
|
/// A newline that must break a line.
|
|
|
|
HardBreak,
|
2022-11-27 15:59:54 -05:00
|
|
|
}
|
|
|
|
|
2022-11-28 15:52:09 -05:00
|
|
|
/// Attributes attached to a tag, stored as pairs of string slices borrowed from the source.
///
/// Attributes are rare, so the list lives behind an `Option<Box<..>>`: the struct is always a
/// single pointer wide (8 bytes on 64-bit targets) and only pays for an extra allocation when
/// attributes are actually present, instead of always carrying an inline `Vec` (24 bytes on
/// 64-bit targets). The default value holds no attributes and does not allocate.
///
/// NOTE(review): each pair is presumably `(key, value)` — confirm against the code that fills it.
// The boxed `Vec` is the deliberate size trade-off described above, not an oversight.
#[allow(clippy::box_collection)]
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct Attributes<'s>(Option<Box<Vec<(&'s str, &'s str)>>>);
|
|
|
|
|
2022-11-28 14:12:49 -05:00
|
|
|
#[derive(Debug, PartialEq, Eq)]
|
2022-11-27 15:59:54 -05:00
|
|
|
pub enum TagKind<'s> {
|
2022-11-28 15:52:09 -05:00
|
|
|
/// A paragraph.
|
2022-11-27 15:59:54 -05:00
|
|
|
Paragraph,
|
2022-11-28 15:52:09 -05:00
|
|
|
/// A heading.
|
2022-11-27 15:59:54 -05:00
|
|
|
Heading { level: u8 },
|
2022-11-28 15:52:09 -05:00
|
|
|
/// A link with a destination URL.
|
|
|
|
Link(&'s str, LinkType),
|
|
|
|
/// An image.
|
|
|
|
Image(&'s str),
|
|
|
|
/// A divider element.
|
|
|
|
Div,
|
|
|
|
/// An inline divider element.
|
|
|
|
Span,
|
|
|
|
/// A table element.
|
2022-11-27 15:59:54 -05:00
|
|
|
Table,
|
2022-11-28 15:52:09 -05:00
|
|
|
/// A row element of a table.
|
2022-11-27 15:59:54 -05:00
|
|
|
TableRow,
|
2022-11-28 15:52:09 -05:00
|
|
|
/// A cell element of row within a table.
|
2022-11-27 15:59:54 -05:00
|
|
|
TableCell,
|
2022-11-28 15:52:09 -05:00
|
|
|
/// A block with raw markup for a specific output format.
|
2022-11-27 15:59:54 -05:00
|
|
|
RawBlock { format: &'s str },
|
2022-11-28 15:52:09 -05:00
|
|
|
/// A block with code in a specific language.
|
|
|
|
CodeBlock { language: Option<&'s str> },
|
|
|
|
/// A blockquote element.
|
2022-11-27 15:59:54 -05:00
|
|
|
Blockquote,
|
2022-11-28 15:52:09 -05:00
|
|
|
/// A list.
|
|
|
|
List(List),
|
|
|
|
/// An item of a list
|
2022-11-27 15:59:54 -05:00
|
|
|
ListItem,
|
2022-11-28 15:52:09 -05:00
|
|
|
/// A description list element.
|
2022-11-27 15:59:54 -05:00
|
|
|
DescriptionList,
|
2022-11-28 15:52:09 -05:00
|
|
|
/// A item of a description list.
|
2022-11-27 15:59:54 -05:00
|
|
|
DescriptionItem,
|
2022-11-28 15:52:09 -05:00
|
|
|
/// A footnote definition.
|
2022-11-27 15:59:54 -05:00
|
|
|
Footnote { tag: &'s str },
|
|
|
|
}
|
|
|
|
|
2022-11-28 14:12:49 -05:00
|
|
|
/// The type of a link, carried as the second field of [`TagKind::Link`].
///
/// This is a plain fieldless enum, so it is `Copy` like the other fieldless enums in this file.
///
/// NOTE(review): the per-variant descriptions below are inferred from the variant names only —
/// confirm against the inline parser before relying on them.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LinkType {
    /// Presumably a link whose destination is written inline, next to the link text.
    Inline,
    /// Presumably a link whose destination is given by a separate reference definition.
    Reference,
    /// Presumably a link whose text is the URL itself.
    Autolink,
    /// Presumably an autolink whose target is an e-mail address.
    Email,
}
|
|
|
|
|
|
|
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
|
|
|
pub enum List {
|
|
|
|
Unordered,
|
|
|
|
Ordered {
|
|
|
|
kind: OrderedListKind,
|
|
|
|
start: u32,
|
|
|
|
format: OrderedListFormat,
|
|
|
|
},
|
|
|
|
Description,
|
|
|
|
Task(bool),
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Numbering scheme used by the markers of an ordered list.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OrderedListKind {
    /// Numbered with decimal digits, e.g. `1)`.
    Decimal,
    /// Numbered with lowercase letters, e.g. `a)`.
    AlphaLower,
    /// Numbered with uppercase letters, e.g. `A)`.
    AlphaUpper,
    /// Numbered with lowercase roman numerals, e.g. `iv)`.
    RomanLower,
    /// Numbered with uppercase roman numerals, e.g. `IV)`.
    RomanUpper,
}
|
|
|
|
|
|
|
|
/// Punctuation that surrounds the number in an ordered list marker.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OrderedListFormat {
    /// A period follows the number, e.g. `1.`.
    Period,
    /// A closing parenthesis follows the number, e.g. `1)`.
    Paren,
    /// Parentheses enclose the number, e.g. `(1)`.
    ParenParen,
}
|
|
|
|
|
|
|
|
/*
impl<'s> Event<'s> {
    fn from_inline(src: &'s str, inline: inline::Event) -> Self {
        match inline {
            Enter
        }
    }
}
*/
|
2022-11-28 14:12:49 -05:00
|
|
|
|
2022-11-22 13:19:21 -05:00
|
|
|
#[derive(Debug, PartialEq, Eq)]
|
|
|
|
pub enum Event {
|
|
|
|
Start(block::Block),
|
|
|
|
End,
|
|
|
|
Inline(inline::Event),
|
|
|
|
Blankline,
|
|
|
|
}
|
|
|
|
|
2022-11-28 14:19:22 -05:00
|
|
|
pub struct Parser<'s> {
|
2022-11-20 13:13:48 -05:00
|
|
|
src: &'s str,
|
2022-11-28 14:19:22 -05:00
|
|
|
tree: block::Tree,
|
2022-11-22 13:19:21 -05:00
|
|
|
parser: Option<inline::Parser<'s>>,
|
2022-11-26 19:12:56 -05:00
|
|
|
inline_start: usize,
|
2022-11-20 13:13:48 -05:00
|
|
|
}
|
|
|
|
|
2022-11-28 14:19:22 -05:00
|
|
|
impl<'s> Parser<'s> {
|
|
|
|
#[must_use]
|
|
|
|
pub fn new(src: &'s str) -> Self {
|
|
|
|
Self {
|
|
|
|
src,
|
|
|
|
tree: block::parse(src),
|
|
|
|
parser: None,
|
|
|
|
inline_start: 0,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'s> Iterator for Parser<'s> {
|
2022-11-22 13:19:21 -05:00
|
|
|
type Item = Event;
|
2022-11-20 13:13:48 -05:00
|
|
|
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
2022-11-22 13:19:21 -05:00
|
|
|
while let Some(parser) = &mut self.parser {
|
|
|
|
// inside leaf block, with inline content
|
2022-11-26 19:12:56 -05:00
|
|
|
if let Some(mut inline) = parser.next() {
|
2022-11-27 18:10:28 -05:00
|
|
|
inline.span = inline.span.translate(self.inline_start);
|
2022-11-22 13:19:21 -05:00
|
|
|
return Some(Event::Inline(inline));
|
|
|
|
} else if let Some(ev) = self.tree.next() {
|
2022-11-28 14:12:49 -05:00
|
|
|
match ev.kind {
|
|
|
|
tree::EventKind::Element(atom) => {
|
|
|
|
assert_eq!(atom, block::Atom::Inline);
|
|
|
|
parser.parse(ev.span.of(self.src));
|
|
|
|
self.inline_start = ev.span.start();
|
2022-11-22 13:19:21 -05:00
|
|
|
}
|
2022-11-28 14:12:49 -05:00
|
|
|
tree::EventKind::Exit => {
|
2022-11-22 13:19:21 -05:00
|
|
|
self.parser = None;
|
|
|
|
return Some(Event::End);
|
2022-11-20 13:13:48 -05:00
|
|
|
}
|
2022-11-28 14:12:49 -05:00
|
|
|
tree::EventKind::Enter(..) => unreachable!(),
|
2022-11-22 13:19:21 -05:00
|
|
|
}
|
2022-11-20 13:13:48 -05:00
|
|
|
}
|
2022-11-22 13:19:21 -05:00
|
|
|
}
|
|
|
|
|
2022-11-28 14:12:49 -05:00
|
|
|
self.tree.next().map(|ev| match ev.kind {
|
|
|
|
tree::EventKind::Element(atom) => {
|
|
|
|
assert_eq!(atom, block::Atom::Blankline);
|
2022-11-22 13:19:21 -05:00
|
|
|
Event::Blankline
|
|
|
|
}
|
2022-11-28 14:12:49 -05:00
|
|
|
tree::EventKind::Enter(block) => {
|
2022-11-26 19:12:56 -05:00
|
|
|
if matches!(block, block::Block::Leaf(..)) {
|
|
|
|
self.parser = Some(inline::Parser::new());
|
|
|
|
}
|
2022-11-28 14:12:49 -05:00
|
|
|
Event::Start(block)
|
2022-11-22 13:19:21 -05:00
|
|
|
}
|
2022-11-28 14:12:49 -05:00
|
|
|
tree::EventKind::Exit => Event::End,
|
2022-11-20 13:13:48 -05:00
|
|
|
})
|
|
|
|
}
|
|
|
|
}
|
2022-11-22 13:19:21 -05:00
|
|
|
|
|
|
|
#[cfg(test)]
mod test {
    use super::Event::*;
    use crate::block::Block::*;
    use crate::block::Container::*;
    use crate::block::Leaf::*;
    use crate::inline::Atom::*;
    use crate::inline::EventKind::*;
    use crate::inline::Node::*;

    /// Parses `$src` with `super::Parser` and asserts that the collected events equal the
    /// listed expected events; the source is printed on failure.
    ///
    /// An optional leading identifier is accepted by the pattern but not used.
    macro_rules! test_parse {
        ($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => {
            // NOTE(review): `actual` is always used by the assertion below, so this `allow`
            // looks unnecessary — confirm before removing it.
            #[allow(unused)]
            let actual = super::Parser::new($src).collect::<Vec<_>>();
            let expected = &[$($($token),*,)?];
            assert_eq!(actual, expected, "\n\n{}\n\n", $src);
        };
    }

    #[test]
    fn para() {
        test_parse!(
            "para",
            Start(Leaf(Paragraph)),
            Inline(Node(Str).span(0, 4)),
            End
        );
        // The span end matches the input length (5 bytes), as in the other cases here.
        test_parse!(
            "pa ra",
            Start(Leaf(Paragraph)),
            Inline(Node(Str).span(0, 5)),
            End
        );
        test_parse!(
            "para0\n\npara1",
            Start(Leaf(Paragraph)),
            Inline(Node(Str).span(0, 6)),
            End,
            Blankline,
            Start(Leaf(Paragraph)),
            Inline(Node(Str).span(7, 12)),
            End,
        );
    }
}
|