jotdown/src/lib.rs

254 lines
6.3 KiB
Rust
Raw Normal View History

2022-11-12 12:45:17 -05:00
mod block;
2022-11-28 14:12:49 -05:00
mod html;
2022-11-16 16:11:55 -05:00
mod inline;
2022-11-20 13:13:48 -05:00
mod lex;
2022-11-12 12:45:17 -05:00
mod span;
mod tree;
2022-11-28 14:12:49 -05:00
use span::Span;
2022-11-20 13:13:48 -05:00
/// Public unit struct with no fields.
///
/// NOTE(review): nothing in the visible part of this file uses this type, and
/// it is a different item from `block::Block`; it looks like a placeholder --
/// confirm before relying on it.
pub struct Block;
2022-11-12 12:45:17 -05:00
// The NUL character under the name `EOF`.
// NOTE(review): not referenced in the visible part of this file; presumably an
// end-of-input sentinel for one of the submodules -- confirm.
const EOF: char = '\0';
2022-11-28 14:12:49 -05:00
#[derive(Debug, PartialEq, Eq)]
2022-11-28 15:52:09 -05:00
pub enum Event2<'s> {
/// Start of a tag.
Start(TagKind<'s>, Attributes<'s>),
/// End of a tag.
End(TagKind<'s>),
/// A string object, text only.
Str(&'s str),
/// A verbatim string.
Verbatim(&'s str),
/// An inline or display math element.
Math { content: &'s str, display: bool },
/// An ellipsis, i.e. a set of three periods.
Ellipsis,
/// An en dash.
EnDash,
/// An em dash.
EmDash,
/// A thematic break, typically a horizontal rule.
ThematicBreak,
/// A blank line.
Blankline,
/// A space that may not break a line.
NonBreakingSpace,
/// A newline that may or may not break a line in the output format.
Softbreak,
/// A newline that must break a line.
HardBreak,
2022-11-27 15:59:54 -05:00
}
2022-11-28 15:52:09 -05:00
/// The attributes attached to a tag, stored as pairs of string slices.
///
/// The [`Default`] value holds no attributes and performs no allocation.
// Attributes are rare, better to pay 8 bytes always and sometimes an extra allocation instead of
// always 24 bytes.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Attributes<'s>(Option<Box<Vec<(&'s str, &'s str)>>>);
2022-11-28 14:12:49 -05:00
#[derive(Debug, PartialEq, Eq)]
2022-11-27 15:59:54 -05:00
pub enum TagKind<'s> {
2022-11-28 15:52:09 -05:00
/// A paragraph.
2022-11-27 15:59:54 -05:00
Paragraph,
2022-11-28 15:52:09 -05:00
/// A heading.
2022-11-27 15:59:54 -05:00
Heading { level: u8 },
2022-11-28 15:52:09 -05:00
/// A link with a destination URL.
Link(&'s str, LinkType),
/// An image.
Image(&'s str),
/// A divider element.
Div,
/// An inline divider element.
Span,
/// A table element.
2022-11-27 15:59:54 -05:00
Table,
2022-11-28 15:52:09 -05:00
/// A row element of a table.
2022-11-27 15:59:54 -05:00
TableRow,
2022-11-28 15:52:09 -05:00
/// A cell element of row within a table.
2022-11-27 15:59:54 -05:00
TableCell,
2022-11-28 15:52:09 -05:00
/// A block with raw markup for a specific output format.
2022-11-27 15:59:54 -05:00
RawBlock { format: &'s str },
2022-11-28 15:52:09 -05:00
/// A block with code in a specific language.
CodeBlock { language: Option<&'s str> },
/// A blockquote element.
2022-11-27 15:59:54 -05:00
Blockquote,
2022-11-28 15:52:09 -05:00
/// A list.
List(List),
/// An item of a list
2022-11-27 15:59:54 -05:00
ListItem,
2022-11-28 15:52:09 -05:00
/// A description list element.
2022-11-27 15:59:54 -05:00
DescriptionList,
2022-11-28 15:52:09 -05:00
/// A item of a description list.
2022-11-27 15:59:54 -05:00
DescriptionItem,
2022-11-28 15:52:09 -05:00
/// A footnote definition.
2022-11-27 15:59:54 -05:00
Footnote { tag: &'s str },
}
2022-11-28 14:12:49 -05:00
/// The type of a link, carried by the link tag kind.
///
/// The enum has no payload, so it is `Copy` like the other fieldless enums in
/// this file.
// NOTE(review): the variants are undocumented in the original. Going by their
// names they distinguish inline destinations, reference-style links, autolinked
// URLs and autolinked e-mail addresses -- confirm against the inline parser.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LinkType {
    Inline,
    Reference,
    Autolink,
    Email,
}
/// The type of a list.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum List {
    /// A list without numbering.
    Unordered,
    /// A numbered list.
    Ordered {
        /// The numbering scheme.
        kind: OrderedListKind,
        // NOTE(review): presumably the number of the first item -- confirm.
        start: u32,
        /// The punctuation surrounding each number.
        format: OrderedListFormat,
    },
    /// A description list.
    Description,
    /// A task list.
    // NOTE(review): the meaning of the flag is not visible here; presumably
    // whether the task is checked -- confirm.
    Task(bool),
}
/// The numbering scheme of an ordered list.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OrderedListKind {
    /// Numbered with decimal digits, such as `1)`.
    Decimal,
    /// Numbered with lowercase letters, such as `a)`.
    AlphaLower,
    /// Numbered with uppercase letters, such as `A)`.
    AlphaUpper,
    /// Numbered with lowercase roman numerals, such as `iv)`.
    RomanLower,
    /// Numbered with uppercase roman numerals, such as `IV)`.
    RomanUpper,
}
/// The punctuation placed around the number of an ordered list item.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OrderedListFormat {
    /// A period after the number, such as `1.`.
    Period,
    /// A closing parenthesis after the number, such as `1)`.
    Paren,
    /// Parentheses on both sides of the number, such as `(1)`.
    ParenParen,
}
/*
impl<'s> Event<'s> {
fn from_inline(src: &'s str, inline: inline::Event) -> Self {
match inline {
Enter
}
}
2022-11-28 14:12:49 -05:00
}
2022-11-28 15:52:09 -05:00
*/
2022-11-28 14:12:49 -05:00
2022-11-22 13:19:21 -05:00
/// An item yielded by [`Parser`].
#[derive(Debug, PartialEq, Eq)]
pub enum Event {
    /// A block was entered.
    Start(block::Block),
    /// The most recently entered block was exited.
    End,
    /// An inline event from within a leaf block.
    Inline(inline::Event),
    /// A blank line between blocks.
    Blankline,
}
2022-11-28 14:19:22 -05:00
pub struct Parser<'s> {
2022-11-20 13:13:48 -05:00
src: &'s str,
2022-11-28 14:19:22 -05:00
tree: block::Tree,
2022-11-22 13:19:21 -05:00
parser: Option<inline::Parser<'s>>,
2022-11-26 19:12:56 -05:00
inline_start: usize,
2022-11-20 13:13:48 -05:00
}
2022-11-28 14:19:22 -05:00
impl<'s> Parser<'s> {
    /// Creates a parser over `src`.
    ///
    /// The block structure is parsed immediately; no inline parser is active
    /// until iteration enters a leaf block.
    #[must_use]
    pub fn new(src: &'s str) -> Self {
        let tree = block::parse(src);
        Self {
            src,
            tree,
            parser: None,
            inline_start: 0,
        }
    }
}
impl<'s> Iterator for Parser<'s> {
2022-11-22 13:19:21 -05:00
type Item = Event;
2022-11-20 13:13:48 -05:00
fn next(&mut self) -> Option<Self::Item> {
2022-11-22 13:19:21 -05:00
while let Some(parser) = &mut self.parser {
// inside leaf block, with inline content
2022-11-26 19:12:56 -05:00
if let Some(mut inline) = parser.next() {
2022-11-27 18:10:28 -05:00
inline.span = inline.span.translate(self.inline_start);
2022-11-22 13:19:21 -05:00
return Some(Event::Inline(inline));
} else if let Some(ev) = self.tree.next() {
2022-11-28 14:12:49 -05:00
match ev.kind {
tree::EventKind::Element(atom) => {
assert_eq!(atom, block::Atom::Inline);
parser.parse(ev.span.of(self.src));
self.inline_start = ev.span.start();
2022-11-22 13:19:21 -05:00
}
2022-11-28 14:12:49 -05:00
tree::EventKind::Exit => {
2022-11-22 13:19:21 -05:00
self.parser = None;
return Some(Event::End);
2022-11-20 13:13:48 -05:00
}
2022-11-28 14:12:49 -05:00
tree::EventKind::Enter(..) => unreachable!(),
2022-11-22 13:19:21 -05:00
}
2022-11-20 13:13:48 -05:00
}
2022-11-22 13:19:21 -05:00
}
2022-11-28 14:12:49 -05:00
self.tree.next().map(|ev| match ev.kind {
tree::EventKind::Element(atom) => {
assert_eq!(atom, block::Atom::Blankline);
2022-11-22 13:19:21 -05:00
Event::Blankline
}
2022-11-28 14:12:49 -05:00
tree::EventKind::Enter(block) => {
2022-11-26 19:12:56 -05:00
if matches!(block, block::Block::Leaf(..)) {
self.parser = Some(inline::Parser::new());
}
2022-11-28 14:12:49 -05:00
Event::Start(block)
2022-11-22 13:19:21 -05:00
}
2022-11-28 14:12:49 -05:00
tree::EventKind::Exit => Event::End,
2022-11-20 13:13:48 -05:00
})
}
}
2022-11-22 13:19:21 -05:00
#[cfg(test)]
mod test {
    use super::Event::*;
    use crate::block::Block::*;
    use crate::block::Container::*;
    use crate::block::Leaf::*;
    use crate::inline::Atom::*;
    use crate::inline::EventKind::*;
    use crate::inline::Node::*;

    // Parses `$src` with `super::Parser` and asserts that the collected events
    // equal the listed expected events. On failure the source text is printed
    // along with the mismatch.
    macro_rules! test_parse {
        ($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => {
            #[allow(unused)]
            let actual = super::Parser::new($src).collect::<Vec<_>>();
            let expected = &[$($($token),*,)?];
            assert_eq!(actual, expected, "\n\n{}\n\n", $src);
        };
    }

    #[test]
    fn para() {
        test_parse!(
            "para",
            Start(Leaf(Paragraph)),
            Inline(Node(Str).span(0, 4)),
            End
        );
        // The source must be nine bytes long for the expected span of 0..9 to
        // be valid, so the run of interior spaces is significant.
        test_parse!(
            "pa     ra",
            Start(Leaf(Paragraph)),
            Inline(Node(Str).span(0, 9)),
            End
        );
        test_parse!(
            "para0\n\npara1",
            Start(Leaf(Paragraph)),
            Inline(Node(Str).span(0, 6)),
            End,
            Blankline,
            Start(Leaf(Paragraph)),
            Inline(Node(Str).span(7, 12)),
            End,
        );
    }
}