jotdown/src/lib.rs

1229 lines
40 KiB
Rust
Raw Normal View History

2022-11-29 12:34:13 -05:00
pub mod html;
2022-12-18 12:05:39 -05:00
mod attr;
2022-11-12 12:45:17 -05:00
mod block;
2022-11-16 16:11:55 -05:00
mod inline;
2022-11-20 13:13:48 -05:00
mod lex;
2022-11-12 12:45:17 -05:00
mod span;
mod tree;
2023-01-15 10:12:05 -05:00
use span::DiscontinuousString;
2022-11-28 14:12:49 -05:00
use span::Span;
2022-12-18 12:05:39 -05:00
pub use attr::Attributes;
2022-12-13 15:19:16 -05:00
/// Shorthand for a copy-on-write string slice ([`std::borrow::Cow`]) with lifetime `'s`.
type CowStr<'s> = std::borrow::Cow<'s, str>;
2022-11-12 12:45:17 -05:00
/// The NUL character.
// NOTE(review): presumably a sentinel marking end of input for the submodules; it is not used
// in this file — confirm against `lex`/`inline`.
const EOF: char = '\0';
2022-11-28 14:12:49 -05:00
#[derive(Debug, PartialEq, Eq)]
2022-11-28 18:33:43 -05:00
pub enum Event<'s> {
2022-11-29 12:34:13 -05:00
/// Start of a container.
Start(Container<'s>, Attributes<'s>),
/// End of a container.
End(Container<'s>),
2022-11-28 15:52:09 -05:00
/// A string object, text only.
2022-12-13 15:19:16 -05:00
Str(CowStr<'s>),
2022-11-29 12:34:13 -05:00
/// An atomic element.
2023-01-18 16:30:24 -05:00
Atom(Atom<'s>),
2022-11-27 15:59:54 -05:00
}
2022-11-28 14:12:49 -05:00
#[derive(Debug, PartialEq, Eq)]
2022-11-29 12:34:13 -05:00
pub enum Container<'s> {
/// A blockquote element.
Blockquote,
/// A list.
2023-01-22 06:39:04 -05:00
List { kind: ListKind, tight: bool },
2022-11-29 12:34:13 -05:00
/// An item of a list
ListItem,
2023-01-21 15:21:43 -05:00
/// An item of a task list, either checked or unchecked.
TaskListItem { checked: bool },
/// A description list element.
DescriptionList,
2022-11-29 12:34:13 -05:00
/// Details describing a term within a description list.
DescriptionDetails,
/// A footnote definition.
2023-01-18 16:30:24 -05:00
Footnote { tag: &'s str, number: usize },
2022-11-29 12:34:13 -05:00
/// A table element.
Table,
/// A row element of a table.
2023-01-25 13:27:12 -05:00
TableRow { head: bool },
2022-11-29 12:34:13 -05:00
/// A block-level divider element.
2022-12-07 13:32:42 -05:00
Div { class: Option<&'s str> },
2022-11-28 15:52:09 -05:00
/// A paragraph.
2022-11-27 15:59:54 -05:00
Paragraph,
2022-11-28 15:52:09 -05:00
/// A heading.
Heading { level: usize },
2022-11-28 15:52:09 -05:00
/// A cell element of row within a table.
2023-01-25 13:27:12 -05:00
TableCell { alignment: Alignment, head: bool },
2023-01-26 14:16:20 -05:00
/// A caption within a table.
Caption,
/// A term within a description list.
DescriptionTerm,
2022-11-28 15:52:09 -05:00
/// A block with raw markup for a specific output format.
2022-11-27 15:59:54 -05:00
RawBlock { format: &'s str },
2022-11-28 15:52:09 -05:00
/// A block with code in a specific language.
CodeBlock { lang: Option<&'s str> },
/// An inline divider element.
Span,
/// An inline link with a destination URL.
2022-12-13 15:19:16 -05:00
Link(CowStr<'s>, LinkType),
2022-12-18 01:59:11 -05:00
/// An inline image with a source URL. Inner Str objects compose the alternative text.
2022-12-17 12:03:06 -05:00
Image(CowStr<'s>, SpanLinkType),
2022-12-08 11:42:54 -05:00
/// An inline verbatim string.
Verbatim,
/// An inline or display math element.
Math { display: bool },
/// Inline raw markup for a specific output format.
RawInline { format: &'s str },
2022-11-28 18:33:43 -05:00
/// A subscripted element.
Subscript,
/// A superscripted element.
Superscript,
/// An inserted inline element.
2022-11-28 18:33:43 -05:00
Insert,
/// A deleted inline element.
2022-11-28 18:33:43 -05:00
Delete,
/// An inline element emphasized with a bold typeface.
2022-11-28 18:33:43 -05:00
Strong,
/// An emphasized inline element.
2022-11-28 18:33:43 -05:00
Emphasis,
/// A highlighted inline element.
Mark,
2022-11-27 15:59:54 -05:00
}
impl<'s> Container<'s> {
/// Is a block element.
fn is_block(&self) -> bool {
match self {
Self::Blockquote
2023-01-22 06:39:04 -05:00
| Self::List { .. }
| Self::ListItem
2023-01-21 15:21:43 -05:00
| Self::TaskListItem { .. }
| Self::DescriptionList
| Self::DescriptionDetails
| Self::Footnote { .. }
| Self::Table
2023-01-25 13:27:12 -05:00
| Self::TableRow { .. }
2022-12-07 13:32:42 -05:00
| Self::Div { .. }
| Self::Paragraph
| Self::Heading { .. }
2023-01-25 13:27:12 -05:00
| Self::TableCell { .. }
2023-01-26 14:16:20 -05:00
| Self::Caption
| Self::DescriptionTerm
| Self::RawBlock { .. }
| Self::CodeBlock { .. } => true,
Self::Span
| Self::Link(..)
| Self::Image(..)
2022-12-08 11:42:54 -05:00
| Self::Verbatim
| Self::Math { .. }
| Self::RawInline { .. }
| Self::Subscript
| Self::Superscript
| Self::Insert
| Self::Delete
| Self::Strong
| Self::Emphasis
2023-01-27 13:04:01 -05:00
| Self::Mark => false,
}
}
/// Is a block element that may contain children blocks.
fn is_block_container(&self) -> bool {
match self {
Self::Blockquote
2023-01-22 06:39:04 -05:00
| Self::List { .. }
| Self::ListItem
2023-01-21 15:21:43 -05:00
| Self::TaskListItem { .. }
| Self::DescriptionList
| Self::DescriptionDetails
| Self::Footnote { .. }
| Self::Table
2023-01-25 13:27:12 -05:00
| Self::TableRow { .. }
2022-12-07 13:32:42 -05:00
| Self::Div { .. } => true,
Self::Paragraph
| Self::Heading { .. }
2023-01-25 13:27:12 -05:00
| Self::TableCell { .. }
2023-01-26 14:16:20 -05:00
| Self::Caption
| Self::DescriptionTerm
| Self::RawBlock { .. }
| Self::CodeBlock { .. }
| Self::Span
| Self::Link(..)
| Self::Image(..)
2022-12-08 11:42:54 -05:00
| Self::Verbatim
| Self::Math { .. }
| Self::RawInline { .. }
| Self::Subscript
| Self::Superscript
| Self::Insert
| Self::Delete
| Self::Strong
| Self::Emphasis
2023-01-27 13:04:01 -05:00
| Self::Mark => false,
}
}
}
2023-01-25 13:27:12 -05:00
/// Horizontal alignment of a table cell.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Alignment {
    /// No alignment was given.
    Unspecified,
    /// Aligned to the left.
    Left,
    /// Centered.
    Center,
    /// Aligned to the right.
    Right,
}
2022-11-28 14:12:49 -05:00
/// How the destination of a span link or image was specified.
///
/// Derives `Clone` and `Copy` like the other field-less enums of this module.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SpanLinkType {
    /// The destination is written directly after the text, e.g. `[text](url)`.
    Inline,
    /// The destination is looked up from a link definition by tag, e.g. `[text][tag]`.
    Reference,
}
#[derive(Debug, PartialEq, Eq)]
pub enum LinkType {
Span(SpanLinkType),
2022-12-11 03:26:55 -05:00
AutoLink,
2022-11-28 15:52:09 -05:00
Email,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
2023-01-22 06:39:04 -05:00
pub enum ListKind {
2022-11-28 15:52:09 -05:00
Unordered,
2023-01-21 05:14:00 -05:00
Ordered {
numbering: OrderedListNumbering,
style: OrderedListStyle,
2023-01-21 05:14:00 -05:00
start: u32,
},
2022-12-06 15:31:08 -05:00
Task,
2022-11-28 15:52:09 -05:00
}
/// The numbering scheme used by the markers of an ordered list.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OrderedListNumbering {
    /// Decimal numbers, e.g. `1)`.
    Decimal,
    /// Lowercase letters, e.g. `a)`.
    AlphaLower,
    /// Uppercase letters, e.g. `A)`.
    AlphaUpper,
    /// Lowercase roman numerals, e.g. `iv)`.
    RomanLower,
    /// Uppercase roman numerals, e.g. `IV)`.
    RomanUpper,
}
/// The delimiters placed around the number of an ordered list marker.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OrderedListStyle {
    /// A trailing period, e.g. `1.`.
    Period,
    /// A trailing closing parenthesis, e.g. `1)`.
    Paren,
    /// Parentheses on both sides, e.g. `(1)`.
    ParenParen,
}
2022-11-29 12:34:13 -05:00
/// An element without content of its own, emitted as a single [`Event::Atom`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Atom<'s> {
    /// A reference to a footnote, holding the footnote's tag and its 1-based number.
    FootnoteReference(&'s str, usize),
    /// Left single quotation mark.
    LeftSingleQuote,
    /// Right single quotation mark.
    RightSingleQuote,
    /// Left double quotation mark.
    LeftDoubleQuote,
    /// Right double quotation mark.
    RightDoubleQuote,
    /// A horizontal ellipsis, i.e. a set of three periods.
    Ellipsis,
    /// An en dash.
    EnDash,
    /// An em dash.
    EmDash,
    /// A thematic break, typically a horizontal rule.
    ThematicBreak,
    /// A space that must not break a line.
    NonBreakingSpace,
    /// A newline that may or may not break a line in the output.
    Softbreak,
    /// A newline that must break a line in the output.
    Hardbreak,
    /// An escape character, not visible in output.
    Escape,
    /// A blank line, not visible in output.
    Blankline,
}
2023-01-21 15:21:43 -05:00
impl OrderedListNumbering {
fn parse_number(self, n: &str) -> u32 {
match self {
Self::Decimal => n.parse().unwrap(),
2023-01-23 15:41:04 -05:00
Self::AlphaLower | Self::AlphaUpper => {
let d0 = u32::from(if matches!(self, Self::AlphaLower) {
b'a'
} else {
b'A'
});
let weights = (1..=n.len()).scan(1, |a, _| {
let prev = *a;
*a *= 26;
Some(prev)
});
n.as_bytes()
.iter()
.rev()
.copied()
.map(u32::from)
.zip(weights)
.map(|(d, w)| w * (d - d0 + 1))
.sum()
}
2023-01-23 16:22:23 -05:00
Self::RomanLower | Self::RomanUpper => {
fn value(d: char) -> u32 {
match d {
'i' | 'I' => 1,
'v' | 'V' => 5,
'x' | 'X' => 10,
'l' | 'L' => 50,
'c' | 'C' => 100,
'd' | 'D' => 500,
'm' | 'M' => 1000,
_ => panic!(),
}
}
let mut prev = 0;
let mut sum = 0;
for d in n.chars().rev() {
let v = value(d);
if v < prev {
sum -= v;
} else {
sum += v;
}
prev = v;
}
sum
}
2023-01-21 15:21:43 -05:00
}
}
}
impl OrderedListStyle {
    /// Strips the delimiters of this style from a list marker, leaving only the number.
    ///
    /// The last byte of `marker` is always dropped; the first byte is dropped as well when
    /// the number is enclosed by parentheses.
    fn number(self, marker: &str) -> &str {
        let start = match self {
            Self::ParenParen => 1,
            Self::Period | Self::Paren => 0,
        };
        let end = marker.len() - 1;
        &marker[start..end]
    }
}
2022-11-28 14:19:22 -05:00
pub struct Parser<'s> {
2022-11-20 13:13:48 -05:00
src: &'s str,
2023-01-18 16:22:59 -05:00
2023-01-21 05:36:05 -05:00
/// Link definitions encountered during block parse, written once.
2023-01-28 10:03:01 -05:00
link_definitions: std::collections::HashMap<&'s str, (CowStr<'s>, attr::Attributes<'s>)>,
2023-01-18 16:22:59 -05:00
2023-01-21 05:36:05 -05:00
/// Block tree cursor.
2023-01-21 07:36:21 -05:00
tree: block::Tree,
2023-01-21 05:36:05 -05:00
/// Spans to the inlines in the block currently being parsed.
2023-01-15 10:12:05 -05:00
inlines: span::InlineSpans<'s>,
2023-01-21 05:36:05 -05:00
/// Inline parser, recreated for each new inline.
2023-01-15 10:12:05 -05:00
inline_parser: Option<inline::Parser<span::InlineCharsIter<'s>>>,
2023-01-21 05:36:05 -05:00
2023-01-25 13:27:12 -05:00
table_head_row: bool,
2023-01-18 16:30:24 -05:00
/// Footnote references in the order they were encountered, without duplicates.
footnote_references: Vec<&'s str>,
/// Cache of footnotes to emit at the end.
2023-01-21 07:36:21 -05:00
footnotes: std::collections::HashMap<&'s str, block::Tree>,
2023-01-18 16:30:24 -05:00
/// Next or current footnote being parsed and emitted.
footnote_index: usize,
/// Currently within a footnote.
footnote_active: bool,
2022-11-20 13:13:48 -05:00
}
2022-11-28 14:19:22 -05:00
impl<'s> Parser<'s> {
#[must_use]
pub fn new(src: &'s str) -> Self {
2023-01-16 17:22:44 -05:00
let tree = block::parse(src);
let link_definitions = {
2023-01-21 07:36:21 -05:00
let mut branch = tree.clone();
2023-01-16 17:22:44 -05:00
let mut defs = std::collections::HashMap::new();
2023-01-28 10:03:01 -05:00
let mut attr_prev: Option<Span> = None;
while let Some(e) = branch.next() {
2023-01-16 17:22:44 -05:00
if let tree::EventKind::Enter(block::Node::Leaf(block::Leaf::LinkDefinition)) =
e.kind
{
let tag = e.span.of(src);
2023-01-28 10:03:01 -05:00
let attrs =
attr_prev.map_or_else(Attributes::new, |sp| attr::parse(sp.of(src)));
2023-01-18 15:44:58 -05:00
let url = match branch.count_children() {
0 => "".into(),
1 => branch.take_inlines().next().unwrap().of(src).trim().into(),
_ => branch.take_inlines().map(|sp| sp.of(src).trim()).collect(),
};
2023-01-28 10:03:01 -05:00
defs.insert(tag, (url, attrs));
} else if let tree::EventKind::Atom(block::Atom::Attributes) = e.kind {
attr_prev = Some(e.span);
} else {
attr_prev = None;
2023-01-16 17:22:44 -05:00
}
}
defs
};
2023-01-21 07:36:21 -05:00
let branch = tree.clone();
2022-11-28 14:19:22 -05:00
Self {
src,
2023-01-18 16:23:58 -05:00
link_definitions,
tree: branch,
2023-01-25 13:27:12 -05:00
table_head_row: false,
2023-01-18 16:30:24 -05:00
footnote_references: Vec::new(),
footnotes: std::collections::HashMap::new(),
footnote_index: 0,
footnote_active: false,
2023-01-15 10:12:05 -05:00
inlines: span::InlineSpans::new(src),
2022-12-11 12:47:00 -05:00
inline_parser: None,
2022-11-28 14:19:22 -05:00
}
}
}
2022-12-17 06:21:15 -05:00
impl<'s> Parser<'s> {
2023-01-15 09:47:28 -05:00
fn inline(&mut self) -> Option<Event<'s>> {
self.inline_parser.as_mut().and_then(|parser| {
let mut inline = parser.next();
let mut first_is_attr = false;
2023-01-28 10:03:01 -05:00
let mut attributes = inline.as_ref().map_or_else(Attributes::new, |inl| {
2023-01-15 09:47:28 -05:00
if let inline::EventKind::Attributes = inl.kind {
first_is_attr = true;
attr::parse(self.inlines.slice(inl.span))
2022-12-17 06:21:15 -05:00
} else {
2023-01-15 09:47:28 -05:00
Attributes::new()
2022-12-17 06:21:15 -05:00
}
2023-01-15 09:47:28 -05:00
});
2022-11-20 13:13:48 -05:00
2023-01-15 09:47:28 -05:00
if first_is_attr {
inline = parser.next();
2022-12-11 12:47:00 -05:00
}
2022-11-22 13:19:21 -05:00
2023-01-15 09:47:28 -05:00
inline.map(|inline| match inline.kind {
inline::EventKind::Enter(c) | inline::EventKind::Exit(c) => {
let t = match c {
inline::Container::Span => Container::Span,
inline::Container::Verbatim => Container::Verbatim,
inline::Container::InlineMath => Container::Math { display: false },
inline::Container::DisplayMath => Container::Math { display: true },
inline::Container::RawFormat => Container::RawInline {
format: match self.inlines.src(inline.span) {
CowStr::Owned(_) => panic!(),
CowStr::Borrowed(s) => s,
},
},
inline::Container::Subscript => Container::Subscript,
inline::Container::Superscript => Container::Superscript,
inline::Container::Insert => Container::Insert,
inline::Container::Delete => Container::Delete,
inline::Container::Emphasis => Container::Emphasis,
inline::Container::Strong => Container::Strong,
inline::Container::Mark => Container::Mark,
inline::Container::InlineLink => Container::Link(
match self.inlines.src(inline.span) {
CowStr::Owned(s) => s.replace('\n', "").into(),
s @ CowStr::Borrowed(_) => s,
},
LinkType::Span(SpanLinkType::Inline),
),
inline::Container::InlineImage => Container::Image(
match self.inlines.src(inline.span) {
CowStr::Owned(s) => s.replace('\n', "").into(),
s @ CowStr::Borrowed(_) => s,
},
SpanLinkType::Inline,
),
2023-01-28 10:03:01 -05:00
inline::Container::ReferenceLink | inline::Container::ReferenceImage => {
let tag = match self.inlines.src(inline.span) {
CowStr::Owned(s) => s.replace('\n', " ").into(),
s @ CowStr::Borrowed(_) => s,
};
2023-01-28 10:03:01 -05:00
let (url, attrs_def) = self
.link_definitions
.get(tag.as_ref())
2023-01-28 05:20:04 -05:00
.cloned()
2023-01-28 10:03:01 -05:00
.unwrap_or_else(|| ("".into(), Attributes::new()));
attributes.union(attrs_def);
if matches!(c, inline::Container::ReferenceLink) {
Container::Link(url, LinkType::Span(SpanLinkType::Reference))
} else {
Container::Image(url, SpanLinkType::Reference)
}
}
2023-01-28 10:38:19 -05:00
inline::Container::Autolink => {
let url = self.inlines.src(inline.span);
let url = if url.contains('@') {
format!("mailto:{}", url).into()
} else {
url
};
Container::Link(url, LinkType::AutoLink)
}
2023-01-15 09:47:28 -05:00
};
if matches!(inline.kind, inline::EventKind::Enter(_)) {
Event::Start(t, attributes)
} else {
Event::End(t)
}
}
2023-01-17 14:55:36 -05:00
inline::EventKind::Atom(a) => Event::Atom(match a {
2023-01-18 16:30:24 -05:00
inline::Atom::FootnoteReference => {
let tag = match self.inlines.src(inline.span) {
CowStr::Borrowed(s) => s,
CowStr::Owned(..) => panic!(),
};
let number = self
.footnote_references
.iter()
.position(|t| *t == tag)
.map_or_else(
|| {
self.footnote_references.push(tag);
self.footnote_references.len()
},
|i| i + 1,
);
Atom::FootnoteReference(
match self.inlines.src(inline.span) {
CowStr::Borrowed(s) => s,
CowStr::Owned(..) => panic!(),
},
number,
)
}
2023-01-27 13:04:01 -05:00
inline::Atom::Quote { ty, left } => match (ty, left) {
(inline::QuoteType::Single, true) => Atom::LeftSingleQuote,
(inline::QuoteType::Single, false) => Atom::RightSingleQuote,
(inline::QuoteType::Double, true) => Atom::LeftDoubleQuote,
(inline::QuoteType::Double, false) => Atom::RightDoubleQuote,
},
2023-01-17 14:55:36 -05:00
inline::Atom::Ellipsis => Atom::Ellipsis,
inline::Atom::EnDash => Atom::EnDash,
inline::Atom::EmDash => Atom::EmDash,
inline::Atom::Nbsp => Atom::NonBreakingSpace,
inline::Atom::Softbreak => Atom::Softbreak,
inline::Atom::Hardbreak => Atom::Hardbreak,
inline::Atom::Escape => Atom::Escape,
}),
2023-01-15 09:47:28 -05:00
inline::EventKind::Str => Event::Str(self.inlines.src(inline.span)),
2023-01-15 17:48:55 -05:00
inline::EventKind::Whitespace
| inline::EventKind::Attributes
| inline::EventKind::Placeholder => {
2023-01-15 09:47:28 -05:00
panic!("{:?}", inline)
}
})
})
}
fn block(&mut self) -> Option<Event<'s>> {
let mut attributes = Attributes::new();
2023-01-16 17:22:44 -05:00
while let Some(ev) = &mut self.tree.next() {
2022-12-07 13:32:42 -05:00
let content = ev.span.of(self.src);
let event = match ev.kind {
2022-12-10 04:26:06 -05:00
tree::EventKind::Atom(a) => match a {
2022-12-07 13:32:42 -05:00
block::Atom::Blankline => Event::Atom(Atom::Blankline),
2022-12-10 04:26:06 -05:00
block::Atom::ThematicBreak => Event::Atom(Atom::ThematicBreak),
2022-12-07 13:32:42 -05:00
block::Atom::Attributes => {
2023-01-15 09:47:28 -05:00
attributes.parse(content);
2022-12-07 13:32:42 -05:00
continue;
}
},
tree::EventKind::Enter(c) | tree::EventKind::Exit(c) => {
let enter = matches!(ev.kind, tree::EventKind::Enter(..));
let cont = match c {
block::Node::Leaf(l) => {
if matches!(l, block::Leaf::LinkDefinition) {
// ignore link definitions
if enter {
self.tree.take_inlines().last();
}
attributes = Attributes::new();
continue;
}
if enter {
self.inlines.set_spans(self.tree.take_inlines());
self.inline_parser =
Some(inline::Parser::new(self.inlines.chars()));
}
2023-01-25 13:27:12 -05:00
match l {
block::Leaf::Paragraph => Container::Paragraph,
block::Leaf::Heading => Container::Heading {
level: content.len(),
},
block::Leaf::CodeBlock => {
if let Some(format) = content.strip_prefix('=') {
Container::RawBlock { format }
} else {
Container::CodeBlock {
lang: (!content.is_empty()).then(|| content),
}
}
}
block::Leaf::TableCell(alignment) => Container::TableCell {
alignment,
head: self.table_head_row,
},
2023-01-26 14:16:20 -05:00
block::Leaf::Caption => Container::Caption,
2023-01-25 13:27:12 -05:00
block::Leaf::LinkDefinition => unreachable!(),
}
}
block::Node::Container(c) => match c {
2023-01-19 16:16:01 -05:00
block::Container::Blockquote => Container::Blockquote,
2022-12-12 12:22:13 -05:00
block::Container::Div { .. } => Container::Div {
class: (!ev.span.is_empty()).then(|| content),
},
2023-01-18 16:30:24 -05:00
block::Container::Footnote => {
assert!(enter);
2023-01-18 16:30:24 -05:00
self.footnotes.insert(content, self.tree.take_branch());
attributes = Attributes::new();
2023-01-18 16:30:24 -05:00
continue;
}
block::Container::DescriptionList => Container::DescriptionList,
block::Container::List { ty, tight } => {
2023-01-22 06:39:04 -05:00
let kind = match ty {
block::ListType::Unordered(..) => ListKind::Unordered,
block::ListType::Ordered(numbering, style) => {
let marker = ev.span.of(self.src);
let start =
numbering.parse_number(style.number(marker)).max(1);
ListKind::Ordered {
numbering,
style,
start,
}
}
block::ListType::Task => ListKind::Task,
};
2023-01-22 06:39:04 -05:00
Container::List { kind, tight }
}
2023-01-21 15:21:43 -05:00
block::Container::ListItem(ty) => {
if matches!(ty, block::ListType::Task) {
let marker = ev.span.of(self.src);
Container::TaskListItem {
checked: marker.as_bytes()[3] != b' ',
}
} else {
Container::ListItem
}
}
2023-01-25 13:27:12 -05:00
block::Container::Table => Container::Table,
block::Container::TableRow { head } => {
if enter {
self.table_head_row = head;
}
Container::TableRow { head }
}
},
};
if enter {
Event::Start(cont, attributes)
} else {
Event::End(cont)
2022-12-12 12:22:13 -05:00
}
}
2022-12-12 12:22:13 -05:00
tree::EventKind::Inline => unreachable!(),
2022-12-07 13:32:42 -05:00
};
return Some(event);
}
None
2022-11-20 13:13:48 -05:00
}
2023-01-18 16:30:24 -05:00
fn footnote(&mut self) -> Option<Event<'s>> {
if self.footnote_active {
let tag = self.footnote_references.get(self.footnote_index).unwrap();
self.footnote_index += 1;
self.footnote_active = false;
Some(Event::End(Container::Footnote {
tag,
number: self.footnote_index,
}))
} else if let Some(tag) = self.footnote_references.get(self.footnote_index) {
self.tree = self
.footnotes
.remove(tag)
2023-01-21 07:36:21 -05:00
.unwrap_or_else(block::Tree::empty);
2023-01-18 16:30:24 -05:00
self.footnote_active = true;
Some(Event::Start(
Container::Footnote {
tag,
number: self.footnote_index + 1,
},
Attributes::new(),
))
} else {
None
}
}
2022-11-20 13:13:48 -05:00
}
2022-11-22 13:19:21 -05:00
2023-01-15 09:47:28 -05:00
impl<'s> Iterator for Parser<'s> {
    type Item = Event<'s>;

    /// Yields the next event of the document.
    ///
    /// Sources are tried in order: pending inline content of the current leaf block, then
    /// the block tree, and finally the referenced footnotes.
    fn next(&mut self) -> Option<Self::Item> {
        if let Some(event) = self.inline() {
            return Some(event);
        }
        if let Some(event) = self.block() {
            return Some(event);
        }
        self.footnote()
    }
}
2022-11-22 13:19:21 -05:00
#[cfg(test)]
mod test {
    use super::Atom::*;
    use super::Attributes;
    use super::Container::*;
    use super::Event::*;
    use super::LinkType;
    use super::ListKind;
    use super::OrderedListNumbering::*;
    use super::OrderedListStyle::*;
    use super::SpanLinkType;

    /// Parses the first argument and asserts that the resulting events equal the remaining
    /// arguments. On failure, the input and a side-by-side comparison of the actual and the
    /// expected events are printed.
    macro_rules! test_parse {
        ($src:expr $(,$($token:expr),* $(,)?)?) => {
            #[allow(unused)]
            let actual = super::Parser::new($src).collect::<Vec<_>>();
            let expected = &[$($($token),*,)?];
            assert_eq!(
                actual,
                expected,
                concat!(
                    "\n",
                    "\x1b[0;1m====================== INPUT =========================\x1b[0m\n",
                    "\x1b[2m{}",
                    "\x1b[0;1m================ ACTUAL vs EXPECTED ==================\x1b[0m\n",
                    "{}",
                    "\x1b[0;1m======================================================\x1b[0m\n",
                ),
                $src,
                {
                    let a = actual.iter().map(|n| format!("{:?}", n)).collect::<Vec<_>>();
                    let b = expected.iter().map(|n| format!("{:?}", n)).collect::<Vec<_>>();
                    let max = a.len().max(b.len());
                    let a_width = a.iter().map(|a| a.len()).max().unwrap_or(0);
                    a.iter()
                        .map(AsRef::as_ref)
                        .chain(std::iter::repeat(""))
                        .zip(b.iter().map(AsRef::as_ref).chain(std::iter::repeat("")))
                        .take(max)
                        .map(|(a, b)|
                            format!(
                                "\x1b[{}m{:a_width$}\x1b[0m {}= \x1b[{}m{}\x1b[0m\n",
                                if a == b { "2" } else { "31" },
                                a,
                                if a == b { '=' } else { '!' },
                                if a == b { "2" } else { "32" },
                                b,
                                a_width = a_width,
                            )
                        )
                        .collect::<String>()
                },
            );
        };
    }

    #[test]
    fn empty() {
        test_parse!("");
    }

    #[test]
    fn heading() {
        test_parse!(
            "#\n",
            Start(Heading { level: 1 }, Attributes::new()),
            End(Heading { level: 1 }),
        );
        test_parse!(
            "# abc\ndef\n",
            Start(Heading { level: 1 }, Attributes::new()),
            Str("abc".into()),
            Atom(Softbreak),
            Str("def".into()),
            End(Heading { level: 1 }),
        );
    }

    #[test]
    fn blockquote() {
        test_parse!(
            ">\n",
            Start(Blockquote, Attributes::new()),
            Atom(Blankline),
            End(Blockquote),
        );
    }

    #[test]
    fn para() {
        test_parse!(
            "para",
            Start(Paragraph, Attributes::new()),
            Str("para".into()),
            End(Paragraph),
        );
        test_parse!(
            "pa ra",
            Start(Paragraph, Attributes::new()),
            Str("pa ra".into()),
            End(Paragraph),
        );
        test_parse!(
            "para0\n\npara1",
            Start(Paragraph, Attributes::new()),
            Str("para0".into()),
            End(Paragraph),
            Atom(Blankline),
            Start(Paragraph, Attributes::new()),
            Str("para1".into()),
            End(Paragraph),
        );
    }

    #[test]
    fn verbatim() {
        test_parse!(
            "`abc\ndef",
            Start(Paragraph, Attributes::new()),
            Start(Verbatim, Attributes::new()),
            Str("abc\ndef".into()),
            End(Verbatim),
            End(Paragraph),
        );
        test_parse!(
            concat!(
                "> `abc\n",
                "> def\n", //
            ),
            Start(Blockquote, Attributes::new()),
            Start(Paragraph, Attributes::new()),
            Start(Verbatim, Attributes::new()),
            Str("abc\ndef".into()),
            End(Verbatim),
            End(Paragraph),
            End(Blockquote),
        );
    }

    #[test]
    fn raw_inline() {
        test_parse!(
            "``raw\nraw``{=format}",
            Start(Paragraph, Attributes::new()),
            Start(RawInline { format: "format" }, Attributes::new()),
            Str("raw\nraw".into()),
            End(RawInline { format: "format" }),
            End(Paragraph),
        );
    }

    #[test]
    fn raw_block() {
        test_parse!(
            "``` =html\n<table>\n```",
            Start(RawBlock { format: "html" }, Attributes::new()),
            Str("<table>".into()),
            Atom(Softbreak),
            End(RawBlock { format: "html" }),
        );
    }

    #[test]
    fn link_inline() {
        test_parse!(
            "[text](url)",
            Start(Paragraph, Attributes::new()),
            Start(
                Link("url".into(), LinkType::Span(SpanLinkType::Inline)),
                Attributes::new()
            ),
            Str("text".into()),
            End(Link("url".into(), LinkType::Span(SpanLinkType::Inline))),
            End(Paragraph),
        );
        test_parse!(
            concat!(
                "> [text](url\n",
                "> url)\n", //
            ),
            Start(Blockquote, Attributes::new()),
            Start(Paragraph, Attributes::new()),
            Start(
                Link("urlurl".into(), LinkType::Span(SpanLinkType::Inline)),
                Attributes::new()
            ),
            Str("text".into()),
            End(Link("urlurl".into(), LinkType::Span(SpanLinkType::Inline))),
            End(Paragraph),
            End(Blockquote),
        );
    }

    #[test]
    fn link_reference() {
        test_parse!(
            concat!(
                "[text][tag]\n",
                "\n",
                "[tag]: url\n" //
            ),
            Start(Paragraph, Attributes::new()),
            Start(
                Link("url".into(), LinkType::Span(SpanLinkType::Reference)),
                Attributes::new()
            ),
            Str("text".into()),
            End(Link("url".into(), LinkType::Span(SpanLinkType::Reference))),
            End(Paragraph),
            Atom(Blankline),
        );
        test_parse!(
            concat!(
                "![text][tag]\n",
                "\n",
                "[tag]: url\n" //
            ),
            Start(Paragraph, Attributes::new()),
            Start(
                Image("url".into(), SpanLinkType::Reference),
                Attributes::new()
            ),
            Str("text".into()),
            End(Image("url".into(), SpanLinkType::Reference)),
            End(Paragraph),
            Atom(Blankline),
        );
    }

    #[test]
    fn link_reference_multiline() {
        test_parse!(
            concat!(
                "[text][tag]\n",
                "\n",
                "[tag]: u\n",
                " rl\n", //
            ),
            Start(Paragraph, Attributes::new()),
            Start(
                Link("url".into(), LinkType::Span(SpanLinkType::Reference)),
                Attributes::new()
            ),
            Str("text".into()),
            End(Link("url".into(), LinkType::Span(SpanLinkType::Reference))),
            End(Paragraph),
            Atom(Blankline),
        );
        test_parse!(
            concat!(
                "[text][tag]\n",
                "\n",
                "[tag]:\n",
                " url\n", //
            ),
            Start(Paragraph, Attributes::new()),
            Start(
                Link("url".into(), LinkType::Span(SpanLinkType::Reference)),
                Attributes::new()
            ),
            Str("text".into()),
            End(Link("url".into(), LinkType::Span(SpanLinkType::Reference))),
            End(Paragraph),
            Atom(Blankline),
        );
    }

    #[test]
    fn link_reference_attrs() {
        test_parse!(
            concat!(
                "[text][tag]{b=c}\n",
                "\n",
                "{a=b}\n",
                "[tag]: url\n",
                "para\n",
            ),
            Start(Paragraph, Attributes::new()),
            Start(
                Link("url".into(), LinkType::Span(SpanLinkType::Reference)),
                [("b", "c"), ("a", "b")].into_iter().collect(),
            ),
            Str("text".into()),
            End(Link("url".into(), LinkType::Span(SpanLinkType::Reference))),
            End(Paragraph),
            Atom(Blankline),
            Start(Paragraph, Attributes::new()),
            Str("para".into()),
            End(Paragraph),
        );
    }

    #[test]
    fn footnote_references() {
        test_parse!(
            "[^a][^b][^c]",
            Start(Paragraph, Attributes::new()),
            Atom(FootnoteReference("a", 1)),
            Atom(FootnoteReference("b", 2)),
            Atom(FootnoteReference("c", 3)),
            End(Paragraph),
            Start(
                Footnote {
                    tag: "a",
                    number: 1
                },
                Attributes::new()
            ),
            End(Footnote {
                tag: "a",
                number: 1
            }),
            Start(
                Footnote {
                    tag: "b",
                    number: 2
                },
                Attributes::new()
            ),
            End(Footnote {
                tag: "b",
                number: 2
            }),
            Start(
                Footnote {
                    tag: "c",
                    number: 3
                },
                Attributes::new()
            ),
            End(Footnote {
                tag: "c",
                number: 3
            }),
        );
    }

    #[test]
    fn footnote() {
        test_parse!(
            "[^a]\n\n[^a]: a\n",
            Start(Paragraph, Attributes::new()),
            Atom(FootnoteReference("a", 1)),
            End(Paragraph),
            Atom(Blankline),
            Start(
                Footnote {
                    tag: "a",
                    number: 1
                },
                Attributes::new()
            ),
            Start(Paragraph, Attributes::new()),
            Str("a".into()),
            End(Paragraph),
            End(Footnote {
                tag: "a",
                number: 1
            }),
        );
    }

    #[test]
    fn footnote_multiblock() {
        test_parse!(
            concat!(
                "[^a]\n",
                "\n",
                "[^a]: abc\n",
                "\n",
                " def", //
            ),
            Start(Paragraph, Attributes::new()),
            Atom(FootnoteReference("a", 1)),
            End(Paragraph),
            Atom(Blankline),
            Start(
                Footnote {
                    tag: "a",
                    number: 1
                },
                Attributes::new()
            ),
            Start(Paragraph, Attributes::new()),
            Str("abc".into()),
            End(Paragraph),
            Atom(Blankline),
            Start(Paragraph, Attributes::new()),
            Str("def".into()),
            End(Paragraph),
            End(Footnote {
                tag: "a",
                number: 1
            }),
        );
    }

    #[test]
    fn footnote_post() {
        test_parse!(
            concat!(
                "[^a]\n",
                "\n",
                "[^a]: note\n",
                "para\n", //
            ),
            Start(Paragraph, Attributes::new()),
            Atom(FootnoteReference("a", 1)),
            End(Paragraph),
            Atom(Blankline),
            Start(Paragraph, Attributes::new()),
            Str("para".into()),
            End(Paragraph),
            Start(
                Footnote {
                    tag: "a",
                    number: 1
                },
                Attributes::new()
            ),
            Start(Paragraph, Attributes::new()),
            Str("note".into()),
            End(Paragraph),
            End(Footnote {
                tag: "a",
                number: 1
            }),
        );
    }

    #[test]
    fn attr_block() {
        test_parse!(
            "{.some_class}\npara\n",
            Start(Paragraph, [("class", "some_class")].into_iter().collect()),
            Str("para".into()),
            End(Paragraph),
        );
    }

    #[test]
    fn attr_inline() {
        test_parse!(
            "abc _def_{.ghi}",
            Start(Paragraph, Attributes::new()),
            Str("abc ".into()),
            Start(Emphasis, [("class", "ghi")].into_iter().collect()),
            Str("def".into()),
            End(Emphasis),
            End(Paragraph),
        );
    }

    #[test]
    fn list_item_unordered() {
        test_parse!(
            "- abc",
            Start(
                List {
                    kind: ListKind::Unordered,
                    tight: true,
                },
                Attributes::new(),
            ),
            Start(ListItem, Attributes::new()),
            Start(Paragraph, Attributes::new()),
            Str("abc".into()),
            End(Paragraph),
            End(ListItem),
            End(List {
                kind: ListKind::Unordered,
                tight: true,
            }),
        );
    }

    #[test]
    fn list_item_ordered_decimal() {
        test_parse!(
            "123. abc",
            Start(
                List {
                    kind: ListKind::Ordered {
                        numbering: Decimal,
                        style: Period,
                        start: 123
                    },
                    tight: true,
                },
                Attributes::new(),
            ),
            Start(ListItem, Attributes::new()),
            Start(Paragraph, Attributes::new()),
            Str("abc".into()),
            End(Paragraph),
            End(ListItem),
            End(List {
                kind: ListKind::Ordered {
                    numbering: Decimal,
                    style: Period,
                    start: 123
                },
                tight: true,
            }),
        );
    }

    #[test]
    fn list_task() {
        test_parse!(
            concat!(
                "- [ ] a\n", //
                "- [x] b\n", //
                "- [X] c\n", //
            ),
            Start(
                List {
                    kind: ListKind::Task,
                    tight: true,
                },
                Attributes::new(),
            ),
            Start(TaskListItem { checked: false }, Attributes::new()),
            Start(Paragraph, Attributes::new()),
            Str("a".into()),
            End(Paragraph),
            End(TaskListItem { checked: false }),
            Start(TaskListItem { checked: true }, Attributes::new()),
            Start(Paragraph, Attributes::new()),
            Str("b".into()),
            End(Paragraph),
            End(TaskListItem { checked: true }),
            Start(TaskListItem { checked: true }, Attributes::new()),
            Start(Paragraph, Attributes::new()),
            Str("c".into()),
            End(Paragraph),
            End(TaskListItem { checked: true }),
            End(List {
                kind: ListKind::Task,
                tight: true,
            }),
        );
    }

    #[test]
    fn numbering_decimal() {
        assert_eq!(Decimal.parse_number("1"), 1);
        assert_eq!(Decimal.parse_number("123"), 123);
    }

    #[test]
    fn numbering_alpha() {
        assert_eq!(AlphaLower.parse_number("a"), 1);
        assert_eq!(AlphaUpper.parse_number("B"), 2);
        assert_eq!(AlphaUpper.parse_number("Z"), 26);
        assert_eq!(AlphaLower.parse_number("aa"), 27);
    }

    #[test]
    fn numbering_roman() {
        assert_eq!(RomanLower.parse_number("i"), 1);
        assert_eq!(RomanLower.parse_number("iv"), 4);
        assert_eq!(RomanLower.parse_number("ix"), 9);
        assert_eq!(RomanUpper.parse_number("XIV"), 14);
        assert_eq!(RomanUpper.parse_number("MCMXCIV"), 1994);
    }

    #[test]
    fn marker_number() {
        assert_eq!(Period.number("12."), "12");
        assert_eq!(Paren.number("a)"), "a");
        assert_eq!(ParenParen.number("(iv)"), "iv");
    }
}