2022-11-29 12:34:13 -05:00
|
|
|
pub mod html;
|
|
|
|
|
2022-12-18 12:05:39 -05:00
|
|
|
mod attr;
|
2022-11-12 12:45:17 -05:00
|
|
|
mod block;
|
2022-11-16 16:11:55 -05:00
|
|
|
mod inline;
|
2022-11-20 13:13:48 -05:00
|
|
|
mod lex;
|
2022-11-12 12:45:17 -05:00
|
|
|
mod span;
|
|
|
|
mod tree;
|
|
|
|
|
2022-11-28 14:12:49 -05:00
|
|
|
use span::Span;
|
|
|
|
|
2022-12-18 12:05:39 -05:00
|
|
|
pub use attr::Attributes;
|
|
|
|
|
2022-12-13 15:19:16 -05:00
|
|
|
/// Copy-on-write string: either a slice borrowed for `'s` or an owned `String`.
type CowStr<'s> = std::borrow::Cow<'s, str>;

/// The NUL character.
///
/// NOTE(review): presumably used as an end-of-input sentinel by the sibling modules; it is
/// not referenced in this part of the file — confirm against `lex`/`inline`.
const EOF: char = '\0';
|
|
|
|
|
2022-11-28 14:12:49 -05:00
|
|
|
#[derive(Debug, PartialEq, Eq)]
|
2022-11-28 18:33:43 -05:00
|
|
|
pub enum Event<'s> {
|
2022-11-29 12:34:13 -05:00
|
|
|
/// Start of a container.
|
|
|
|
Start(Container<'s>, Attributes<'s>),
|
|
|
|
/// End of a container.
|
|
|
|
End(Container<'s>),
|
2022-11-28 15:52:09 -05:00
|
|
|
/// A string object, text only.
|
2022-12-13 15:19:16 -05:00
|
|
|
Str(CowStr<'s>),
|
2022-11-29 12:34:13 -05:00
|
|
|
/// An atomic element.
|
|
|
|
Atom(Atom),
|
2022-11-27 15:59:54 -05:00
|
|
|
}
|
|
|
|
|
2022-11-28 14:12:49 -05:00
|
|
|
#[derive(Debug, PartialEq, Eq)]
|
2022-11-29 12:34:13 -05:00
|
|
|
pub enum Container<'s> {
|
|
|
|
/// A blockquote element.
|
|
|
|
Blockquote,
|
|
|
|
/// A list.
|
|
|
|
List(List),
|
|
|
|
/// An item of a list
|
|
|
|
ListItem,
|
|
|
|
/// A description list element.
|
|
|
|
DescriptionList,
|
|
|
|
/// Details describing a term within a description list.
|
|
|
|
DescriptionDetails,
|
|
|
|
/// A footnote definition.
|
|
|
|
Footnote { tag: &'s str },
|
|
|
|
/// A table element.
|
|
|
|
Table,
|
|
|
|
/// A row element of a table.
|
|
|
|
TableRow,
|
|
|
|
/// A block-level divider element.
|
2022-12-07 13:32:42 -05:00
|
|
|
Div { class: Option<&'s str> },
|
2022-11-28 15:52:09 -05:00
|
|
|
/// A paragraph.
|
2022-11-27 15:59:54 -05:00
|
|
|
Paragraph,
|
2022-11-28 15:52:09 -05:00
|
|
|
/// A heading.
|
2022-12-10 04:57:15 -05:00
|
|
|
Heading { level: usize },
|
2022-11-28 15:52:09 -05:00
|
|
|
/// A cell element of row within a table.
|
2022-11-27 15:59:54 -05:00
|
|
|
TableCell,
|
2022-12-01 14:34:23 -05:00
|
|
|
/// A term within a description list.
|
|
|
|
DescriptionTerm,
|
2022-11-28 15:52:09 -05:00
|
|
|
/// A block with raw markup for a specific output format.
|
2022-11-27 15:59:54 -05:00
|
|
|
RawBlock { format: &'s str },
|
2022-11-28 15:52:09 -05:00
|
|
|
/// A block with code in a specific language.
|
2022-12-04 11:56:49 -05:00
|
|
|
CodeBlock { lang: Option<&'s str> },
|
2022-12-01 14:46:53 -05:00
|
|
|
/// An inline divider element.
|
|
|
|
Span,
|
|
|
|
/// An inline link with a destination URL.
|
2022-12-13 15:19:16 -05:00
|
|
|
Link(CowStr<'s>, LinkType),
|
2022-12-18 01:59:11 -05:00
|
|
|
/// An inline image with a source URL. Inner Str objects compose the alternative text.
|
2022-12-17 12:03:06 -05:00
|
|
|
Image(CowStr<'s>, SpanLinkType),
|
2022-12-08 11:42:54 -05:00
|
|
|
/// An inline verbatim string.
|
|
|
|
Verbatim,
|
|
|
|
/// An inline or display math element.
|
|
|
|
Math { display: bool },
|
|
|
|
/// Inline raw markup for a specific output format.
|
|
|
|
RawInline { format: &'s str },
|
2022-11-28 18:33:43 -05:00
|
|
|
/// A subscripted element.
|
|
|
|
Subscript,
|
|
|
|
/// A superscripted element.
|
|
|
|
Superscript,
|
2022-12-01 14:46:53 -05:00
|
|
|
/// An inserted inline element.
|
2022-11-28 18:33:43 -05:00
|
|
|
Insert,
|
2022-12-01 14:46:53 -05:00
|
|
|
/// A deleted inline element.
|
2022-11-28 18:33:43 -05:00
|
|
|
Delete,
|
2022-12-01 14:46:53 -05:00
|
|
|
/// An inline element emphasized with a bold typeface.
|
2022-11-28 18:33:43 -05:00
|
|
|
Strong,
|
2022-12-01 14:46:53 -05:00
|
|
|
/// An emphasized inline element.
|
2022-11-28 18:33:43 -05:00
|
|
|
Emphasis,
|
|
|
|
/// A highlighted inline element.
|
|
|
|
Mark,
|
2022-11-30 13:56:08 -05:00
|
|
|
/// An quoted inline element, using single quotes.
|
2022-11-28 18:33:43 -05:00
|
|
|
SingleQuoted,
|
|
|
|
/// A quoted inline element, using double quotes.
|
|
|
|
DoubleQuoted,
|
2022-11-27 15:59:54 -05:00
|
|
|
}
|
|
|
|
|
2022-11-30 13:56:08 -05:00
|
|
|
impl<'s> Container<'s> {
|
|
|
|
/// Is a block element.
|
|
|
|
fn is_block(&self) -> bool {
|
|
|
|
match self {
|
|
|
|
Self::Blockquote
|
|
|
|
| Self::List(..)
|
|
|
|
| Self::ListItem
|
|
|
|
| Self::DescriptionList
|
|
|
|
| Self::DescriptionDetails
|
|
|
|
| Self::Footnote { .. }
|
|
|
|
| Self::Table
|
|
|
|
| Self::TableRow
|
2022-12-07 13:32:42 -05:00
|
|
|
| Self::Div { .. }
|
2022-11-30 13:56:08 -05:00
|
|
|
| Self::Paragraph
|
|
|
|
| Self::Heading { .. }
|
2022-12-01 14:34:23 -05:00
|
|
|
| Self::DescriptionTerm
|
2022-11-30 13:56:08 -05:00
|
|
|
| Self::TableCell
|
|
|
|
| Self::RawBlock { .. }
|
|
|
|
| Self::CodeBlock { .. } => true,
|
|
|
|
Self::Span
|
|
|
|
| Self::Link(..)
|
|
|
|
| Self::Image(..)
|
2022-12-08 11:42:54 -05:00
|
|
|
| Self::Verbatim
|
|
|
|
| Self::Math { .. }
|
|
|
|
| Self::RawInline { .. }
|
2022-11-30 13:56:08 -05:00
|
|
|
| Self::Subscript
|
|
|
|
| Self::Superscript
|
|
|
|
| Self::Insert
|
|
|
|
| Self::Delete
|
|
|
|
| Self::Strong
|
|
|
|
| Self::Emphasis
|
|
|
|
| Self::Mark
|
|
|
|
| Self::SingleQuoted
|
|
|
|
| Self::DoubleQuoted => false,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Is a block element that may contain children blocks.
|
|
|
|
fn is_block_container(&self) -> bool {
|
|
|
|
match self {
|
|
|
|
Self::Blockquote
|
|
|
|
| Self::List(..)
|
|
|
|
| Self::ListItem
|
|
|
|
| Self::DescriptionList
|
|
|
|
| Self::DescriptionDetails
|
|
|
|
| Self::Footnote { .. }
|
|
|
|
| Self::Table
|
|
|
|
| Self::TableRow
|
2022-12-07 13:32:42 -05:00
|
|
|
| Self::Div { .. } => true,
|
2022-11-30 13:56:08 -05:00
|
|
|
Self::Paragraph
|
|
|
|
| Self::Heading { .. }
|
|
|
|
| Self::TableCell
|
2022-12-01 14:34:23 -05:00
|
|
|
| Self::DescriptionTerm
|
2022-11-30 13:56:08 -05:00
|
|
|
| Self::RawBlock { .. }
|
|
|
|
| Self::CodeBlock { .. }
|
|
|
|
| Self::Span
|
|
|
|
| Self::Link(..)
|
|
|
|
| Self::Image(..)
|
2022-12-08 11:42:54 -05:00
|
|
|
| Self::Verbatim
|
|
|
|
| Self::Math { .. }
|
|
|
|
| Self::RawInline { .. }
|
2022-11-30 13:56:08 -05:00
|
|
|
| Self::Subscript
|
|
|
|
| Self::Superscript
|
|
|
|
| Self::Insert
|
|
|
|
| Self::Delete
|
|
|
|
| Self::Strong
|
|
|
|
| Self::Emphasis
|
|
|
|
| Self::Mark
|
|
|
|
| Self::SingleQuoted
|
|
|
|
| Self::DoubleQuoted => false,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-11-28 14:12:49 -05:00
|
|
|
/// The kind of a link or image that is written as a span.
#[derive(Debug, PartialEq, Eq)]
pub enum SpanLinkType {
    /// Produced by the parser in this file for inline links and inline images.
    Inline,
    /// NOTE(review): presumably for reference-style links/images; the parser in this file
    /// does not produce it yet (those inline containers are still `todo!`).
    Reference,
}
|
|
|
|
|
|
|
|
#[derive(Debug, PartialEq, Eq)]
|
|
|
|
pub enum LinkType {
|
|
|
|
Span(SpanLinkType),
|
2022-12-11 03:26:55 -05:00
|
|
|
AutoLink,
|
2022-11-28 15:52:09 -05:00
|
|
|
Email,
|
|
|
|
}
|
|
|
|
|
|
|
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
|
|
|
pub enum List {
|
|
|
|
Unordered,
|
2022-12-02 02:16:47 -05:00
|
|
|
Ordered { kind: OrderedListKind, start: u32 },
|
2022-11-28 15:52:09 -05:00
|
|
|
Description,
|
2022-12-06 15:31:08 -05:00
|
|
|
Task,
|
2022-11-28 15:52:09 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
/// The numbering scheme of an ordered list.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OrderedListKind {
    /// Decimal numbering, e.g. `1)`.
    Decimal,
    /// Lowercase alphabetic numbering, e.g. `a)`.
    AlphaLower,
    /// Uppercase alphabetic numbering, e.g. `A)`.
    AlphaUpper,
    /// Lowercase roman numbering, e.g. `iv)`.
    RomanLower,
    /// Uppercase roman numbering, e.g. `IV)`.
    RomanUpper,
}
|
|
|
|
|
|
|
|
/// The punctuation surrounding the number of an ordered list item.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OrderedListStyle {
    /// Number is followed by a period, e.g. `1.`.
    Period,
    /// Number is followed by a closing parenthesis, e.g. `1)`.
    Paren,
    /// Number is enclosed by parentheses, e.g. `(1)`.
    ParenParen,
}
|
|
|
|
|
2022-11-29 12:34:13 -05:00
|
|
|
/// An element without children, reported through [`Event::Atom`].
#[derive(Debug, PartialEq, Eq)]
pub enum Atom {
    /// A horizontal ellipsis, i.e. a set of three periods.
    Ellipsis,
    /// An en dash.
    EnDash,
    /// An em dash.
    EmDash,
    /// A thematic break, typically a horizontal rule.
    ThematicBreak,
    /// A space that must not break a line.
    NonBreakingSpace,
    /// A newline that may or may not break a line in the output.
    Softbreak,
    /// A newline that must break a line in the output.
    Hardbreak,
    /// An escape character, not visible in output.
    Escape,
    /// A blank line, not visible in output.
    Blankline,
}
|
|
|
|
|
|
|
|
impl<'s> Container<'s> {
|
2022-12-11 14:49:57 -05:00
|
|
|
fn from_leaf_block(content: &str, l: block::Leaf) -> Self {
|
|
|
|
match l {
|
|
|
|
block::Leaf::Paragraph => Self::Paragraph,
|
|
|
|
block::Leaf::Heading => Self::Heading {
|
|
|
|
level: content.len(),
|
2022-11-28 18:33:43 -05:00
|
|
|
},
|
2022-12-17 06:21:15 -05:00
|
|
|
block::Leaf::CodeBlock => panic!(),
|
2022-12-11 14:49:57 -05:00
|
|
|
_ => todo!(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn from_container_block(content: &'s str, c: block::Container) -> Self {
|
|
|
|
match c {
|
|
|
|
block::Container::Blockquote => Self::Blockquote,
|
2022-12-17 06:21:15 -05:00
|
|
|
block::Container::Div => panic!(),
|
2022-12-11 14:49:57 -05:00
|
|
|
block::Container::Footnote => Self::Footnote { tag: content },
|
|
|
|
block::Container::ListItem => todo!(),
|
2022-11-28 15:52:09 -05:00
|
|
|
}
|
|
|
|
}
|
2022-11-28 14:12:49 -05:00
|
|
|
}
|
|
|
|
|
2022-12-11 12:47:00 -05:00
|
|
|
/// Character iterator over a sequence of spans that all refer to the same source string.
#[derive(Clone)]
struct InlineChars<'s, I> {
    /// Source string the spans index into.
    src: &'s str,
    /// Spans that have not been visited yet.
    inlines: I,
    /// Character buffer, intended to hold the characters of the span currently being
    /// read; starts out empty.
    next: std::str::Chars<'s>,
}
|
|
|
|
|
|
|
|
// Implement inlines.flat_map(|sp| sp.of(self.src).chars())
|
2023-01-15 10:02:36 -05:00
|
|
|
impl<'s, I: Iterator<Item = Span>> InlineChars<'s, I> {
    /// Creates a character iterator over `inlines`, which are spans of `src`.
    ///
    /// The character buffer starts out empty.
    fn new(src: &'s str, inlines: I) -> Self {
        let next = "".chars();
        Self { src, inlines, next }
    }
}
|
|
|
|
|
2023-01-15 10:02:36 -05:00
|
|
|
impl<'s, I: Iterator<Item = Span>> Iterator for InlineChars<'s, I> {
    type Item = char;

    /// Yields the characters of every span in order, as if the spans were concatenated.
    ///
    /// The characters of the span currently being read are buffered in `self.next`. A
    /// temporary `flat_map` adaptor per call must not be used here: it is dropped after
    /// one character and would discard the remaining characters of the span.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            if let Some(c) = self.next.next() {
                return Some(c);
            }
            // The buffered span is exhausted (or empty): advance to the following span,
            // or finish when there is none left.
            self.next = self.inlines.next()?.of(self.src).chars();
        }
    }
}
|
|
|
|
|
2022-12-18 12:05:39 -05:00
|
|
|
trait DiscontinuousString<'s> {
|
|
|
|
type Chars: Iterator<Item = char>;
|
|
|
|
|
|
|
|
fn src(&self, span: Span) -> CowStr<'s>;
|
|
|
|
|
|
|
|
fn chars(&self) -> Self::Chars;
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'s> DiscontinuousString<'s> for &'s str {
|
|
|
|
type Chars = std::str::Chars<'s>;
|
|
|
|
|
|
|
|
fn src(&self, span: Span) -> CowStr<'s> {
|
|
|
|
span.of(self).into()
|
|
|
|
}
|
|
|
|
|
|
|
|
fn chars(&self) -> Self::Chars {
|
|
|
|
str::chars(self)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'s> DiscontinuousString<'s> for InlineSpans<'s> {
|
2023-01-15 09:47:28 -05:00
|
|
|
type Chars = InlineCharsIter<'s>;
|
2022-12-18 12:05:39 -05:00
|
|
|
|
|
|
|
fn src(&self, span: Span) -> CowStr<'s> {
|
2023-01-15 10:02:36 -05:00
|
|
|
Self::borrow_or_copy(self.src, self.spans.iter().copied(), span)
|
2022-12-18 12:05:39 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
fn chars(&self) -> Self::Chars {
|
|
|
|
// SAFETY: do not call set_spans while chars is in use
|
2023-01-15 09:47:28 -05:00
|
|
|
unsafe { std::mem::transmute(InlineChars::new(self.src, self.spans.iter().copied())) }
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'s, 'i> DiscontinuousString<'s> for InlineSpansSlice<'s, 'i> {
|
2023-01-15 10:02:36 -05:00
|
|
|
type Chars = InlineChars<'s, InlineSpansSliceIter<'i>>;
|
2023-01-15 09:47:28 -05:00
|
|
|
|
|
|
|
/// Borrow if continuous, copy if discontiunous.
|
|
|
|
fn src(&self, span: Span) -> CowStr<'s> {
|
2023-01-15 10:02:36 -05:00
|
|
|
InlineSpans::borrow_or_copy(self.src, self.spans(), span)
|
2023-01-15 09:47:28 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
fn chars(&self) -> Self::Chars {
|
2023-01-15 10:02:36 -05:00
|
|
|
InlineChars::new(self.src, self.spans())
|
2022-12-18 12:05:39 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-01-15 09:47:28 -05:00
|
|
|
#[derive(Default, Debug)]
|
2022-12-18 12:05:39 -05:00
|
|
|
struct InlineSpans<'s> {
|
|
|
|
src: &'s str,
|
|
|
|
spans: Vec<Span>,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'s> InlineSpans<'s> {
|
|
|
|
fn new(src: &'s str) -> Self {
|
|
|
|
Self {
|
|
|
|
src,
|
|
|
|
spans: Vec::new(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn set_spans(&mut self, spans: impl Iterator<Item = Span>) {
|
|
|
|
self.spans.clear();
|
|
|
|
self.spans.extend(spans);
|
|
|
|
}
|
2023-01-15 09:47:28 -05:00
|
|
|
|
|
|
|
fn slice<'i>(&'i self, span: Span) -> InlineSpansSlice<'s, 'i> {
|
|
|
|
let mut first = 0;
|
|
|
|
let mut last = 0;
|
|
|
|
let mut first_skip = 0;
|
|
|
|
let mut last_len = 0;
|
|
|
|
|
|
|
|
let mut a = 0;
|
|
|
|
for (i, sp) in self.spans.iter().enumerate() {
|
|
|
|
let b = a + sp.len();
|
|
|
|
if span.start() < b {
|
|
|
|
if a <= span.start() {
|
|
|
|
first = i;
|
|
|
|
first_skip = span.start() - a;
|
|
|
|
if span.end() <= b {
|
|
|
|
// continuous
|
|
|
|
last = i;
|
|
|
|
last_len = span.len();
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
last = i;
|
|
|
|
last_len = sp.len().min(span.end() - a);
|
|
|
|
break;
|
|
|
|
};
|
|
|
|
}
|
|
|
|
a = b;
|
|
|
|
}
|
|
|
|
|
|
|
|
assert_ne!(last_len, 0);
|
|
|
|
|
|
|
|
InlineSpansSlice {
|
|
|
|
src: self.src,
|
|
|
|
first_skip,
|
|
|
|
last_len,
|
|
|
|
spans: &self.spans[first..=last],
|
|
|
|
}
|
|
|
|
}
|
2023-01-15 10:02:36 -05:00
|
|
|
|
|
|
|
/// Borrow if continuous, copy if discontiunous.
|
|
|
|
fn borrow_or_copy<I: Iterator<Item = Span>>(src: &str, spans: I, span: Span) -> CowStr {
|
|
|
|
let mut a = 0;
|
|
|
|
let mut s = String::new();
|
|
|
|
for sp in spans {
|
|
|
|
let b = a + sp.len();
|
|
|
|
if span.start() < b {
|
|
|
|
let r = if a <= span.start() {
|
|
|
|
if span.end() <= b {
|
|
|
|
// continuous
|
|
|
|
return CowStr::Borrowed(&sp.of(src)[span.start() - a..span.end() - a]);
|
|
|
|
}
|
|
|
|
(span.start() - a)..sp.len()
|
|
|
|
} else {
|
|
|
|
0..sp.len().min(span.end() - a)
|
|
|
|
};
|
|
|
|
s.push_str(&sp.of(src)[r]);
|
|
|
|
}
|
|
|
|
a = b;
|
|
|
|
}
|
|
|
|
assert_eq!(span.len(), s.len());
|
|
|
|
CowStr::Owned(s)
|
|
|
|
}
|
2023-01-15 09:47:28 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
struct InlineSpansSlice<'s, 'i> {
|
|
|
|
src: &'s str,
|
|
|
|
first_skip: usize,
|
|
|
|
last_len: usize,
|
|
|
|
spans: &'i [Span],
|
2022-12-18 12:05:39 -05:00
|
|
|
}
|
|
|
|
|
2023-01-15 10:02:36 -05:00
|
|
|
type InlineSpansSliceIter<'i> = std::iter::Chain<
|
|
|
|
std::iter::Chain<std::iter::Once<Span>, std::iter::Copied<std::slice::Iter<'i, Span>>>,
|
|
|
|
std::iter::Once<Span>,
|
|
|
|
>;
|
|
|
|
|
|
|
|
impl<'s, 'i> InlineSpansSlice<'s, 'i> {
|
|
|
|
fn spans(&self) -> InlineSpansSliceIter<'i> {
|
|
|
|
let (span_start, r_middle, span_end) = if self.spans.len() == 1 {
|
|
|
|
(
|
|
|
|
Span::by_len(self.spans[0].start() + self.first_skip, self.last_len),
|
|
|
|
0..0,
|
|
|
|
Span::by_len(self.spans[self.spans.len() - 1].start(), 0),
|
|
|
|
)
|
|
|
|
} else {
|
|
|
|
(
|
|
|
|
Span::new(self.spans[0].start() + self.first_skip, self.spans[0].end()),
|
|
|
|
1..self.spans.len().saturating_sub(2),
|
|
|
|
Span::by_len(self.spans[self.spans.len() - 1].start(), self.last_len),
|
|
|
|
)
|
|
|
|
};
|
|
|
|
std::iter::once(span_start)
|
|
|
|
.chain(self.spans[r_middle].iter().copied())
|
|
|
|
.chain(std::iter::once(span_end))
|
|
|
|
}
|
|
|
|
}
|
2023-01-15 09:47:28 -05:00
|
|
|
/// Character iterator over all spans of an [`InlineSpans`].
///
/// The `'static` lifetime of the slice iterator is a placeholder: values of this type are
/// created by transmuting an iterator that borrows the span list of an `InlineSpans`.
type InlineCharsIter<'s> = InlineChars<'s, std::iter::Copied<std::slice::Iter<'static, Span>>>;
|
|
|
|
|
2022-11-28 14:19:22 -05:00
|
|
|
pub struct Parser<'s> {
|
2022-11-20 13:13:48 -05:00
|
|
|
src: &'s str,
|
2022-11-28 14:19:22 -05:00
|
|
|
tree: block::Tree,
|
2022-12-18 12:05:39 -05:00
|
|
|
inlines: InlineSpans<'s>,
|
2023-01-15 09:47:28 -05:00
|
|
|
inline_parser: Option<inline::Parser<InlineCharsIter<'s>>>,
|
2022-11-26 19:12:56 -05:00
|
|
|
inline_start: usize,
|
2022-11-20 13:13:48 -05:00
|
|
|
}
|
|
|
|
|
2022-11-28 14:19:22 -05:00
|
|
|
impl<'s> Parser<'s> {
|
|
|
|
#[must_use]
|
|
|
|
pub fn new(src: &'s str) -> Self {
|
|
|
|
Self {
|
|
|
|
src,
|
|
|
|
tree: block::parse(src),
|
2022-12-18 12:05:39 -05:00
|
|
|
inlines: InlineSpans::new(src),
|
2022-12-11 12:47:00 -05:00
|
|
|
inline_parser: None,
|
2022-11-28 14:19:22 -05:00
|
|
|
inline_start: 0,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-12-17 06:21:15 -05:00
|
|
|
impl<'s> Parser<'s> {
|
2023-01-15 09:47:28 -05:00
|
|
|
fn inline(&mut self) -> Option<Event<'s>> {
|
|
|
|
self.inline_parser.as_mut().and_then(|parser| {
|
|
|
|
let mut inline = parser.next();
|
|
|
|
|
|
|
|
let mut first_is_attr = false;
|
|
|
|
let attributes = inline.as_ref().map_or_else(Attributes::new, |inl| {
|
|
|
|
if let inline::EventKind::Attributes = inl.kind {
|
|
|
|
first_is_attr = true;
|
|
|
|
attr::parse(self.inlines.slice(inl.span))
|
2022-12-17 06:21:15 -05:00
|
|
|
} else {
|
2023-01-15 09:47:28 -05:00
|
|
|
Attributes::new()
|
2022-12-17 06:21:15 -05:00
|
|
|
}
|
2023-01-15 09:47:28 -05:00
|
|
|
});
|
2022-11-20 13:13:48 -05:00
|
|
|
|
2023-01-15 09:47:28 -05:00
|
|
|
if first_is_attr {
|
|
|
|
inline = parser.next();
|
2022-12-11 12:47:00 -05:00
|
|
|
}
|
2022-11-22 13:19:21 -05:00
|
|
|
|
2023-01-15 09:47:28 -05:00
|
|
|
inline.map(|inline| match inline.kind {
|
|
|
|
inline::EventKind::Enter(c) | inline::EventKind::Exit(c) => {
|
|
|
|
let t = match c {
|
|
|
|
inline::Container::Span => Container::Span,
|
|
|
|
inline::Container::Verbatim => Container::Verbatim,
|
|
|
|
inline::Container::InlineMath => Container::Math { display: false },
|
|
|
|
inline::Container::DisplayMath => Container::Math { display: true },
|
|
|
|
inline::Container::RawFormat => Container::RawInline {
|
|
|
|
format: match self.inlines.src(inline.span) {
|
|
|
|
CowStr::Owned(_) => panic!(),
|
|
|
|
CowStr::Borrowed(s) => s,
|
|
|
|
},
|
|
|
|
},
|
|
|
|
inline::Container::Subscript => Container::Subscript,
|
|
|
|
inline::Container::Superscript => Container::Superscript,
|
|
|
|
inline::Container::Insert => Container::Insert,
|
|
|
|
inline::Container::Delete => Container::Delete,
|
|
|
|
inline::Container::Emphasis => Container::Emphasis,
|
|
|
|
inline::Container::Strong => Container::Strong,
|
|
|
|
inline::Container::Mark => Container::Mark,
|
|
|
|
inline::Container::SingleQuoted => Container::SingleQuoted,
|
|
|
|
inline::Container::DoubleQuoted => Container::DoubleQuoted,
|
|
|
|
inline::Container::InlineLink => Container::Link(
|
|
|
|
match self.inlines.src(inline.span) {
|
|
|
|
CowStr::Owned(s) => s.replace('\n', "").into(),
|
|
|
|
s @ CowStr::Borrowed(_) => s,
|
|
|
|
},
|
|
|
|
LinkType::Span(SpanLinkType::Inline),
|
|
|
|
),
|
|
|
|
inline::Container::InlineImage => Container::Image(
|
|
|
|
match self.inlines.src(inline.span) {
|
|
|
|
CowStr::Owned(s) => s.replace('\n', "").into(),
|
|
|
|
s @ CowStr::Borrowed(_) => s,
|
|
|
|
},
|
|
|
|
SpanLinkType::Inline,
|
|
|
|
),
|
|
|
|
inline::Container::ReferenceLink => todo!("{:?}", c),
|
|
|
|
inline::Container::ReferenceImage => todo!("{:?}", c),
|
|
|
|
inline::Container::Autolink => todo!("{:?}", c),
|
|
|
|
};
|
|
|
|
if matches!(inline.kind, inline::EventKind::Enter(_)) {
|
|
|
|
Event::Start(t, attributes)
|
|
|
|
} else {
|
|
|
|
Event::End(t)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
inline::EventKind::Atom(a) => match a {
|
|
|
|
inline::Atom::Ellipsis => Event::Atom(Atom::Ellipsis),
|
|
|
|
inline::Atom::EnDash => Event::Atom(Atom::EnDash),
|
|
|
|
inline::Atom::EmDash => Event::Atom(Atom::EmDash),
|
|
|
|
inline::Atom::Nbsp => Event::Atom(Atom::NonBreakingSpace),
|
|
|
|
inline::Atom::Softbreak => Event::Atom(Atom::Softbreak),
|
|
|
|
inline::Atom::Hardbreak => Event::Atom(Atom::Hardbreak),
|
|
|
|
inline::Atom::Escape => Event::Atom(Atom::Escape),
|
|
|
|
},
|
|
|
|
inline::EventKind::Str => Event::Str(self.inlines.src(inline.span)),
|
|
|
|
inline::EventKind::Attributes | inline::EventKind::AttributesDummy => {
|
|
|
|
panic!("{:?}", inline)
|
|
|
|
}
|
|
|
|
})
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
fn block(&mut self) -> Option<Event<'s>> {
|
|
|
|
let mut attributes = Attributes::new();
|
2022-12-07 13:32:42 -05:00
|
|
|
for ev in &mut self.tree {
|
|
|
|
let content = ev.span.of(self.src);
|
|
|
|
let event = match ev.kind {
|
2022-12-10 04:26:06 -05:00
|
|
|
tree::EventKind::Atom(a) => match a {
|
2022-12-07 13:32:42 -05:00
|
|
|
block::Atom::Blankline => Event::Atom(Atom::Blankline),
|
2022-12-10 04:26:06 -05:00
|
|
|
block::Atom::ThematicBreak => Event::Atom(Atom::ThematicBreak),
|
2022-12-07 13:32:42 -05:00
|
|
|
block::Atom::Attributes => {
|
2023-01-15 09:47:28 -05:00
|
|
|
attributes.parse(content);
|
2022-12-07 13:32:42 -05:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
},
|
2022-12-12 12:22:13 -05:00
|
|
|
tree::EventKind::Enter(c) => match c {
|
|
|
|
block::Node::Leaf(l) => {
|
2022-12-18 12:05:39 -05:00
|
|
|
self.inlines.set_spans(self.tree.inlines());
|
|
|
|
self.inline_parser = Some(inline::Parser::new(self.inlines.chars()));
|
2022-12-07 13:32:42 -05:00
|
|
|
self.inline_start = ev.span.end();
|
2022-12-12 12:22:13 -05:00
|
|
|
let container = match l {
|
|
|
|
block::Leaf::CodeBlock { .. } => {
|
|
|
|
self.inline_start += 1; // skip newline
|
|
|
|
Container::CodeBlock {
|
|
|
|
lang: (!ev.span.is_empty()).then(|| content),
|
|
|
|
}
|
2022-12-07 13:32:42 -05:00
|
|
|
}
|
2022-12-12 12:22:13 -05:00
|
|
|
_ => Container::from_leaf_block(content, l),
|
|
|
|
};
|
2023-01-15 09:47:28 -05:00
|
|
|
Event::Start(container, attributes)
|
2022-12-12 12:22:13 -05:00
|
|
|
}
|
|
|
|
block::Node::Container(c) => {
|
|
|
|
let container = match c {
|
|
|
|
block::Container::Div { .. } => Container::Div {
|
|
|
|
class: (!ev.span.is_empty()).then(|| content),
|
|
|
|
},
|
|
|
|
_ => Container::from_container_block(content, c),
|
|
|
|
};
|
2023-01-15 09:47:28 -05:00
|
|
|
Event::Start(container, attributes)
|
2022-12-12 12:22:13 -05:00
|
|
|
}
|
|
|
|
},
|
|
|
|
tree::EventKind::Exit(c) => match c {
|
|
|
|
block::Node::Leaf(l) => Event::End(Container::from_leaf_block(content, l)),
|
|
|
|
block::Node::Container(c) => {
|
|
|
|
Event::End(Container::from_container_block(content, c))
|
|
|
|
}
|
|
|
|
},
|
|
|
|
tree::EventKind::Inline => unreachable!(),
|
2022-12-07 13:32:42 -05:00
|
|
|
};
|
|
|
|
return Some(event);
|
|
|
|
}
|
|
|
|
None
|
2022-11-20 13:13:48 -05:00
|
|
|
}
|
|
|
|
}
|
2022-11-22 13:19:21 -05:00
|
|
|
|
2023-01-15 09:47:28 -05:00
|
|
|
impl<'s> Iterator for Parser<'s> {
|
|
|
|
type Item = Event<'s>;
|
|
|
|
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
|
|
self.inline().or_else(|| self.block())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-11-22 13:19:21 -05:00
|
|
|
#[cfg(test)]
mod test {
    use super::Atom::*;
    use super::Attributes;
    use super::Container::*;
    use super::Event::*;
    use super::LinkType;
    use super::SpanLinkType;

    /// Parses the first argument and asserts that the resulting events equal the
    /// remaining arguments. On failure, the input and a side-by-side listing of actual
    /// and expected events are printed, with differing rows highlighted.
    macro_rules! test_parse {
        ($src:expr $(,$($token:expr),* $(,)?)?) => {
            #[allow(unused)]
            let actual = super::Parser::new($src).collect::<Vec<_>>();
            let expected = &[$($($token),*,)?];
            assert_eq!(
                actual,
                expected,
                concat!(
                    "\n",
                    "\x1b[0;1m====================== INPUT =========================\x1b[0m\n",
                    "\x1b[2m{}",
                    "\x1b[0;1m================ ACTUAL vs EXPECTED ==================\x1b[0m\n",
                    "{}",
                    "\x1b[0;1m======================================================\x1b[0m\n",
                ),
                $src,
                {
                    let lhs = actual.iter().map(|ev| format!("{:?}", ev)).collect::<Vec<_>>();
                    let rhs = expected.iter().map(|ev| format!("{:?}", ev)).collect::<Vec<_>>();
                    let rows = lhs.len().max(rhs.len());
                    let a_width = lhs.iter().map(String::len).max().unwrap_or(0);
                    // Pad the shorter column with empty cells so that every row is shown.
                    lhs.iter()
                        .map(String::as_str)
                        .chain(std::iter::repeat(""))
                        .zip(rhs.iter().map(String::as_str).chain(std::iter::repeat("")))
                        .take(rows)
                        .map(|(l, r)| {
                            let same = l == r;
                            format!(
                                "\x1b[{}m{:a_width$}\x1b[0m {}= \x1b[{}m{}\x1b[0m\n",
                                if same { "2" } else { "31" },
                                l,
                                if same { '=' } else { '!' },
                                if same { "2" } else { "32" },
                                r,
                                a_width = a_width,
                            )
                        })
                        .collect::<String>()
                },
            );
        };
    }

    /// Empty input produces no events.
    #[test]
    fn empty() {
        test_parse!("");
    }

    /// Headings, empty and with content continued on a second line.
    #[test]
    fn heading() {
        test_parse!(
            "#\n",
            Start(Heading { level: 1 }, Attributes::new()),
            End(Heading { level: 1 }),
        );
        test_parse!(
            "# abc\ndef\n",
            Start(Heading { level: 1 }, Attributes::new()),
            Str("abc".into()),
            Atom(Softbreak),
            Str("def".into()),
            End(Heading { level: 1 }),
        );
    }

    /// A blockquote marker without content encloses a blank line.
    #[test]
    fn blockquote() {
        test_parse!(
            ">\n",
            Start(Blockquote, Attributes::new()),
            Atom(Blankline),
            End(Blockquote),
        );
    }

    /// Paragraphs, alone and separated by a blank line.
    #[test]
    fn para() {
        test_parse!(
            "para",
            Start(Paragraph, Attributes::new()),
            Str("para".into()),
            End(Paragraph),
        );
        test_parse!(
            "pa ra",
            Start(Paragraph, Attributes::new()),
            Str("pa ra".into()),
            End(Paragraph),
        );
        test_parse!(
            "para0\n\npara1",
            Start(Paragraph, Attributes::new()),
            Str("para0".into()),
            End(Paragraph),
            Atom(Blankline),
            Start(Paragraph, Attributes::new()),
            Str("para1".into()),
            End(Paragraph),
        );
    }

    /// Verbatim content spanning two lines, also when the lines are blockquote lines.
    #[test]
    fn verbatim() {
        test_parse!(
            "`abc\ndef",
            Start(Paragraph, Attributes::new()),
            Start(Verbatim, Attributes::new()),
            Str("abc\ndef".into()),
            End(Verbatim),
            End(Paragraph),
        );
        test_parse!(
            concat!(
                "> `abc\n",
                "> def\n", //
            ),
            Start(Blockquote, Attributes::new()),
            Start(Paragraph, Attributes::new()),
            Start(Verbatim, Attributes::new()),
            Str("abc\ndef".into()),
            End(Verbatim),
            End(Paragraph),
            End(Blockquote),
        );
    }

    /// Raw inline markup with an explicit format.
    #[test]
    fn raw_inline() {
        test_parse!(
            "``raw\nraw``{=format}",
            Start(Paragraph, Attributes::new()),
            Start(RawInline { format: "format" }, Attributes::new()),
            Str("raw\nraw".into()),
            End(RawInline { format: "format" }),
            End(Paragraph),
        );
    }

    /// Inline links, including a URL that continues on the next blockquote line.
    #[test]
    fn link_inline() {
        test_parse!(
            "[text](url)",
            Start(Paragraph, Attributes::new()),
            Start(
                Link("url".into(), LinkType::Span(SpanLinkType::Inline)),
                Attributes::new()
            ),
            Str("text".into()),
            End(Link("url".into(), LinkType::Span(SpanLinkType::Inline))),
            End(Paragraph),
        );
        test_parse!(
            concat!(
                "> [text](url\n",
                "> url)\n", //
            ),
            Start(Blockquote, Attributes::new()),
            Start(Paragraph, Attributes::new()),
            Start(
                Link("urlurl".into(), LinkType::Span(SpanLinkType::Inline)),
                Attributes::new()
            ),
            Str("text".into()),
            End(Link("urlurl".into(), LinkType::Span(SpanLinkType::Inline))),
            End(Paragraph),
            End(Blockquote),
        );
    }

    /// Block attributes are attached to the block that follows them.
    #[test]
    fn attr_block() {
        test_parse!(
            "{.some_class}\npara\n",
            Start(Paragraph, [("class", "some_class")].into_iter().collect()),
            Str("para".into()),
            End(Paragraph),
        );
    }

    /// Inline attributes are attached to the inline element they follow in the source.
    #[test]
    fn attr_inline() {
        test_parse!(
            "abc _def_{.ghi}",
            Start(Paragraph, Attributes::new()),
            Str("abc ".into()),
            Start(Emphasis, [("class", "ghi")].into_iter().collect()),
            Str("def".into()),
            End(Emphasis),
            End(Paragraph),
        );
    }
}
|