parent
b6b3a6f40b
commit
79b5119209
2 changed files with 145 additions and 7 deletions
146
src/lib.rs
146
src/lib.rs
|
@ -51,6 +51,7 @@
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::fmt::Write as FmtWrite;
|
use std::fmt::Write as FmtWrite;
|
||||||
use std::io;
|
use std::io;
|
||||||
|
use std::ops::Range;
|
||||||
|
|
||||||
#[cfg(feature = "html")]
|
#[cfg(feature = "html")]
|
||||||
pub mod html;
|
pub mod html;
|
||||||
|
@ -793,7 +794,117 @@ impl<'s> Parser<'s> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn inline(&mut self) -> Option<Event<'s>> {
|
/// Turn the [`Parser`] into an iterator of tuples, each with an [`Event`] and a start/end byte
|
||||||
|
/// offset for its corresponding input (as a [`Range<usize>`]).
|
||||||
|
///
|
||||||
|
/// Generally, the range of each event does not overlap with any other event and the ranges are
|
||||||
|
/// in same order as the events are emitted, i.e. the start offset of an event must be greater
|
||||||
|
/// or equal to the (exclusive) end offset of all events that were emitted before that event.
|
||||||
|
/// However, there are some exceptions to this rule:
|
||||||
|
///
|
||||||
|
/// - Blank lines inbetween block attributes and the block causes the blankline events to
|
||||||
|
/// overlap with the block start event.
|
||||||
|
/// - Caption events are emitted before the table rows while the input for the caption content
|
||||||
|
/// is located after the table rows, causing the ranges to be out of order.
|
||||||
|
///
|
||||||
|
/// Characters between events, that are not part of any event range, are typically whitespace
|
||||||
|
/// but may also consist of unattached attributes or `>` characters from blockquotes.
|
||||||
|
///
|
||||||
|
/// # Examples
|
||||||
|
///
|
||||||
|
/// Start and end events of containers correspond only to the start and end markers for that
|
||||||
|
/// container, not its inner content:
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// # use jotdown::*;
|
||||||
|
/// # use jotdown::Event::*;
|
||||||
|
/// # use jotdown::Container::*;
|
||||||
|
/// let input = "> _hello_ [text](url)\n";
|
||||||
|
/// assert!(matches!(
|
||||||
|
/// Parser::new(input)
|
||||||
|
/// .into_offset_iter()
|
||||||
|
/// .map(|(e, r)| (&input[r], e))
|
||||||
|
/// .collect::<Vec<_>>()
|
||||||
|
/// .as_slice(),
|
||||||
|
/// &[
|
||||||
|
/// (">", Start(Blockquote, ..)),
|
||||||
|
/// ("", Start(Paragraph, ..)),
|
||||||
|
/// ("_", Start(Emphasis, ..)),
|
||||||
|
/// ("hello", Str(..)),
|
||||||
|
/// ("_", End(Emphasis)),
|
||||||
|
/// (" ", Str(..)),
|
||||||
|
/// ("[", Start(Link { .. }, ..)),
|
||||||
|
/// ("text", Str(..)),
|
||||||
|
/// ("](url)", End(Link { .. })),
|
||||||
|
/// ("", End(Paragraph)),
|
||||||
|
/// ("", End(Blockquote)),
|
||||||
|
/// ],
|
||||||
|
/// ));
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// _Block_ attributes that belong to a container are included in the _start_ event. _Inline_
|
||||||
|
/// attributes that belong to a container are included in the _end_ event:
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// # use jotdown::*;
|
||||||
|
/// # use jotdown::Event::*;
|
||||||
|
/// # use jotdown::Container::*;
|
||||||
|
/// let input = "
|
||||||
|
/// {.quote}
|
||||||
|
/// > [Hello]{lang=en} world!";
|
||||||
|
/// assert!(matches!(
|
||||||
|
/// Parser::new(input)
|
||||||
|
/// .into_offset_iter()
|
||||||
|
/// .map(|(e, r)| (&input[r], e))
|
||||||
|
/// .collect::<Vec<_>>()
|
||||||
|
/// .as_slice(),
|
||||||
|
/// &[
|
||||||
|
/// ("\n", Blankline),
|
||||||
|
/// ("{.quote}\n>", Start(Blockquote, ..)),
|
||||||
|
/// ("", Start(Paragraph, ..)),
|
||||||
|
/// ("[", Start(Span, ..)),
|
||||||
|
/// ("Hello", Str(..)),
|
||||||
|
/// ("]{lang=en}", End(Span)),
|
||||||
|
/// (" world!", Str(..)),
|
||||||
|
/// ("", End(Paragraph)),
|
||||||
|
/// ("", End(Blockquote)),
|
||||||
|
/// ],
|
||||||
|
/// ));
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// Inline events that span multiple lines may contain characters from outer block containers
|
||||||
|
/// (e.g. `>` characters from blockquotes or whitespace from list items):
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// # use jotdown::*;
|
||||||
|
/// # use jotdown::Event::*;
|
||||||
|
/// # use jotdown::Container::*;
|
||||||
|
/// let input = "
|
||||||
|
/// > [txt](multi
|
||||||
|
/// > line)";
|
||||||
|
/// assert!(matches!(
|
||||||
|
/// Parser::new(input)
|
||||||
|
/// .into_offset_iter()
|
||||||
|
/// .map(|(e, r)| (&input[r], e))
|
||||||
|
/// .collect::<Vec<_>>()
|
||||||
|
/// .as_slice(),
|
||||||
|
/// &[
|
||||||
|
/// ("\n", Blankline),
|
||||||
|
/// (">", Start(Blockquote, ..)),
|
||||||
|
/// ("", Start(Paragraph, ..)),
|
||||||
|
/// ("[", Start(Link { .. }, ..)),
|
||||||
|
/// ("txt", Str(..)),
|
||||||
|
/// ("](multi\n> line)", End(Link { .. })),
|
||||||
|
/// ("", End(Paragraph)),
|
||||||
|
/// ("", End(Blockquote)),
|
||||||
|
/// ],
|
||||||
|
/// ));
|
||||||
|
/// ```
|
||||||
|
pub fn into_offset_iter(self) -> OffsetIter<'s> {
|
||||||
|
OffsetIter { parser: self }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn inline(&mut self) -> Option<(Event<'s>, Range<usize>)> {
|
||||||
let next = self.inline_parser.next()?;
|
let next = self.inline_parser.next()?;
|
||||||
|
|
||||||
let (inline, mut attributes) = match next {
|
let (inline, mut attributes) = match next {
|
||||||
|
@ -809,7 +920,7 @@ impl<'s> Parser<'s> {
|
||||||
|
|
||||||
inline.map(|inline| {
|
inline.map(|inline| {
|
||||||
let enter = matches!(inline.kind, inline::EventKind::Enter(_));
|
let enter = matches!(inline.kind, inline::EventKind::Enter(_));
|
||||||
match inline.kind {
|
let event = match inline.kind {
|
||||||
inline::EventKind::Enter(c) | inline::EventKind::Exit(c) => {
|
inline::EventKind::Enter(c) | inline::EventKind::Exit(c) => {
|
||||||
let t = match c {
|
let t = match c {
|
||||||
inline::Container::Span => Container::Span,
|
inline::Container::Span => Container::Span,
|
||||||
|
@ -893,11 +1004,12 @@ impl<'s> Parser<'s> {
|
||||||
inline::EventKind::Attributes { .. } | inline::EventKind::Placeholder => {
|
inline::EventKind::Attributes { .. } | inline::EventKind::Placeholder => {
|
||||||
panic!("{:?}", inline)
|
panic!("{:?}", inline)
|
||||||
}
|
}
|
||||||
}
|
};
|
||||||
|
(event, inline.span.into())
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn block(&mut self) -> Option<Event<'s>> {
|
fn block(&mut self) -> Option<(Event<'s>, Range<usize>)> {
|
||||||
while let Some(mut ev) = &mut self.blocks.next() {
|
while let Some(mut ev) = &mut self.blocks.next() {
|
||||||
let event = match ev.kind {
|
let event = match ev.kind {
|
||||||
block::EventKind::Atom(a) => match a {
|
block::EventKind::Atom(a) => match a {
|
||||||
|
@ -1030,22 +1142,42 @@ impl<'s> Parser<'s> {
|
||||||
Some(block::EventKind::Inline),
|
Some(block::EventKind::Inline),
|
||||||
),
|
),
|
||||||
);
|
);
|
||||||
return self.next();
|
return self.next_span();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
block::EventKind::Stale => continue,
|
block::EventKind::Stale => continue,
|
||||||
};
|
};
|
||||||
return Some(event);
|
return Some((event, ev.span.into()));
|
||||||
}
|
}
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn next_span(&mut self) -> Option<(Event<'s>, Range<usize>)> {
|
||||||
|
self.inline().or_else(|| self.block())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'s> Iterator for Parser<'s> {
|
impl<'s> Iterator for Parser<'s> {
|
||||||
type Item = Event<'s>;
|
type Item = Event<'s>;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
self.inline().or_else(|| self.block())
|
self.next_span().map(|(e, _)| e)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An iterator that is identical to a [`Parser`], except that it also emits the location of each
|
||||||
|
/// event within the input.
|
||||||
|
///
|
||||||
|
/// See the documentation of [`Parser::into_offset_iter`] for more information.
|
||||||
|
pub struct OffsetIter<'s> {
|
||||||
|
parser: Parser<'s>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'s> Iterator for OffsetIter<'s> {
|
||||||
|
type Item = (Event<'s>, Range<usize>);
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
self.parser.next_span()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -4,6 +4,12 @@ pub struct Span {
|
||||||
end: u32,
|
end: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl From<Span> for std::ops::Range<usize> {
|
||||||
|
fn from(span: Span) -> Self {
|
||||||
|
span.start()..span.end()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl Span {
|
impl Span {
|
||||||
pub fn new(start: usize, end: usize) -> Self {
|
pub fn new(start: usize, end: usize) -> Self {
|
||||||
Self::by_len(start, end.checked_sub(start).unwrap())
|
Self::by_len(start, end.checked_sub(start).unwrap())
|
||||||
|
|
Loading…
Reference in a new issue