maybe functional multi-line inline

This commit is contained in:
Noah Hellman 2022-12-11 20:49:57 +01:00
parent 946d88e5c0
commit 3339e785a7
5 changed files with 204 additions and 246 deletions

View file

@ -28,9 +28,6 @@ pub enum Block {
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Atom {
/// Inline content with unparsed inline elements.
Inline,
/// A line with no non-whitespace characters.
Blankline,
@ -143,10 +140,7 @@ impl<'s> TreeParser<'s> {
};
match kind {
Block::Atom(a) => {
assert_ne!(a, Inline);
self.tree.atom(a, span);
}
Block::Atom(a) => self.tree.atom(a, span),
Block::Leaf(l) => {
self.tree.enter(kind, span);
@ -169,7 +163,7 @@ impl<'s> TreeParser<'s> {
}
}
lines.iter().for_each(|line| self.tree.atom(Inline, *line));
lines.iter().for_each(|line| self.tree.inline(*line));
self.tree.exit();
}
Block::Container(c) => {
@ -411,6 +405,7 @@ mod test {
use super::Container::*;
use super::Leaf::*;
/*
macro_rules! test_parse {
($src:expr $(,$($event:expr),* $(,)?)?) => {
let t = super::TreeParser::new($src).parse();
@ -731,4 +726,5 @@ mod test {
1,
);
}
*/
}

View file

@ -58,54 +58,13 @@ pub struct Event {
pub span: Span,
}
/// Current parsing state of elements that are not recursive, i.e. may not contain arbitrary inline
/// elements. There can only be one of these at a time, due to the non-recursion.
#[derive(Debug)]
enum AtomicState {
None,
/// Within a verbatim element, e.g. '$`xxxxx'
Verbatim {
kind: Container,
opener_len: usize,
opener_event: usize,
},
/// Potentially within an attribute list, e.g. '{a=b '.
Attributes {
comment: bool,
},
/// Potentially within an autolink URL or an inline link URL, e.g. '<https://' or
/// '[text](https://'.
Url {
auto: bool,
},
/// Potentially within a reference link tag, e.g. '[text][tag '
ReferenceLinkTag,
}
impl AtomicState {
fn verbatim(&self) -> Option<(Container, usize, usize)> {
if let Self::Verbatim {
kind,
opener_len,
opener_event,
} = self
{
Some((*kind, *opener_len, *opener_event))
} else {
None
}
}
}
pub struct Parser<I> {
/// The last inline element has been provided, finish current events.
last: bool,
/// Lexer, hosting upcoming source.
lexer: lex::Lexer<I>,
/// Span of current event.
span: Span,
/// State of non-recursive elements.
atomic_state: AtomicState,
/// The kind, opener_len and opener_event of the current verbatim container if within one.
verbatim: Option<(Container, usize, usize)>,
/// Stack with kind and index of _potential_ openers for typesetting containers.
typesets: Vec<(Container, usize)>,
/// Stack with index of _potential_ span/link openers.
@ -119,26 +78,15 @@ pub struct Parser<I> {
impl<I: Iterator<Item = char> + Clone> Parser<I> {
pub fn new(chars: I) -> Self {
Self {
last: true,
lexer: lex::Lexer::new(chars),
span: Span::new(0, 0),
atomic_state: AtomicState::None,
verbatim: None,
typesets: Vec::new(),
spans: Vec::new(),
events: std::collections::VecDeque::new(),
}
}
/*
pub fn parse(&mut self, src: &str, last: bool) {
self.lexer = lex::Lexer::new(src.chars());
if last {
assert!(!self.last);
}
self.last = last;
}
*/
fn eat(&mut self) -> Option<lex::Token> {
let tok = self.lexer.next();
if let Some(t) = &tok {
@ -158,8 +106,7 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
fn parse_event(&mut self) -> Option<Event> {
self.reset_span();
self.eat().map(|first| {
self.atomic(&first)
.or_else(|| self.parse_verbatim(&first))
self.parse_verbatim(&first)
.or_else(|| self.parse_span(&first))
.or_else(|| self.parse_typeset(&first))
.or_else(|| self.parse_atom(&first))
@ -170,41 +117,45 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
})
}
fn atomic(&mut self, first: &lex::Token) -> Option<Event> {
Some(match self.atomic_state {
AtomicState::None => return None,
AtomicState::Verbatim {
kind,
opener_len,
opener_event,
} => {
fn parse_verbatim(&mut self, first: &lex::Token) -> Option<Event> {
self.verbatim
.map(|(kind, opener_len, opener_event)| {
assert_eq!(self.events[opener_event].kind, EventKind::Enter(kind));
let kind = if matches!(first.kind, lex::Kind::Seq(lex::Sequence::Backtick))
&& first.len == opener_len
{
self.atomic_state = AtomicState::None;
let kind = todo!();
/*
if matches!(kind, Verbatim) && self.lexer.peek_ahead().starts_with("{=") {
let mut chars = self.lexer.peek_ahead()["{=".len()..].chars();
let len = chars
.clone()
.take_while(|c| !c.is_whitespace() && !matches!(c, '{' | '}'))
.count();
if len > 0 && chars.nth(len) == Some('}') {
self.lexer = lex::Lexer::new(chars.as_str());
let span_format = Span::by_len(self.span.end() + "{=".len(), len);
self.events[opener_event].kind = EventKind::Enter(RawFormat);
self.events[opener_event].span = span_format;
self.span = span_format;
RawFormat
} else {
Verbatim
}
self.verbatim = None;
let kind = if matches!(kind, Verbatim)
&& matches!(
self.lexer.peek().map(|t| &t.kind),
Some(lex::Kind::Open(Delimiter::BraceEqual))
) {
let mut ahead = self.lexer.inner().clone();
let mut end = false;
let len = (&mut ahead)
.take_while(|c| {
if *c == '{' {
return false;
}
if *c == '}' {
end = true;
};
!end && !c.is_whitespace()
})
.count();
if len > 0 && end {
self.lexer = lex::Lexer::new(ahead);
let span_format = Span::by_len(self.span.end() + "{=".len(), len);
self.events[opener_event].kind = EventKind::Enter(RawFormat);
self.events[opener_event].span = span_format;
self.span = span_format;
RawFormat
} else {
kind
};
*/
Verbatim
}
} else {
kind
};
EventKind::Exit(kind)
} else {
EventKind::Str
@ -213,55 +164,46 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
kind,
span: self.span,
}
}
AtomicState::Attributes { .. } => todo!(),
AtomicState::Url { .. } => todo!(),
AtomicState::ReferenceLinkTag => todo!(),
})
}
fn parse_verbatim(&mut self, first: &lex::Token) -> Option<Event> {
match first.kind {
lex::Kind::Seq(lex::Sequence::Dollar) => {
let math_opt = (first.len <= 2)
.then(|| {
if let Some(lex::Token {
kind: lex::Kind::Seq(lex::Sequence::Backtick),
len,
}) = self.peek()
{
Some((
if first.len == 2 {
DisplayMath
})
.or_else(|| {
match first.kind {
lex::Kind::Seq(lex::Sequence::Dollar) => {
let math_opt = (first.len <= 2)
.then(|| {
if let Some(lex::Token {
kind: lex::Kind::Seq(lex::Sequence::Backtick),
len,
}) = self.peek()
{
Some((
if first.len == 2 {
DisplayMath
} else {
InlineMath
},
*len,
))
} else {
InlineMath
},
*len,
))
} else {
None
None
}
})
.flatten();
if math_opt.is_some() {
self.eat(); // backticks
}
})
.flatten();
if math_opt.is_some() {
self.eat(); // backticks
math_opt
}
lex::Kind::Seq(lex::Sequence::Backtick) => Some((Verbatim, first.len)),
_ => None,
}
math_opt
}
lex::Kind::Seq(lex::Sequence::Backtick) => Some((Verbatim, first.len)),
_ => None,
}
.map(|(kind, opener_len)| {
self.atomic_state = AtomicState::Verbatim {
kind,
opener_len,
opener_event: self.events.len(),
};
Event {
kind: EventKind::Enter(kind),
span: self.span,
}
})
.map(|(kind, opener_len)| {
self.verbatim = Some((kind, opener_len, self.events.len()));
Event {
kind: EventKind::Enter(kind),
span: self.span,
}
})
})
}
fn parse_span(&mut self, first: &lex::Token) -> Option<Event> {
@ -395,10 +337,9 @@ impl<I: Iterator<Item = char> + Clone> Iterator for Parser<I> {
type Item = Event;
fn next(&mut self) -> Option<Self::Item> {
let mut ready = true;
while self.events.is_empty()
|| !self.typesets.is_empty()
|| !matches!(self.atomic_state, AtomicState::None)
|| self.verbatim.is_some() // might be raw format
|| self // for merge
.events
.back()
@ -407,47 +348,42 @@ impl<I: Iterator<Item = char> + Clone> Iterator for Parser<I> {
if let Some(ev) = self.parse_event() {
self.events.push_back(ev);
} else {
ready = false;
break;
}
}
if self.last || ready {
self.events
.pop_front()
.map(|e| {
if matches!(e.kind, EventKind::Str) {
// merge str events
let mut span = e.span;
while self
.events
.front()
.map_or(false, |ev| matches!(ev.kind, EventKind::Str))
{
let ev = self.events.pop_front().unwrap();
assert_eq!(span.end(), ev.span.start());
span = span.union(ev.span);
}
Event {
kind: EventKind::Str,
span,
}
} else {
e
self.events
.pop_front()
.map(|e| {
if matches!(e.kind, EventKind::Str) {
// merge str events
let mut span = e.span;
while self
.events
.front()
.map_or(false, |ev| matches!(ev.kind, EventKind::Str))
{
let ev = self.events.pop_front().unwrap();
assert_eq!(span.end(), ev.span.start());
span = span.union(ev.span);
}
Event {
kind: EventKind::Str,
span,
}
} else {
e
}
})
.or_else(|| {
self.verbatim.map(|(kind, _, _)| {
self.verbatim = None;
Event {
kind: EventKind::Exit(kind),
span: self.span,
}
})
.or_else(|| {
self.atomic_state.verbatim().map(|(kind, _, _)| {
self.atomic_state = AtomicState::None;
Event {
kind: EventKind::Exit(kind),
span: self.span,
}
})
})
} else {
None
}
})
}
}

View file

@ -109,6 +109,10 @@ impl<I: Iterator<Item = char> + Clone> Lexer<I> {
self.next.as_ref()
}
/// Returns a shared reference to the character iterator stored in `self.chars`.
pub fn inner(&self) -> &I {
&self.chars
}
/*
pub fn pos(&self) -> usize {
self.src.len()

View file

@ -268,23 +268,23 @@ impl<'s> Event<'s> {
}
impl<'s> Container<'s> {
fn from_block(content: &'s str, block: block::Block) -> Self {
match block {
block::Block::Atom(a) => todo!(),
block::Block::Leaf(l) => match l {
block::Leaf::Paragraph => Self::Paragraph,
block::Leaf::Heading => Self::Heading {
level: content.len(),
},
block::Leaf::CodeBlock => Self::CodeBlock { lang: None },
_ => todo!(),
},
block::Block::Container(c) => match c {
block::Container::Blockquote => Self::Blockquote,
block::Container::Div => Self::Div { class: None },
block::Container::Footnote => Self::Footnote { tag: content },
block::Container::ListItem => todo!(),
/// Maps a leaf block kind to the corresponding `Container`.
///
/// A heading's level is the byte length of `content`. A code block is created without a
/// language (`lang: None`).
///
/// # Panics
///
/// Panics via `todo!()` for any leaf kind other than `Paragraph`, `Heading` or `CodeBlock`.
fn from_leaf_block(content: &str, l: block::Leaf) -> Self {
match l {
block::Leaf::Paragraph => Self::Paragraph,
block::Leaf::Heading => Self::Heading {
level: content.len(),
},
block::Leaf::CodeBlock => Self::CodeBlock { lang: None },
// Remaining leaf kinds are not mapped yet.
_ => todo!(),
}
}
/// Maps a container block kind to the corresponding `Container`.
///
/// A div is created without a class (`class: None`). A footnote stores `content` as its tag,
/// which is why `content` must be borrowed for `'s`.
///
/// # Panics
///
/// Panics via `todo!()` for `ListItem`.
fn from_container_block(content: &'s str, c: block::Container) -> Self {
match c {
block::Container::Blockquote => Self::Blockquote,
block::Container::Div => Self::Div { class: None },
block::Container::Footnote => Self::Footnote { tag: content },
// List items are not mapped yet.
block::Container::ListItem => todo!(),
}
}
}
@ -312,7 +312,7 @@ impl<'s> Attributes<'s> {
#[derive(Clone)]
struct InlineChars<'t, 's> {
src: &'s str,
inlines: tree::Atoms<'t, block::Block, block::Atom>,
inlines: tree::Inlines<'t, block::Block, block::Atom>,
}
impl<'t, 's> Iterator for InlineChars<'t, 's> {
@ -351,35 +351,17 @@ impl<'s> Iterator for Parser<'s> {
fn next(&mut self) -> Option<Self::Item> {
if let Some(parser) = &mut self.inline_parser {
// inside leaf block, with inline content
if let Some(mut inline) = parser.next() {
inline.span = inline.span.translate(self.inline_start);
return Some(Event::from_inline(self.src, inline));
}
self.inline_parser = None;
/*
else if let Some(ev) = self.tree.next() {
match ev.kind {
tree::EventKind::Atom(a) => {
assert_eq!(a, block::Atom::Inline);
let last_inline = self.tree.atoms().next().is_none();
parser.parse(ev.span.of(self.src), last_inline);
}
tree::EventKind::Exit(c) => {
self.inline_parser = None;
return Some(Event::End(Container::from_block(ev.span.of(self.src), c)));
}
tree::EventKind::Enter(..) => unreachable!(),
}
}
*/
}
for ev in &mut self.tree {
let content = ev.span.of(self.src);
let event = match ev.kind {
tree::EventKind::Atom(a) => match a {
block::Atom::Inline => panic!("inline outside leaf block"),
block::Atom::Blankline => Event::Atom(Atom::Blankline),
block::Atom::ThematicBreak => Event::Atom(Atom::ThematicBreak),
block::Atom::Attributes => {
@ -391,7 +373,7 @@ impl<'s> Iterator for Parser<'s> {
if matches!(b, block::Block::Leaf(_)) {
let chars = InlineChars {
src: self.src,
inlines: self.tree.atoms(),
inlines: self.tree.inlines(),
};
// TODO solve self-referential reference here without unsafe
self.inline_parser =
@ -402,17 +384,24 @@ impl<'s> Iterator for Parser<'s> {
block::Block::Leaf(block::Leaf::CodeBlock { .. }) => {
self.inline_start += 1; // skip newline
Container::CodeBlock {
lang: (!ev.span.is_empty()).then(|| ev.span.of(self.src)),
lang: (!ev.span.is_empty()).then(|| content),
}
}
block::Block::Container(block::Container::Div { .. }) => Container::Div {
class: (!ev.span.is_empty()).then(|| ev.span.of(self.src)),
},
b => Container::from_block(content, b),
block::Block::Leaf(l) => Container::from_leaf_block(content, l),
block::Block::Container(c) => Container::from_container_block(content, c),
block::Block::Atom(..) => panic!(),
};
Event::Start(container, self.block_attributes.take())
}
tree::EventKind::Exit(c) => Event::End(Container::from_block(content, c)),
tree::EventKind::Exit(b) => Event::End(match b {
block::Block::Leaf(l) => Container::from_leaf_block(content, l),
block::Block::Container(c) => Container::from_container_block(content, c),
block::Block::Atom(..) => panic!(),
}),
tree::EventKind::Inline => panic!(),
};
return Some(event);
}

View file

@ -3,6 +3,7 @@ use crate::Span;
/// Kind of an event yielded when iterating over a tree, generic over the container type `C`
/// and the atom type `A`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum EventKind<C, A> {
/// A container of kind `C` is entered.
Enter(C),
/// An inline node; it carries no payload of its own.
Inline,
/// A container of kind `C` is exited.
Exit(C),
/// An atom of kind `A`.
Atom(A),
}
@ -21,11 +22,11 @@ pub struct Tree<C, A> {
}
#[derive(Clone)]
pub struct Atoms<'t, C, A> {
pub struct Inlines<'t, C, A> {
iter: std::slice::Iter<'t, Node<C, A>>,
}
impl<'t, C, A> Iterator for Atoms<'t, C, A> {
impl<'t, C, A> Iterator for Inlines<'t, C, A> {
type Item = Span;
fn next(&mut self) -> Option<Self::Item> {
@ -43,26 +44,21 @@ impl<C: Copy, A: Copy> Tree<C, A> {
}
}
pub fn atoms(&self) -> Atoms<C, A> {
pub fn inlines(&self) -> Inlines<C, A> {
let start = self.nodes[self.head.unwrap().index()].next.unwrap().index();
let end = start + self.atoms_().count();
Atoms {
let end = start + self.spans().count();
Inlines {
iter: self.nodes[start..end].iter(),
}
}
pub fn atoms_(&self) -> impl Iterator<Item = (A, Span)> + '_ {
pub fn spans(&self) -> impl Iterator<Item = Span> + '_ {
let mut head = self.head;
std::iter::from_fn(move || {
head.take().map(|h| {
let n = &self.nodes[h.index()];
let kind = match &n.kind {
NodeKind::Root => unreachable!(),
NodeKind::Container(..) => panic!(),
NodeKind::Atom(a) => *a,
};
head = n.next;
(kind, n.span)
n.span
})
})
}
@ -85,6 +81,10 @@ impl<C: Copy, A: Copy> Iterator for Tree<C, A> {
self.head = n.next;
EventKind::Atom(*e)
}
NodeKind::Inline => {
self.head = n.next;
EventKind::Inline
}
};
Some(Event { kind, span: n.span })
} else if let Some(block_ni) = self.branch.pop() {
@ -128,6 +128,7 @@ enum NodeKind<C, A> {
Root,
Container(C, Option<NodeIndex>),
Atom(A),
Inline,
}
#[derive(Debug, Clone)]
@ -165,6 +166,14 @@ impl<C: Copy, A: Copy> Builder<C, A> {
});
}
/// Adds a node of kind `NodeKind::Inline` with the given `span` by passing it to `add_node`.
///
/// The node is created without a successor (`next: None`).
pub(super) fn inline(&mut self, span: Span) {
self.add_node(Node {
span,
kind: NodeKind::Inline,
next: None,
});
}
pub(super) fn enter(&mut self, c: C, span: Span) {
self.add_node(Node {
span,
@ -192,14 +201,14 @@ impl<C: Copy, A: Copy> Builder<C, A> {
if let Some(head_ni) = &mut self.head {
let mut head = &mut self.nodes[head_ni.index()];
match &mut head.kind {
NodeKind::Root | NodeKind::Atom(_) => {
// update next pointer of previous node
NodeKind::Root | NodeKind::Inline | NodeKind::Atom(_) => {
// set next pointer of previous node
assert_eq!(head.next, None);
head.next = Some(ni);
}
NodeKind::Container(_, child) => {
self.branch.push(*head_ni);
// update child pointer of current container
// set child pointer of current container
assert_eq!(*child, None);
*child = Some(ni);
}
@ -225,21 +234,43 @@ impl<C: Copy + std::fmt::Debug, A: Copy + std::fmt::Debug> std::fmt::Debug for T
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
const INDENT: &str = " ";
let mut level = 0;
for e in self.clone() {
let indent = INDENT.repeat(level);
match e.kind {
EventKind::Enter(c) => {
write!(f, "{}{:?}", indent, c)?;
level += 1;
/*
for e in self.clone() {
let indent = INDENT.repeat(level);
match e.kind {
<<<<<<< HEAD
EventKind::Enter(c) => {
write!(f, "{}{:?}", indent, c)?;
||||||| parent of 366c1d45 (maybe functional multi-line inline)
EventKind::Enter => {
write!(f, "{}{}", indent, e.elem)?;
=======
Event::Enter => {
write!(f, "{}{}", indent, e.elem)?;
>>>>>>> 366c1d45 (maybe functional multi-line inline)
level += 1;
}
<<<<<<< HEAD
EventKind::Exit(..) => {
||||||| parent of 366c1d45 (maybe functional multi-line inline)
EventKind::Exit => {
=======
Event::Exit => {
>>>>>>> 366c1d45 (maybe functional multi-line inline)
level -= 1;
continue;
}
<<<<<<< HEAD
EventKind::Atom(a) => write!(f, "{}{:?}", indent, a)?,
||||||| parent of 366c1d45 (maybe functional multi-line inline)
EventKind::Element => write!(f, "{}{}", indent, e.elem)?,
=======
Event::Element => write!(f, "{}{}", indent, e.elem)?,
>>>>>>> 366c1d45 (maybe functional multi-line inline)
}
writeln!(f, " ({}:{})", e.span.start(), e.span.end())?;
}
EventKind::Exit(..) => {
level -= 1;
continue;
}
EventKind::Atom(a) => write!(f, "{}{:?}", indent, a)?,
}
writeln!(f, " ({}:{})", e.span.start(), e.span.end())?;
}
*/
Ok(())
}
}
@ -248,6 +279,7 @@ impl<C: Copy + std::fmt::Debug, A: Copy + std::fmt::Debug> std::fmt::Debug for T
mod test {
use crate::Span;
/*
#[test]
fn fmt_linear() {
let mut tree: super::Builder<u8, u8> = super::Builder::new();
@ -301,4 +333,5 @@ mod test {
)
);
}
*/
}