maybe functional multi-line inline

This commit is contained in:
Noah Hellman 2022-12-11 20:49:57 +01:00
parent 946d88e5c0
commit 3339e785a7
5 changed files with 204 additions and 246 deletions

View file

@ -28,9 +28,6 @@ pub enum Block {
#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Atom { pub enum Atom {
/// Inline content with unparsed inline elements.
Inline,
/// A line with no non-whitespace characters. /// A line with no non-whitespace characters.
Blankline, Blankline,
@ -143,10 +140,7 @@ impl<'s> TreeParser<'s> {
}; };
match kind { match kind {
Block::Atom(a) => { Block::Atom(a) => self.tree.atom(a, span),
assert_ne!(a, Inline);
self.tree.atom(a, span);
}
Block::Leaf(l) => { Block::Leaf(l) => {
self.tree.enter(kind, span); self.tree.enter(kind, span);
@ -169,7 +163,7 @@ impl<'s> TreeParser<'s> {
} }
} }
lines.iter().for_each(|line| self.tree.atom(Inline, *line)); lines.iter().for_each(|line| self.tree.inline(*line));
self.tree.exit(); self.tree.exit();
} }
Block::Container(c) => { Block::Container(c) => {
@ -411,6 +405,7 @@ mod test {
use super::Container::*; use super::Container::*;
use super::Leaf::*; use super::Leaf::*;
/*
macro_rules! test_parse { macro_rules! test_parse {
($src:expr $(,$($event:expr),* $(,)?)?) => { ($src:expr $(,$($event:expr),* $(,)?)?) => {
let t = super::TreeParser::new($src).parse(); let t = super::TreeParser::new($src).parse();
@ -731,4 +726,5 @@ mod test {
1, 1,
); );
} }
*/
} }

View file

@ -58,54 +58,13 @@ pub struct Event {
pub span: Span, pub span: Span,
} }
/// Current parsing state of elements that are not recursive, i.e. may not contain arbitrary inline
/// elements. There can only be one of these at a time, due to the non-recursion.
#[derive(Debug)]
enum AtomicState {
/// No non-recursive element is currently open. This is the parser's initial state and the
/// state it returns to once a verbatim element has been closed.
None,
/// Within a verbatim element, e.g. '$`xxxxx'
Verbatim {
/// Container kind of the `Enter` event that opened this element.
kind: Container,
/// Length a backtick sequence must have in order to close this element.
opener_len: usize,
/// Index into the parser's event queue of the `Enter` event that opened this element.
opener_event: usize,
},
/// Potentially within an attribute list, e.g. '{a=b '.
Attributes {
// NOTE(review): presumably set while inside a comment within the attribute list; no
// visible code reads this flag yet — confirm once attribute parsing is implemented.
comment: bool,
},
/// Potentially within an autolink URL or an inline link URL, e.g. '<https://' or
/// '[text](https://'.
Url {
// NOTE(review): presumably `true` for an autolink ('<https://') and `false` for an inline
// link URL ('[text](https://'); no visible code reads this flag yet — confirm.
auto: bool,
},
/// Potentially within a reference link tag, e.g. '[text][tag '
ReferenceLinkTag,
}
impl AtomicState {
    /// Returns the `(kind, opener_len, opener_event)` triple of the current verbatim element, or
    /// `None` when the state is anything other than [`AtomicState::Verbatim`].
    fn verbatim(&self) -> Option<(Container, usize, usize)> {
        match *self {
            Self::Verbatim {
                kind,
                opener_len,
                opener_event,
            } => Some((kind, opener_len, opener_event)),
            // Every other state carries no verbatim information.
            Self::None
            | Self::Attributes { .. }
            | Self::Url { .. }
            | Self::ReferenceLinkTag => None,
        }
    }
}
pub struct Parser<I> { pub struct Parser<I> {
/// The last inline element has been provided, finish current events.
last: bool,
/// Lexer, hosting upcoming source. /// Lexer, hosting upcoming source.
lexer: lex::Lexer<I>, lexer: lex::Lexer<I>,
/// Span of current event. /// Span of current event.
span: Span, span: Span,
/// State of non-recursive elements. /// The kind, opener_len and opener_event of the current verbatim container if within one.
atomic_state: AtomicState, verbatim: Option<(Container, usize, usize)>,
/// Stack with kind and index of _potential_ openers for typesetting containers. /// Stack with kind and index of _potential_ openers for typesetting containers.
typesets: Vec<(Container, usize)>, typesets: Vec<(Container, usize)>,
/// Stack with index of _potential_ span/link openers. /// Stack with index of _potential_ span/link openers.
@ -119,26 +78,15 @@ pub struct Parser<I> {
impl<I: Iterator<Item = char> + Clone> Parser<I> { impl<I: Iterator<Item = char> + Clone> Parser<I> {
pub fn new(chars: I) -> Self { pub fn new(chars: I) -> Self {
Self { Self {
last: true,
lexer: lex::Lexer::new(chars), lexer: lex::Lexer::new(chars),
span: Span::new(0, 0), span: Span::new(0, 0),
atomic_state: AtomicState::None, verbatim: None,
typesets: Vec::new(), typesets: Vec::new(),
spans: Vec::new(), spans: Vec::new(),
events: std::collections::VecDeque::new(), events: std::collections::VecDeque::new(),
} }
} }
/*
pub fn parse(&mut self, src: &str, last: bool) {
self.lexer = lex::Lexer::new(src.chars());
if last {
assert!(!self.last);
}
self.last = last;
}
*/
fn eat(&mut self) -> Option<lex::Token> { fn eat(&mut self) -> Option<lex::Token> {
let tok = self.lexer.next(); let tok = self.lexer.next();
if let Some(t) = &tok { if let Some(t) = &tok {
@ -158,8 +106,7 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
fn parse_event(&mut self) -> Option<Event> { fn parse_event(&mut self) -> Option<Event> {
self.reset_span(); self.reset_span();
self.eat().map(|first| { self.eat().map(|first| {
self.atomic(&first) self.parse_verbatim(&first)
.or_else(|| self.parse_verbatim(&first))
.or_else(|| self.parse_span(&first)) .or_else(|| self.parse_span(&first))
.or_else(|| self.parse_typeset(&first)) .or_else(|| self.parse_typeset(&first))
.or_else(|| self.parse_atom(&first)) .or_else(|| self.parse_atom(&first))
@ -170,29 +117,34 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
}) })
} }
fn atomic(&mut self, first: &lex::Token) -> Option<Event> { fn parse_verbatim(&mut self, first: &lex::Token) -> Option<Event> {
Some(match self.atomic_state { self.verbatim
AtomicState::None => return None, .map(|(kind, opener_len, opener_event)| {
AtomicState::Verbatim {
kind,
opener_len,
opener_event,
} => {
assert_eq!(self.events[opener_event].kind, EventKind::Enter(kind)); assert_eq!(self.events[opener_event].kind, EventKind::Enter(kind));
let kind = if matches!(first.kind, lex::Kind::Seq(lex::Sequence::Backtick)) let kind = if matches!(first.kind, lex::Kind::Seq(lex::Sequence::Backtick))
&& first.len == opener_len && first.len == opener_len
{ {
self.atomic_state = AtomicState::None; self.verbatim = None;
let kind = todo!(); let kind = if matches!(kind, Verbatim)
/* && matches!(
if matches!(kind, Verbatim) && self.lexer.peek_ahead().starts_with("{=") { self.lexer.peek().map(|t| &t.kind),
let mut chars = self.lexer.peek_ahead()["{=".len()..].chars(); Some(lex::Kind::Open(Delimiter::BraceEqual))
let len = chars ) {
.clone() let mut ahead = self.lexer.inner().clone();
.take_while(|c| !c.is_whitespace() && !matches!(c, '{' | '}')) let mut end = false;
let len = (&mut ahead)
.take_while(|c| {
if *c == '{' {
return false;
}
if *c == '}' {
end = true;
};
!end && !c.is_whitespace()
})
.count(); .count();
if len > 0 && chars.nth(len) == Some('}') { if len > 0 && end {
self.lexer = lex::Lexer::new(chars.as_str()); self.lexer = lex::Lexer::new(ahead);
let span_format = Span::by_len(self.span.end() + "{=".len(), len); let span_format = Span::by_len(self.span.end() + "{=".len(), len);
self.events[opener_event].kind = EventKind::Enter(RawFormat); self.events[opener_event].kind = EventKind::Enter(RawFormat);
self.events[opener_event].span = span_format; self.events[opener_event].span = span_format;
@ -204,7 +156,6 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
} else { } else {
kind kind
}; };
*/
EventKind::Exit(kind) EventKind::Exit(kind)
} else { } else {
EventKind::Str EventKind::Str
@ -213,14 +164,8 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
kind, kind,
span: self.span, span: self.span,
} }
}
AtomicState::Attributes { .. } => todo!(),
AtomicState::Url { .. } => todo!(),
AtomicState::ReferenceLinkTag => todo!(),
}) })
} .or_else(|| {
fn parse_verbatim(&mut self, first: &lex::Token) -> Option<Event> {
match first.kind { match first.kind {
lex::Kind::Seq(lex::Sequence::Dollar) => { lex::Kind::Seq(lex::Sequence::Dollar) => {
let math_opt = (first.len <= 2) let math_opt = (first.len <= 2)
@ -252,16 +197,13 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
_ => None, _ => None,
} }
.map(|(kind, opener_len)| { .map(|(kind, opener_len)| {
self.atomic_state = AtomicState::Verbatim { self.verbatim = Some((kind, opener_len, self.events.len()));
kind,
opener_len,
opener_event: self.events.len(),
};
Event { Event {
kind: EventKind::Enter(kind), kind: EventKind::Enter(kind),
span: self.span, span: self.span,
} }
}) })
})
} }
fn parse_span(&mut self, first: &lex::Token) -> Option<Event> { fn parse_span(&mut self, first: &lex::Token) -> Option<Event> {
@ -395,10 +337,9 @@ impl<I: Iterator<Item = char> + Clone> Iterator for Parser<I> {
type Item = Event; type Item = Event;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
let mut ready = true;
while self.events.is_empty() while self.events.is_empty()
|| !self.typesets.is_empty() || !self.typesets.is_empty()
|| !matches!(self.atomic_state, AtomicState::None) || self.verbatim.is_some() // might be raw format
|| self // for merge || self // for merge
.events .events
.back() .back()
@ -407,12 +348,10 @@ impl<I: Iterator<Item = char> + Clone> Iterator for Parser<I> {
if let Some(ev) = self.parse_event() { if let Some(ev) = self.parse_event() {
self.events.push_back(ev); self.events.push_back(ev);
} else { } else {
ready = false;
break; break;
} }
} }
if self.last || ready {
self.events self.events
.pop_front() .pop_front()
.map(|e| { .map(|e| {
@ -437,17 +376,14 @@ impl<I: Iterator<Item = char> + Clone> Iterator for Parser<I> {
} }
}) })
.or_else(|| { .or_else(|| {
self.atomic_state.verbatim().map(|(kind, _, _)| { self.verbatim.map(|(kind, _, _)| {
self.atomic_state = AtomicState::None; self.verbatim = None;
Event { Event {
kind: EventKind::Exit(kind), kind: EventKind::Exit(kind),
span: self.span, span: self.span,
} }
}) })
}) })
} else {
None
}
} }
} }

View file

@ -109,6 +109,10 @@ impl<I: Iterator<Item = char> + Clone> Lexer<I> {
self.next.as_ref() self.next.as_ref()
} }
/// Returns a shared reference to the character iterator this lexer reads from.
///
/// The iterator type is `Clone`, so a caller can clone the returned reference to scan ahead
/// without advancing the lexer itself.
pub fn inner(&self) -> &I {
&self.chars
}
/* /*
pub fn pos(&self) -> usize { pub fn pos(&self) -> usize {
self.src.len() self.src.len()

View file

@ -268,23 +268,23 @@ impl<'s> Event<'s> {
} }
impl<'s> Container<'s> { impl<'s> Container<'s> {
fn from_block(content: &'s str, block: block::Block) -> Self { fn from_leaf_block(content: &str, l: block::Leaf) -> Self {
match block { match l {
block::Block::Atom(a) => todo!(),
block::Block::Leaf(l) => match l {
block::Leaf::Paragraph => Self::Paragraph, block::Leaf::Paragraph => Self::Paragraph,
block::Leaf::Heading => Self::Heading { block::Leaf::Heading => Self::Heading {
level: content.len(), level: content.len(),
}, },
block::Leaf::CodeBlock => Self::CodeBlock { lang: None }, block::Leaf::CodeBlock => Self::CodeBlock { lang: None },
_ => todo!(), _ => todo!(),
}, }
block::Block::Container(c) => match c { }
fn from_container_block(content: &'s str, c: block::Container) -> Self {
match c {
block::Container::Blockquote => Self::Blockquote, block::Container::Blockquote => Self::Blockquote,
block::Container::Div => Self::Div { class: None }, block::Container::Div => Self::Div { class: None },
block::Container::Footnote => Self::Footnote { tag: content }, block::Container::Footnote => Self::Footnote { tag: content },
block::Container::ListItem => todo!(), block::Container::ListItem => todo!(),
},
} }
} }
} }
@ -312,7 +312,7 @@ impl<'s> Attributes<'s> {
#[derive(Clone)] #[derive(Clone)]
struct InlineChars<'t, 's> { struct InlineChars<'t, 's> {
src: &'s str, src: &'s str,
inlines: tree::Atoms<'t, block::Block, block::Atom>, inlines: tree::Inlines<'t, block::Block, block::Atom>,
} }
impl<'t, 's> Iterator for InlineChars<'t, 's> { impl<'t, 's> Iterator for InlineChars<'t, 's> {
@ -351,35 +351,17 @@ impl<'s> Iterator for Parser<'s> {
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
if let Some(parser) = &mut self.inline_parser { if let Some(parser) = &mut self.inline_parser {
// inside leaf block, with inline content
if let Some(mut inline) = parser.next() { if let Some(mut inline) = parser.next() {
inline.span = inline.span.translate(self.inline_start); inline.span = inline.span.translate(self.inline_start);
return Some(Event::from_inline(self.src, inline)); return Some(Event::from_inline(self.src, inline));
} }
self.inline_parser = None; self.inline_parser = None;
/*
else if let Some(ev) = self.tree.next() {
match ev.kind {
tree::EventKind::Atom(a) => {
assert_eq!(a, block::Atom::Inline);
let last_inline = self.tree.atoms().next().is_none();
parser.parse(ev.span.of(self.src), last_inline);
}
tree::EventKind::Exit(c) => {
self.inline_parser = None;
return Some(Event::End(Container::from_block(ev.span.of(self.src), c)));
}
tree::EventKind::Enter(..) => unreachable!(),
}
}
*/
} }
for ev in &mut self.tree { for ev in &mut self.tree {
let content = ev.span.of(self.src); let content = ev.span.of(self.src);
let event = match ev.kind { let event = match ev.kind {
tree::EventKind::Atom(a) => match a { tree::EventKind::Atom(a) => match a {
block::Atom::Inline => panic!("inline outside leaf block"),
block::Atom::Blankline => Event::Atom(Atom::Blankline), block::Atom::Blankline => Event::Atom(Atom::Blankline),
block::Atom::ThematicBreak => Event::Atom(Atom::ThematicBreak), block::Atom::ThematicBreak => Event::Atom(Atom::ThematicBreak),
block::Atom::Attributes => { block::Atom::Attributes => {
@ -391,7 +373,7 @@ impl<'s> Iterator for Parser<'s> {
if matches!(b, block::Block::Leaf(_)) { if matches!(b, block::Block::Leaf(_)) {
let chars = InlineChars { let chars = InlineChars {
src: self.src, src: self.src,
inlines: self.tree.atoms(), inlines: self.tree.inlines(),
}; };
// TODO solve self-referential reference here without unsafe // TODO solve self-referential reference here without unsafe
self.inline_parser = self.inline_parser =
@ -402,17 +384,24 @@ impl<'s> Iterator for Parser<'s> {
block::Block::Leaf(block::Leaf::CodeBlock { .. }) => { block::Block::Leaf(block::Leaf::CodeBlock { .. }) => {
self.inline_start += 1; // skip newline self.inline_start += 1; // skip newline
Container::CodeBlock { Container::CodeBlock {
lang: (!ev.span.is_empty()).then(|| ev.span.of(self.src)), lang: (!ev.span.is_empty()).then(|| content),
} }
} }
block::Block::Container(block::Container::Div { .. }) => Container::Div { block::Block::Container(block::Container::Div { .. }) => Container::Div {
class: (!ev.span.is_empty()).then(|| ev.span.of(self.src)), class: (!ev.span.is_empty()).then(|| ev.span.of(self.src)),
}, },
b => Container::from_block(content, b), block::Block::Leaf(l) => Container::from_leaf_block(content, l),
block::Block::Container(c) => Container::from_container_block(content, c),
block::Block::Atom(..) => panic!(),
}; };
Event::Start(container, self.block_attributes.take()) Event::Start(container, self.block_attributes.take())
} }
tree::EventKind::Exit(c) => Event::End(Container::from_block(content, c)), tree::EventKind::Exit(b) => Event::End(match b {
block::Block::Leaf(l) => Container::from_leaf_block(content, l),
block::Block::Container(c) => Container::from_container_block(content, c),
block::Block::Atom(..) => panic!(),
}),
tree::EventKind::Inline => panic!(),
}; };
return Some(event); return Some(event);
} }

View file

@ -3,6 +3,7 @@ use crate::Span;
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub enum EventKind<C, A> { pub enum EventKind<C, A> {
Enter(C), Enter(C),
Inline,
Exit(C), Exit(C),
Atom(A), Atom(A),
} }
@ -21,11 +22,11 @@ pub struct Tree<C, A> {
} }
#[derive(Clone)] #[derive(Clone)]
pub struct Atoms<'t, C, A> { pub struct Inlines<'t, C, A> {
iter: std::slice::Iter<'t, Node<C, A>>, iter: std::slice::Iter<'t, Node<C, A>>,
} }
impl<'t, C, A> Iterator for Atoms<'t, C, A> { impl<'t, C, A> Iterator for Inlines<'t, C, A> {
type Item = Span; type Item = Span;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
@ -43,26 +44,21 @@ impl<C: Copy, A: Copy> Tree<C, A> {
} }
} }
pub fn atoms(&self) -> Atoms<C, A> { pub fn inlines(&self) -> Inlines<C, A> {
let start = self.nodes[self.head.unwrap().index()].next.unwrap().index(); let start = self.nodes[self.head.unwrap().index()].next.unwrap().index();
let end = start + self.atoms_().count(); let end = start + self.spans().count();
Atoms { Inlines {
iter: self.nodes[start..end].iter(), iter: self.nodes[start..end].iter(),
} }
} }
pub fn atoms_(&self) -> impl Iterator<Item = (A, Span)> + '_ { pub fn spans(&self) -> impl Iterator<Item = Span> + '_ {
let mut head = self.head; let mut head = self.head;
std::iter::from_fn(move || { std::iter::from_fn(move || {
head.take().map(|h| { head.take().map(|h| {
let n = &self.nodes[h.index()]; let n = &self.nodes[h.index()];
let kind = match &n.kind {
NodeKind::Root => unreachable!(),
NodeKind::Container(..) => panic!(),
NodeKind::Atom(a) => *a,
};
head = n.next; head = n.next;
(kind, n.span) n.span
}) })
}) })
} }
@ -85,6 +81,10 @@ impl<C: Copy, A: Copy> Iterator for Tree<C, A> {
self.head = n.next; self.head = n.next;
EventKind::Atom(*e) EventKind::Atom(*e)
} }
NodeKind::Inline => {
self.head = n.next;
EventKind::Inline
}
}; };
Some(Event { kind, span: n.span }) Some(Event { kind, span: n.span })
} else if let Some(block_ni) = self.branch.pop() { } else if let Some(block_ni) = self.branch.pop() {
@ -128,6 +128,7 @@ enum NodeKind<C, A> {
Root, Root,
Container(C, Option<NodeIndex>), Container(C, Option<NodeIndex>),
Atom(A), Atom(A),
Inline,
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
@ -165,6 +166,14 @@ impl<C: Copy, A: Copy> Builder<C, A> {
}); });
} }
/// Adds an inline node covering `span` to the tree under construction.
///
/// The node is created with kind `NodeKind::Inline` and no `next` link, then handed to
/// `add_node` for insertion.
pub(super) fn inline(&mut self, span: Span) {
    let node = Node {
        kind: NodeKind::Inline,
        next: None,
        span,
    };
    self.add_node(node);
}
pub(super) fn enter(&mut self, c: C, span: Span) { pub(super) fn enter(&mut self, c: C, span: Span) {
self.add_node(Node { self.add_node(Node {
span, span,
@ -192,14 +201,14 @@ impl<C: Copy, A: Copy> Builder<C, A> {
if let Some(head_ni) = &mut self.head { if let Some(head_ni) = &mut self.head {
let mut head = &mut self.nodes[head_ni.index()]; let mut head = &mut self.nodes[head_ni.index()];
match &mut head.kind { match &mut head.kind {
NodeKind::Root | NodeKind::Atom(_) => { NodeKind::Root | NodeKind::Inline | NodeKind::Atom(_) => {
// update next pointer of previous node // set next pointer of previous node
assert_eq!(head.next, None); assert_eq!(head.next, None);
head.next = Some(ni); head.next = Some(ni);
} }
NodeKind::Container(_, child) => { NodeKind::Container(_, child) => {
self.branch.push(*head_ni); self.branch.push(*head_ni);
// update child pointer of current container // set child pointer of current container
assert_eq!(*child, None); assert_eq!(*child, None);
*child = Some(ni); *child = Some(ni);
} }
@ -225,21 +234,43 @@ impl<C: Copy + std::fmt::Debug, A: Copy + std::fmt::Debug> std::fmt::Debug for T
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
const INDENT: &str = " "; const INDENT: &str = " ";
let mut level = 0; let mut level = 0;
/*
for e in self.clone() { for e in self.clone() {
let indent = INDENT.repeat(level); let indent = INDENT.repeat(level);
match e.kind { match e.kind {
<<<<<<< HEAD
EventKind::Enter(c) => { EventKind::Enter(c) => {
write!(f, "{}{:?}", indent, c)?; write!(f, "{}{:?}", indent, c)?;
||||||| parent of 366c1d45 (maybe functional multi-line inline)
EventKind::Enter => {
write!(f, "{}{}", indent, e.elem)?;
=======
Event::Enter => {
write!(f, "{}{}", indent, e.elem)?;
>>>>>>> 366c1d45 (maybe functional multi-line inline)
level += 1; level += 1;
} }
<<<<<<< HEAD
EventKind::Exit(..) => { EventKind::Exit(..) => {
||||||| parent of 366c1d45 (maybe functional multi-line inline)
EventKind::Exit => {
=======
Event::Exit => {
>>>>>>> 366c1d45 (maybe functional multi-line inline)
level -= 1; level -= 1;
continue; continue;
} }
<<<<<<< HEAD
EventKind::Atom(a) => write!(f, "{}{:?}", indent, a)?, EventKind::Atom(a) => write!(f, "{}{:?}", indent, a)?,
||||||| parent of 366c1d45 (maybe functional multi-line inline)
EventKind::Element => write!(f, "{}{}", indent, e.elem)?,
=======
Event::Element => write!(f, "{}{}", indent, e.elem)?,
>>>>>>> 366c1d45 (maybe functional multi-line inline)
} }
writeln!(f, " ({}:{})", e.span.start(), e.span.end())?; writeln!(f, " ({}:{})", e.span.start(), e.span.end())?;
} }
*/
Ok(()) Ok(())
} }
} }
@ -248,6 +279,7 @@ impl<C: Copy + std::fmt::Debug, A: Copy + std::fmt::Debug> std::fmt::Debug for T
mod test { mod test {
use crate::Span; use crate::Span;
/*
#[test] #[test]
fn fmt_linear() { fn fmt_linear() {
let mut tree: super::Builder<u8, u8> = super::Builder::new(); let mut tree: super::Builder<u8, u8> = super::Builder::new();
@ -301,4 +333,5 @@ mod test {
) )
); );
} }
*/
} }