pre remove atomic state

This commit is contained in:
Noah Hellman 2022-12-11 18:47:00 +01:00
parent 227c86f4f0
commit 946d88e5c0
4 changed files with 116 additions and 47 deletions

View file

@ -97,11 +97,11 @@ impl AtomicState {
} }
} }
pub struct Parser<'s> { pub struct Parser<I> {
/// The last inline element has been provided, finish current events. /// The last inline element has been provided, finish current events.
last: bool, last: bool,
/// Lexer, hosting upcoming source. /// Lexer, hosting upcoming source.
lexer: lex::Lexer<'s>, lexer: lex::Lexer<I>,
/// Span of current event. /// Span of current event.
span: Span, span: Span,
/// State of non-recursive elements. /// State of non-recursive elements.
@ -116,11 +116,11 @@ pub struct Parser<'s> {
events: std::collections::VecDeque<Event>, events: std::collections::VecDeque<Event>,
} }
impl<'s> Parser<'s> { impl<I: Iterator<Item = char> + Clone> Parser<I> {
pub fn new() -> Self { pub fn new(chars: I) -> Self {
Self { Self {
last: false, last: true,
lexer: lex::Lexer::new(""), lexer: lex::Lexer::new(chars),
span: Span::new(0, 0), span: Span::new(0, 0),
atomic_state: AtomicState::None, atomic_state: AtomicState::None,
typesets: Vec::new(), typesets: Vec::new(),
@ -129,13 +129,15 @@ impl<'s> Parser<'s> {
} }
} }
pub fn parse(&mut self, src: &'s str, last: bool) { /*
self.lexer = lex::Lexer::new(src); pub fn parse(&mut self, src: &str, last: bool) {
self.lexer = lex::Lexer::new(src.chars());
if last { if last {
assert!(!self.last); assert!(!self.last);
} }
self.last = last; self.last = last;
} }
*/
fn eat(&mut self) -> Option<lex::Token> { fn eat(&mut self) -> Option<lex::Token> {
let tok = self.lexer.next(); let tok = self.lexer.next();
@ -181,7 +183,8 @@ impl<'s> Parser<'s> {
&& first.len == opener_len && first.len == opener_len
{ {
self.atomic_state = AtomicState::None; self.atomic_state = AtomicState::None;
let kind = let kind = todo!();
/*
if matches!(kind, Verbatim) && self.lexer.peek_ahead().starts_with("{=") { if matches!(kind, Verbatim) && self.lexer.peek_ahead().starts_with("{=") {
let mut chars = self.lexer.peek_ahead()["{=".len()..].chars(); let mut chars = self.lexer.peek_ahead()["{=".len()..].chars();
let len = chars let len = chars
@ -201,6 +204,7 @@ impl<'s> Parser<'s> {
} else { } else {
kind kind
}; };
*/
EventKind::Exit(kind) EventKind::Exit(kind)
} else { } else {
EventKind::Str EventKind::Str
@ -261,13 +265,12 @@ impl<'s> Parser<'s> {
} }
fn parse_span(&mut self, first: &lex::Token) -> Option<Event> { fn parse_span(&mut self, first: &lex::Token) -> Option<Event> {
match first.kind { if let Some(open) = match first.kind {
lex::Kind::Open(Delimiter::Bracket) => Some(true), lex::Kind::Open(Delimiter::Bracket) => Some(true),
lex::Kind::Close(Delimiter::Bracket) => Some(false), lex::Kind::Close(Delimiter::Bracket) => Some(false),
_ => None, _ => None,
} } {
.map(|open| { Some(if open {
if open {
self.spans.push(self.events.len()); self.spans.push(self.events.len());
// use str for now, replace if closed later // use str for now, replace if closed later
Event { Event {
@ -275,21 +278,44 @@ impl<'s> Parser<'s> {
span: self.span, span: self.span,
} }
} else { } else {
if self.lexer.peek_ahead().starts_with('[') { /*
let kind = if self.lexer.peek_ahead().starts_with('[') {
let mut chars = self.lexer.peek_ahead()["[".len()..].chars(); let mut chars = self.lexer.peek_ahead()["[".len()..].chars();
let len = chars let len = chars
.clone() .clone()
.take_while(|c| !c.is_whitespace() && !matches!(c, '[' | ']')) .take_while(|c| !c.is_whitespace() && !matches!(c, '[' | ']'))
.count(); .count();
match chars.nth(len) { match chars.nth(len) {
Some(']') => todo!(), Some(']') => EventKind::Exit(ReferenceLink),
None => self.atomic_state = AtomicState::ReferenceLinkTag, None => {
_ => todo!(), self.atomic_state = AtomicState::ReferenceLinkTag;
return None;
}
_ => EventKind::Str,
} }
} } else if self.lexer.peek_ahead().starts_with('(') {
let mut chars = self.lexer.peek_ahead()["[".len()..].chars();
let len = chars
.clone()
.take_while(|c| !c.is_whitespace() && !matches!(c, '[' | ']'))
.count();
match chars.nth(len) {
Some(']') => EventKind::Exit(ReferenceLink),
None => {
self.atomic_state = AtomicState::Url { auto: false };
return None;
}
_ => EventKind::Str,
}
} else {
return None;
};
*/
todo!() todo!()
} })
}) } else {
None
}
} }
fn parse_typeset(&mut self, first: &lex::Token) -> Option<Event> { fn parse_typeset(&mut self, first: &lex::Token) -> Option<Event> {
@ -365,7 +391,7 @@ impl<'s> Parser<'s> {
} }
} }
impl<'s> Iterator for Parser<'s> { impl<I: Iterator<Item = char> + Clone> Iterator for Parser<I> {
type Item = Event; type Item = Event;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
@ -437,8 +463,7 @@ mod test {
macro_rules! test_parse { macro_rules! test_parse {
($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => { ($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => {
#[allow(unused)] #[allow(unused)]
let mut p = super::Parser::new(); let mut p = super::Parser::new($src.chars());
p.parse($src, true);
let actual = p.map(|ev| (ev.kind, ev.span.of($src))).collect::<Vec<_>>(); let actual = p.map(|ev| (ev.kind, ev.span.of($src))).collect::<Vec<_>>();
let expected = &[$($($token),*,)?]; let expected = &[$($($token),*,)?];
assert_eq!(actual, expected, "\n\n{}\n\n", $src); assert_eq!(actual, expected, "\n\n{}\n\n", $src);

View file

@ -82,9 +82,8 @@ impl Sequence {
} }
#[derive(Clone)] #[derive(Clone)]
pub(crate) struct Lexer<'s> { pub(crate) struct Lexer<I> {
pub src: &'s str, chars: I,
chars: std::str::Chars<'s>,
/// Next character should be escaped. /// Next character should be escaped.
escape: bool, escape: bool,
/// Token to be peeked or next'ed. /// Token to be peeked or next'ed.
@ -93,11 +92,10 @@ pub(crate) struct Lexer<'s> {
len: usize, len: usize,
} }
impl<'s> Lexer<'s> { impl<I: Iterator<Item = char> + Clone> Lexer<I> {
pub fn new(src: &'s str) -> Lexer<'s> { pub fn new(chars: I) -> Lexer<I> {
Lexer { Lexer {
src, chars,
chars: src.chars(),
escape: false, escape: false,
next: None, next: None,
len: 0, len: 0,
@ -111,15 +109,19 @@ impl<'s> Lexer<'s> {
self.next.as_ref() self.next.as_ref()
} }
/*
pub fn pos(&self) -> usize { pub fn pos(&self) -> usize {
self.src.len() self.src.len()
- self.chars.as_str().len() - self.chars.as_str().len()
- self.next.as_ref().map(|t| t.len).unwrap_or_default() - self.next.as_ref().map(|t| t.len).unwrap_or_default()
} }
*/
/*
pub fn peek_ahead(&mut self) -> &'s str { pub fn peek_ahead(&mut self) -> &'s str {
&self.src[self.pos()..] &self.src[self.pos()..]
} }
*/
fn next_token(&mut self) -> Option<Token> { fn next_token(&mut self) -> Option<Token> {
let mut current = self.token(); let mut current = self.token();
@ -272,7 +274,7 @@ impl<'s> Lexer<'s> {
} }
} }
impl<'s> Iterator for Lexer<'s> { impl<I: Iterator<Item = char> + Clone> Iterator for Lexer<I> {
type Item = Token; type Item = Token;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
@ -290,7 +292,7 @@ mod test {
macro_rules! test_lex { macro_rules! test_lex {
($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => { ($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => {
#[allow(unused)] #[allow(unused)]
let actual = super::Lexer::new($src).collect::<Vec<_>>(); let actual = super::Lexer::new($src.chars()).collect::<Vec<_>>();
let expected = vec![$($($token),*,)?]; let expected = vec![$($($token),*,)?];
assert_eq!(actual, expected, "{}", $src); assert_eq!(actual, expected, "{}", $src);
}; };

View file

@ -304,20 +304,31 @@ impl<'s> Attributes<'s> {
Self(self.0.take()) Self(self.0.take())
} }
#[must_use]
pub fn valid(src: &str) -> bool {
todo!()
}
pub fn parse(&mut self, src: &'s str) { pub fn parse(&mut self, src: &'s str) {
todo!() todo!()
} }
} }
/// Character source for the inline parser, backed by a sequence of spans
/// that are resolved against the document source.
#[derive(Clone)]
struct InlineChars<'t, 's> {
    /// Source text that the spans index into.
    src: &'s str,
    /// Iterator over the spans to read characters from.
    inlines: tree::Atoms<'t, block::Block, block::Atom>,
}

impl<'t, 's> Iterator for InlineChars<'t, 's> {
    type Item = char;

    /// Pulls spans from `inlines` until one of them contains a character and
    /// returns that character; returns `None` once the spans are exhausted.
    ///
    /// NOTE(review): no position inside a span is kept between calls, so the
    /// remaining characters of the span that produced the result are never
    /// yielded. Only the first character of each non-empty span comes out.
    /// Yielding every character would need an extra field holding the
    /// partially consumed span, which also changes how this struct is built.
    fn next(&mut self) -> Option<Self::Item> {
        let src = self.src;
        self.inlines.find_map(|sp| sp.of(src).chars().next())
    }
}
pub struct Parser<'s> { pub struct Parser<'s> {
src: &'s str, src: &'s str,
tree: block::Tree, tree: block::Tree,
parser: Option<inline::Parser<'s>>, inline_parser: Option<inline::Parser<InlineChars<'s, 's>>>,
inline_start: usize, inline_start: usize,
block_attributes: Attributes<'s>, block_attributes: Attributes<'s>,
} }
@ -328,7 +339,7 @@ impl<'s> Parser<'s> {
Self { Self {
src, src,
tree: block::parse(src), tree: block::parse(src),
parser: None, inline_parser: None,
inline_start: 0, inline_start: 0,
block_attributes: Attributes::none(), block_attributes: Attributes::none(),
} }
@ -339,12 +350,15 @@ impl<'s> Iterator for Parser<'s> {
type Item = Event<'s>; type Item = Event<'s>;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
while let Some(parser) = &mut self.parser { if let Some(parser) = &mut self.inline_parser {
// inside leaf block, with inline content // inside leaf block, with inline content
if let Some(mut inline) = parser.next() { if let Some(mut inline) = parser.next() {
inline.span = inline.span.translate(self.inline_start); inline.span = inline.span.translate(self.inline_start);
return Some(Event::from_inline(self.src, inline)); return Some(Event::from_inline(self.src, inline));
} else if let Some(ev) = self.tree.next() { }
self.inline_parser = None;
/*
else if let Some(ev) = self.tree.next() {
match ev.kind { match ev.kind {
tree::EventKind::Atom(a) => { tree::EventKind::Atom(a) => {
assert_eq!(a, block::Atom::Inline); assert_eq!(a, block::Atom::Inline);
@ -352,12 +366,13 @@ impl<'s> Iterator for Parser<'s> {
parser.parse(ev.span.of(self.src), last_inline); parser.parse(ev.span.of(self.src), last_inline);
} }
tree::EventKind::Exit(c) => { tree::EventKind::Exit(c) => {
self.parser = None; self.inline_parser = None;
return Some(Event::End(Container::from_block(ev.span.of(self.src), c))); return Some(Event::End(Container::from_block(ev.span.of(self.src), c)));
} }
tree::EventKind::Enter(..) => unreachable!(), tree::EventKind::Enter(..) => unreachable!(),
} }
} }
*/
} }
for ev in &mut self.tree { for ev in &mut self.tree {
@ -372,12 +387,18 @@ impl<'s> Iterator for Parser<'s> {
continue; continue;
} }
}, },
tree::EventKind::Enter(c) => { tree::EventKind::Enter(b) => {
if matches!(c, block::Block::Leaf(_)) { if matches!(b, block::Block::Leaf(_)) {
self.parser = Some(inline::Parser::new()); let chars = InlineChars {
src: self.src,
inlines: self.tree.atoms(),
};
// TODO solve self-referential reference here without unsafe
self.inline_parser =
unsafe { Some(std::mem::transmute(inline::Parser::new(chars))) };
self.inline_start = ev.span.end(); self.inline_start = ev.span.end();
} }
let container = match c { let container = match b {
block::Block::Leaf(block::Leaf::CodeBlock { .. }) => { block::Block::Leaf(block::Leaf::CodeBlock { .. }) => {
self.inline_start += 1; // skip newline self.inline_start += 1; // skip newline
Container::CodeBlock { Container::CodeBlock {

View file

@ -20,6 +20,19 @@ pub struct Tree<C, A> {
head: Option<NodeIndex>, head: Option<NodeIndex>,
} }
/// Iterator over a borrowed run of tree nodes that yields each node's span.
#[derive(Clone)]
pub struct Atoms<'t, C, A> {
    /// Remaining nodes to visit.
    iter: std::slice::Iter<'t, Node<C, A>>,
}

impl<'t, C, A> Iterator for Atoms<'t, C, A> {
    type Item = Span;

    /// Advances to the next node in the slice and returns its span, or
    /// `None` when the slice is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        let node = self.iter.next()?;
        Some(node.span)
    }
}
impl<C: Copy, A: Copy> Tree<C, A> { impl<C: Copy, A: Copy> Tree<C, A> {
fn new(nodes: Vec<Node<C, A>>) -> Self { fn new(nodes: Vec<Node<C, A>>) -> Self {
let head = nodes[NodeIndex::root().index()].next; let head = nodes[NodeIndex::root().index()].next;
@ -30,7 +43,15 @@ impl<C: Copy, A: Copy> Tree<C, A> {
} }
} }
pub fn atoms(&self) -> impl Iterator<Item = (A, Span)> + '_ { pub fn atoms(&self) -> Atoms<C, A> {
let start = self.nodes[self.head.unwrap().index()].next.unwrap().index();
let end = start + self.atoms_().count();
Atoms {
iter: self.nodes[start..end].iter(),
}
}
pub fn atoms_(&self) -> impl Iterator<Item = (A, Span)> + '_ {
let mut head = self.head; let mut head = self.head;
std::iter::from_fn(move || { std::iter::from_fn(move || {
head.take().map(|h| { head.take().map(|h| {