pre remove atomic state

This commit is contained in:
Noah Hellman 2022-12-11 18:47:00 +01:00
parent 227c86f4f0
commit 946d88e5c0
4 changed files with 116 additions and 47 deletions

View file

@ -97,11 +97,11 @@ impl AtomicState {
}
}
pub struct Parser<'s> {
pub struct Parser<I> {
/// The last inline element has been provided, finish current events.
last: bool,
/// Lexer, hosting upcoming source.
lexer: lex::Lexer<'s>,
lexer: lex::Lexer<I>,
/// Span of current event.
span: Span,
/// State of non-recursive elements.
@ -116,11 +116,11 @@ pub struct Parser<'s> {
events: std::collections::VecDeque<Event>,
}
impl<'s> Parser<'s> {
pub fn new() -> Self {
impl<I: Iterator<Item = char> + Clone> Parser<I> {
pub fn new(chars: I) -> Self {
Self {
last: false,
lexer: lex::Lexer::new(""),
last: true,
lexer: lex::Lexer::new(chars),
span: Span::new(0, 0),
atomic_state: AtomicState::None,
typesets: Vec::new(),
@ -129,13 +129,15 @@ impl<'s> Parser<'s> {
}
}
pub fn parse(&mut self, src: &'s str, last: bool) {
self.lexer = lex::Lexer::new(src);
/*
pub fn parse(&mut self, src: &str, last: bool) {
self.lexer = lex::Lexer::new(src.chars());
if last {
assert!(!self.last);
}
self.last = last;
}
*/
fn eat(&mut self) -> Option<lex::Token> {
let tok = self.lexer.next();
@ -181,7 +183,8 @@ impl<'s> Parser<'s> {
&& first.len == opener_len
{
self.atomic_state = AtomicState::None;
let kind =
let kind = todo!();
/*
if matches!(kind, Verbatim) && self.lexer.peek_ahead().starts_with("{=") {
let mut chars = self.lexer.peek_ahead()["{=".len()..].chars();
let len = chars
@ -201,6 +204,7 @@ impl<'s> Parser<'s> {
} else {
kind
};
*/
EventKind::Exit(kind)
} else {
EventKind::Str
@ -261,13 +265,12 @@ impl<'s> Parser<'s> {
}
fn parse_span(&mut self, first: &lex::Token) -> Option<Event> {
match first.kind {
if let Some(open) = match first.kind {
lex::Kind::Open(Delimiter::Bracket) => Some(true),
lex::Kind::Close(Delimiter::Bracket) => Some(false),
_ => None,
}
.map(|open| {
if open {
} {
Some(if open {
self.spans.push(self.events.len());
// use str for now, replace if closed later
Event {
@ -275,21 +278,44 @@ impl<'s> Parser<'s> {
span: self.span,
}
} else {
if self.lexer.peek_ahead().starts_with('[') {
/*
let kind = if self.lexer.peek_ahead().starts_with('[') {
let mut chars = self.lexer.peek_ahead()["[".len()..].chars();
let len = chars
.clone()
.take_while(|c| !c.is_whitespace() && !matches!(c, '[' | ']'))
.count();
match chars.nth(len) {
Some(']') => todo!(),
None => self.atomic_state = AtomicState::ReferenceLinkTag,
_ => todo!(),
Some(']') => EventKind::Exit(ReferenceLink),
None => {
self.atomic_state = AtomicState::ReferenceLinkTag;
return None;
}
_ => EventKind::Str,
}
}
} else if self.lexer.peek_ahead().starts_with('(') {
let mut chars = self.lexer.peek_ahead()["[".len()..].chars();
let len = chars
.clone()
.take_while(|c| !c.is_whitespace() && !matches!(c, '[' | ']'))
.count();
match chars.nth(len) {
Some(']') => EventKind::Exit(ReferenceLink),
None => {
self.atomic_state = AtomicState::Url { auto: false };
return None;
}
_ => EventKind::Str,
}
} else {
return None;
};
*/
todo!()
}
})
})
} else {
None
}
}
fn parse_typeset(&mut self, first: &lex::Token) -> Option<Event> {
@ -365,7 +391,7 @@ impl<'s> Parser<'s> {
}
}
impl<'s> Iterator for Parser<'s> {
impl<I: Iterator<Item = char> + Clone> Iterator for Parser<I> {
type Item = Event;
fn next(&mut self) -> Option<Self::Item> {
@ -437,8 +463,7 @@ mod test {
macro_rules! test_parse {
($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => {
#[allow(unused)]
let mut p = super::Parser::new();
p.parse($src, true);
let mut p = super::Parser::new($src.chars());
let actual = p.map(|ev| (ev.kind, ev.span.of($src))).collect::<Vec<_>>();
let expected = &[$($($token),*,)?];
assert_eq!(actual, expected, "\n\n{}\n\n", $src);

View file

@ -82,9 +82,8 @@ impl Sequence {
}
#[derive(Clone)]
pub(crate) struct Lexer<'s> {
pub src: &'s str,
chars: std::str::Chars<'s>,
pub(crate) struct Lexer<I> {
chars: I,
/// Next character should be escaped.
escape: bool,
/// Token to be peeked or next'ed.
@ -93,11 +92,10 @@ pub(crate) struct Lexer<'s> {
len: usize,
}
impl<'s> Lexer<'s> {
pub fn new(src: &'s str) -> Lexer<'s> {
impl<I: Iterator<Item = char> + Clone> Lexer<I> {
pub fn new(chars: I) -> Lexer<I> {
Lexer {
src,
chars: src.chars(),
chars,
escape: false,
next: None,
len: 0,
@ -111,15 +109,19 @@ impl<'s> Lexer<'s> {
self.next.as_ref()
}
/*
pub fn pos(&self) -> usize {
self.src.len()
- self.chars.as_str().len()
- self.next.as_ref().map(|t| t.len).unwrap_or_default()
}
*/
/*
pub fn peek_ahead(&mut self) -> &'s str {
&self.src[self.pos()..]
}
*/
fn next_token(&mut self) -> Option<Token> {
let mut current = self.token();
@ -272,7 +274,7 @@ impl<'s> Lexer<'s> {
}
}
impl<'s> Iterator for Lexer<'s> {
impl<I: Iterator<Item = char> + Clone> Iterator for Lexer<I> {
type Item = Token;
fn next(&mut self) -> Option<Self::Item> {
@ -290,7 +292,7 @@ mod test {
macro_rules! test_lex {
($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => {
#[allow(unused)]
let actual = super::Lexer::new($src).collect::<Vec<_>>();
let actual = super::Lexer::new($src.chars()).collect::<Vec<_>>();
let expected = vec![$($($token),*,)?];
assert_eq!(actual, expected, "{}", $src);
};

View file

@ -304,20 +304,31 @@ impl<'s> Attributes<'s> {
Self(self.0.take())
}
/// Placeholder: not implemented yet, so every call panics via `todo!()`.
///
/// NOTE(review): presumably meant to report whether `src` is well-formed
/// attribute syntax — confirm once the body is written.
#[must_use]
pub fn valid(src: &str) -> bool {
todo!()
}
/// Placeholder: not implemented yet, so every call panics via `todo!()`.
///
/// NOTE(review): presumably meant to parse attributes out of `src` and store
/// them in `self` — confirm once the body is written.
pub fn parse(&mut self, src: &'s str) {
todo!()
}
}
/// Character source built from the source text and an iterator of spans.
///
/// Each span produced by `inlines` is resolved against `src` to obtain the
/// characters that are emitted.
#[derive(Clone)]
struct InlineChars<'t, 's> {
    /// Source text that the spans are resolved against.
    src: &'s str,
    /// Spans that have not been consumed yet.
    inlines: tree::Atoms<'t, block::Block, block::Atom>,
}

impl<'t, 's> Iterator for InlineChars<'t, 's> {
    type Item = char;

    /// Advances `inlines` to the next span holding at least one character and
    /// returns that span's first character; `None` once the spans run out.
    ///
    /// NOTE(review): no position inside the current span is kept between
    /// calls, so every character after the first one of a span is dropped.
    /// Yielding whole spans would require storing the partially consumed
    /// `Chars` iterator in the struct.
    fn next(&mut self) -> Option<Self::Item> {
        // Copy the shared reference out first so the closure does not need to
        // borrow `self` while `self.inlines` is being advanced.
        let src = self.src;
        self.inlines.find_map(|sp| sp.of(src).chars().next())
    }
}
pub struct Parser<'s> {
src: &'s str,
tree: block::Tree,
parser: Option<inline::Parser<'s>>,
inline_parser: Option<inline::Parser<InlineChars<'s, 's>>>,
inline_start: usize,
block_attributes: Attributes<'s>,
}
@ -328,7 +339,7 @@ impl<'s> Parser<'s> {
Self {
src,
tree: block::parse(src),
parser: None,
inline_parser: None,
inline_start: 0,
block_attributes: Attributes::none(),
}
@ -339,12 +350,15 @@ impl<'s> Iterator for Parser<'s> {
type Item = Event<'s>;
fn next(&mut self) -> Option<Self::Item> {
while let Some(parser) = &mut self.parser {
if let Some(parser) = &mut self.inline_parser {
// inside leaf block, with inline content
if let Some(mut inline) = parser.next() {
inline.span = inline.span.translate(self.inline_start);
return Some(Event::from_inline(self.src, inline));
} else if let Some(ev) = self.tree.next() {
}
self.inline_parser = None;
/*
else if let Some(ev) = self.tree.next() {
match ev.kind {
tree::EventKind::Atom(a) => {
assert_eq!(a, block::Atom::Inline);
@ -352,12 +366,13 @@ impl<'s> Iterator for Parser<'s> {
parser.parse(ev.span.of(self.src), last_inline);
}
tree::EventKind::Exit(c) => {
self.parser = None;
self.inline_parser = None;
return Some(Event::End(Container::from_block(ev.span.of(self.src), c)));
}
tree::EventKind::Enter(..) => unreachable!(),
}
}
*/
}
for ev in &mut self.tree {
@ -372,12 +387,18 @@ impl<'s> Iterator for Parser<'s> {
continue;
}
},
tree::EventKind::Enter(c) => {
if matches!(c, block::Block::Leaf(_)) {
self.parser = Some(inline::Parser::new());
tree::EventKind::Enter(b) => {
if matches!(b, block::Block::Leaf(_)) {
let chars = InlineChars {
src: self.src,
inlines: self.tree.atoms(),
};
// TODO solve self-referential reference here without unsafe
self.inline_parser =
unsafe { Some(std::mem::transmute(inline::Parser::new(chars))) };
self.inline_start = ev.span.end();
}
let container = match c {
let container = match b {
block::Block::Leaf(block::Leaf::CodeBlock { .. }) => {
self.inline_start += 1; // skip newline
Container::CodeBlock {

View file

@ -20,6 +20,19 @@ pub struct Tree<C, A> {
head: Option<NodeIndex>,
}
/// Iterator over the spans of a borrowed run of tree nodes.
#[derive(Clone)]
pub struct Atoms<'t, C, A> {
    /// Nodes that have not been visited yet.
    iter: std::slice::Iter<'t, Node<C, A>>,
}

impl<'t, C, A> Iterator for Atoms<'t, C, A> {
    type Item = Span;

    /// Yields the span of the next node, or `None` once the slice is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        let node = self.iter.next()?;
        Some(node.span)
    }
}
impl<C: Copy, A: Copy> Tree<C, A> {
fn new(nodes: Vec<Node<C, A>>) -> Self {
let head = nodes[NodeIndex::root().index()].next;
@ -30,7 +43,15 @@ impl<C: Copy, A: Copy> Tree<C, A> {
}
}
pub fn atoms(&self) -> impl Iterator<Item = (A, Span)> + '_ {
/// Returns an [`Atoms`] iterator over a sub-slice of `nodes`.
///
/// The slice begins at the node referenced by the `next` link of the current
/// head node, and its length is the number of items produced by `atoms_()`.
///
/// NOTE(review): this presumes those items are stored back to back in
/// `nodes` — confirm against how the tree is built.
///
/// # Panics
///
/// Panics if there is no head, if the head node has no `next` link, or if the
/// computed range falls outside `nodes`.
pub fn atoms(&self) -> Atoms<C, A> {
    let head = self.head.unwrap().index();
    let first = self.nodes[head].next.unwrap().index();
    let len = self.atoms_().count();
    Atoms {
        iter: self.nodes[first..first + len].iter(),
    }
}
pub fn atoms_(&self) -> impl Iterator<Item = (A, Span)> + '_ {
let mut head = self.head;
std::iter::from_fn(move || {
head.take().map(|h| {