pre remove atomic state
This commit is contained in:
parent
227c86f4f0
commit
946d88e5c0
4 changed files with 116 additions and 47 deletions
|
@ -97,11 +97,11 @@ impl AtomicState {
|
|||
}
|
||||
}
|
||||
|
||||
pub struct Parser<'s> {
|
||||
pub struct Parser<I> {
|
||||
/// The last inline element has been provided, finish current events.
|
||||
last: bool,
|
||||
/// Lexer, hosting upcoming source.
|
||||
lexer: lex::Lexer<'s>,
|
||||
lexer: lex::Lexer<I>,
|
||||
/// Span of current event.
|
||||
span: Span,
|
||||
/// State of non-recursive elements.
|
||||
|
@ -116,11 +116,11 @@ pub struct Parser<'s> {
|
|||
events: std::collections::VecDeque<Event>,
|
||||
}
|
||||
|
||||
impl<'s> Parser<'s> {
|
||||
pub fn new() -> Self {
|
||||
impl<I: Iterator<Item = char> + Clone> Parser<I> {
|
||||
pub fn new(chars: I) -> Self {
|
||||
Self {
|
||||
last: false,
|
||||
lexer: lex::Lexer::new(""),
|
||||
last: true,
|
||||
lexer: lex::Lexer::new(chars),
|
||||
span: Span::new(0, 0),
|
||||
atomic_state: AtomicState::None,
|
||||
typesets: Vec::new(),
|
||||
|
@ -129,13 +129,15 @@ impl<'s> Parser<'s> {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn parse(&mut self, src: &'s str, last: bool) {
|
||||
self.lexer = lex::Lexer::new(src);
|
||||
/*
|
||||
pub fn parse(&mut self, src: &str, last: bool) {
|
||||
self.lexer = lex::Lexer::new(src.chars());
|
||||
if last {
|
||||
assert!(!self.last);
|
||||
}
|
||||
self.last = last;
|
||||
}
|
||||
*/
|
||||
|
||||
fn eat(&mut self) -> Option<lex::Token> {
|
||||
let tok = self.lexer.next();
|
||||
|
@ -181,7 +183,8 @@ impl<'s> Parser<'s> {
|
|||
&& first.len == opener_len
|
||||
{
|
||||
self.atomic_state = AtomicState::None;
|
||||
let kind =
|
||||
let kind = todo!();
|
||||
/*
|
||||
if matches!(kind, Verbatim) && self.lexer.peek_ahead().starts_with("{=") {
|
||||
let mut chars = self.lexer.peek_ahead()["{=".len()..].chars();
|
||||
let len = chars
|
||||
|
@ -201,6 +204,7 @@ impl<'s> Parser<'s> {
|
|||
} else {
|
||||
kind
|
||||
};
|
||||
*/
|
||||
EventKind::Exit(kind)
|
||||
} else {
|
||||
EventKind::Str
|
||||
|
@ -261,13 +265,12 @@ impl<'s> Parser<'s> {
|
|||
}
|
||||
|
||||
fn parse_span(&mut self, first: &lex::Token) -> Option<Event> {
|
||||
match first.kind {
|
||||
if let Some(open) = match first.kind {
|
||||
lex::Kind::Open(Delimiter::Bracket) => Some(true),
|
||||
lex::Kind::Close(Delimiter::Bracket) => Some(false),
|
||||
_ => None,
|
||||
}
|
||||
.map(|open| {
|
||||
if open {
|
||||
} {
|
||||
Some(if open {
|
||||
self.spans.push(self.events.len());
|
||||
// use str for now, replace if closed later
|
||||
Event {
|
||||
|
@ -275,21 +278,44 @@ impl<'s> Parser<'s> {
|
|||
span: self.span,
|
||||
}
|
||||
} else {
|
||||
if self.lexer.peek_ahead().starts_with('[') {
|
||||
/*
|
||||
let kind = if self.lexer.peek_ahead().starts_with('[') {
|
||||
let mut chars = self.lexer.peek_ahead()["[".len()..].chars();
|
||||
let len = chars
|
||||
.clone()
|
||||
.take_while(|c| !c.is_whitespace() && !matches!(c, '[' | ']'))
|
||||
.count();
|
||||
match chars.nth(len) {
|
||||
Some(']') => todo!(),
|
||||
None => self.atomic_state = AtomicState::ReferenceLinkTag,
|
||||
_ => todo!(),
|
||||
Some(']') => EventKind::Exit(ReferenceLink),
|
||||
None => {
|
||||
self.atomic_state = AtomicState::ReferenceLinkTag;
|
||||
return None;
|
||||
}
|
||||
_ => EventKind::Str,
|
||||
}
|
||||
} else if self.lexer.peek_ahead().starts_with('(') {
|
||||
let mut chars = self.lexer.peek_ahead()["[".len()..].chars();
|
||||
let len = chars
|
||||
.clone()
|
||||
.take_while(|c| !c.is_whitespace() && !matches!(c, '[' | ']'))
|
||||
.count();
|
||||
match chars.nth(len) {
|
||||
Some(']') => EventKind::Exit(ReferenceLink),
|
||||
None => {
|
||||
self.atomic_state = AtomicState::Url { auto: false };
|
||||
return None;
|
||||
}
|
||||
_ => EventKind::Str,
|
||||
}
|
||||
} else {
|
||||
return None;
|
||||
};
|
||||
*/
|
||||
todo!()
|
||||
}
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_typeset(&mut self, first: &lex::Token) -> Option<Event> {
|
||||
|
@ -365,7 +391,7 @@ impl<'s> Parser<'s> {
|
|||
}
|
||||
}
|
||||
|
||||
impl<'s> Iterator for Parser<'s> {
|
||||
impl<I: Iterator<Item = char> + Clone> Iterator for Parser<I> {
|
||||
type Item = Event;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
|
@ -437,8 +463,7 @@ mod test {
|
|||
macro_rules! test_parse {
|
||||
($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => {
|
||||
#[allow(unused)]
|
||||
let mut p = super::Parser::new();
|
||||
p.parse($src, true);
|
||||
let mut p = super::Parser::new($src.chars());
|
||||
let actual = p.map(|ev| (ev.kind, ev.span.of($src))).collect::<Vec<_>>();
|
||||
let expected = &[$($($token),*,)?];
|
||||
assert_eq!(actual, expected, "\n\n{}\n\n", $src);
|
||||
|
|
20
src/lex.rs
20
src/lex.rs
|
@ -82,9 +82,8 @@ impl Sequence {
|
|||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct Lexer<'s> {
|
||||
pub src: &'s str,
|
||||
chars: std::str::Chars<'s>,
|
||||
pub(crate) struct Lexer<I> {
|
||||
chars: I,
|
||||
/// Next character should be escaped.
|
||||
escape: bool,
|
||||
/// Token to be peeked or next'ed.
|
||||
|
@ -93,11 +92,10 @@ pub(crate) struct Lexer<'s> {
|
|||
len: usize,
|
||||
}
|
||||
|
||||
impl<'s> Lexer<'s> {
|
||||
pub fn new(src: &'s str) -> Lexer<'s> {
|
||||
impl<I: Iterator<Item = char> + Clone> Lexer<I> {
|
||||
pub fn new(chars: I) -> Lexer<I> {
|
||||
Lexer {
|
||||
src,
|
||||
chars: src.chars(),
|
||||
chars,
|
||||
escape: false,
|
||||
next: None,
|
||||
len: 0,
|
||||
|
@ -111,15 +109,19 @@ impl<'s> Lexer<'s> {
|
|||
self.next.as_ref()
|
||||
}
|
||||
|
||||
/*
|
||||
pub fn pos(&self) -> usize {
|
||||
self.src.len()
|
||||
- self.chars.as_str().len()
|
||||
- self.next.as_ref().map(|t| t.len).unwrap_or_default()
|
||||
}
|
||||
*/
|
||||
|
||||
/*
|
||||
pub fn peek_ahead(&mut self) -> &'s str {
|
||||
&self.src[self.pos()..]
|
||||
}
|
||||
*/
|
||||
|
||||
fn next_token(&mut self) -> Option<Token> {
|
||||
let mut current = self.token();
|
||||
|
@ -272,7 +274,7 @@ impl<'s> Lexer<'s> {
|
|||
}
|
||||
}
|
||||
|
||||
impl<'s> Iterator for Lexer<'s> {
|
||||
impl<I: Iterator<Item = char> + Clone> Iterator for Lexer<I> {
|
||||
type Item = Token;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
|
@ -290,7 +292,7 @@ mod test {
|
|||
macro_rules! test_lex {
|
||||
($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => {
|
||||
#[allow(unused)]
|
||||
let actual = super::Lexer::new($src).collect::<Vec<_>>();
|
||||
let actual = super::Lexer::new($src.chars()).collect::<Vec<_>>();
|
||||
let expected = vec![$($($token),*,)?];
|
||||
assert_eq!(actual, expected, "{}", $src);
|
||||
};
|
||||
|
|
49
src/lib.rs
49
src/lib.rs
|
@ -304,20 +304,31 @@ impl<'s> Attributes<'s> {
|
|||
Self(self.0.take())
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn valid(src: &str) -> bool {
|
||||
todo!()
|
||||
}
|
||||
|
||||
pub fn parse(&mut self, src: &'s str) {
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct InlineChars<'t, 's> {
|
||||
src: &'s str,
|
||||
inlines: tree::Atoms<'t, block::Block, block::Atom>,
|
||||
}
|
||||
|
||||
impl<'t, 's> Iterator for InlineChars<'t, 's> {
|
||||
type Item = char;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
(&mut self.inlines)
|
||||
.flat_map(|sp| sp.of(self.src).chars())
|
||||
.next()
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Parser<'s> {
|
||||
src: &'s str,
|
||||
tree: block::Tree,
|
||||
parser: Option<inline::Parser<'s>>,
|
||||
inline_parser: Option<inline::Parser<InlineChars<'s, 's>>>,
|
||||
inline_start: usize,
|
||||
block_attributes: Attributes<'s>,
|
||||
}
|
||||
|
@ -328,7 +339,7 @@ impl<'s> Parser<'s> {
|
|||
Self {
|
||||
src,
|
||||
tree: block::parse(src),
|
||||
parser: None,
|
||||
inline_parser: None,
|
||||
inline_start: 0,
|
||||
block_attributes: Attributes::none(),
|
||||
}
|
||||
|
@ -339,12 +350,15 @@ impl<'s> Iterator for Parser<'s> {
|
|||
type Item = Event<'s>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
while let Some(parser) = &mut self.parser {
|
||||
if let Some(parser) = &mut self.inline_parser {
|
||||
// inside leaf block, with inline content
|
||||
if let Some(mut inline) = parser.next() {
|
||||
inline.span = inline.span.translate(self.inline_start);
|
||||
return Some(Event::from_inline(self.src, inline));
|
||||
} else if let Some(ev) = self.tree.next() {
|
||||
}
|
||||
self.inline_parser = None;
|
||||
/*
|
||||
else if let Some(ev) = self.tree.next() {
|
||||
match ev.kind {
|
||||
tree::EventKind::Atom(a) => {
|
||||
assert_eq!(a, block::Atom::Inline);
|
||||
|
@ -352,12 +366,13 @@ impl<'s> Iterator for Parser<'s> {
|
|||
parser.parse(ev.span.of(self.src), last_inline);
|
||||
}
|
||||
tree::EventKind::Exit(c) => {
|
||||
self.parser = None;
|
||||
self.inline_parser = None;
|
||||
return Some(Event::End(Container::from_block(ev.span.of(self.src), c)));
|
||||
}
|
||||
tree::EventKind::Enter(..) => unreachable!(),
|
||||
}
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
for ev in &mut self.tree {
|
||||
|
@ -372,12 +387,18 @@ impl<'s> Iterator for Parser<'s> {
|
|||
continue;
|
||||
}
|
||||
},
|
||||
tree::EventKind::Enter(c) => {
|
||||
if matches!(c, block::Block::Leaf(_)) {
|
||||
self.parser = Some(inline::Parser::new());
|
||||
tree::EventKind::Enter(b) => {
|
||||
if matches!(b, block::Block::Leaf(_)) {
|
||||
let chars = InlineChars {
|
||||
src: self.src,
|
||||
inlines: self.tree.atoms(),
|
||||
};
|
||||
// TODO solve self-referential reference here without unsafe
|
||||
self.inline_parser =
|
||||
unsafe { Some(std::mem::transmute(inline::Parser::new(chars))) };
|
||||
self.inline_start = ev.span.end();
|
||||
}
|
||||
let container = match c {
|
||||
let container = match b {
|
||||
block::Block::Leaf(block::Leaf::CodeBlock { .. }) => {
|
||||
self.inline_start += 1; // skip newline
|
||||
Container::CodeBlock {
|
||||
|
|
23
src/tree.rs
23
src/tree.rs
|
@ -20,6 +20,19 @@ pub struct Tree<C, A> {
|
|||
head: Option<NodeIndex>,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Atoms<'t, C, A> {
|
||||
iter: std::slice::Iter<'t, Node<C, A>>,
|
||||
}
|
||||
|
||||
impl<'t, C, A> Iterator for Atoms<'t, C, A> {
|
||||
type Item = Span;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
self.iter.next().map(|n| n.span)
|
||||
}
|
||||
}
|
||||
|
||||
impl<C: Copy, A: Copy> Tree<C, A> {
|
||||
fn new(nodes: Vec<Node<C, A>>) -> Self {
|
||||
let head = nodes[NodeIndex::root().index()].next;
|
||||
|
@ -30,7 +43,15 @@ impl<C: Copy, A: Copy> Tree<C, A> {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn atoms(&self) -> impl Iterator<Item = (A, Span)> + '_ {
|
||||
pub fn atoms(&self) -> Atoms<C, A> {
|
||||
let start = self.nodes[self.head.unwrap().index()].next.unwrap().index();
|
||||
let end = start + self.atoms_().count();
|
||||
Atoms {
|
||||
iter: self.nodes[start..end].iter(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn atoms_(&self) -> impl Iterator<Item = (A, Span)> + '_ {
|
||||
let mut head = self.head;
|
||||
std::iter::from_fn(move || {
|
||||
head.take().map(|h| {
|
||||
|
|
Loading…
Reference in a new issue