pre remove atomic state
This commit is contained in:
parent
227c86f4f0
commit
946d88e5c0
4 changed files with 116 additions and 47 deletions
|
@ -97,11 +97,11 @@ impl AtomicState {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct Parser<'s> {
|
pub struct Parser<I> {
|
||||||
/// The last inline element has been provided, finish current events.
|
/// The last inline element has been provided, finish current events.
|
||||||
last: bool,
|
last: bool,
|
||||||
/// Lexer, hosting upcoming source.
|
/// Lexer, hosting upcoming source.
|
||||||
lexer: lex::Lexer<'s>,
|
lexer: lex::Lexer<I>,
|
||||||
/// Span of current event.
|
/// Span of current event.
|
||||||
span: Span,
|
span: Span,
|
||||||
/// State of non-recursive elements.
|
/// State of non-recursive elements.
|
||||||
|
@ -116,11 +116,11 @@ pub struct Parser<'s> {
|
||||||
events: std::collections::VecDeque<Event>,
|
events: std::collections::VecDeque<Event>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'s> Parser<'s> {
|
impl<I: Iterator<Item = char> + Clone> Parser<I> {
|
||||||
pub fn new() -> Self {
|
pub fn new(chars: I) -> Self {
|
||||||
Self {
|
Self {
|
||||||
last: false,
|
last: true,
|
||||||
lexer: lex::Lexer::new(""),
|
lexer: lex::Lexer::new(chars),
|
||||||
span: Span::new(0, 0),
|
span: Span::new(0, 0),
|
||||||
atomic_state: AtomicState::None,
|
atomic_state: AtomicState::None,
|
||||||
typesets: Vec::new(),
|
typesets: Vec::new(),
|
||||||
|
@ -129,13 +129,15 @@ impl<'s> Parser<'s> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn parse(&mut self, src: &'s str, last: bool) {
|
/*
|
||||||
self.lexer = lex::Lexer::new(src);
|
pub fn parse(&mut self, src: &str, last: bool) {
|
||||||
|
self.lexer = lex::Lexer::new(src.chars());
|
||||||
if last {
|
if last {
|
||||||
assert!(!self.last);
|
assert!(!self.last);
|
||||||
}
|
}
|
||||||
self.last = last;
|
self.last = last;
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
fn eat(&mut self) -> Option<lex::Token> {
|
fn eat(&mut self) -> Option<lex::Token> {
|
||||||
let tok = self.lexer.next();
|
let tok = self.lexer.next();
|
||||||
|
@ -181,7 +183,8 @@ impl<'s> Parser<'s> {
|
||||||
&& first.len == opener_len
|
&& first.len == opener_len
|
||||||
{
|
{
|
||||||
self.atomic_state = AtomicState::None;
|
self.atomic_state = AtomicState::None;
|
||||||
let kind =
|
let kind = todo!();
|
||||||
|
/*
|
||||||
if matches!(kind, Verbatim) && self.lexer.peek_ahead().starts_with("{=") {
|
if matches!(kind, Verbatim) && self.lexer.peek_ahead().starts_with("{=") {
|
||||||
let mut chars = self.lexer.peek_ahead()["{=".len()..].chars();
|
let mut chars = self.lexer.peek_ahead()["{=".len()..].chars();
|
||||||
let len = chars
|
let len = chars
|
||||||
|
@ -201,6 +204,7 @@ impl<'s> Parser<'s> {
|
||||||
} else {
|
} else {
|
||||||
kind
|
kind
|
||||||
};
|
};
|
||||||
|
*/
|
||||||
EventKind::Exit(kind)
|
EventKind::Exit(kind)
|
||||||
} else {
|
} else {
|
||||||
EventKind::Str
|
EventKind::Str
|
||||||
|
@ -261,13 +265,12 @@ impl<'s> Parser<'s> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_span(&mut self, first: &lex::Token) -> Option<Event> {
|
fn parse_span(&mut self, first: &lex::Token) -> Option<Event> {
|
||||||
match first.kind {
|
if let Some(open) = match first.kind {
|
||||||
lex::Kind::Open(Delimiter::Bracket) => Some(true),
|
lex::Kind::Open(Delimiter::Bracket) => Some(true),
|
||||||
lex::Kind::Close(Delimiter::Bracket) => Some(false),
|
lex::Kind::Close(Delimiter::Bracket) => Some(false),
|
||||||
_ => None,
|
_ => None,
|
||||||
}
|
} {
|
||||||
.map(|open| {
|
Some(if open {
|
||||||
if open {
|
|
||||||
self.spans.push(self.events.len());
|
self.spans.push(self.events.len());
|
||||||
// use str for now, replace if closed later
|
// use str for now, replace if closed later
|
||||||
Event {
|
Event {
|
||||||
|
@ -275,21 +278,44 @@ impl<'s> Parser<'s> {
|
||||||
span: self.span,
|
span: self.span,
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if self.lexer.peek_ahead().starts_with('[') {
|
/*
|
||||||
|
let kind = if self.lexer.peek_ahead().starts_with('[') {
|
||||||
let mut chars = self.lexer.peek_ahead()["[".len()..].chars();
|
let mut chars = self.lexer.peek_ahead()["[".len()..].chars();
|
||||||
let len = chars
|
let len = chars
|
||||||
.clone()
|
.clone()
|
||||||
.take_while(|c| !c.is_whitespace() && !matches!(c, '[' | ']'))
|
.take_while(|c| !c.is_whitespace() && !matches!(c, '[' | ']'))
|
||||||
.count();
|
.count();
|
||||||
match chars.nth(len) {
|
match chars.nth(len) {
|
||||||
Some(']') => todo!(),
|
Some(']') => EventKind::Exit(ReferenceLink),
|
||||||
None => self.atomic_state = AtomicState::ReferenceLinkTag,
|
None => {
|
||||||
_ => todo!(),
|
self.atomic_state = AtomicState::ReferenceLinkTag;
|
||||||
|
return None;
|
||||||
}
|
}
|
||||||
|
_ => EventKind::Str,
|
||||||
}
|
}
|
||||||
|
} else if self.lexer.peek_ahead().starts_with('(') {
|
||||||
|
let mut chars = self.lexer.peek_ahead()["[".len()..].chars();
|
||||||
|
let len = chars
|
||||||
|
.clone()
|
||||||
|
.take_while(|c| !c.is_whitespace() && !matches!(c, '[' | ']'))
|
||||||
|
.count();
|
||||||
|
match chars.nth(len) {
|
||||||
|
Some(']') => EventKind::Exit(ReferenceLink),
|
||||||
|
None => {
|
||||||
|
self.atomic_state = AtomicState::Url { auto: false };
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
_ => EventKind::Str,
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return None;
|
||||||
|
};
|
||||||
|
*/
|
||||||
todo!()
|
todo!()
|
||||||
}
|
|
||||||
})
|
})
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_typeset(&mut self, first: &lex::Token) -> Option<Event> {
|
fn parse_typeset(&mut self, first: &lex::Token) -> Option<Event> {
|
||||||
|
@ -365,7 +391,7 @@ impl<'s> Parser<'s> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'s> Iterator for Parser<'s> {
|
impl<I: Iterator<Item = char> + Clone> Iterator for Parser<I> {
|
||||||
type Item = Event;
|
type Item = Event;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
@ -437,8 +463,7 @@ mod test {
|
||||||
macro_rules! test_parse {
|
macro_rules! test_parse {
|
||||||
($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => {
|
($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => {
|
||||||
#[allow(unused)]
|
#[allow(unused)]
|
||||||
let mut p = super::Parser::new();
|
let mut p = super::Parser::new($src.chars());
|
||||||
p.parse($src, true);
|
|
||||||
let actual = p.map(|ev| (ev.kind, ev.span.of($src))).collect::<Vec<_>>();
|
let actual = p.map(|ev| (ev.kind, ev.span.of($src))).collect::<Vec<_>>();
|
||||||
let expected = &[$($($token),*,)?];
|
let expected = &[$($($token),*,)?];
|
||||||
assert_eq!(actual, expected, "\n\n{}\n\n", $src);
|
assert_eq!(actual, expected, "\n\n{}\n\n", $src);
|
||||||
|
|
20
src/lex.rs
20
src/lex.rs
|
@ -82,9 +82,8 @@ impl Sequence {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub(crate) struct Lexer<'s> {
|
pub(crate) struct Lexer<I> {
|
||||||
pub src: &'s str,
|
chars: I,
|
||||||
chars: std::str::Chars<'s>,
|
|
||||||
/// Next character should be escaped.
|
/// Next character should be escaped.
|
||||||
escape: bool,
|
escape: bool,
|
||||||
/// Token to be peeked or next'ed.
|
/// Token to be peeked or next'ed.
|
||||||
|
@ -93,11 +92,10 @@ pub(crate) struct Lexer<'s> {
|
||||||
len: usize,
|
len: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'s> Lexer<'s> {
|
impl<I: Iterator<Item = char> + Clone> Lexer<I> {
|
||||||
pub fn new(src: &'s str) -> Lexer<'s> {
|
pub fn new(chars: I) -> Lexer<I> {
|
||||||
Lexer {
|
Lexer {
|
||||||
src,
|
chars,
|
||||||
chars: src.chars(),
|
|
||||||
escape: false,
|
escape: false,
|
||||||
next: None,
|
next: None,
|
||||||
len: 0,
|
len: 0,
|
||||||
|
@ -111,15 +109,19 @@ impl<'s> Lexer<'s> {
|
||||||
self.next.as_ref()
|
self.next.as_ref()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
pub fn pos(&self) -> usize {
|
pub fn pos(&self) -> usize {
|
||||||
self.src.len()
|
self.src.len()
|
||||||
- self.chars.as_str().len()
|
- self.chars.as_str().len()
|
||||||
- self.next.as_ref().map(|t| t.len).unwrap_or_default()
|
- self.next.as_ref().map(|t| t.len).unwrap_or_default()
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
pub fn peek_ahead(&mut self) -> &'s str {
|
pub fn peek_ahead(&mut self) -> &'s str {
|
||||||
&self.src[self.pos()..]
|
&self.src[self.pos()..]
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
fn next_token(&mut self) -> Option<Token> {
|
fn next_token(&mut self) -> Option<Token> {
|
||||||
let mut current = self.token();
|
let mut current = self.token();
|
||||||
|
@ -272,7 +274,7 @@ impl<'s> Lexer<'s> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'s> Iterator for Lexer<'s> {
|
impl<I: Iterator<Item = char> + Clone> Iterator for Lexer<I> {
|
||||||
type Item = Token;
|
type Item = Token;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
@ -290,7 +292,7 @@ mod test {
|
||||||
macro_rules! test_lex {
|
macro_rules! test_lex {
|
||||||
($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => {
|
($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => {
|
||||||
#[allow(unused)]
|
#[allow(unused)]
|
||||||
let actual = super::Lexer::new($src).collect::<Vec<_>>();
|
let actual = super::Lexer::new($src.chars()).collect::<Vec<_>>();
|
||||||
let expected = vec![$($($token),*,)?];
|
let expected = vec![$($($token),*,)?];
|
||||||
assert_eq!(actual, expected, "{}", $src);
|
assert_eq!(actual, expected, "{}", $src);
|
||||||
};
|
};
|
||||||
|
|
49
src/lib.rs
49
src/lib.rs
|
@ -304,20 +304,31 @@ impl<'s> Attributes<'s> {
|
||||||
Self(self.0.take())
|
Self(self.0.take())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[must_use]
|
|
||||||
pub fn valid(src: &str) -> bool {
|
|
||||||
todo!()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn parse(&mut self, src: &'s str) {
|
pub fn parse(&mut self, src: &'s str) {
|
||||||
todo!()
|
todo!()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
struct InlineChars<'t, 's> {
|
||||||
|
src: &'s str,
|
||||||
|
inlines: tree::Atoms<'t, block::Block, block::Atom>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'t, 's> Iterator for InlineChars<'t, 's> {
|
||||||
|
type Item = char;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
(&mut self.inlines)
|
||||||
|
.flat_map(|sp| sp.of(self.src).chars())
|
||||||
|
.next()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub struct Parser<'s> {
|
pub struct Parser<'s> {
|
||||||
src: &'s str,
|
src: &'s str,
|
||||||
tree: block::Tree,
|
tree: block::Tree,
|
||||||
parser: Option<inline::Parser<'s>>,
|
inline_parser: Option<inline::Parser<InlineChars<'s, 's>>>,
|
||||||
inline_start: usize,
|
inline_start: usize,
|
||||||
block_attributes: Attributes<'s>,
|
block_attributes: Attributes<'s>,
|
||||||
}
|
}
|
||||||
|
@ -328,7 +339,7 @@ impl<'s> Parser<'s> {
|
||||||
Self {
|
Self {
|
||||||
src,
|
src,
|
||||||
tree: block::parse(src),
|
tree: block::parse(src),
|
||||||
parser: None,
|
inline_parser: None,
|
||||||
inline_start: 0,
|
inline_start: 0,
|
||||||
block_attributes: Attributes::none(),
|
block_attributes: Attributes::none(),
|
||||||
}
|
}
|
||||||
|
@ -339,12 +350,15 @@ impl<'s> Iterator for Parser<'s> {
|
||||||
type Item = Event<'s>;
|
type Item = Event<'s>;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
while let Some(parser) = &mut self.parser {
|
if let Some(parser) = &mut self.inline_parser {
|
||||||
// inside leaf block, with inline content
|
// inside leaf block, with inline content
|
||||||
if let Some(mut inline) = parser.next() {
|
if let Some(mut inline) = parser.next() {
|
||||||
inline.span = inline.span.translate(self.inline_start);
|
inline.span = inline.span.translate(self.inline_start);
|
||||||
return Some(Event::from_inline(self.src, inline));
|
return Some(Event::from_inline(self.src, inline));
|
||||||
} else if let Some(ev) = self.tree.next() {
|
}
|
||||||
|
self.inline_parser = None;
|
||||||
|
/*
|
||||||
|
else if let Some(ev) = self.tree.next() {
|
||||||
match ev.kind {
|
match ev.kind {
|
||||||
tree::EventKind::Atom(a) => {
|
tree::EventKind::Atom(a) => {
|
||||||
assert_eq!(a, block::Atom::Inline);
|
assert_eq!(a, block::Atom::Inline);
|
||||||
|
@ -352,12 +366,13 @@ impl<'s> Iterator for Parser<'s> {
|
||||||
parser.parse(ev.span.of(self.src), last_inline);
|
parser.parse(ev.span.of(self.src), last_inline);
|
||||||
}
|
}
|
||||||
tree::EventKind::Exit(c) => {
|
tree::EventKind::Exit(c) => {
|
||||||
self.parser = None;
|
self.inline_parser = None;
|
||||||
return Some(Event::End(Container::from_block(ev.span.of(self.src), c)));
|
return Some(Event::End(Container::from_block(ev.span.of(self.src), c)));
|
||||||
}
|
}
|
||||||
tree::EventKind::Enter(..) => unreachable!(),
|
tree::EventKind::Enter(..) => unreachable!(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
}
|
}
|
||||||
|
|
||||||
for ev in &mut self.tree {
|
for ev in &mut self.tree {
|
||||||
|
@ -372,12 +387,18 @@ impl<'s> Iterator for Parser<'s> {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
tree::EventKind::Enter(c) => {
|
tree::EventKind::Enter(b) => {
|
||||||
if matches!(c, block::Block::Leaf(_)) {
|
if matches!(b, block::Block::Leaf(_)) {
|
||||||
self.parser = Some(inline::Parser::new());
|
let chars = InlineChars {
|
||||||
|
src: self.src,
|
||||||
|
inlines: self.tree.atoms(),
|
||||||
|
};
|
||||||
|
// TODO solve self-referential reference here without unsafe
|
||||||
|
self.inline_parser =
|
||||||
|
unsafe { Some(std::mem::transmute(inline::Parser::new(chars))) };
|
||||||
self.inline_start = ev.span.end();
|
self.inline_start = ev.span.end();
|
||||||
}
|
}
|
||||||
let container = match c {
|
let container = match b {
|
||||||
block::Block::Leaf(block::Leaf::CodeBlock { .. }) => {
|
block::Block::Leaf(block::Leaf::CodeBlock { .. }) => {
|
||||||
self.inline_start += 1; // skip newline
|
self.inline_start += 1; // skip newline
|
||||||
Container::CodeBlock {
|
Container::CodeBlock {
|
||||||
|
|
23
src/tree.rs
23
src/tree.rs
|
@ -20,6 +20,19 @@ pub struct Tree<C, A> {
|
||||||
head: Option<NodeIndex>,
|
head: Option<NodeIndex>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct Atoms<'t, C, A> {
|
||||||
|
iter: std::slice::Iter<'t, Node<C, A>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'t, C, A> Iterator for Atoms<'t, C, A> {
|
||||||
|
type Item = Span;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
self.iter.next().map(|n| n.span)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl<C: Copy, A: Copy> Tree<C, A> {
|
impl<C: Copy, A: Copy> Tree<C, A> {
|
||||||
fn new(nodes: Vec<Node<C, A>>) -> Self {
|
fn new(nodes: Vec<Node<C, A>>) -> Self {
|
||||||
let head = nodes[NodeIndex::root().index()].next;
|
let head = nodes[NodeIndex::root().index()].next;
|
||||||
|
@ -30,7 +43,15 @@ impl<C: Copy, A: Copy> Tree<C, A> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn atoms(&self) -> impl Iterator<Item = (A, Span)> + '_ {
|
pub fn atoms(&self) -> Atoms<C, A> {
|
||||||
|
let start = self.nodes[self.head.unwrap().index()].next.unwrap().index();
|
||||||
|
let end = start + self.atoms_().count();
|
||||||
|
Atoms {
|
||||||
|
iter: self.nodes[start..end].iter(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn atoms_(&self) -> impl Iterator<Item = (A, Span)> + '_ {
|
||||||
let mut head = self.head;
|
let mut head = self.head;
|
||||||
std::iter::from_fn(move || {
|
std::iter::from_fn(move || {
|
||||||
head.take().map(|h| {
|
head.take().map(|h| {
|
||||||
|
|
Loading…
Reference in a new issue