atomic state wip
This commit is contained in:
parent
463f146623
commit
227c86f4f0
1 changed files with 151 additions and 106 deletions
257
src/inline.rs
257
src/inline.rs
|
@ -59,9 +59,9 @@ pub struct Event {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Current parsing state of elements that are not recursive, i.e. may not contain arbitrary inline
|
/// Current parsing state of elements that are not recursive, i.e. may not contain arbitrary inline
|
||||||
/// elements, can only be one of these at a time.
|
/// elements. There can only be one of these at a time, due to the non-recursion.
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
enum State {
|
enum AtomicState {
|
||||||
None,
|
None,
|
||||||
/// Within a verbatim element, e.g. '$`xxxxx'
|
/// Within a verbatim element, e.g. '$`xxxxx'
|
||||||
Verbatim {
|
Verbatim {
|
||||||
|
@ -82,7 +82,7 @@ enum State {
|
||||||
ReferenceLinkTag,
|
ReferenceLinkTag,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl State {
|
impl AtomicState {
|
||||||
fn verbatim(&self) -> Option<(Container, usize, usize)> {
|
fn verbatim(&self) -> Option<(Container, usize, usize)> {
|
||||||
if let Self::Verbatim {
|
if let Self::Verbatim {
|
||||||
kind,
|
kind,
|
||||||
|
@ -98,27 +98,34 @@ impl State {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct Parser<'s> {
|
pub struct Parser<'s> {
|
||||||
openers: Vec<(Container, usize)>,
|
/// The last inline element has been provided, finish current events.
|
||||||
events: std::collections::VecDeque<Event>,
|
|
||||||
span: Span,
|
|
||||||
|
|
||||||
lexer: lex::Lexer<'s>,
|
|
||||||
|
|
||||||
state: State,
|
|
||||||
last: bool,
|
last: bool,
|
||||||
|
/// Lexer, hosting upcoming source.
|
||||||
|
lexer: lex::Lexer<'s>,
|
||||||
|
/// Span of current event.
|
||||||
|
span: Span,
|
||||||
|
/// State of non-recursive elements.
|
||||||
|
atomic_state: AtomicState,
|
||||||
|
/// Stack with kind and index of _potential_ openers for typesetting containers.
|
||||||
|
typesets: Vec<(Container, usize)>,
|
||||||
|
/// Stack with index of _potential_ span/link openers.
|
||||||
|
spans: Vec<usize>,
|
||||||
|
//attributes: Vec<(Span, usize)>,
|
||||||
|
/// Buffer queue for next events. Events are buffered until no modifications due to future
|
||||||
|
/// characters are needed.
|
||||||
|
events: std::collections::VecDeque<Event>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'s> Parser<'s> {
|
impl<'s> Parser<'s> {
|
||||||
pub fn new() -> Self {
|
pub fn new() -> Self {
|
||||||
Self {
|
Self {
|
||||||
openers: Vec::new(),
|
|
||||||
events: std::collections::VecDeque::new(),
|
|
||||||
span: Span::new(0, 0),
|
|
||||||
|
|
||||||
lexer: lex::Lexer::new(""),
|
|
||||||
|
|
||||||
state: State::None,
|
|
||||||
last: false,
|
last: false,
|
||||||
|
lexer: lex::Lexer::new(""),
|
||||||
|
span: Span::new(0, 0),
|
||||||
|
atomic_state: AtomicState::None,
|
||||||
|
typesets: Vec::new(),
|
||||||
|
spans: Vec::new(),
|
||||||
|
events: std::collections::VecDeque::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -149,8 +156,10 @@ impl<'s> Parser<'s> {
|
||||||
fn parse_event(&mut self) -> Option<Event> {
|
fn parse_event(&mut self) -> Option<Event> {
|
||||||
self.reset_span();
|
self.reset_span();
|
||||||
self.eat().map(|first| {
|
self.eat().map(|first| {
|
||||||
self.parse_verbatim(&first)
|
self.atomic(&first)
|
||||||
.or_else(|| self.parse_container(&first))
|
.or_else(|| self.parse_verbatim(&first))
|
||||||
|
.or_else(|| self.parse_span(&first))
|
||||||
|
.or_else(|| self.parse_typeset(&first))
|
||||||
.or_else(|| self.parse_atom(&first))
|
.or_else(|| self.parse_atom(&first))
|
||||||
.unwrap_or(Event {
|
.unwrap_or(Event {
|
||||||
kind: EventKind::Str,
|
kind: EventKind::Str,
|
||||||
|
@ -159,37 +168,22 @@ impl<'s> Parser<'s> {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_atom(&mut self, first: &lex::Token) -> Option<Event> {
|
fn atomic(&mut self, first: &lex::Token) -> Option<Event> {
|
||||||
let atom = match first.kind {
|
Some(match self.atomic_state {
|
||||||
lex::Kind::Newline => Softbreak,
|
AtomicState::None => return None,
|
||||||
lex::Kind::Hardbreak => Hardbreak,
|
AtomicState::Verbatim {
|
||||||
lex::Kind::Escape => Escape,
|
kind,
|
||||||
lex::Kind::Nbsp => Nbsp,
|
opener_len,
|
||||||
lex::Kind::Seq(lex::Sequence::Period) if first.len == 3 => Ellipsis,
|
opener_event,
|
||||||
lex::Kind::Seq(lex::Sequence::Hyphen) if first.len == 2 => EnDash,
|
} => {
|
||||||
lex::Kind::Seq(lex::Sequence::Hyphen) if first.len == 3 => EmDash,
|
|
||||||
_ => return None,
|
|
||||||
};
|
|
||||||
|
|
||||||
Some(Event {
|
|
||||||
kind: EventKind::Atom(atom),
|
|
||||||
span: self.span,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn parse_verbatim(&mut self, first: &lex::Token) -> Option<Event> {
|
|
||||||
self.state
|
|
||||||
.verbatim()
|
|
||||||
.map(|(kind, opener_len, opener_event)| {
|
|
||||||
dbg!(&self.events, opener_event);
|
|
||||||
assert_eq!(self.events[opener_event].kind, EventKind::Enter(kind));
|
assert_eq!(self.events[opener_event].kind, EventKind::Enter(kind));
|
||||||
let kind = if matches!(first.kind, lex::Kind::Seq(lex::Sequence::Backtick))
|
let kind = if matches!(first.kind, lex::Kind::Seq(lex::Sequence::Backtick))
|
||||||
&& first.len == opener_len
|
&& first.len == opener_len
|
||||||
{
|
{
|
||||||
self.state = State::None;
|
self.atomic_state = AtomicState::None;
|
||||||
let kind =
|
let kind =
|
||||||
if matches!(kind, Verbatim) && self.lexer.peek_ahead().starts_with("{=") {
|
if matches!(kind, Verbatim) && self.lexer.peek_ahead().starts_with("{=") {
|
||||||
let mut chars = self.lexer.peek_ahead()[2..].chars();
|
let mut chars = self.lexer.peek_ahead()["{=".len()..].chars();
|
||||||
let len = chars
|
let len = chars
|
||||||
.clone()
|
.clone()
|
||||||
.take_while(|c| !c.is_whitespace() && !matches!(c, '{' | '}'))
|
.take_while(|c| !c.is_whitespace() && !matches!(c, '{' | '}'))
|
||||||
|
@ -215,54 +209,90 @@ impl<'s> Parser<'s> {
|
||||||
kind,
|
kind,
|
||||||
span: self.span,
|
span: self.span,
|
||||||
}
|
}
|
||||||
})
|
}
|
||||||
.or_else(|| {
|
AtomicState::Attributes { .. } => todo!(),
|
||||||
match first.kind {
|
AtomicState::Url { .. } => todo!(),
|
||||||
lex::Kind::Seq(lex::Sequence::Dollar) => {
|
AtomicState::ReferenceLinkTag => todo!(),
|
||||||
let math_opt = (first.len <= 2)
|
})
|
||||||
.then(|| {
|
|
||||||
if let Some(lex::Token {
|
|
||||||
kind: lex::Kind::Seq(lex::Sequence::Backtick),
|
|
||||||
len,
|
|
||||||
}) = self.peek()
|
|
||||||
{
|
|
||||||
Some((
|
|
||||||
if first.len == 2 {
|
|
||||||
DisplayMath
|
|
||||||
} else {
|
|
||||||
InlineMath
|
|
||||||
},
|
|
||||||
*len,
|
|
||||||
))
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.flatten();
|
|
||||||
if math_opt.is_some() {
|
|
||||||
self.eat(); // backticks
|
|
||||||
}
|
|
||||||
math_opt
|
|
||||||
}
|
|
||||||
lex::Kind::Seq(lex::Sequence::Backtick) => Some((Verbatim, first.len)),
|
|
||||||
_ => None,
|
|
||||||
}
|
|
||||||
.map(|(kind, opener_len)| {
|
|
||||||
dbg!(&self.events);
|
|
||||||
self.state = State::Verbatim {
|
|
||||||
kind,
|
|
||||||
opener_len,
|
|
||||||
opener_event: self.events.len(),
|
|
||||||
};
|
|
||||||
Event {
|
|
||||||
kind: EventKind::Enter(kind),
|
|
||||||
span: self.span,
|
|
||||||
}
|
|
||||||
})
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_container(&mut self, first: &lex::Token) -> Option<Event> {
|
fn parse_verbatim(&mut self, first: &lex::Token) -> Option<Event> {
|
||||||
|
match first.kind {
|
||||||
|
lex::Kind::Seq(lex::Sequence::Dollar) => {
|
||||||
|
let math_opt = (first.len <= 2)
|
||||||
|
.then(|| {
|
||||||
|
if let Some(lex::Token {
|
||||||
|
kind: lex::Kind::Seq(lex::Sequence::Backtick),
|
||||||
|
len,
|
||||||
|
}) = self.peek()
|
||||||
|
{
|
||||||
|
Some((
|
||||||
|
if first.len == 2 {
|
||||||
|
DisplayMath
|
||||||
|
} else {
|
||||||
|
InlineMath
|
||||||
|
},
|
||||||
|
*len,
|
||||||
|
))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.flatten();
|
||||||
|
if math_opt.is_some() {
|
||||||
|
self.eat(); // backticks
|
||||||
|
}
|
||||||
|
math_opt
|
||||||
|
}
|
||||||
|
lex::Kind::Seq(lex::Sequence::Backtick) => Some((Verbatim, first.len)),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
.map(|(kind, opener_len)| {
|
||||||
|
self.atomic_state = AtomicState::Verbatim {
|
||||||
|
kind,
|
||||||
|
opener_len,
|
||||||
|
opener_event: self.events.len(),
|
||||||
|
};
|
||||||
|
Event {
|
||||||
|
kind: EventKind::Enter(kind),
|
||||||
|
span: self.span,
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_span(&mut self, first: &lex::Token) -> Option<Event> {
|
||||||
|
match first.kind {
|
||||||
|
lex::Kind::Open(Delimiter::Bracket) => Some(true),
|
||||||
|
lex::Kind::Close(Delimiter::Bracket) => Some(false),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
.map(|open| {
|
||||||
|
if open {
|
||||||
|
self.spans.push(self.events.len());
|
||||||
|
// use str for now, replace if closed later
|
||||||
|
Event {
|
||||||
|
kind: EventKind::Str,
|
||||||
|
span: self.span,
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if self.lexer.peek_ahead().starts_with('[') {
|
||||||
|
let mut chars = self.lexer.peek_ahead()["[".len()..].chars();
|
||||||
|
let len = chars
|
||||||
|
.clone()
|
||||||
|
.take_while(|c| !c.is_whitespace() && !matches!(c, '[' | ']'))
|
||||||
|
.count();
|
||||||
|
match chars.nth(len) {
|
||||||
|
Some(']') => todo!(),
|
||||||
|
None => self.atomic_state = AtomicState::ReferenceLinkTag,
|
||||||
|
_ => todo!(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
todo!()
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_typeset(&mut self, first: &lex::Token) -> Option<Event> {
|
||||||
enum Dir {
|
enum Dir {
|
||||||
Open,
|
Open,
|
||||||
Close,
|
Close,
|
||||||
|
@ -276,8 +306,6 @@ impl<'s> Parser<'s> {
|
||||||
lex::Kind::Sym(Symbol::Tilde) => Some((Subscript, Dir::Both)),
|
lex::Kind::Sym(Symbol::Tilde) => Some((Subscript, Dir::Both)),
|
||||||
lex::Kind::Sym(Symbol::Quote1) => Some((SingleQuoted, Dir::Both)),
|
lex::Kind::Sym(Symbol::Quote1) => Some((SingleQuoted, Dir::Both)),
|
||||||
lex::Kind::Sym(Symbol::Quote2) => Some((DoubleQuoted, Dir::Both)),
|
lex::Kind::Sym(Symbol::Quote2) => Some((DoubleQuoted, Dir::Both)),
|
||||||
lex::Kind::Open(Delimiter::Bracket) => Some((Span, Dir::Open)),
|
|
||||||
lex::Kind::Close(Delimiter::Bracket) => Some((Span, Dir::Close)),
|
|
||||||
lex::Kind::Open(Delimiter::BraceAsterisk) => Some((Strong, Dir::Open)),
|
lex::Kind::Open(Delimiter::BraceAsterisk) => Some((Strong, Dir::Open)),
|
||||||
lex::Kind::Close(Delimiter::BraceAsterisk) => Some((Strong, Dir::Close)),
|
lex::Kind::Close(Delimiter::BraceAsterisk) => Some((Strong, Dir::Close)),
|
||||||
lex::Kind::Open(Delimiter::BraceCaret) => Some((Superscript, Dir::Open)),
|
lex::Kind::Open(Delimiter::BraceCaret) => Some((Superscript, Dir::Open)),
|
||||||
|
@ -295,19 +323,19 @@ impl<'s> Parser<'s> {
|
||||||
_ => None,
|
_ => None,
|
||||||
}
|
}
|
||||||
.map(|(cont, dir)| {
|
.map(|(cont, dir)| {
|
||||||
self.openers
|
self.typesets
|
||||||
.iter()
|
.iter()
|
||||||
.rposition(|(c, _)| *c == cont)
|
.rposition(|(c, _)| *c == cont)
|
||||||
.and_then(|o| {
|
.and_then(|o| {
|
||||||
matches!(dir, Dir::Close | Dir::Both).then(|| {
|
matches!(dir, Dir::Close | Dir::Both).then(|| {
|
||||||
let (_, e) = &mut self.openers[o];
|
let (_, e) = &mut self.typesets[o];
|
||||||
self.events[*e].kind = EventKind::Enter(cont);
|
self.events[*e].kind = EventKind::Enter(cont);
|
||||||
self.openers.drain(o..);
|
self.typesets.drain(o..);
|
||||||
EventKind::Exit(cont)
|
EventKind::Exit(cont)
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
.unwrap_or_else(|| {
|
.unwrap_or_else(|| {
|
||||||
self.openers.push((cont, self.events.len()));
|
self.typesets.push((cont, self.events.len()));
|
||||||
// use str for now, replace if closed later
|
// use str for now, replace if closed later
|
||||||
EventKind::Str
|
EventKind::Str
|
||||||
})
|
})
|
||||||
|
@ -317,6 +345,24 @@ impl<'s> Parser<'s> {
|
||||||
span: self.span,
|
span: self.span,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn parse_atom(&mut self, first: &lex::Token) -> Option<Event> {
|
||||||
|
let atom = match first.kind {
|
||||||
|
lex::Kind::Newline => Softbreak,
|
||||||
|
lex::Kind::Hardbreak => Hardbreak,
|
||||||
|
lex::Kind::Escape => Escape,
|
||||||
|
lex::Kind::Nbsp => Nbsp,
|
||||||
|
lex::Kind::Seq(lex::Sequence::Period) if first.len == 3 => Ellipsis,
|
||||||
|
lex::Kind::Seq(lex::Sequence::Hyphen) if first.len == 2 => EnDash,
|
||||||
|
lex::Kind::Seq(lex::Sequence::Hyphen) if first.len == 3 => EmDash,
|
||||||
|
_ => return None,
|
||||||
|
};
|
||||||
|
|
||||||
|
Some(Event {
|
||||||
|
kind: EventKind::Atom(atom),
|
||||||
|
span: self.span,
|
||||||
|
})
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'s> Iterator for Parser<'s> {
|
impl<'s> Iterator for Parser<'s> {
|
||||||
|
@ -325,8 +371,8 @@ impl<'s> Iterator for Parser<'s> {
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
let mut ready = true;
|
let mut ready = true;
|
||||||
while self.events.is_empty()
|
while self.events.is_empty()
|
||||||
|| !self.openers.is_empty()
|
|| !self.typesets.is_empty()
|
||||||
|| !matches!(self.state, State::None)
|
|| !matches!(self.atomic_state, AtomicState::None)
|
||||||
|| self // for merge
|
|| self // for merge
|
||||||
.events
|
.events
|
||||||
.back()
|
.back()
|
||||||
|
@ -334,7 +380,6 @@ impl<'s> Iterator for Parser<'s> {
|
||||||
{
|
{
|
||||||
if let Some(ev) = self.parse_event() {
|
if let Some(ev) = self.parse_event() {
|
||||||
self.events.push_back(ev);
|
self.events.push_back(ev);
|
||||||
dbg!(&self.events, &self.state);
|
|
||||||
} else {
|
} else {
|
||||||
ready = false;
|
ready = false;
|
||||||
break;
|
break;
|
||||||
|
@ -366,8 +411,8 @@ impl<'s> Iterator for Parser<'s> {
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
.or_else(|| {
|
.or_else(|| {
|
||||||
self.state.verbatim().map(|(kind, _, _)| {
|
self.atomic_state.verbatim().map(|(kind, _, _)| {
|
||||||
self.state = State::None;
|
self.atomic_state = AtomicState::None;
|
||||||
Event {
|
Event {
|
||||||
kind: EventKind::Exit(kind),
|
kind: EventKind::Exit(kind),
|
||||||
span: self.span,
|
span: self.span,
|
||||||
|
@ -490,7 +535,7 @@ mod test {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn container_basic() {
|
fn typeset_basic() {
|
||||||
test_parse!(
|
test_parse!(
|
||||||
"_abc_",
|
"_abc_",
|
||||||
(Enter(Emphasis), "_"),
|
(Enter(Emphasis), "_"),
|
||||||
|
@ -506,7 +551,7 @@ mod test {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn container_nest() {
|
fn typeset_nest() {
|
||||||
test_parse!(
|
test_parse!(
|
||||||
"{_{_abc_}_}",
|
"{_{_abc_}_}",
|
||||||
(Enter(Emphasis), "{_"),
|
(Enter(Emphasis), "{_"),
|
||||||
|
@ -526,12 +571,12 @@ mod test {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn container_unopened() {
|
fn typeset_unopened() {
|
||||||
test_parse!("*}abc", (Str, "*}abc"));
|
test_parse!("*}abc", (Str, "*}abc"));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn container_close_parent() {
|
fn typeset_close_parent() {
|
||||||
test_parse!(
|
test_parse!(
|
||||||
"{*{_abc*}",
|
"{*{_abc*}",
|
||||||
(Enter(Strong), "{*"),
|
(Enter(Strong), "{*"),
|
||||||
|
@ -541,7 +586,7 @@ mod test {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn container_close_block() {
|
fn typeset_close_block() {
|
||||||
test_parse!("{_abc", (Str, "{_abc"));
|
test_parse!("{_abc", (Str, "{_abc"));
|
||||||
test_parse!("{_{*{_abc", (Str, "{_{*{_abc"));
|
test_parse!("{_{*{_abc", (Str, "{_{*{_abc"));
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue