wip fix span / typeset precedence

This commit is contained in:
Noah Hellman 2022-12-22 20:39:11 +01:00
parent 352be02ccf
commit d8d464902a
3 changed files with 182 additions and 118 deletions

View file

@ -4,8 +4,13 @@ use crate::Span;
use State::*;
pub fn valid<I: Iterator<Item = char>>(chars: I) -> bool {
!Parser::new(chars).any(|e| matches!(e, Element::Invalid))
/// Runs the attribute parser over `chars` and reports how far it got.
///
/// Returns `0` as soon as the parser yields an `Element::Invalid`; otherwise returns the
/// parser's `pos` once it has stopped yielding elements.
pub fn valid<I: Iterator<Item = char>>(chars: I) -> usize {
    let mut parser = Parser::new(chars);
    let rejected = parser.any(|element| matches!(element, Element::Invalid));
    if rejected {
        0
    } else {
        parser.pos
    }
}
// Attributes are relatively rare, we choose to pay 8 bytes always and sometimes an extra
@ -178,14 +183,7 @@ impl<I: Iterator<Item = char>> Parser<I> {
ValueQuoted
}
}
Done => {
if c.is_whitespace() {
Done
} else {
Invalid
}
}
Invalid => panic!(),
Invalid | Done => panic!(),
}
})
}
@ -193,6 +191,10 @@ impl<I: Iterator<Item = char>> Parser<I> {
fn step(&mut self) -> (State, Span) {
let start = self.pos.saturating_sub(1);
if self.state == Done {
return (Done, Span::empty_at(start));
}
while let Some(state_next) = self.step_char() {
if self.state != state_next {
return (
@ -204,7 +206,7 @@ impl<I: Iterator<Item = char>> Parser<I> {
(
if self.state == Done { Done } else { Invalid },
Span::new(start, self.pos),
Span::new(start, self.pos - 1),
)
}
}
@ -325,4 +327,13 @@ mod test {
("id", "some_id"),
);
}
#[test]
fn valid() {
    // A lone attribute set is accepted in full.
    let attrs = "{.class %comment%}";
    assert_eq!(super::valid(attrs.chars()), attrs.len());

    // Text following the attribute set does not add to the reported length.
    let with_tail = format!("{} trailing", attrs);
    assert_eq!(super::valid(with_tail.chars()), attrs.len());
}
}

View file

@ -270,7 +270,7 @@ impl BlockParser {
))
}
}
'{' => attr::valid(line_t.chars())
'{' => (attr::valid(line_t.chars()) == line_t.trim_end().len())
.then(|| (Block::Atom(Attributes), Span::by_len(start, line_t.len()))),
'|' => (&line_t[line_t.len() - 1..] == "|"
&& &line_t[line_t.len() - 2..line_t.len() - 1] != "\\")

View file

@ -1,3 +1,4 @@
use crate::attr;
use crate::lex;
use crate::Span;
@ -71,10 +72,7 @@ pub struct Parser<I> {
/// Span of current event.
span: Span,
/// Stack with kind and index of _potential_ openers for typesetting containers.
typesets: Vec<(Container, usize)>,
/// Stack with index of _potential_ span/link openers.
spans: Vec<(usize, bool)>,
//attributes: Vec<(Span, usize)>,
openers: Vec<(Delim, usize)>,
/// Buffer queue for next events. Events are buffered until no modifications due to future
/// characters are needed.
events: std::collections::VecDeque<Event>,
@ -85,8 +83,7 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
Self {
lexer: lex::Lexer::new(chars),
span: Span::new(0, 0),
typesets: Vec::new(),
spans: Vec::new(),
openers: Vec::new(),
events: std::collections::VecDeque::new(),
}
}
@ -111,8 +108,7 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
self.reset_span();
self.eat().map(|first| {
self.parse_verbatim(&first)
.or_else(|| self.parse_span(&first))
.or_else(|| self.parse_typeset(&first))
.or_else(|| self.parse_container(&first))
.or_else(|| self.parse_atom(&first))
.unwrap_or(Event {
kind: EventKind::Str,
@ -212,30 +208,86 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
})
}
fn parse_span(&mut self, first: &lex::Token) -> Option<Event> {
fn parse_container(&mut self, first: &lex::Token) -> Option<Event> {
enum Dir {
Open,
Close,
Both,
}
use Directionality::{Bi, Uni};
use SpanType::{General, Image};
match first.kind {
lex::Kind::Sym(Symbol::ExclaimBracket) => Some((true, true)),
lex::Kind::Open(Delimiter::Bracket) => Some((true, false)),
lex::Kind::Close(Delimiter::Bracket) => Some((false, false)),
lex::Kind::Sym(Symbol::Asterisk) => Some((Delim::Strong(Bi), Dir::Both)),
lex::Kind::Sym(Symbol::Underscore) => Some((Delim::Emphasis(Bi), Dir::Both)),
lex::Kind::Sym(Symbol::Caret) => Some((Delim::Superscript(Bi), Dir::Both)),
lex::Kind::Sym(Symbol::Tilde) => Some((Delim::Subscript(Bi), Dir::Both)),
lex::Kind::Sym(Symbol::Quote1) => Some((Delim::SingleQuoted, Dir::Both)),
lex::Kind::Sym(Symbol::Quote2) => Some((Delim::DoubleQuoted, Dir::Both)),
lex::Kind::Sym(Symbol::ExclaimBracket) => Some((Delim::Span(Image), Dir::Open)),
lex::Kind::Open(Delimiter::Bracket) => Some((Delim::Span(General), Dir::Open)),
lex::Kind::Close(Delimiter::Bracket) => Some((Delim::Span(General), Dir::Close)),
lex::Kind::Open(Delimiter::BraceAsterisk) => Some((Delim::Strong(Uni), Dir::Open)),
lex::Kind::Close(Delimiter::BraceAsterisk) => Some((Delim::Strong(Uni), Dir::Close)),
lex::Kind::Open(Delimiter::BraceUnderscore) => Some((Delim::Emphasis(Uni), Dir::Open)),
lex::Kind::Close(Delimiter::BraceUnderscore) => {
Some((Delim::Emphasis(Uni), Dir::Close))
}
lex::Kind::Open(Delimiter::BraceCaret) => Some((Delim::Superscript(Uni), Dir::Open)),
lex::Kind::Close(Delimiter::BraceCaret) => Some((Delim::Superscript(Uni), Dir::Close)),
lex::Kind::Open(Delimiter::BraceTilde) => Some((Delim::Subscript(Uni), Dir::Open)),
lex::Kind::Close(Delimiter::BraceTilde) => Some((Delim::Subscript(Uni), Dir::Close)),
lex::Kind::Open(Delimiter::BraceEqual) => Some((Delim::Mark, Dir::Open)),
lex::Kind::Close(Delimiter::BraceEqual) => Some((Delim::Mark, Dir::Close)),
lex::Kind::Open(Delimiter::BraceHyphen) => Some((Delim::Delete, Dir::Open)),
lex::Kind::Close(Delimiter::BraceHyphen) => Some((Delim::Delete, Dir::Close)),
lex::Kind::Open(Delimiter::BracePlus) => Some((Delim::Insert, Dir::Open)),
lex::Kind::Close(Delimiter::BracePlus) => Some((Delim::Insert, Dir::Close)),
_ => None,
}
.and_then(|(open, img)| {
if open {
self.spans.push((self.events.len(), img));
// use str for now, replace if closed later
.map(|(delim, dir)| {
self.openers
.iter()
.rposition(|(d, _)| d.matches(delim))
.and_then(|o| {
let (d, e) = self.openers[o];
if matches!(dir, Dir::Close | Dir::Both) {
let e = match Container::try_from(d) {
Ok(cont) => {
self.events[e].kind = EventKind::Enter(cont);
Some(Event {
kind: EventKind::Str,
kind: EventKind::Exit(cont),
span: self.span,
})
} else if !self.spans.is_empty() {
}
Err(ty) => self.post_span(ty, e),
};
self.openers.drain(o..);
e
} else {
None
}
})
.unwrap_or_else(|| {
self.openers.push((delim, self.events.len()));
// use str for now, replace if closed later
Event {
kind: EventKind::Str,
span: self.span,
}
})
})
}
fn post_span(&mut self, ty: SpanType, opener_event: usize) -> Option<Event> {
let mut ahead = self.lexer.inner().clone();
let img = self.spans.last().unwrap().1;
match ahead.next() {
Some(opener @ ('[' | '(')) => {
let (closer, kind) = match opener {
'[' if img => (']', ReferenceImage),
'[' if ty == SpanType::Image => (']', ReferenceImage),
'[' => (']', ReferenceLink),
'(' if img => (')', InlineImage),
'(' if ty == SpanType::Image => (')', InlineImage),
'(' => (')', InlineLink),
_ => unreachable!(),
};
@ -253,12 +305,10 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
(kind, span)
})
}
Some('{') => todo!(),
_ => None,
}
.map(|(kind, span)| {
self.lexer = lex::Lexer::new(ahead);
let (opener_event, _) = self.spans.pop().unwrap();
self.events[opener_event].kind = EventKind::Enter(kind);
self.events[opener_event].span = span;
self.span = span.translate(1);
@ -267,64 +317,6 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
span,
}
})
} else {
None
}
})
}
/// Tries to interpret `first` as a typesetting delimiter (strong, emphasis, super/subscript,
/// quotes, mark, delete, insert).
///
/// Returns `None` when the token is not such a delimiter. Otherwise returns an event spanning
/// `self.span` whose kind is `Exit(cont)` if the token closed a pending opener, or `Str` if
/// it was recorded as a potential opener.
fn parse_typeset(&mut self, first: &lex::Token) -> Option<Event> {
// Which roles the token is allowed to play.
enum Dir {
Open,
Close,
Both,
}
// Bare symbols may both open and close; brace-qualified delimiters are one-sided.
match first.kind {
lex::Kind::Sym(Symbol::Asterisk) => Some((Strong, Dir::Both)),
lex::Kind::Sym(Symbol::Underscore) => Some((Emphasis, Dir::Both)),
lex::Kind::Sym(Symbol::Caret) => Some((Superscript, Dir::Both)),
lex::Kind::Sym(Symbol::Tilde) => Some((Subscript, Dir::Both)),
lex::Kind::Sym(Symbol::Quote1) => Some((SingleQuoted, Dir::Both)),
lex::Kind::Sym(Symbol::Quote2) => Some((DoubleQuoted, Dir::Both)),
lex::Kind::Open(Delimiter::BraceAsterisk) => Some((Strong, Dir::Open)),
lex::Kind::Close(Delimiter::BraceAsterisk) => Some((Strong, Dir::Close)),
lex::Kind::Open(Delimiter::BraceCaret) => Some((Superscript, Dir::Open)),
lex::Kind::Close(Delimiter::BraceCaret) => Some((Superscript, Dir::Close)),
lex::Kind::Open(Delimiter::BraceEqual) => Some((Mark, Dir::Open)),
lex::Kind::Close(Delimiter::BraceEqual) => Some((Mark, Dir::Close)),
lex::Kind::Open(Delimiter::BraceHyphen) => Some((Delete, Dir::Open)),
lex::Kind::Close(Delimiter::BraceHyphen) => Some((Delete, Dir::Close)),
lex::Kind::Open(Delimiter::BracePlus) => Some((Insert, Dir::Open)),
lex::Kind::Close(Delimiter::BracePlus) => Some((Insert, Dir::Close)),
lex::Kind::Open(Delimiter::BraceTilde) => Some((Subscript, Dir::Open)),
lex::Kind::Close(Delimiter::BraceTilde) => Some((Subscript, Dir::Close)),
lex::Kind::Open(Delimiter::BraceUnderscore) => Some((Emphasis, Dir::Open)),
lex::Kind::Close(Delimiter::BraceUnderscore) => Some((Emphasis, Dir::Close)),
_ => None,
}
.map(|(cont, dir)| {
// Look for the most recently pushed opener of the same container kind.
self.typesets
.iter()
.rposition(|(c, _)| *c == cont)
.and_then(|o| {
// Only a token that is allowed to close may consume the opener; an open-only
// token yields `None` here and falls through to the push below.
matches!(dir, Dir::Close | Dir::Both).then(|| {
let (_, e) = &mut self.typesets[o];
// Retroactively turn the opener's buffered event into an `Enter`.
self.events[*e].kind = EventKind::Enter(cont);
// Discard the matched opener and every opener pushed after it.
self.typesets.drain(o..);
EventKind::Exit(cont)
})
})
.unwrap_or_else(|| {
// Record the current length of the event buffer as this opener's event index.
self.typesets.push((cont, self.events.len()));
// use str for now, replace if closed later
EventKind::Str
})
})
.map(|kind| Event {
kind,
span: self.span,
})
}
fn parse_atom(&mut self, first: &lex::Token) -> Option<Event> {
@ -346,13 +338,74 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
}
}
/// Distinguishes the two token forms that `parse_container` maps to the same typesetting
/// delimiter (strong, emphasis, superscript, subscript).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Directionality {
/// Assigned to brace-qualified delimiter tokens (`Delimiter::Brace*`), which
/// `parse_container` treats as either an opener or a closer, never both.
Uni,
/// Assigned to bare symbol tokens (e.g. `Symbol::Asterisk`), which `parse_container`
/// allows to act as both opener and closer.
Bi,
}
/// Kind of bracketed span; carried by `Delim::Span` and used as the error type when a
/// `Delim` cannot be converted into a `Container`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum SpanType {
/// Produced for `Symbol::ExclaimBracket`; makes `post_span` choose `ReferenceImage` or
/// `InlineImage`.
Image,
/// Produced for plain bracket tokens; makes `post_span` choose `ReferenceLink` or
/// `InlineLink`.
General,
}
/// Delimiter kinds recognised by `parse_container` and stored, together with an event index,
/// on the parser's `openers` stack.
#[derive(Debug, Clone, Copy)]
enum Delim {
/// Bracket delimiter of a span; the only variant with no `Container` counterpart.
Span(SpanType),
/// `Symbol::Asterisk` (`Bi`) or `Delimiter::BraceAsterisk` (`Uni`).
Strong(Directionality),
/// `Symbol::Underscore` (`Bi`) or `Delimiter::BraceUnderscore` (`Uni`).
Emphasis(Directionality),
/// `Symbol::Caret` (`Bi`) or `Delimiter::BraceCaret` (`Uni`).
Superscript(Directionality),
/// `Symbol::Tilde` (`Bi`) or `Delimiter::BraceTilde` (`Uni`).
Subscript(Directionality),
/// `Symbol::Quote1`.
SingleQuoted,
/// `Symbol::Quote2`.
DoubleQuoted,
/// `Delimiter::BraceEqual`.
Mark,
/// `Delimiter::BraceHyphen`.
Delete,
/// `Delimiter::BracePlus`.
Insert,
}
impl Delim {
    /// Returns `true` when `self` and `other` are the same kind of delimiter.
    ///
    /// Only the variant is compared; the payload (`SpanType` or `Directionality`) is ignored,
    /// so `Strong(Uni)` matches `Strong(Bi)` and `Span(Image)` matches `Span(General)`.
    fn matches(self, other: Delim) -> bool {
        // `discriminant` identifies the variant without inspecting its fields, so this stays
        // correct when variants are added to the enum.
        std::mem::discriminant(&self) == std::mem::discriminant(&other)
    }
}
impl TryFrom<Delim> for Container {
    type Error = SpanType;

    /// Maps a delimiter to the container it encloses.
    ///
    /// Every delimiter except `Delim::Span` has a same-named container; a span is handed
    /// back as `Err` carrying its `SpanType`. Any `Directionality` payload is dropped.
    fn try_from(d: Delim) -> Result<Self, Self::Error> {
        let container = match d {
            Delim::Span(span_type) => return Err(span_type),
            Delim::Strong(_) => Self::Strong,
            Delim::Emphasis(_) => Self::Emphasis,
            Delim::Superscript(_) => Self::Superscript,
            Delim::Subscript(_) => Self::Subscript,
            Delim::SingleQuoted => Self::SingleQuoted,
            Delim::DoubleQuoted => Self::DoubleQuoted,
            Delim::Mark => Self::Mark,
            Delim::Delete => Self::Delete,
            Delim::Insert => Self::Insert,
        };
        Ok(container)
    }
}
impl<I: Iterator<Item = char> + Clone> Iterator for Parser<I> {
type Item = Event;
fn next(&mut self) -> Option<Self::Item> {
while self.events.is_empty()
|| !self.typesets.is_empty()
|| !self.spans.is_empty()
|| !self.openers.is_empty()
|| self // for merge
.events
.back()