wip fix span / typeset precedence
This commit is contained in:
parent
352be02ccf
commit
d8d464902a
3 changed files with 182 additions and 118 deletions
33
src/attr.rs
33
src/attr.rs
|
@ -4,8 +4,13 @@ use crate::Span;
|
||||||
|
|
||||||
use State::*;
|
use State::*;
|
||||||
|
|
||||||
pub fn valid<I: Iterator<Item = char>>(chars: I) -> bool {
|
pub fn valid<I: Iterator<Item = char>>(chars: I) -> usize {
|
||||||
!Parser::new(chars).any(|e| matches!(e, Element::Invalid))
|
let mut p = Parser::new(chars);
|
||||||
|
if p.any(|e| matches!(e, Element::Invalid)) {
|
||||||
|
0
|
||||||
|
} else {
|
||||||
|
p.pos
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Attributes are relatively rare, we choose to pay 8 bytes always and sometimes an extra
|
// Attributes are relatively rare, we choose to pay 8 bytes always and sometimes an extra
|
||||||
|
@ -178,14 +183,7 @@ impl<I: Iterator<Item = char>> Parser<I> {
|
||||||
ValueQuoted
|
ValueQuoted
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Done => {
|
Invalid | Done => panic!(),
|
||||||
if c.is_whitespace() {
|
|
||||||
Done
|
|
||||||
} else {
|
|
||||||
Invalid
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Invalid => panic!(),
|
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
@ -193,6 +191,10 @@ impl<I: Iterator<Item = char>> Parser<I> {
|
||||||
fn step(&mut self) -> (State, Span) {
|
fn step(&mut self) -> (State, Span) {
|
||||||
let start = self.pos.saturating_sub(1);
|
let start = self.pos.saturating_sub(1);
|
||||||
|
|
||||||
|
if self.state == Done {
|
||||||
|
return (Done, Span::empty_at(start));
|
||||||
|
}
|
||||||
|
|
||||||
while let Some(state_next) = self.step_char() {
|
while let Some(state_next) = self.step_char() {
|
||||||
if self.state != state_next {
|
if self.state != state_next {
|
||||||
return (
|
return (
|
||||||
|
@ -204,7 +206,7 @@ impl<I: Iterator<Item = char>> Parser<I> {
|
||||||
|
|
||||||
(
|
(
|
||||||
if self.state == Done { Done } else { Invalid },
|
if self.state == Done { Done } else { Invalid },
|
||||||
Span::new(start, self.pos),
|
Span::new(start, self.pos - 1),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -325,4 +327,13 @@ mod test {
|
||||||
("id", "some_id"),
|
("id", "some_id"),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn valid() {
|
||||||
|
let src0 = "{.class %comment%}";
|
||||||
|
assert_eq!(super::valid(src0.chars()), src0.len());
|
||||||
|
|
||||||
|
let src1 = format!("{} trailing", src0);
|
||||||
|
assert_eq!(super::valid(src1.chars()), src0.len());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -270,7 +270,7 @@ impl BlockParser {
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
'{' => attr::valid(line_t.chars())
|
'{' => (attr::valid(line_t.chars()) == line_t.trim_end().len())
|
||||||
.then(|| (Block::Atom(Attributes), Span::by_len(start, line_t.len()))),
|
.then(|| (Block::Atom(Attributes), Span::by_len(start, line_t.len()))),
|
||||||
'|' => (&line_t[line_t.len() - 1..] == "|"
|
'|' => (&line_t[line_t.len() - 1..] == "|"
|
||||||
&& &line_t[line_t.len() - 2..line_t.len() - 1] != "\\")
|
&& &line_t[line_t.len() - 2..line_t.len() - 1] != "\\")
|
||||||
|
|
265
src/inline.rs
265
src/inline.rs
|
@ -1,3 +1,4 @@
|
||||||
|
use crate::attr;
|
||||||
use crate::lex;
|
use crate::lex;
|
||||||
use crate::Span;
|
use crate::Span;
|
||||||
|
|
||||||
|
@ -71,10 +72,7 @@ pub struct Parser<I> {
|
||||||
/// Span of current event.
|
/// Span of current event.
|
||||||
span: Span,
|
span: Span,
|
||||||
/// Stack with kind and index of _potential_ openers for typesetting containers.
|
/// Stack with kind and index of _potential_ openers for typesetting containers.
|
||||||
typesets: Vec<(Container, usize)>,
|
openers: Vec<(Delim, usize)>,
|
||||||
/// Stack with index of _potential_ span/link openers.
|
|
||||||
spans: Vec<(usize, bool)>,
|
|
||||||
//attributes: Vec<(Span, usize)>,
|
|
||||||
/// Buffer queue for next events. Events are buffered until no modifications due to future
|
/// Buffer queue for next events. Events are buffered until no modifications due to future
|
||||||
/// characters are needed.
|
/// characters are needed.
|
||||||
events: std::collections::VecDeque<Event>,
|
events: std::collections::VecDeque<Event>,
|
||||||
|
@ -85,8 +83,7 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
|
||||||
Self {
|
Self {
|
||||||
lexer: lex::Lexer::new(chars),
|
lexer: lex::Lexer::new(chars),
|
||||||
span: Span::new(0, 0),
|
span: Span::new(0, 0),
|
||||||
typesets: Vec::new(),
|
openers: Vec::new(),
|
||||||
spans: Vec::new(),
|
|
||||||
events: std::collections::VecDeque::new(),
|
events: std::collections::VecDeque::new(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -111,8 +108,7 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
|
||||||
self.reset_span();
|
self.reset_span();
|
||||||
self.eat().map(|first| {
|
self.eat().map(|first| {
|
||||||
self.parse_verbatim(&first)
|
self.parse_verbatim(&first)
|
||||||
.or_else(|| self.parse_span(&first))
|
.or_else(|| self.parse_container(&first))
|
||||||
.or_else(|| self.parse_typeset(&first))
|
|
||||||
.or_else(|| self.parse_atom(&first))
|
.or_else(|| self.parse_atom(&first))
|
||||||
.unwrap_or(Event {
|
.unwrap_or(Event {
|
||||||
kind: EventKind::Str,
|
kind: EventKind::Str,
|
||||||
|
@ -212,118 +208,114 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_span(&mut self, first: &lex::Token) -> Option<Event> {
|
fn parse_container(&mut self, first: &lex::Token) -> Option<Event> {
|
||||||
match first.kind {
|
|
||||||
lex::Kind::Sym(Symbol::ExclaimBracket) => Some((true, true)),
|
|
||||||
lex::Kind::Open(Delimiter::Bracket) => Some((true, false)),
|
|
||||||
lex::Kind::Close(Delimiter::Bracket) => Some((false, false)),
|
|
||||||
_ => None,
|
|
||||||
}
|
|
||||||
.and_then(|(open, img)| {
|
|
||||||
if open {
|
|
||||||
self.spans.push((self.events.len(), img));
|
|
||||||
// use str for now, replace if closed later
|
|
||||||
Some(Event {
|
|
||||||
kind: EventKind::Str,
|
|
||||||
span: self.span,
|
|
||||||
})
|
|
||||||
} else if !self.spans.is_empty() {
|
|
||||||
let mut ahead = self.lexer.inner().clone();
|
|
||||||
let img = self.spans.last().unwrap().1;
|
|
||||||
match ahead.next() {
|
|
||||||
Some(opener @ ('[' | '(')) => {
|
|
||||||
let (closer, kind) = match opener {
|
|
||||||
'[' if img => (']', ReferenceImage),
|
|
||||||
'[' => (']', ReferenceLink),
|
|
||||||
'(' if img => (')', InlineImage),
|
|
||||||
'(' => (')', InlineLink),
|
|
||||||
_ => unreachable!(),
|
|
||||||
};
|
|
||||||
let mut end = false;
|
|
||||||
let len = (&mut ahead)
|
|
||||||
.take_while(|c| {
|
|
||||||
if *c == closer {
|
|
||||||
end = true;
|
|
||||||
};
|
|
||||||
!end && *c != opener
|
|
||||||
})
|
|
||||||
.count();
|
|
||||||
end.then(|| {
|
|
||||||
let span = Span::by_len(self.span.end() + 1, len);
|
|
||||||
(kind, span)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
Some('{') => todo!(),
|
|
||||||
_ => None,
|
|
||||||
}
|
|
||||||
.map(|(kind, span)| {
|
|
||||||
self.lexer = lex::Lexer::new(ahead);
|
|
||||||
let (opener_event, _) = self.spans.pop().unwrap();
|
|
||||||
self.events[opener_event].kind = EventKind::Enter(kind);
|
|
||||||
self.events[opener_event].span = span;
|
|
||||||
self.span = span.translate(1);
|
|
||||||
Event {
|
|
||||||
kind: EventKind::Exit(kind),
|
|
||||||
span,
|
|
||||||
}
|
|
||||||
})
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
fn parse_typeset(&mut self, first: &lex::Token) -> Option<Event> {
|
|
||||||
enum Dir {
|
enum Dir {
|
||||||
Open,
|
Open,
|
||||||
Close,
|
Close,
|
||||||
Both,
|
Both,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
use Directionality::{Bi, Uni};
|
||||||
|
use SpanType::{General, Image};
|
||||||
|
|
||||||
match first.kind {
|
match first.kind {
|
||||||
lex::Kind::Sym(Symbol::Asterisk) => Some((Strong, Dir::Both)),
|
lex::Kind::Sym(Symbol::Asterisk) => Some((Delim::Strong(Bi), Dir::Both)),
|
||||||
lex::Kind::Sym(Symbol::Underscore) => Some((Emphasis, Dir::Both)),
|
lex::Kind::Sym(Symbol::Underscore) => Some((Delim::Emphasis(Bi), Dir::Both)),
|
||||||
lex::Kind::Sym(Symbol::Caret) => Some((Superscript, Dir::Both)),
|
lex::Kind::Sym(Symbol::Caret) => Some((Delim::Superscript(Bi), Dir::Both)),
|
||||||
lex::Kind::Sym(Symbol::Tilde) => Some((Subscript, Dir::Both)),
|
lex::Kind::Sym(Symbol::Tilde) => Some((Delim::Subscript(Bi), Dir::Both)),
|
||||||
lex::Kind::Sym(Symbol::Quote1) => Some((SingleQuoted, Dir::Both)),
|
lex::Kind::Sym(Symbol::Quote1) => Some((Delim::SingleQuoted, Dir::Both)),
|
||||||
lex::Kind::Sym(Symbol::Quote2) => Some((DoubleQuoted, Dir::Both)),
|
lex::Kind::Sym(Symbol::Quote2) => Some((Delim::DoubleQuoted, Dir::Both)),
|
||||||
lex::Kind::Open(Delimiter::BraceAsterisk) => Some((Strong, Dir::Open)),
|
lex::Kind::Sym(Symbol::ExclaimBracket) => Some((Delim::Span(Image), Dir::Open)),
|
||||||
lex::Kind::Close(Delimiter::BraceAsterisk) => Some((Strong, Dir::Close)),
|
lex::Kind::Open(Delimiter::Bracket) => Some((Delim::Span(General), Dir::Open)),
|
||||||
lex::Kind::Open(Delimiter::BraceCaret) => Some((Superscript, Dir::Open)),
|
lex::Kind::Close(Delimiter::Bracket) => Some((Delim::Span(General), Dir::Close)),
|
||||||
lex::Kind::Close(Delimiter::BraceCaret) => Some((Superscript, Dir::Close)),
|
lex::Kind::Open(Delimiter::BraceAsterisk) => Some((Delim::Strong(Uni), Dir::Open)),
|
||||||
lex::Kind::Open(Delimiter::BraceEqual) => Some((Mark, Dir::Open)),
|
lex::Kind::Close(Delimiter::BraceAsterisk) => Some((Delim::Strong(Uni), Dir::Close)),
|
||||||
lex::Kind::Close(Delimiter::BraceEqual) => Some((Mark, Dir::Close)),
|
lex::Kind::Open(Delimiter::BraceUnderscore) => Some((Delim::Emphasis(Uni), Dir::Open)),
|
||||||
lex::Kind::Open(Delimiter::BraceHyphen) => Some((Delete, Dir::Open)),
|
lex::Kind::Close(Delimiter::BraceUnderscore) => {
|
||||||
lex::Kind::Close(Delimiter::BraceHyphen) => Some((Delete, Dir::Close)),
|
Some((Delim::Emphasis(Uni), Dir::Close))
|
||||||
lex::Kind::Open(Delimiter::BracePlus) => Some((Insert, Dir::Open)),
|
}
|
||||||
lex::Kind::Close(Delimiter::BracePlus) => Some((Insert, Dir::Close)),
|
lex::Kind::Open(Delimiter::BraceCaret) => Some((Delim::Superscript(Uni), Dir::Open)),
|
||||||
lex::Kind::Open(Delimiter::BraceTilde) => Some((Subscript, Dir::Open)),
|
lex::Kind::Close(Delimiter::BraceCaret) => Some((Delim::Superscript(Uni), Dir::Close)),
|
||||||
lex::Kind::Close(Delimiter::BraceTilde) => Some((Subscript, Dir::Close)),
|
lex::Kind::Open(Delimiter::BraceTilde) => Some((Delim::Subscript(Uni), Dir::Open)),
|
||||||
lex::Kind::Open(Delimiter::BraceUnderscore) => Some((Emphasis, Dir::Open)),
|
lex::Kind::Close(Delimiter::BraceTilde) => Some((Delim::Subscript(Uni), Dir::Close)),
|
||||||
lex::Kind::Close(Delimiter::BraceUnderscore) => Some((Emphasis, Dir::Close)),
|
lex::Kind::Open(Delimiter::BraceEqual) => Some((Delim::Mark, Dir::Open)),
|
||||||
|
lex::Kind::Close(Delimiter::BraceEqual) => Some((Delim::Mark, Dir::Close)),
|
||||||
|
lex::Kind::Open(Delimiter::BraceHyphen) => Some((Delim::Delete, Dir::Open)),
|
||||||
|
lex::Kind::Close(Delimiter::BraceHyphen) => Some((Delim::Delete, Dir::Close)),
|
||||||
|
lex::Kind::Open(Delimiter::BracePlus) => Some((Delim::Insert, Dir::Open)),
|
||||||
|
lex::Kind::Close(Delimiter::BracePlus) => Some((Delim::Insert, Dir::Close)),
|
||||||
_ => None,
|
_ => None,
|
||||||
}
|
}
|
||||||
.map(|(cont, dir)| {
|
.map(|(delim, dir)| {
|
||||||
self.typesets
|
self.openers
|
||||||
.iter()
|
.iter()
|
||||||
.rposition(|(c, _)| *c == cont)
|
.rposition(|(d, _)| d.matches(delim))
|
||||||
.and_then(|o| {
|
.and_then(|o| {
|
||||||
matches!(dir, Dir::Close | Dir::Both).then(|| {
|
let (d, e) = self.openers[o];
|
||||||
let (_, e) = &mut self.typesets[o];
|
if matches!(dir, Dir::Close | Dir::Both) {
|
||||||
self.events[*e].kind = EventKind::Enter(cont);
|
let e = match Container::try_from(d) {
|
||||||
self.typesets.drain(o..);
|
Ok(cont) => {
|
||||||
EventKind::Exit(cont)
|
self.events[e].kind = EventKind::Enter(cont);
|
||||||
})
|
Some(Event {
|
||||||
|
kind: EventKind::Exit(cont),
|
||||||
|
span: self.span,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
Err(ty) => self.post_span(ty, e),
|
||||||
|
};
|
||||||
|
self.openers.drain(o..);
|
||||||
|
e
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
})
|
})
|
||||||
.unwrap_or_else(|| {
|
.unwrap_or_else(|| {
|
||||||
self.typesets.push((cont, self.events.len()));
|
self.openers.push((delim, self.events.len()));
|
||||||
// use str for now, replace if closed later
|
// use str for now, replace if closed later
|
||||||
EventKind::Str
|
Event {
|
||||||
|
kind: EventKind::Str,
|
||||||
|
span: self.span,
|
||||||
|
}
|
||||||
})
|
})
|
||||||
})
|
})
|
||||||
.map(|kind| Event {
|
}
|
||||||
kind,
|
|
||||||
span: self.span,
|
fn post_span(&mut self, ty: SpanType, opener_event: usize) -> Option<Event> {
|
||||||
|
let mut ahead = self.lexer.inner().clone();
|
||||||
|
match ahead.next() {
|
||||||
|
Some(opener @ ('[' | '(')) => {
|
||||||
|
let (closer, kind) = match opener {
|
||||||
|
'[' if ty == SpanType::Image => (']', ReferenceImage),
|
||||||
|
'[' => (']', ReferenceLink),
|
||||||
|
'(' if ty == SpanType::Image => (')', InlineImage),
|
||||||
|
'(' => (')', InlineLink),
|
||||||
|
_ => unreachable!(),
|
||||||
|
};
|
||||||
|
let mut end = false;
|
||||||
|
let len = (&mut ahead)
|
||||||
|
.take_while(|c| {
|
||||||
|
if *c == closer {
|
||||||
|
end = true;
|
||||||
|
};
|
||||||
|
!end && *c != opener
|
||||||
|
})
|
||||||
|
.count();
|
||||||
|
end.then(|| {
|
||||||
|
let span = Span::by_len(self.span.end() + 1, len);
|
||||||
|
(kind, span)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
.map(|(kind, span)| {
|
||||||
|
self.lexer = lex::Lexer::new(ahead);
|
||||||
|
self.events[opener_event].kind = EventKind::Enter(kind);
|
||||||
|
self.events[opener_event].span = span;
|
||||||
|
self.span = span.translate(1);
|
||||||
|
Event {
|
||||||
|
kind: EventKind::Exit(kind),
|
||||||
|
span,
|
||||||
|
}
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -346,13 +338,74 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
|
enum Directionality {
|
||||||
|
Uni,
|
||||||
|
Bi,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
|
enum SpanType {
|
||||||
|
Image,
|
||||||
|
General,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy)]
|
||||||
|
enum Delim {
|
||||||
|
Span(SpanType),
|
||||||
|
Strong(Directionality),
|
||||||
|
Emphasis(Directionality),
|
||||||
|
Superscript(Directionality),
|
||||||
|
Subscript(Directionality),
|
||||||
|
SingleQuoted,
|
||||||
|
DoubleQuoted,
|
||||||
|
Mark,
|
||||||
|
Delete,
|
||||||
|
Insert,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Delim {
|
||||||
|
fn matches(self, other: Delim) -> bool {
|
||||||
|
match self {
|
||||||
|
Self::Span(..) => matches!(other, Self::Span(..)),
|
||||||
|
Self::Strong(..) => matches!(other, Self::Strong(..)),
|
||||||
|
Self::Emphasis(..) => matches!(other, Self::Emphasis(..)),
|
||||||
|
Self::Superscript(..) => matches!(other, Self::Superscript(..)),
|
||||||
|
Self::Subscript(..) => matches!(other, Self::Subscript(..)),
|
||||||
|
Self::SingleQuoted => matches!(other, Self::SingleQuoted),
|
||||||
|
Self::DoubleQuoted => matches!(other, Self::DoubleQuoted),
|
||||||
|
Self::Mark => matches!(other, Self::Mark),
|
||||||
|
Self::Delete => matches!(other, Self::Delete),
|
||||||
|
Self::Insert => matches!(other, Self::Insert),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TryFrom<Delim> for Container {
|
||||||
|
type Error = SpanType;
|
||||||
|
|
||||||
|
fn try_from(d: Delim) -> Result<Self, Self::Error> {
|
||||||
|
match d {
|
||||||
|
Delim::Span(ty) => Err(ty),
|
||||||
|
Delim::Strong(..) => Ok(Self::Strong),
|
||||||
|
Delim::Emphasis(..) => Ok(Self::Emphasis),
|
||||||
|
Delim::Superscript(..) => Ok(Self::Superscript),
|
||||||
|
Delim::Subscript(..) => Ok(Self::Subscript),
|
||||||
|
Delim::SingleQuoted => Ok(Self::SingleQuoted),
|
||||||
|
Delim::DoubleQuoted => Ok(Self::DoubleQuoted),
|
||||||
|
Delim::Mark => Ok(Self::Mark),
|
||||||
|
Delim::Delete => Ok(Self::Delete),
|
||||||
|
Delim::Insert => Ok(Self::Insert),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl<I: Iterator<Item = char> + Clone> Iterator for Parser<I> {
|
impl<I: Iterator<Item = char> + Clone> Iterator for Parser<I> {
|
||||||
type Item = Event;
|
type Item = Event;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
while self.events.is_empty()
|
while self.events.is_empty()
|
||||||
|| !self.typesets.is_empty()
|
|| !self.openers.is_empty()
|
||||||
|| !self.spans.is_empty()
|
|
||||||
|| self // for merge
|
|| self // for merge
|
||||||
.events
|
.events
|
||||||
.back()
|
.back()
|
||||||
|
|
Loading…
Reference in a new issue