lex: rm lex::Kind::Whitespace
Whitespace tokens do not necessarily create new events, but they act as delimiters for words with attributes and affect some container delimiters. Now that we can read the source from inline, we can instead inspect it for whitespace when needed. Removing the whitespace token allows the lexer to continue a lot longer without stopping. E.g. a typical line in a paragraph with no special characters can turn into a single token.
This commit is contained in:
parent
9454a2e393
commit
86ee4ee520
3 changed files with 40 additions and 43 deletions
|
@ -62,7 +62,6 @@ pub enum EventKind {
|
|||
Exit(Container),
|
||||
Atom(Atom),
|
||||
Str,
|
||||
Whitespace,
|
||||
Attributes { container: bool },
|
||||
Placeholder,
|
||||
}
|
||||
|
@ -240,11 +239,7 @@ impl<'s> Parser<'s> {
|
|||
.or_else(|| self.parse_container(&first))
|
||||
.or_else(|| self.parse_atom(&first))
|
||||
.unwrap_or_else(|| {
|
||||
self.push(if matches!(first.kind, lex::Kind::Whitespace) {
|
||||
EventKind::Whitespace
|
||||
} else {
|
||||
EventKind::Str
|
||||
});
|
||||
self.push(EventKind::Str);
|
||||
})
|
||||
})
|
||||
}
|
||||
|
@ -296,7 +291,13 @@ impl<'s> Parser<'s> {
|
|||
self.verbatim = None;
|
||||
} else {
|
||||
// continue verbatim
|
||||
if matches!(first.kind, lex::Kind::Whitespace) {
|
||||
let is_whitespace = self
|
||||
.input
|
||||
.span
|
||||
.of(self.input.src)
|
||||
.chars()
|
||||
.all(char::is_whitespace);
|
||||
if is_whitespace {
|
||||
if !*non_whitespace_encountered
|
||||
&& self.input.peek().map_or(false, |t| {
|
||||
matches!(
|
||||
|
@ -489,10 +490,14 @@ impl<'s> Parser<'s> {
|
|||
// empty container
|
||||
return None;
|
||||
}
|
||||
let whitespace_after = self.events.back().map_or(false, |ev| {
|
||||
matches!(ev.kind, EventKind::Whitespace | EventKind::Atom(Softbreak))
|
||||
let whitespace_before = self.events.back().map_or(false, |ev| {
|
||||
ev.span
|
||||
.of(self.input.src)
|
||||
.chars()
|
||||
.last()
|
||||
.map_or(false, char::is_whitespace)
|
||||
});
|
||||
if opener.bidirectional() && whitespace_after {
|
||||
if opener.bidirectional() && whitespace_before {
|
||||
return None;
|
||||
}
|
||||
|
||||
|
@ -577,19 +582,29 @@ impl<'s> Parser<'s> {
|
|||
})
|
||||
.or_else(|| {
|
||||
let opener = Opener::from_token(first.kind)?;
|
||||
if opener.bidirectional()
|
||||
&& self
|
||||
let whitespace_after = self
|
||||
.input
|
||||
.peek()
|
||||
.map_or(true, |t| matches!(t.kind, lex::Kind::Whitespace))
|
||||
{
|
||||
.lexer
|
||||
.ahead()
|
||||
.chars()
|
||||
.next()
|
||||
.map_or(true, char::is_whitespace);
|
||||
if opener.bidirectional() && whitespace_after {
|
||||
return None;
|
||||
}
|
||||
let whitespace_before = self.events.back().map_or(false, |ev| {
|
||||
ev.span
|
||||
.of(self.input.src)
|
||||
.chars()
|
||||
.last()
|
||||
.map_or(false, char::is_whitespace)
|
||||
});
|
||||
if matches!(opener, Opener::SingleQuoted | Opener::DoubleQuoted)
|
||||
&& self
|
||||
.events
|
||||
.back()
|
||||
.map_or(false, |ev| matches!(ev.kind, EventKind::Str))
|
||||
&& !whitespace_before
|
||||
{
|
||||
return None;
|
||||
}
|
||||
|
@ -675,10 +690,8 @@ impl<'s> Parser<'s> {
|
|||
fn merge_str_events(&mut self, span_str: Span) -> Event {
|
||||
let mut span = span_str;
|
||||
let should_merge = |e: &Event, span: Span| {
|
||||
matches!(
|
||||
e.kind,
|
||||
EventKind::Str | EventKind::Whitespace | EventKind::Placeholder
|
||||
) && span.end() == e.span.start()
|
||||
matches!(e.kind, EventKind::Str | EventKind::Placeholder)
|
||||
&& span.end() == e.span.start()
|
||||
};
|
||||
while self.events.front().map_or(false, |e| should_merge(e, span)) {
|
||||
let ev = self.events.pop_front().unwrap();
|
||||
|
@ -882,9 +895,7 @@ impl<'s> Iterator for Parser<'s> {
|
|||
|| self // for merge or attributes
|
||||
.events
|
||||
.back()
|
||||
.map_or(false, |ev| {
|
||||
matches!(ev.kind, EventKind::Str | EventKind::Whitespace)
|
||||
})
|
||||
.map_or(false, |ev| matches!(ev.kind, EventKind::Str))
|
||||
{
|
||||
if self.parse_event().is_none() {
|
||||
if self.input.complete {
|
||||
|
@ -911,7 +922,7 @@ impl<'s> Iterator for Parser<'s> {
|
|||
|
||||
self.events.pop_front().and_then(|e| match e.kind {
|
||||
EventKind::Str if e.span.is_empty() => self.next(),
|
||||
EventKind::Str | EventKind::Whitespace => Some(self.merge_str_events(e.span)),
|
||||
EventKind::Str => Some(self.merge_str_events(e.span)),
|
||||
EventKind::Placeholder | EventKind::Attributes { container: false } => self.next(),
|
||||
_ => Some(e),
|
||||
})
|
||||
|
|
20
src/lex.rs
20
src/lex.rs
|
@ -13,7 +13,6 @@ pub(crate) struct Token {
|
|||
pub enum Kind {
|
||||
Text,
|
||||
Newline,
|
||||
Whitespace,
|
||||
Nbsp,
|
||||
Hardbreak,
|
||||
Escape,
|
||||
|
@ -167,6 +166,8 @@ impl<'s> Lexer<'s> {
|
|||
_ if escape && first == ' ' => Nbsp,
|
||||
_ if escape => Text,
|
||||
|
||||
'\n' => Newline,
|
||||
|
||||
'\\' => {
|
||||
if self
|
||||
.peek_char()
|
||||
|
@ -179,12 +180,6 @@ impl<'s> Lexer<'s> {
|
|||
}
|
||||
}
|
||||
|
||||
'\n' => Newline,
|
||||
_ if first.is_whitespace() => {
|
||||
self.eat_while(char::is_whitespace);
|
||||
Whitespace
|
||||
}
|
||||
|
||||
'[' => Open(Bracket),
|
||||
']' => Close(Bracket),
|
||||
'(' => Open(Paren),
|
||||
|
@ -323,18 +318,11 @@ mod test {
|
|||
test_lex!("abc", Text.l(3));
|
||||
test_lex!(
|
||||
"para w/ some _emphasis_ and *strong*.",
|
||||
Text.l(4),
|
||||
Whitespace.l(1),
|
||||
Text.l(2),
|
||||
Whitespace.l(1),
|
||||
Text.l(4),
|
||||
Whitespace.l(1),
|
||||
Text.l(13),
|
||||
Sym(Underscore).l(1),
|
||||
Text.l(8),
|
||||
Sym(Underscore).l(1),
|
||||
Whitespace.l(1),
|
||||
Text.l(3),
|
||||
Whitespace.l(1),
|
||||
Text.l(5),
|
||||
Sym(Asterisk).l(1),
|
||||
Text.l(6),
|
||||
Sym(Asterisk).l(1),
|
||||
|
|
|
@ -910,9 +910,7 @@ impl<'s> Parser<'s> {
|
|||
inline::Atom::Escape => Event::Escape,
|
||||
},
|
||||
inline::EventKind::Str => Event::Str(inline.span.of(self.src).into()),
|
||||
inline::EventKind::Whitespace
|
||||
| inline::EventKind::Attributes { .. }
|
||||
| inline::EventKind::Placeholder => {
|
||||
inline::EventKind::Attributes { .. } | inline::EventKind::Placeholder => {
|
||||
panic!("{:?}", inline)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue