diff --git a/src/inline.rs b/src/inline.rs index 947288c..73e2551 100644 --- a/src/inline.rs +++ b/src/inline.rs @@ -62,7 +62,6 @@ pub enum EventKind { Exit(Container), Atom(Atom), Str, - Whitespace, Attributes { container: bool }, Placeholder, } @@ -240,11 +239,7 @@ impl<'s> Parser<'s> { .or_else(|| self.parse_container(&first)) .or_else(|| self.parse_atom(&first)) .unwrap_or_else(|| { - self.push(if matches!(first.kind, lex::Kind::Whitespace) { - EventKind::Whitespace - } else { - EventKind::Str - }); + self.push(EventKind::Str); }) }) } @@ -296,7 +291,13 @@ impl<'s> Parser<'s> { self.verbatim = None; } else { // continue verbatim - if matches!(first.kind, lex::Kind::Whitespace) { + let is_whitespace = self + .input + .span + .of(self.input.src) + .chars() + .all(char::is_whitespace); + if is_whitespace { if !*non_whitespace_encountered && self.input.peek().map_or(false, |t| { matches!( @@ -489,10 +490,14 @@ impl<'s> Parser<'s> { // empty container return None; } - let whitespace_after = self.events.back().map_or(false, |ev| { - matches!(ev.kind, EventKind::Whitespace | EventKind::Atom(Softbreak)) + let whitespace_before = self.events.back().map_or(false, |ev| { + ev.span + .of(self.input.src) + .chars() + .last() + .map_or(false, char::is_whitespace) }); - if opener.bidirectional() && whitespace_after { + if opener.bidirectional() && whitespace_before { return None; } @@ -577,19 +582,29 @@ impl<'s> Parser<'s> { }) .or_else(|| { let opener = Opener::from_token(first.kind)?; - if opener.bidirectional() - && self - .input - .peek() - .map_or(true, |t| matches!(t.kind, lex::Kind::Whitespace)) - { + let whitespace_after = self + .input + .lexer + .ahead() + .chars() + .next() + .map_or(true, char::is_whitespace); + if opener.bidirectional() && whitespace_after { return None; } + let whitespace_before = self.events.back().map_or(false, |ev| { + ev.span + .of(self.input.src) + .chars() + .last() + .map_or(false, char::is_whitespace) + }); if matches!(opener, Opener::SingleQuoted | Opener::DoubleQuoted) && self .events .back() .map_or(false, |ev| matches!(ev.kind, EventKind::Str)) + && !whitespace_before { return None; } @@ -675,10 +690,8 @@ impl<'s> Parser<'s> { fn merge_str_events(&mut self, span_str: Span) -> Event { let mut span = span_str; let should_merge = |e: &Event, span: Span| { - matches!( - e.kind, - EventKind::Str | EventKind::Whitespace | EventKind::Placeholder - ) && span.end() == e.span.start() + matches!(e.kind, EventKind::Str | EventKind::Placeholder) + && span.end() == e.span.start() }; while self.events.front().map_or(false, |e| should_merge(e, span)) { let ev = self.events.pop_front().unwrap(); @@ -882,9 +895,7 @@ impl<'s> Iterator for Parser<'s> { || self // for merge or attributes .events .back() - .map_or(false, |ev| { - matches!(ev.kind, EventKind::Str | EventKind::Whitespace) - }) + .map_or(false, |ev| matches!(ev.kind, EventKind::Str)) { if self.parse_event().is_none() { if self.input.complete { @@ -911,7 +922,7 @@ impl<'s> Iterator for Parser<'s> { self.events.pop_front().and_then(|e| match e.kind { EventKind::Str if e.span.is_empty() => self.next(), - EventKind::Str | EventKind::Whitespace => Some(self.merge_str_events(e.span)), + EventKind::Str => Some(self.merge_str_events(e.span)), EventKind::Placeholder | EventKind::Attributes { container: false } => self.next(), _ => Some(e), }) diff --git a/src/lex.rs b/src/lex.rs index 1dd4bea..6efee3d 100644 --- a/src/lex.rs +++ b/src/lex.rs @@ -13,7 +13,6 @@ pub(crate) struct Token { pub enum Kind { Text, Newline, - Whitespace, Nbsp, Hardbreak, Escape, @@ -167,6 +166,8 @@ impl<'s> Lexer<'s> { _ if escape && first == ' ' => Nbsp, _ if escape => Text, + '\n' => Newline, + '\\' => { if self .peek_char() @@ -179,12 +180,6 @@ impl<'s> Lexer<'s> { } } - '\n' => Newline, - _ if first.is_whitespace() => { - self.eat_while(char::is_whitespace); - Whitespace - } - '[' => Open(Bracket), ']' => Close(Bracket), '(' => Open(Paren), @@ -323,18 +318,11 @@ mod test { test_lex!("abc", Text.l(3)); test_lex!( "para w/ some _emphasis_ and *strong*.", - Text.l(4), - Whitespace.l(1), - Text.l(2), - Whitespace.l(1), - Text.l(4), - Whitespace.l(1), + Text.l(13), Sym(Underscore).l(1), Text.l(8), Sym(Underscore).l(1), - Whitespace.l(1), - Text.l(3), - Whitespace.l(1), + Text.l(5), Sym(Asterisk).l(1), Text.l(6), Sym(Asterisk).l(1), diff --git a/src/lib.rs b/src/lib.rs index e02f78f..7a8768b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -910,9 +910,7 @@ impl<'s> Parser<'s> { inline::Atom::Escape => Event::Escape, }, inline::EventKind::Str => Event::Str(inline.span.of(self.src).into()), - inline::EventKind::Whitespace - | inline::EventKind::Attributes { .. } - | inline::EventKind::Placeholder => { + inline::EventKind::Attributes { .. } | inline::EventKind::Placeholder => { panic!("{:?}", inline) } }