lex: rm lex::Kind::Whitespace
Whitespace tokens do not necessarily create new events, but they work as delimiters for words with attributes and affect some container delimiters. Now that we can read the source from within the inline parser, we can instead inspect for whitespace when needed. Removing the whitespace token allows the lexer to continue a lot longer without stopping. For example, a typical line in a paragraph with no special characters can turn into a single token.
This commit is contained in:
		
					parent
					
						
							
								9454a2e393
							
						
					
				
			
			
				commit
				
					
						86ee4ee520
					
				
			
		
					 3 changed files with 40 additions and 43 deletions
				
			
		| 
						 | 
				
			
			@ -62,7 +62,6 @@ pub enum EventKind {
 | 
			
		|||
    Exit(Container),
 | 
			
		||||
    Atom(Atom),
 | 
			
		||||
    Str,
 | 
			
		||||
    Whitespace,
 | 
			
		||||
    Attributes { container: bool },
 | 
			
		||||
    Placeholder,
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -240,11 +239,7 @@ impl<'s> Parser<'s> {
 | 
			
		|||
                .or_else(|| self.parse_container(&first))
 | 
			
		||||
                .or_else(|| self.parse_atom(&first))
 | 
			
		||||
                .unwrap_or_else(|| {
 | 
			
		||||
                    self.push(if matches!(first.kind, lex::Kind::Whitespace) {
 | 
			
		||||
                        EventKind::Whitespace
 | 
			
		||||
                    } else {
 | 
			
		||||
                        EventKind::Str
 | 
			
		||||
                    });
 | 
			
		||||
                    self.push(EventKind::Str);
 | 
			
		||||
                })
 | 
			
		||||
        })
 | 
			
		||||
    }
 | 
			
		||||
| 
						 | 
				
			
			@ -296,7 +291,13 @@ impl<'s> Parser<'s> {
 | 
			
		|||
                self.verbatim = None;
 | 
			
		||||
            } else {
 | 
			
		||||
                // continue verbatim
 | 
			
		||||
                if matches!(first.kind, lex::Kind::Whitespace) {
 | 
			
		||||
                let is_whitespace = self
 | 
			
		||||
                    .input
 | 
			
		||||
                    .span
 | 
			
		||||
                    .of(self.input.src)
 | 
			
		||||
                    .chars()
 | 
			
		||||
                    .all(char::is_whitespace);
 | 
			
		||||
                if is_whitespace {
 | 
			
		||||
                    if !*non_whitespace_encountered
 | 
			
		||||
                        && self.input.peek().map_or(false, |t| {
 | 
			
		||||
                            matches!(
 | 
			
		||||
| 
						 | 
				
			
			@ -489,10 +490,14 @@ impl<'s> Parser<'s> {
 | 
			
		|||
                    // empty container
 | 
			
		||||
                    return None;
 | 
			
		||||
                }
 | 
			
		||||
                let whitespace_after = self.events.back().map_or(false, |ev| {
 | 
			
		||||
                    matches!(ev.kind, EventKind::Whitespace | EventKind::Atom(Softbreak))
 | 
			
		||||
                let whitespace_before = self.events.back().map_or(false, |ev| {
 | 
			
		||||
                    ev.span
 | 
			
		||||
                        .of(self.input.src)
 | 
			
		||||
                        .chars()
 | 
			
		||||
                        .last()
 | 
			
		||||
                        .map_or(false, char::is_whitespace)
 | 
			
		||||
                });
 | 
			
		||||
                if opener.bidirectional() && whitespace_after {
 | 
			
		||||
                if opener.bidirectional() && whitespace_before {
 | 
			
		||||
                    return None;
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -577,19 +582,29 @@ impl<'s> Parser<'s> {
 | 
			
		|||
            })
 | 
			
		||||
            .or_else(|| {
 | 
			
		||||
                let opener = Opener::from_token(first.kind)?;
 | 
			
		||||
                if opener.bidirectional()
 | 
			
		||||
                    && self
 | 
			
		||||
                let whitespace_after = self
 | 
			
		||||
                    .input
 | 
			
		||||
                        .peek()
 | 
			
		||||
                        .map_or(true, |t| matches!(t.kind, lex::Kind::Whitespace))
 | 
			
		||||
                {
 | 
			
		||||
                    .lexer
 | 
			
		||||
                    .ahead()
 | 
			
		||||
                    .chars()
 | 
			
		||||
                    .next()
 | 
			
		||||
                    .map_or(true, char::is_whitespace);
 | 
			
		||||
                if opener.bidirectional() && whitespace_after {
 | 
			
		||||
                    return None;
 | 
			
		||||
                }
 | 
			
		||||
                let whitespace_before = self.events.back().map_or(false, |ev| {
 | 
			
		||||
                    ev.span
 | 
			
		||||
                        .of(self.input.src)
 | 
			
		||||
                        .chars()
 | 
			
		||||
                        .last()
 | 
			
		||||
                        .map_or(false, char::is_whitespace)
 | 
			
		||||
                });
 | 
			
		||||
                if matches!(opener, Opener::SingleQuoted | Opener::DoubleQuoted)
 | 
			
		||||
                    && self
 | 
			
		||||
                        .events
 | 
			
		||||
                        .back()
 | 
			
		||||
                        .map_or(false, |ev| matches!(ev.kind, EventKind::Str))
 | 
			
		||||
                    && !whitespace_before
 | 
			
		||||
                {
 | 
			
		||||
                    return None;
 | 
			
		||||
                }
 | 
			
		||||
| 
						 | 
				
			
			@ -675,10 +690,8 @@ impl<'s> Parser<'s> {
 | 
			
		|||
    fn merge_str_events(&mut self, span_str: Span) -> Event {
 | 
			
		||||
        let mut span = span_str;
 | 
			
		||||
        let should_merge = |e: &Event, span: Span| {
 | 
			
		||||
            matches!(
 | 
			
		||||
                e.kind,
 | 
			
		||||
                EventKind::Str | EventKind::Whitespace | EventKind::Placeholder
 | 
			
		||||
            ) && span.end() == e.span.start()
 | 
			
		||||
            matches!(e.kind, EventKind::Str | EventKind::Placeholder)
 | 
			
		||||
                && span.end() == e.span.start()
 | 
			
		||||
        };
 | 
			
		||||
        while self.events.front().map_or(false, |e| should_merge(e, span)) {
 | 
			
		||||
            let ev = self.events.pop_front().unwrap();
 | 
			
		||||
| 
						 | 
				
			
			@ -882,9 +895,7 @@ impl<'s> Iterator for Parser<'s> {
 | 
			
		|||
            || self // for merge or attributes
 | 
			
		||||
                .events
 | 
			
		||||
                .back()
 | 
			
		||||
                .map_or(false, |ev| {
 | 
			
		||||
                    matches!(ev.kind, EventKind::Str | EventKind::Whitespace)
 | 
			
		||||
                })
 | 
			
		||||
                .map_or(false, |ev| matches!(ev.kind, EventKind::Str))
 | 
			
		||||
        {
 | 
			
		||||
            if self.parse_event().is_none() {
 | 
			
		||||
                if self.input.complete {
 | 
			
		||||
| 
						 | 
				
			
			@ -911,7 +922,7 @@ impl<'s> Iterator for Parser<'s> {
 | 
			
		|||
 | 
			
		||||
        self.events.pop_front().and_then(|e| match e.kind {
 | 
			
		||||
            EventKind::Str if e.span.is_empty() => self.next(),
 | 
			
		||||
            EventKind::Str | EventKind::Whitespace => Some(self.merge_str_events(e.span)),
 | 
			
		||||
            EventKind::Str => Some(self.merge_str_events(e.span)),
 | 
			
		||||
            EventKind::Placeholder | EventKind::Attributes { container: false } => self.next(),
 | 
			
		||||
            _ => Some(e),
 | 
			
		||||
        })
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										20
									
								
								src/lex.rs
									
										
									
									
									
								
							
							
						
						
									
										20
									
								
								src/lex.rs
									
										
									
									
									
								
							| 
						 | 
				
			
			@ -13,7 +13,6 @@ pub(crate) struct Token {
 | 
			
		|||
pub enum Kind {
 | 
			
		||||
    Text,
 | 
			
		||||
    Newline,
 | 
			
		||||
    Whitespace,
 | 
			
		||||
    Nbsp,
 | 
			
		||||
    Hardbreak,
 | 
			
		||||
    Escape,
 | 
			
		||||
| 
						 | 
				
			
			@ -167,6 +166,8 @@ impl<'s> Lexer<'s> {
 | 
			
		|||
            _ if escape && first == ' ' => Nbsp,
 | 
			
		||||
            _ if escape => Text,
 | 
			
		||||
 | 
			
		||||
            '\n' => Newline,
 | 
			
		||||
 | 
			
		||||
            '\\' => {
 | 
			
		||||
                if self
 | 
			
		||||
                    .peek_char()
 | 
			
		||||
| 
						 | 
				
			
			@ -179,12 +180,6 @@ impl<'s> Lexer<'s> {
 | 
			
		|||
                }
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            '\n' => Newline,
 | 
			
		||||
            _ if first.is_whitespace() => {
 | 
			
		||||
                self.eat_while(char::is_whitespace);
 | 
			
		||||
                Whitespace
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            '[' => Open(Bracket),
 | 
			
		||||
            ']' => Close(Bracket),
 | 
			
		||||
            '(' => Open(Paren),
 | 
			
		||||
| 
						 | 
				
			
			@ -323,18 +318,11 @@ mod test {
 | 
			
		|||
        test_lex!("abc", Text.l(3));
 | 
			
		||||
        test_lex!(
 | 
			
		||||
            "para w/ some _emphasis_ and *strong*.",
 | 
			
		||||
            Text.l(4),
 | 
			
		||||
            Whitespace.l(1),
 | 
			
		||||
            Text.l(2),
 | 
			
		||||
            Whitespace.l(1),
 | 
			
		||||
            Text.l(4),
 | 
			
		||||
            Whitespace.l(1),
 | 
			
		||||
            Text.l(13),
 | 
			
		||||
            Sym(Underscore).l(1),
 | 
			
		||||
            Text.l(8),
 | 
			
		||||
            Sym(Underscore).l(1),
 | 
			
		||||
            Whitespace.l(1),
 | 
			
		||||
            Text.l(3),
 | 
			
		||||
            Whitespace.l(1),
 | 
			
		||||
            Text.l(5),
 | 
			
		||||
            Sym(Asterisk).l(1),
 | 
			
		||||
            Text.l(6),
 | 
			
		||||
            Sym(Asterisk).l(1),
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -910,9 +910,7 @@ impl<'s> Parser<'s> {
 | 
			
		|||
                    inline::Atom::Escape => Event::Escape,
 | 
			
		||||
                },
 | 
			
		||||
                inline::EventKind::Str => Event::Str(inline.span.of(self.src).into()),
 | 
			
		||||
                inline::EventKind::Whitespace
 | 
			
		||||
                | inline::EventKind::Attributes { .. }
 | 
			
		||||
                | inline::EventKind::Placeholder => {
 | 
			
		||||
                inline::EventKind::Attributes { .. } | inline::EventKind::Placeholder => {
 | 
			
		||||
                    panic!("{:?}", inline)
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue