lex: eat non special chars separately

let the tight loop run for as long as there are no special characters
This commit is contained in:
Noah Hellman 2023-04-23 12:15:39 +02:00
parent 3701d282ac
commit bdab4f021b

View file

@ -164,92 +164,98 @@ impl<'s> Lexer<'s> {
_ => Text, _ => Text,
} }
} else { } else {
match self.eat_char()? { self.eat_while(|c| !is_special(c));
'\n' => Newline, if self.len > 0 {
Text
} else {
match self.eat_char()? {
'\n' => Newline,
'\\' => { '\\' => {
if self if self
.peek_char() .peek_char()
.map_or(false, |c| c.is_whitespace() || c.is_ascii_punctuation()) .map_or(false, |c| c.is_whitespace() || c.is_ascii_punctuation())
{ {
self.escape = true; self.escape = true;
Escape Escape
} else { } else {
Text Text
}
} }
}
'[' => Open(Bracket), '[' => Open(Bracket),
']' => Close(Bracket), ']' => Close(Bracket),
'(' => Open(Paren), '(' => Open(Paren),
')' => Close(Paren), ')' => Close(Paren),
'{' => { '{' => {
let explicit = match self.peek_char() { let explicit = match self.peek_char() {
Some('*') => Some(Open(BraceAsterisk)), Some('*') => Some(Open(BraceAsterisk)),
Some('^') => Some(Open(BraceCaret)), Some('^') => Some(Open(BraceCaret)),
Some('=') => Some(Open(BraceEqual)), Some('=') => Some(Open(BraceEqual)),
Some('-') => Some(Open(BraceHyphen)), Some('-') => Some(Open(BraceHyphen)),
Some('+') => Some(Open(BracePlus)), Some('+') => Some(Open(BracePlus)),
Some('~') => Some(Open(BraceTilde)), Some('~') => Some(Open(BraceTilde)),
Some('_') => Some(Open(BraceUnderscore)), Some('_') => Some(Open(BraceUnderscore)),
Some('\'') => Some(Open(BraceQuote1)), Some('\'') => Some(Open(BraceQuote1)),
Some('"') => Some(Open(BraceQuote2)), Some('"') => Some(Open(BraceQuote2)),
_ => None, _ => None,
}; };
if let Some(exp) = explicit { if let Some(exp) = explicit {
self.eat_char();
exp
} else {
Open(Brace)
}
}
'}' => Close(Brace),
'*' => self.maybe_eat_close_brace(Sym(Asterisk), BraceAsterisk),
'^' => self.maybe_eat_close_brace(Sym(Caret), BraceCaret),
'=' => self.maybe_eat_close_brace(Text, BraceEqual),
'+' => self.maybe_eat_close_brace(Text, BracePlus),
'~' => self.maybe_eat_close_brace(Sym(Tilde), BraceTilde),
'_' => self.maybe_eat_close_brace(Sym(Underscore), BraceUnderscore),
'\'' => self.maybe_eat_close_brace(Sym(Quote1), BraceQuote1),
'"' => self.maybe_eat_close_brace(Sym(Quote2), BraceQuote2),
'-' => {
if self.peek_char() == Some('}') {
self.eat_char();
Close(BraceHyphen)
} else {
while self.peek_char() == Some('-') && self.peek_char_n(1) != Some('}') {
self.eat_char(); self.eat_char();
exp
} else {
Open(Brace)
} }
Seq(Hyphen)
} }
} '}' => Close(Brace),
'*' => self.maybe_eat_close_brace(Sym(Asterisk), BraceAsterisk),
'!' if self.peek_char() == Some('[') => { '^' => self.maybe_eat_close_brace(Sym(Caret), BraceCaret),
self.eat_char(); '=' => self.maybe_eat_close_brace(Text, BraceEqual),
Sym(ExclaimBracket) '+' => self.maybe_eat_close_brace(Text, BracePlus),
} '~' => self.maybe_eat_close_brace(Sym(Tilde), BraceTilde),
'<' => Sym(Lt), '_' => self.maybe_eat_close_brace(Sym(Underscore), BraceUnderscore),
'|' => Sym(Pipe), '\'' => self.maybe_eat_close_brace(Sym(Quote1), BraceQuote1),
':' => Sym(Colon), '"' => self.maybe_eat_close_brace(Sym(Quote2), BraceQuote2),
'-' => {
'`' => self.eat_seq(Backtick), if self.peek_char() == Some('}') {
'.' => self.eat_seq(Period), self.eat_char();
'$' => { Close(BraceHyphen)
self.eat_while(|c| c == '$'); } else {
let mut n_ticks: u8 = 0; while self.peek_char() == Some('-') && self.peek_char_n(1) != Some('}')
self.eat_while(|c| { {
if c == '`' { self.eat_char();
if let Some(l) = n_ticks.checked_add(1) {
n_ticks = l;
return true;
} }
Seq(Hyphen)
} }
false }
});
DollarBacktick(n_ticks)
}
_ => Text, '!' if self.peek_char() == Some('[') => {
self.eat_char();
Sym(ExclaimBracket)
}
'<' => Sym(Lt),
'|' => Sym(Pipe),
':' => Sym(Colon),
'`' => self.eat_seq(Backtick),
'.' => self.eat_seq(Period),
'$' => {
self.eat_while(|c| c == '$');
let mut n_ticks: u8 = 0;
self.eat_while(|c| {
if c == '`' {
if let Some(l) = n_ticks.checked_add(1) {
n_ticks = l;
return true;
}
}
false
});
DollarBacktick(n_ticks)
}
_ => Text,
}
} }
}; };
@ -282,6 +288,35 @@ impl<'s> Iterator for Lexer<'s> {
} }
} }
/// Reports whether `c` is a character the lexer has to examine on its own.
///
/// The lexer consumes a run of characters for which this returns `false` in
/// one go and emits it as plain text; any character for which this returns
/// `true` ends that run and is dispatched individually instead.
///
/// Returns `true` for the newline, the backslash, bracket/parenthesis/brace
/// delimiters, the characters that can pair with a brace, and the remaining
/// symbol and sequence characters; `false` for everything else.
fn is_special(c: char) -> bool {
    // Line break and escape introducer.
    matches!(c, '\n' | '\\')
        // Plain delimiters.
        || matches!(c, '[' | ']' | '(' | ')' | '{' | '}')
        // Characters that may combine with an adjacent brace.
        || matches!(c, '*' | '^' | '=' | '+' | '~' | '_' | '\'' | '"' | '-')
        // Stand-alone symbols and sequence starters.
        || matches!(c, '!' | '<' | '|' | ':' | '`' | '.' | '$')
}
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::Delimiter::*; use super::Delimiter::*;