do not treat \0 as EOF

\0 may appear in input
This commit is contained in:
Noah Hellman 2023-02-01 19:51:08 +01:00
parent 3425ad4189
commit ca7f3c7e89
3 changed files with 42 additions and 28 deletions

View file

@ -2,7 +2,6 @@ use crate::Alignment;
use crate::OrderedListNumbering::*; use crate::OrderedListNumbering::*;
use crate::OrderedListStyle::*; use crate::OrderedListStyle::*;
use crate::Span; use crate::Span;
use crate::EOF;
use crate::attr; use crate::attr;
use crate::lex; use crate::lex;
@ -583,8 +582,17 @@ impl IdentifiedBlock {
let lt = line_t.len(); let lt = line_t.len();
let mut chars = line.chars(); let mut chars = line.chars();
match chars.next().unwrap_or(EOF) {
EOF => Some((Kind::Atom(Blankline), Span::empty_at(indent))), let first = if let Some(c) = chars.next() {
c
} else {
return Self {
kind: Kind::Atom(Blankline),
span: Span::empty_at(indent),
};
};
match first {
'\n' => Some((Kind::Atom(Blankline), Span::by_len(indent, 1))), '\n' => Some((Kind::Atom(Blankline), Span::by_len(indent, 1))),
'#' => chars '#' => chars
.find(|c| *c != '#') .find(|c| *c != '#')
@ -722,7 +730,11 @@ impl IdentifiedBlock {
let start_paren = first == '('; let start_paren = first == '(';
if start_paren { if start_paren {
first = chars.next().unwrap_or(EOF); first = if let Some(c) = chars.next() {
c
} else {
return None;
};
} }
let numbering = if first.is_ascii_digit() { let numbering = if first.is_ascii_digit() {

View file

@ -1,5 +1,3 @@
use crate::EOF;
use Delimiter::*; use Delimiter::*;
use Kind::*; use Kind::*;
use Sequence::*; use Sequence::*;
@ -124,11 +122,11 @@ impl<I: Iterator<Item = char> + Clone> Lexer<I> {
current current
} }
fn peek_char_n(&mut self, n: usize) -> char { fn peek_char_n(&mut self, n: usize) -> Option<char> {
self.chars.clone().nth(n).unwrap_or(EOF) self.chars.clone().nth(n)
} }
fn peek_char(&mut self) -> char { fn peek_char(&mut self) -> Option<char> {
self.peek_char_n(0) self.peek_char_n(0)
} }
@ -139,8 +137,12 @@ impl<I: Iterator<Item = char> + Clone> Lexer<I> {
} }
fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) { fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) {
while predicate(self.peek_char()) { while let Some(c) = self.peek_char() {
self.eat_char(); if predicate(c) {
self.eat_char();
} else {
break;
}
} }
} }
@ -165,8 +167,10 @@ impl<I: Iterator<Item = char> + Clone> Lexer<I> {
_ if escape => Text, _ if escape => Text,
'\\' => { '\\' => {
let next = self.peek_char(); if self
if next.is_whitespace() || next.is_ascii_punctuation() { .peek_char()
.map_or(false, |c| c.is_whitespace() || c.is_ascii_punctuation())
{
self.escape = true; self.escape = true;
Escape Escape
} else { } else {
@ -184,15 +188,15 @@ impl<I: Iterator<Item = char> + Clone> Lexer<I> {
']' => Close(Bracket), ']' => Close(Bracket),
'{' => { '{' => {
let explicit = match self.peek_char() { let explicit = match self.peek_char() {
'*' => Some(Open(BraceAsterisk)), Some('*') => Some(Open(BraceAsterisk)),
'^' => Some(Open(BraceCaret)), Some('^') => Some(Open(BraceCaret)),
'=' => Some(Open(BraceEqual)), Some('=') => Some(Open(BraceEqual)),
'-' => Some(Open(BraceHyphen)), Some('-') => Some(Open(BraceHyphen)),
'+' => Some(Open(BracePlus)), Some('+') => Some(Open(BracePlus)),
'~' => Some(Open(BraceTilde)), Some('~') => Some(Open(BraceTilde)),
'_' => Some(Open(BraceUnderscore)), Some('_') => Some(Open(BraceUnderscore)),
'\'' => Some(Open(BraceQuote1)), Some('\'') => Some(Open(BraceQuote1)),
'"' => Some(Open(BraceQuote2)), Some('"') => Some(Open(BraceQuote2)),
_ => None, _ => None,
}; };
if let Some(exp) = explicit { if let Some(exp) = explicit {
@ -211,18 +215,18 @@ impl<I: Iterator<Item = char> + Clone> Lexer<I> {
'\'' => self.maybe_eat_close_brace(Sym(Quote1), BraceQuote1), '\'' => self.maybe_eat_close_brace(Sym(Quote1), BraceQuote1),
'"' => self.maybe_eat_close_brace(Sym(Quote2), BraceQuote2), '"' => self.maybe_eat_close_brace(Sym(Quote2), BraceQuote2),
'-' => { '-' => {
if self.peek_char() == '}' { if self.peek_char() == Some('}') {
self.eat_char(); self.eat_char();
Close(BraceHyphen) Close(BraceHyphen)
} else { } else {
while self.peek_char() == '-' && self.peek_char_n(1) != '}' { while self.peek_char() == Some('-') && self.peek_char_n(1) != Some('}') {
self.eat_char(); self.eat_char();
} }
Seq(Hyphen) Seq(Hyphen)
} }
} }
'!' if self.peek_char() == '[' => { '!' if self.peek_char() == Some('[') => {
self.eat_char(); self.eat_char();
Sym(ExclaimBracket) Sym(ExclaimBracket)
} }
@ -252,7 +256,7 @@ impl<I: Iterator<Item = char> + Clone> Lexer<I> {
} }
fn maybe_eat_close_brace(&mut self, kind: Kind, d: Delimiter) -> Kind { fn maybe_eat_close_brace(&mut self, kind: Kind, d: Delimiter) -> Kind {
if self.peek_char() == '}' { if self.peek_char() == Some('}') {
self.eat_char(); self.eat_char();
Close(d) Close(d)
} else { } else {

View file

@ -16,8 +16,6 @@ pub use attr::Attributes;
type CowStr<'s> = std::borrow::Cow<'s, str>; type CowStr<'s> = std::borrow::Cow<'s, str>;
const EOF: char = '\0';
#[derive(Debug, PartialEq, Eq)] #[derive(Debug, PartialEq, Eq)]
pub enum Event<'s> { pub enum Event<'s> {
/// Start of a container. /// Start of a container.