From ca7f3c7e89f2fcd124b808f3f7d8062245fb2de6 Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Wed, 1 Feb 2023 19:51:08 +0100 Subject: [PATCH] do not treat \0 as EOF may appear in input --- src/block.rs | 20 ++++++++++++++++---- src/lex.rs | 48 ++++++++++++++++++++++++++---------------------- src/lib.rs | 2 -- 3 files changed, 42 insertions(+), 28 deletions(-) diff --git a/src/block.rs b/src/block.rs index 1dd757d..b5b545f 100644 --- a/src/block.rs +++ b/src/block.rs @@ -2,7 +2,6 @@ use crate::Alignment; use crate::OrderedListNumbering::*; use crate::OrderedListStyle::*; use crate::Span; -use crate::EOF; use crate::attr; use crate::lex; @@ -583,8 +582,17 @@ impl IdentifiedBlock { let lt = line_t.len(); let mut chars = line.chars(); - match chars.next().unwrap_or(EOF) { - EOF => Some((Kind::Atom(Blankline), Span::empty_at(indent))), + + let first = if let Some(c) = chars.next() { + c + } else { + return Self { + kind: Kind::Atom(Blankline), + span: Span::empty_at(indent), + }; + }; + + match first { '\n' => Some((Kind::Atom(Blankline), Span::by_len(indent, 1))), '#' => chars .find(|c| *c != '#') @@ -722,7 +730,11 @@ impl IdentifiedBlock { let start_paren = first == '('; if start_paren { - first = chars.next().unwrap_or(EOF); + first = if let Some(c) = chars.next() { + c + } else { + return None; + }; } let numbering = if first.is_ascii_digit() { diff --git a/src/lex.rs b/src/lex.rs index 3beb047..ebb70db 100644 --- a/src/lex.rs +++ b/src/lex.rs @@ -1,5 +1,3 @@ -use crate::EOF; - use Delimiter::*; use Kind::*; use Sequence::*; @@ -124,11 +122,11 @@ impl<I: Iterator<Item = char> + Clone> Lexer<I> { current } - fn peek_char_n(&mut self, n: usize) -> char { - self.chars.clone().nth(n).unwrap_or(EOF) + fn peek_char_n(&mut self, n: usize) -> Option<char> { + self.chars.clone().nth(n) } - fn peek_char(&mut self) -> char { + fn peek_char(&mut self) -> Option<char> { self.peek_char_n(0) } @@ -139,8 +137,12 @@ impl<I: Iterator<Item = char> + Clone> Lexer<I> { } fn eat_while(&mut self, mut predicate: impl FnMut(char) -> bool) { - while
predicate(self.peek_char()) { - self.eat_char(); + while let Some(c) = self.peek_char() { + if predicate(c) { + self.eat_char(); + } else { + break; + } } } @@ -165,8 +167,10 @@ impl<I: Iterator<Item = char> + Clone> Lexer<I> { _ if escape => Text, '\\' => { - let next = self.peek_char(); - if next.is_whitespace() || next.is_ascii_punctuation() { + if self + .peek_char() + .map_or(false, |c| c.is_whitespace() || c.is_ascii_punctuation()) + { self.escape = true; Escape } else { @@ -184,15 +188,15 @@ impl<I: Iterator<Item = char> + Clone> Lexer<I> { ']' => Close(Bracket), '{' => { let explicit = match self.peek_char() { - '*' => Some(Open(BraceAsterisk)), - '^' => Some(Open(BraceCaret)), - '=' => Some(Open(BraceEqual)), - '-' => Some(Open(BraceHyphen)), - '+' => Some(Open(BracePlus)), - '~' => Some(Open(BraceTilde)), - '_' => Some(Open(BraceUnderscore)), - '\'' => Some(Open(BraceQuote1)), - '"' => Some(Open(BraceQuote2)), + Some('*') => Some(Open(BraceAsterisk)), + Some('^') => Some(Open(BraceCaret)), + Some('=') => Some(Open(BraceEqual)), + Some('-') => Some(Open(BraceHyphen)), + Some('+') => Some(Open(BracePlus)), + Some('~') => Some(Open(BraceTilde)), + Some('_') => Some(Open(BraceUnderscore)), + Some('\'') => Some(Open(BraceQuote1)), + Some('"') => Some(Open(BraceQuote2)), _ => None, }; if let Some(exp) = explicit { @@ -211,18 +215,18 @@ impl<I: Iterator<Item = char> + Clone> Lexer<I> { '\'' => self.maybe_eat_close_brace(Sym(Quote1), BraceQuote1), '"' => self.maybe_eat_close_brace(Sym(Quote2), BraceQuote2), '-' => { - if self.peek_char() == '}' { + if self.peek_char() == Some('}') { self.eat_char(); Close(BraceHyphen) } else { - while self.peek_char() == '-' && self.peek_char_n(1) != '}' { + while self.peek_char() == Some('-') && self.peek_char_n(1) != Some('}') { self.eat_char(); } Seq(Hyphen) } } - '!' if self.peek_char() == '[' => { + '!'
if self.peek_char() == Some('[') => { self.eat_char(); Sym(ExclaimBracket) } @@ -252,7 +256,7 @@ impl<I: Iterator<Item = char> + Clone> Lexer<I> { } fn maybe_eat_close_brace(&mut self, kind: Kind, d: Delimiter) -> Kind { - if self.peek_char() == '}' { + if self.peek_char() == Some('}') { self.eat_char(); Close(d) } else { diff --git a/src/lib.rs b/src/lib.rs index fb0a15f..616d105 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -16,8 +16,6 @@ pub use attr::Attributes; type CowStr<'s> = std::borrow::Cow<'s, str>; -const EOF: char = '\0'; - #[derive(Debug, PartialEq, Eq)] pub enum Event<'s> { /// Start of a container.