inline: use bytes instead of chars

Noah Hellman 2023-05-13 20:49:40 +02:00
parent eaa21fd393
commit 3908823d18
4 changed files with 60 additions and 51 deletions
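
The gist of the change: span lengths are now measured by walking raw bytes from the lexer's lookahead, so the length is simply the number of bytes taken (count()) instead of a sum of char::len_utf8 over chars, and whitespace checks become is_ascii_whitespace. A minimal standalone sketch of the new scanning pattern (illustration only, not code from the repository):

    fn main() {
        // Scan up to a closing '>' the way the byte-based parser does: every
        // byte taken contributes exactly 1 to the length, so the char-based
        // `.map(char::len_utf8).sum()` becomes a plain `.count()`.
        let ahead: &[u8] = "https://example.com>rest".as_bytes();
        let mut end = false;
        let len = ahead
            .iter()
            .take_while(|c| {
                if **c == b'>' {
                    end = true;
                }
                !end && !c.is_ascii_whitespace()
            })
            .count();
        assert!(end);
        assert_eq!(&ahead[..len], b"https://example.com");
    }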

View file

@@ -601,7 +601,7 @@ impl<'s> TreeParser<'s> {
         let row_event_enter =
             self.enter(Node::Container(TableRow { head: false }), row.with_len(1));
         let rem = row.skip(1); // |
-        let lex = lex::Lexer::new(rem.of(self.src));
+        let lex = lex::Lexer::new(rem.of(self.src).as_bytes());
         let mut pos = rem.start();
         let mut cell_start = pos;
         let mut separator_row = true;

View file

@@ -94,7 +94,7 @@ impl<'s> Input<'s> {
     fn new(src: &'s str) -> Self {
         Self {
             src,
-            lexer: lex::Lexer::new(""),
+            lexer: lex::Lexer::new(b""),
             complete: false,
             span_line: Span::new(0, 0),
             ahead: std::collections::VecDeque::new(),
@@ -118,13 +118,13 @@ impl<'s> Input<'s> {
     }

     fn set_current_line(&mut self, line: Span) {
-        self.lexer = lex::Lexer::new(line.of(self.src));
+        self.lexer = lex::Lexer::new(line.of(self.src).as_bytes());
         self.span_line = line;
         self.span = line.empty_before();
     }

     fn reset(&mut self) {
-        self.lexer = lex::Lexer::new("");
+        self.lexer = lex::Lexer::new(b"");
         self.complete = false;
         self.ahead.clear();
     }
@@ -154,21 +154,22 @@ impl<'s> Input<'s> {
             self.lexer.peek().map(|t| &t.kind),
             Some(lex::Kind::Open(Delimiter::BraceEqual))
         ) {
-            let mut ahead = self.lexer.ahead().chars();
             let mut end = false;
-            let len = (&mut ahead)
+            let len = self
+                .lexer
+                .ahead()
+                .iter()
                 .skip(2) // {=
                 .take_while(|c| {
-                    if *c == '{' {
+                    if **c == b'{' {
                         return false;
                     }
-                    if *c == '}' {
+                    if **c == b'}' {
                         end = true;
                     };
-                    !end && !c.is_whitespace()
+                    !end && !c.is_ascii_whitespace()
                 })
-                .map(char::len_utf8)
-                .sum();
+                .count();
             (len > 0 && end).then(|| {
                 let tok = self.eat();
                 debug_assert_eq!(
@@ -178,7 +179,7 @@ impl<'s> Input<'s> {
                         len: 2,
                     })
                 );
-                self.lexer = lex::Lexer::new(ahead.as_str());
+                self.lexer.skip_ahead(len + 1);
                 self.span.after(len)
             })
         } else {
@@ -493,7 +494,7 @@ impl<'s> Parser<'s> {
                 if opener_eaten {
                     self.input.span = Span::empty_at(start_attr);
                     self.input.lexer = lex::Lexer::new(
-                        &self.input.src[start_attr..self.input.span_line.end()],
+                        &self.input.src.as_bytes()[start_attr..self.input.span_line.end()],
                     );
                 }
                 return Some(More);
@@ -523,7 +524,7 @@ impl<'s> Parser<'s> {
                     self.input.set_current_line(l);
                 }
                 self.input.span = Span::new(start_attr, state.end_attr);
-                self.input.lexer = lex::Lexer::new(&self.input.src[state.end_attr..line_end]);
+                self.input.lexer = lex::Lexer::new(&self.input.src.as_bytes()[state.end_attr..line_end]);

                 if attrs.is_empty() {
                     if matches!(state.elem_ty, AttributesElementType::Container { .. }) {
@@ -563,26 +564,28 @@ impl<'s> Parser<'s> {

     fn parse_autolink(&mut self, first: &lex::Token) -> Option<ControlFlow> {
         if first.kind == lex::Kind::Sym(Symbol::Lt) {
-            let mut ahead = self.input.lexer.ahead().chars();
             let mut end = false;
             let mut is_url = false;
-            let len = (&mut ahead)
+            let len = self
+                .input
+                .lexer
+                .ahead()
+                .iter()
                 .take_while(|c| {
-                    if *c == '<' {
+                    if **c == b'<' {
                         return false;
                     }
-                    if *c == '>' {
+                    if **c == b'>' {
                         end = true;
                     };
-                    if matches!(*c, ':' | '@') {
+                    if matches!(*c, b':' | b'@') {
                         is_url = true;
                     }
-                    !end && !c.is_whitespace()
+                    !end && !c.is_ascii_whitespace()
                 })
-                .map(char::len_utf8)
-                .sum();
+                .count();
             if end && is_url {
-                self.input.lexer = lex::Lexer::new(ahead.as_str());
+                self.input.lexer.skip_ahead(len + 1);
                 let span_url = self.input.span.after(len);
                 let url = span_url.of(self.input.src);
                 self.push(EventKind::Enter(Autolink(url)));
@@ -597,22 +600,24 @@ impl<'s> Parser<'s> {

     fn parse_symbol(&mut self, first: &lex::Token) -> Option<ControlFlow> {
         if first.kind == lex::Kind::Sym(Symbol::Colon) {
-            let mut ahead = self.input.lexer.ahead().chars();
             let mut end = false;
             let mut valid = true;
-            let len = (&mut ahead)
+            let len = self
+                .input
+                .lexer
+                .ahead()
+                .iter()
                 .take_while(|c| {
-                    if *c == ':' {
+                    if **c == b':' {
                         end = true;
-                    } else if !c.is_ascii_alphanumeric() && !matches!(c, '-' | '+' | '_') {
+                    } else if !c.is_ascii_alphanumeric() && !matches!(c, b'-' | b'+' | b'_') {
                         valid = false;
                     }
-                    !end && !c.is_whitespace()
+                    !end && !c.is_ascii_whitespace()
                 })
-                .map(char::len_utf8)
-                .sum();
+                .count();
             if end && valid {
-                self.input.lexer = lex::Lexer::new(ahead.as_str());
+                self.input.lexer.skip_ahead(len + 1);
                 let span_symbol = self.input.span.after(len);
                 self.input.span = Span::new(self.input.span.start(), span_symbol.end() + 1);
                 return self.push(EventKind::Atom(Atom::Symbol(
@@ -641,22 +646,24 @@ impl<'s> Parser<'s> {
                     len: 1,
                 })
             );
-            let mut ahead = self.input.lexer.ahead().chars();
             let mut end = false;
-            let len = (&mut ahead)
+            let len = self
+                .input
+                .lexer
+                .ahead()
+                .iter()
                 .take_while(|c| {
-                    if *c == '[' {
+                    if **c == b'[' {
                         return false;
                     }
-                    if *c == ']' {
+                    if **c == b']' {
                         end = true;
                     };
-                    !end && *c != '\n'
+                    !end && **c != b'\n'
                 })
-                .map(char::len_utf8)
-                .sum();
+                .count();
             if end {
-                self.input.lexer = lex::Lexer::new(ahead.as_str());
+                self.input.lexer.skip_ahead(len + 1);
                 let span_label = self.input.span.after(len);
                 let label = span_label.of(self.input.src);
                 self.input.span = Span::new(self.input.span.start(), span_label.end() + 1);
@@ -837,9 +844,9 @@ impl<'s> Parser<'s> {
             .input
             .lexer
             .ahead()
-            .chars()
+            .iter()
             .next()
-            .map_or(true, char::is_whitespace);
+            .map_or(true, |c| c.is_ascii_whitespace());
         if opener.bidirectional() && whitespace_after {
             return None;
         }
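
A side effect worth noting: these scans now stop only on ASCII whitespace (is_ascii_whitespace) rather than Unicode whitespace, and every byte of a multi-byte UTF-8 character is 0x80 or above, so it can never look like ASCII whitespace or like one of the ASCII delimiters being matched. A small sketch of that property (whether this is what allowed the two unicode-whitespace entries in the last changed file to be dropped is my reading, not stated in the commit):

    fn main() {
        // A no-break space (U+00A0) encodes as the two bytes 0xC2 0xA0.
        let nbsp = "\u{00A0}";
        // As a char it counts as Unicode whitespace...
        assert!(nbsp.chars().next().unwrap().is_whitespace());
        // ...but neither of its UTF-8 bytes is ASCII whitespace, so a
        // byte-based scan treats it like ordinary non-delimiter content.
        assert!(nbsp.bytes().all(|b| !b.is_ascii_whitespace()));
    }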

View file

@@ -71,7 +71,7 @@ impl Sequence {

 #[derive(Clone)]
 pub(crate) struct Lexer<'s> {
-    src: &'s str,
+    src: &'s [u8],
     /// Current position within `src`.
     pos: usize,
     /// Next character should be escaped.
@@ -81,7 +81,7 @@ pub(crate) struct Lexer<'s> {
 }

 impl<'s> Lexer<'s> {
-    pub fn new(src: &'s str) -> Self {
+    pub fn new(src: &'s [u8]) -> Self {
         Lexer {
             src,
             pos: 0,
@@ -99,10 +99,14 @@ impl<'s> Lexer<'s> {
         self.next.as_ref()
     }

-    pub fn ahead(&self) -> &'s str {
+    pub fn ahead(&self) -> &'s [u8] {
         &self.src[self.pos - self.next.as_ref().map_or(0, |t| t.len)..]
     }

+    pub fn skip_ahead(&mut self, n: usize) {
+        *self = Self::new(&self.src[self.pos + n..]);
+    }
+
     fn next_token(&mut self) -> Option<Token> {
         let mut current = self.token();
@@ -119,7 +123,7 @@ impl<'s> Lexer<'s> {
     }

     fn peek_byte_n(&mut self, n: usize) -> Option<u8> {
-        self.src.as_bytes().get(self.pos + n).copied()
+        self.src.get(self.pos + n).copied()
     }

     fn peek_byte(&mut self) -> Option<u8> {
@@ -128,7 +132,7 @@ impl<'s> Lexer<'s> {

     fn eat_byte(&mut self) -> Option<u8> {
         if self.pos < self.src.len() {
-            let c = self.src.as_bytes()[self.pos];
+            let c = self.src[self.pos];
             self.pos += 1;
             Some(c)
         } else {
@@ -155,9 +159,9 @@ impl<'s> Lexer<'s> {
             b'\n' => Hardbreak,
             b'\t' | b' '
                 if self.src[self.pos..]
-                    .bytes()
+                    .iter()
                     .find(|c| !matches!(c, b' ' | b'\t'))
-                    == Some(b'\n') =>
+                    == Some(&b'\n') =>
             {
                 while self.eat_byte() != Some(b'\n') {}
                 Hardbreak
@@ -319,7 +323,7 @@ mod test {
     macro_rules! test_lex {
         ($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => {
             #[allow(unused)]
-            let actual = super::Lexer::new($src).collect::<Vec<_>>();
+            let actual = super::Lexer::new($src.as_bytes()).collect::<Vec<_>>();
             let expected = vec![$($($token),*,)?];
             assert_eq!(actual, expected, "{}", $src);
         };
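
The new skip_ahead above just restarts the lexer on a suffix of its byte slice; it is what replaces the old Lexer::new(ahead.as_str()) calls in the inline parser. A rough, self-contained illustration of the same reslicing pattern (MiniLexer is a made-up stand-in, not the real type):

    struct MiniLexer<'s> {
        src: &'s [u8],
        pos: usize,
    }

    impl<'s> MiniLexer<'s> {
        fn new(src: &'s [u8]) -> Self {
            Self { src, pos: 0 }
        }

        // Drop everything up to pos + n by re-creating the lexer on the
        // remaining suffix, mirroring the shape of the real skip_ahead.
        fn skip_ahead(&mut self, n: usize) {
            *self = Self::new(&self.src[self.pos + n..]);
        }
    }

    fn main() {
        let mut lex = MiniLexer::new(b"abcdef");
        lex.pos = 1; // pretend one byte has already been consumed
        lex.skip_ahead(2);
        assert_eq!(lex.src, b"def");
        assert_eq!(lex.pos, 0);
    }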

View file

@@ -3,8 +3,6 @@
 f4f22fc:attribute key class order
 ae6fc15:bugged left/right quote
 168469a:bugged left/right quote
-2056174:unicode whitespace emph
-2e8fffa:unicode whitespace strong
 e1f5b5e:untrimmed whitespace before linebreak
 07888f3:div close within raw block
 8423412:heading id conflict with existing id