attr: use bytes instead of chars

only consider ascii whitespace
This commit is contained in:
Noah Hellman 2023-05-13 12:35:17 +02:00
parent 798f8941d8
commit 72a3378831
2 changed files with 43 additions and 49 deletions

View file

@ -8,13 +8,13 @@ pub(crate) fn parse(src: &str) -> Attributes {
a a
} }
pub fn valid<I: Iterator<Item = char>>(chars: I) -> usize { pub fn valid(src: &str) -> usize {
use State::*; use State::*;
let mut n = 0; let mut n = 0;
let mut state = Start; let mut state = Start;
for c in chars { for c in src.bytes() {
n += c.len_utf8(); n += 1;
state = state.step(c); state = state.step(c);
match state { match state {
Done | Invalid => break, Done | Invalid => break,
@ -256,11 +256,11 @@ impl Validator {
/// Returns number of valid bytes parsed (0 means invalid) if finished, otherwise more input is /// Returns number of valid bytes parsed (0 means invalid) if finished, otherwise more input is
/// needed. /// needed.
pub fn parse(&mut self, input: &str) -> Option<usize> { pub fn parse(&mut self, input: &str) -> Option<usize> {
let mut chars = input.chars(); let mut bytes = input.bytes();
for c in &mut chars { for c in &mut bytes {
self.state = self.state.step(c); self.state = self.state.step(c);
match self.state { match self.state {
State::Done => return Some(input.len() - chars.as_str().len()), State::Done => return Some(input.len() - bytes.len()),
State::Invalid => return Some(0), State::Invalid => return Some(0),
_ => {} _ => {}
} }
@ -297,7 +297,7 @@ impl<'s> Parser<'s> {
let mut pos = 0; let mut pos = 0;
let mut pos_prev = 0; let mut pos_prev = 0;
for c in input.chars() { for c in input.bytes() {
let state_next = self.state.step(c); let state_next = self.state.step(c);
let st = std::mem::replace(&mut self.state, state_next); let st = std::mem::replace(&mut self.state, state_next);
@ -318,7 +318,7 @@ impl<'s> Parser<'s> {
} }
}; };
pos += c.len_utf8(); pos += 1;
debug_assert!(!matches!(self.state, Invalid)); debug_assert!(!matches!(self.state, Invalid));
@ -358,40 +358,40 @@ enum State {
} }
impl State { impl State {
/// Computes the state that follows `self` after consuming one unit of input `c`.
///
/// Every row below is a side-by-side diff row: the left half is the pre-commit code operating
/// on `char`, the right half is the post-commit code operating on `u8`. The visible differences
/// between the halves are the literal types (`'x'` vs `b'x'`) and the whitespace test
/// (`is_whitespace` vs `is_ascii_whitespace`).
///
/// Arms are tried top to bottom, so each guarded arm must stay ahead of the unguarded fallback
/// for the same state.
///
/// # Panics
///
/// Panics when called on `Invalid` or `Done` (last arm); the visible callers stop stepping as
/// soon as one of those states has been produced.
fn step(self, c: char) -> State { fn step(self, c: u8) -> State {
use State::*; use State::*;
match self { match self {
// An attribute set must open with `{`; anything else is rejected immediately.
Start if c == '{' => Whitespace, Start if c == b'{' => Whitespace,
Start => Invalid, Start => Invalid,
// Between elements: the next unit decides which kind of element begins, or closes the set.
Whitespace => match c { Whitespace => match c {
'}' => Done, b'}' => Done,
'.' => ClassFirst, b'.' => ClassFirst,
'#' => IdentifierFirst, b'#' => IdentifierFirst,
'%' => Comment, b'%' => Comment,
c if is_name(c) => Key, c if is_name(c) => Key,
// Left: any Unicode whitespace separates elements. Right: only ASCII whitespace does.
c if c.is_whitespace() => Whitespace, c if c.is_ascii_whitespace() => Whitespace,
_ => Invalid, _ => Invalid,
}, },
// A comment runs until the next `%`; every other unit is swallowed.
Comment if c == '%' => Whitespace, Comment if c == b'%' => Whitespace,
Comment => Comment, Comment => Comment,
// `.` and `#` must be followed by at least one name unit.
ClassFirst if is_name(c) => Class, ClassFirst if is_name(c) => Class,
ClassFirst => Invalid, ClassFirst => Invalid,
IdentifierFirst if is_name(c) => Identifier, IdentifierFirst if is_name(c) => Identifier,
IdentifierFirst => Invalid, IdentifierFirst => Invalid,
// Classes, identifiers and unquoted values continue over name units and end at whitespace
// or at the closing `}`.
s @ (Class | Identifier | Value) if is_name(c) => s, s @ (Class | Identifier | Value) if is_name(c) => s,
Class | Identifier | Value if c.is_whitespace() => Whitespace, Class | Identifier | Value if c.is_ascii_whitespace() => Whitespace,
Class | Identifier | Value if c == '}' => Done, Class | Identifier | Value if c == b'}' => Done,
Class | Identifier | Value => Invalid, Class | Identifier | Value => Invalid,
// A key is a run of name units that must be terminated by `=`.
Key if is_name(c) => Key, Key if is_name(c) => Key,
Key if c == '=' => ValueFirst, Key if c == b'=' => ValueFirst,
Key => Invalid, Key => Invalid,
// After `=` the value is either unquoted (name units) or opens with `"`.
ValueFirst if is_name(c) => Value, ValueFirst if is_name(c) => Value,
ValueFirst if c == '"' => ValueQuoted, ValueFirst if c == b'"' => ValueQuoted,
ValueFirst => Invalid, ValueFirst => Invalid,
// An unescaped `"` closes a quoted value. `ValueEscape` is absent from this arm, so a `"`
// that directly follows a backslash falls through and stays inside the value.
ValueQuoted | ValueNewline | ValueContinued if c == '"' => Whitespace, ValueQuoted | ValueNewline | ValueContinued if c == b'"' => Whitespace,
// A newline inside a quoted value (including right after a backslash) is tracked separately.
ValueQuoted | ValueNewline | ValueContinued | ValueEscape if c == '\n' => ValueNewline, ValueQuoted | ValueNewline | ValueContinued | ValueEscape if c == b'\n' => ValueNewline,
ValueQuoted if c == '\\' => ValueEscape, ValueQuoted if c == b'\\' => ValueEscape,
// Catch-alls: any other unit is accepted inside a quoted value.
ValueQuoted | ValueEscape => ValueQuoted, ValueQuoted | ValueEscape => ValueQuoted,
ValueNewline | ValueContinued => ValueContinued, ValueNewline | ValueContinued => ValueContinued,
// Terminal states must never be stepped again.
Invalid | Done => panic!("{:?}", self), Invalid | Done => panic!("{:?}", self),
@ -399,8 +399,8 @@ impl State {
} }
} }
/// Returns `true` when `c` is an ASCII alphanumeric or one of `:`, `_`, `-`.
///
/// These rows are side-by-side diff rows: the left half is the pre-commit signature taking a
/// `char`, the right half is the post-commit signature taking a `u8`. Both halves accept the
/// same ASCII set; anything outside ASCII yields `false`.
pub fn is_name(c: char) -> bool { pub fn is_name(c: u8) -> bool {
c.is_ascii_alphanumeric() || matches!(c, ':' | '_' | '-') c.is_ascii_alphanumeric() || matches!(c, b':' | b'_' | b'-')
} }
#[cfg(test)] #[cfg(test)]
@ -435,11 +435,6 @@ mod test {
test_attr!("{#a #b}", ("id", "b")); test_attr!("{#a #b}", ("id", "b"));
} }
#[test]
fn unicode_whitespace() {
test_attr!("{.a .b}", ("class", "a b"));
}
#[test] #[test]
fn value_unquoted() { fn value_unquoted() {
test_attr!( test_attr!(
@ -517,47 +512,45 @@ mod test {
// A class followed by a `%`-delimited comment is valid in full: `valid` must report the whole
// input length. Left half: old call passing `src.chars()`; right half: new call passing the `&str`.
#[test] #[test]
fn valid_full() { fn valid_full() {
let src = "{.class %comment%}"; let src = "{.class %comment%}";
assert_eq!(super::valid(src.chars()), src.len()); assert_eq!(super::valid(src), src.len());
} }
// A non-ASCII character inside a quoted value is accepted. The expected count is `src.len()`,
// i.e. a length in bytes, so the multi-byte character must be counted with its full width in
// both the old (`chars`) and the new (`&str`) call.
#[test] #[test]
fn valid_unicode() { fn valid_unicode() {
let src = r#"{a="б"}"#; let src = r#"{a="б"}"#;
assert_eq!(super::valid(src.chars()), src.len()); assert_eq!(super::valid(src), src.len());
} }
// An attribute set with nothing between the braces is valid and consumes both characters.
// Left half: old call passing `src.chars()`; right half: new call passing the `&str`.
#[test] #[test]
fn valid_empty() { fn valid_empty() {
let src = "{}"; let src = "{}";
assert_eq!(super::valid(src.chars()), src.len()); assert_eq!(super::valid(src), src.len());
} }
// Only ASCII whitespace (spaces and a newline) between the braces is still a valid attribute
// set. Left half: old call passing `src.chars()`; right half: new call passing the `&str`.
#[test] #[test]
fn valid_whitespace() { fn valid_whitespace() {
let src = "{ \n }"; let src = "{ \n }";
assert_eq!(super::valid(src.chars()), src.len()); assert_eq!(super::valid(src), src.len());
} }
// An attribute set containing nothing but a `%`-delimited comment is valid in full.
// Left half: old call passing `src.chars()`; right half: new call passing the `&str`.
#[test] #[test]
fn valid_comment() { fn valid_comment() {
let src = "{%comment%}"; let src = "{%comment%}";
assert_eq!(super::valid(src.chars()), src.len()); assert_eq!(super::valid(src), src.len());
} }
// Input after the first complete attribute set is ignored: only the length of `{.class}` is
// reported even though `{.ignore}` follows.
// The two halves are laid out differently here. The left (old) half chains two `chars()`
// iterators and spreads `assert_eq!` over several rows; the right (new) half uses one
// concatenated string plus `src_valid` and ends earlier, so the last rows hold left-half text only.
#[test] #[test]
fn valid_trailing() { fn valid_trailing() {
let src = "{.class}"; let src = "{.class}{.ignore}";
assert_eq!( let src_valid = "{.class}";
super::valid(src.chars().chain("{.ignore}".chars())), assert_eq!(super::valid(src), src_valid.len());
src.len(),
);
} }
// `valid` reports 0 for input that is not a well-formed attribute set. In row order:
// a leading space before `{`, a bare word after a class (a key with no `=`), no braces at all,
// and a `.` that is not followed by a class name.
// Left half: old calls passing `.chars()`; right half: new calls passing the `&str`.
#[test] #[test]
fn valid_invalid() { fn valid_invalid() {
assert_eq!(super::valid(" {.valid}".chars()), 0); assert_eq!(super::valid(" {.valid}"), 0);
assert_eq!(super::valid("{.class invalid}".chars()), 0); assert_eq!(super::valid("{.class invalid}"), 0);
assert_eq!(super::valid("abc".chars()), 0); assert_eq!(super::valid("abc"), 0);
assert_eq!(super::valid("{.abc.}".chars()), 0); assert_eq!(super::valid("{.abc.}"), 0);
} }
} }

View file

@ -834,8 +834,9 @@ impl<'s> IdentifiedBlock<'s> {
None None
} }
} }
'{' => (attr::valid(line.chars()) == lt) '{' => {
.then(|| (Kind::Atom(Attributes), Span::by_len(indent, l))), (attr::valid(line) == lt).then(|| (Kind::Atom(Attributes), Span::by_len(indent, l)))
}
'|' => { '|' => {
if lt >= 2 && line_t.ends_with('|') && !line_t.ends_with("\\|") { if lt >= 2 && line_t.ends_with('|') && !line_t.ends_with("\\|") {
Some((Kind::Table { caption: false }, Span::empty_at(indent))) Some((Kind::Table { caption: false }, Span::empty_at(indent)))
@ -902,10 +903,10 @@ impl<'s> IdentifiedBlock<'s> {
let spec = let spec =
&line_t[fence_length..].trim_start_matches(|c: char| c.is_ascii_whitespace()); &line_t[fence_length..].trim_start_matches(|c: char| c.is_ascii_whitespace());
let valid_spec = if f == ':' { let valid_spec = if f == ':' {
spec.chars().all(attr::is_name) spec.bytes().all(attr::is_name)
} else { } else {
!spec.chars().any(|c| c.is_ascii_whitespace()) !spec.bytes().any(|c| c.is_ascii_whitespace())
&& !spec.chars().any(|c| c == '`') && !spec.bytes().any(|c| c == b'`')
}; };
(valid_spec && fence_length >= 3).then(|| { (valid_spec && fence_length >= 3).then(|| {
( (