attr: use bytes instead of chars
only consider ascii whitespace
This commit is contained in:
parent
798f8941d8
commit
72a3378831
2 changed files with 43 additions and 49 deletions
81
src/attr.rs
81
src/attr.rs
|
@ -8,13 +8,13 @@ pub(crate) fn parse(src: &str) -> Attributes {
|
|||
a
|
||||
}
|
||||
|
||||
pub fn valid<I: Iterator<Item = char>>(chars: I) -> usize {
|
||||
pub fn valid(src: &str) -> usize {
|
||||
use State::*;
|
||||
|
||||
let mut n = 0;
|
||||
let mut state = Start;
|
||||
for c in chars {
|
||||
n += c.len_utf8();
|
||||
for c in src.bytes() {
|
||||
n += 1;
|
||||
state = state.step(c);
|
||||
match state {
|
||||
Done | Invalid => break,
|
||||
|
@ -256,11 +256,11 @@ impl Validator {
|
|||
/// Returns number of valid bytes parsed (0 means invalid) if finished, otherwise more input is
|
||||
/// needed.
|
||||
pub fn parse(&mut self, input: &str) -> Option<usize> {
|
||||
let mut chars = input.chars();
|
||||
for c in &mut chars {
|
||||
let mut bytes = input.bytes();
|
||||
for c in &mut bytes {
|
||||
self.state = self.state.step(c);
|
||||
match self.state {
|
||||
State::Done => return Some(input.len() - chars.as_str().len()),
|
||||
State::Done => return Some(input.len() - bytes.len()),
|
||||
State::Invalid => return Some(0),
|
||||
_ => {}
|
||||
}
|
||||
|
@ -297,7 +297,7 @@ impl<'s> Parser<'s> {
|
|||
let mut pos = 0;
|
||||
let mut pos_prev = 0;
|
||||
|
||||
for c in input.chars() {
|
||||
for c in input.bytes() {
|
||||
let state_next = self.state.step(c);
|
||||
let st = std::mem::replace(&mut self.state, state_next);
|
||||
|
||||
|
@ -318,7 +318,7 @@ impl<'s> Parser<'s> {
|
|||
}
|
||||
};
|
||||
|
||||
pos += c.len_utf8();
|
||||
pos += 1;
|
||||
|
||||
debug_assert!(!matches!(self.state, Invalid));
|
||||
|
||||
|
@ -358,40 +358,40 @@ enum State {
|
|||
}
|
||||
|
||||
impl State {
|
||||
fn step(self, c: char) -> State {
|
||||
fn step(self, c: u8) -> State {
|
||||
use State::*;
|
||||
|
||||
match self {
|
||||
Start if c == '{' => Whitespace,
|
||||
Start if c == b'{' => Whitespace,
|
||||
Start => Invalid,
|
||||
Whitespace => match c {
|
||||
'}' => Done,
|
||||
'.' => ClassFirst,
|
||||
'#' => IdentifierFirst,
|
||||
'%' => Comment,
|
||||
b'}' => Done,
|
||||
b'.' => ClassFirst,
|
||||
b'#' => IdentifierFirst,
|
||||
b'%' => Comment,
|
||||
c if is_name(c) => Key,
|
||||
c if c.is_whitespace() => Whitespace,
|
||||
c if c.is_ascii_whitespace() => Whitespace,
|
||||
_ => Invalid,
|
||||
},
|
||||
Comment if c == '%' => Whitespace,
|
||||
Comment if c == b'%' => Whitespace,
|
||||
Comment => Comment,
|
||||
ClassFirst if is_name(c) => Class,
|
||||
ClassFirst => Invalid,
|
||||
IdentifierFirst if is_name(c) => Identifier,
|
||||
IdentifierFirst => Invalid,
|
||||
s @ (Class | Identifier | Value) if is_name(c) => s,
|
||||
Class | Identifier | Value if c.is_whitespace() => Whitespace,
|
||||
Class | Identifier | Value if c == '}' => Done,
|
||||
Class | Identifier | Value if c.is_ascii_whitespace() => Whitespace,
|
||||
Class | Identifier | Value if c == b'}' => Done,
|
||||
Class | Identifier | Value => Invalid,
|
||||
Key if is_name(c) => Key,
|
||||
Key if c == '=' => ValueFirst,
|
||||
Key if c == b'=' => ValueFirst,
|
||||
Key => Invalid,
|
||||
ValueFirst if is_name(c) => Value,
|
||||
ValueFirst if c == '"' => ValueQuoted,
|
||||
ValueFirst if c == b'"' => ValueQuoted,
|
||||
ValueFirst => Invalid,
|
||||
ValueQuoted | ValueNewline | ValueContinued if c == '"' => Whitespace,
|
||||
ValueQuoted | ValueNewline | ValueContinued | ValueEscape if c == '\n' => ValueNewline,
|
||||
ValueQuoted if c == '\\' => ValueEscape,
|
||||
ValueQuoted | ValueNewline | ValueContinued if c == b'"' => Whitespace,
|
||||
ValueQuoted | ValueNewline | ValueContinued | ValueEscape if c == b'\n' => ValueNewline,
|
||||
ValueQuoted if c == b'\\' => ValueEscape,
|
||||
ValueQuoted | ValueEscape => ValueQuoted,
|
||||
ValueNewline | ValueContinued => ValueContinued,
|
||||
Invalid | Done => panic!("{:?}", self),
|
||||
|
@ -399,8 +399,8 @@ impl State {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn is_name(c: char) -> bool {
|
||||
c.is_ascii_alphanumeric() || matches!(c, ':' | '_' | '-')
|
||||
pub fn is_name(c: u8) -> bool {
|
||||
c.is_ascii_alphanumeric() || matches!(c, b':' | b'_' | b'-')
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
@ -435,11 +435,6 @@ mod test {
|
|||
test_attr!("{#a #b}", ("id", "b"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unicode_whitespace() {
|
||||
test_attr!("{.a .b}", ("class", "a b"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn value_unquoted() {
|
||||
test_attr!(
|
||||
|
@ -517,47 +512,45 @@ mod test {
|
|||
#[test]
|
||||
fn valid_full() {
|
||||
let src = "{.class %comment%}";
|
||||
assert_eq!(super::valid(src.chars()), src.len());
|
||||
assert_eq!(super::valid(src), src.len());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn valid_unicode() {
|
||||
let src = r#"{a="б"}"#;
|
||||
assert_eq!(super::valid(src.chars()), src.len());
|
||||
assert_eq!(super::valid(src), src.len());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn valid_empty() {
|
||||
let src = "{}";
|
||||
assert_eq!(super::valid(src.chars()), src.len());
|
||||
assert_eq!(super::valid(src), src.len());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn valid_whitespace() {
|
||||
let src = "{ \n }";
|
||||
assert_eq!(super::valid(src.chars()), src.len());
|
||||
assert_eq!(super::valid(src), src.len());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn valid_comment() {
|
||||
let src = "{%comment%}";
|
||||
assert_eq!(super::valid(src.chars()), src.len());
|
||||
assert_eq!(super::valid(src), src.len());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn valid_trailing() {
|
||||
let src = "{.class}";
|
||||
assert_eq!(
|
||||
super::valid(src.chars().chain("{.ignore}".chars())),
|
||||
src.len(),
|
||||
);
|
||||
let src = "{.class}{.ignore}";
|
||||
let src_valid = "{.class}";
|
||||
assert_eq!(super::valid(src), src_valid.len());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn valid_invalid() {
|
||||
assert_eq!(super::valid(" {.valid}".chars()), 0);
|
||||
assert_eq!(super::valid("{.class invalid}".chars()), 0);
|
||||
assert_eq!(super::valid("abc".chars()), 0);
|
||||
assert_eq!(super::valid("{.abc.}".chars()), 0);
|
||||
assert_eq!(super::valid(" {.valid}"), 0);
|
||||
assert_eq!(super::valid("{.class invalid}"), 0);
|
||||
assert_eq!(super::valid("abc"), 0);
|
||||
assert_eq!(super::valid("{.abc.}"), 0);
|
||||
}
|
||||
}
|
||||
|
|
11
src/block.rs
11
src/block.rs
|
@ -834,8 +834,9 @@ impl<'s> IdentifiedBlock<'s> {
|
|||
None
|
||||
}
|
||||
}
|
||||
'{' => (attr::valid(line.chars()) == lt)
|
||||
.then(|| (Kind::Atom(Attributes), Span::by_len(indent, l))),
|
||||
'{' => {
|
||||
(attr::valid(line) == lt).then(|| (Kind::Atom(Attributes), Span::by_len(indent, l)))
|
||||
}
|
||||
'|' => {
|
||||
if lt >= 2 && line_t.ends_with('|') && !line_t.ends_with("\\|") {
|
||||
Some((Kind::Table { caption: false }, Span::empty_at(indent)))
|
||||
|
@ -902,10 +903,10 @@ impl<'s> IdentifiedBlock<'s> {
|
|||
let spec =
|
||||
&line_t[fence_length..].trim_start_matches(|c: char| c.is_ascii_whitespace());
|
||||
let valid_spec = if f == ':' {
|
||||
spec.chars().all(attr::is_name)
|
||||
spec.bytes().all(attr::is_name)
|
||||
} else {
|
||||
!spec.chars().any(|c| c.is_ascii_whitespace())
|
||||
&& !spec.chars().any(|c| c == '`')
|
||||
!spec.bytes().any(|c| c.is_ascii_whitespace())
|
||||
&& !spec.bytes().any(|c| c == b'`')
|
||||
};
|
||||
(valid_spec && fence_length >= 3).then(|| {
|
||||
(
|
||||
|
|
Loading…
Reference in a new issue