attr: use bytes instead of chars
only consider ascii whitespace
This commit is contained in:
parent
798f8941d8
commit
72a3378831
2 changed files with 43 additions and 49 deletions
81
src/attr.rs
81
src/attr.rs
|
@ -8,13 +8,13 @@ pub(crate) fn parse(src: &str) -> Attributes {
|
||||||
a
|
a
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn valid<I: Iterator<Item = char>>(chars: I) -> usize {
|
pub fn valid(src: &str) -> usize {
|
||||||
use State::*;
|
use State::*;
|
||||||
|
|
||||||
let mut n = 0;
|
let mut n = 0;
|
||||||
let mut state = Start;
|
let mut state = Start;
|
||||||
for c in chars {
|
for c in src.bytes() {
|
||||||
n += c.len_utf8();
|
n += 1;
|
||||||
state = state.step(c);
|
state = state.step(c);
|
||||||
match state {
|
match state {
|
||||||
Done | Invalid => break,
|
Done | Invalid => break,
|
||||||
|
@ -256,11 +256,11 @@ impl Validator {
|
||||||
/// Returns number of valid bytes parsed (0 means invalid) if finished, otherwise more input is
|
/// Returns number of valid bytes parsed (0 means invalid) if finished, otherwise more input is
|
||||||
/// needed.
|
/// needed.
|
||||||
pub fn parse(&mut self, input: &str) -> Option<usize> {
|
pub fn parse(&mut self, input: &str) -> Option<usize> {
|
||||||
let mut chars = input.chars();
|
let mut bytes = input.bytes();
|
||||||
for c in &mut chars {
|
for c in &mut bytes {
|
||||||
self.state = self.state.step(c);
|
self.state = self.state.step(c);
|
||||||
match self.state {
|
match self.state {
|
||||||
State::Done => return Some(input.len() - chars.as_str().len()),
|
State::Done => return Some(input.len() - bytes.len()),
|
||||||
State::Invalid => return Some(0),
|
State::Invalid => return Some(0),
|
||||||
_ => {}
|
_ => {}
|
||||||
}
|
}
|
||||||
|
@ -297,7 +297,7 @@ impl<'s> Parser<'s> {
|
||||||
let mut pos = 0;
|
let mut pos = 0;
|
||||||
let mut pos_prev = 0;
|
let mut pos_prev = 0;
|
||||||
|
|
||||||
for c in input.chars() {
|
for c in input.bytes() {
|
||||||
let state_next = self.state.step(c);
|
let state_next = self.state.step(c);
|
||||||
let st = std::mem::replace(&mut self.state, state_next);
|
let st = std::mem::replace(&mut self.state, state_next);
|
||||||
|
|
||||||
|
@ -318,7 +318,7 @@ impl<'s> Parser<'s> {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
pos += c.len_utf8();
|
pos += 1;
|
||||||
|
|
||||||
debug_assert!(!matches!(self.state, Invalid));
|
debug_assert!(!matches!(self.state, Invalid));
|
||||||
|
|
||||||
|
@ -358,40 +358,40 @@ enum State {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl State {
|
impl State {
|
||||||
fn step(self, c: char) -> State {
|
fn step(self, c: u8) -> State {
|
||||||
use State::*;
|
use State::*;
|
||||||
|
|
||||||
match self {
|
match self {
|
||||||
Start if c == '{' => Whitespace,
|
Start if c == b'{' => Whitespace,
|
||||||
Start => Invalid,
|
Start => Invalid,
|
||||||
Whitespace => match c {
|
Whitespace => match c {
|
||||||
'}' => Done,
|
b'}' => Done,
|
||||||
'.' => ClassFirst,
|
b'.' => ClassFirst,
|
||||||
'#' => IdentifierFirst,
|
b'#' => IdentifierFirst,
|
||||||
'%' => Comment,
|
b'%' => Comment,
|
||||||
c if is_name(c) => Key,
|
c if is_name(c) => Key,
|
||||||
c if c.is_whitespace() => Whitespace,
|
c if c.is_ascii_whitespace() => Whitespace,
|
||||||
_ => Invalid,
|
_ => Invalid,
|
||||||
},
|
},
|
||||||
Comment if c == '%' => Whitespace,
|
Comment if c == b'%' => Whitespace,
|
||||||
Comment => Comment,
|
Comment => Comment,
|
||||||
ClassFirst if is_name(c) => Class,
|
ClassFirst if is_name(c) => Class,
|
||||||
ClassFirst => Invalid,
|
ClassFirst => Invalid,
|
||||||
IdentifierFirst if is_name(c) => Identifier,
|
IdentifierFirst if is_name(c) => Identifier,
|
||||||
IdentifierFirst => Invalid,
|
IdentifierFirst => Invalid,
|
||||||
s @ (Class | Identifier | Value) if is_name(c) => s,
|
s @ (Class | Identifier | Value) if is_name(c) => s,
|
||||||
Class | Identifier | Value if c.is_whitespace() => Whitespace,
|
Class | Identifier | Value if c.is_ascii_whitespace() => Whitespace,
|
||||||
Class | Identifier | Value if c == '}' => Done,
|
Class | Identifier | Value if c == b'}' => Done,
|
||||||
Class | Identifier | Value => Invalid,
|
Class | Identifier | Value => Invalid,
|
||||||
Key if is_name(c) => Key,
|
Key if is_name(c) => Key,
|
||||||
Key if c == '=' => ValueFirst,
|
Key if c == b'=' => ValueFirst,
|
||||||
Key => Invalid,
|
Key => Invalid,
|
||||||
ValueFirst if is_name(c) => Value,
|
ValueFirst if is_name(c) => Value,
|
||||||
ValueFirst if c == '"' => ValueQuoted,
|
ValueFirst if c == b'"' => ValueQuoted,
|
||||||
ValueFirst => Invalid,
|
ValueFirst => Invalid,
|
||||||
ValueQuoted | ValueNewline | ValueContinued if c == '"' => Whitespace,
|
ValueQuoted | ValueNewline | ValueContinued if c == b'"' => Whitespace,
|
||||||
ValueQuoted | ValueNewline | ValueContinued | ValueEscape if c == '\n' => ValueNewline,
|
ValueQuoted | ValueNewline | ValueContinued | ValueEscape if c == b'\n' => ValueNewline,
|
||||||
ValueQuoted if c == '\\' => ValueEscape,
|
ValueQuoted if c == b'\\' => ValueEscape,
|
||||||
ValueQuoted | ValueEscape => ValueQuoted,
|
ValueQuoted | ValueEscape => ValueQuoted,
|
||||||
ValueNewline | ValueContinued => ValueContinued,
|
ValueNewline | ValueContinued => ValueContinued,
|
||||||
Invalid | Done => panic!("{:?}", self),
|
Invalid | Done => panic!("{:?}", self),
|
||||||
|
@ -399,8 +399,8 @@ impl State {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn is_name(c: char) -> bool {
|
pub fn is_name(c: u8) -> bool {
|
||||||
c.is_ascii_alphanumeric() || matches!(c, ':' | '_' | '-')
|
c.is_ascii_alphanumeric() || matches!(c, b':' | b'_' | b'-')
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
@ -435,11 +435,6 @@ mod test {
|
||||||
test_attr!("{#a #b}", ("id", "b"));
|
test_attr!("{#a #b}", ("id", "b"));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn unicode_whitespace() {
|
|
||||||
test_attr!("{.a .b}", ("class", "a b"));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn value_unquoted() {
|
fn value_unquoted() {
|
||||||
test_attr!(
|
test_attr!(
|
||||||
|
@ -517,47 +512,45 @@ mod test {
|
||||||
#[test]
|
#[test]
|
||||||
fn valid_full() {
|
fn valid_full() {
|
||||||
let src = "{.class %comment%}";
|
let src = "{.class %comment%}";
|
||||||
assert_eq!(super::valid(src.chars()), src.len());
|
assert_eq!(super::valid(src), src.len());
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn valid_unicode() {
|
fn valid_unicode() {
|
||||||
let src = r#"{a="б"}"#;
|
let src = r#"{a="б"}"#;
|
||||||
assert_eq!(super::valid(src.chars()), src.len());
|
assert_eq!(super::valid(src), src.len());
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn valid_empty() {
|
fn valid_empty() {
|
||||||
let src = "{}";
|
let src = "{}";
|
||||||
assert_eq!(super::valid(src.chars()), src.len());
|
assert_eq!(super::valid(src), src.len());
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn valid_whitespace() {
|
fn valid_whitespace() {
|
||||||
let src = "{ \n }";
|
let src = "{ \n }";
|
||||||
assert_eq!(super::valid(src.chars()), src.len());
|
assert_eq!(super::valid(src), src.len());
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn valid_comment() {
|
fn valid_comment() {
|
||||||
let src = "{%comment%}";
|
let src = "{%comment%}";
|
||||||
assert_eq!(super::valid(src.chars()), src.len());
|
assert_eq!(super::valid(src), src.len());
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn valid_trailing() {
|
fn valid_trailing() {
|
||||||
let src = "{.class}";
|
let src = "{.class}{.ignore}";
|
||||||
assert_eq!(
|
let src_valid = "{.class}";
|
||||||
super::valid(src.chars().chain("{.ignore}".chars())),
|
assert_eq!(super::valid(src), src_valid.len());
|
||||||
src.len(),
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn valid_invalid() {
|
fn valid_invalid() {
|
||||||
assert_eq!(super::valid(" {.valid}".chars()), 0);
|
assert_eq!(super::valid(" {.valid}"), 0);
|
||||||
assert_eq!(super::valid("{.class invalid}".chars()), 0);
|
assert_eq!(super::valid("{.class invalid}"), 0);
|
||||||
assert_eq!(super::valid("abc".chars()), 0);
|
assert_eq!(super::valid("abc"), 0);
|
||||||
assert_eq!(super::valid("{.abc.}".chars()), 0);
|
assert_eq!(super::valid("{.abc.}"), 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
11
src/block.rs
11
src/block.rs
|
@ -834,8 +834,9 @@ impl<'s> IdentifiedBlock<'s> {
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
'{' => (attr::valid(line.chars()) == lt)
|
'{' => {
|
||||||
.then(|| (Kind::Atom(Attributes), Span::by_len(indent, l))),
|
(attr::valid(line) == lt).then(|| (Kind::Atom(Attributes), Span::by_len(indent, l)))
|
||||||
|
}
|
||||||
'|' => {
|
'|' => {
|
||||||
if lt >= 2 && line_t.ends_with('|') && !line_t.ends_with("\\|") {
|
if lt >= 2 && line_t.ends_with('|') && !line_t.ends_with("\\|") {
|
||||||
Some((Kind::Table { caption: false }, Span::empty_at(indent)))
|
Some((Kind::Table { caption: false }, Span::empty_at(indent)))
|
||||||
|
@ -902,10 +903,10 @@ impl<'s> IdentifiedBlock<'s> {
|
||||||
let spec =
|
let spec =
|
||||||
&line_t[fence_length..].trim_start_matches(|c: char| c.is_ascii_whitespace());
|
&line_t[fence_length..].trim_start_matches(|c: char| c.is_ascii_whitespace());
|
||||||
let valid_spec = if f == ':' {
|
let valid_spec = if f == ':' {
|
||||||
spec.chars().all(attr::is_name)
|
spec.bytes().all(attr::is_name)
|
||||||
} else {
|
} else {
|
||||||
!spec.chars().any(|c| c.is_ascii_whitespace())
|
!spec.bytes().any(|c| c.is_ascii_whitespace())
|
||||||
&& !spec.chars().any(|c| c == '`')
|
&& !spec.bytes().any(|c| c == b'`')
|
||||||
};
|
};
|
||||||
(valid_spec && fence_length >= 3).then(|| {
|
(valid_spec && fence_length >= 3).then(|| {
|
||||||
(
|
(
|
||||||
|
|
Loading…
Reference in a new issue