attr: step one char at a time

Make sure the attr parser keeps track of all of its own state, so that it
can be fed one char at a time.

This avoids restarting from the beginning when we find out that more
chars are needed to finish parsing the attributes.
This commit is contained in:
Noah Hellman 2023-02-17 19:18:49 +01:00
parent e1b12ba642
commit 172f555272

View file

@ -12,16 +12,21 @@ pub(crate) fn parse(src: &str) -> Attributes {
pub fn valid<I: Iterator<Item = char>>(chars: I) -> (usize, bool) { pub fn valid<I: Iterator<Item = char>>(chars: I) -> (usize, bool) {
let mut has_attr = false; let mut has_attr = false;
let mut p = Parser::new(chars); let mut p = Parser::new();
for e in &mut p { for c in chars {
match e { if p.step(c).is_some() {
Element::Class(..) | Element::Identifier(..) | Element::Attribute(..) => {
has_attr = true; has_attr = true;
} }
Element::Invalid => return (0, false), if matches!(p.state, Done | Invalid) {
break;
} }
} }
if matches!(p.state, Done) {
(p.pos, has_attr) (p.pos, has_attr)
} else {
(0, false)
}
} }
/// Stores an attribute value that supports backslash escapes of ASCII punctuation upon displaying, /// Stores an attribute value that supports backslash escapes of ASCII punctuation upon displaying,
@ -112,15 +117,20 @@ impl<'s> Attributes<'s> {
} }
pub(crate) fn parse(&mut self, input: &'s str) -> bool { pub(crate) fn parse(&mut self, input: &'s str) -> bool {
for elem in Parser::new(input.chars()) { let mut p = Parser::new();
for c in input.chars() {
if let Some(elem) = p.step(c) {
match elem { match elem {
Element::Class(c) => self.insert("class", c.of(input).into()), Element::Class(c) => self.insert("class", c.of(input).into()),
Element::Identifier(i) => self.insert("id", i.of(input).into()), Element::Identifier(i) => self.insert("id", i.of(input).into()),
Element::Attribute(a, v) => self.insert(a.of(input), v.of(input).into()), Element::Attribute(a, v) => self.insert(a.of(input), v.of(input).into()),
Element::Invalid => return false,
} }
} }
true if matches!(p.state, Done | Invalid) {
break;
}
}
matches!(p.state, Done)
} }
/// Combine all attributes from both objects, prioritizing self on conflicts. /// Combine all attributes from both objects, prioritizing self on conflicts.
@ -220,132 +230,93 @@ enum State {
ValueFirst, ValueFirst,
Value, Value,
ValueQuoted, ValueQuoted,
ValueEscape,
Done, Done,
Invalid, Invalid,
} }
struct Parser<I> { impl State {
chars: I, fn step(self, c: char) -> State {
pos: usize, match self {
pos_prev: usize, Start if c == '{' => Whitespace,
state: State, Start => Invalid,
}
impl<I: Iterator<Item = char>> Parser<I> {
fn new(chars: I) -> Self {
Parser {
chars,
pos: 0,
pos_prev: 0,
state: Start,
}
}
fn step_char(&mut self) -> Option<State> {
self.chars.next().map(|c| {
self.pos_prev = self.pos;
self.pos += c.len_utf8();
match self.state {
Start => match c {
'{' => Whitespace,
_ => Invalid,
},
Whitespace => match c { Whitespace => match c {
'}' => Done, '}' => Done,
'.' => ClassFirst, '.' => ClassFirst,
'#' => IdentifierFirst, '#' => IdentifierFirst,
'%' => Comment, '%' => Comment,
c if c.is_ascii_alphanumeric() || matches!(c, '_' | ':' | '-') => Key, c if is_name(c) => Key,
c if c.is_whitespace() => Whitespace, c if c.is_whitespace() => Whitespace,
_ => Invalid, _ => Invalid,
}, },
Comment => { Comment if c == '%' => Whitespace,
if c == '%' { Comment => Comment,
Whitespace ClassFirst if is_name(c) => Class,
} else { ClassFirst => Invalid,
Comment IdentifierFirst if is_name(c) => Identifier,
IdentifierFirst => Invalid,
s @ (Class | Identifier | Value) if is_name(c) => s,
Class | Identifier | Value if c.is_whitespace() => Whitespace,
Class | Identifier | Value if c == '}' => Done,
Class | Identifier | Value => Invalid,
Key if is_name(c) => Key,
Key if c == '=' => ValueFirst,
Key => Invalid,
ValueFirst if is_name(c) => Value,
ValueFirst if c == '"' => ValueQuoted,
ValueFirst => Invalid,
ValueQuoted if c == '"' => Whitespace,
ValueQuoted if c == '\\' => ValueEscape,
ValueQuoted | ValueEscape => ValueQuoted,
Invalid | Done => panic!("{:?}", self),
} }
} }
s @ (ClassFirst | IdentifierFirst) => { }
if is_name(c) {
match s { struct Parser {
ClassFirst => Class, pos: usize,
IdentifierFirst => Identifier, pos_prev: usize,
_ => panic!(), span1: Span,
} state: State,
} else { }
Invalid
impl Parser {
fn new() -> Self {
Parser {
pos: 0,
pos_prev: 0,
span1: Span::new(0, 0),
state: Start,
} }
} }
s @ (Class | Identifier | Value) => {
if is_name(c) { fn step(&mut self, c: char) -> Option<Element> {
s let state_next = self.state.step(c);
} else if c.is_whitespace() { let st = std::mem::replace(&mut self.state, state_next);
Whitespace
} else if c == '}' { let elem = if st != self.state
Done && !matches!((st, self.state), (ValueEscape, _) | (_, ValueEscape))
} else { {
Invalid let span0 = Span::new(self.pos_prev, self.pos);
}
}
Key => {
if is_name(c) {
Key
} else if c == '=' {
ValueFirst
} else {
Invalid
}
}
ValueFirst => {
if is_name(c) {
Value
} else if c == '"' {
ValueQuoted
} else {
Invalid
}
}
ValueQuoted => match c {
'\\' => {
if let Some(c) = self.chars.next() {
self.pos_prev = self.pos; self.pos_prev = self.pos;
match st {
Key => {
self.span1 = span0;
None
}
Class => Some(Element::Class(span0)),
Identifier => Some(Element::Identifier(span0)),
Value => Some(Element::Attribute(self.span1, span0)),
ValueQuoted => Some(Element::Attribute(self.span1, span0.skip(1))),
_ => None,
}
} else {
None
};
self.pos += c.len_utf8(); self.pos += c.len_utf8();
}
ValueQuoted
}
'"' => Whitespace,
_ => ValueQuoted,
},
Invalid | Done => panic!("{:?}", self.state),
}
})
}
fn step(&mut self) -> (State, Span) { elem
let start = self.pos_prev;
if self.state == Done {
return (Done, Span::empty_at(start));
}
if self.state == Invalid {
return (Invalid, Span::empty_at(start));
}
while let Some(state_next) = self.step_char() {
if self.state != state_next {
return (
std::mem::replace(&mut self.state, state_next),
Span::new(start, self.pos_prev),
);
}
}
(
if self.state == Done { Done } else { Invalid },
Span::new(start, self.pos_prev),
)
} }
} }
@ -357,47 +328,6 @@ enum Element {
Class(Span), Class(Span),
Identifier(Span), Identifier(Span),
Attribute(Span, Span), Attribute(Span, Span),
Invalid,
}
impl<I: Iterator<Item = char>> Iterator for Parser<I> {
type Item = Element;
fn next(&mut self) -> Option<Self::Item> {
loop {
let (st, span0) = self.step();
return match st {
ClassFirst | IdentifierFirst => {
let (st, span1) = self.step();
Some(match st {
Class => Element::Class(span1),
Identifier => Element::Identifier(span1),
_ => return Some(Element::Invalid),
})
}
Key => {
let (st, _span1) = self.step();
match st {
ValueFirst => {
let (st, span2) = self.step();
match st {
Value => Some(Element::Attribute(span0, span2)),
ValueQuoted => Some(Element::Attribute(span0, span2.skip(1))),
Invalid => Some(Element::Invalid),
_ => panic!("{:?}", st),
}
}
Invalid => Some(Element::Invalid),
_ => panic!("{:?}", st),
}
}
Comment | Start | Whitespace => continue,
Done => None,
Invalid => Some(Element::Invalid),
_ => panic!("{:?}", st),
};
}
}
} }
#[cfg(test)] #[cfg(test)]