attr: Parser overhaul

- allow reading one line at a time, values may span multiple inputs
- mv event push to Parser, allowing reuse from outside Attributes::parse
- get rid of Element, simplify
This commit is contained in:
Noah Hellman 2023-02-17 23:42:12 +01:00
parent 34e74ddc43
commit 98f3fe5c7c

View file

@ -1,7 +1,7 @@
use crate::CowStr; use crate::CowStr;
use crate::Span;
use std::fmt; use std::fmt;
/// Parse attributes, assumed to be valid.
pub(crate) fn parse(src: &str) -> Attributes { pub(crate) fn parse(src: &str) -> Attributes {
let mut a = Attributes::new(); let mut a = Attributes::new();
a.parse(src); a.parse(src);
@ -44,6 +44,23 @@ impl<'s> AttributeValue<'s> {
pub fn parts(&'s self) -> AttributeValueParts<'s> { pub fn parts(&'s self) -> AttributeValueParts<'s> {
AttributeValueParts { ahead: &self.raw } AttributeValueParts { ahead: &self.raw }
} }
/// Append `s` to this value, separated from any existing content by a single space.
///
/// `s` carries the `'s` lifetime so that extending an empty borrowed value can simply re-point
/// the borrow at `s` instead of allocating.
fn extend(&mut self, s: &'s str) {
    let joined = match &mut self.raw {
        // Already owned: grow the existing buffer in place.
        CowStr::Owned(buf) => {
            buf.push(' ');
            buf.push_str(s);
            return;
        }
        // Nothing stored yet: borrow `s` directly, no separator and no allocation.
        CowStr::Borrowed(current) if current.is_empty() => {
            *current = s;
            return;
        }
        // Two borrowed pieces cannot be joined in place, build a new owned string.
        CowStr::Borrowed(current) => format!("{} {}", current, s),
    };
    self.raw = joined.into();
}
} }
impl<'s> From<&'s str> for AttributeValue<'s> { impl<'s> From<&'s str> for AttributeValue<'s> {
@ -118,21 +135,11 @@ impl<'s> Attributes<'s> {
Self(self.0.take()) Self(self.0.take())
} }
pub(crate) fn parse(&mut self, input: &'s str) -> bool { /// Parse and append attributes, assumed to be valid.
let mut p = Parser::new(); pub(crate) fn parse(&mut self, input: &'s str) {
for c in input.chars() { let mut parser = Parser::new(self.take());
if let Some(elem) = p.step(c) { parser.parse(input);
match elem { *self = parser.finish();
Element::Class(c) => self.insert("class", c.of(input).into()),
Element::Identifier(i) => self.insert("id", i.of(input).into()),
Element::Attribute(a, v) => self.insert(a.of(input), v.of(input).into()),
}
}
if matches!(p.state, State::Done | State::Invalid) {
break;
}
}
matches!(p.state, State::Done)
} }
/// Combine all attributes from both objects, prioritizing self on conflicts. /// Combine all attributes from both objects, prioritizing self on conflicts.
@ -154,6 +161,11 @@ impl<'s> Attributes<'s> {
/// overwritten, unless it is a "class" attribute. In that case the provided value will be /// overwritten, unless it is a "class" attribute. In that case the provided value will be
/// appended to the existing value. /// appended to the existing value.
pub fn insert(&mut self, key: &'s str, val: AttributeValue<'s>) { pub fn insert(&mut self, key: &'s str, val: AttributeValue<'s>) {
self.insert_pos(key, val);
}
// duplicate of insert but returns position of inserted value
fn insert_pos(&mut self, key: &'s str, val: AttributeValue<'s>) -> usize {
if self.0.is_none() { if self.0.is_none() {
self.0 = Some(Vec::new().into()); self.0 = Some(Vec::new().into());
}; };
@ -162,12 +174,20 @@ impl<'s> Attributes<'s> {
if let Some(i) = attrs.iter().position(|(k, _)| *k == key) { if let Some(i) = attrs.iter().position(|(k, _)| *k == key) {
let prev = &mut attrs[i].1; let prev = &mut attrs[i].1;
if key == "class" { if key == "class" {
*prev = format!("{} {}", prev, val).into(); match val.raw {
CowStr::Borrowed(s) => prev.extend(s),
CowStr::Owned(s) => {
*prev = format!("{} {}", prev, s).into();
}
}
} else { } else {
*prev = val; *prev = val;
} }
i
} else { } else {
let i = attrs.len();
attrs.push((key, val)); attrs.push((key, val));
i
} }
} }
@ -219,6 +239,74 @@ impl<'s> std::fmt::Debug for Attributes<'s> {
} }
} }
/// Attributes parser, takes input of one or more consecutive attribute sets and builds an
/// `Attributes` object.
///
/// Input is assumed to contain a valid series of attribute sets, the attributes are added as they
/// are encountered.
pub struct Parser<'s> {
/// Attributes collected so far.
attrs: Attributes<'s>,
/// Index in `attrs` of the most recently inserted key, used to extend its value.
/// Initialized to `usize::MAX` and only assigned once a key has been parsed.
i_prev: usize,
/// Current state of the state machine, kept between calls to `parse`.
state: State,
}
impl<'s> Parser<'s> {
/// Create a parser in the `Start` state that adds parsed attributes to `attrs`.
pub fn new(attrs: Attributes<'s>) -> Self {
Self {
attrs,
// No key has been inserted yet.
i_prev: usize::MAX,
state: State::Start,
}
}
/// Parse `input` and add the attributes encountered to the attributes held by the parser.
///
/// The state machine is kept between calls, so this may be called again with the input that
/// follows, e.g. one line at a time. A quoted value that continues after a newline is appended
/// to the value stored for the most recent key.
///
/// Parsing stops once an attribute set is done and is not immediately followed by `{`; the
/// rest of `input` is then ignored. Nothing is returned.
///
/// NOTE(review): text is only stored when the state changes, and the offsets are local to each
/// call, so input ending in the middle of a token appears to drop that partial token -- confirm
/// that callers always end the input at a token boundary (such as a newline).
pub fn parse(&mut self, input: &'s str) {
use State::*;
// Byte offset of the current character within `input`.
let mut pos = 0;
// Byte offset within `input` where the current token started.
let mut pos_prev = 0;
for c in input.chars() {
let state_next = self.state.step(c);
let st = std::mem::replace(&mut self.state, state_next);
// A token ends when the state changes. Transitions to or from `ValueEscape` are ignored so
// that an escape does not split a quoted value.
if st != self.state && !matches!((st, self.state), (ValueEscape, _) | (_, ValueEscape))
{
// Everything consumed while in the previous state, excluding `c`.
let content = &input[pos_prev..pos];
pos_prev = pos;
match st {
Class => self.attrs.insert("class", content.into()),
Identifier => self.attrs.insert("id", content.into()),
// Insert the key with an empty value and remember its index, the value is appended later.
Key => self.i_prev = self.attrs.insert_pos(content, "".into()),
Value | ValueQuoted | ValueContinued => {
// Relies on a key having been inserted before any value is seen; otherwise the `unwrap` or
// the indexing with `i_prev` panics.
self.attrs.0.as_mut().unwrap()[self.i_prev]
.1
// A `ValueQuoted` token starts at the opening `"`, skip that single byte.
.extend(&content[usize::from(matches!(st, ValueQuoted))..]);
}
_ => {}
}
};
pos += c.len_utf8();
debug_assert!(!matches!(self.state, Invalid));
if matches!(self.state, Done) {
// Another attribute set follows directly, start over and keep parsing.
if input[pos..].starts_with('{') {
self.state = Start;
} else {
return;
}
}
}
}
/// Consume the parser and return the attributes collected so far.
fn finish(self) -> Attributes<'s> {
self.attrs
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum State { enum State {
Start, Start,
@ -233,6 +321,8 @@ enum State {
Value, Value,
ValueQuoted, ValueQuoted,
ValueEscape, ValueEscape,
ValueNewline,
ValueContinued,
Done, Done,
Invalid, Invalid,
} }
@ -269,73 +359,20 @@ impl State {
ValueFirst if is_name(c) => Value, ValueFirst if is_name(c) => Value,
ValueFirst if c == '"' => ValueQuoted, ValueFirst if c == '"' => ValueQuoted,
ValueFirst => Invalid, ValueFirst => Invalid,
ValueQuoted if c == '"' => Whitespace, ValueQuoted | ValueNewline | ValueContinued if c == '"' => Whitespace,
ValueQuoted | ValueNewline | ValueContinued | ValueEscape if c == '\n' => ValueNewline,
ValueQuoted if c == '\\' => ValueEscape, ValueQuoted if c == '\\' => ValueEscape,
ValueQuoted | ValueEscape => ValueQuoted, ValueQuoted | ValueEscape => ValueQuoted,
ValueNewline | ValueContinued => ValueContinued,
Invalid | Done => panic!("{:?}", self), Invalid | Done => panic!("{:?}", self),
} }
} }
} }
/// Attribute parser that is fed one character at a time through `step`.
struct Parser {
/// Byte offset of the next character to be consumed.
pos: usize,
/// Byte offset at which the current token started.
pos_prev: usize,
/// Span of the most recently completed key, paired with the value that follows it.
span1: Span,
/// Current state of the state machine.
state: State,
}
impl Parser {
    /// Create a parser at offset zero in the `Start` state.
    fn new() -> Self {
        Self {
            state: State::Start,
            span1: Span::new(0, 0),
            pos_prev: 0,
            pos: 0,
        }
    }

    /// Consume the character `c` and advance the state machine.
    ///
    /// Returns the element that was completed by this character, if any. Spans refer to byte
    /// offsets in the text fed to the parser so far.
    fn step(&mut self, c: char) -> Option<Element> {
        use State::*;

        let before = self.state;
        self.state = self.state.step(c);

        // Moving into or out of an escape must not end the token being read.
        let escape_edge = matches!(before, ValueEscape) || matches!(self.state, ValueEscape);

        let emitted = if before == self.state || escape_edge {
            None
        } else {
            // The state changed: the token spans everything consumed before `c`.
            let token = Span::new(self.pos_prev, self.pos);
            self.pos_prev = self.pos;
            match before {
                Class => Some(Element::Class(token)),
                Identifier => Some(Element::Identifier(token)),
                Value => Some(Element::Attribute(self.span1, token)),
                // Quoted values start at the opening quote, which is not part of the value.
                ValueQuoted => Some(Element::Attribute(self.span1, token.skip(1))),
                // Remember the key until its value has been read.
                Key => {
                    self.span1 = token;
                    None
                }
                _ => None,
            }
        };

        self.pos += c.len_utf8();
        emitted
    }
}
pub fn is_name(c: char) -> bool { pub fn is_name(c: char) -> bool {
c.is_ascii_alphanumeric() || matches!(c, ':' | '_' | '-') c.is_ascii_alphanumeric() || matches!(c, ':' | '_' | '-')
} }
/// An attribute element produced by the parser, referring to the input by span.
enum Element {
/// Span of a class name.
Class(Span),
/// Span of an identifier.
Identifier(Span),
/// Span of the key followed by the span of its value.
Attribute(Span, Span),
}
#[cfg(test)] #[cfg(test)]
mod test { mod test {
macro_rules! test_attr { macro_rules! test_attr {
@ -397,6 +434,11 @@ mod test {
); );
} }
// A newline inside a quoted value is replaced by a single space in the resulting value.
#[test]
fn value_newline() {
test_attr!("{attr0=\"abc\ndef\"}", ("attr0", "abc def"));
}
#[test] #[test]
fn comment() { fn comment() {
test_attr!("{%%}"); test_attr!("{%%}");