attr: Parser overhaul
- allow reading one line at a time, values may span multiple inputs - mv event push to Parser, allowing reuse from outside Attributes::parse - get rid of Element, simplify
This commit is contained in:
parent
34e74ddc43
commit
98f3fe5c7c
1 changed files with 115 additions and 73 deletions
188
src/attr.rs
188
src/attr.rs
|
@ -1,7 +1,7 @@
|
|||
use crate::CowStr;
|
||||
use crate::Span;
|
||||
use std::fmt;
|
||||
|
||||
/// Parse attributes, assumed to be valid.
|
||||
pub(crate) fn parse(src: &str) -> Attributes {
|
||||
let mut a = Attributes::new();
|
||||
a.parse(src);
|
||||
|
@ -44,6 +44,23 @@ impl<'s> AttributeValue<'s> {
|
|||
pub fn parts(&'s self) -> AttributeValueParts<'s> {
|
||||
AttributeValueParts { ahead: &self.raw }
|
||||
}
|
||||
|
||||
// lifetime is 's to avoid allocation if empty value is concatenated with single value
|
||||
fn extend(&mut self, s: &'s str) {
|
||||
match &mut self.raw {
|
||||
CowStr::Borrowed(prev) => {
|
||||
if prev.is_empty() {
|
||||
*prev = s;
|
||||
} else {
|
||||
self.raw = format!("{} {}", prev, s).into();
|
||||
}
|
||||
}
|
||||
CowStr::Owned(ref mut prev) => {
|
||||
prev.push(' ');
|
||||
prev.push_str(s);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> From<&'s str> for AttributeValue<'s> {
|
||||
|
@ -118,21 +135,11 @@ impl<'s> Attributes<'s> {
|
|||
Self(self.0.take())
|
||||
}
|
||||
|
||||
pub(crate) fn parse(&mut self, input: &'s str) -> bool {
|
||||
let mut p = Parser::new();
|
||||
for c in input.chars() {
|
||||
if let Some(elem) = p.step(c) {
|
||||
match elem {
|
||||
Element::Class(c) => self.insert("class", c.of(input).into()),
|
||||
Element::Identifier(i) => self.insert("id", i.of(input).into()),
|
||||
Element::Attribute(a, v) => self.insert(a.of(input), v.of(input).into()),
|
||||
}
|
||||
}
|
||||
if matches!(p.state, State::Done | State::Invalid) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
matches!(p.state, State::Done)
|
||||
/// Parse and append attributes, assumed to be valid.
|
||||
pub(crate) fn parse(&mut self, input: &'s str) {
|
||||
let mut parser = Parser::new(self.take());
|
||||
parser.parse(input);
|
||||
*self = parser.finish();
|
||||
}
|
||||
|
||||
/// Combine all attributes from both objects, prioritizing self on conflicts.
|
||||
|
@ -154,6 +161,11 @@ impl<'s> Attributes<'s> {
|
|||
/// overwritten, unless it is a "class" attribute. In that case the provided value will be
|
||||
/// appended to the existing value.
|
||||
pub fn insert(&mut self, key: &'s str, val: AttributeValue<'s>) {
|
||||
self.insert_pos(key, val);
|
||||
}
|
||||
|
||||
// duplicate of insert but returns position of inserted value
|
||||
fn insert_pos(&mut self, key: &'s str, val: AttributeValue<'s>) -> usize {
|
||||
if self.0.is_none() {
|
||||
self.0 = Some(Vec::new().into());
|
||||
};
|
||||
|
@ -162,12 +174,20 @@ impl<'s> Attributes<'s> {
|
|||
if let Some(i) = attrs.iter().position(|(k, _)| *k == key) {
|
||||
let prev = &mut attrs[i].1;
|
||||
if key == "class" {
|
||||
*prev = format!("{} {}", prev, val).into();
|
||||
match val.raw {
|
||||
CowStr::Borrowed(s) => prev.extend(s),
|
||||
CowStr::Owned(s) => {
|
||||
*prev = format!("{} {}", prev, s).into();
|
||||
}
|
||||
}
|
||||
} else {
|
||||
*prev = val;
|
||||
}
|
||||
i
|
||||
} else {
|
||||
let i = attrs.len();
|
||||
attrs.push((key, val));
|
||||
i
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -219,6 +239,74 @@ impl<'s> std::fmt::Debug for Attributes<'s> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Attributes parser, take input of one or more consecutive attributes and create an `Attributes`
|
||||
/// object.
|
||||
///
|
||||
/// Input is assumed to contain a valid series of attribute sets, the attributes are added as they
|
||||
/// are encountered.
|
||||
pub struct Parser<'s> {
|
||||
attrs: Attributes<'s>,
|
||||
i_prev: usize,
|
||||
state: State,
|
||||
}
|
||||
|
||||
impl<'s> Parser<'s> {
|
||||
pub fn new(attrs: Attributes<'s>) -> Self {
|
||||
Self {
|
||||
attrs,
|
||||
i_prev: usize::MAX,
|
||||
state: State::Start,
|
||||
}
|
||||
}
|
||||
|
||||
/// Return value indicates the number of bytes parsed if finished. If None, more input is
|
||||
/// required to finish the attributes.
|
||||
pub fn parse(&mut self, input: &'s str) {
|
||||
use State::*;
|
||||
|
||||
let mut pos = 0;
|
||||
let mut pos_prev = 0;
|
||||
|
||||
for c in input.chars() {
|
||||
let state_next = self.state.step(c);
|
||||
let st = std::mem::replace(&mut self.state, state_next);
|
||||
|
||||
if st != self.state && !matches!((st, self.state), (ValueEscape, _) | (_, ValueEscape))
|
||||
{
|
||||
let content = &input[pos_prev..pos];
|
||||
pos_prev = pos;
|
||||
match st {
|
||||
Class => self.attrs.insert("class", content.into()),
|
||||
Identifier => self.attrs.insert("id", content.into()),
|
||||
Key => self.i_prev = self.attrs.insert_pos(content, "".into()),
|
||||
Value | ValueQuoted | ValueContinued => {
|
||||
self.attrs.0.as_mut().unwrap()[self.i_prev]
|
||||
.1
|
||||
.extend(&content[usize::from(matches!(st, ValueQuoted))..]);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
};
|
||||
|
||||
pos += c.len_utf8();
|
||||
|
||||
debug_assert!(!matches!(self.state, Invalid));
|
||||
|
||||
if matches!(self.state, Done) {
|
||||
if input[pos..].starts_with('{') {
|
||||
self.state = Start;
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn finish(self) -> Attributes<'s> {
|
||||
self.attrs
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
enum State {
|
||||
Start,
|
||||
|
@ -233,6 +321,8 @@ enum State {
|
|||
Value,
|
||||
ValueQuoted,
|
||||
ValueEscape,
|
||||
ValueNewline,
|
||||
ValueContinued,
|
||||
Done,
|
||||
Invalid,
|
||||
}
|
||||
|
@ -269,73 +359,20 @@ impl State {
|
|||
ValueFirst if is_name(c) => Value,
|
||||
ValueFirst if c == '"' => ValueQuoted,
|
||||
ValueFirst => Invalid,
|
||||
ValueQuoted if c == '"' => Whitespace,
|
||||
ValueQuoted | ValueNewline | ValueContinued if c == '"' => Whitespace,
|
||||
ValueQuoted | ValueNewline | ValueContinued | ValueEscape if c == '\n' => ValueNewline,
|
||||
ValueQuoted if c == '\\' => ValueEscape,
|
||||
ValueQuoted | ValueEscape => ValueQuoted,
|
||||
ValueNewline | ValueContinued => ValueContinued,
|
||||
Invalid | Done => panic!("{:?}", self),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct Parser {
|
||||
pos: usize,
|
||||
pos_prev: usize,
|
||||
span1: Span,
|
||||
state: State,
|
||||
}
|
||||
|
||||
impl Parser {
|
||||
fn new() -> Self {
|
||||
Parser {
|
||||
pos: 0,
|
||||
pos_prev: 0,
|
||||
span1: Span::new(0, 0),
|
||||
state: State::Start,
|
||||
}
|
||||
}
|
||||
|
||||
fn step(&mut self, c: char) -> Option<Element> {
|
||||
use State::*;
|
||||
|
||||
let state_next = self.state.step(c);
|
||||
let st = std::mem::replace(&mut self.state, state_next);
|
||||
|
||||
let elem = if st != self.state
|
||||
&& !matches!((st, self.state), (ValueEscape, _) | (_, ValueEscape))
|
||||
{
|
||||
let span0 = Span::new(self.pos_prev, self.pos);
|
||||
self.pos_prev = self.pos;
|
||||
match st {
|
||||
Key => {
|
||||
self.span1 = span0;
|
||||
None
|
||||
}
|
||||
Class => Some(Element::Class(span0)),
|
||||
Identifier => Some(Element::Identifier(span0)),
|
||||
Value => Some(Element::Attribute(self.span1, span0)),
|
||||
ValueQuoted => Some(Element::Attribute(self.span1, span0.skip(1))),
|
||||
_ => None,
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
self.pos += c.len_utf8();
|
||||
|
||||
elem
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_name(c: char) -> bool {
|
||||
c.is_ascii_alphanumeric() || matches!(c, ':' | '_' | '-')
|
||||
}
|
||||
|
||||
enum Element {
|
||||
Class(Span),
|
||||
Identifier(Span),
|
||||
Attribute(Span, Span),
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
macro_rules! test_attr {
|
||||
|
@ -397,6 +434,11 @@ mod test {
|
|||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn value_newline() {
|
||||
test_attr!("{attr0=\"abc\ndef\"}", ("attr0", "abc def"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn comment() {
|
||||
test_attr!("{%%}");
|
||||
|
|
Loading…
Reference in a new issue