attr: Parser overhaul
- allow reading one line at a time, values may span multiple inputs - mv event push to Parser, allowing reuse from outside Attributes::parse - get rid of Element, simplify
This commit is contained in:
parent
34e74ddc43
commit
98f3fe5c7c
1 changed files with 115 additions and 73 deletions
188
src/attr.rs
188
src/attr.rs
|
@ -1,7 +1,7 @@
|
||||||
use crate::CowStr;
|
use crate::CowStr;
|
||||||
use crate::Span;
|
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
|
|
||||||
|
/// Parse attributes, assumed to be valid.
|
||||||
pub(crate) fn parse(src: &str) -> Attributes {
|
pub(crate) fn parse(src: &str) -> Attributes {
|
||||||
let mut a = Attributes::new();
|
let mut a = Attributes::new();
|
||||||
a.parse(src);
|
a.parse(src);
|
||||||
|
@ -44,6 +44,23 @@ impl<'s> AttributeValue<'s> {
|
||||||
pub fn parts(&'s self) -> AttributeValueParts<'s> {
|
pub fn parts(&'s self) -> AttributeValueParts<'s> {
|
||||||
AttributeValueParts { ahead: &self.raw }
|
AttributeValueParts { ahead: &self.raw }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// lifetime is 's to avoid allocation if empty value is concatenated with single value
|
||||||
|
fn extend(&mut self, s: &'s str) {
|
||||||
|
match &mut self.raw {
|
||||||
|
CowStr::Borrowed(prev) => {
|
||||||
|
if prev.is_empty() {
|
||||||
|
*prev = s;
|
||||||
|
} else {
|
||||||
|
self.raw = format!("{} {}", prev, s).into();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
CowStr::Owned(ref mut prev) => {
|
||||||
|
prev.push(' ');
|
||||||
|
prev.push_str(s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'s> From<&'s str> for AttributeValue<'s> {
|
impl<'s> From<&'s str> for AttributeValue<'s> {
|
||||||
|
@ -118,21 +135,11 @@ impl<'s> Attributes<'s> {
|
||||||
Self(self.0.take())
|
Self(self.0.take())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn parse(&mut self, input: &'s str) -> bool {
|
/// Parse and append attributes, assumed to be valid.
|
||||||
let mut p = Parser::new();
|
pub(crate) fn parse(&mut self, input: &'s str) {
|
||||||
for c in input.chars() {
|
let mut parser = Parser::new(self.take());
|
||||||
if let Some(elem) = p.step(c) {
|
parser.parse(input);
|
||||||
match elem {
|
*self = parser.finish();
|
||||||
Element::Class(c) => self.insert("class", c.of(input).into()),
|
|
||||||
Element::Identifier(i) => self.insert("id", i.of(input).into()),
|
|
||||||
Element::Attribute(a, v) => self.insert(a.of(input), v.of(input).into()),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if matches!(p.state, State::Done | State::Invalid) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
matches!(p.state, State::Done)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Combine all attributes from both objects, prioritizing self on conflicts.
|
/// Combine all attributes from both objects, prioritizing self on conflicts.
|
||||||
|
@ -154,6 +161,11 @@ impl<'s> Attributes<'s> {
|
||||||
/// overwritten, unless it is a "class" attribute. In that case the provided value will be
|
/// overwritten, unless it is a "class" attribute. In that case the provided value will be
|
||||||
/// appended to the existing value.
|
/// appended to the existing value.
|
||||||
pub fn insert(&mut self, key: &'s str, val: AttributeValue<'s>) {
|
pub fn insert(&mut self, key: &'s str, val: AttributeValue<'s>) {
|
||||||
|
self.insert_pos(key, val);
|
||||||
|
}
|
||||||
|
|
||||||
|
// duplicate of insert but returns position of inserted value
|
||||||
|
fn insert_pos(&mut self, key: &'s str, val: AttributeValue<'s>) -> usize {
|
||||||
if self.0.is_none() {
|
if self.0.is_none() {
|
||||||
self.0 = Some(Vec::new().into());
|
self.0 = Some(Vec::new().into());
|
||||||
};
|
};
|
||||||
|
@ -162,12 +174,20 @@ impl<'s> Attributes<'s> {
|
||||||
if let Some(i) = attrs.iter().position(|(k, _)| *k == key) {
|
if let Some(i) = attrs.iter().position(|(k, _)| *k == key) {
|
||||||
let prev = &mut attrs[i].1;
|
let prev = &mut attrs[i].1;
|
||||||
if key == "class" {
|
if key == "class" {
|
||||||
*prev = format!("{} {}", prev, val).into();
|
match val.raw {
|
||||||
|
CowStr::Borrowed(s) => prev.extend(s),
|
||||||
|
CowStr::Owned(s) => {
|
||||||
|
*prev = format!("{} {}", prev, s).into();
|
||||||
|
}
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
*prev = val;
|
*prev = val;
|
||||||
}
|
}
|
||||||
|
i
|
||||||
} else {
|
} else {
|
||||||
|
let i = attrs.len();
|
||||||
attrs.push((key, val));
|
attrs.push((key, val));
|
||||||
|
i
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -219,6 +239,74 @@ impl<'s> std::fmt::Debug for Attributes<'s> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Attributes parser, take input of one or more consecutive attributes and create an `Attributes`
|
||||||
|
/// object.
|
||||||
|
///
|
||||||
|
/// Input is assumed to contain a valid series of attribute sets, the attributes are added as they
|
||||||
|
/// are encountered.
|
||||||
|
pub struct Parser<'s> {
|
||||||
|
attrs: Attributes<'s>,
|
||||||
|
i_prev: usize,
|
||||||
|
state: State,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'s> Parser<'s> {
|
||||||
|
pub fn new(attrs: Attributes<'s>) -> Self {
|
||||||
|
Self {
|
||||||
|
attrs,
|
||||||
|
i_prev: usize::MAX,
|
||||||
|
state: State::Start,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return value indicates the number of bytes parsed if finished. If None, more input is
|
||||||
|
/// required to finish the attributes.
|
||||||
|
pub fn parse(&mut self, input: &'s str) {
|
||||||
|
use State::*;
|
||||||
|
|
||||||
|
let mut pos = 0;
|
||||||
|
let mut pos_prev = 0;
|
||||||
|
|
||||||
|
for c in input.chars() {
|
||||||
|
let state_next = self.state.step(c);
|
||||||
|
let st = std::mem::replace(&mut self.state, state_next);
|
||||||
|
|
||||||
|
if st != self.state && !matches!((st, self.state), (ValueEscape, _) | (_, ValueEscape))
|
||||||
|
{
|
||||||
|
let content = &input[pos_prev..pos];
|
||||||
|
pos_prev = pos;
|
||||||
|
match st {
|
||||||
|
Class => self.attrs.insert("class", content.into()),
|
||||||
|
Identifier => self.attrs.insert("id", content.into()),
|
||||||
|
Key => self.i_prev = self.attrs.insert_pos(content, "".into()),
|
||||||
|
Value | ValueQuoted | ValueContinued => {
|
||||||
|
self.attrs.0.as_mut().unwrap()[self.i_prev]
|
||||||
|
.1
|
||||||
|
.extend(&content[usize::from(matches!(st, ValueQuoted))..]);
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
pos += c.len_utf8();
|
||||||
|
|
||||||
|
debug_assert!(!matches!(self.state, Invalid));
|
||||||
|
|
||||||
|
if matches!(self.state, Done) {
|
||||||
|
if input[pos..].starts_with('{') {
|
||||||
|
self.state = Start;
|
||||||
|
} else {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn finish(self) -> Attributes<'s> {
|
||||||
|
self.attrs
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
enum State {
|
enum State {
|
||||||
Start,
|
Start,
|
||||||
|
@ -233,6 +321,8 @@ enum State {
|
||||||
Value,
|
Value,
|
||||||
ValueQuoted,
|
ValueQuoted,
|
||||||
ValueEscape,
|
ValueEscape,
|
||||||
|
ValueNewline,
|
||||||
|
ValueContinued,
|
||||||
Done,
|
Done,
|
||||||
Invalid,
|
Invalid,
|
||||||
}
|
}
|
||||||
|
@ -269,73 +359,20 @@ impl State {
|
||||||
ValueFirst if is_name(c) => Value,
|
ValueFirst if is_name(c) => Value,
|
||||||
ValueFirst if c == '"' => ValueQuoted,
|
ValueFirst if c == '"' => ValueQuoted,
|
||||||
ValueFirst => Invalid,
|
ValueFirst => Invalid,
|
||||||
ValueQuoted if c == '"' => Whitespace,
|
ValueQuoted | ValueNewline | ValueContinued if c == '"' => Whitespace,
|
||||||
|
ValueQuoted | ValueNewline | ValueContinued | ValueEscape if c == '\n' => ValueNewline,
|
||||||
ValueQuoted if c == '\\' => ValueEscape,
|
ValueQuoted if c == '\\' => ValueEscape,
|
||||||
ValueQuoted | ValueEscape => ValueQuoted,
|
ValueQuoted | ValueEscape => ValueQuoted,
|
||||||
|
ValueNewline | ValueContinued => ValueContinued,
|
||||||
Invalid | Done => panic!("{:?}", self),
|
Invalid | Done => panic!("{:?}", self),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct Parser {
|
|
||||||
pos: usize,
|
|
||||||
pos_prev: usize,
|
|
||||||
span1: Span,
|
|
||||||
state: State,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Parser {
|
|
||||||
fn new() -> Self {
|
|
||||||
Parser {
|
|
||||||
pos: 0,
|
|
||||||
pos_prev: 0,
|
|
||||||
span1: Span::new(0, 0),
|
|
||||||
state: State::Start,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn step(&mut self, c: char) -> Option<Element> {
|
|
||||||
use State::*;
|
|
||||||
|
|
||||||
let state_next = self.state.step(c);
|
|
||||||
let st = std::mem::replace(&mut self.state, state_next);
|
|
||||||
|
|
||||||
let elem = if st != self.state
|
|
||||||
&& !matches!((st, self.state), (ValueEscape, _) | (_, ValueEscape))
|
|
||||||
{
|
|
||||||
let span0 = Span::new(self.pos_prev, self.pos);
|
|
||||||
self.pos_prev = self.pos;
|
|
||||||
match st {
|
|
||||||
Key => {
|
|
||||||
self.span1 = span0;
|
|
||||||
None
|
|
||||||
}
|
|
||||||
Class => Some(Element::Class(span0)),
|
|
||||||
Identifier => Some(Element::Identifier(span0)),
|
|
||||||
Value => Some(Element::Attribute(self.span1, span0)),
|
|
||||||
ValueQuoted => Some(Element::Attribute(self.span1, span0.skip(1))),
|
|
||||||
_ => None,
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
};
|
|
||||||
|
|
||||||
self.pos += c.len_utf8();
|
|
||||||
|
|
||||||
elem
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn is_name(c: char) -> bool {
|
pub fn is_name(c: char) -> bool {
|
||||||
c.is_ascii_alphanumeric() || matches!(c, ':' | '_' | '-')
|
c.is_ascii_alphanumeric() || matches!(c, ':' | '_' | '-')
|
||||||
}
|
}
|
||||||
|
|
||||||
enum Element {
|
|
||||||
Class(Span),
|
|
||||||
Identifier(Span),
|
|
||||||
Attribute(Span, Span),
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test {
|
mod test {
|
||||||
macro_rules! test_attr {
|
macro_rules! test_attr {
|
||||||
|
@ -397,6 +434,11 @@ mod test {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn value_newline() {
|
||||||
|
test_attr!("{attr0=\"abc\ndef\"}", ("attr0", "abc def"));
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn comment() {
|
fn comment() {
|
||||||
test_attr!("{%%}");
|
test_attr!("{%%}");
|
||||||
|
|
Loading…
Reference in a new issue