attr: Parser overhaul
- allow reading one line at a time, values may span multiple inputs
- move event push to Parser, allowing reuse from outside Attributes::parse
- get rid of Element, simplify
This commit is contained in:
		
					parent
					
						
							
								34e74ddc43
							
						
					
				
			
			
				commit
				
					
						98f3fe5c7c
					
				
			
		
					 1 changed files with 115 additions and 73 deletions
				
			
		
							
								
								
									
										188
									
								
								src/attr.rs
									
										
									
									
									
								
							
							
						
						
									
										188
									
								
								src/attr.rs
									
										
									
									
									
								
							| 
						 | 
					@ -1,7 +1,7 @@
 | 
				
			||||||
use crate::CowStr;
 | 
					use crate::CowStr;
 | 
				
			||||||
use crate::Span;
 | 
					 | 
				
			||||||
use std::fmt;
 | 
					use std::fmt;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Parse attributes, assumed to be valid.
 | 
				
			||||||
pub(crate) fn parse(src: &str) -> Attributes {
 | 
					pub(crate) fn parse(src: &str) -> Attributes {
 | 
				
			||||||
    let mut a = Attributes::new();
 | 
					    let mut a = Attributes::new();
 | 
				
			||||||
    a.parse(src);
 | 
					    a.parse(src);
 | 
				
			||||||
| 
						 | 
					@ -44,6 +44,23 @@ impl<'s> AttributeValue<'s> {
 | 
				
			||||||
    pub fn parts(&'s self) -> AttributeValueParts<'s> {
 | 
					    pub fn parts(&'s self) -> AttributeValueParts<'s> {
 | 
				
			||||||
        AttributeValueParts { ahead: &self.raw }
 | 
					        AttributeValueParts { ahead: &self.raw }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // lifetime is 's to avoid allocation if empty value is concatenated with single value
 | 
				
			||||||
 | 
					    fn extend(&mut self, s: &'s str) {
 | 
				
			||||||
 | 
					        match &mut self.raw {
 | 
				
			||||||
 | 
					            CowStr::Borrowed(prev) => {
 | 
				
			||||||
 | 
					                if prev.is_empty() {
 | 
				
			||||||
 | 
					                    *prev = s;
 | 
				
			||||||
 | 
					                } else {
 | 
				
			||||||
 | 
					                    self.raw = format!("{} {}", prev, s).into();
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            CowStr::Owned(ref mut prev) => {
 | 
				
			||||||
 | 
					                prev.push(' ');
 | 
				
			||||||
 | 
					                prev.push_str(s);
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
impl<'s> From<&'s str> for AttributeValue<'s> {
 | 
					impl<'s> From<&'s str> for AttributeValue<'s> {
 | 
				
			||||||
| 
						 | 
					@ -118,21 +135,11 @@ impl<'s> Attributes<'s> {
 | 
				
			||||||
        Self(self.0.take())
 | 
					        Self(self.0.take())
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    pub(crate) fn parse(&mut self, input: &'s str) -> bool {
 | 
					    /// Parse and append attributes, assumed to be valid.
 | 
				
			||||||
        let mut p = Parser::new();
 | 
					    pub(crate) fn parse(&mut self, input: &'s str) {
 | 
				
			||||||
        for c in input.chars() {
 | 
					        let mut parser = Parser::new(self.take());
 | 
				
			||||||
            if let Some(elem) = p.step(c) {
 | 
					        parser.parse(input);
 | 
				
			||||||
                match elem {
 | 
					        *self = parser.finish();
 | 
				
			||||||
                    Element::Class(c) => self.insert("class", c.of(input).into()),
 | 
					 | 
				
			||||||
                    Element::Identifier(i) => self.insert("id", i.of(input).into()),
 | 
					 | 
				
			||||||
                    Element::Attribute(a, v) => self.insert(a.of(input), v.of(input).into()),
 | 
					 | 
				
			||||||
                }
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
            if matches!(p.state, State::Done | State::Invalid) {
 | 
					 | 
				
			||||||
                break;
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
        matches!(p.state, State::Done)
 | 
					 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /// Combine all attributes from both objects, prioritizing self on conflicts.
 | 
					    /// Combine all attributes from both objects, prioritizing self on conflicts.
 | 
				
			||||||
| 
						 | 
					@ -154,6 +161,11 @@ impl<'s> Attributes<'s> {
 | 
				
			||||||
    /// overwritten, unless it is a "class" attribute. In that case the provided value will be
 | 
					    /// overwritten, unless it is a "class" attribute. In that case the provided value will be
 | 
				
			||||||
    /// appended to the existing value.
 | 
					    /// appended to the existing value.
 | 
				
			||||||
    pub fn insert(&mut self, key: &'s str, val: AttributeValue<'s>) {
 | 
					    pub fn insert(&mut self, key: &'s str, val: AttributeValue<'s>) {
 | 
				
			||||||
 | 
					        self.insert_pos(key, val);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // same as insert, but returns the position of the inserted value
 | 
				
			||||||
 | 
					    fn insert_pos(&mut self, key: &'s str, val: AttributeValue<'s>) -> usize {
 | 
				
			||||||
        if self.0.is_none() {
 | 
					        if self.0.is_none() {
 | 
				
			||||||
            self.0 = Some(Vec::new().into());
 | 
					            self.0 = Some(Vec::new().into());
 | 
				
			||||||
        };
 | 
					        };
 | 
				
			||||||
| 
						 | 
					@ -162,12 +174,20 @@ impl<'s> Attributes<'s> {
 | 
				
			||||||
        if let Some(i) = attrs.iter().position(|(k, _)| *k == key) {
 | 
					        if let Some(i) = attrs.iter().position(|(k, _)| *k == key) {
 | 
				
			||||||
            let prev = &mut attrs[i].1;
 | 
					            let prev = &mut attrs[i].1;
 | 
				
			||||||
            if key == "class" {
 | 
					            if key == "class" {
 | 
				
			||||||
                *prev = format!("{} {}", prev, val).into();
 | 
					                match val.raw {
 | 
				
			||||||
 | 
					                    CowStr::Borrowed(s) => prev.extend(s),
 | 
				
			||||||
 | 
					                    CowStr::Owned(s) => {
 | 
				
			||||||
 | 
					                        *prev = format!("{} {}", prev, s).into();
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
            } else {
 | 
					            } else {
 | 
				
			||||||
                *prev = val;
 | 
					                *prev = val;
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
 | 
					            i
 | 
				
			||||||
        } else {
 | 
					        } else {
 | 
				
			||||||
 | 
					            let i = attrs.len();
 | 
				
			||||||
            attrs.push((key, val));
 | 
					            attrs.push((key, val));
 | 
				
			||||||
 | 
					            i
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -219,6 +239,74 @@ impl<'s> std::fmt::Debug for Attributes<'s> {
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Attributes parser, takes input of one or more consecutive attributes and creates an `Attributes`
 | 
				
			||||||
 | 
					/// object.
 | 
				
			||||||
 | 
					///
 | 
				
			||||||
 | 
					/// Input is assumed to contain a valid series of attribute sets; the attributes are added as they
 | 
				
			||||||
 | 
					/// are encountered.
 | 
				
			||||||
 | 
					pub struct Parser<'s> {
 | 
				
			||||||
 | 
					    attrs: Attributes<'s>,
 | 
				
			||||||
 | 
					    i_prev: usize,
 | 
				
			||||||
 | 
					    state: State,
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					impl<'s> Parser<'s> {
 | 
				
			||||||
 | 
					    pub fn new(attrs: Attributes<'s>) -> Self {
 | 
				
			||||||
 | 
					        Self {
 | 
				
			||||||
 | 
					            attrs,
 | 
				
			||||||
 | 
					            i_prev: usize::MAX,
 | 
				
			||||||
 | 
					            state: State::Start,
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /// Parse `input` and add its attributes. Call again with the following input if more input is
 | 
				
			||||||
 | 
					    /// required to finish the attributes.
 | 
				
			||||||
 | 
					    pub fn parse(&mut self, input: &'s str) {
 | 
				
			||||||
 | 
					        use State::*;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        let mut pos = 0;
 | 
				
			||||||
 | 
					        let mut pos_prev = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        for c in input.chars() {
 | 
				
			||||||
 | 
					            let state_next = self.state.step(c);
 | 
				
			||||||
 | 
					            let st = std::mem::replace(&mut self.state, state_next);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            if st != self.state && !matches!((st, self.state), (ValueEscape, _) | (_, ValueEscape))
 | 
				
			||||||
 | 
					            {
 | 
				
			||||||
 | 
					                let content = &input[pos_prev..pos];
 | 
				
			||||||
 | 
					                pos_prev = pos;
 | 
				
			||||||
 | 
					                match st {
 | 
				
			||||||
 | 
					                    Class => self.attrs.insert("class", content.into()),
 | 
				
			||||||
 | 
					                    Identifier => self.attrs.insert("id", content.into()),
 | 
				
			||||||
 | 
					                    Key => self.i_prev = self.attrs.insert_pos(content, "".into()),
 | 
				
			||||||
 | 
					                    Value | ValueQuoted | ValueContinued => {
 | 
				
			||||||
 | 
					                        self.attrs.0.as_mut().unwrap()[self.i_prev]
 | 
				
			||||||
 | 
					                            .1
 | 
				
			||||||
 | 
					                            .extend(&content[usize::from(matches!(st, ValueQuoted))..]);
 | 
				
			||||||
 | 
					                    }
 | 
				
			||||||
 | 
					                    _ => {}
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            pos += c.len_utf8();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            debug_assert!(!matches!(self.state, Invalid));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            if matches!(self.state, Done) {
 | 
				
			||||||
 | 
					                if input[pos..].starts_with('{') {
 | 
				
			||||||
 | 
					                    self.state = Start;
 | 
				
			||||||
 | 
					                } else {
 | 
				
			||||||
 | 
					                    return;
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn finish(self) -> Attributes<'s> {
 | 
				
			||||||
 | 
					        self.attrs
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
 | 
					#[derive(Debug, Clone, Copy, PartialEq, Eq)]
 | 
				
			||||||
enum State {
 | 
					enum State {
 | 
				
			||||||
    Start,
 | 
					    Start,
 | 
				
			||||||
| 
						 | 
					@ -233,6 +321,8 @@ enum State {
 | 
				
			||||||
    Value,
 | 
					    Value,
 | 
				
			||||||
    ValueQuoted,
 | 
					    ValueQuoted,
 | 
				
			||||||
    ValueEscape,
 | 
					    ValueEscape,
 | 
				
			||||||
 | 
					    ValueNewline,
 | 
				
			||||||
 | 
					    ValueContinued,
 | 
				
			||||||
    Done,
 | 
					    Done,
 | 
				
			||||||
    Invalid,
 | 
					    Invalid,
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -269,73 +359,20 @@ impl State {
 | 
				
			||||||
            ValueFirst if is_name(c) => Value,
 | 
					            ValueFirst if is_name(c) => Value,
 | 
				
			||||||
            ValueFirst if c == '"' => ValueQuoted,
 | 
					            ValueFirst if c == '"' => ValueQuoted,
 | 
				
			||||||
            ValueFirst => Invalid,
 | 
					            ValueFirst => Invalid,
 | 
				
			||||||
            ValueQuoted if c == '"' => Whitespace,
 | 
					            ValueQuoted | ValueNewline | ValueContinued if c == '"' => Whitespace,
 | 
				
			||||||
 | 
					            ValueQuoted | ValueNewline | ValueContinued | ValueEscape if c == '\n' => ValueNewline,
 | 
				
			||||||
            ValueQuoted if c == '\\' => ValueEscape,
 | 
					            ValueQuoted if c == '\\' => ValueEscape,
 | 
				
			||||||
            ValueQuoted | ValueEscape => ValueQuoted,
 | 
					            ValueQuoted | ValueEscape => ValueQuoted,
 | 
				
			||||||
 | 
					            ValueNewline | ValueContinued => ValueContinued,
 | 
				
			||||||
            Invalid | Done => panic!("{:?}", self),
 | 
					            Invalid | Done => panic!("{:?}", self),
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct Parser {
 | 
					 | 
				
			||||||
    pos: usize,
 | 
					 | 
				
			||||||
    pos_prev: usize,
 | 
					 | 
				
			||||||
    span1: Span,
 | 
					 | 
				
			||||||
    state: State,
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
impl Parser {
 | 
					 | 
				
			||||||
    fn new() -> Self {
 | 
					 | 
				
			||||||
        Parser {
 | 
					 | 
				
			||||||
            pos: 0,
 | 
					 | 
				
			||||||
            pos_prev: 0,
 | 
					 | 
				
			||||||
            span1: Span::new(0, 0),
 | 
					 | 
				
			||||||
            state: State::Start,
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    fn step(&mut self, c: char) -> Option<Element> {
 | 
					 | 
				
			||||||
        use State::*;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        let state_next = self.state.step(c);
 | 
					 | 
				
			||||||
        let st = std::mem::replace(&mut self.state, state_next);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        let elem = if st != self.state
 | 
					 | 
				
			||||||
            && !matches!((st, self.state), (ValueEscape, _) | (_, ValueEscape))
 | 
					 | 
				
			||||||
        {
 | 
					 | 
				
			||||||
            let span0 = Span::new(self.pos_prev, self.pos);
 | 
					 | 
				
			||||||
            self.pos_prev = self.pos;
 | 
					 | 
				
			||||||
            match st {
 | 
					 | 
				
			||||||
                Key => {
 | 
					 | 
				
			||||||
                    self.span1 = span0;
 | 
					 | 
				
			||||||
                    None
 | 
					 | 
				
			||||||
                }
 | 
					 | 
				
			||||||
                Class => Some(Element::Class(span0)),
 | 
					 | 
				
			||||||
                Identifier => Some(Element::Identifier(span0)),
 | 
					 | 
				
			||||||
                Value => Some(Element::Attribute(self.span1, span0)),
 | 
					 | 
				
			||||||
                ValueQuoted => Some(Element::Attribute(self.span1, span0.skip(1))),
 | 
					 | 
				
			||||||
                _ => None,
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
        } else {
 | 
					 | 
				
			||||||
            None
 | 
					 | 
				
			||||||
        };
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        self.pos += c.len_utf8();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        elem
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
pub fn is_name(c: char) -> bool {
 | 
					pub fn is_name(c: char) -> bool {
 | 
				
			||||||
    c.is_ascii_alphanumeric() || matches!(c, ':' | '_' | '-')
 | 
					    c.is_ascii_alphanumeric() || matches!(c, ':' | '_' | '-')
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
enum Element {
 | 
					 | 
				
			||||||
    Class(Span),
 | 
					 | 
				
			||||||
    Identifier(Span),
 | 
					 | 
				
			||||||
    Attribute(Span, Span),
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
#[cfg(test)]
 | 
					#[cfg(test)]
 | 
				
			||||||
mod test {
 | 
					mod test {
 | 
				
			||||||
    macro_rules! test_attr {
 | 
					    macro_rules! test_attr {
 | 
				
			||||||
| 
						 | 
					@ -397,6 +434,11 @@ mod test {
 | 
				
			||||||
        );
 | 
					        );
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    #[test]
 | 
				
			||||||
 | 
					    fn value_newline() {
 | 
				
			||||||
 | 
					        test_attr!("{attr0=\"abc\ndef\"}", ("attr0", "abc def"));
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    #[test]
 | 
					    #[test]
 | 
				
			||||||
    fn comment() {
 | 
					    fn comment() {
 | 
				
			||||||
        test_attr!("{%%}");
 | 
					        test_attr!("{%%}");
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue