556 lines
15 KiB
Rust
556 lines
15 KiB
Rust
use crate::CowStr;
|
||
use std::fmt;
|
||
|
||
/// Parse attributes, assumed to be valid.
|
||
pub(crate) fn parse(src: &str) -> Attributes {
|
||
let mut a = Attributes::new();
|
||
a.parse(src);
|
||
a
|
||
}
|
||
|
||
pub fn valid(src: &str) -> usize {
|
||
use State::*;
|
||
|
||
let mut n = 0;
|
||
let mut state = Start;
|
||
for c in src.bytes() {
|
||
n += 1;
|
||
state = state.step(c);
|
||
match state {
|
||
Done | Invalid => break,
|
||
_ => {}
|
||
}
|
||
}
|
||
|
||
if matches!(state, Done) {
|
||
n
|
||
} else {
|
||
0
|
||
}
|
||
}
|
||
|
||
/// Stores an attribute value that supports backslash escapes of ASCII punctuation upon displaying,
|
||
/// without allocating.
|
||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||
pub struct AttributeValue<'s> {
|
||
raw: CowStr<'s>,
|
||
}
|
||
|
||
impl<'s> AttributeValue<'s> {
|
||
/// Processes the attribute value escapes and returns an iterator of the parts of the value
|
||
/// that should be displayed.
|
||
pub fn parts(&'s self) -> AttributeValueParts<'s> {
|
||
AttributeValueParts { ahead: &self.raw }
|
||
}
|
||
|
||
// lifetime is 's to avoid allocation if empty value is concatenated with single value
|
||
fn extend(&mut self, s: &'s str) {
|
||
match &mut self.raw {
|
||
CowStr::Borrowed(prev) => {
|
||
if prev.is_empty() {
|
||
*prev = s;
|
||
} else {
|
||
self.raw = format!("{} {}", prev, s).into();
|
||
}
|
||
}
|
||
CowStr::Owned(ref mut prev) => {
|
||
prev.push(' ');
|
||
prev.push_str(s);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
impl<'s> From<&'s str> for AttributeValue<'s> {
|
||
fn from(value: &'s str) -> Self {
|
||
Self { raw: value.into() }
|
||
}
|
||
}
|
||
|
||
impl<'s> From<CowStr<'s>> for AttributeValue<'s> {
|
||
fn from(value: CowStr<'s>) -> Self {
|
||
Self { raw: value }
|
||
}
|
||
}
|
||
|
||
impl<'s> From<String> for AttributeValue<'s> {
|
||
fn from(value: String) -> Self {
|
||
Self { raw: value.into() }
|
||
}
|
||
}
|
||
|
||
impl<'s> fmt::Display for AttributeValue<'s> {
|
||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||
self.parts().try_for_each(|part| f.write_str(part))
|
||
}
|
||
}
|
||
|
||
/// An iterator over the parts of an [`AttributeValue`] that should be displayed.
|
||
pub struct AttributeValueParts<'s> {
|
||
ahead: &'s str,
|
||
}
|
||
|
||
impl<'s> Iterator for AttributeValueParts<'s> {
|
||
type Item = &'s str;
|
||
|
||
fn next(&mut self) -> Option<Self::Item> {
|
||
for (i, _) in self.ahead.match_indices('\\') {
|
||
match self.ahead.as_bytes().get(i + 1) {
|
||
Some(b'\\') => {
|
||
let next = &self.ahead[..i + 1];
|
||
self.ahead = &self.ahead[i + 2..];
|
||
return Some(next);
|
||
}
|
||
Some(c) if c.is_ascii_punctuation() => {
|
||
let next = &self.ahead[..i];
|
||
self.ahead = &self.ahead[i + 1..];
|
||
return Some(next);
|
||
}
|
||
_ => {}
|
||
}
|
||
}
|
||
|
||
(!self.ahead.is_empty()).then(|| std::mem::take(&mut self.ahead))
|
||
}
|
||
}
|
||
|
||
/// A collection of attributes, i.e. a key-value map.
|
||
// Attributes are relatively rare, we choose to pay 8 bytes always and sometimes an extra
|
||
// indirection instead of always 24 bytes.
|
||
#[allow(clippy::box_vec)]
|
||
#[derive(Clone, PartialEq, Eq, Default)]
|
||
pub struct Attributes<'s>(Option<Box<Vec<(&'s str, AttributeValue<'s>)>>>);
|
||
|
||
impl<'s> Attributes<'s> {
|
||
/// Create an empty collection.
|
||
#[must_use]
|
||
pub fn new() -> Self {
|
||
Self::default()
|
||
}
|
||
|
||
#[must_use]
|
||
pub(crate) fn take(&mut self) -> Self {
|
||
Self(self.0.take())
|
||
}
|
||
|
||
/// Parse and append attributes, assumed to be valid.
|
||
pub(crate) fn parse(&mut self, input: &'s str) {
|
||
let mut parser = Parser::new(self.take());
|
||
parser.parse(input);
|
||
*self = parser.finish();
|
||
}
|
||
|
||
/// Combine all attributes from both objects, prioritizing self on conflicts.
|
||
pub(crate) fn union(&mut self, other: Self) {
|
||
if let Some(attrs0) = &mut self.0 {
|
||
if let Some(mut attrs1) = other.0 {
|
||
for (key, val) in attrs1.drain(..) {
|
||
if !attrs0.iter().any(|(k, _)| *k == key) {
|
||
attrs0.push((key, val));
|
||
}
|
||
}
|
||
}
|
||
} else {
|
||
self.0 = other.0;
|
||
}
|
||
}
|
||
|
||
/// Insert an attribute. If the attribute already exists, the previous value will be
|
||
/// overwritten, unless it is a "class" attribute. In that case the provided value will be
|
||
/// appended to the existing value.
|
||
pub fn insert(&mut self, key: &'s str, val: AttributeValue<'s>) {
|
||
self.insert_pos(key, val);
|
||
}
|
||
|
||
// duplicate of insert but returns position of inserted value
|
||
fn insert_pos(&mut self, key: &'s str, val: AttributeValue<'s>) -> usize {
|
||
if self.0.is_none() {
|
||
self.0 = Some(Vec::new().into());
|
||
};
|
||
|
||
let attrs = self.0.as_mut().unwrap();
|
||
if let Some(i) = attrs.iter().position(|(k, _)| *k == key) {
|
||
let prev = &mut attrs[i].1;
|
||
if key == "class" {
|
||
match val.raw {
|
||
CowStr::Borrowed(s) => prev.extend(s),
|
||
CowStr::Owned(s) => {
|
||
*prev = format!("{} {}", prev, s).into();
|
||
}
|
||
}
|
||
} else {
|
||
*prev = val;
|
||
}
|
||
i
|
||
} else {
|
||
let i = attrs.len();
|
||
attrs.push((key, val));
|
||
i
|
||
}
|
||
}
|
||
|
||
/// Returns true if the collection contains no attributes.
|
||
#[must_use]
|
||
pub fn is_empty(&self) -> bool {
|
||
self.0.as_ref().map_or(true, |v| v.is_empty())
|
||
}
|
||
|
||
/// Returns a reference to the value corresponding to the attribute key.
|
||
#[must_use]
|
||
pub fn get(&self, key: &str) -> Option<&AttributeValue<'s>> {
|
||
self.iter().find(|(k, _)| *k == key).map(|(_, v)| v)
|
||
}
|
||
|
||
/// Returns an iterator over the attributes in undefined order.
|
||
pub fn iter(&self) -> impl Iterator<Item = (&'s str, &AttributeValue<'s>)> + '_ {
|
||
self.0.iter().flat_map(|v| v.iter().map(|(a, b)| (*a, b)))
|
||
}
|
||
}
|
||
|
||
#[cfg(test)]
|
||
impl<'s> FromIterator<(&'s str, &'s str)> for Attributes<'s> {
|
||
fn from_iter<I: IntoIterator<Item = (&'s str, &'s str)>>(iter: I) -> Self {
|
||
let attrs = iter
|
||
.into_iter()
|
||
.map(|(a, v)| (a, v.into()))
|
||
.collect::<Vec<_>>();
|
||
if attrs.is_empty() {
|
||
Attributes::new()
|
||
} else {
|
||
Attributes(Some(attrs.into()))
|
||
}
|
||
}
|
||
}
|
||
|
||
impl<'s> std::fmt::Debug for Attributes<'s> {
|
||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||
write!(f, "{{")?;
|
||
let mut first = true;
|
||
for (k, v) in self.iter() {
|
||
if !first {
|
||
write!(f, ", ")?;
|
||
}
|
||
first = false;
|
||
write!(f, "{}=\"{}\"", k, v.raw)?;
|
||
}
|
||
write!(f, "}}")
|
||
}
|
||
}
|
||
|
||
#[derive(Clone)]
|
||
pub struct Validator {
|
||
state: State,
|
||
}
|
||
|
||
impl Validator {
|
||
pub fn new() -> Self {
|
||
Self {
|
||
state: State::Start,
|
||
}
|
||
}
|
||
|
||
pub fn restart(&mut self) {
|
||
self.state = State::Start;
|
||
}
|
||
|
||
/// Returns number of valid bytes parsed (0 means invalid) if finished, otherwise more input is
|
||
/// needed.
|
||
pub fn parse(&mut self, input: &str) -> Option<usize> {
|
||
let mut bytes = input.bytes();
|
||
for c in &mut bytes {
|
||
self.state = self.state.step(c);
|
||
match self.state {
|
||
State::Done => return Some(input.len() - bytes.len()),
|
||
State::Invalid => return Some(0),
|
||
_ => {}
|
||
}
|
||
}
|
||
None
|
||
}
|
||
}
|
||
|
||
/// Attributes parser, take input of one or more consecutive attributes and create an `Attributes`
|
||
/// object.
|
||
///
|
||
/// Input is assumed to contain a valid series of attribute sets, the attributes are added as they
|
||
/// are encountered.
|
||
pub struct Parser<'s> {
|
||
attrs: Attributes<'s>,
|
||
i_prev: usize,
|
||
state: State,
|
||
}
|
||
|
||
impl<'s> Parser<'s> {
|
||
pub fn new(attrs: Attributes<'s>) -> Self {
|
||
Self {
|
||
attrs,
|
||
i_prev: usize::MAX,
|
||
state: State::Start,
|
||
}
|
||
}
|
||
|
||
/// Return value indicates the number of bytes parsed if finished. If None, more input is
|
||
/// required to finish the attributes.
|
||
pub fn parse(&mut self, input: &'s str) {
|
||
use State::*;
|
||
|
||
let mut pos = 0;
|
||
let mut pos_prev = 0;
|
||
|
||
for c in input.bytes() {
|
||
let state_next = self.state.step(c);
|
||
let st = std::mem::replace(&mut self.state, state_next);
|
||
|
||
if st != self.state && !matches!((st, self.state), (ValueEscape, _) | (_, ValueEscape))
|
||
{
|
||
let content = &input[pos_prev..pos];
|
||
pos_prev = pos;
|
||
match st {
|
||
Class => self.attrs.insert("class", content.into()),
|
||
Identifier => self.attrs.insert("id", content.into()),
|
||
Key => self.i_prev = self.attrs.insert_pos(content, "".into()),
|
||
Value | ValueQuoted | ValueContinued => {
|
||
self.attrs.0.as_mut().unwrap()[self.i_prev]
|
||
.1
|
||
.extend(&content[usize::from(matches!(st, ValueQuoted))..]);
|
||
}
|
||
_ => {}
|
||
}
|
||
};
|
||
|
||
pos += 1;
|
||
|
||
debug_assert!(!matches!(self.state, Invalid));
|
||
|
||
if matches!(self.state, Done) {
|
||
if input[pos..].starts_with('{') {
|
||
self.state = Start;
|
||
} else {
|
||
return;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
pub fn finish(self) -> Attributes<'s> {
|
||
self.attrs
|
||
}
|
||
}
|
||
|
||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||
enum State {
|
||
Start,
|
||
Whitespace,
|
||
Comment,
|
||
ClassFirst,
|
||
Class,
|
||
IdentifierFirst,
|
||
Identifier,
|
||
Key,
|
||
ValueFirst,
|
||
Value,
|
||
ValueQuoted,
|
||
ValueEscape,
|
||
ValueNewline,
|
||
ValueContinued,
|
||
Done,
|
||
Invalid,
|
||
}
|
||
|
||
impl State {
|
||
fn step(self, c: u8) -> State {
|
||
use State::*;
|
||
|
||
match self {
|
||
Start if c == b'{' => Whitespace,
|
||
Start => Invalid,
|
||
Whitespace => match c {
|
||
b'}' => Done,
|
||
b'.' => ClassFirst,
|
||
b'#' => IdentifierFirst,
|
||
b'%' => Comment,
|
||
c if is_name(c) => Key,
|
||
c if c.is_ascii_whitespace() => Whitespace,
|
||
_ => Invalid,
|
||
},
|
||
Comment if c == b'%' => Whitespace,
|
||
Comment => Comment,
|
||
ClassFirst if is_name(c) => Class,
|
||
ClassFirst => Invalid,
|
||
IdentifierFirst if is_name(c) => Identifier,
|
||
IdentifierFirst => Invalid,
|
||
s @ (Class | Identifier | Value) if is_name(c) => s,
|
||
Class | Identifier | Value if c.is_ascii_whitespace() => Whitespace,
|
||
Class | Identifier | Value if c == b'}' => Done,
|
||
Class | Identifier | Value => Invalid,
|
||
Key if is_name(c) => Key,
|
||
Key if c == b'=' => ValueFirst,
|
||
Key => Invalid,
|
||
ValueFirst if is_name(c) => Value,
|
||
ValueFirst if c == b'"' => ValueQuoted,
|
||
ValueFirst => Invalid,
|
||
ValueQuoted | ValueNewline | ValueContinued if c == b'"' => Whitespace,
|
||
ValueQuoted | ValueNewline | ValueContinued | ValueEscape if c == b'\n' => ValueNewline,
|
||
ValueQuoted if c == b'\\' => ValueEscape,
|
||
ValueQuoted | ValueEscape => ValueQuoted,
|
||
ValueNewline | ValueContinued => ValueContinued,
|
||
Invalid | Done => panic!("{:?}", self),
|
||
}
|
||
}
|
||
}
|
||
|
||
pub fn is_name(c: u8) -> bool {
|
||
c.is_ascii_alphanumeric() || matches!(c, b':' | b'_' | b'-')
|
||
}
|
||
|
||
#[cfg(test)]
|
||
mod test {
|
||
macro_rules! test_attr {
|
||
($src:expr $(,$($av:expr),* $(,)?)?) => {
|
||
#[allow(unused)]
|
||
let mut attr = super::Attributes::new();
|
||
attr.parse($src);
|
||
let actual = attr.iter().collect::<Vec<_>>();
|
||
let expected = &[$($($av),*,)?];
|
||
for i in 0..actual.len() {
|
||
let actual_val = format!("{}", actual[i].1);
|
||
assert_eq!((actual[i].0, actual_val.as_str()), expected[i], "\n\n{}\n\n", $src);
|
||
}
|
||
};
|
||
}
|
||
|
||
#[test]
|
||
fn empty() {
|
||
test_attr!("{}");
|
||
}
|
||
|
||
#[test]
|
||
fn class_id() {
|
||
test_attr!(
|
||
"{.some_class #some_id}",
|
||
("class", "some_class"),
|
||
("id", "some_id"),
|
||
);
|
||
test_attr!("{.a .b}", ("class", "a b"));
|
||
test_attr!("{#a #b}", ("id", "b"));
|
||
}
|
||
|
||
#[test]
|
||
fn value_unquoted() {
|
||
test_attr!(
|
||
"{attr0=val0 attr1=val1}",
|
||
("attr0", "val0"),
|
||
("attr1", "val1"),
|
||
);
|
||
}
|
||
|
||
#[test]
|
||
fn value_quoted() {
|
||
test_attr!(
|
||
r#"{attr0="val0" attr1="val1"}"#,
|
||
("attr0", "val0"),
|
||
("attr1", "val1"),
|
||
);
|
||
test_attr!(
|
||
r#"{#id .class style="color:red"}"#,
|
||
("id", "id"),
|
||
("class", "class"),
|
||
("style", "color:red")
|
||
);
|
||
}
|
||
|
||
#[test]
|
||
fn value_newline() {
|
||
test_attr!("{attr0=\"abc\ndef\"}", ("attr0", "abc def"));
|
||
}
|
||
|
||
#[test]
|
||
fn comment() {
|
||
test_attr!("{%%}");
|
||
test_attr!("{ % abc % }");
|
||
test_attr!(
|
||
"{ .some_class % abc % #some_id}",
|
||
("class", "some_class"),
|
||
("id", "some_id"),
|
||
);
|
||
}
|
||
|
||
#[test]
|
||
fn escape() {
|
||
test_attr!(
|
||
r#"{attr="with escaped \~ char"}"#,
|
||
("attr", "with escaped ~ char")
|
||
);
|
||
test_attr!(
|
||
r#"{key="quotes \" should be escaped"}"#,
|
||
("key", r#"quotes " should be escaped"#)
|
||
);
|
||
}
|
||
|
||
#[test]
|
||
fn escape_backslash() {
|
||
test_attr!(r#"{attr="with\\backslash"}"#, ("attr", r"with\backslash"));
|
||
test_attr!(
|
||
r#"{attr="with many backslashes\\\\"}"#,
|
||
("attr", r"with many backslashes\\")
|
||
);
|
||
test_attr!(
|
||
r#"{attr="\\escaped backslash at start"}"#,
|
||
("attr", r"\escaped backslash at start")
|
||
);
|
||
}
|
||
|
||
#[test]
|
||
fn only_escape_punctuation() {
|
||
test_attr!(r#"{attr="do not \escape"}"#, ("attr", r"do not \escape"));
|
||
test_attr!(
|
||
r#"{attr="\backslash at the beginning"}"#,
|
||
("attr", r"\backslash at the beginning")
|
||
);
|
||
}
|
||
|
||
#[test]
|
||
fn valid_full() {
|
||
let src = "{.class %comment%}";
|
||
assert_eq!(super::valid(src), src.len());
|
||
}
|
||
|
||
#[test]
|
||
fn valid_unicode() {
|
||
let src = r#"{a="б"}"#;
|
||
assert_eq!(super::valid(src), src.len());
|
||
}
|
||
|
||
#[test]
|
||
fn valid_empty() {
|
||
let src = "{}";
|
||
assert_eq!(super::valid(src), src.len());
|
||
}
|
||
|
||
#[test]
|
||
fn valid_whitespace() {
|
||
let src = "{ \n }";
|
||
assert_eq!(super::valid(src), src.len());
|
||
}
|
||
|
||
#[test]
|
||
fn valid_comment() {
|
||
let src = "{%comment%}";
|
||
assert_eq!(super::valid(src), src.len());
|
||
}
|
||
|
||
#[test]
|
||
fn valid_trailing() {
|
||
let src = "{.class}{.ignore}";
|
||
let src_valid = "{.class}";
|
||
assert_eq!(super::valid(src), src_valid.len());
|
||
}
|
||
|
||
#[test]
|
||
fn valid_invalid() {
|
||
assert_eq!(super::valid(" {.valid}"), 0);
|
||
assert_eq!(super::valid("{.class invalid}"), 0);
|
||
assert_eq!(super::valid("abc"), 0);
|
||
assert_eq!(super::valid("{.abc.}"), 0);
|
||
}
|
||
}
|