From e3f39d4b88d0123c4cda377a4eb1fd9ae275ea44 Mon Sep 17 00:00:00 2001 From: kmaasrud Date: Mon, 6 Mar 2023 12:18:58 +0100 Subject: [PATCH] feat: support escapes in attributes Related issue: #1 --- src/attr.rs | 157 ++++++++++++++++++++++++++++++++++++++++------- src/html.rs | 25 +++++--- src/lib.rs | 2 +- tests/suite/skip | 2 - 4 files changed, 152 insertions(+), 34 deletions(-) diff --git a/src/attr.rs b/src/attr.rs index e724812..9980f69 100644 --- a/src/attr.rs +++ b/src/attr.rs @@ -1,6 +1,8 @@ use crate::CowStr; use crate::DiscontinuousString; use crate::Span; +use std::borrow::Cow; +use std::fmt; use State::*; @@ -24,12 +26,80 @@ pub fn valid>(chars: I) -> (usize, bool) { (p.pos, has_attr) } +/// Stores an attribute value that supports backslash escapes of ASCII punctuation upon displaying, +/// without allocating. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct AttributeValue<'s> { + raw: CowStr<'s>, +} + +impl<'s> AttributeValue<'s> { + /// Processes the attribute value escapes and returns an iterator of the parts of the value + /// that should be displayed. + pub fn parts(&'s self) -> AttributeValueParts<'s> { + AttributeValueParts { ahead: &self.raw } + } +} + +impl<'s> From<&'s str> for AttributeValue<'s> { + fn from(value: &'s str) -> Self { + Self { raw: value.into() } + } +} + +impl<'s> From> for AttributeValue<'s> { + fn from(value: CowStr<'s>) -> Self { + Self { raw: value } + } +} + +impl<'s> From for AttributeValue<'s> { + fn from(value: String) -> Self { + Self { raw: value.into() } + } +} + +impl<'s> fmt::Display for AttributeValue<'s> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + self.parts().try_for_each(|part| f.write_str(part)) + } +} + +/// An iterator over the parts of an [`AttributeValue`] that should be displayed. +pub struct AttributeValueParts<'s> { + ahead: &'s str, +} + +impl<'s> Iterator for AttributeValueParts<'s> { + type Item = &'s str; + + fn next(&mut self) -> Option { + for (i, _) in self.ahead.match_indices('\\') { + match self.ahead.as_bytes().get(i + 1) { + Some(b'\\') => { + let next = &self.ahead[..i + 1]; + self.ahead = &self.ahead[i + 2..]; + return Some(next); + } + Some(c) if c.is_ascii_punctuation() => { + let next = &self.ahead[..i]; + self.ahead = &self.ahead[i + 1..]; + return Some(next); + } + _ => {} + } + } + + (!self.ahead.is_empty()).then(|| std::mem::take(&mut self.ahead)) + } +} + /// A collection of attributes, i.e. a key-value map. // Attributes are relatively rare, we choose to pay 8 bytes always and sometimes an extra // indirection instead of always 24 bytes. #[allow(clippy::box_vec)] #[derive(Debug, Clone, PartialEq, Eq, Default)] -pub struct Attributes<'s>(Option)>>>); +pub struct Attributes<'s>(Option)>>>); impl<'s> Attributes<'s> { /// Create an empty collection. @@ -44,17 +114,19 @@ impl<'s> Attributes<'s> { } pub(crate) fn parse>(&mut self, input: S) -> bool { + #[inline] + fn borrow(cow: CowStr) -> &str { + match cow { + Cow::Owned(_) => panic!(), + Cow::Borrowed(s) => s, + } + } + for elem in Parser::new(input.chars()) { match elem { - Element::Class(c) => self.insert("class", input.src(c)), - Element::Identifier(i) => self.insert("id", input.src(i)), - Element::Attribute(a, v) => self.insert( - match input.src(a) { - CowStr::Owned(_) => panic!(), - CowStr::Borrowed(s) => s, - }, - input.src(v), - ), + Element::Class(c) => self.insert("class", input.src(c).into()), + Element::Identifier(i) => self.insert("id", input.src(i).into()), + Element::Attribute(a, v) => self.insert(borrow(input.src(a)), input.src(v).into()), Element::Invalid => return false, } } @@ -79,7 +151,7 @@ impl<'s> Attributes<'s> { /// Insert an attribute. If the attribute already exists, the previous value will be /// overwritten, unless it is a "class" attribute. In that case the provided value will be /// appended to the existing value. - pub fn insert(&mut self, key: &'s str, val: CowStr<'s>) { + pub fn insert(&mut self, key: &'s str, val: AttributeValue<'s>) { if self.0.is_none() { self.0 = Some(Vec::new().into()); }; @@ -105,15 +177,13 @@ impl<'s> Attributes<'s> { /// Returns a reference to the value corresponding to the attribute key. #[must_use] - pub fn get(&self, key: &str) -> Option<&str> { + pub fn get(&self, key: &str) -> Option<&AttributeValue<'s>> { self.iter().find(|(k, _)| *k == key).map(|(_, v)| v) } /// Returns an iterator over the attributes in undefined order. - pub fn iter(&self) -> impl Iterator + '_ { - self.0 - .iter() - .flat_map(|v| v.iter().map(|(a, b)| (*a, b.as_ref()))) + pub fn iter(&self) -> impl Iterator)> + '_ { + self.0.iter().flat_map(|v| v.iter().map(|(a, b)| (*a, b))) } } @@ -231,13 +301,17 @@ impl> Parser { Invalid } } - ValueQuoted => { - if c == '"' { - Whitespace - } else { + ValueQuoted => match c { + '\\' => { + if let Some(c) = self.chars.next() { + self.pos_prev = self.pos; + self.pos += c.len_utf8(); + } ValueQuoted } - } + '"' => Whitespace, + _ => ValueQuoted, + }, Invalid | Done => panic!("{:?}", self.state), } }) @@ -330,11 +404,14 @@ mod test { macro_rules! test_attr { ($src:expr $(,$($av:expr),* $(,)?)?) => { #[allow(unused)] - let mut attr =super::Attributes::new(); + let mut attr = super::Attributes::new(); attr.parse($src); let actual = attr.iter().collect::>(); let expected = &[$($($av),*,)?]; - assert_eq!(actual, expected, "\n\n{}\n\n", $src); + for i in 0..actual.len() { + let actual_val = format!("{}", actual[i].1); + assert_eq!((actual[i].0, actual_val.as_str()), expected[i], "\n\n{}\n\n", $src); + } }; } @@ -394,6 +471,40 @@ mod test { ); } + #[test] + fn escape() { + test_attr!( + r#"{attr="with escaped \~ char"}"#, + ("attr", "with escaped ~ char") + ); + test_attr!( + r#"{key="quotes \" should be escaped"}"#, + ("key", r#"quotes " should be escaped"#) + ); + } + + #[test] + fn escape_backslash() { + test_attr!(r#"{attr="with\\backslash"}"#, ("attr", r"with\backslash")); + test_attr!( + r#"{attr="with many backslashes\\\\"}"#, + ("attr", r"with many backslashes\\") + ); + test_attr!( + r#"{attr="\\escaped backslash at start"}"#, + ("attr", r"\escaped backslash at start") + ); + } + + #[test] + fn only_escape_punctuation() { + test_attr!(r#"{attr="do not \escape"}"#, ("attr", r"do not \escape")); + test_attr!( + r#"{attr="\backslash at the beginning"}"#, + ("attr", r"\backslash at the beginning") + ); + } + #[test] fn valid_full() { let src = "{.class %comment%}"; diff --git a/src/html.rs b/src/html.rs index a7f2037..7fff549 100644 --- a/src/html.rs +++ b/src/html.rs @@ -166,7 +166,7 @@ impl<'s, I: Iterator>, W: std::fmt::Write> Writer<'s, I, W> { self.out.write_str(">, W: std::fmt::Write> Writer<'s, I, W> { for (a, v) in attrs.iter().filter(|(a, _)| *a != "class") { write!(self.out, r#" {}=""#, a)?; - self.write_escape(v)?; + v.parts().try_for_each(|part| self.write_attr(part))?; self.out.write_char('"')?; } @@ -207,7 +207,7 @@ impl<'s, I: Iterator>, W: std::fmt::Write> Writer<'s, I, W> { { if !attrs.iter().any(|(a, _)| a == "id") { self.out.write_str(r#" id=""#)?; - self.write_escape(id)?; + self.write_attr(id)?; self.out.write_char('"')?; } } @@ -249,7 +249,7 @@ impl<'s, I: Iterator>, W: std::fmt::Write> Writer<'s, I, W> { self.out.write_char(' ')?; } first_written = true; - self.out.write_str(cls)?; + cls.parts().try_for_each(|part| self.write_attr(part))?; } // div class goes after classes from attrs if let Container::Div { class: Some(cls) } = c { @@ -276,7 +276,7 @@ impl<'s, I: Iterator>, W: std::fmt::Write> Writer<'s, I, W> { Container::CodeBlock { lang } => { if let Some(l) = lang { self.out.write_str(r#">"#)?; } else { self.out.write_str(">")?; @@ -388,7 +388,7 @@ impl<'s, I: Iterator>, W: std::fmt::Write> Writer<'s, I, W> { } } Event::Str(s) => match self.raw { - Raw::None => self.write_escape(&s)?, + Raw::None => self.write_text(&s)?, Raw::Html => self.out.write_str(&s)?, Raw::Other => {} }, @@ -415,7 +415,7 @@ impl<'s, I: Iterator>, W: std::fmt::Write> Writer<'s, I, W> { self.out.write_str("\n")?; @@ -430,13 +430,14 @@ impl<'s, I: Iterator>, W: std::fmt::Write> Writer<'s, I, W> { Ok(()) } - fn write_escape(&mut self, mut s: &str) -> std::fmt::Result { + fn write_escape(&mut self, mut s: &str, escape_quotes: bool) -> std::fmt::Result { let mut ent = ""; while let Some(i) = s.find(|c| { match c { '<' => Some("<"), '>' => Some(">"), '&' => Some("&"), + '"' if escape_quotes => Some("""), _ => None, } .map_or(false, |s| { @@ -450,4 +451,12 @@ impl<'s, I: Iterator>, W: std::fmt::Write> Writer<'s, I, W> { } self.out.write_str(s) } + + fn write_text(&mut self, s: &str) -> std::fmt::Result { + self.write_escape(s, false) + } + + fn write_attr(&mut self, s: &str) -> std::fmt::Result { + self.write_escape(s, true) + } } diff --git a/src/lib.rs b/src/lib.rs index bd81e3f..100a9c1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -63,7 +63,7 @@ mod tree; use span::DiscontinuousString; use span::Span; -pub use attr::Attributes; +pub use attr::{AttributeValue, AttributeValueParts, Attributes}; type CowStr<'s> = std::borrow::Cow<'s, str>; diff --git a/tests/suite/skip b/tests/suite/skip index ee358ac..aea68c3 100644 --- a/tests/suite/skip +++ b/tests/suite/skip @@ -1,7 +1,5 @@ 38d85f9:multi-line block attributes 6c14561:multi-line block attributes -48546bb:escape in attributes -6bc4257:escape in attributes 613a9d6:attribute container precedence f4f22fc:attribute key class order ae6fc15:bugged left/right quote