PR #19 Support escapes in attributes

This commit is contained in:
Noah Hellman 2023-03-12 10:25:31 +01:00
commit 16491a4a99
4 changed files with 152 additions and 34 deletions

View file

@ -1,6 +1,8 @@
use crate::CowStr; use crate::CowStr;
use crate::DiscontinuousString; use crate::DiscontinuousString;
use crate::Span; use crate::Span;
use std::borrow::Cow;
use std::fmt;
use State::*; use State::*;
@ -24,12 +26,80 @@ pub fn valid<I: Iterator<Item = char>>(chars: I) -> (usize, bool) {
(p.pos, has_attr) (p.pos, has_attr)
} }
/// An attribute value whose backslash escapes are resolved only when it is displayed.
///
/// The value is kept exactly as written, so backslash escapes of ASCII punctuation can be
/// dropped on the fly while displaying, without allocating.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AttributeValue<'s> {
    /// The unprocessed value, still containing any escaping backslashes.
    raw: CowStr<'s>,
}
impl<'s> AttributeValue<'s> {
    /// Returns an iterator over the slices of the value that should be displayed, i.e. the
    /// raw value with its escapes processed.
    pub fn parts(&'s self) -> AttributeValueParts<'s> {
        // Nothing has been consumed yet, so the whole raw value lies ahead of the iterator.
        let unprocessed: &'s str = &self.raw;
        AttributeValueParts { ahead: unprocessed }
    }
}
impl<'s> From<&'s str> for AttributeValue<'s> {
    /// Wraps a string slice as a raw attribute value; escapes are not processed here.
    fn from(value: &'s str) -> Self {
        let raw = From::from(value);
        Self { raw }
    }
}
impl<'s> From<CowStr<'s>> for AttributeValue<'s> {
    /// Uses the given string as the raw attribute value as is.
    fn from(value: CowStr<'s>) -> Self {
        let raw = value;
        Self { raw }
    }
}
impl<'s> From<String> for AttributeValue<'s> {
    /// Takes ownership of the string and stores it as the raw attribute value.
    fn from(value: String) -> Self {
        let raw = From::from(value);
        Self { raw }
    }
}
impl<'s> fmt::Display for AttributeValue<'s> {
    /// Writes every displayable part of the value in order, stopping at the first write error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        for part in self.parts() {
            f.write_str(part)?;
        }
        Ok(())
    }
}
/// Iterator yielding the slices of an [`AttributeValue`] that should be displayed.
///
/// Every item is a slice of the raw value. A backslash that escapes an ASCII punctuation
/// character is left out between two consecutive items. An item may be empty, e.g. when the
/// value starts with an escape.
pub struct AttributeValueParts<'s> {
    /// The portion of the raw value that has not been yielded yet.
    ahead: &'s str,
}

impl<'s> Iterator for AttributeValueParts<'s> {
    type Item = &'s str;

    fn next(&mut self) -> Option<Self::Item> {
        let rest = self.ahead;
        let bytes = rest.as_bytes();

        // Find the first backslash that really escapes something and compute
        // `(end of the yielded slice, where to resume afterwards)`. A backslash is ASCII, so
        // scanning bytes can never hit the middle of a multi-byte character.
        let split = bytes
            .iter()
            .enumerate()
            .filter(|&(_, &byte)| byte == b'\\')
            .find_map(|(at, _)| match bytes.get(at + 1) {
                // Escaped backslash: keep the first backslash, skip the second.
                Some(b'\\') => Some((at + 1, at + 2)),
                // Escaped punctuation: drop the backslash, the character itself leads the
                // next slice.
                Some(escaped) if escaped.is_ascii_punctuation() => Some((at, at + 1)),
                // Anything else (or end of input): the backslash is literal.
                _ => None,
            });

        if let Some((end, resume)) = split {
            self.ahead = &rest[resume..];
            return Some(&rest[..end]);
        }

        if rest.is_empty() {
            return None;
        }
        // No escapes left, the remainder is yielded in one piece.
        Some(std::mem::take(&mut self.ahead))
    }
}
/// A collection of attributes, i.e. a key-value map. /// A collection of attributes, i.e. a key-value map.
// Attributes are relatively rare, we choose to pay 8 bytes always and sometimes an extra // Attributes are relatively rare, we choose to pay 8 bytes always and sometimes an extra
// indirection instead of always 24 bytes. // indirection instead of always 24 bytes.
#[allow(clippy::box_vec)] #[allow(clippy::box_vec)]
#[derive(Debug, Clone, PartialEq, Eq, Default)] #[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct Attributes<'s>(Option<Box<Vec<(&'s str, CowStr<'s>)>>>); pub struct Attributes<'s>(Option<Box<Vec<(&'s str, AttributeValue<'s>)>>>);
impl<'s> Attributes<'s> { impl<'s> Attributes<'s> {
/// Create an empty collection. /// Create an empty collection.
@ -44,17 +114,19 @@ impl<'s> Attributes<'s> {
} }
pub(crate) fn parse<S: DiscontinuousString<'s>>(&mut self, input: S) -> bool { pub(crate) fn parse<S: DiscontinuousString<'s>>(&mut self, input: S) -> bool {
/// Extracts the borrowed string slice from `cow`, used for attribute keys which are stored
/// as plain string slices.
///
/// # Panics
///
/// Panics if `cow` is [`Cow::Owned`], as an owned string cannot be handed out as a slice
/// that outlives this call.
#[inline]
fn borrow(cow: CowStr) -> &str {
    match cow {
        Cow::Borrowed(s) => s,
        // State what went wrong instead of panicking without any message.
        Cow::Owned(_) => panic!("expected a borrowed attribute key, got an owned string"),
    }
}
for elem in Parser::new(input.chars()) { for elem in Parser::new(input.chars()) {
match elem { match elem {
Element::Class(c) => self.insert("class", input.src(c)), Element::Class(c) => self.insert("class", input.src(c).into()),
Element::Identifier(i) => self.insert("id", input.src(i)), Element::Identifier(i) => self.insert("id", input.src(i).into()),
Element::Attribute(a, v) => self.insert( Element::Attribute(a, v) => self.insert(borrow(input.src(a)), input.src(v).into()),
match input.src(a) {
CowStr::Owned(_) => panic!(),
CowStr::Borrowed(s) => s,
},
input.src(v),
),
Element::Invalid => return false, Element::Invalid => return false,
} }
} }
@ -79,7 +151,7 @@ impl<'s> Attributes<'s> {
/// Insert an attribute. If the attribute already exists, the previous value will be /// Insert an attribute. If the attribute already exists, the previous value will be
/// overwritten, unless it is a "class" attribute. In that case the provided value will be /// overwritten, unless it is a "class" attribute. In that case the provided value will be
/// appended to the existing value. /// appended to the existing value.
pub fn insert(&mut self, key: &'s str, val: CowStr<'s>) { pub fn insert(&mut self, key: &'s str, val: AttributeValue<'s>) {
if self.0.is_none() { if self.0.is_none() {
self.0 = Some(Vec::new().into()); self.0 = Some(Vec::new().into());
}; };
@ -105,15 +177,13 @@ impl<'s> Attributes<'s> {
/// Returns a reference to the value corresponding to the attribute key. /// Returns a reference to the value corresponding to the attribute key.
#[must_use] #[must_use]
pub fn get(&self, key: &str) -> Option<&str> { pub fn get(&self, key: &str) -> Option<&AttributeValue<'s>> {
self.iter().find(|(k, _)| *k == key).map(|(_, v)| v) self.iter().find(|(k, _)| *k == key).map(|(_, v)| v)
} }
/// Returns an iterator over the attributes in undefined order. /// Returns an iterator over the attributes in undefined order.
pub fn iter(&self) -> impl Iterator<Item = (&'s str, &str)> + '_ { pub fn iter(&self) -> impl Iterator<Item = (&'s str, &AttributeValue<'s>)> + '_ {
self.0 self.0.iter().flat_map(|v| v.iter().map(|(a, b)| (*a, b)))
.iter()
.flat_map(|v| v.iter().map(|(a, b)| (*a, b.as_ref())))
} }
} }
@ -231,13 +301,17 @@ impl<I: Iterator<Item = char>> Parser<I> {
Invalid Invalid
} }
} }
ValueQuoted => { ValueQuoted => match c {
if c == '"' { '\\' => {
Whitespace if let Some(c) = self.chars.next() {
} else { self.pos_prev = self.pos;
self.pos += c.len_utf8();
}
ValueQuoted ValueQuoted
} }
} '"' => Whitespace,
_ => ValueQuoted,
},
Invalid | Done => panic!("{:?}", self.state), Invalid | Done => panic!("{:?}", self.state),
} }
}) })
@ -330,11 +404,14 @@ mod test {
macro_rules! test_attr { macro_rules! test_attr {
($src:expr $(,$($av:expr),* $(,)?)?) => { ($src:expr $(,$($av:expr),* $(,)?)?) => {
#[allow(unused)] #[allow(unused)]
let mut attr =super::Attributes::new(); let mut attr = super::Attributes::new();
attr.parse($src); attr.parse($src);
let actual = attr.iter().collect::<Vec<_>>(); let actual = attr.iter().collect::<Vec<_>>();
let expected = &[$($($av),*,)?]; let expected = &[$($($av),*,)?];
assert_eq!(actual, expected, "\n\n{}\n\n", $src); for i in 0..actual.len() {
let actual_val = format!("{}", actual[i].1);
assert_eq!((actual[i].0, actual_val.as_str()), expected[i], "\n\n{}\n\n", $src);
}
}; };
} }
@ -394,6 +471,40 @@ mod test {
); );
} }
// Escaped ASCII punctuation inside a quoted value is displayed without its backslash.
#[test]
fn escape() {
// `\~` is displayed as a plain `~`.
test_attr!(
r#"{attr="with escaped \~ char"}"#,
("attr", "with escaped ~ char")
);
// An escaped quote does not end the quoted value and is displayed as `"`.
test_attr!(
r#"{key="quotes \" should be escaped"}"#,
("key", r#"quotes " should be escaped"#)
);
}
// An escaped backslash (`\\`) is displayed as a single backslash.
#[test]
fn escape_backslash() {
// In the middle of the value.
test_attr!(r#"{attr="with\\backslash"}"#, ("attr", r"with\backslash"));
// Two escaped backslashes in a row at the end give two backslashes.
test_attr!(
r#"{attr="with many backslashes\\\\"}"#,
("attr", r"with many backslashes\\")
);
// At the very start of the value.
test_attr!(
r#"{attr="\\escaped backslash at start"}"#,
("attr", r"\escaped backslash at start")
);
}
// A backslash in front of a character that is not ASCII punctuation is kept literally.
#[test]
fn only_escape_punctuation() {
// `\e` is not an escape, the backslash stays.
test_attr!(r#"{attr="do not \escape"}"#, ("attr", r"do not \escape"));
// Same for a literal backslash at the start of the value.
test_attr!(
r#"{attr="\backslash at the beginning"}"#,
("attr", r"\backslash at the beginning")
);
}
#[test] #[test]
fn valid_full() { fn valid_full() {
let src = "{.class %comment%}"; let src = "{.class %comment%}";

View file

@ -166,7 +166,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
self.out.write_str("<a")?; self.out.write_str("<a")?;
} else { } else {
self.out.write_str(r#"<a href=""#)?; self.out.write_str(r#"<a href=""#)?;
self.write_escape(dst)?; self.write_attr(dst)?;
self.out.write_char('"')?; self.out.write_char('"')?;
} }
} }
@ -194,7 +194,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
for (a, v) in attrs.iter().filter(|(a, _)| *a != "class") { for (a, v) in attrs.iter().filter(|(a, _)| *a != "class") {
write!(self.out, r#" {}=""#, a)?; write!(self.out, r#" {}=""#, a)?;
self.write_escape(v)?; v.parts().try_for_each(|part| self.write_attr(part))?;
self.out.write_char('"')?; self.out.write_char('"')?;
} }
@ -207,7 +207,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
{ {
if !attrs.iter().any(|(a, _)| a == "id") { if !attrs.iter().any(|(a, _)| a == "id") {
self.out.write_str(r#" id=""#)?; self.out.write_str(r#" id=""#)?;
self.write_escape(id)?; self.write_attr(id)?;
self.out.write_char('"')?; self.out.write_char('"')?;
} }
} }
@ -249,7 +249,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
self.out.write_char(' ')?; self.out.write_char(' ')?;
} }
first_written = true; first_written = true;
self.out.write_str(cls)?; cls.parts().try_for_each(|part| self.write_attr(part))?;
} }
// div class goes after classes from attrs // div class goes after classes from attrs
if let Container::Div { class: Some(cls) } = c { if let Container::Div { class: Some(cls) } = c {
@ -276,7 +276,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
Container::CodeBlock { lang } => { Container::CodeBlock { lang } => {
if let Some(l) = lang { if let Some(l) = lang {
self.out.write_str(r#"><code class="language-"#)?; self.out.write_str(r#"><code class="language-"#)?;
self.write_escape(l)?; self.write_attr(l)?;
self.out.write_str(r#"">"#)?; self.out.write_str(r#"">"#)?;
} else { } else {
self.out.write_str("><code>")?; self.out.write_str("><code>")?;
@ -388,7 +388,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
} }
} }
Event::Str(s) => match self.raw { Event::Str(s) => match self.raw {
Raw::None => self.write_escape(&s)?, Raw::None => self.write_text(&s)?,
Raw::Html => self.out.write_str(&s)?, Raw::Html => self.out.write_str(&s)?,
Raw::Other => {} Raw::Other => {}
}, },
@ -415,7 +415,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
self.out.write_str("\n<hr")?; self.out.write_str("\n<hr")?;
for (a, v) in attrs.iter() { for (a, v) in attrs.iter() {
write!(self.out, r#" {}=""#, a)?; write!(self.out, r#" {}=""#, a)?;
self.write_escape(v)?; v.parts().try_for_each(|part| self.write_attr(part))?;
self.out.write_char('"')?; self.out.write_char('"')?;
} }
self.out.write_str(">")?; self.out.write_str(">")?;
@ -430,13 +430,14 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
Ok(()) Ok(())
} }
fn write_escape(&mut self, mut s: &str) -> std::fmt::Result { fn write_escape(&mut self, mut s: &str, escape_quotes: bool) -> std::fmt::Result {
let mut ent = ""; let mut ent = "";
while let Some(i) = s.find(|c| { while let Some(i) = s.find(|c| {
match c { match c {
'<' => Some("&lt;"), '<' => Some("&lt;"),
'>' => Some("&gt;"), '>' => Some("&gt;"),
'&' => Some("&amp;"), '&' => Some("&amp;"),
'"' if escape_quotes => Some("&quot;"),
_ => None, _ => None,
} }
.map_or(false, |s| { .map_or(false, |s| {
@ -450,4 +451,12 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
} }
self.out.write_str(s) self.out.write_str(s)
} }
/// Writes `s` as text content: it is passed through `write_escape` with quote escaping
/// turned off, so double quotes are written unchanged.
fn write_text(&mut self, s: &str) -> std::fmt::Result {
    let escape_quotes = false;
    self.write_escape(s, escape_quotes)
}
/// Writes `s` as part of a double-quoted attribute value: it is passed through
/// `write_escape` with quote escaping turned on, so `"` is written as `&quot;`.
fn write_attr(&mut self, s: &str) -> std::fmt::Result {
    let escape_quotes = true;
    self.write_escape(s, escape_quotes)
}
} }

View file

@ -63,7 +63,7 @@ mod tree;
use span::DiscontinuousString; use span::DiscontinuousString;
use span::Span; use span::Span;
pub use attr::Attributes; pub use attr::{AttributeValue, AttributeValueParts, Attributes};
type CowStr<'s> = std::borrow::Cow<'s, str>; type CowStr<'s> = std::borrow::Cow<'s, str>;

View file

@ -1,7 +1,5 @@
38d85f9:multi-line block attributes 38d85f9:multi-line block attributes
6c14561:multi-line block attributes 6c14561:multi-line block attributes
48546bb:escape in attributes
6bc4257:escape in attributes
613a9d6:attribute container precedence 613a9d6:attribute container precedence
f4f22fc:attribute key class order f4f22fc:attribute key class order
ae6fc15:bugged left/right quote ae6fc15:bugged left/right quote