PR #19 Support escapes in attributes
This commit is contained in:
commit
16491a4a99
4 changed files with 152 additions and 34 deletions
157
src/attr.rs
157
src/attr.rs
|
@ -1,6 +1,8 @@
|
|||
use crate::CowStr;
|
||||
use crate::DiscontinuousString;
|
||||
use crate::Span;
|
||||
use std::borrow::Cow;
|
||||
use std::fmt;
|
||||
|
||||
use State::*;
|
||||
|
||||
|
@ -24,12 +26,80 @@ pub fn valid<I: Iterator<Item = char>>(chars: I) -> (usize, bool) {
|
|||
(p.pos, has_attr)
|
||||
}
|
||||
|
||||
/// Stores an attribute value that supports backslash escapes of ASCII punctuation upon displaying,
|
||||
/// without allocating.
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
pub struct AttributeValue<'s> {
|
||||
raw: CowStr<'s>,
|
||||
}
|
||||
|
||||
impl<'s> AttributeValue<'s> {
|
||||
/// Processes the attribute value escapes and returns an iterator of the parts of the value
|
||||
/// that should be displayed.
|
||||
pub fn parts(&'s self) -> AttributeValueParts<'s> {
|
||||
AttributeValueParts { ahead: &self.raw }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> From<&'s str> for AttributeValue<'s> {
|
||||
fn from(value: &'s str) -> Self {
|
||||
Self { raw: value.into() }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> From<CowStr<'s>> for AttributeValue<'s> {
|
||||
fn from(value: CowStr<'s>) -> Self {
|
||||
Self { raw: value }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> From<String> for AttributeValue<'s> {
|
||||
fn from(value: String) -> Self {
|
||||
Self { raw: value.into() }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'s> fmt::Display for AttributeValue<'s> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
self.parts().try_for_each(|part| f.write_str(part))
|
||||
}
|
||||
}
|
||||
|
||||
/// An iterator over the parts of an [`AttributeValue`] that should be displayed.
///
/// Each item is a non-empty sub-slice of the raw value. Concatenating all items yields the value
/// with its escapes resolved:
///
/// - a backslash followed by ASCII punctuation is dropped and the punctuation is kept,
/// - any other backslash (followed by a non-punctuation character, or at the very end) is kept
///   verbatim.
#[derive(Clone, Debug)]
pub struct AttributeValueParts<'s> {
    /// The part of the raw value that has not been yielded yet.
    ahead: &'s str,
}

impl<'s> Iterator for AttributeValueParts<'s> {
    type Item = &'s str;

    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let src = self.ahead;

            // Locate the first backslash that really starts an escape. The result is
            // `(end, resume)`: `src[..end]` is the text to yield and `src[resume..]` is where
            // scanning continues. All offsets are char boundaries because both the backslash and
            // the byte following it are ASCII.
            let escape = src
                .match_indices('\\')
                .find_map(|(i, _)| match src.as_bytes().get(i + 1) {
                    // `\\`: yield up to and including the first backslash, skip the second.
                    Some(b'\\') => Some((i + 1, i + 2)),
                    // `\<punct>`: yield up to the backslash and drop it; the punctuation
                    // character becomes the start of the remaining text.
                    Some(c) if c.is_ascii_punctuation() => Some((i, i + 1)),
                    // Not an escape, the backslash is literal.
                    _ => None,
                });

            let part = match escape {
                Some((end, resume)) => {
                    self.ahead = &src[resume..];
                    &src[..end]
                }
                // No escape left, the whole remainder is the final part.
                None => std::mem::take(&mut self.ahead),
            };

            if !part.is_empty() {
                return Some(part);
            }
            // An empty part arises when an escape sits at the very start of the remaining text
            // (e.g. `\~` or two escapes in a row). Skip it instead of yielding `""`.
            if self.ahead.is_empty() {
                return None;
            }
        }
    }
}

// Once the remaining text is empty `next` keeps returning `None`.
impl<'s> std::iter::FusedIterator for AttributeValueParts<'s> {}
|
||||
|
||||
/// A collection of attributes, i.e. a key-value map.
|
||||
// Attributes are relatively rare, we choose to pay 8 bytes always and sometimes an extra
|
||||
// indirection instead of always 24 bytes.
|
||||
#[allow(clippy::box_vec)]
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Default)]
|
||||
pub struct Attributes<'s>(Option<Box<Vec<(&'s str, CowStr<'s>)>>>);
|
||||
pub struct Attributes<'s>(Option<Box<Vec<(&'s str, AttributeValue<'s>)>>>);
|
||||
|
||||
impl<'s> Attributes<'s> {
|
||||
/// Create an empty collection.
|
||||
|
@ -44,17 +114,19 @@ impl<'s> Attributes<'s> {
|
|||
}
|
||||
|
||||
pub(crate) fn parse<S: DiscontinuousString<'s>>(&mut self, input: S) -> bool {
|
||||
#[inline]
|
||||
fn borrow(cow: CowStr) -> &str {
|
||||
match cow {
|
||||
Cow::Owned(_) => panic!(),
|
||||
Cow::Borrowed(s) => s,
|
||||
}
|
||||
}
|
||||
|
||||
for elem in Parser::new(input.chars()) {
|
||||
match elem {
|
||||
Element::Class(c) => self.insert("class", input.src(c)),
|
||||
Element::Identifier(i) => self.insert("id", input.src(i)),
|
||||
Element::Attribute(a, v) => self.insert(
|
||||
match input.src(a) {
|
||||
CowStr::Owned(_) => panic!(),
|
||||
CowStr::Borrowed(s) => s,
|
||||
},
|
||||
input.src(v),
|
||||
),
|
||||
Element::Class(c) => self.insert("class", input.src(c).into()),
|
||||
Element::Identifier(i) => self.insert("id", input.src(i).into()),
|
||||
Element::Attribute(a, v) => self.insert(borrow(input.src(a)), input.src(v).into()),
|
||||
Element::Invalid => return false,
|
||||
}
|
||||
}
|
||||
|
@ -79,7 +151,7 @@ impl<'s> Attributes<'s> {
|
|||
/// Insert an attribute. If the attribute already exists, the previous value will be
|
||||
/// overwritten, unless it is a "class" attribute. In that case the provided value will be
|
||||
/// appended to the existing value.
|
||||
pub fn insert(&mut self, key: &'s str, val: CowStr<'s>) {
|
||||
pub fn insert(&mut self, key: &'s str, val: AttributeValue<'s>) {
|
||||
if self.0.is_none() {
|
||||
self.0 = Some(Vec::new().into());
|
||||
};
|
||||
|
@ -105,15 +177,13 @@ impl<'s> Attributes<'s> {
|
|||
|
||||
/// Returns a reference to the value corresponding to the attribute key.
|
||||
#[must_use]
|
||||
pub fn get(&self, key: &str) -> Option<&str> {
|
||||
pub fn get(&self, key: &str) -> Option<&AttributeValue<'s>> {
|
||||
self.iter().find(|(k, _)| *k == key).map(|(_, v)| v)
|
||||
}
|
||||
|
||||
/// Returns an iterator over the attributes in undefined order.
|
||||
pub fn iter(&self) -> impl Iterator<Item = (&'s str, &str)> + '_ {
|
||||
self.0
|
||||
.iter()
|
||||
.flat_map(|v| v.iter().map(|(a, b)| (*a, b.as_ref())))
|
||||
pub fn iter(&self) -> impl Iterator<Item = (&'s str, &AttributeValue<'s>)> + '_ {
|
||||
self.0.iter().flat_map(|v| v.iter().map(|(a, b)| (*a, b)))
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -231,13 +301,17 @@ impl<I: Iterator<Item = char>> Parser<I> {
|
|||
Invalid
|
||||
}
|
||||
}
|
||||
ValueQuoted => {
|
||||
if c == '"' {
|
||||
Whitespace
|
||||
} else {
|
||||
ValueQuoted => match c {
|
||||
'\\' => {
|
||||
if let Some(c) = self.chars.next() {
|
||||
self.pos_prev = self.pos;
|
||||
self.pos += c.len_utf8();
|
||||
}
|
||||
ValueQuoted
|
||||
}
|
||||
}
|
||||
'"' => Whitespace,
|
||||
_ => ValueQuoted,
|
||||
},
|
||||
Invalid | Done => panic!("{:?}", self.state),
|
||||
}
|
||||
})
|
||||
|
@ -330,11 +404,14 @@ mod test {
|
|||
macro_rules! test_attr {
|
||||
($src:expr $(,$($av:expr),* $(,)?)?) => {
|
||||
#[allow(unused)]
|
||||
let mut attr =super::Attributes::new();
|
||||
let mut attr = super::Attributes::new();
|
||||
attr.parse($src);
|
||||
let actual = attr.iter().collect::<Vec<_>>();
|
||||
let expected = &[$($($av),*,)?];
|
||||
assert_eq!(actual, expected, "\n\n{}\n\n", $src);
|
||||
for i in 0..actual.len() {
|
||||
let actual_val = format!("{}", actual[i].1);
|
||||
assert_eq!((actual[i].0, actual_val.as_str()), expected[i], "\n\n{}\n\n", $src);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -394,6 +471,40 @@ mod test {
|
|||
);
|
||||
}
|
||||
|
||||
/// A backslash in front of ASCII punctuation inside a quoted value is not displayed.
#[test]
fn escape() {
    // Escaped tilde in the middle of the value.
    let tilde = r#"{attr="with escaped \~ char"}"#;
    test_attr!(tilde, ("attr", "with escaped ~ char"));

    // An escaped double quote must not terminate the quoted value.
    let quote = r#"{key="quotes \" should be escaped"}"#;
    test_attr!(quote, ("key", r#"quotes " should be escaped"#));
}
|
||||
|
||||
/// An escaped backslash (`\\`) is displayed as a single backslash, wherever it appears.
#[test]
fn escape_backslash() {
    // In the middle of the value.
    let middle = r#"{attr="with\\backslash"}"#;
    test_attr!(middle, ("attr", r"with\backslash"));

    // Two escaped backslashes in a row at the end of the value.
    let trailing = r#"{attr="with many backslashes\\\\"}"#;
    test_attr!(trailing, ("attr", r"with many backslashes\\"));

    // At the very start of the value.
    let leading = r#"{attr="\\escaped backslash at start"}"#;
    test_attr!(leading, ("attr", r"\escaped backslash at start"));
}
|
||||
|
||||
/// A backslash that is not followed by ASCII punctuation is kept as a literal backslash.
#[test]
fn only_escape_punctuation() {
    // Backslash before a letter, in the middle of the value.
    let inner = r#"{attr="do not \escape"}"#;
    test_attr!(inner, ("attr", r"do not \escape"));

    // Backslash before a letter, at the very start of the value.
    let leading = r#"{attr="\backslash at the beginning"}"#;
    test_attr!(leading, ("attr", r"\backslash at the beginning"));
}
|
||||
|
||||
#[test]
|
||||
fn valid_full() {
|
||||
let src = "{.class %comment%}";
|
||||
|
|
25
src/html.rs
25
src/html.rs
|
@ -166,7 +166,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
|
|||
self.out.write_str("<a")?;
|
||||
} else {
|
||||
self.out.write_str(r#"<a href=""#)?;
|
||||
self.write_escape(dst)?;
|
||||
self.write_attr(dst)?;
|
||||
self.out.write_char('"')?;
|
||||
}
|
||||
}
|
||||
|
@ -194,7 +194,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
|
|||
|
||||
for (a, v) in attrs.iter().filter(|(a, _)| *a != "class") {
|
||||
write!(self.out, r#" {}=""#, a)?;
|
||||
self.write_escape(v)?;
|
||||
v.parts().try_for_each(|part| self.write_attr(part))?;
|
||||
self.out.write_char('"')?;
|
||||
}
|
||||
|
||||
|
@ -207,7 +207,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
|
|||
{
|
||||
if !attrs.iter().any(|(a, _)| a == "id") {
|
||||
self.out.write_str(r#" id=""#)?;
|
||||
self.write_escape(id)?;
|
||||
self.write_attr(id)?;
|
||||
self.out.write_char('"')?;
|
||||
}
|
||||
}
|
||||
|
@ -249,7 +249,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
|
|||
self.out.write_char(' ')?;
|
||||
}
|
||||
first_written = true;
|
||||
self.out.write_str(cls)?;
|
||||
cls.parts().try_for_each(|part| self.write_attr(part))?;
|
||||
}
|
||||
// div class goes after classes from attrs
|
||||
if let Container::Div { class: Some(cls) } = c {
|
||||
|
@ -276,7 +276,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
|
|||
Container::CodeBlock { lang } => {
|
||||
if let Some(l) = lang {
|
||||
self.out.write_str(r#"><code class="language-"#)?;
|
||||
self.write_escape(l)?;
|
||||
self.write_attr(l)?;
|
||||
self.out.write_str(r#"">"#)?;
|
||||
} else {
|
||||
self.out.write_str("><code>")?;
|
||||
|
@ -388,7 +388,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
|
|||
}
|
||||
}
|
||||
Event::Str(s) => match self.raw {
|
||||
Raw::None => self.write_escape(&s)?,
|
||||
Raw::None => self.write_text(&s)?,
|
||||
Raw::Html => self.out.write_str(&s)?,
|
||||
Raw::Other => {}
|
||||
},
|
||||
|
@ -415,7 +415,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
|
|||
self.out.write_str("\n<hr")?;
|
||||
for (a, v) in attrs.iter() {
|
||||
write!(self.out, r#" {}=""#, a)?;
|
||||
self.write_escape(v)?;
|
||||
v.parts().try_for_each(|part| self.write_attr(part))?;
|
||||
self.out.write_char('"')?;
|
||||
}
|
||||
self.out.write_str(">")?;
|
||||
|
@ -430,13 +430,14 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
fn write_escape(&mut self, mut s: &str) -> std::fmt::Result {
|
||||
fn write_escape(&mut self, mut s: &str, escape_quotes: bool) -> std::fmt::Result {
|
||||
let mut ent = "";
|
||||
while let Some(i) = s.find(|c| {
|
||||
match c {
|
||||
'<' => Some("<"),
|
||||
'>' => Some(">"),
|
||||
'&' => Some("&"),
|
||||
'"' if escape_quotes => Some("""),
|
||||
_ => None,
|
||||
}
|
||||
.map_or(false, |s| {
|
||||
|
@ -450,4 +451,12 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
|
|||
}
|
||||
self.out.write_str(s)
|
||||
}
|
||||
|
||||
fn write_text(&mut self, s: &str) -> std::fmt::Result {
|
||||
self.write_escape(s, false)
|
||||
}
|
||||
|
||||
fn write_attr(&mut self, s: &str) -> std::fmt::Result {
|
||||
self.write_escape(s, true)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -63,7 +63,7 @@ mod tree;
|
|||
use span::DiscontinuousString;
|
||||
use span::Span;
|
||||
|
||||
pub use attr::Attributes;
|
||||
pub use attr::{AttributeValue, AttributeValueParts, Attributes};
|
||||
|
||||
type CowStr<'s> = std::borrow::Cow<'s, str>;
|
||||
|
||||
|
|
|
@ -1,7 +1,5 @@
|
|||
38d85f9:multi-line block attributes
|
||||
6c14561:multi-line block attributes
|
||||
48546bb:escape in attributes
|
||||
6bc4257:escape in attributes
|
||||
613a9d6:attribute container precedence
|
||||
f4f22fc:attribute key class order
|
||||
ae6fc15:bugged left/right quote
|
||||
|
|
Loading…
Reference in a new issue