PR #19 Support escapes in attributes
This commit is contained in:
commit
16491a4a99
4 changed files with 152 additions and 34 deletions
157
src/attr.rs
157
src/attr.rs
|
@ -1,6 +1,8 @@
|
||||||
use crate::CowStr;
|
use crate::CowStr;
|
||||||
use crate::DiscontinuousString;
|
use crate::DiscontinuousString;
|
||||||
use crate::Span;
|
use crate::Span;
|
||||||
|
use std::borrow::Cow;
|
||||||
|
use std::fmt;
|
||||||
|
|
||||||
use State::*;
|
use State::*;
|
||||||
|
|
||||||
|
@ -24,12 +26,80 @@ pub fn valid<I: Iterator<Item = char>>(chars: I) -> (usize, bool) {
|
||||||
(p.pos, has_attr)
|
(p.pos, has_attr)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Stores an attribute value that supports backslash escapes of ASCII punctuation upon displaying,
|
||||||
|
/// without allocating.
|
||||||
|
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||||
|
pub struct AttributeValue<'s> {
|
||||||
|
raw: CowStr<'s>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'s> AttributeValue<'s> {
|
||||||
|
/// Processes the attribute value escapes and returns an iterator of the parts of the value
|
||||||
|
/// that should be displayed.
|
||||||
|
pub fn parts(&'s self) -> AttributeValueParts<'s> {
|
||||||
|
AttributeValueParts { ahead: &self.raw }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'s> From<&'s str> for AttributeValue<'s> {
|
||||||
|
fn from(value: &'s str) -> Self {
|
||||||
|
Self { raw: value.into() }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'s> From<CowStr<'s>> for AttributeValue<'s> {
|
||||||
|
fn from(value: CowStr<'s>) -> Self {
|
||||||
|
Self { raw: value }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'s> From<String> for AttributeValue<'s> {
|
||||||
|
fn from(value: String) -> Self {
|
||||||
|
Self { raw: value.into() }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'s> fmt::Display for AttributeValue<'s> {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
self.parts().try_for_each(|part| f.write_str(part))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An iterator over the parts of an [`AttributeValue`] that should be displayed.
|
||||||
|
pub struct AttributeValueParts<'s> {
|
||||||
|
ahead: &'s str,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'s> Iterator for AttributeValueParts<'s> {
|
||||||
|
type Item = &'s str;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
for (i, _) in self.ahead.match_indices('\\') {
|
||||||
|
match self.ahead.as_bytes().get(i + 1) {
|
||||||
|
Some(b'\\') => {
|
||||||
|
let next = &self.ahead[..i + 1];
|
||||||
|
self.ahead = &self.ahead[i + 2..];
|
||||||
|
return Some(next);
|
||||||
|
}
|
||||||
|
Some(c) if c.is_ascii_punctuation() => {
|
||||||
|
let next = &self.ahead[..i];
|
||||||
|
self.ahead = &self.ahead[i + 1..];
|
||||||
|
return Some(next);
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
(!self.ahead.is_empty()).then(|| std::mem::take(&mut self.ahead))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// A collection of attributes, i.e. a key-value map.
|
/// A collection of attributes, i.e. a key-value map.
|
||||||
// Attributes are relatively rare, we choose to pay 8 bytes always and sometimes an extra
|
// Attributes are relatively rare, we choose to pay 8 bytes always and sometimes an extra
|
||||||
// indirection instead of always 24 bytes.
|
// indirection instead of always 24 bytes.
|
||||||
#[allow(clippy::box_vec)]
|
#[allow(clippy::box_vec)]
|
||||||
#[derive(Debug, Clone, PartialEq, Eq, Default)]
|
#[derive(Debug, Clone, PartialEq, Eq, Default)]
|
||||||
pub struct Attributes<'s>(Option<Box<Vec<(&'s str, CowStr<'s>)>>>);
|
pub struct Attributes<'s>(Option<Box<Vec<(&'s str, AttributeValue<'s>)>>>);
|
||||||
|
|
||||||
impl<'s> Attributes<'s> {
|
impl<'s> Attributes<'s> {
|
||||||
/// Create an empty collection.
|
/// Create an empty collection.
|
||||||
|
@ -44,17 +114,19 @@ impl<'s> Attributes<'s> {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn parse<S: DiscontinuousString<'s>>(&mut self, input: S) -> bool {
|
pub(crate) fn parse<S: DiscontinuousString<'s>>(&mut self, input: S) -> bool {
|
||||||
|
#[inline]
|
||||||
|
fn borrow(cow: CowStr) -> &str {
|
||||||
|
match cow {
|
||||||
|
Cow::Owned(_) => panic!(),
|
||||||
|
Cow::Borrowed(s) => s,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for elem in Parser::new(input.chars()) {
|
for elem in Parser::new(input.chars()) {
|
||||||
match elem {
|
match elem {
|
||||||
Element::Class(c) => self.insert("class", input.src(c)),
|
Element::Class(c) => self.insert("class", input.src(c).into()),
|
||||||
Element::Identifier(i) => self.insert("id", input.src(i)),
|
Element::Identifier(i) => self.insert("id", input.src(i).into()),
|
||||||
Element::Attribute(a, v) => self.insert(
|
Element::Attribute(a, v) => self.insert(borrow(input.src(a)), input.src(v).into()),
|
||||||
match input.src(a) {
|
|
||||||
CowStr::Owned(_) => panic!(),
|
|
||||||
CowStr::Borrowed(s) => s,
|
|
||||||
},
|
|
||||||
input.src(v),
|
|
||||||
),
|
|
||||||
Element::Invalid => return false,
|
Element::Invalid => return false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -79,7 +151,7 @@ impl<'s> Attributes<'s> {
|
||||||
/// Insert an attribute. If the attribute already exists, the previous value will be
|
/// Insert an attribute. If the attribute already exists, the previous value will be
|
||||||
/// overwritten, unless it is a "class" attribute. In that case the provided value will be
|
/// overwritten, unless it is a "class" attribute. In that case the provided value will be
|
||||||
/// appended to the existing value.
|
/// appended to the existing value.
|
||||||
pub fn insert(&mut self, key: &'s str, val: CowStr<'s>) {
|
pub fn insert(&mut self, key: &'s str, val: AttributeValue<'s>) {
|
||||||
if self.0.is_none() {
|
if self.0.is_none() {
|
||||||
self.0 = Some(Vec::new().into());
|
self.0 = Some(Vec::new().into());
|
||||||
};
|
};
|
||||||
|
@ -105,15 +177,13 @@ impl<'s> Attributes<'s> {
|
||||||
|
|
||||||
/// Returns a reference to the value corresponding to the attribute key.
|
/// Returns a reference to the value corresponding to the attribute key.
|
||||||
#[must_use]
|
#[must_use]
|
||||||
pub fn get(&self, key: &str) -> Option<&str> {
|
pub fn get(&self, key: &str) -> Option<&AttributeValue<'s>> {
|
||||||
self.iter().find(|(k, _)| *k == key).map(|(_, v)| v)
|
self.iter().find(|(k, _)| *k == key).map(|(_, v)| v)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns an iterator over the attributes in undefined order.
|
/// Returns an iterator over the attributes in undefined order.
|
||||||
pub fn iter(&self) -> impl Iterator<Item = (&'s str, &str)> + '_ {
|
pub fn iter(&self) -> impl Iterator<Item = (&'s str, &AttributeValue<'s>)> + '_ {
|
||||||
self.0
|
self.0.iter().flat_map(|v| v.iter().map(|(a, b)| (*a, b)))
|
||||||
.iter()
|
|
||||||
.flat_map(|v| v.iter().map(|(a, b)| (*a, b.as_ref())))
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -231,13 +301,17 @@ impl<I: Iterator<Item = char>> Parser<I> {
|
||||||
Invalid
|
Invalid
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ValueQuoted => {
|
ValueQuoted => match c {
|
||||||
if c == '"' {
|
'\\' => {
|
||||||
Whitespace
|
if let Some(c) = self.chars.next() {
|
||||||
} else {
|
self.pos_prev = self.pos;
|
||||||
|
self.pos += c.len_utf8();
|
||||||
|
}
|
||||||
ValueQuoted
|
ValueQuoted
|
||||||
}
|
}
|
||||||
}
|
'"' => Whitespace,
|
||||||
|
_ => ValueQuoted,
|
||||||
|
},
|
||||||
Invalid | Done => panic!("{:?}", self.state),
|
Invalid | Done => panic!("{:?}", self.state),
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
@ -330,11 +404,14 @@ mod test {
|
||||||
macro_rules! test_attr {
|
macro_rules! test_attr {
|
||||||
($src:expr $(,$($av:expr),* $(,)?)?) => {
|
($src:expr $(,$($av:expr),* $(,)?)?) => {
|
||||||
#[allow(unused)]
|
#[allow(unused)]
|
||||||
let mut attr =super::Attributes::new();
|
let mut attr = super::Attributes::new();
|
||||||
attr.parse($src);
|
attr.parse($src);
|
||||||
let actual = attr.iter().collect::<Vec<_>>();
|
let actual = attr.iter().collect::<Vec<_>>();
|
||||||
let expected = &[$($($av),*,)?];
|
let expected = &[$($($av),*,)?];
|
||||||
assert_eq!(actual, expected, "\n\n{}\n\n", $src);
|
for i in 0..actual.len() {
|
||||||
|
let actual_val = format!("{}", actual[i].1);
|
||||||
|
assert_eq!((actual[i].0, actual_val.as_str()), expected[i], "\n\n{}\n\n", $src);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -394,6 +471,40 @@ mod test {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn escape() {
|
||||||
|
test_attr!(
|
||||||
|
r#"{attr="with escaped \~ char"}"#,
|
||||||
|
("attr", "with escaped ~ char")
|
||||||
|
);
|
||||||
|
test_attr!(
|
||||||
|
r#"{key="quotes \" should be escaped"}"#,
|
||||||
|
("key", r#"quotes " should be escaped"#)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn escape_backslash() {
|
||||||
|
test_attr!(r#"{attr="with\\backslash"}"#, ("attr", r"with\backslash"));
|
||||||
|
test_attr!(
|
||||||
|
r#"{attr="with many backslashes\\\\"}"#,
|
||||||
|
("attr", r"with many backslashes\\")
|
||||||
|
);
|
||||||
|
test_attr!(
|
||||||
|
r#"{attr="\\escaped backslash at start"}"#,
|
||||||
|
("attr", r"\escaped backslash at start")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn only_escape_punctuation() {
|
||||||
|
test_attr!(r#"{attr="do not \escape"}"#, ("attr", r"do not \escape"));
|
||||||
|
test_attr!(
|
||||||
|
r#"{attr="\backslash at the beginning"}"#,
|
||||||
|
("attr", r"\backslash at the beginning")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn valid_full() {
|
fn valid_full() {
|
||||||
let src = "{.class %comment%}";
|
let src = "{.class %comment%}";
|
||||||
|
|
25
src/html.rs
25
src/html.rs
|
@ -166,7 +166,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
|
||||||
self.out.write_str("<a")?;
|
self.out.write_str("<a")?;
|
||||||
} else {
|
} else {
|
||||||
self.out.write_str(r#"<a href=""#)?;
|
self.out.write_str(r#"<a href=""#)?;
|
||||||
self.write_escape(dst)?;
|
self.write_attr(dst)?;
|
||||||
self.out.write_char('"')?;
|
self.out.write_char('"')?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -194,7 +194,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
|
||||||
|
|
||||||
for (a, v) in attrs.iter().filter(|(a, _)| *a != "class") {
|
for (a, v) in attrs.iter().filter(|(a, _)| *a != "class") {
|
||||||
write!(self.out, r#" {}=""#, a)?;
|
write!(self.out, r#" {}=""#, a)?;
|
||||||
self.write_escape(v)?;
|
v.parts().try_for_each(|part| self.write_attr(part))?;
|
||||||
self.out.write_char('"')?;
|
self.out.write_char('"')?;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -207,7 +207,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
|
||||||
{
|
{
|
||||||
if !attrs.iter().any(|(a, _)| a == "id") {
|
if !attrs.iter().any(|(a, _)| a == "id") {
|
||||||
self.out.write_str(r#" id=""#)?;
|
self.out.write_str(r#" id=""#)?;
|
||||||
self.write_escape(id)?;
|
self.write_attr(id)?;
|
||||||
self.out.write_char('"')?;
|
self.out.write_char('"')?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -249,7 +249,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
|
||||||
self.out.write_char(' ')?;
|
self.out.write_char(' ')?;
|
||||||
}
|
}
|
||||||
first_written = true;
|
first_written = true;
|
||||||
self.out.write_str(cls)?;
|
cls.parts().try_for_each(|part| self.write_attr(part))?;
|
||||||
}
|
}
|
||||||
// div class goes after classes from attrs
|
// div class goes after classes from attrs
|
||||||
if let Container::Div { class: Some(cls) } = c {
|
if let Container::Div { class: Some(cls) } = c {
|
||||||
|
@ -276,7 +276,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
|
||||||
Container::CodeBlock { lang } => {
|
Container::CodeBlock { lang } => {
|
||||||
if let Some(l) = lang {
|
if let Some(l) = lang {
|
||||||
self.out.write_str(r#"><code class="language-"#)?;
|
self.out.write_str(r#"><code class="language-"#)?;
|
||||||
self.write_escape(l)?;
|
self.write_attr(l)?;
|
||||||
self.out.write_str(r#"">"#)?;
|
self.out.write_str(r#"">"#)?;
|
||||||
} else {
|
} else {
|
||||||
self.out.write_str("><code>")?;
|
self.out.write_str("><code>")?;
|
||||||
|
@ -388,7 +388,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Event::Str(s) => match self.raw {
|
Event::Str(s) => match self.raw {
|
||||||
Raw::None => self.write_escape(&s)?,
|
Raw::None => self.write_text(&s)?,
|
||||||
Raw::Html => self.out.write_str(&s)?,
|
Raw::Html => self.out.write_str(&s)?,
|
||||||
Raw::Other => {}
|
Raw::Other => {}
|
||||||
},
|
},
|
||||||
|
@ -415,7 +415,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
|
||||||
self.out.write_str("\n<hr")?;
|
self.out.write_str("\n<hr")?;
|
||||||
for (a, v) in attrs.iter() {
|
for (a, v) in attrs.iter() {
|
||||||
write!(self.out, r#" {}=""#, a)?;
|
write!(self.out, r#" {}=""#, a)?;
|
||||||
self.write_escape(v)?;
|
v.parts().try_for_each(|part| self.write_attr(part))?;
|
||||||
self.out.write_char('"')?;
|
self.out.write_char('"')?;
|
||||||
}
|
}
|
||||||
self.out.write_str(">")?;
|
self.out.write_str(">")?;
|
||||||
|
@ -430,13 +430,14 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write_escape(&mut self, mut s: &str) -> std::fmt::Result {
|
fn write_escape(&mut self, mut s: &str, escape_quotes: bool) -> std::fmt::Result {
|
||||||
let mut ent = "";
|
let mut ent = "";
|
||||||
while let Some(i) = s.find(|c| {
|
while let Some(i) = s.find(|c| {
|
||||||
match c {
|
match c {
|
||||||
'<' => Some("&lt;"),
|
'<' => Some("&lt;"),
|
||||||
'>' => Some("&gt;"),
|
'>' => Some("&gt;"),
|
||||||
'&' => Some("&amp;"),
|
'&' => Some("&amp;"),
|
||||||
|
'"' if escape_quotes => Some("&quot;"),
|
||||||
_ => None,
|
_ => None,
|
||||||
}
|
}
|
||||||
.map_or(false, |s| {
|
.map_or(false, |s| {
|
||||||
|
@ -450,4 +451,12 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
|
||||||
}
|
}
|
||||||
self.out.write_str(s)
|
self.out.write_str(s)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn write_text(&mut self, s: &str) -> std::fmt::Result {
|
||||||
|
self.write_escape(s, false)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn write_attr(&mut self, s: &str) -> std::fmt::Result {
|
||||||
|
self.write_escape(s, true)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -63,7 +63,7 @@ mod tree;
|
||||||
use span::DiscontinuousString;
|
use span::DiscontinuousString;
|
||||||
use span::Span;
|
use span::Span;
|
||||||
|
|
||||||
pub use attr::Attributes;
|
pub use attr::{AttributeValue, AttributeValueParts, Attributes};
|
||||||
|
|
||||||
type CowStr<'s> = std::borrow::Cow<'s, str>;
|
type CowStr<'s> = std::borrow::Cow<'s, str>;
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,5 @@
|
||||||
38d85f9:multi-line block attributes
|
38d85f9:multi-line block attributes
|
||||||
6c14561:multi-line block attributes
|
6c14561:multi-line block attributes
|
||||||
48546bb:escape in attributes
|
|
||||||
6bc4257:escape in attributes
|
|
||||||
613a9d6:attribute container precedence
|
613a9d6:attribute container precedence
|
||||||
f4f22fc:attribute key class order
|
f4f22fc:attribute key class order
|
||||||
ae6fc15:bugged left/right quote
|
ae6fc15:bugged left/right quote
|
||||||
|
|
Loading…
Reference in a new issue