PR #19 Support escapes in attributes
This commit is contained in:
		
				commit
				
					
						16491a4a99
					
				
			
		
					 4 changed files with 152 additions and 34 deletions
				
			
		
							
								
								
									
										157
									
								
								src/attr.rs
									
										
									
									
									
								
							
							
						
						
									
										157
									
								
								src/attr.rs
									
										
									
									
									
								
							|  | @ -1,6 +1,8 @@ | ||||||
| use crate::CowStr; | use crate::CowStr; | ||||||
| use crate::DiscontinuousString; | use crate::DiscontinuousString; | ||||||
| use crate::Span; | use crate::Span; | ||||||
|  | use std::borrow::Cow; | ||||||
|  | use std::fmt; | ||||||
| 
 | 
 | ||||||
| use State::*; | use State::*; | ||||||
| 
 | 
 | ||||||
|  | @ -24,12 +26,80 @@ pub fn valid<I: Iterator<Item = char>>(chars: I) -> (usize, bool) { | ||||||
|     (p.pos, has_attr) |     (p.pos, has_attr) | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | /// Stores an attribute value that supports backslash escapes of ASCII punctuation upon displaying,
 | ||||||
|  | /// without allocating.
 | ||||||
|  | #[derive(Clone, Debug, Eq, PartialEq)] | ||||||
|  | pub struct AttributeValue<'s> { | ||||||
|  |     raw: CowStr<'s>, | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | impl<'s> AttributeValue<'s> { | ||||||
|  |     /// Processes the attribute value escapes and returns an iterator of the parts of the value
 | ||||||
|  |     /// that should be displayed.
 | ||||||
|  |     pub fn parts(&'s self) -> AttributeValueParts<'s> { | ||||||
|  |         AttributeValueParts { ahead: &self.raw } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | impl<'s> From<&'s str> for AttributeValue<'s> { | ||||||
|  |     fn from(value: &'s str) -> Self { | ||||||
|  |         Self { raw: value.into() } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | impl<'s> From<CowStr<'s>> for AttributeValue<'s> { | ||||||
|  |     fn from(value: CowStr<'s>) -> Self { | ||||||
|  |         Self { raw: value } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | impl<'s> From<String> for AttributeValue<'s> { | ||||||
|  |     fn from(value: String) -> Self { | ||||||
|  |         Self { raw: value.into() } | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | impl<'s> fmt::Display for AttributeValue<'s> { | ||||||
|  |     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { | ||||||
|  |         self.parts().try_for_each(|part| f.write_str(part)) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | /// An iterator over the parts of an [`AttributeValue`] that should be displayed.
 | ||||||
|  | pub struct AttributeValueParts<'s> { | ||||||
|  |     ahead: &'s str, | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | impl<'s> Iterator for AttributeValueParts<'s> { | ||||||
|  |     type Item = &'s str; | ||||||
|  | 
 | ||||||
|  |     fn next(&mut self) -> Option<Self::Item> { | ||||||
|  |         for (i, _) in self.ahead.match_indices('\\') { | ||||||
|  |             match self.ahead.as_bytes().get(i + 1) { | ||||||
|  |                 Some(b'\\') => { | ||||||
|  |                     let next = &self.ahead[..i + 1]; | ||||||
|  |                     self.ahead = &self.ahead[i + 2..]; | ||||||
|  |                     return Some(next); | ||||||
|  |                 } | ||||||
|  |                 Some(c) if c.is_ascii_punctuation() => { | ||||||
|  |                     let next = &self.ahead[..i]; | ||||||
|  |                     self.ahead = &self.ahead[i + 1..]; | ||||||
|  |                     return Some(next); | ||||||
|  |                 } | ||||||
|  |                 _ => {} | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         (!self.ahead.is_empty()).then(|| std::mem::take(&mut self.ahead)) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
| /// A collection of attributes, i.e. a key-value map.
 | /// A collection of attributes, i.e. a key-value map.
 | ||||||
| // Attributes are relatively rare, we choose to pay 8 bytes always and sometimes an extra
 | // Attributes are relatively rare, we choose to pay 8 bytes always and sometimes an extra
 | ||||||
| // indirection instead of always 24 bytes.
 | // indirection instead of always 24 bytes.
 | ||||||
| #[allow(clippy::box_vec)] | #[allow(clippy::box_vec)] | ||||||
| #[derive(Debug, Clone, PartialEq, Eq, Default)] | #[derive(Debug, Clone, PartialEq, Eq, Default)] | ||||||
| pub struct Attributes<'s>(Option<Box<Vec<(&'s str, CowStr<'s>)>>>); | pub struct Attributes<'s>(Option<Box<Vec<(&'s str, AttributeValue<'s>)>>>); | ||||||
| 
 | 
 | ||||||
| impl<'s> Attributes<'s> { | impl<'s> Attributes<'s> { | ||||||
|     /// Create an empty collection.
 |     /// Create an empty collection.
 | ||||||
|  | @ -44,17 +114,19 @@ impl<'s> Attributes<'s> { | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     pub(crate) fn parse<S: DiscontinuousString<'s>>(&mut self, input: S) -> bool { |     pub(crate) fn parse<S: DiscontinuousString<'s>>(&mut self, input: S) -> bool { | ||||||
|  |         #[inline] | ||||||
|  |         fn borrow(cow: CowStr) -> &str { | ||||||
|  |             match cow { | ||||||
|  |                 Cow::Owned(_) => panic!(), | ||||||
|  |                 Cow::Borrowed(s) => s, | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|         for elem in Parser::new(input.chars()) { |         for elem in Parser::new(input.chars()) { | ||||||
|             match elem { |             match elem { | ||||||
|                 Element::Class(c) => self.insert("class", input.src(c)), |                 Element::Class(c) => self.insert("class", input.src(c).into()), | ||||||
|                 Element::Identifier(i) => self.insert("id", input.src(i)), |                 Element::Identifier(i) => self.insert("id", input.src(i).into()), | ||||||
|                 Element::Attribute(a, v) => self.insert( |                 Element::Attribute(a, v) => self.insert(borrow(input.src(a)), input.src(v).into()), | ||||||
|                     match input.src(a) { |  | ||||||
|                         CowStr::Owned(_) => panic!(), |  | ||||||
|                         CowStr::Borrowed(s) => s, |  | ||||||
|                     }, |  | ||||||
|                     input.src(v), |  | ||||||
|                 ), |  | ||||||
|                 Element::Invalid => return false, |                 Element::Invalid => return false, | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|  | @ -79,7 +151,7 @@ impl<'s> Attributes<'s> { | ||||||
|     /// Insert an attribute. If the attribute already exists, the previous value will be
 |     /// Insert an attribute. If the attribute already exists, the previous value will be
 | ||||||
|     /// overwritten, unless it is a "class" attribute. In that case the provided value will be
 |     /// overwritten, unless it is a "class" attribute. In that case the provided value will be
 | ||||||
|     /// appended to the existing value.
 |     /// appended to the existing value.
 | ||||||
|     pub fn insert(&mut self, key: &'s str, val: CowStr<'s>) { |     pub fn insert(&mut self, key: &'s str, val: AttributeValue<'s>) { | ||||||
|         if self.0.is_none() { |         if self.0.is_none() { | ||||||
|             self.0 = Some(Vec::new().into()); |             self.0 = Some(Vec::new().into()); | ||||||
|         }; |         }; | ||||||
|  | @ -105,15 +177,13 @@ impl<'s> Attributes<'s> { | ||||||
| 
 | 
 | ||||||
|     /// Returns a reference to the value corresponding to the attribute key.
 |     /// Returns a reference to the value corresponding to the attribute key.
 | ||||||
|     #[must_use] |     #[must_use] | ||||||
|     pub fn get(&self, key: &str) -> Option<&str> { |     pub fn get(&self, key: &str) -> Option<&AttributeValue<'s>> { | ||||||
|         self.iter().find(|(k, _)| *k == key).map(|(_, v)| v) |         self.iter().find(|(k, _)| *k == key).map(|(_, v)| v) | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     /// Returns an iterator over the attributes in undefined order.
 |     /// Returns an iterator over the attributes in undefined order.
 | ||||||
|     pub fn iter(&self) -> impl Iterator<Item = (&'s str, &str)> + '_ { |     pub fn iter(&self) -> impl Iterator<Item = (&'s str, &AttributeValue<'s>)> + '_ { | ||||||
|         self.0 |         self.0.iter().flat_map(|v| v.iter().map(|(a, b)| (*a, b))) | ||||||
|             .iter() |  | ||||||
|             .flat_map(|v| v.iter().map(|(a, b)| (*a, b.as_ref()))) |  | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -231,13 +301,17 @@ impl<I: Iterator<Item = char>> Parser<I> { | ||||||
|                         Invalid |                         Invalid | ||||||
|                     } |                     } | ||||||
|                 } |                 } | ||||||
|                 ValueQuoted => { |                 ValueQuoted => match c { | ||||||
|                     if c == '"' { |                     '\\' => { | ||||||
|                         Whitespace |                         if let Some(c) = self.chars.next() { | ||||||
|                     } else { |                             self.pos_prev = self.pos; | ||||||
|  |                             self.pos += c.len_utf8(); | ||||||
|  |                         } | ||||||
|                         ValueQuoted |                         ValueQuoted | ||||||
|                     } |                     } | ||||||
|                 } |                     '"' => Whitespace, | ||||||
|  |                     _ => ValueQuoted, | ||||||
|  |                 }, | ||||||
|                 Invalid | Done => panic!("{:?}", self.state), |                 Invalid | Done => panic!("{:?}", self.state), | ||||||
|             } |             } | ||||||
|         }) |         }) | ||||||
|  | @ -330,11 +404,14 @@ mod test { | ||||||
|     macro_rules! test_attr { |     macro_rules! test_attr { | ||||||
|         ($src:expr $(,$($av:expr),* $(,)?)?) => { |         ($src:expr $(,$($av:expr),* $(,)?)?) => { | ||||||
|             #[allow(unused)] |             #[allow(unused)] | ||||||
|             let mut attr =super::Attributes::new(); |             let mut attr = super::Attributes::new(); | ||||||
|             attr.parse($src); |             attr.parse($src); | ||||||
|             let actual = attr.iter().collect::<Vec<_>>(); |             let actual = attr.iter().collect::<Vec<_>>(); | ||||||
|             let expected = &[$($($av),*,)?]; |             let expected = &[$($($av),*,)?]; | ||||||
|             assert_eq!(actual, expected, "\n\n{}\n\n", $src); |             for i in 0..actual.len() { | ||||||
|  |                 let actual_val = format!("{}", actual[i].1); | ||||||
|  |                 assert_eq!((actual[i].0, actual_val.as_str()), expected[i], "\n\n{}\n\n", $src); | ||||||
|  |             } | ||||||
|         }; |         }; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  | @ -394,6 +471,40 @@ mod test { | ||||||
|         ); |         ); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     #[test] | ||||||
|  |     fn escape() { | ||||||
|  |         test_attr!( | ||||||
|  |             r#"{attr="with escaped \~ char"}"#, | ||||||
|  |             ("attr", "with escaped ~ char") | ||||||
|  |         ); | ||||||
|  |         test_attr!( | ||||||
|  |             r#"{key="quotes \" should be escaped"}"#, | ||||||
|  |             ("key", r#"quotes " should be escaped"#) | ||||||
|  |         ); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     #[test] | ||||||
|  |     fn escape_backslash() { | ||||||
|  |         test_attr!(r#"{attr="with\\backslash"}"#, ("attr", r"with\backslash")); | ||||||
|  |         test_attr!( | ||||||
|  |             r#"{attr="with many backslashes\\\\"}"#, | ||||||
|  |             ("attr", r"with many backslashes\\") | ||||||
|  |         ); | ||||||
|  |         test_attr!( | ||||||
|  |             r#"{attr="\\escaped backslash at start"}"#, | ||||||
|  |             ("attr", r"\escaped backslash at start") | ||||||
|  |         ); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     #[test] | ||||||
|  |     fn only_escape_punctuation() { | ||||||
|  |         test_attr!(r#"{attr="do not \escape"}"#, ("attr", r"do not \escape")); | ||||||
|  |         test_attr!( | ||||||
|  |             r#"{attr="\backslash at the beginning"}"#, | ||||||
|  |             ("attr", r"\backslash at the beginning") | ||||||
|  |         ); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     #[test] |     #[test] | ||||||
|     fn valid_full() { |     fn valid_full() { | ||||||
|         let src = "{.class %comment%}"; |         let src = "{.class %comment%}"; | ||||||
|  |  | ||||||
							
								
								
									
										25
									
								
								src/html.rs
									
										
									
									
									
								
							
							
						
						
									
										25
									
								
								src/html.rs
									
										
									
									
									
								
							|  | @ -166,7 +166,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> { | ||||||
|                                 self.out.write_str("<a")?; |                                 self.out.write_str("<a")?; | ||||||
|                             } else { |                             } else { | ||||||
|                                 self.out.write_str(r#"<a href=""#)?; |                                 self.out.write_str(r#"<a href=""#)?; | ||||||
|                                 self.write_escape(dst)?; |                                 self.write_attr(dst)?; | ||||||
|                                 self.out.write_char('"')?; |                                 self.out.write_char('"')?; | ||||||
|                             } |                             } | ||||||
|                         } |                         } | ||||||
|  | @ -194,7 +194,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> { | ||||||
| 
 | 
 | ||||||
|                     for (a, v) in attrs.iter().filter(|(a, _)| *a != "class") { |                     for (a, v) in attrs.iter().filter(|(a, _)| *a != "class") { | ||||||
|                         write!(self.out, r#" {}=""#, a)?; |                         write!(self.out, r#" {}=""#, a)?; | ||||||
|                         self.write_escape(v)?; |                         v.parts().try_for_each(|part| self.write_attr(part))?; | ||||||
|                         self.out.write_char('"')?; |                         self.out.write_char('"')?; | ||||||
|                     } |                     } | ||||||
| 
 | 
 | ||||||
|  | @ -207,7 +207,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> { | ||||||
|                     { |                     { | ||||||
|                         if !attrs.iter().any(|(a, _)| a == "id") { |                         if !attrs.iter().any(|(a, _)| a == "id") { | ||||||
|                             self.out.write_str(r#" id=""#)?; |                             self.out.write_str(r#" id=""#)?; | ||||||
|                             self.write_escape(id)?; |                             self.write_attr(id)?; | ||||||
|                             self.out.write_char('"')?; |                             self.out.write_char('"')?; | ||||||
|                         } |                         } | ||||||
|                     } |                     } | ||||||
|  | @ -249,7 +249,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> { | ||||||
|                                 self.out.write_char(' ')?; |                                 self.out.write_char(' ')?; | ||||||
|                             } |                             } | ||||||
|                             first_written = true; |                             first_written = true; | ||||||
|                             self.out.write_str(cls)?; |                             cls.parts().try_for_each(|part| self.write_attr(part))?; | ||||||
|                         } |                         } | ||||||
|                         // div class goes after classes from attrs
 |                         // div class goes after classes from attrs
 | ||||||
|                         if let Container::Div { class: Some(cls) } = c { |                         if let Container::Div { class: Some(cls) } = c { | ||||||
|  | @ -276,7 +276,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> { | ||||||
|                         Container::CodeBlock { lang } => { |                         Container::CodeBlock { lang } => { | ||||||
|                             if let Some(l) = lang { |                             if let Some(l) = lang { | ||||||
|                                 self.out.write_str(r#"><code class="language-"#)?; |                                 self.out.write_str(r#"><code class="language-"#)?; | ||||||
|                                 self.write_escape(l)?; |                                 self.write_attr(l)?; | ||||||
|                                 self.out.write_str(r#"">"#)?; |                                 self.out.write_str(r#"">"#)?; | ||||||
|                             } else { |                             } else { | ||||||
|                                 self.out.write_str("><code>")?; |                                 self.out.write_str("><code>")?; | ||||||
|  | @ -388,7 +388,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> { | ||||||
|                     } |                     } | ||||||
|                 } |                 } | ||||||
|                 Event::Str(s) => match self.raw { |                 Event::Str(s) => match self.raw { | ||||||
|                     Raw::None => self.write_escape(&s)?, |                     Raw::None => self.write_text(&s)?, | ||||||
|                     Raw::Html => self.out.write_str(&s)?, |                     Raw::Html => self.out.write_str(&s)?, | ||||||
|                     Raw::Other => {} |                     Raw::Other => {} | ||||||
|                 }, |                 }, | ||||||
|  | @ -415,7 +415,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> { | ||||||
|                     self.out.write_str("\n<hr")?; |                     self.out.write_str("\n<hr")?; | ||||||
|                     for (a, v) in attrs.iter() { |                     for (a, v) in attrs.iter() { | ||||||
|                         write!(self.out, r#" {}=""#, a)?; |                         write!(self.out, r#" {}=""#, a)?; | ||||||
|                         self.write_escape(v)?; |                         v.parts().try_for_each(|part| self.write_attr(part))?; | ||||||
|                         self.out.write_char('"')?; |                         self.out.write_char('"')?; | ||||||
|                     } |                     } | ||||||
|                     self.out.write_str(">")?; |                     self.out.write_str(">")?; | ||||||
|  | @ -430,13 +430,14 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> { | ||||||
|         Ok(()) |         Ok(()) | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     fn write_escape(&mut self, mut s: &str) -> std::fmt::Result { |     fn write_escape(&mut self, mut s: &str, escape_quotes: bool) -> std::fmt::Result { | ||||||
|         let mut ent = ""; |         let mut ent = ""; | ||||||
|         while let Some(i) = s.find(|c| { |         while let Some(i) = s.find(|c| { | ||||||
|             match c { |             match c { | ||||||
|                 '<' => Some("&lt;"), |                 '<' => Some("&lt;"), | ||||||
|                 '>' => Some("&gt;"), |                 '>' => Some("&gt;"), | ||||||
|                 '&' => Some("&amp;"), |                 '&' => Some("&amp;"), | ||||||
|  |                 '"' if escape_quotes => Some("&quot;"), | ||||||
|                 _ => None, |                 _ => None, | ||||||
|             } |             } | ||||||
|             .map_or(false, |s| { |             .map_or(false, |s| { | ||||||
|  | @ -450,4 +451,12 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> { | ||||||
|         } |         } | ||||||
|         self.out.write_str(s) |         self.out.write_str(s) | ||||||
|     } |     } | ||||||
|  | 
 | ||||||
|  |     fn write_text(&mut self, s: &str) -> std::fmt::Result { | ||||||
|  |         self.write_escape(s, false) | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     fn write_attr(&mut self, s: &str) -> std::fmt::Result { | ||||||
|  |         self.write_escape(s, true) | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -63,7 +63,7 @@ mod tree; | ||||||
| use span::DiscontinuousString; | use span::DiscontinuousString; | ||||||
| use span::Span; | use span::Span; | ||||||
| 
 | 
 | ||||||
| pub use attr::Attributes; | pub use attr::{AttributeValue, AttributeValueParts, Attributes}; | ||||||
| 
 | 
 | ||||||
| type CowStr<'s> = std::borrow::Cow<'s, str>; | type CowStr<'s> = std::borrow::Cow<'s, str>; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -1,7 +1,5 @@ | ||||||
| 38d85f9:multi-line block attributes | 38d85f9:multi-line block attributes | ||||||
| 6c14561:multi-line block attributes | 6c14561:multi-line block attributes | ||||||
| 48546bb:escape in attributes |  | ||||||
| 6bc4257:escape in attributes |  | ||||||
| 613a9d6:attribute container precedence | 613a9d6:attribute container precedence | ||||||
| f4f22fc:attribute key class order | f4f22fc:attribute key class order | ||||||
| ae6fc15:bugged left/right quote | ae6fc15:bugged left/right quote | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue