jotdown/src/attr.rs

528 lines
15 KiB
Rust
Raw Normal View History

2022-12-18 12:05:39 -05:00
use crate::CowStr;
use std::fmt;
2022-12-18 12:05:39 -05:00
/// Parse attributes, assumed to be valid.
pub(crate) fn parse(src: &str) -> Attributes {
2023-01-15 09:47:28 -05:00
let mut a = Attributes::new();
a.parse(src);
2023-01-15 09:47:28 -05:00
a
}
pub fn valid<I: Iterator<Item = char>>(chars: I) -> (usize, bool) {
use State::*;
let mut has_attr = false;
let mut n = 0;
let mut state = Start;
for c in chars {
n += 1;
state = state.step(c);
match state {
Class | Identifier | Value | ValueQuoted => has_attr = true,
Done | Invalid => break,
_ => {}
}
2022-12-22 14:39:11 -05:00
}
if matches!(state, Done) {
(n, has_attr)
} else {
(0, false)
}
2022-12-18 12:05:39 -05:00
}
/// An attribute value whose backslash escapes of ASCII punctuation are resolved when the value is
/// displayed, so that storing it does not require allocating an unescaped copy.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct AttributeValue<'s> {
    /// The text of the value with its escapes still in place.
    raw: CowStr<'s>,
}

impl<'s> AttributeValue<'s> {
    /// Returns an iterator over the pieces of the value that should be displayed, i.e. the raw
    /// text with the escapes processed.
    pub fn parts(&'s self) -> AttributeValueParts<'s> {
        let ahead: &'s str = &self.raw;
        AttributeValueParts { ahead }
    }

    // `s` is required to live for 's so that extending an empty borrowed value with a single
    // piece can re-point the borrow instead of allocating.
    fn extend(&mut self, s: &'s str) {
        let joined = match &mut self.raw {
            CowStr::Owned(buf) => {
                // Already owned, append in place.
                buf.push(' ');
                buf.push_str(s);
                return;
            }
            CowStr::Borrowed(current) => {
                if current.is_empty() {
                    *current = s;
                    return;
                }
                format!("{} {}", current, s)
            }
        };
        self.raw = joined.into();
    }
}
impl<'s> From<&'s str> for AttributeValue<'s> {
fn from(value: &'s str) -> Self {
Self { raw: value.into() }
}
}
impl<'s> From<CowStr<'s>> for AttributeValue<'s> {
fn from(value: CowStr<'s>) -> Self {
Self { raw: value }
}
}
impl<'s> From<String> for AttributeValue<'s> {
fn from(value: String) -> Self {
Self { raw: value.into() }
}
}
impl<'s> fmt::Display for AttributeValue<'s> {
    /// Writes the value piece by piece, as yielded by [`AttributeValue::parts`].
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        for piece in self.parts() {
            f.write_str(piece)?;
        }
        Ok(())
    }
}
/// An iterator over the parts of an [`AttributeValue`] that should be displayed.
pub struct AttributeValueParts<'s> {
    /// The raw text that has not been yielded yet.
    ahead: &'s str,
}

impl<'s> Iterator for AttributeValueParts<'s> {
    type Item = &'s str;

    /// Yields the text up to the next effective escape, or all of the remaining text if there is
    /// none.
    ///
    /// A backslash is an effective escape only when it is followed by another backslash or by
    /// ASCII punctuation, any other backslash is kept verbatim.
    fn next(&mut self) -> Option<Self::Item> {
        let text = self.ahead;
        let bytes = text.as_bytes();

        // For the first effective escape: (end of the yielded part, start of the remainder).
        let cut = text
            .match_indices('\\')
            .find_map(|(at, _)| match bytes.get(at + 1) {
                // Escaped backslash: yield one of the two, skip the other.
                Some(b'\\') => Some((at + 1, at + 2)),
                // Escaped punctuation: drop the backslash, the punctuation starts the next part.
                Some(b) if b.is_ascii_punctuation() => Some((at, at + 1)),
                _ => None,
            });

        match cut {
            Some((end, resume)) => {
                self.ahead = &text[resume..];
                Some(&text[..end])
            }
            None if text.is_empty() => None,
            None => {
                self.ahead = "";
                Some(text)
            }
        }
    }
}
2023-02-01 15:55:51 -05:00
/// A collection of attributes, i.e. a key-value map.
2022-12-18 12:05:39 -05:00
// Attributes are relatively rare, we choose to pay 8 bytes always and sometimes an extra
// indirection instead of always 24 bytes.
2023-02-11 15:21:48 -05:00
#[allow(clippy::box_vec)]
#[derive(Clone, PartialEq, Eq, Default)]
pub struct Attributes<'s>(Option<Box<Vec<(&'s str, AttributeValue<'s>)>>>);
2022-12-18 12:05:39 -05:00
impl<'s> Attributes<'s> {
2023-02-01 15:55:51 -05:00
/// Create an empty collection.
2022-12-18 12:05:39 -05:00
#[must_use]
pub fn new() -> Self {
Self::default()
}
#[must_use]
2023-02-01 15:55:51 -05:00
pub(crate) fn take(&mut self) -> Self {
2022-12-18 12:05:39 -05:00
Self(self.0.take())
}
/// Parse and append attributes, assumed to be valid.
pub(crate) fn parse(&mut self, input: &'s str) {
let mut parser = Parser::new(self.take());
parser.parse(input);
*self = parser.finish();
2022-12-18 12:05:39 -05:00
}
2023-01-28 10:03:01 -05:00
/// Combine all attributes from both objects, prioritizing self on conflicts.
2023-02-01 15:55:51 -05:00
pub(crate) fn union(&mut self, other: Self) {
2023-01-28 10:03:01 -05:00
if let Some(attrs0) = &mut self.0 {
if let Some(mut attrs1) = other.0 {
for (key, val) in attrs1.drain(..) {
if !attrs0.iter().any(|(k, _)| *k == key) {
attrs0.push((key, val));
2023-01-28 10:03:01 -05:00
}
}
}
} else {
self.0 = other.0;
}
}
2023-02-01 15:55:51 -05:00
/// Insert an attribute. If the attribute already exists, the previous value will be
/// overwritten, unless it is a "class" attribute. In that case the provided value will be
/// appended to the existing value.
pub fn insert(&mut self, key: &'s str, val: AttributeValue<'s>) {
self.insert_pos(key, val);
}
// duplicate of insert but returns position of inserted value
fn insert_pos(&mut self, key: &'s str, val: AttributeValue<'s>) -> usize {
2022-12-18 12:05:39 -05:00
if self.0.is_none() {
self.0 = Some(Vec::new().into());
};
let attrs = self.0.as_mut().unwrap();
2023-02-01 15:55:51 -05:00
if let Some(i) = attrs.iter().position(|(k, _)| *k == key) {
2023-01-15 14:03:22 -05:00
let prev = &mut attrs[i].1;
2023-02-01 15:55:51 -05:00
if key == "class" {
match val.raw {
CowStr::Borrowed(s) => prev.extend(s),
CowStr::Owned(s) => {
*prev = format!("{} {}", prev, s).into();
}
}
2023-01-15 14:03:22 -05:00
} else {
*prev = val;
}
i
2023-01-15 14:03:22 -05:00
} else {
let i = attrs.len();
2023-02-01 15:55:51 -05:00
attrs.push((key, val));
i
2023-01-15 14:03:22 -05:00
}
}
2023-02-01 15:55:51 -05:00
/// Returns true if the collection contains no attributes.
2023-01-15 14:03:22 -05:00
#[must_use]
pub fn is_empty(&self) -> bool {
2023-02-01 15:55:51 -05:00
self.0.as_ref().map_or(true, |v| v.is_empty())
2022-12-18 12:05:39 -05:00
}
2023-02-01 15:55:51 -05:00
/// Returns a reference to the value corresponding to the attribute key.
2023-01-29 09:10:01 -05:00
#[must_use]
pub fn get(&self, key: &str) -> Option<&AttributeValue<'s>> {
2023-01-29 09:10:01 -05:00
self.iter().find(|(k, _)| *k == key).map(|(_, v)| v)
}
2023-02-01 15:55:51 -05:00
/// Returns an iterator over the attributes in undefined order.
pub fn iter(&self) -> impl Iterator<Item = (&'s str, &AttributeValue<'s>)> + '_ {
self.0.iter().flat_map(|v| v.iter().map(|(a, b)| (*a, b)))
2022-12-18 12:05:39 -05:00
}
}
#[cfg(test)]
impl<'s> FromIterator<(&'s str, &'s str)> for Attributes<'s> {
    /// Builds a collection holding the `(key, value)` pairs exactly as yielded by `iter`; pairs
    /// are collected as they come, without going through [`Attributes::insert`].
    ///
    /// An iterator that yields nothing produces the same value as [`Attributes::new`].
    fn from_iter<I: IntoIterator<Item = (&'s str, &'s str)>>(iter: I) -> Self {
        let pairs: Vec<(&'s str, AttributeValue<'s>)> = iter
            .into_iter()
            .map(|(key, value)| (key, AttributeValue::from(value)))
            .collect();
        if pairs.is_empty() {
            return Attributes::new();
        }
        Attributes(Some(Box::new(pairs)))
    }
}
impl<'s> std::fmt::Debug for Attributes<'s> {
    /// Formats the collection as `{key="raw value", ..}`, showing the raw (still escaped) text of
    /// every value.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "{{")?;
        for (idx, (key, value)) in self.iter().enumerate() {
            // Separator goes between entries only.
            if idx > 0 {
                write!(f, ", ")?;
            }
            write!(f, "{}=\"{}\"", key, value.raw)?;
        }
        write!(f, "}}")
    }
}
/// Attributes parser, takes the text of one or more consecutive attribute sets (`{..}{..}`) and
/// builds an [`Attributes`] object from it.
///
/// The input is assumed to be a valid series of attribute sets. Attributes are added to the
/// collection as soon as they have been read.
pub struct Parser<'s> {
    /// The attributes collected so far.
    attrs: Attributes<'s>,
    /// Position in `attrs` of the most recently inserted key, whose value is filled in next.
    i_prev: usize,
    /// Current state of the attribute state machine.
    state: State,
}

impl<'s> Parser<'s> {
    /// Creates a parser that appends to the given, possibly non-empty, collection.
    pub fn new(attrs: Attributes<'s>) -> Self {
        Self {
            state: State::Start,
            i_prev: usize::MAX,
            attrs,
        }
    }

    /// Parses `input` and inserts every attribute that is completed along the way.
    ///
    /// Parsing stops at the end of `input`, or right after a closing `}` that is not immediately
    /// followed by a `{` starting another attribute set. Nothing is returned, the result is
    /// retrieved with `finish`.
    pub fn parse(&mut self, input: &'s str) {
        use State::*;

        // Byte offset at which the piece of input belonging to the current state begins.
        let mut piece_start = 0;
        for (offset, ch) in input.char_indices() {
            let before = self.state;
            self.state = before.step(ch);
            let after = self.state;

            // Entering or leaving an escape does not end a piece: the backslash and the escaped
            // character stay within the surrounding quoted value.
            let at_escape = before == ValueEscape || after == ValueEscape;
            if before != after && !at_escape {
                let piece = &input[piece_start..offset];
                piece_start = offset;
                match before {
                    Class => self.attrs.insert("class", piece.into()),
                    Identifier => self.attrs.insert("id", piece.into()),
                    Key => self.i_prev = self.attrs.insert_pos(piece, "".into()),
                    Value | ValueQuoted | ValueContinued => {
                        // The first piece of a quoted value still carries its opening quote.
                        let skip = if before == ValueQuoted { 1 } else { 0 };
                        let attrs = self.attrs.0.as_mut().unwrap();
                        attrs[self.i_prev].1.extend(&piece[skip..]);
                    }
                    _ => {}
                }
            }

            debug_assert!(!matches!(self.state, Invalid));
            if after == Done {
                let rest = &input[offset + ch.len_utf8()..];
                if rest.starts_with('{') {
                    // Another attribute set follows directly, keep going.
                    self.state = Start;
                } else {
                    return;
                }
            }
        }
    }

    /// Consumes the parser and returns the collected attributes.
    fn finish(self) -> Attributes<'s> {
        self.attrs
    }
}
2022-12-18 12:05:39 -05:00
/// States of the attribute syntax state machine, advanced one character at a time by
/// [`State::step`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum State {
    /// Nothing consumed yet, the opening `{` is expected.
    Start,
    /// Inside the braces, between attributes.
    Whitespace,
    /// Inside a comment, which lasts until the next `%`.
    Comment,
    /// Just after `.`, the first character of a class name is expected.
    ClassFirst,
    /// Inside a class name.
    Class,
    /// Just after `#`, the first character of an identifier is expected.
    IdentifierFirst,
    /// Inside an identifier.
    Identifier,
    /// Inside a key, which must be followed by `=`.
    Key,
    /// Just after `=`, either a name character or an opening `"` is expected.
    ValueFirst,
    /// Inside an unquoted value.
    Value,
    /// Inside a quoted value, before any newline.
    ValueQuoted,
    /// Just after a backslash inside a quoted value.
    ValueEscape,
    /// Just after a newline inside a quoted value.
    ValueNewline,
    /// Inside a quoted value, on a line after the first one.
    ValueContinued,
    /// The closing `}` has been consumed.
    Done,
    /// The input is not a valid attribute set.
    Invalid,
}

impl State {
    /// Returns the state that follows `self` when the character `c` is consumed.
    ///
    /// The arms are tried in order, so the guarded arms must stay ahead of the unguarded
    /// fallbacks for the same state.
    ///
    /// # Panics
    ///
    /// Panics if called on `Done` or `Invalid`, these states are terminal.
    fn step(self, c: char) -> State {
        use State::*;

        match self {
            Start if c == '{' => Whitespace,
            Start => Invalid,
            Whitespace => match c {
                '}' => Done,
                '.' => ClassFirst,
                '#' => IdentifierFirst,
                '%' => Comment,
                c if is_name(c) => Key,
                c if c.is_whitespace() => Whitespace,
                _ => Invalid,
            },
            Comment if c == '%' => Whitespace,
            Comment => Comment,
            ClassFirst if is_name(c) => Class,
            ClassFirst => Invalid,
            IdentifierFirst if is_name(c) => Identifier,
            IdentifierFirst => Invalid,
            s @ (Class | Identifier | Value) if is_name(c) => s,
            Class | Identifier | Value if c.is_whitespace() => Whitespace,
            Class | Identifier | Value if c == '}' => Done,
            Class | Identifier | Value => Invalid,
            Key if is_name(c) => Key,
            Key if c == '=' => ValueFirst,
            Key => Invalid,
            ValueFirst if is_name(c) => Value,
            ValueFirst if c == '"' => ValueQuoted,
            ValueFirst => Invalid,
            // An escaped `"` does not reach this arm, `ValueEscape` is not listed here.
            ValueQuoted | ValueNewline | ValueContinued if c == '"' => Whitespace,
            ValueQuoted | ValueNewline | ValueContinued | ValueEscape if c == '\n' => ValueNewline,
            ValueQuoted if c == '\\' => ValueEscape,
            ValueQuoted | ValueEscape => ValueQuoted,
            ValueNewline | ValueContinued => ValueContinued,
            Invalid | Done => panic!("{:?}", self),
        }
    }
}

/// Returns true if `c` may be part of a class name, identifier, key or unquoted value, i.e. if it
/// is an ASCII letter or digit, or one of `:`, `_` and `-`.
pub fn is_name(c: char) -> bool {
    c.is_ascii_alphanumeric() || matches!(c, ':' | '_' | '-')
}
#[cfg(test)]
mod test {
    /// Parses `$src` and asserts that the resulting attributes are exactly the expected
    /// `(key, displayed value)` pairs, in the given order.
    macro_rules! test_attr {
        ($src:expr $(,$($av:expr),* $(,)?)?) => {
            let mut attr = super::Attributes::new();
            attr.parse($src);
            // Values are compared in their displayed form, i.e. with escapes resolved.
            let displayed = attr
                .iter()
                .map(|(key, val)| (key, val.to_string()))
                .collect::<Vec<_>>();
            let actual = displayed
                .iter()
                .map(|(key, val)| (*key, val.as_str()))
                .collect::<Vec<_>>();
            let expected: &[(&str, &str)] = &[$($($av),*,)?];
            // Compare the whole sequences, so that missing attributes fail the test as well
            // instead of being skipped silently.
            assert_eq!(actual, expected, "\n\n{}\n\n", $src);
        };
    }

    #[test]
    fn empty() {
        test_attr!("{}");
    }

    #[test]
    fn class_id() {
        test_attr!(
            "{.some_class #some_id}",
            ("class", "some_class"),
            ("id", "some_id"),
        );
        test_attr!("{.a .b}", ("class", "a b"));
        test_attr!("{#a #b}", ("id", "b"));
    }

    #[test]
    fn unicode_whitespace() {
        // The separator is written as an escape so that it cannot be mistaken for, or silently
        // replaced by, an ASCII space.
        test_attr!("{.a\u{2003}.b}", ("class", "a b"));
    }

    #[test]
    fn value_unquoted() {
        test_attr!(
            "{attr0=val0 attr1=val1}",
            ("attr0", "val0"),
            ("attr1", "val1"),
        );
    }

    #[test]
    fn value_quoted() {
        test_attr!(
            r#"{attr0="val0" attr1="val1"}"#,
            ("attr0", "val0"),
            ("attr1", "val1"),
        );
        test_attr!(
            r#"{#id .class style="color:red"}"#,
            ("id", "id"),
            ("class", "class"),
            ("style", "color:red")
        );
    }

    #[test]
    fn value_newline() {
        test_attr!("{attr0=\"abc\ndef\"}", ("attr0", "abc def"));
    }

    #[test]
    fn comment() {
        test_attr!("{%%}");
        test_attr!("{ % abc % }");
        test_attr!(
            "{ .some_class % abc % #some_id}",
            ("class", "some_class"),
            ("id", "some_id"),
        );
    }

    #[test]
    fn escape() {
        test_attr!(
            r#"{attr="with escaped \~ char"}"#,
            ("attr", "with escaped ~ char")
        );
        test_attr!(
            r#"{key="quotes \" should be escaped"}"#,
            ("key", r#"quotes " should be escaped"#)
        );
    }

    #[test]
    fn escape_backslash() {
        test_attr!(r#"{attr="with\\backslash"}"#, ("attr", r"with\backslash"));
        test_attr!(
            r#"{attr="with many backslashes\\\\"}"#,
            ("attr", r"with many backslashes\\")
        );
        test_attr!(
            r#"{attr="\\escaped backslash at start"}"#,
            ("attr", r"\escaped backslash at start")
        );
    }

    #[test]
    fn only_escape_punctuation() {
        test_attr!(r#"{attr="do not \escape"}"#, ("attr", r"do not \escape"));
        test_attr!(
            r#"{attr="\backslash at the beginning"}"#,
            ("attr", r"\backslash at the beginning")
        );
    }

    #[test]
    fn valid_full() {
        let src = "{.class %comment%}";
        assert_eq!(super::valid(src.chars()), (src.len(), true));
    }

    #[test]
    fn valid_empty() {
        let src = "{}";
        assert_eq!(super::valid(src.chars()), (src.len(), false));
    }

    #[test]
    fn valid_whitespace() {
        let src = "{ \n }";
        assert_eq!(super::valid(src.chars()), (src.len(), false));
    }

    #[test]
    fn valid_comment() {
        let src = "{%comment%}";
        assert_eq!(super::valid(src.chars()), (src.len(), false));
    }

    #[test]
    fn valid_trailing() {
        let src = "{.class}";
        assert_eq!(
            super::valid(src.chars().chain("{.ignore}".chars())),
            (src.len(), true),
        );
    }

    #[test]
    fn valid_invalid() {
        assert_eq!(super::valid(" {.valid}".chars()), (0, false));
        assert_eq!(super::valid("{.class invalid}".chars()), (0, false));
        assert_eq!(super::valid("abc".chars()), (0, false));
        assert_eq!(super::valid("{.abc.}".chars()), (0, false));
    }
}