// jotdown/src/attr.rs
use crate::CowStr;
use crate::DiscontinuousString;
use crate::Span;
use State::*;
2023-01-15 09:47:28 -05:00
/// Build a fresh [`Attributes`] collection from the attribute block in `chars`.
///
/// The boolean result of [`Attributes::parse`] is discarded here, so the
/// returned collection holds whatever was inserted before parsing stopped.
pub(crate) fn parse<'s, S: DiscontinuousString<'s>>(chars: S) -> Attributes<'s> {
    let mut collected = Attributes::new();
    collected.parse(chars);
    collected
}
pub fn valid<I: Iterator<Item = char>>(chars: I) -> (usize, bool) {
let mut has_attr = false;
2022-12-22 14:39:11 -05:00
let mut p = Parser::new(chars);
for e in &mut p {
match e {
Element::Class(..) | Element::Identifier(..) | Element::Attribute(..) => {
has_attr = true;
}
Element::Invalid => return (0, false),
}
2022-12-22 14:39:11 -05:00
}
(p.pos, has_attr)
2022-12-18 12:05:39 -05:00
}
2023-02-01 15:55:51 -05:00
/// A collection of attributes, i.e. a key-value map.
2022-12-18 12:05:39 -05:00
// Attributes are relatively rare, we choose to pay 8 bytes always and sometimes an extra
// indirection instead of always 24 bytes.
2023-02-11 15:21:48 -05:00
#[allow(clippy::box_vec)]
2022-12-18 12:05:39 -05:00
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct Attributes<'s>(Option<Box<Vec<(&'s str, CowStr<'s>)>>>);
impl<'s> Attributes<'s> {
2023-02-01 15:55:51 -05:00
/// Create an empty collection.
2022-12-18 12:05:39 -05:00
#[must_use]
pub fn new() -> Self {
Self::default()
}
#[must_use]
2023-02-01 15:55:51 -05:00
pub(crate) fn take(&mut self) -> Self {
2022-12-18 12:05:39 -05:00
Self(self.0.take())
}
2023-01-15 09:47:28 -05:00
pub(crate) fn parse<S: DiscontinuousString<'s>>(&mut self, input: S) -> bool {
2022-12-18 12:05:39 -05:00
for elem in Parser::new(input.chars()) {
match elem {
2023-02-01 15:55:51 -05:00
Element::Class(c) => self.insert("class", input.src(c)),
Element::Identifier(i) => self.insert("id", input.src(i)),
Element::Attribute(a, v) => self.insert(
2022-12-18 12:05:39 -05:00
match input.src(a) {
CowStr::Owned(_) => panic!(),
CowStr::Borrowed(s) => s,
},
input.src(v),
),
Element::Invalid => return false,
}
}
true
}
2023-01-28 10:03:01 -05:00
/// Combine all attributes from both objects, prioritizing self on conflicts.
2023-02-01 15:55:51 -05:00
pub(crate) fn union(&mut self, other: Self) {
2023-01-28 10:03:01 -05:00
if let Some(attrs0) = &mut self.0 {
if let Some(mut attrs1) = other.0 {
for (key, val) in attrs1.drain(..) {
if !attrs0.iter().any(|(k, _)| *k == key) {
attrs0.push((key, val));
2023-01-28 10:03:01 -05:00
}
}
}
} else {
self.0 = other.0;
}
}
2023-02-01 15:55:51 -05:00
/// Insert an attribute. If the attribute already exists, the previous value will be
/// overwritten, unless it is a "class" attribute. In that case the provided value will be
/// appended to the existing value.
pub fn insert(&mut self, key: &'s str, val: CowStr<'s>) {
2022-12-18 12:05:39 -05:00
if self.0.is_none() {
self.0 = Some(Vec::new().into());
};
let attrs = self.0.as_mut().unwrap();
2023-02-01 15:55:51 -05:00
if let Some(i) = attrs.iter().position(|(k, _)| *k == key) {
2023-01-15 14:03:22 -05:00
let prev = &mut attrs[i].1;
2023-02-01 15:55:51 -05:00
if key == "class" {
2023-01-15 14:03:22 -05:00
*prev = format!("{} {}", prev, val).into();
} else {
*prev = val;
}
} else {
2023-02-01 15:55:51 -05:00
attrs.push((key, val));
2023-01-15 14:03:22 -05:00
}
}
2023-02-01 15:55:51 -05:00
/// Returns true if the collection contains no attributes.
2023-01-15 14:03:22 -05:00
#[must_use]
pub fn is_empty(&self) -> bool {
2023-02-01 15:55:51 -05:00
self.0.as_ref().map_or(true, |v| v.is_empty())
2022-12-18 12:05:39 -05:00
}
2023-02-01 15:55:51 -05:00
/// Returns a reference to the value corresponding to the attribute key.
2023-01-29 09:10:01 -05:00
#[must_use]
pub fn get(&self, key: &str) -> Option<&str> {
self.iter().find(|(k, _)| *k == key).map(|(_, v)| v)
}
2023-02-01 15:55:51 -05:00
/// Returns an iterator over the attributes in undefined order.
2022-12-18 12:05:39 -05:00
pub fn iter(&self) -> impl Iterator<Item = (&'s str, &str)> + '_ {
self.0
.iter()
.flat_map(|v| v.iter().map(|(a, b)| (*a, b.as_ref())))
}
}
/// Test-only convenience: build an [`Attributes`] from `(key, value)` pairs.
///
/// An empty iterator produces the same value as [`Attributes::new`], i.e. no
/// backing vector is allocated.
#[cfg(test)]
impl<'s> FromIterator<(&'s str, &'s str)> for Attributes<'s> {
    fn from_iter<I: IntoIterator<Item = (&'s str, &'s str)>>(iter: I) -> Self {
        let pairs: Vec<_> = iter
            .into_iter()
            .map(|(key, value)| (key, value.into()))
            .collect();
        if pairs.is_empty() {
            return Attributes::new();
        }
        Attributes(Some(pairs.into()))
    }
}
/// States of the character-level attribute parser.
///
/// Each variant names what the parser is in the middle of after the most
/// recently consumed character.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum State {
/// Initial state; only an opening `{` is accepted.
Start,
/// Between elements inside the braces; also entered right after `{`, after a
/// comment and after a closing quote.
Whitespace,
/// Inside a `%`-delimited comment.
Comment,
/// Just after `.`, expecting the first character of a class name.
ClassFirst,
/// Inside a class name.
Class,
/// Just after `#`, expecting the first character of an identifier.
IdentifierFirst,
/// Inside an identifier.
Identifier,
/// Inside an attribute key.
Attribute,
/// Just after `=`, expecting the first character of a value or a `"`.
ValueFirst,
/// Inside an unquoted value.
Value,
/// Inside a double-quoted value.
ValueQuoted,
/// The closing `}` has been consumed.
Done,
/// The input is not a valid attribute block.
Invalid,
}
struct Parser<I> {
chars: I,
pos: usize,
2023-02-01 15:55:51 -05:00
pos_prev: usize,
2022-12-18 12:05:39 -05:00
state: State,
}
impl<I: Iterator<Item = char>> Parser<I> {
fn new(chars: I) -> Self {
Parser {
chars,
pos: 0,
2023-02-01 15:55:51 -05:00
pos_prev: 0,
2022-12-18 12:05:39 -05:00
state: Start,
}
}
fn step_char(&mut self) -> Option<State> {
self.chars.next().map(|c| {
2023-02-01 15:55:51 -05:00
self.pos_prev = self.pos;
2022-12-18 12:05:39 -05:00
self.pos += c.len_utf8();
match self.state {
Start => match c {
'{' => Whitespace,
_ => Invalid,
},
Whitespace => match c {
'}' => Done,
'.' => ClassFirst,
'#' => IdentifierFirst,
'%' => Comment,
c if c.is_ascii_alphanumeric() || matches!(c, '_' | ':' | '-') => Attribute,
c if c.is_whitespace() => Whitespace,
_ => Invalid,
},
Comment => {
if c == '%' {
Whitespace
} else {
Comment
}
}
s @ (ClassFirst | IdentifierFirst) => {
if is_name_start(c) {
match s {
ClassFirst => Class,
IdentifierFirst => Identifier,
_ => panic!(),
}
} else {
Invalid
}
}
s @ (Class | Identifier | Value) => {
if is_name(c) {
s
} else if c.is_whitespace() {
Whitespace
} else if c == '}' {
Done
} else {
Invalid
}
}
Attribute => {
if is_name(c) {
Attribute
} else if c == '=' {
ValueFirst
} else {
Invalid
}
}
ValueFirst => {
if is_name(c) {
Value
} else if c == '"' {
ValueQuoted
} else {
Invalid
}
}
ValueQuoted => {
if c == '"' {
Whitespace
} else {
ValueQuoted
}
}
2023-01-12 11:26:53 -05:00
Invalid | Done => panic!("{:?}", self.state),
2022-12-18 12:05:39 -05:00
}
})
}
fn step(&mut self) -> (State, Span) {
2023-02-01 15:55:51 -05:00
let start = self.pos_prev;
2022-12-18 12:05:39 -05:00
2022-12-22 14:39:11 -05:00
if self.state == Done {
return (Done, Span::empty_at(start));
}
2023-01-12 11:26:53 -05:00
if self.state == Invalid {
return (Invalid, Span::empty_at(start));
}
2022-12-18 12:05:39 -05:00
while let Some(state_next) = self.step_char() {
if self.state != state_next {
return (
std::mem::replace(&mut self.state, state_next),
2023-02-01 15:55:51 -05:00
Span::new(start, self.pos_prev),
2022-12-18 12:05:39 -05:00
);
}
}
(
if self.state == Done { Done } else { Invalid },
2023-02-01 15:55:51 -05:00
Span::new(start, self.pos_prev),
2022-12-18 12:05:39 -05:00
)
}
}
2023-01-31 15:23:50 -05:00
/// Returns `true` if `c` may begin a class name or identifier: an ASCII letter
/// or digit, `_` or `:`.
pub fn is_name_start(c: char) -> bool {
    c.is_ascii_alphanumeric() || matches!(c, '_' | ':')
}
2023-01-31 15:23:50 -05:00
pub fn is_name(c: char) -> bool {
2023-01-12 11:26:53 -05:00
is_name_start(c) || c.is_ascii_digit() || matches!(c, '-')
2022-12-18 12:05:39 -05:00
}
/// One parsed item of an attribute block, referring to the input by span.
enum Element {
/// A `.class` element; the span covers the class name.
Class(Span),
/// A `#id` element; the span covers the identifier.
Identifier(Span),
/// A `key=value` element; the first span covers the key, the second the value.
Attribute(Span, Span),
/// The input is not a valid attribute block.
Invalid,
}
/// Yields the elements of an attribute block one at a time.
///
/// Iteration ends (`None`) once the closing `}` has been reached. Malformed
/// input is reported as `Element::Invalid`.
impl<I: Iterator<Item = char>> Iterator for Parser<I> {
    type Item = Element;

    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let (state, key_span) = self.step();
            match state {
                // These states carry no element of their own; keep scanning.
                Comment | Start | Whitespace => {}
                Done => return None,
                Invalid => return Some(Element::Invalid),
                ClassFirst | IdentifierFirst => {
                    // The marker (`.` or `#`) is followed by the name itself.
                    let (state, name_span) = self.step();
                    return Some(match state {
                        Class => Element::Class(name_span),
                        Identifier => Element::Identifier(name_span),
                        _ => Element::Invalid,
                    });
                }
                Attribute => {
                    // After the key comes the `=` step, then the value.
                    let (state, _) = self.step();
                    match state {
                        ValueFirst => {}
                        Invalid => return Some(Element::Invalid),
                        _ => panic!("{:?}", state),
                    }
                    let (state, value_span) = self.step();
                    return Some(match state {
                        Value => Element::Attribute(key_span, value_span),
                        // Drop the opening quote from the value span.
                        ValueQuoted => Element::Attribute(key_span, value_span.skip(1)),
                        Invalid => Element::Invalid,
                        _ => panic!("{:?}", state),
                    });
                }
                _ => panic!("{:?}", state),
            }
        }
    }
}
#[cfg(test)]
mod test {
    /// Parse `$src` into a fresh `Attributes` and compare the resulting
    /// `(key, value)` pairs, in order, against the expected list.
    macro_rules! test_attr {
        ($src:expr $(,$($av:expr),* $(,)?)?) => {
            #[allow(unused)]
            let mut attr = super::Attributes::new();
            attr.parse($src);
            let actual = attr.iter().collect::<Vec<_>>();
            let expected = &[$($($av),*,)?];
            assert_eq!(actual, expected, "\n\n{}\n\n", $src);
        };
    }

    #[test]
    fn empty() {
        test_attr!("{}");
    }

    #[test]
    fn class_id() {
        test_attr!(
            "{.some_class #some_id}",
            ("class", "some_class"),
            ("id", "some_id"),
        );
        test_attr!("{.a .b}", ("class", "a b"));
        test_attr!("{#a #b}", ("id", "b"));
    }

    #[test]
    fn unicode_whitespace() {
        // Written as an escape so the non-ASCII separator (EM SPACE) cannot be
        // silently replaced by a plain space.
        test_attr!("{.a\u{2003}.b}", ("class", "a b"));
    }

    #[test]
    fn value_unquoted() {
        test_attr!(
            "{attr0=val0 attr1=val1}",
            ("attr0", "val0"),
            ("attr1", "val1"),
        );
    }

    #[test]
    fn value_quoted() {
        test_attr!(
            r#"{attr0="val0" attr1="val1"}"#,
            ("attr0", "val0"),
            ("attr1", "val1"),
        );
        test_attr!(
            r#"{#id .class style="color:red"}"#,
            ("id", "id"),
            ("class", "class"),
            ("style", "color:red")
        );
    }

    #[test]
    fn comment() {
        test_attr!("{%%}");
        test_attr!("{ % abc % }");
        test_attr!(
            "{ .some_class % abc % #some_id}",
            ("class", "some_class"),
            ("id", "some_id"),
        );
    }

    #[test]
    fn valid_full() {
        let src = "{.class %comment%}";
        assert_eq!(super::valid(src.chars()), (src.len(), true));
    }

    #[test]
    fn valid_empty() {
        let src = "{}";
        assert_eq!(super::valid(src.chars()), (src.len(), false));
    }

    #[test]
    fn valid_whitespace() {
        let src = "{ \n }";
        assert_eq!(super::valid(src.chars()), (src.len(), false));
    }

    #[test]
    fn valid_comment() {
        let src = "{%comment%}";
        assert_eq!(super::valid(src.chars()), (src.len(), false));
    }

    #[test]
    fn valid_trailing() {
        // Input after the closing brace must not be consumed or counted.
        let src = "{.class}";
        assert_eq!(
            super::valid(src.chars().chain("{.ignore}".chars())),
            (src.len(), true),
        );
    }

    #[test]
    fn valid_invalid() {
        assert_eq!(super::valid(" {.valid}".chars()), (0, false));
        assert_eq!(super::valid("{.class invalid}".chars()), (0, false));
        assert_eq!(super::valid("abc".chars()), (0, false));
        assert_eq!(super::valid("{.abc.}".chars()), (0, false));
    }
}