diff --git a/Cargo.lock b/Cargo.lock index 3a66d8f..d430cbb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -162,6 +162,29 @@ dependencies = [ "itertools", ] +[[package]] +name = "databake" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82175d72e69414ceafbe2b49686794d3a8bed846e0d50267355f83ea8fdd953a" +dependencies = [ + "databake-derive", + "proc-macro2", + "quote", +] + +[[package]] +name = "databake-derive" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "377af281d8f23663862a7c84623bc5dcf7f8c44b13c7496a590bdc157f941a43" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.16", + "synstructure", +] + [[package]] name = "either" version = "1.8.1" @@ -264,6 +287,9 @@ dependencies = [ [[package]] name = "jotdown" version = "0.3.2" +dependencies = [ + "databake", +] [[package]] name = "jotdown_wasm" @@ -370,9 +396,9 @@ checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160" [[package]] name = "proc-macro2" -version = "1.0.56" +version = "1.0.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b63bdb0cd06f1f4dedf69b254734f9b45af66e4a031e42a7480257d9898b435" +checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e" dependencies = [ "unicode-ident", ] @@ -433,7 +459,7 @@ checksum = "af487d118eecd09402d70a5d72551860e788df87b464af30e5ea6a38c75c541e" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.107", ] [[package]] @@ -458,6 +484,28 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "syn" +version = "2.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6f671d4b5ffdb8eadec19c0ae67fe2639df8684bd7bc4b83d986b8db549cf01" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "synstructure" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.16", +] + [[package]] name = "test-html-ref" version = "0.1.0" @@ -573,7 +621,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn", + "syn 1.0.107", "wasm-bindgen-shared", ] @@ -595,7 +643,7 @@ checksum = "2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.107", "wasm-bindgen-backend", "wasm-bindgen-shared", ] diff --git a/Cargo.toml b/Cargo.toml index 9413339..012cf4e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,10 +35,14 @@ exclude = [ [[bin]] name = "jotdown" -required-features = ["html"] +required-features = ["html", "parser"] doc = false [features] default = ["html"] html = [] # html renderer and minimal cli binary deterministic = [] # for stable fuzzing +parser = [] + +[dependencies] +databake = { version = "0.1.7", features = ["derive"] } diff --git a/src/attr.rs b/src/attr.rs index 3bc4e86..ffe178e 100644 --- a/src/attr.rs +++ b/src/attr.rs @@ -1,13 +1,17 @@ +use databake::Bake; + use crate::CowStr; -use std::fmt; +use std::{borrow::Cow, fmt}; /// Parse attributes, assumed to be valid. +#[cfg(feature = "parser")] pub(crate) fn parse(src: &str) -> Attributes { let mut a = Attributes::new(); a.parse(src); a } +#[cfg(feature = "parser")] pub fn valid(src: &str) -> usize { use State::*; @@ -31,7 +35,8 @@ pub fn valid(src: &str) -> usize { /// Stores an attribute value that supports backslash escapes of ASCII punctuation upon displaying, /// without allocating. -#[derive(Clone, Debug, Eq, PartialEq)] +#[derive(Clone, Debug, Eq, PartialEq, Bake)] +#[databake(path = jotdown)] pub struct AttributeValue<'s> { raw: CowStr<'s>, } @@ -118,8 +123,9 @@ impl<'s> Iterator for AttributeValueParts<'s> { // Attributes are relatively rare, we choose to pay 8 bytes always and sometimes an extra // indirection instead of always 24 bytes. #[allow(clippy::box_vec)] -#[derive(Clone, PartialEq, Eq, Default)] -pub struct Attributes<'s>(Option)>>>); +#[derive(Clone, PartialEq, Eq, Default, Bake)] +#[databake(path = jotdown)] +pub struct Attributes<'s>(pub Option)]>>); impl<'s> Attributes<'s> { /// Create an empty collection. @@ -129,11 +135,13 @@ impl<'s> Attributes<'s> { } #[must_use] + #[cfg(feature = "parser")] pub(crate) fn take(&mut self) -> Self { Self(self.0.take()) } /// Parse and append attributes, assumed to be valid. + #[cfg(feature = "parser")] pub(crate) fn parse(&mut self, input: &'s str) { let mut parser = Parser::new(self.take()); parser.parse(input); @@ -141,12 +149,13 @@ impl<'s> Attributes<'s> { } /// Combine all attributes from both objects, prioritizing self on conflicts. + #[cfg(feature = "parser")] pub(crate) fn union(&mut self, other: Self) { if let Some(attrs0) = &mut self.0 { if let Some(mut attrs1) = other.0 { - for (key, val) in attrs1.drain(..) { + for (key, val) in attrs1.to_mut().drain(..) { if key == "class" || !attrs0.iter().any(|(k, _)| *k == key) { - attrs0.push((key, val)); + attrs0.to_mut().push((key, val)); } } } @@ -170,7 +179,7 @@ impl<'s> Attributes<'s> { let attrs = self.0.as_mut().unwrap(); if let Some(i) = attrs.iter().position(|(k, _)| *k == key) { - let prev = &mut attrs[i].1; + let prev = &mut attrs.to_mut()[i].1; if key == "class" { match val.raw { CowStr::Borrowed(s) => prev.extend(s), @@ -184,7 +193,7 @@ impl<'s> Attributes<'s> { i } else { let i = attrs.len(); - attrs.push((key, val)); + attrs.to_mut().push((key, val)); i } } @@ -238,10 +247,12 @@ impl<'s> std::fmt::Debug for Attributes<'s> { } #[derive(Clone)] +#[cfg(feature = "parser")] pub struct Validator { state: State, } +#[cfg(feature = "parser")] impl Validator { pub fn new() -> Self { Self { @@ -274,12 +285,14 @@ impl Validator { /// /// Input is assumed to contain a valid series of attribute sets, the attributes are added as they /// are encountered. +#[cfg(feature = "parser")] pub struct Parser<'s> { attrs: Attributes<'s>, i_prev: usize, state: State, } +#[cfg(feature = "parser")] impl<'s> Parser<'s> { pub fn new(attrs: Attributes<'s>) -> Self { Self { @@ -310,7 +323,7 @@ impl<'s> Parser<'s> { Identifier => self.attrs.insert("id", content.into()), Key => self.i_prev = self.attrs.insert_pos(content, "".into()), Value | ValueQuoted | ValueContinued => { - self.attrs.0.as_mut().unwrap()[self.i_prev] + self.attrs.0.as_mut().unwrap().to_mut()[self.i_prev] .1 .extend(&content[usize::from(matches!(st, ValueQuoted))..]); } @@ -338,6 +351,7 @@ impl<'s> Parser<'s> { } #[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[cfg(feature = "parser")] enum State { Start, Whitespace, @@ -357,6 +371,7 @@ enum State { Invalid, } +#[cfg(feature = "parser")] impl State { fn step(self, c: u8) -> State { use State::*; @@ -399,11 +414,12 @@ impl State { } } +#[cfg(feature = "parser")] pub fn is_name(c: u8) -> bool { c.is_ascii_alphanumeric() || matches!(c, b':' | b'_' | b'-') } -#[cfg(test)] +#[cfg(all(test, feature = "parser"))] mod test { macro_rules! test_attr { ($src:expr $(,$($av:expr),* $(,)?)?) => { diff --git a/src/lib.rs b/src/lib.rs index 5a94bdc..ae87054 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -49,19 +49,25 @@ #![allow(clippy::blocks_in_if_conditions)] use std::fmt; +#[cfg(feature = "parser")] use std::fmt::Write as FmtWrite; use std::io; +#[cfg(feature = "parser")] use std::ops::Range; #[cfg(feature = "html")] pub mod html; mod attr; +#[cfg(feature = "parser")] mod block; +#[cfg(feature = "parser")] mod inline; +#[cfg(feature = "parser")] mod lex; pub use attr::{AttributeValue, AttributeValueParts, Attributes}; +use databake::Bake; type CowStr<'s> = std::borrow::Cow<'s, str>; @@ -197,7 +203,8 @@ impl<'s> AsRef> for &Event<'s> { /// multiple events. [`Container`] elements are represented by a [`Event::Start`] followed by /// events representing its content, and finally a [`Event::End`]. Atomic elements without any /// inside elements are represented by a single event. -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Bake)] +#[databake(path = jotdown)] pub enum Event<'s> { /// Start of a container. Start(Container<'s>, Attributes<'s>), @@ -244,7 +251,8 @@ pub enum Event<'s> { /// - inline, may only contain inline elements, /// - block leaf, may only contain inline elements, /// - block container, may contain any block-level elements. -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Bake)] +#[databake(path = jotdown)] pub enum Container<'s> { /// A blockquote element. Blockquote, @@ -398,7 +406,8 @@ impl<'s> Container<'s> { } /// Alignment of a table column. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Bake)] +#[databake(path = jotdown)] pub enum Alignment { Unspecified, Left, @@ -407,7 +416,8 @@ pub enum Alignment { } /// The type of an inline span link. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Bake)] +#[databake(path = jotdown)] pub enum SpanLinkType { /// E.g. `[text](url)` Inline, @@ -418,7 +428,8 @@ pub enum SpanLinkType { } /// The type of an inline link. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Bake)] +#[databake(path = jotdown)] pub enum LinkType { /// E.g. `[text](url)`. Span(SpanLinkType), @@ -429,7 +440,8 @@ pub enum LinkType { } /// The type of a list. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Bake)] +#[databake(path = jotdown)] pub enum ListKind { /// A bullet list. Unordered, @@ -444,7 +456,8 @@ pub enum ListKind { } /// Numbering type of an ordered list. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Bake)] +#[databake(path = jotdown)] pub enum OrderedListNumbering { /// Decimal numbering, e.g. `1)`. Decimal, @@ -459,7 +472,8 @@ pub enum OrderedListNumbering { } /// Style of an ordered list. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Bake)] +#[databake(path = jotdown)] pub enum OrderedListStyle { /// Number is followed by a period, e.g. `1.`. Period, @@ -470,6 +484,7 @@ pub enum OrderedListStyle { } impl OrderedListNumbering { + #[cfg(feature = "parser")] fn parse_number(self, n: &str) -> u64 { match self { Self::Decimal => n.parse().unwrap(), @@ -524,6 +539,7 @@ impl OrderedListNumbering { } impl OrderedListStyle { + #[cfg(feature = "parser")] fn number(self, marker: &str) -> &str { &marker[usize::from(matches!(self, Self::ParenParen))..marker.len() - 1] } @@ -534,9 +550,9 @@ type Map = std::collections::HashMap; #[cfg(feature = "deterministic")] type Map = std::collections::BTreeMap; -#[cfg(not(feature = "deterministic"))] +#[cfg(all(not(feature = "deterministic"), feature = "parser"))] type Set = std::collections::HashSet; -#[cfg(feature = "deterministic")] +#[cfg(all(feature = "deterministic", feature = "parser"))] type Set = std::collections::BTreeSet; /// A parser that generates [`Event`]s from a Djot document. @@ -548,6 +564,7 @@ type Set = std::collections::BTreeSet; /// /// It is possible to clone the parser to e.g. avoid performing the block parsing multiple times. #[derive(Clone)] +#[cfg(feature = "parser")] pub struct Parser<'s> { src: &'s str, @@ -572,6 +589,7 @@ pub struct Parser<'s> { } #[derive(Clone)] +#[cfg(feature = "parser")] struct Heading { /// Location of heading in src. location: u32, @@ -585,6 +603,7 @@ struct Heading { /// Because of potential future references, an initial pass is required to obtain all definitions. #[derive(Clone)] +#[cfg(feature = "parser")] struct PrePass<'s> { /// Link definitions and their attributes. link_definitions: Map<&'s str, (CowStr<'s>, attr::Attributes<'s>)>, @@ -594,6 +613,7 @@ struct PrePass<'s> { headings_lex: Vec, } +#[cfg(feature = "parser")] impl<'s> PrePass<'s> { #[must_use] fn new( @@ -782,6 +802,7 @@ impl<'s> PrePass<'s> { } } +#[cfg(feature = "parser")] impl<'s> Parser<'s> { #[must_use] pub fn new(src: &'s str) -> Self { @@ -1165,6 +1186,7 @@ impl<'s> Parser<'s> { } } +#[cfg(feature = "parser")] impl<'s> Iterator for Parser<'s> { type Item = Event<'s>; @@ -1177,10 +1199,12 @@ impl<'s> Iterator for Parser<'s> { /// event within the input. /// /// See the documentation of [`Parser::into_offset_iter`] for more information. +#[cfg(feature = "parser")] pub struct OffsetIter<'s> { parser: Parser<'s>, } +#[cfg(feature = "parser")] impl<'s> Iterator for OffsetIter<'s> { type Item = (Event<'s>, Range); @@ -1190,6 +1214,7 @@ impl<'s> Iterator for OffsetIter<'s> { } #[cfg(test)] +#[cfg(feature = "parser")] mod test { use super::Attributes; use super::Container::*; diff --git a/tests/html-ref/ref.rs b/tests/html-ref/ref.rs new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/tests/html-ref/ref.rs @@ -0,0 +1 @@ + diff --git a/tests/html-ut/ut/footnotes.rs b/tests/html-ut/ut/footnotes.rs new file mode 100644 index 0000000..19a1eb5 --- /dev/null +++ b/tests/html-ut/ut/footnotes.rs @@ -0,0 +1,66 @@ +use crate::compare; + +// Footnote references may appear within a footnote. +#[test] +fn test_1c8325a() { + let src = r##"[^a] + +[^a]: a[^b][^c] +[^b]: b +"##; + let expected = r##"

1

+
+
+
    +
  1. +

    a23↩︎︎

    +
  2. +
  3. +

    b↩︎︎

    +
  4. +
  5. +

    ↩︎︎

    +
  6. +
+
+"##; + compare!(src, expected); +} + +// Footnote references in unreferenced footnotes are ignored. +#[test] +fn test_9eab5c8() { + let src = r##"para + +[^a]: a[^b][^c] +[^b]: b +"##; + let expected = r##"

para

+"##; + compare!(src, expected); +} + +// Footnotes may appear within footnotes. +#[test] +fn test_041f54c() { + let src = r##"[^b] +[^a] + +[^a]: [^b]: inner +"##; + let expected = r##"

1 +2

+
+
+
    +
  1. +

    inner↩︎︎

    +
  2. +
  3. +

    ↩︎︎

    +
  4. +
+
+"##; + compare!(src, expected); +} diff --git a/tests/html-ut/ut/lists.rs b/tests/html-ut/ut/lists.rs new file mode 100644 index 0000000..6230f6a --- /dev/null +++ b/tests/html-ut/ut/lists.rs @@ -0,0 +1,27 @@ +use crate::compare; + +#[test] +fn test_fefa2dc() { + let src = r##"1. item + +para +"##; + let expected = r##"
    +
  1. +item +
  2. +
+

para

+"##; + compare!(src, expected); +} + +// Only single letter alphabetic list markers. +#[test] +fn test_2a0aa95() { + let src = r##"word. Continuing paragraph. +"##; + let expected = r##"

word. Continuing paragraph.

+"##; + compare!(src, expected); +} diff --git a/tests/html-ut/ut/mod.rs b/tests/html-ut/ut/mod.rs new file mode 100644 index 0000000..6fea8e3 --- /dev/null +++ b/tests/html-ut/ut/mod.rs @@ -0,0 +1,3 @@ +mod footnotes; +mod lists; +mod raw_blocks; diff --git a/tests/html-ut/ut/raw_blocks.rs b/tests/html-ut/ut/raw_blocks.rs new file mode 100644 index 0000000..6f7c8ee --- /dev/null +++ b/tests/html-ut/ut/raw_blocks.rs @@ -0,0 +1,24 @@ +use crate::compare; + +#[test] +fn test_bf9dbab() { + let src = r##"```=html + + +``` + +paragraph + +```=html + + +``` +"##; + let expected = r##" + +

paragraph

+
+
+"##; + compare!(src, expected); +}