From 253d1d2d4ba99a8ff1bbf61bc47a24301e3f8132 Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Wed, 14 Jun 2023 20:04:31 +0200 Subject: [PATCH 01/10] prepass: avoid peekable for block iter --- src/lib.rs | 47 +++++++++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 3d0754e..c1298c1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -598,49 +598,56 @@ impl<'s> PrePass<'s> { #[must_use] fn new( src: &'s str, - blocks: std::slice::Iter>, + mut blocks: std::slice::Iter>, inline_parser: &mut inline::Parser<'s>, ) -> Self { let mut link_definitions = Map::new(); let mut headings: Vec = Vec::new(); let mut used_ids: Set = Set::new(); - let mut blocks = blocks.peekable(); - let mut attr_prev: Option> = None; while let Some(e) = blocks.next() { match e.kind { block::EventKind::Enter(block::Node::Leaf(block::Leaf::LinkDefinition { label, })) => { - fn next_is_inline( - bs: &mut std::iter::Peekable>, - ) -> bool { - matches!(bs.peek().map(|e| &e.kind), Some(block::EventKind::Inline)) - } - // All link definition tags have to be obtained initially, as references can // appear before the definition. let attrs = attr_prev .as_ref() .map_or_else(Attributes::new, |sp| attr::parse(&src[sp.clone()])); - let url = if !next_is_inline(&mut blocks) { - "".into() - } else { - let start = src[blocks.next().as_ref().unwrap().span.clone()] - .trim_matches(|c: char| c.is_ascii_whitespace()); - if !next_is_inline(&mut blocks) { - start.into() - } else { + let url = if let Some(block::Event { + kind: block::EventKind::Inline, + span, + }) = blocks.next() + { + let start = + src[span.clone()].trim_matches(|c: char| c.is_ascii_whitespace()); + if let Some(block::Event { + kind: block::EventKind::Inline, + span, + }) = blocks.next() + { let mut url = start.to_string(); - while next_is_inline(&mut blocks) { + url.push_str( + src[span.clone()].trim_matches(|c: char| c.is_ascii_whitespace()), + ); + while let Some(block::Event { + kind: block::EventKind::Inline, + span, + }) = blocks.next() + { url.push_str( - src[blocks.next().as_ref().unwrap().span.clone()] + src[span.clone()] .trim_matches(|c: char| c.is_ascii_whitespace()), ); } - url.into() + url.into() // owned + } else { + start.into() // borrowed } + } else { + "".into() // static }; link_definitions.insert(label, (url, attrs)); } From b60650dd0d8b4ad1cf86ede7ec674b843a3d35f3 Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Sun, 25 Jun 2023 18:03:46 +0200 Subject: [PATCH 02/10] inline: fix label of empty multi-line ref links e.g. [some text][] [some text]: url --- src/inline.rs | 43 +++++++++++++++++++------------------------ src/lib.rs | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+), 24 deletions(-) diff --git a/src/inline.rs b/src/inline.rs index cb92020..f1c71c5 100644 --- a/src/inline.rs +++ b/src/inline.rs @@ -732,41 +732,36 @@ impl<'s> Parser<'s> { image, } => { let span_spec = self.events[e_opener].span.end..self.input.span.start; - let multiline = + let multiline_spec = self.events[e_opener].span.start < self.input.span_line.start; let spec: CowStr = if span_spec.is_empty() && !inline { - let span_spec = self.events[event_span].span.end - ..self.events[e_opener - 1].span.start; let events_text = self .events .iter() .skip(event_span + 1) .take(e_opener - event_span - 2); - if multiline - || events_text.clone().any(|ev| { - !matches!(ev.kind, EventKind::Str | EventKind::Atom(..)) - }) - { - let mut spec = String::new(); - let mut span = 0..0; - for ev in events_text.filter(|ev| { - matches!(ev.kind, EventKind::Str | EventKind::Atom(..)) - }) { - if span.end == ev.span.start { - span.end = ev.span.end; - } else { - spec.push_str(&self.input.src[span.clone()]); - span = ev.span.clone(); - } + let mut spec = String::new(); + let mut span = 0..0; + for ev in events_text.filter(|ev| { + matches!(ev.kind, EventKind::Str | EventKind::Atom(..)) + && !matches!(ev.kind, EventKind::Atom(Escape)) + }) { + if matches!(ev.kind, EventKind::Atom(Softbreak | Hardbreak)) { + spec.push_str(&self.input.src[span.clone()]); + spec.push(' '); + span = ev.span.end..ev.span.end; + } else if span.end == ev.span.start { + span.end = ev.span.end; + } else { + spec.push_str(&self.input.src[span.clone()]); + span = ev.span.clone(); } - spec.push_str(&self.input.src[span]); - spec.into() - } else { - self.input.src[span_spec].into() } - } else if multiline { + spec.push_str(&self.input.src[span]); + spec.into() + } else if multiline_spec { let mut spec = String::new(); let mut first_part = true; let mut span = diff --git a/src/lib.rs b/src/lib.rs index c1298c1..6a3fbb6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1697,6 +1697,46 @@ mod test { ); } + #[test] + fn link_reference_multiline_empty() { + test_parse!( + concat!( + "> [a\n", // + "> b][]\n", // + "> [a\\\n", // + "> b][]\n", // + "\n", // + "[a b]: url\n", // + ), + Start(Blockquote, Attributes::new()), + Start(Paragraph, Attributes::new()), + Start( + Link("url".into(), LinkType::Span(SpanLinkType::Reference)), + Attributes::new() + ), + Str("a".into()), + Softbreak, + Str("b".into()), + End(Link("url".into(), LinkType::Span(SpanLinkType::Reference))), + Softbreak, + Start( + Link("url".into(), LinkType::Span(SpanLinkType::Reference)), + Attributes::new() + ), + Str("a".into()), + Escape, + Hardbreak, + Str("b".into()), + End(Link("url".into(), LinkType::Span(SpanLinkType::Reference))), + End(Paragraph), + End(Blockquote), + Blankline, + Start(LinkDefinition { label: "a b" }, Attributes::new()), + Str("url".into()), + End(LinkDefinition { label: "a b" }), + ); + } + #[test] fn link_definition_multiline() { test_parse!( From 0586bf6a44b7d6caf9f169759c716407102a43b8 Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Fri, 14 Jul 2023 20:34:46 +0200 Subject: [PATCH 03/10] block: rm extra new line after raw blocks match reference implementation --- src/block.rs | 10 +++++++++- src/lib.rs | 34 +++++++++++++++++++++++++++++++- tests/html-ut/ut/raw_blocks.test | 19 ++++++++++++++++++ 3 files changed, 61 insertions(+), 2 deletions(-) create mode 100644 tests/html-ut/ut/raw_blocks.test diff --git a/src/block.rs b/src/block.rs index ba8410c..fe0ef61 100644 --- a/src/block.rs +++ b/src/block.rs @@ -353,7 +353,7 @@ impl<'s> TreeParser<'s> { span_end: Range, mut lines: &mut [Range], ) { - if let Kind::Fenced { indent, .. } = k { + if let Kind::Fenced { indent, spec, .. } = k { for line in lines.iter_mut() { let indent_line = self.src.as_bytes()[line.clone()] .iter() @@ -361,6 +361,14 @@ impl<'s> TreeParser<'s> { .count(); line.start += (*indent).min(indent_line); } + + // trim ending whitespace of raw block + if spec.starts_with('=') { + let l = lines.len(); + if l > 0 { + lines[l - 1] = self.trim_end(lines[l - 1].clone()); + } + } } else { // trim starting whitespace of each inline for line in lines.iter_mut() { diff --git a/src/lib.rs b/src/lib.rs index 6a3fbb6..5a94bdc 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1534,7 +1534,39 @@ mod test { test_parse!( "``` =html\n\n```", Start(RawBlock { format: "html" }, Attributes::new()), - Str("
\n".into()), + Str("
".into()), + End(RawBlock { format: "html" }), + ); + } + + #[test] + fn raw_block_whitespace() { + test_parse!( + concat!( + "```=html\n", // + "\n", // + "\n", // + "```\n", // + "\n", // + "paragraph\n", // + "\n", // + "```=html\n", // + "\n", // + "\n", // + "```\n", // + ), + Start(RawBlock { format: "html" }, Attributes::new()), + Str("\n".into()), + Str("".into()), + End(RawBlock { format: "html" }), + Blankline, + Start(Paragraph, Attributes::new()), + Str("paragraph".into()), + End(Paragraph), + Blankline, + Start(RawBlock { format: "html" }, Attributes::new()), + Str("\n".into()), + Str("".into()), End(RawBlock { format: "html" }), ); } diff --git a/tests/html-ut/ut/raw_blocks.test b/tests/html-ut/ut/raw_blocks.test new file mode 100644 index 0000000..0c6d97f --- /dev/null +++ b/tests/html-ut/ut/raw_blocks.test @@ -0,0 +1,19 @@ +```` +```=html + + +``` + +paragraph + +```=html + + +``` +. + + +

paragraph

+
+
+```` From 6091f2ea70a65e6c923f9193ac40da41f4cfc834 Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Sat, 5 Aug 2023 13:09:00 +0200 Subject: [PATCH 04/10] html: fix missing

tags after ordered lists resolves #44 --- src/html.rs | 14 +++++--------- tests/html-ut/ut/lists.test | 12 ++++++++++++ 2 files changed, 17 insertions(+), 9 deletions(-) create mode 100644 tests/html-ut/ut/lists.test diff --git a/src/html.rs b/src/html.rs index cbefb10..dd91fdf 100644 --- a/src/html.rs +++ b/src/html.rs @@ -295,17 +295,13 @@ impl<'s> Writer<'s> { } match c { Container::Blockquote => out.write_str("")?, - Container::List { - kind: ListKind::Unordered | ListKind::Task, - .. - } => { + Container::List { kind, .. } => { self.list_tightness.pop(); - out.write_str("")?; + match kind { + ListKind::Unordered | ListKind::Task => out.write_str("")?, + ListKind::Ordered { .. } => out.write_str("")?, + } } - Container::List { - kind: ListKind::Ordered { .. }, - .. - } => out.write_str("")?, Container::ListItem | Container::TaskListItem { .. } => { out.write_str("")?; } diff --git a/tests/html-ut/ut/lists.test b/tests/html-ut/ut/lists.test new file mode 100644 index 0000000..20b5c3e --- /dev/null +++ b/tests/html-ut/ut/lists.test @@ -0,0 +1,12 @@ +``` +1. item + +para +. +

    +
  1. +item +
  2. +
+

para

+``` From aff431e2277b62a80eab485ddce2cc2f2467c9be Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Sat, 5 Aug 2023 13:23:10 +0200 Subject: [PATCH 05/10] Release 0.3.1 --- CHANGELOG.md | 15 +++++++++++++++ Cargo.lock | 4 ++-- Cargo.toml | 2 +- examples/jotdown_wasm/Cargo.toml | 2 +- 4 files changed, 19 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 005ee7c..4a68018 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,18 @@ +## [0.3.1](https://github.com/hellux/jotdown/releases/tag/0.3.1) - 2023-08-05 + +### Changed + +- Block parser performance improved, up to 15% faster. +- Last `unsafe` block removed (#5). + +### Fixed + +- Do not require indent for continuing footnotes. +- Transfer classes from reference definitions to links. +- Allow line breaks in reference links (still match reference label). +- Remove excess newline after raw blocks. +- HTML renderer: fix missing `

` tags after ordered lists (#44). + ## [0.3.0](https://github.com/hellux/jotdown/releases/tag/0.3.0) - 2023-05-16 ### Added diff --git a/Cargo.lock b/Cargo.lock index c27d81a..1b0285e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -263,11 +263,11 @@ dependencies = [ [[package]] name = "jotdown" -version = "0.3.0" +version = "0.3.1" [[package]] name = "jotdown_wasm" -version = "0.3.0" +version = "0.3.1" dependencies = [ "git2", "jotdown", diff --git a/Cargo.toml b/Cargo.toml index 2fe74dd..d6476ce 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ name = "jotdown" description = "A parser for the Djot markup language" authors = ["Noah Hellman "] -version = "0.3.0" +version = "0.3.1" license = "MIT" edition = "2021" keywords = ["djot", "markup"] diff --git a/examples/jotdown_wasm/Cargo.toml b/examples/jotdown_wasm/Cargo.toml index 5f0c3fb..a924503 100644 --- a/examples/jotdown_wasm/Cargo.toml +++ b/examples/jotdown_wasm/Cargo.toml @@ -3,7 +3,7 @@ name = "jotdown_wasm" description = "Web demo of Jotdown" authors = ["Noah Hellman "] license = "MIT" -version = "0.3.0" +version = "0.3.1" edition = "2021" homepage = "https://hllmn.net/projects/jotdown" repository = "https://github.com/hellux/jotdown" From 905d2919e3a512e7d5fd2fa988b87371f8e685be Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Tue, 29 Aug 2023 18:16:52 +0200 Subject: [PATCH 06/10] only allow 1-char alphabetic list markers --- src/block.rs | 32 ++++++-------------------------- tests/html-ut/ut/lists.test | 8 ++++++++ 2 files changed, 14 insertions(+), 26 deletions(-) diff --git a/src/block.rs b/src/block.rs index fe0ef61..c28363e 100644 --- a/src/block.rs +++ b/src/block.rs @@ -1019,21 +1019,22 @@ impl<'s> IdentifiedBlock<'s> { let numbering = if first.is_ascii_digit() { Decimal - } else if first.is_ascii_lowercase() { - AlphaLower - } else if first.is_ascii_uppercase() { - AlphaUpper } else if is_roman_lower_digit(first) { RomanLower } else if is_roman_upper_digit(first) { RomanUpper + } else if first.is_ascii_lowercase() { + AlphaLower + } else if first.is_ascii_uppercase() { + AlphaUpper } else { return None; }; let max_len = match numbering { + AlphaLower | AlphaUpper => 1, Decimal => 19, - AlphaLower | AlphaUpper | RomanLower | RomanUpper => 13, + RomanLower | RomanUpper => 13, }; let chars_num = chars.clone(); @@ -1065,17 +1066,6 @@ impl<'s> IdentifiedBlock<'s> { }; let len_style = usize::from(start_paren) + 1; - let chars_num = std::iter::once(first).chain(chars_num.take(len_num - 1)); - let numbering = if matches!(numbering, AlphaLower) - && chars_num.clone().all(is_roman_lower_digit) - { - RomanLower - } else if matches!(numbering, AlphaUpper) && chars_num.clone().all(is_roman_upper_digit) { - RomanUpper - } else { - numbering - }; - if chars.next().map_or(true, |c| c.is_ascii_whitespace()) { Some((numbering, style, len_num + len_style)) } else { @@ -3129,16 +3119,6 @@ mod test { "I.", 1 ); - test_block!( - "IJ. abc\n", - Kind::ListItem { - indent: 0, - ty: Ordered(AlphaUpper, Period), - last_blankline: false, - }, - "IJ.", - 1 - ); test_block!( "(a) abc\n", Kind::ListItem { diff --git a/tests/html-ut/ut/lists.test b/tests/html-ut/ut/lists.test index 20b5c3e..1f613b6 100644 --- a/tests/html-ut/ut/lists.test +++ b/tests/html-ut/ut/lists.test @@ -10,3 +10,11 @@ item

para

``` + +Only single letter alphabetic list markers. + +``` +word. Continuing paragraph. +. +

word. Continuing paragraph.

+``` From f43a98478a265de66881bfbb85178aeeed917b2d Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Tue, 29 Aug 2023 18:33:19 +0200 Subject: [PATCH 07/10] ci: fix new afl crate name --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b332999..ff15abd 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -81,7 +81,7 @@ jobs: run: | rustup update nightly rustup default nightly - cargo install afl + cargo install cargo-afl - name: "Fuzz" run: | echo core | sudo tee /proc/sys/kernel/core_pattern From 0ea38bf267242f1d3cc2d95626cdec392507f660 Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Wed, 6 Sep 2023 20:18:20 +0200 Subject: [PATCH 08/10] Release 0.3.2 --- CHANGELOG.md | 6 ++++++ Cargo.lock | 4 ++-- Cargo.toml | 2 +- examples/jotdown_wasm/Cargo.toml | 2 +- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4a68018..808e00c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +## [0.3.2](https://github.com/hellux/jotdown/releases/tag/0.3.2) - 2023-09-06 + +### Changed + +- Alphabetic list markers can only be one character long. + ## [0.3.1](https://github.com/hellux/jotdown/releases/tag/0.3.1) - 2023-08-05 ### Changed diff --git a/Cargo.lock b/Cargo.lock index 1b0285e..3a66d8f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -263,11 +263,11 @@ dependencies = [ [[package]] name = "jotdown" -version = "0.3.1" +version = "0.3.2" [[package]] name = "jotdown_wasm" -version = "0.3.1" +version = "0.3.2" dependencies = [ "git2", "jotdown", diff --git a/Cargo.toml b/Cargo.toml index d6476ce..9413339 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,7 +2,7 @@ name = "jotdown" description = "A parser for the Djot markup language" authors = ["Noah Hellman "] -version = "0.3.1" +version = "0.3.2" license = "MIT" edition = "2021" keywords = ["djot", "markup"] diff --git a/examples/jotdown_wasm/Cargo.toml b/examples/jotdown_wasm/Cargo.toml index a924503..3593138 100644 --- a/examples/jotdown_wasm/Cargo.toml +++ b/examples/jotdown_wasm/Cargo.toml @@ -3,7 +3,7 @@ name = "jotdown_wasm" description = "Web demo of Jotdown" authors = ["Noah Hellman "] license = "MIT" -version = "0.3.1" +version = "0.3.2" edition = "2021" homepage = "https://hllmn.net/projects/jotdown" repository = "https://github.com/hellux/jotdown" From 35891f8f4920806973e354727df1c977251a6202 Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Fri, 12 Jan 2024 22:47:51 +0100 Subject: [PATCH 09/10] add release ci action --- .github/workflows/release.yml | 104 ++++++++++++++++++++++++++++++++++ 1 file changed, 104 insertions(+) create mode 100644 .github/workflows/release.yml diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..fb43acd --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,104 @@ +name: release + +on: + push: + tags: ["[0-9]+.[0-9]+.[0-9]+*"] + +permissions: + contents: write + +env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + +jobs: + create: + name: create release + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + ref: ${{ github.ref_name }} + - name: verify version matches + shell: bash + run: grep -q 'version = "${{ github.ref_name }}"' Cargo.toml || { echo version mismatch >&2 && exit 1; } + - name: create release + run: gh release create ${{ github.ref_name }} --draft --verify-tag --title "Release ${{ github.ref_name }}" + + build: + name: build + needs: ['create'] + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + include: + - os: ubuntu-latest + target: x86_64-unknown-linux-gnu + - os: ubuntu-latest + target: i686-unknown-linux-musl + - os: macos-latest + target: x86_64-apple-darwin + - os: macos-latest + target: aarch64-apple-darwin + - os: windows-latest + target: x86_64-pc-windows-msvc + - os: windows-latest + target: i686-pc-windows-msvc + + steps: + - name: checkout + uses: actions/checkout@v4 + with: + ref: ${{ github.ref_name }} + + - name: install rust + shell: bash + run: | + rustup update stable + rustup target add ${{ matrix.target }} + + - name: build + shell: bash + run: | + if [ -n "${{ matrix.linker }}" ]; then + export RUSTFLAGS="-Clinker=${{ matrix.linker }}" + fi + cargo build --verbose --release --target ${{ matrix.target }} + find . + bin="target/${{ matrix.target }}/release/jotdown" + [ "${{ matrix.os }}" = "windows-latest" ] && bin="$bin.exe" + echo "BIN=$bin" >> $GITHUB_ENV + + - name: strip + if: ${{ startsWith(matrix.os, 'ubuntu') }} + run: strip $BIN + + - name: set archive name + shell: bash + run: echo "ARCHIVE=jotdown-${{ github.ref_name }}-${{ matrix.target }}" >> $GITHUB_ENV + + - name: init archive dir + shell: bash + run: | + mkdir "$ARCHIVE"/ + cp "$BIN" "$ARCHIVE"/ + cp {COPYING,CHANGELOG.md,README.md} "$ARCHIVE"/ + + - name: archive (win) + if: ${{ startsWith(matrix.os, 'windows') }} + shell: bash + run: | + 7z a "$ARCHIVE.zip" "$ARCHIVE" + echo "ASSET=$ARCHIVE.zip" >> $GITHUB_ENV + + - name: archive (unix) + if: ${{ ! startsWith(matrix.os, 'windows') }} + shell: bash + run: | + tar czf "$ARCHIVE.tar.gz" "$ARCHIVE" + echo "ASSET=$ARCHIVE.tar.gz" >> $GITHUB_ENV + + - name: Upload release archive + shell: bash + run: | + gh release upload ${{ github.ref_name }} ${{ env.ASSET }} From 8239b2b51d2ad3517bb0e71440e7bcd5114e08d2 Mon Sep 17 00:00:00 2001 From: Isaac Mills Date: Mon, 18 Mar 2024 18:35:08 -0400 Subject: [PATCH 10/10] Make databake compatible --- Cargo.lock | 58 +++++++++++++++++++++++++++--- Cargo.toml | 6 +++- src/attr.rs | 36 +++++++++++++------ src/lib.rs | 45 +++++++++++++++++------ tests/html-ref/ref.rs | 1 + tests/html-ut/ut/footnotes.rs | 66 ++++++++++++++++++++++++++++++++++ tests/html-ut/ut/lists.rs | 27 ++++++++++++++ tests/html-ut/ut/mod.rs | 3 ++ tests/html-ut/ut/raw_blocks.rs | 24 +++++++++++++ 9 files changed, 240 insertions(+), 26 deletions(-) create mode 100644 tests/html-ref/ref.rs create mode 100644 tests/html-ut/ut/footnotes.rs create mode 100644 tests/html-ut/ut/lists.rs create mode 100644 tests/html-ut/ut/mod.rs create mode 100644 tests/html-ut/ut/raw_blocks.rs diff --git a/Cargo.lock b/Cargo.lock index 3a66d8f..d430cbb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -162,6 +162,29 @@ dependencies = [ "itertools", ] +[[package]] +name = "databake" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82175d72e69414ceafbe2b49686794d3a8bed846e0d50267355f83ea8fdd953a" +dependencies = [ + "databake-derive", + "proc-macro2", + "quote", +] + +[[package]] +name = "databake-derive" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "377af281d8f23663862a7c84623bc5dcf7f8c44b13c7496a590bdc157f941a43" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.16", + "synstructure", +] + [[package]] name = "either" version = "1.8.1" @@ -264,6 +287,9 @@ dependencies = [ [[package]] name = "jotdown" version = "0.3.2" +dependencies = [ + "databake", +] [[package]] name = "jotdown_wasm" @@ -370,9 +396,9 @@ checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160" [[package]] name = "proc-macro2" -version = "1.0.56" +version = "1.0.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b63bdb0cd06f1f4dedf69b254734f9b45af66e4a031e42a7480257d9898b435" +checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e" dependencies = [ "unicode-ident", ] @@ -433,7 +459,7 @@ checksum = "af487d118eecd09402d70a5d72551860e788df87b464af30e5ea6a38c75c541e" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.107", ] [[package]] @@ -458,6 +484,28 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "syn" +version = "2.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6f671d4b5ffdb8eadec19c0ae67fe2639df8684bd7bc4b83d986b8db549cf01" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "synstructure" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.16", +] + [[package]] name = "test-html-ref" version = "0.1.0" @@ -573,7 +621,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn", + "syn 1.0.107", "wasm-bindgen-shared", ] @@ -595,7 +643,7 @@ checksum = "2aff81306fcac3c7515ad4e177f521b5c9a15f2b08f4e32d823066102f35a5f6" dependencies = [ "proc-macro2", "quote", - "syn", + "syn 1.0.107", "wasm-bindgen-backend", "wasm-bindgen-shared", ] diff --git a/Cargo.toml b/Cargo.toml index 9413339..012cf4e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,10 +35,14 @@ exclude = [ [[bin]] name = "jotdown" -required-features = ["html"] +required-features = ["html", "parser"] doc = false [features] default = ["html"] html = [] # html renderer and minimal cli binary deterministic = [] # for stable fuzzing +parser = [] + +[dependencies] +databake = { version = "0.1.7", features = ["derive"] } diff --git a/src/attr.rs b/src/attr.rs index 3bc4e86..ffe178e 100644 --- a/src/attr.rs +++ b/src/attr.rs @@ -1,13 +1,17 @@ +use databake::Bake; + use crate::CowStr; -use std::fmt; +use std::{borrow::Cow, fmt}; /// Parse attributes, assumed to be valid. +#[cfg(feature = "parser")] pub(crate) fn parse(src: &str) -> Attributes { let mut a = Attributes::new(); a.parse(src); a } +#[cfg(feature = "parser")] pub fn valid(src: &str) -> usize { use State::*; @@ -31,7 +35,8 @@ pub fn valid(src: &str) -> usize { /// Stores an attribute value that supports backslash escapes of ASCII punctuation upon displaying, /// without allocating. -#[derive(Clone, Debug, Eq, PartialEq)] +#[derive(Clone, Debug, Eq, PartialEq, Bake)] +#[databake(path = jotdown)] pub struct AttributeValue<'s> { raw: CowStr<'s>, } @@ -118,8 +123,9 @@ impl<'s> Iterator for AttributeValueParts<'s> { // Attributes are relatively rare, we choose to pay 8 bytes always and sometimes an extra // indirection instead of always 24 bytes. #[allow(clippy::box_vec)] -#[derive(Clone, PartialEq, Eq, Default)] -pub struct Attributes<'s>(Option)>>>); +#[derive(Clone, PartialEq, Eq, Default, Bake)] +#[databake(path = jotdown)] +pub struct Attributes<'s>(pub Option)]>>); impl<'s> Attributes<'s> { /// Create an empty collection. @@ -129,11 +135,13 @@ impl<'s> Attributes<'s> { } #[must_use] + #[cfg(feature = "parser")] pub(crate) fn take(&mut self) -> Self { Self(self.0.take()) } /// Parse and append attributes, assumed to be valid. + #[cfg(feature = "parser")] pub(crate) fn parse(&mut self, input: &'s str) { let mut parser = Parser::new(self.take()); parser.parse(input); @@ -141,12 +149,13 @@ impl<'s> Attributes<'s> { } /// Combine all attributes from both objects, prioritizing self on conflicts. + #[cfg(feature = "parser")] pub(crate) fn union(&mut self, other: Self) { if let Some(attrs0) = &mut self.0 { if let Some(mut attrs1) = other.0 { - for (key, val) in attrs1.drain(..) { + for (key, val) in attrs1.to_mut().drain(..) { if key == "class" || !attrs0.iter().any(|(k, _)| *k == key) { - attrs0.push((key, val)); + attrs0.to_mut().push((key, val)); } } } @@ -170,7 +179,7 @@ impl<'s> Attributes<'s> { let attrs = self.0.as_mut().unwrap(); if let Some(i) = attrs.iter().position(|(k, _)| *k == key) { - let prev = &mut attrs[i].1; + let prev = &mut attrs.to_mut()[i].1; if key == "class" { match val.raw { CowStr::Borrowed(s) => prev.extend(s), @@ -184,7 +193,7 @@ impl<'s> Attributes<'s> { i } else { let i = attrs.len(); - attrs.push((key, val)); + attrs.to_mut().push((key, val)); i } } @@ -238,10 +247,12 @@ impl<'s> std::fmt::Debug for Attributes<'s> { } #[derive(Clone)] +#[cfg(feature = "parser")] pub struct Validator { state: State, } +#[cfg(feature = "parser")] impl Validator { pub fn new() -> Self { Self { @@ -274,12 +285,14 @@ impl Validator { /// /// Input is assumed to contain a valid series of attribute sets, the attributes are added as they /// are encountered. +#[cfg(feature = "parser")] pub struct Parser<'s> { attrs: Attributes<'s>, i_prev: usize, state: State, } +#[cfg(feature = "parser")] impl<'s> Parser<'s> { pub fn new(attrs: Attributes<'s>) -> Self { Self { @@ -310,7 +323,7 @@ impl<'s> Parser<'s> { Identifier => self.attrs.insert("id", content.into()), Key => self.i_prev = self.attrs.insert_pos(content, "".into()), Value | ValueQuoted | ValueContinued => { - self.attrs.0.as_mut().unwrap()[self.i_prev] + self.attrs.0.as_mut().unwrap().to_mut()[self.i_prev] .1 .extend(&content[usize::from(matches!(st, ValueQuoted))..]); } @@ -338,6 +351,7 @@ impl<'s> Parser<'s> { } #[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[cfg(feature = "parser")] enum State { Start, Whitespace, @@ -357,6 +371,7 @@ enum State { Invalid, } +#[cfg(feature = "parser")] impl State { fn step(self, c: u8) -> State { use State::*; @@ -399,11 +414,12 @@ impl State { } } +#[cfg(feature = "parser")] pub fn is_name(c: u8) -> bool { c.is_ascii_alphanumeric() || matches!(c, b':' | b'_' | b'-') } -#[cfg(test)] +#[cfg(all(test, feature = "parser"))] mod test { macro_rules! test_attr { ($src:expr $(,$($av:expr),* $(,)?)?) => { diff --git a/src/lib.rs b/src/lib.rs index 5a94bdc..ae87054 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -49,19 +49,25 @@ #![allow(clippy::blocks_in_if_conditions)] use std::fmt; +#[cfg(feature = "parser")] use std::fmt::Write as FmtWrite; use std::io; +#[cfg(feature = "parser")] use std::ops::Range; #[cfg(feature = "html")] pub mod html; mod attr; +#[cfg(feature = "parser")] mod block; +#[cfg(feature = "parser")] mod inline; +#[cfg(feature = "parser")] mod lex; pub use attr::{AttributeValue, AttributeValueParts, Attributes}; +use databake::Bake; type CowStr<'s> = std::borrow::Cow<'s, str>; @@ -197,7 +203,8 @@ impl<'s> AsRef> for &Event<'s> { /// multiple events. [`Container`] elements are represented by a [`Event::Start`] followed by /// events representing its content, and finally a [`Event::End`]. Atomic elements without any /// inside elements are represented by a single event. -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Bake)] +#[databake(path = jotdown)] pub enum Event<'s> { /// Start of a container. Start(Container<'s>, Attributes<'s>), @@ -244,7 +251,8 @@ pub enum Event<'s> { /// - inline, may only contain inline elements, /// - block leaf, may only contain inline elements, /// - block container, may contain any block-level elements. -#[derive(Debug, Clone, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq, Bake)] +#[databake(path = jotdown)] pub enum Container<'s> { /// A blockquote element. Blockquote, @@ -398,7 +406,8 @@ impl<'s> Container<'s> { } /// Alignment of a table column. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Bake)] +#[databake(path = jotdown)] pub enum Alignment { Unspecified, Left, @@ -407,7 +416,8 @@ pub enum Alignment { } /// The type of an inline span link. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Bake)] +#[databake(path = jotdown)] pub enum SpanLinkType { /// E.g. `[text](url)` Inline, @@ -418,7 +428,8 @@ pub enum SpanLinkType { } /// The type of an inline link. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Bake)] +#[databake(path = jotdown)] pub enum LinkType { /// E.g. `[text](url)`. Span(SpanLinkType), @@ -429,7 +440,8 @@ pub enum LinkType { } /// The type of a list. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Bake)] +#[databake(path = jotdown)] pub enum ListKind { /// A bullet list. Unordered, @@ -444,7 +456,8 @@ pub enum ListKind { } /// Numbering type of an ordered list. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Bake)] +#[databake(path = jotdown)] pub enum OrderedListNumbering { /// Decimal numbering, e.g. `1)`. Decimal, @@ -459,7 +472,8 @@ pub enum OrderedListNumbering { } /// Style of an ordered list. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Bake)] +#[databake(path = jotdown)] pub enum OrderedListStyle { /// Number is followed by a period, e.g. `1.`. Period, @@ -470,6 +484,7 @@ pub enum OrderedListStyle { } impl OrderedListNumbering { + #[cfg(feature = "parser")] fn parse_number(self, n: &str) -> u64 { match self { Self::Decimal => n.parse().unwrap(), @@ -524,6 +539,7 @@ impl OrderedListNumbering { } impl OrderedListStyle { + #[cfg(feature = "parser")] fn number(self, marker: &str) -> &str { &marker[usize::from(matches!(self, Self::ParenParen))..marker.len() - 1] } @@ -534,9 +550,9 @@ type Map = std::collections::HashMap; #[cfg(feature = "deterministic")] type Map = std::collections::BTreeMap; -#[cfg(not(feature = "deterministic"))] +#[cfg(all(not(feature = "deterministic"), feature = "parser"))] type Set = std::collections::HashSet; -#[cfg(feature = "deterministic")] +#[cfg(all(feature = "deterministic", feature = "parser"))] type Set = std::collections::BTreeSet; /// A parser that generates [`Event`]s from a Djot document. @@ -548,6 +564,7 @@ type Set = std::collections::BTreeSet; /// /// It is possible to clone the parser to e.g. avoid performing the block parsing multiple times. #[derive(Clone)] +#[cfg(feature = "parser")] pub struct Parser<'s> { src: &'s str, @@ -572,6 +589,7 @@ pub struct Parser<'s> { } #[derive(Clone)] +#[cfg(feature = "parser")] struct Heading { /// Location of heading in src. location: u32, @@ -585,6 +603,7 @@ struct Heading { /// Because of potential future references, an initial pass is required to obtain all definitions. #[derive(Clone)] +#[cfg(feature = "parser")] struct PrePass<'s> { /// Link definitions and their attributes. link_definitions: Map<&'s str, (CowStr<'s>, attr::Attributes<'s>)>, @@ -594,6 +613,7 @@ struct PrePass<'s> { headings_lex: Vec, } +#[cfg(feature = "parser")] impl<'s> PrePass<'s> { #[must_use] fn new( @@ -782,6 +802,7 @@ impl<'s> PrePass<'s> { } } +#[cfg(feature = "parser")] impl<'s> Parser<'s> { #[must_use] pub fn new(src: &'s str) -> Self { @@ -1165,6 +1186,7 @@ impl<'s> Parser<'s> { } } +#[cfg(feature = "parser")] impl<'s> Iterator for Parser<'s> { type Item = Event<'s>; @@ -1177,10 +1199,12 @@ impl<'s> Iterator for Parser<'s> { /// event within the input. /// /// See the documentation of [`Parser::into_offset_iter`] for more information. +#[cfg(feature = "parser")] pub struct OffsetIter<'s> { parser: Parser<'s>, } +#[cfg(feature = "parser")] impl<'s> Iterator for OffsetIter<'s> { type Item = (Event<'s>, Range); @@ -1190,6 +1214,7 @@ impl<'s> Iterator for OffsetIter<'s> { } #[cfg(test)] +#[cfg(feature = "parser")] mod test { use super::Attributes; use super::Container::*; diff --git a/tests/html-ref/ref.rs b/tests/html-ref/ref.rs new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/tests/html-ref/ref.rs @@ -0,0 +1 @@ + diff --git a/tests/html-ut/ut/footnotes.rs b/tests/html-ut/ut/footnotes.rs new file mode 100644 index 0000000..19a1eb5 --- /dev/null +++ b/tests/html-ut/ut/footnotes.rs @@ -0,0 +1,66 @@ +use crate::compare; + +// Footnote references may appear within a footnote. +#[test] +fn test_1c8325a() { + let src = r##"[^a] + +[^a]: a[^b][^c] +[^b]: b +"##; + let expected = r##"

1

+
+
+
    +
  1. +

    a23↩︎︎

    +
  2. +
  3. +

    b↩︎︎

    +
  4. +
  5. +

    ↩︎︎

    +
  6. +
+
+"##; + compare!(src, expected); +} + +// Footnote references in unreferenced footnotes are ignored. +#[test] +fn test_9eab5c8() { + let src = r##"para + +[^a]: a[^b][^c] +[^b]: b +"##; + let expected = r##"

para

+"##; + compare!(src, expected); +} + +// Footnotes may appear within footnotes. +#[test] +fn test_041f54c() { + let src = r##"[^b] +[^a] + +[^a]: [^b]: inner +"##; + let expected = r##"

1 +2

+
+
+
    +
  1. +

    inner↩︎︎

    +
  2. +
  3. +

    ↩︎︎

    +
  4. +
+
+"##; + compare!(src, expected); +} diff --git a/tests/html-ut/ut/lists.rs b/tests/html-ut/ut/lists.rs new file mode 100644 index 0000000..6230f6a --- /dev/null +++ b/tests/html-ut/ut/lists.rs @@ -0,0 +1,27 @@ +use crate::compare; + +#[test] +fn test_fefa2dc() { + let src = r##"1. item + +para +"##; + let expected = r##"
    +
  1. +item +
  2. +
+

para

+"##; + compare!(src, expected); +} + +// Only single letter alphabetic list markers. +#[test] +fn test_2a0aa95() { + let src = r##"word. Continuing paragraph. +"##; + let expected = r##"

word. Continuing paragraph.

+"##; + compare!(src, expected); +} diff --git a/tests/html-ut/ut/mod.rs b/tests/html-ut/ut/mod.rs new file mode 100644 index 0000000..6fea8e3 --- /dev/null +++ b/tests/html-ut/ut/mod.rs @@ -0,0 +1,3 @@ +mod footnotes; +mod lists; +mod raw_blocks; diff --git a/tests/html-ut/ut/raw_blocks.rs b/tests/html-ut/ut/raw_blocks.rs new file mode 100644 index 0000000..6f7c8ee --- /dev/null +++ b/tests/html-ut/ut/raw_blocks.rs @@ -0,0 +1,24 @@ +use crate::compare; + +#[test] +fn test_bf9dbab() { + let src = r##"```=html + + +``` + +paragraph + +```=html + + +``` +"##; + let expected = r##" + +

paragraph

+
+
+"##; + compare!(src, expected); +}