commit
e458955d00
11 changed files with 270 additions and 49 deletions
7
.github/workflows/ci.yml
vendored
7
.github/workflows/ci.yml
vendored
|
@ -77,10 +77,13 @@ jobs:
|
|||
rustup update nightly
|
||||
rustup default nightly
|
||||
cargo install afl
|
||||
- name: "Fuzz"
|
||||
- name: "Fuzz parser"
|
||||
run: |
|
||||
echo core | sudo tee /proc/sys/kernel/core_pattern
|
||||
make afl_quick
|
||||
AFL_TARGET=parse make afl_quick
|
||||
- name: "Fuzz html"
|
||||
run: |
|
||||
AFL_TARGET=html make afl_quick
|
||||
bench:
|
||||
name: Benchmark
|
||||
runs-on: ubuntu-latest
|
||||
|
|
21
Makefile
21
Makefile
|
@ -53,14 +53,14 @@ bench:
|
|||
cov: suite suite_bench
|
||||
LLVM_COV=llvm-cov LLVM_PROFDATA=llvm-profdata cargo llvm-cov --features=suite,suite_bench --workspace --html --ignore-run-fail
|
||||
|
||||
AFL_TARGET?=gen
|
||||
AFL_TARGET?=parse
|
||||
AFL_JOBS?=1
|
||||
AFL_TARGET_CRASH?=crashes
|
||||
|
||||
afl:
|
||||
rm -rf tests/afl/out
|
||||
(cd tests/afl && \
|
||||
cargo afl build --release --config profile.release.debug-assertions=true && \
|
||||
cargo afl build --no-default-features --release --config profile.release.debug-assertions=true && \
|
||||
(AFL_NO_UI=1 cargo afl fuzz -i in -o out -Mm target/release/${AFL_TARGET} &) && \
|
||||
for i in $$(seq $$((${AFL_JOBS} - 1))); do \
|
||||
AFL_NO_UI=1 cargo afl fuzz -i in -o out -Ss$$i target/release/${AFL_TARGET} & \
|
||||
|
@ -71,24 +71,31 @@ afl:
|
|||
afl_quick:
|
||||
rm -rf tests/afl/out
|
||||
(cd tests/afl && \
|
||||
cargo afl build --release --config profile.release.debug-assertions=true && \
|
||||
cargo afl build --no-default-features --release --config profile.release.debug-assertions=true && \
|
||||
AFL_NO_UI=1 AFL_BENCH_UNTIL_CRASH=1 \
|
||||
cargo afl fuzz -i in -o out -V 60 target/release/${AFL_TARGET})
|
||||
|
||||
afl_crash:
|
||||
set +e; \
|
||||
for f in $$(find tests/afl/out -path '*/${AFL_TARGET_CRASH}/id*'); do \
|
||||
echo "cat $$f | RUST_BACKTRACE=1 cargo run"; \
|
||||
out=$$(cat $$f | RUST_BACKTRACE=1 cargo run 2>&1); \
|
||||
failures="$$(find . -path './tmin/*') $$(find tests/afl/out -path '*/${AFL_TARGET_CRASH}/id*')"; \
|
||||
for f in $$failures; do \
|
||||
echo $$f; \
|
||||
out=$$(cat $$f | (cd tests/afl && RUST_BACKTRACE=1 cargo run ${AFL_TARGET} 2>&1)); \
|
||||
if [ $$? -ne 0 ]; then \
|
||||
echo; \
|
||||
echo "FAIL"; \
|
||||
echo "$$out"; \
|
||||
echo "cat $$f | RUST_BACKTRACE=1 cargo run"; \
|
||||
exit 1; \
|
||||
fi; \
|
||||
done
|
||||
|
||||
afl_tmin:
|
||||
rm -rf tmin
|
||||
mkdir tmin
|
||||
for f in $$(find tests/afl/out -path '*/${AFL_TARGET_CRASH}/id*'); do \
|
||||
cargo afl tmin -i $$f -o tmin/$$(basename $$f) tests/afl/target/release/${AFL_TARGET}; \
|
||||
done
|
||||
|
||||
clean:
|
||||
cargo clean
|
||||
git submodule deinit -f --all
|
||||
|
|
|
@ -262,7 +262,7 @@ impl<I: Iterator<Item = char>> Parser<I> {
|
|||
}
|
||||
}
|
||||
s @ (ClassFirst | IdentifierFirst) => {
|
||||
if is_name_start(c) {
|
||||
if is_name(c) {
|
||||
match s {
|
||||
ClassFirst => Class,
|
||||
IdentifierFirst => Identifier,
|
||||
|
@ -344,12 +344,8 @@ impl<I: Iterator<Item = char>> Parser<I> {
|
|||
}
|
||||
}
|
||||
|
||||
/// Returns `true` if `c` is an ASCII letter, an ASCII digit, `_` or `:`.
pub fn is_name_start(c: char) -> bool {
    matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | ':')
}
|
||||
|
||||
/// Returns `true` if `c` may appear in a name: an ASCII letter, an ASCII
/// digit, or one of `:`, `_`, `-`.
///
/// The predicate is self-contained and does not rely on any other helper.
pub fn is_name(c: char) -> bool {
    c.is_ascii_alphanumeric() || matches!(c, ':' | '_' | '-')
}
|
||||
|
||||
enum Element {
|
||||
|
|
|
@ -734,9 +734,8 @@ impl IdentifiedBlock {
|
|||
f @ ('`' | ':' | '~') => {
|
||||
let fence_length = 1 + (&mut chars).take_while(|c| *c == f).count();
|
||||
let spec = &line_t[fence_length..].trim_start();
|
||||
let valid_spec = if f == ':' && !spec.starts_with('=') {
|
||||
spec.chars().next().map_or(true, attr::is_name_start)
|
||||
&& spec.chars().skip(1).all(attr::is_name)
|
||||
let valid_spec = if f == ':' {
|
||||
spec.chars().all(attr::is_name)
|
||||
} else {
|
||||
!spec.chars().any(char::is_whitespace) && !spec.chars().any(|c| c == '`')
|
||||
};
|
||||
|
|
43
src/html.rs
43
src/html.rs
|
@ -67,7 +67,7 @@ struct Writer<'s, I: Iterator<Item = Event<'s>>, W> {
|
|||
events: std::iter::Peekable<FilteredEvents<I>>,
|
||||
out: W,
|
||||
raw: Raw,
|
||||
text_only: bool,
|
||||
img_alt_text: usize,
|
||||
list_tightness: Vec<bool>,
|
||||
encountered_footnote: bool,
|
||||
footnote_number: Option<std::num::NonZeroUsize>,
|
||||
|
@ -81,7 +81,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
|
|||
events: FilteredEvents { events }.peekable(),
|
||||
out,
|
||||
raw: Raw::None,
|
||||
text_only: false,
|
||||
img_alt_text: 0,
|
||||
list_tightness: Vec::new(),
|
||||
encountered_footnote: false,
|
||||
footnote_number: None,
|
||||
|
@ -97,7 +97,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
|
|||
if c.is_block() && !self.first_line {
|
||||
self.out.write_char('\n')?;
|
||||
}
|
||||
if self.text_only && !matches!(c, Container::Image(..)) {
|
||||
if self.img_alt_text > 0 && !matches!(c, Container::Image(..)) {
|
||||
continue;
|
||||
}
|
||||
match &c {
|
||||
|
@ -171,8 +171,12 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
|
|||
}
|
||||
}
|
||||
Container::Image(..) => {
|
||||
self.text_only = true;
|
||||
self.out.write_str("<img")?;
|
||||
self.img_alt_text += 1;
|
||||
if self.img_alt_text == 1 {
|
||||
self.out.write_str("<img")?;
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
Container::Verbatim => self.out.write_str("<code")?,
|
||||
Container::RawBlock { format } | Container::RawInline { format } => {
|
||||
|
@ -283,7 +287,9 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
|
|||
}
|
||||
}
|
||||
Container::Image(..) => {
|
||||
self.out.write_str(r#" alt=""#)?;
|
||||
if self.img_alt_text == 1 {
|
||||
self.out.write_str(r#" alt=""#)?;
|
||||
}
|
||||
}
|
||||
Container::Math { display } => {
|
||||
self.out
|
||||
|
@ -296,7 +302,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
|
|||
if c.is_block_container() && !matches!(c, Container::Footnote { .. }) {
|
||||
self.out.write_char('\n')?;
|
||||
}
|
||||
if self.text_only && !matches!(c, Container::Image(..)) {
|
||||
if self.img_alt_text > 0 && !matches!(c, Container::Image(..)) {
|
||||
continue;
|
||||
}
|
||||
match c {
|
||||
|
@ -360,12 +366,14 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
|
|||
Container::Span => self.out.write_str("</span>")?,
|
||||
Container::Link(..) => self.out.write_str("</a>")?,
|
||||
Container::Image(src, ..) => {
|
||||
self.text_only = false;
|
||||
if src.is_empty() {
|
||||
if self.img_alt_text == 1 {
|
||||
if !src.is_empty() {
|
||||
self.out.write_str(r#"" src=""#)?;
|
||||
self.write_attr(&src)?;
|
||||
}
|
||||
self.out.write_str(r#"">"#)?;
|
||||
} else {
|
||||
write!(self.out, r#"" src="{}">"#, src)?;
|
||||
}
|
||||
self.img_alt_text -= 1;
|
||||
}
|
||||
Container::Verbatim => self.out.write_str("</code>")?,
|
||||
Container::Math { display } => {
|
||||
|
@ -388,16 +396,19 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
|
|||
}
|
||||
}
|
||||
Event::Str(s) => match self.raw {
|
||||
Raw::None if self.img_alt_text > 0 => self.write_attr(&s)?,
|
||||
Raw::None => self.write_text(&s)?,
|
||||
Raw::Html => self.out.write_str(&s)?,
|
||||
Raw::Other => {}
|
||||
},
|
||||
Event::FootnoteReference(_tag, number) => {
|
||||
write!(
|
||||
self.out,
|
||||
r##"<a id="fnref{}" href="#fn{}" role="doc-noteref"><sup>{}</sup></a>"##,
|
||||
number, number, number
|
||||
)?;
|
||||
if self.img_alt_text == 0 {
|
||||
write!(
|
||||
self.out,
|
||||
r##"<a id="fnref{}" href="#fn{}" role="doc-noteref"><sup>{}</sup></a>"##,
|
||||
number, number, number
|
||||
)?;
|
||||
}
|
||||
}
|
||||
Event::Symbol(sym) => write!(self.out, ":{}:", sym)?,
|
||||
Event::LeftSingleQuote => self.out.write_str("‘")?,
|
||||
|
|
|
@ -2,11 +2,25 @@
|
|||
name = "jotdown-afl"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
default-run = "main"
|
||||
|
||||
[dependencies]
|
||||
afl = "0.11"
|
||||
jotdown = { path = "../../", features = ["deterministic"] }
|
||||
html5ever = "0.26"
|
||||
|
||||
[[bin]]
|
||||
name = "gen"
|
||||
path = "src/gen.rs"
|
||||
name = "main"
|
||||
path = "src/main.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "parse"
|
||||
path = "src/parse.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "html"
|
||||
path = "src/html.rs"
|
||||
|
||||
[features]
|
||||
default = ["debug"]
|
||||
debug = []
|
||||
|
|
|
@ -1,13 +0,0 @@
|
|||
use afl::fuzz;
|
||||
|
||||
use jotdown::Render;
|
||||
|
||||
fn main() {
|
||||
fuzz!(|data: &[u8]| {
|
||||
if let Ok(s) = std::str::from_utf8(data) {
|
||||
let p = jotdown::Parser::new(s);
|
||||
let mut output = String::new();
|
||||
jotdown::html::Renderer.push(p, &mut output).unwrap();
|
||||
}
|
||||
});
|
||||
}
|
3
tests/afl/src/html.rs
Normal file
3
tests/afl/src/html.rs
Normal file
|
@ -0,0 +1,3 @@
|
|||
fn main() {
|
||||
afl::fuzz!(|data: &[u8]| { jotdown_afl::html(data) });
|
||||
}
|
180
tests/afl/src/lib.rs
Normal file
180
tests/afl/src/lib.rs
Normal file
|
@ -0,0 +1,180 @@
|
|||
use jotdown::Render;
|
||||
|
||||
use html5ever::tendril;
|
||||
use html5ever::tendril::TendrilSink;
|
||||
use html5ever::tokenizer;
|
||||
use html5ever::tree_builder;
|
||||
|
||||
pub fn parse(data: &[u8]) {
|
||||
if let Ok(s) = std::str::from_utf8(data) {
|
||||
jotdown::Parser::new(s).last();
|
||||
}
|
||||
}
|
||||
|
||||
pub fn html(data: &[u8]) {
|
||||
if data.iter().any(|i| *i == 0) {
|
||||
return;
|
||||
}
|
||||
if let Ok(s) = std::str::from_utf8(data) {
|
||||
if !s.contains("=html") {
|
||||
let p = jotdown::Parser::new(s);
|
||||
let mut html = "<!DOCTYPE html>\n".to_string();
|
||||
jotdown::html::Renderer.push(p, &mut html).unwrap();
|
||||
validate_html(&html);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn validate_html(html: &str) {
|
||||
#[cfg(feature = "debug")]
|
||||
let mut has_error = false;
|
||||
|
||||
html5ever::parse_document(
|
||||
Dom {
|
||||
names: Vec::new(),
|
||||
#[cfg(feature = "debug")]
|
||||
has_error: &mut has_error,
|
||||
#[cfg(feature = "debug")]
|
||||
line_no: 1,
|
||||
#[cfg(not(feature = "debug"))]
|
||||
_lifetime: std::marker::PhantomData,
|
||||
},
|
||||
html5ever::ParseOpts {
|
||||
tokenizer: tokenizer::TokenizerOpts {
|
||||
exact_errors: true,
|
||||
..tokenizer::TokenizerOpts::default()
|
||||
},
|
||||
tree_builder: tree_builder::TreeBuilderOpts {
|
||||
exact_errors: true,
|
||||
scripting_enabled: false,
|
||||
..tree_builder::TreeBuilderOpts::default()
|
||||
},
|
||||
},
|
||||
)
|
||||
.from_utf8()
|
||||
.read_from(&mut std::io::Cursor::new(html))
|
||||
.unwrap();
|
||||
|
||||
#[cfg(feature = "debug")]
|
||||
if has_error {
|
||||
eprintln!("html:");
|
||||
html.split('\n').enumerate().for_each(|(i, l)| {
|
||||
eprintln!("{:>2}:{}", i + 1, l);
|
||||
});
|
||||
eprintln!("\n");
|
||||
panic!();
|
||||
}
|
||||
}
|
||||
|
||||
struct Dom<'a> {
|
||||
names: Vec<html5ever::QualName>,
|
||||
#[cfg(feature = "debug")]
|
||||
has_error: &'a mut bool,
|
||||
#[cfg(feature = "debug")]
|
||||
line_no: u64,
|
||||
#[cfg(not(feature = "debug"))]
|
||||
_lifetime: std::marker::PhantomData<&'a ()>,
|
||||
}
|
||||
|
||||
impl<'a> tree_builder::TreeSink for Dom<'a> {
|
||||
type Handle = usize;
|
||||
type Output = Self;
|
||||
|
||||
fn get_document(&mut self) -> usize {
|
||||
0
|
||||
}
|
||||
|
||||
fn finish(self) -> Self {
|
||||
self
|
||||
}
|
||||
|
||||
fn same_node(&self, x: &usize, y: &usize) -> bool {
|
||||
x == y
|
||||
}
|
||||
|
||||
fn elem_name(&self, i: &usize) -> html5ever::ExpandedName {
|
||||
self.names[i - 1].expanded()
|
||||
}
|
||||
|
||||
fn create_element(
|
||||
&mut self,
|
||||
name: html5ever::QualName,
|
||||
_: Vec<html5ever::Attribute>,
|
||||
_: tree_builder::ElementFlags,
|
||||
) -> usize {
|
||||
self.names.push(name);
|
||||
self.names.len()
|
||||
}
|
||||
|
||||
fn parse_error(&mut self, msg: std::borrow::Cow<'static, str>) {
|
||||
let whitelist = &[
|
||||
"Bad character", // bad characters in input will pass through
|
||||
"Duplicate attribute", // djot is case-sensitive while html is not
|
||||
// tags may be nested incorrectly, e.g. <a> within <a>
|
||||
"Unexpected token Tag",
|
||||
"Found special tag while closing generic tag",
|
||||
"Formatting element not current node",
|
||||
"Formatting element not open",
|
||||
// FIXME bug caused by empty table at end of list
|
||||
"No matching tag to close",
|
||||
"Unexpected open element while closing",
|
||||
];
|
||||
if !whitelist.iter().any(|e| msg.starts_with(e)) {
|
||||
#[cfg(feature = "debug")]
|
||||
{
|
||||
*self.has_error = true;
|
||||
eprintln!("{}: {}\n", self.line_no, msg);
|
||||
}
|
||||
#[cfg(not(feature = "debug"))]
|
||||
{
|
||||
panic!("invalid html");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn set_quirks_mode(&mut self, _: tree_builder::QuirksMode) {}
|
||||
|
||||
#[cfg(feature = "debug")]
|
||||
fn set_current_line(&mut self, l: u64) {
|
||||
self.line_no = l;
|
||||
}
|
||||
#[cfg(not(feature = "debug"))]
|
||||
fn set_current_line(&mut self, _: u64) {}
|
||||
|
||||
fn append(&mut self, _: &usize, _: tree_builder::NodeOrText<usize>) {}
|
||||
fn append_before_sibling(&mut self, _: &usize, _: tree_builder::NodeOrText<usize>) {}
|
||||
fn append_based_on_parent_node(
|
||||
&mut self,
|
||||
_: &usize,
|
||||
_: &usize,
|
||||
_: tree_builder::NodeOrText<usize>,
|
||||
) {
|
||||
}
|
||||
fn append_doctype_to_document(
|
||||
&mut self,
|
||||
_: tendril::StrTendril,
|
||||
_: tendril::StrTendril,
|
||||
_: tendril::StrTendril,
|
||||
) {
|
||||
}
|
||||
fn remove_from_parent(&mut self, _: &usize) {}
|
||||
fn reparent_children(&mut self, _: &usize, _: &usize) {}
|
||||
|
||||
fn mark_script_already_started(&mut self, _: &usize) {}
|
||||
|
||||
fn add_attrs_if_missing(&mut self, _: &usize, _: Vec<html5ever::Attribute>) {
|
||||
panic!();
|
||||
}
|
||||
|
||||
fn create_pi(&mut self, _: tendril::StrTendril, _: tendril::StrTendril) -> usize {
|
||||
panic!()
|
||||
}
|
||||
|
||||
fn get_template_contents(&mut self, _: &usize) -> usize {
|
||||
panic!();
|
||||
}
|
||||
|
||||
fn create_comment(&mut self, _: tendril::StrTendril) -> usize {
|
||||
panic!()
|
||||
}
|
||||
}
|
18
tests/afl/src/main.rs
Normal file
18
tests/afl/src/main.rs
Normal file
|
@ -0,0 +1,18 @@
|
|||
use std::io::Read;
|
||||
|
||||
fn main() {
|
||||
let mut args = std::env::args();
|
||||
let _program = args.next();
|
||||
let target = args.next().expect("no target");
|
||||
assert_eq!(args.next(), None);
|
||||
|
||||
let f = match target.as_str() {
|
||||
"parse" => jotdown_afl::parse,
|
||||
"html" => jotdown_afl::html,
|
||||
_ => panic!("unknown target '{}'", target),
|
||||
};
|
||||
|
||||
let mut input = Vec::new();
|
||||
std::io::stdin().read_to_end(&mut input).unwrap();
|
||||
f(&input);
|
||||
}
|
3
tests/afl/src/parse.rs
Normal file
3
tests/afl/src/parse.rs
Normal file
|
@ -0,0 +1,3 @@
|
|||
fn main() {
|
||||
afl::fuzz!(|data: &[u8]| { jotdown_afl::parse(data) });
|
||||
}
|
Loading…
Reference in a new issue