From 9a7c57f524a69d15733a2629999ad7e596b593c9 Mon Sep 17 00:00:00 2001
From: Noah Hellman
Date: Sat, 11 Mar 2023 23:56:27 +0100
Subject: [PATCH] afl: add html target, checking for invalid html

---
 tests/afl/Cargo.toml  |   5 ++
 tests/afl/src/html.rs |   3 +
 tests/afl/src/lib.rs  | 168 ++++++++++++++++++++++++++++++++++++++++++
 tests/afl/src/main.rs |   1 +
 4 files changed, 177 insertions(+)
 create mode 100644 tests/afl/src/html.rs

diff --git a/tests/afl/Cargo.toml b/tests/afl/Cargo.toml
index 36dcf82..6b92727 100644
--- a/tests/afl/Cargo.toml
+++ b/tests/afl/Cargo.toml
@@ -7,6 +7,7 @@ default-run = "main"
 [dependencies]
 afl = "0.11"
 jotdown = { path = "../../", features = ["deterministic"] }
+html5ever = "0.26"
 
 [[bin]]
 name = "main"
@@ -15,3 +16,7 @@ path = "src/main.rs"
 [[bin]]
 name = "parse"
 path = "src/parse.rs"
+
+[[bin]]
+name = "html"
+path = "src/html.rs"
diff --git a/tests/afl/src/html.rs b/tests/afl/src/html.rs
new file mode 100644
index 0000000..ae8b3f0
--- /dev/null
+++ b/tests/afl/src/html.rs
@@ -0,0 +1,3 @@
+fn main() {
+    afl::fuzz!(|data: &[u8]| { jotdown_afl::html(data) });
+}
diff --git a/tests/afl/src/lib.rs b/tests/afl/src/lib.rs
index a4f190b..694720b 100644
--- a/tests/afl/src/lib.rs
+++ b/tests/afl/src/lib.rs
@@ -1,5 +1,173 @@
+use jotdown::Render;
+
+use html5ever::tendril;
+use html5ever::tendril::TendrilSink;
+use html5ever::tokenizer;
+use html5ever::tree_builder;
+
 pub fn parse(data: &[u8]) {
     if let Ok(s) = std::str::from_utf8(data) {
         jotdown::Parser::new(s).last();
     }
 }
+
+/// Fuzz target: render `data` as djot and panic if the resulting HTML is invalid.
+///
+/// Inputs are skipped when they contain a NUL byte, are not valid UTF-8, or contain `=html`
+/// (presumably because raw HTML is emitted verbatim and cannot be expected to validate).
+pub fn html(data: &[u8]) {
+    if data.iter().any(|i| *i == 0) {
+        return;
+    }
+    if let Ok(s) = std::str::from_utf8(data) {
+        if !s.contains("=html") {
+            let p = jotdown::Parser::new(s);
+            // html5ever is given a full document, so the rendered body is preceded by a doctype
+            let mut html = "<!DOCTYPE html>\n".to_string();
+            jotdown::html::Renderer.push(p, &mut html).unwrap();
+            validate_html(&html);
+        }
+    }
+}
+
+/// Parse `html` with html5ever, reporting exact errors, and panic if any parse error that is
+/// not whitelisted by `Dom::parse_error` was seen. The document is dumped to stderr with line
+/// numbers before panicking.
+fn validate_html(html: &str) {
+    let mut has_error = false;
+
+    html5ever::parse_document(
+        Dom {
+            names: Vec::new(),
+            has_error: &mut has_error,
+            line_no: 1,
+        },
+        html5ever::ParseOpts {
+            tokenizer: tokenizer::TokenizerOpts {
+                exact_errors: true,
+                ..tokenizer::TokenizerOpts::default()
+            },
+            tree_builder: tree_builder::TreeBuilderOpts {
+                exact_errors: true,
+                scripting_enabled: false,
+                ..tree_builder::TreeBuilderOpts::default()
+            },
+        },
+    )
+    .from_utf8()
+    .read_from(&mut std::io::Cursor::new(html))
+    .unwrap();
+
+    if has_error {
+        eprintln!("html:");
+        html.split('\n').enumerate().for_each(|(i, l)| {
+            eprintln!("{:>2}:{}", i + 1, l);
+        });
+        eprintln!("\n");
+        panic!();
+    }
+}
+
+/// Tree sink that builds no tree; it only keeps element names and records parse errors.
+///
+/// Handle `0` is the document, an element's handle is its 1-based index into `names`.
+struct Dom<'a> {
+    names: Vec<html5ever::QualName>,
+    has_error: &'a mut bool,
+    line_no: u64,
+}
+
+impl<'a> tree_builder::TreeSink for Dom<'a> {
+    type Handle = usize;
+    type Output = Self;
+
+    fn get_document(&mut self) -> usize {
+        0
+    }
+
+    fn finish(self) -> Self {
+        self
+    }
+
+    fn same_node(&self, x: &usize, y: &usize) -> bool {
+        x == y
+    }
+
+    fn elem_name(&self, i: &usize) -> html5ever::ExpandedName {
+        self.names[i - 1].expanded()
+    }
+
+    fn create_element(
+        &mut self,
+        name: html5ever::QualName,
+        _: Vec<html5ever::Attribute>,
+        _: tree_builder::ElementFlags,
+    ) -> usize {
+        self.names.push(name);
+        self.names.len()
+    }
+
+    /// Flag the document as invalid unless `msg` starts with a whitelisted prefix.
+    fn parse_error(&mut self, msg: std::borrow::Cow<'static, str>) {
+        let whitelist = &[
+            "Bad character",       // bad characters in input will pass through
+            "Duplicate attribute", // djot is case-sensitive while html is not
+            // tags may be nested incorrectly, e.g. <a> within <a>
+            "Unexpected token Tag",
+            "Found special tag while closing generic tag",
+            "Formatting element not current node",
+            "Formatting element not open",
+            // FIXME bug caused by empty table at end of list
+            "No matching tag to close",
+            "Unexpected open element while closing",
+        ];
+        if !whitelist.iter().any(|e| msg.starts_with(e)) {
+            *self.has_error = true;
+            eprintln!("{}: {}\n", self.line_no, msg);
+        }
+    }
+
+    fn set_quirks_mode(&mut self, _: tree_builder::QuirksMode) {}
+
+    fn set_current_line(&mut self, l: u64) {
+        self.line_no = l;
+    }
+
+    fn append(&mut self, _: &usize, _: tree_builder::NodeOrText<usize>) {}
+    fn append_before_sibling(&mut self, _: &usize, _: tree_builder::NodeOrText<usize>) {}
+    fn append_based_on_parent_node(
+        &mut self,
+        _: &usize,
+        _: &usize,
+        _: tree_builder::NodeOrText<usize>,
+    ) {
+    }
+    fn append_doctype_to_document(
+        &mut self,
+        _: tendril::StrTendril,
+        _: tendril::StrTendril,
+        _: tendril::StrTendril,
+    ) {
+    }
+    fn remove_from_parent(&mut self, _: &usize) {}
+    fn reparent_children(&mut self, _: &usize, _: &usize) {}
+
+    fn mark_script_already_started(&mut self, _: &usize) {}
+
+    // The remaining callbacks are not supported by this sink and abort if ever invoked.
+    fn add_attrs_if_missing(&mut self, _: &usize, _: Vec<html5ever::Attribute>) {
+        panic!();
+    }
+
+    fn create_pi(&mut self, _: tendril::StrTendril, _: tendril::StrTendril) -> usize {
+        panic!()
+    }
+
+    fn get_template_contents(&mut self, _: &usize) -> usize {
+        panic!();
+    }
+
+    fn create_comment(&mut self, _: tendril::StrTendril) -> usize {
+        panic!()
+    }
+}
diff --git a/tests/afl/src/main.rs b/tests/afl/src/main.rs
index c04247b..b0a66c8 100644
--- a/tests/afl/src/main.rs
+++ b/tests/afl/src/main.rs
@@ -8,6 +8,7 @@ fn main() {
 
     let f = match target.as_str() {
         "parse" => jotdown_afl::parse,
+        "html" => jotdown_afl::html,
         _ => panic!("unknown target '{}'", target),
     };
 