afl: add html target, checking for invalid html
This commit is contained in:
parent
8f70f596b9
commit
9a7c57f524
4 changed files with 164 additions and 0 deletions
|
@ -7,6 +7,7 @@ default-run = "main"
|
|||
[dependencies]
|
||||
afl = "0.11"
|
||||
jotdown = { path = "../../", features = ["deterministic"] }
|
||||
html5ever = "0.26"
|
||||
|
||||
[[bin]]
|
||||
name = "main"
|
||||
|
@ -15,3 +16,7 @@ path = "src/main.rs"
|
|||
[[bin]]
|
||||
name = "parse"
|
||||
path = "src/parse.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "html"
|
||||
path = "src/html.rs"
|
||||
|
|
3
tests/afl/src/html.rs
Normal file
3
tests/afl/src/html.rs
Normal file
|
@ -0,0 +1,3 @@
|
|||
fn main() {
|
||||
afl::fuzz!(|data: &[u8]| { jotdown_afl::html(data) });
|
||||
}
|
|
@ -1,5 +1,160 @@
|
|||
use jotdown::Render;
|
||||
|
||||
use html5ever::tendril;
|
||||
use html5ever::tendril::TendrilSink;
|
||||
use html5ever::tokenizer;
|
||||
use html5ever::tree_builder;
|
||||
|
||||
pub fn parse(data: &[u8]) {
|
||||
if let Ok(s) = std::str::from_utf8(data) {
|
||||
jotdown::Parser::new(s).last();
|
||||
}
|
||||
}
|
||||
|
||||
pub fn html(data: &[u8]) {
|
||||
if data.iter().any(|i| *i == 0) {
|
||||
return;
|
||||
}
|
||||
if let Ok(s) = std::str::from_utf8(data) {
|
||||
if !s.contains("=html") {
|
||||
let p = jotdown::Parser::new(s);
|
||||
let mut html = "<!DOCTYPE html>\n".to_string();
|
||||
jotdown::html::Renderer.push(p, &mut html).unwrap();
|
||||
validate_html(&html);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn validate_html(html: &str) {
|
||||
let mut has_error = false;
|
||||
|
||||
html5ever::parse_document(
|
||||
Dom {
|
||||
names: Vec::new(),
|
||||
has_error: &mut has_error,
|
||||
line_no: 1,
|
||||
},
|
||||
html5ever::ParseOpts {
|
||||
tokenizer: tokenizer::TokenizerOpts {
|
||||
exact_errors: true,
|
||||
..tokenizer::TokenizerOpts::default()
|
||||
},
|
||||
tree_builder: tree_builder::TreeBuilderOpts {
|
||||
exact_errors: true,
|
||||
scripting_enabled: false,
|
||||
..tree_builder::TreeBuilderOpts::default()
|
||||
},
|
||||
},
|
||||
)
|
||||
.from_utf8()
|
||||
.read_from(&mut std::io::Cursor::new(html))
|
||||
.unwrap();
|
||||
|
||||
if has_error {
|
||||
eprintln!("html:");
|
||||
html.split('\n').enumerate().for_each(|(i, l)| {
|
||||
eprintln!("{:>2}:{}", i + 1, l);
|
||||
});
|
||||
eprintln!("\n");
|
||||
panic!();
|
||||
}
|
||||
}
|
||||
|
||||
struct Dom<'a> {
|
||||
names: Vec<html5ever::QualName>,
|
||||
has_error: &'a mut bool,
|
||||
line_no: u64,
|
||||
}
|
||||
|
||||
impl<'a> tree_builder::TreeSink for Dom<'a> {
|
||||
type Handle = usize;
|
||||
type Output = Self;
|
||||
|
||||
fn get_document(&mut self) -> usize {
|
||||
0
|
||||
}
|
||||
|
||||
fn finish(self) -> Self {
|
||||
self
|
||||
}
|
||||
|
||||
fn same_node(&self, x: &usize, y: &usize) -> bool {
|
||||
x == y
|
||||
}
|
||||
|
||||
fn elem_name(&self, i: &usize) -> html5ever::ExpandedName {
|
||||
self.names[i - 1].expanded()
|
||||
}
|
||||
|
||||
fn create_element(
|
||||
&mut self,
|
||||
name: html5ever::QualName,
|
||||
_: Vec<html5ever::Attribute>,
|
||||
_: tree_builder::ElementFlags,
|
||||
) -> usize {
|
||||
self.names.push(name);
|
||||
self.names.len()
|
||||
}
|
||||
|
||||
fn parse_error(&mut self, msg: std::borrow::Cow<'static, str>) {
|
||||
let whitelist = &[
|
||||
"Bad character", // bad characters in input will pass through
|
||||
"Duplicate attribute", // djot is case-sensitive while html is not
|
||||
// tags may be nested incorrectly, e.g. <a> within <a>
|
||||
"Unexpected token Tag",
|
||||
"Found special tag while closing generic tag",
|
||||
"Formatting element not current node",
|
||||
"Formatting element not open",
|
||||
// FIXME bug caused by empty table at end of list
|
||||
"No matching tag to close",
|
||||
"Unexpected open element while closing",
|
||||
];
|
||||
if !whitelist.iter().any(|e| msg.starts_with(e)) {
|
||||
*self.has_error = true;
|
||||
eprintln!("{}: {}\n", self.line_no, msg);
|
||||
}
|
||||
}
|
||||
|
||||
fn set_quirks_mode(&mut self, _: tree_builder::QuirksMode) {}
|
||||
|
||||
fn set_current_line(&mut self, l: u64) {
|
||||
self.line_no = l;
|
||||
}
|
||||
|
||||
fn append(&mut self, _: &usize, _: tree_builder::NodeOrText<usize>) {}
|
||||
fn append_before_sibling(&mut self, _: &usize, _: tree_builder::NodeOrText<usize>) {}
|
||||
fn append_based_on_parent_node(
|
||||
&mut self,
|
||||
_: &usize,
|
||||
_: &usize,
|
||||
_: tree_builder::NodeOrText<usize>,
|
||||
) {
|
||||
}
|
||||
fn append_doctype_to_document(
|
||||
&mut self,
|
||||
_: tendril::StrTendril,
|
||||
_: tendril::StrTendril,
|
||||
_: tendril::StrTendril,
|
||||
) {
|
||||
}
|
||||
fn remove_from_parent(&mut self, _: &usize) {}
|
||||
fn reparent_children(&mut self, _: &usize, _: &usize) {}
|
||||
|
||||
fn mark_script_already_started(&mut self, _: &usize) {}
|
||||
|
||||
fn add_attrs_if_missing(&mut self, _: &usize, _: Vec<html5ever::Attribute>) {
|
||||
panic!();
|
||||
}
|
||||
|
||||
fn create_pi(&mut self, _: tendril::StrTendril, _: tendril::StrTendril) -> usize {
|
||||
panic!()
|
||||
}
|
||||
|
||||
fn get_template_contents(&mut self, _: &usize) -> usize {
|
||||
panic!();
|
||||
}
|
||||
|
||||
fn create_comment(&mut self, _: tendril::StrTendril) -> usize {
|
||||
panic!()
|
||||
}
|
||||
}
|
||||
|
|
|
@ -8,6 +8,7 @@ fn main() {
|
|||
|
||||
let f = match target.as_str() {
|
||||
"parse" => jotdown_afl::parse,
|
||||
"html" => jotdown_afl::html,
|
||||
_ => panic!("unknown target '{}'", target),
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in a new issue