afl: add html target, checking for invalid html
This commit is contained in:
		
					parent
					
						
							
								8f70f596b9
							
						
					
				
			
			
				commit
				
					
						9a7c57f524
					
				
			
		
					 4 changed files with 164 additions and 0 deletions
				
			
		| 
						 | 
					@ -7,6 +7,7 @@ default-run = "main"
 | 
				
			||||||
[dependencies]
 | 
					[dependencies]
 | 
				
			||||||
afl = "0.11"
 | 
					afl = "0.11"
 | 
				
			||||||
jotdown = { path = "../../", features = ["deterministic"] }
 | 
					jotdown = { path = "../../", features = ["deterministic"] }
 | 
				
			||||||
 | 
					html5ever = "0.26"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
[[bin]]
 | 
					[[bin]]
 | 
				
			||||||
name = "main"
 | 
					name = "main"
 | 
				
			||||||
| 
						 | 
					@ -15,3 +16,7 @@ path = "src/main.rs"
 | 
				
			||||||
[[bin]]
 | 
					[[bin]]
 | 
				
			||||||
name = "parse"
 | 
					name = "parse"
 | 
				
			||||||
path = "src/parse.rs"
 | 
					path = "src/parse.rs"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					[[bin]]
 | 
				
			||||||
 | 
					name = "html"
 | 
				
			||||||
 | 
					path = "src/html.rs"
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										3
									
								
								tests/afl/src/html.rs
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								tests/afl/src/html.rs
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,3 @@
 | 
				
			||||||
 | 
					fn main() {
 | 
				
			||||||
 | 
					    afl::fuzz!(|data: &[u8]| { jotdown_afl::html(data) });
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -1,5 +1,160 @@
 | 
				
			||||||
 | 
					use jotdown::Render;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					use html5ever::tendril;
 | 
				
			||||||
 | 
					use html5ever::tendril::TendrilSink;
 | 
				
			||||||
 | 
					use html5ever::tokenizer;
 | 
				
			||||||
 | 
					use html5ever::tree_builder;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
pub fn parse(data: &[u8]) {
 | 
					pub fn parse(data: &[u8]) {
 | 
				
			||||||
    if let Ok(s) = std::str::from_utf8(data) {
 | 
					    if let Ok(s) = std::str::from_utf8(data) {
 | 
				
			||||||
        jotdown::Parser::new(s).last();
 | 
					        jotdown::Parser::new(s).last();
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					pub fn html(data: &[u8]) {
 | 
				
			||||||
 | 
					    if data.iter().any(|i| *i == 0) {
 | 
				
			||||||
 | 
					        return;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    if let Ok(s) = std::str::from_utf8(data) {
 | 
				
			||||||
 | 
					        if !s.contains("=html") {
 | 
				
			||||||
 | 
					            let p = jotdown::Parser::new(s);
 | 
				
			||||||
 | 
					            let mut html = "<!DOCTYPE html>\n".to_string();
 | 
				
			||||||
 | 
					            jotdown::html::Renderer.push(p, &mut html).unwrap();
 | 
				
			||||||
 | 
					            validate_html(&html);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					fn validate_html(html: &str) {
 | 
				
			||||||
 | 
					    let mut has_error = false;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    html5ever::parse_document(
 | 
				
			||||||
 | 
					        Dom {
 | 
				
			||||||
 | 
					            names: Vec::new(),
 | 
				
			||||||
 | 
					            has_error: &mut has_error,
 | 
				
			||||||
 | 
					            line_no: 1,
 | 
				
			||||||
 | 
					        },
 | 
				
			||||||
 | 
					        html5ever::ParseOpts {
 | 
				
			||||||
 | 
					            tokenizer: tokenizer::TokenizerOpts {
 | 
				
			||||||
 | 
					                exact_errors: true,
 | 
				
			||||||
 | 
					                ..tokenizer::TokenizerOpts::default()
 | 
				
			||||||
 | 
					            },
 | 
				
			||||||
 | 
					            tree_builder: tree_builder::TreeBuilderOpts {
 | 
				
			||||||
 | 
					                exact_errors: true,
 | 
				
			||||||
 | 
					                scripting_enabled: false,
 | 
				
			||||||
 | 
					                ..tree_builder::TreeBuilderOpts::default()
 | 
				
			||||||
 | 
					            },
 | 
				
			||||||
 | 
					        },
 | 
				
			||||||
 | 
					    )
 | 
				
			||||||
 | 
					    .from_utf8()
 | 
				
			||||||
 | 
					    .read_from(&mut std::io::Cursor::new(html))
 | 
				
			||||||
 | 
					    .unwrap();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if has_error {
 | 
				
			||||||
 | 
					        eprintln!("html:");
 | 
				
			||||||
 | 
					        html.split('\n').enumerate().for_each(|(i, l)| {
 | 
				
			||||||
 | 
					            eprintln!("{:>2}:{}", i + 1, l);
 | 
				
			||||||
 | 
					        });
 | 
				
			||||||
 | 
					        eprintln!("\n");
 | 
				
			||||||
 | 
					        panic!();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					struct Dom<'a> {
 | 
				
			||||||
 | 
					    names: Vec<html5ever::QualName>,
 | 
				
			||||||
 | 
					    has_error: &'a mut bool,
 | 
				
			||||||
 | 
					    line_no: u64,
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					impl<'a> tree_builder::TreeSink for Dom<'a> {
 | 
				
			||||||
 | 
					    type Handle = usize;
 | 
				
			||||||
 | 
					    type Output = Self;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn get_document(&mut self) -> usize {
 | 
				
			||||||
 | 
					        0
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn finish(self) -> Self {
 | 
				
			||||||
 | 
					        self
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn same_node(&self, x: &usize, y: &usize) -> bool {
 | 
				
			||||||
 | 
					        x == y
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn elem_name(&self, i: &usize) -> html5ever::ExpandedName {
 | 
				
			||||||
 | 
					        self.names[i - 1].expanded()
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn create_element(
 | 
				
			||||||
 | 
					        &mut self,
 | 
				
			||||||
 | 
					        name: html5ever::QualName,
 | 
				
			||||||
 | 
					        _: Vec<html5ever::Attribute>,
 | 
				
			||||||
 | 
					        _: tree_builder::ElementFlags,
 | 
				
			||||||
 | 
					    ) -> usize {
 | 
				
			||||||
 | 
					        self.names.push(name);
 | 
				
			||||||
 | 
					        self.names.len()
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn parse_error(&mut self, msg: std::borrow::Cow<'static, str>) {
 | 
				
			||||||
 | 
					        let whitelist = &[
 | 
				
			||||||
 | 
					            "Bad character",       // bad characters in input will pass through
 | 
				
			||||||
 | 
					            "Duplicate attribute", // djot is case-sensitive while html is not
 | 
				
			||||||
 | 
					            // tags may be nested incorrectly, e.g. <a> within <a>
 | 
				
			||||||
 | 
					            "Unexpected token Tag",
 | 
				
			||||||
 | 
					            "Found special tag while closing generic tag",
 | 
				
			||||||
 | 
					            "Formatting element not current node",
 | 
				
			||||||
 | 
					            "Formatting element not open",
 | 
				
			||||||
 | 
					            // FIXME bug caused by empty table at end of list
 | 
				
			||||||
 | 
					            "No matching tag to close",
 | 
				
			||||||
 | 
					            "Unexpected open element while closing",
 | 
				
			||||||
 | 
					        ];
 | 
				
			||||||
 | 
					        if !whitelist.iter().any(|e| msg.starts_with(e)) {
 | 
				
			||||||
 | 
					            *self.has_error = true;
 | 
				
			||||||
 | 
					            eprintln!("{}: {}\n", self.line_no, msg);
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn set_quirks_mode(&mut self, _: tree_builder::QuirksMode) {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn set_current_line(&mut self, l: u64) {
 | 
				
			||||||
 | 
					        self.line_no = l;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn append(&mut self, _: &usize, _: tree_builder::NodeOrText<usize>) {}
 | 
				
			||||||
 | 
					    fn append_before_sibling(&mut self, _: &usize, _: tree_builder::NodeOrText<usize>) {}
 | 
				
			||||||
 | 
					    fn append_based_on_parent_node(
 | 
				
			||||||
 | 
					        &mut self,
 | 
				
			||||||
 | 
					        _: &usize,
 | 
				
			||||||
 | 
					        _: &usize,
 | 
				
			||||||
 | 
					        _: tree_builder::NodeOrText<usize>,
 | 
				
			||||||
 | 
					    ) {
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    fn append_doctype_to_document(
 | 
				
			||||||
 | 
					        &mut self,
 | 
				
			||||||
 | 
					        _: tendril::StrTendril,
 | 
				
			||||||
 | 
					        _: tendril::StrTendril,
 | 
				
			||||||
 | 
					        _: tendril::StrTendril,
 | 
				
			||||||
 | 
					    ) {
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    fn remove_from_parent(&mut self, _: &usize) {}
 | 
				
			||||||
 | 
					    fn reparent_children(&mut self, _: &usize, _: &usize) {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn mark_script_already_started(&mut self, _: &usize) {}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn add_attrs_if_missing(&mut self, _: &usize, _: Vec<html5ever::Attribute>) {
 | 
				
			||||||
 | 
					        panic!();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn create_pi(&mut self, _: tendril::StrTendril, _: tendril::StrTendril) -> usize {
 | 
				
			||||||
 | 
					        panic!()
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn get_template_contents(&mut self, _: &usize) -> usize {
 | 
				
			||||||
 | 
					        panic!();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    fn create_comment(&mut self, _: tendril::StrTendril) -> usize {
 | 
				
			||||||
 | 
					        panic!()
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -8,6 +8,7 @@ fn main() {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    let f = match target.as_str() {
 | 
					    let f = match target.as_str() {
 | 
				
			||||||
        "parse" => jotdown_afl::parse,
 | 
					        "parse" => jotdown_afl::parse,
 | 
				
			||||||
 | 
					        "html" => jotdown_afl::html,
 | 
				
			||||||
        _ => panic!("unknown target '{}'", target),
 | 
					        _ => panic!("unknown target '{}'", target),
 | 
				
			||||||
    };
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue