PR #25 Fuzz for invalid HTML

Merge branch 'fuzz_html'

closes #25
This commit is contained in:
Noah Hellman 2023-03-20 23:37:54 +01:00
commit e458955d00
11 changed files with 270 additions and 49 deletions

View file

@ -77,10 +77,13 @@ jobs:
rustup update nightly
rustup default nightly
cargo install afl
- name: "Fuzz"
- name: "Fuzz parser"
run: |
echo core | sudo tee /proc/sys/kernel/core_pattern
make afl_quick
AFL_TARGET=parse make afl_quick
- name: "Fuzz html"
run: |
AFL_TARGET=html make afl_quick
bench:
name: Benchmark
runs-on: ubuntu-latest

View file

@ -53,14 +53,14 @@ bench:
cov: suite suite_bench
LLVM_COV=llvm-cov LLVM_PROFDATA=llvm-profdata cargo llvm-cov --features=suite,suite_bench --workspace --html --ignore-run-fail
AFL_TARGET?=gen
AFL_TARGET?=parse
AFL_JOBS?=1
AFL_TARGET_CRASH?=crashes
afl:
rm -rf tests/afl/out
(cd tests/afl && \
cargo afl build --release --config profile.release.debug-assertions=true && \
cargo afl build --no-default-features --release --config profile.release.debug-assertions=true && \
(AFL_NO_UI=1 cargo afl fuzz -i in -o out -Mm target/release/${AFL_TARGET} &) && \
for i in $$(seq $$((${AFL_JOBS} - 1))); do \
AFL_NO_UI=1 cargo afl fuzz -i in -o out -Ss$$i target/release/${AFL_TARGET} & \
@ -71,24 +71,31 @@ afl:
afl_quick:
rm -rf tests/afl/out
(cd tests/afl && \
cargo afl build --release --config profile.release.debug-assertions=true && \
cargo afl build --no-default-features --release --config profile.release.debug-assertions=true && \
AFL_NO_UI=1 AFL_BENCH_UNTIL_CRASH=1 \
cargo afl fuzz -i in -o out -V 60 target/release/${AFL_TARGET})
afl_crash:
set +e; \
for f in $$(find tests/afl/out -path '*/${AFL_TARGET_CRASH}/id*'); do \
echo "cat $$f | RUST_BACKTRACE=1 cargo run"; \
out=$$(cat $$f | RUST_BACKTRACE=1 cargo run 2>&1); \
failures="$$(find . -path './tmin/*') $$(find tests/afl/out -path '*/${AFL_TARGET_CRASH}/id*')"; \
for f in $$failures; do \
echo $$f; \
out=$$(cat $$f | (cd tests/afl && RUST_BACKTRACE=1 cargo run ${AFL_TARGET} 2>&1)); \
if [ $$? -ne 0 ]; then \
echo; \
echo "FAIL"; \
echo "$$out"; \
echo "cat $$f | RUST_BACKTRACE=1 cargo run"; \
exit 1; \
fi; \
done
afl_tmin:
rm -rf tmin
mkdir tmin
for f in $$(find tests/afl/out -path '*/${AFL_TARGET_CRASH}/id*'); do \
cargo afl tmin -i $$f -o tmin/$$(basename $$f) tests/afl/target/release/${AFL_TARGET}; \
done
clean:
cargo clean
git submodule deinit -f --all

View file

@ -262,7 +262,7 @@ impl<I: Iterator<Item = char>> Parser<I> {
}
}
s @ (ClassFirst | IdentifierFirst) => {
if is_name_start(c) {
if is_name(c) {
match s {
ClassFirst => Class,
IdentifierFirst => Identifier,
@ -344,12 +344,8 @@ impl<I: Iterator<Item = char>> Parser<I> {
}
}
pub fn is_name_start(c: char) -> bool {
c.is_ascii_alphanumeric() || matches!(c, '_' | ':')
}
pub fn is_name(c: char) -> bool {
is_name_start(c) || c.is_ascii_digit() || matches!(c, '-')
c.is_ascii_alphanumeric() || matches!(c, ':' | '_' | '-')
}
enum Element {

View file

@ -734,9 +734,8 @@ impl IdentifiedBlock {
f @ ('`' | ':' | '~') => {
let fence_length = 1 + (&mut chars).take_while(|c| *c == f).count();
let spec = &line_t[fence_length..].trim_start();
let valid_spec = if f == ':' && !spec.starts_with('=') {
spec.chars().next().map_or(true, attr::is_name_start)
&& spec.chars().skip(1).all(attr::is_name)
let valid_spec = if f == ':' {
spec.chars().all(attr::is_name)
} else {
!spec.chars().any(char::is_whitespace) && !spec.chars().any(|c| c == '`')
};

View file

@ -67,7 +67,7 @@ struct Writer<'s, I: Iterator<Item = Event<'s>>, W> {
events: std::iter::Peekable<FilteredEvents<I>>,
out: W,
raw: Raw,
text_only: bool,
img_alt_text: usize,
list_tightness: Vec<bool>,
encountered_footnote: bool,
footnote_number: Option<std::num::NonZeroUsize>,
@ -81,7 +81,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
events: FilteredEvents { events }.peekable(),
out,
raw: Raw::None,
text_only: false,
img_alt_text: 0,
list_tightness: Vec::new(),
encountered_footnote: false,
footnote_number: None,
@ -97,7 +97,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
if c.is_block() && !self.first_line {
self.out.write_char('\n')?;
}
if self.text_only && !matches!(c, Container::Image(..)) {
if self.img_alt_text > 0 && !matches!(c, Container::Image(..)) {
continue;
}
match &c {
@ -171,8 +171,12 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
}
}
Container::Image(..) => {
self.text_only = true;
self.out.write_str("<img")?;
self.img_alt_text += 1;
if self.img_alt_text == 1 {
self.out.write_str("<img")?;
} else {
continue;
}
}
Container::Verbatim => self.out.write_str("<code")?,
Container::RawBlock { format } | Container::RawInline { format } => {
@ -283,7 +287,9 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
}
}
Container::Image(..) => {
self.out.write_str(r#" alt=""#)?;
if self.img_alt_text == 1 {
self.out.write_str(r#" alt=""#)?;
}
}
Container::Math { display } => {
self.out
@ -296,7 +302,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
if c.is_block_container() && !matches!(c, Container::Footnote { .. }) {
self.out.write_char('\n')?;
}
if self.text_only && !matches!(c, Container::Image(..)) {
if self.img_alt_text > 0 && !matches!(c, Container::Image(..)) {
continue;
}
match c {
@ -360,12 +366,14 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
Container::Span => self.out.write_str("</span>")?,
Container::Link(..) => self.out.write_str("</a>")?,
Container::Image(src, ..) => {
self.text_only = false;
if src.is_empty() {
if self.img_alt_text == 1 {
if !src.is_empty() {
self.out.write_str(r#"" src=""#)?;
self.write_attr(&src)?;
}
self.out.write_str(r#"">"#)?;
} else {
write!(self.out, r#"" src="{}">"#, src)?;
}
self.img_alt_text -= 1;
}
Container::Verbatim => self.out.write_str("</code>")?,
Container::Math { display } => {
@ -388,16 +396,19 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
}
}
Event::Str(s) => match self.raw {
Raw::None if self.img_alt_text > 0 => self.write_attr(&s)?,
Raw::None => self.write_text(&s)?,
Raw::Html => self.out.write_str(&s)?,
Raw::Other => {}
},
Event::FootnoteReference(_tag, number) => {
write!(
self.out,
r##"<a id="fnref{}" href="#fn{}" role="doc-noteref"><sup>{}</sup></a>"##,
number, number, number
)?;
if self.img_alt_text == 0 {
write!(
self.out,
r##"<a id="fnref{}" href="#fn{}" role="doc-noteref"><sup>{}</sup></a>"##,
number, number, number
)?;
}
}
Event::Symbol(sym) => write!(self.out, ":{}:", sym)?,
Event::LeftSingleQuote => self.out.write_str("&lsquo;")?,

View file

@ -2,11 +2,25 @@
name = "jotdown-afl"
version = "0.1.0"
edition = "2021"
default-run = "main"
[dependencies]
afl = "0.11"
jotdown = { path = "../../", features = ["deterministic"] }
html5ever = "0.26"
[[bin]]
name = "gen"
path = "src/gen.rs"
name = "main"
path = "src/main.rs"
[[bin]]
name = "parse"
path = "src/parse.rs"
[[bin]]
name = "html"
path = "src/html.rs"
[features]
default = ["debug"]
debug = []

View file

@ -1,13 +0,0 @@
use afl::fuzz;
use jotdown::Render;
/// AFL entry point: renders each fuzz input to HTML with jotdown.
///
/// Inputs that are not valid UTF-8 are ignored. The rendered output is
/// discarded; the `unwrap` panics if the renderer returns an error.
fn main() {
fuzz!(|data: &[u8]| {
if let Ok(s) = std::str::from_utf8(data) {
let p = jotdown::Parser::new(s);
let mut output = String::new();
jotdown::html::Renderer.push(p, &mut output).unwrap();
}
});
}

3
tests/afl/src/html.rs Normal file
View file

@ -0,0 +1,3 @@
/// AFL entry point for the `html` target: forwards each fuzz input to
/// `jotdown_afl::html`.
fn main() {
afl::fuzz!(|data: &[u8]| { jotdown_afl::html(data) });
}

180
tests/afl/src/lib.rs Normal file
View file

@ -0,0 +1,180 @@
use jotdown::Render;
use html5ever::tendril;
use html5ever::tendril::TendrilSink;
use html5ever::tokenizer;
use html5ever::tree_builder;
/// Fuzz target that drives the jotdown parser over `data`.
///
/// Input that is not valid UTF-8 is ignored. Otherwise the parser is run to
/// exhaustion and every event it yields is dropped.
pub fn parse(data: &[u8]) {
    match std::str::from_utf8(data) {
        Ok(src) => jotdown::Parser::new(src).for_each(drop),
        Err(_) => {}
    }
}
/// Fuzz target that renders `data` to HTML and validates the result.
///
/// The input is skipped when it contains a NUL byte, is not valid UTF-8, or
/// contains the substring `=html`. Otherwise it is rendered after a
/// `<!DOCTYPE html>` line and handed to `validate_html`.
///
/// NOTE(review): the `=html` filter presumably exists because raw HTML is
/// passed through verbatim and so cannot be expected to validate; the NUL
/// filter presumably avoids parse errors that originate in the input itself.
/// Confirm both.
pub fn html(data: &[u8]) {
    if data.contains(&0) {
        return;
    }
    let src = match std::str::from_utf8(data) {
        Ok(src) => src,
        Err(_) => return,
    };
    if src.contains("=html") {
        return;
    }

    let mut html = String::from("<!DOCTYPE html>\n");
    jotdown::html::Renderer
        .push(jotdown::Parser::new(src), &mut html)
        .unwrap();
    validate_html(&html);
}
/// Parses `html` with html5ever and panics if it reports a parse error that
/// `Dom::parse_error` does not ignore.
///
/// With the `debug` feature, offending errors are only recorded while parsing;
/// afterwards the whole document is dumped to stderr with line numbers and the
/// function panics. Without the feature, the sink itself panics on the first
/// offending error.
fn validate_html(html: &str) {
    #[cfg(feature = "debug")]
    let mut has_error = false;

    let sink = Dom {
        names: Vec::new(),
        #[cfg(feature = "debug")]
        has_error: &mut has_error,
        #[cfg(feature = "debug")]
        line_no: 1,
        #[cfg(not(feature = "debug"))]
        _lifetime: std::marker::PhantomData,
    };
    // Ask both the tokenizer and the tree builder for exact error messages;
    // `parse_error` matches on their prefixes.
    let opts = html5ever::ParseOpts {
        tokenizer: tokenizer::TokenizerOpts {
            exact_errors: true,
            ..Default::default()
        },
        tree_builder: tree_builder::TreeBuilderOpts {
            exact_errors: true,
            scripting_enabled: false,
            ..Default::default()
        },
    };

    // The returned sink is dropped at the end of this statement, which
    // releases its borrow of `has_error`.
    html5ever::parse_document(sink, opts)
        .from_utf8()
        .read_from(&mut html.as_bytes())
        .unwrap();

    #[cfg(feature = "debug")]
    if has_error {
        eprintln!("html:");
        for (idx, line) in html.split('\n').enumerate() {
            eprintln!("{:>2}:{}", idx + 1, line);
        }
        eprintln!("\n");
        panic!();
    }
}
/// Tree sink that builds no document tree; it exists to receive html5ever's
/// parse errors.
///
/// Which fields exist depends on the `debug` feature: with it, errors are
/// recorded through `has_error` together with the current line number;
/// without it, only a lifetime marker is kept so that `'a` is still used.
struct Dom<'a> {
// Names of the elements created so far. `create_element` returns the length
// after pushing, so handle `n` maps to `names[n - 1]`; handle 0 is the document.
names: Vec<html5ever::QualName>,
// Set to `true` by `parse_error` when an error that is not ignored is reported.
#[cfg(feature = "debug")]
has_error: &'a mut bool,
// Line number most recently passed to `set_current_line`; printed with errors.
#[cfg(feature = "debug")]
line_no: u64,
// Keeps the `'a` parameter in use when the `debug` fields are compiled out.
#[cfg(not(feature = "debug"))]
_lifetime: std::marker::PhantomData<&'a ()>,
}
/// Sink implementation that records element names and parse errors only.
///
/// Handles are plain integers: 0 is the document and `n > 0` is the `n`-th
/// created element. Tree mutations are ignored, and callbacks the harness does
/// not expect to be reached panic.
impl<'a> tree_builder::TreeSink for Dom<'a> {
    type Handle = usize;
    type Output = Self;

    fn finish(self) -> Self {
        self
    }

    fn get_document(&mut self) -> usize {
        0
    }

    fn same_node(&self, x: &usize, y: &usize) -> bool {
        *x == *y
    }

    fn elem_name(&self, i: &usize) -> html5ever::ExpandedName {
        // Element handles are 1-based, see `create_element`.
        let idx = *i - 1;
        self.names[idx].expanded()
    }

    fn create_element(
        &mut self,
        name: html5ever::QualName,
        _: Vec<html5ever::Attribute>,
        _: tree_builder::ElementFlags,
    ) -> usize {
        self.names.push(name);
        self.names.len()
    }

    /// Reports `msg` unless it starts with one of the ignored prefixes.
    ///
    /// With the `debug` feature the error is flagged and printed with the
    /// current line number; otherwise this panics immediately.
    fn parse_error(&mut self, msg: std::borrow::Cow<'static, str>) {
        const IGNORED_PREFIXES: &[&str] = &[
            // bad characters in input will pass through
            "Bad character",
            // djot is case-sensitive while html is not
            "Duplicate attribute",
            // tags may be nested incorrectly, e.g. <a> within <a>
            "Unexpected token Tag",
            "Found special tag while closing generic tag",
            "Formatting element not current node",
            "Formatting element not open",
            // FIXME bug caused by empty table at end of list
            "No matching tag to close",
            "Unexpected open element while closing",
        ];

        if IGNORED_PREFIXES.iter().any(|prefix| msg.starts_with(prefix)) {
            return;
        }

        #[cfg(feature = "debug")]
        {
            *self.has_error = true;
            eprintln!("{}: {}\n", self.line_no, msg);
        }
        #[cfg(not(feature = "debug"))]
        {
            panic!("invalid html");
        }
    }

    #[cfg(feature = "debug")]
    fn set_current_line(&mut self, l: u64) {
        self.line_no = l;
    }

    #[cfg(not(feature = "debug"))]
    fn set_current_line(&mut self, _: u64) {}

    // Tree mutations are irrelevant for validation and therefore no-ops.

    fn set_quirks_mode(&mut self, _: tree_builder::QuirksMode) {}

    fn append(&mut self, _: &usize, _: tree_builder::NodeOrText<usize>) {}

    fn append_before_sibling(&mut self, _: &usize, _: tree_builder::NodeOrText<usize>) {}

    fn append_based_on_parent_node(
        &mut self,
        _: &usize,
        _: &usize,
        _: tree_builder::NodeOrText<usize>,
    ) {
    }

    fn append_doctype_to_document(
        &mut self,
        _: tendril::StrTendril,
        _: tendril::StrTendril,
        _: tendril::StrTendril,
    ) {
    }

    fn remove_from_parent(&mut self, _: &usize) {}

    fn reparent_children(&mut self, _: &usize, _: &usize) {}

    fn mark_script_already_started(&mut self, _: &usize) {}

    // The remaining callbacks panic if the parser ever invokes them.

    fn add_attrs_if_missing(&mut self, _: &usize, _: Vec<html5ever::Attribute>) {
        panic!();
    }

    fn create_comment(&mut self, _: tendril::StrTendril) -> usize {
        panic!()
    }

    fn create_pi(&mut self, _: tendril::StrTendril, _: tendril::StrTendril) -> usize {
        panic!()
    }

    fn get_template_contents(&mut self, _: &usize) -> usize {
        panic!();
    }
}

18
tests/afl/src/main.rs Normal file
View file

@ -0,0 +1,18 @@
use std::io::Read;
/// Runs one fuzz target on the bytes read from stdin.
///
/// Expects exactly one command-line argument naming the target (`parse` or
/// `html`). Panics when the argument is missing, unknown, or followed by
/// further arguments, and when stdin cannot be read.
fn main() {
    // Skip the program name; the first remaining argument selects the target.
    let mut args = std::env::args().skip(1);
    let target = args.next().expect("no target");
    assert_eq!(args.next(), None);

    let run: fn(&[u8]) = match target.as_str() {
        "parse" => jotdown_afl::parse,
        "html" => jotdown_afl::html,
        _ => panic!("unknown target '{}'", target),
    };

    let mut input = Vec::new();
    std::io::stdin().read_to_end(&mut input).unwrap();
    run(&input);
}

3
tests/afl/src/parse.rs Normal file
View file

@ -0,0 +1,3 @@
/// AFL entry point for the `parse` target: forwards each fuzz input to
/// `jotdown_afl::parse`.
fn main() {
afl::fuzz!(|data: &[u8]| { jotdown_afl::parse(data) });
}