handle html escapes in html mod

This commit is contained in:
Noah Hellman 2022-12-08 18:25:24 +01:00
parent 564256f1b8
commit 89390cf5f6
2 changed files with 25 additions and 1 deletions

View file

@ -145,7 +145,29 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<I, W> {
Container::DoubleQuoted => self.out.write_str("&rdquo;")?, Container::DoubleQuoted => self.out.write_str("&rdquo;")?,
} }
} }
Event::Str(mut s) => {
    // Write `s` with the characters that are significant in HTML
    // (`<`, `>`, `&`, `"`) replaced by their entities; all other text is
    // passed through verbatim.
    //
    // `char_indices` yields *byte* offsets, which is what string slicing
    // requires. Counting characters instead (e.g. `chars().position(..)`)
    // gives a wrong offset as soon as a multi-byte UTF-8 character
    // precedes the match, corrupting the output or panicking on a
    // non-boundary slice.
    while let Some((i, ent)) = s.char_indices().find_map(|(i, c)| {
        let ent = match c {
            '<' => "&lt;",
            '>' => "&gt;",
            '&' => "&amp;",
            '"' => "&quot;",
            _ => return None,
        };
        Some((i, ent))
    }) {
        // Emit the untouched text before the special character, then
        // its entity.
        self.out.write_str(&s[..i])?;
        self.out.write_str(ent)?;
        // Every escaped character is ASCII, i.e. exactly one byte wide,
        // so `i + 1` is always a valid char boundary.
        s = &s[i + 1..];
    }
    // Whatever remains contains no characters that need escaping.
    self.out.write_str(s)?;
}
Event::Atom(a) => match a { Event::Atom(a) => match a {
Atom::Ellipsis => self.out.write_str("&hellip;")?, Atom::Ellipsis => self.out.write_str("&hellip;")?,
Atom::EnDash => self.out.write_str("&ndash;")?, Atom::EnDash => self.out.write_str("&ndash;")?,

View file

@ -125,6 +125,8 @@ impl<'s> Parser<'s> {
fn parse_atom(&mut self, first: &lex::Token) -> Option<Event> { fn parse_atom(&mut self, first: &lex::Token) -> Option<Event> {
let atom = match first.kind { let atom = match first.kind {
lex::Kind::Newline => Softbreak,
lex::Kind::Hardbreak => Hardbreak,
lex::Kind::Escape => Escape, lex::Kind::Escape => Escape,
lex::Kind::Nbsp => Nbsp, lex::Kind::Nbsp => Nbsp,
lex::Kind::Seq(lex::Sequence::Period) if first.len == 3 => Ellipsis, lex::Kind::Seq(lex::Sequence::Period) if first.len == 3 => Ellipsis,