jotdown/src/html.rs

474 lines
21 KiB
Rust
Raw Normal View History

2023-02-04 13:37:33 -05:00
//! An HTML renderer that takes an iterator of [`Event`]s and emits HTML.
//!
//! The HTML can be written to either a [`std::fmt::Write`] or a [`std::io::Write`] object.
//!
//! # Examples
//!
//! Push to a [`String`] (implements [`std::fmt::Write`]):
//!
//! ```
//! # use jotdown::Render;
//! # let events = std::iter::empty();
//! let mut html = String::new();
//! jotdown::html::Renderer.push(events, &mut html);
//! ```
//!
//! Write to standard output with buffering ([`std::io::Stdout`] implements [`std::io::Write`]):
//!
//! ```
//! # use jotdown::Render;
//! # let events = std::iter::empty();
//! let mut out = std::io::BufWriter::new(std::io::stdout());
//! jotdown::html::Renderer.write(events, &mut out).unwrap();
//! ```
2023-02-04 13:37:33 -05:00
2023-01-25 14:58:29 -05:00
use crate::Alignment;
2022-11-29 12:34:13 -05:00
use crate::Container;
2022-11-27 18:53:49 -05:00
use crate::Event;
2023-01-22 06:39:04 -05:00
use crate::ListKind;
2023-01-22 04:41:15 -05:00
use crate::OrderedListNumbering::*;
use crate::Render;
2022-11-27 18:53:49 -05:00
/// HTML renderer: a stateless unit struct whose [`Render`] implementation turns
/// an iterator of [`Event`]s into HTML.
pub struct Renderer;
2022-11-29 12:34:13 -05:00
impl Render for Renderer {
fn push<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write>(
&self,
events: I,
out: W,
) -> std::fmt::Result {
Writer::new(events, out).write()
2022-11-29 12:34:13 -05:00
}
2022-11-27 18:53:49 -05:00
}
2022-12-11 04:45:05 -05:00
/// Tracks whether the writer is currently inside a raw block/inline container
/// and, if so, how string events must be treated.
enum Raw {
/// Not inside a raw container: strings are escaped before being written.
None,
/// Inside a raw container whose format is `html`: strings are written verbatim.
Html,
/// Inside a raw container of any other format: strings are discarded.
Other,
}
/// Iterator adapter around an event iterator. Its `Iterator` implementation
/// skips [`Event::Blankline`] and [`Event::Escape`], so the writer never has
/// to handle them.
struct FilteredEvents<I> {
/// The wrapped, unfiltered event iterator.
events: I,
}
impl<'s, I> Iterator for FilteredEvents<I>
where
    I: Iterator<Item = Event<'s>>,
{
    type Item = Event<'s>;

    /// Yield the next event that is neither a blank line nor an escape marker,
    /// or `None` once the wrapped iterator has nothing left.
    fn next(&mut self) -> Option<Self::Item> {
        self.events
            .find(|ev| !matches!(ev, Event::Blankline | Event::Escape))
    }
}
struct Writer<'s, I: Iterator<Item = Event<'s>>, W> {
events: std::iter::Peekable<FilteredEvents<I>>,
2022-11-29 12:34:13 -05:00
out: W,
2022-12-11 04:45:05 -05:00
raw: Raw,
2023-03-12 12:03:44 -04:00
img_alt_text: usize,
2023-01-22 11:42:47 -05:00
list_tightness: Vec<bool>,
2023-01-18 16:30:24 -05:00
encountered_footnote: bool,
footnote_number: Option<std::num::NonZeroUsize>,
footnote_backlink_written: bool,
2023-02-04 14:06:01 -05:00
first_line: bool,
2022-11-29 12:34:13 -05:00
}
impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
2022-11-29 12:34:13 -05:00
fn new(events: I, out: W) -> Self {
2022-12-11 04:45:05 -05:00
Self {
events: FilteredEvents { events }.peekable(),
2022-12-11 04:45:05 -05:00
out,
raw: Raw::None,
2023-03-12 12:03:44 -04:00
img_alt_text: 0,
2023-01-22 11:42:47 -05:00
list_tightness: Vec::new(),
2023-01-18 16:30:24 -05:00
encountered_footnote: false,
footnote_number: None,
footnote_backlink_written: false,
2023-02-04 14:06:01 -05:00
first_line: true,
2022-12-11 04:45:05 -05:00
}
2022-11-27 18:53:49 -05:00
}
2022-11-29 12:34:13 -05:00
fn write(&mut self) -> std::fmt::Result {
2023-01-18 16:30:24 -05:00
while let Some(e) = self.events.next() {
2022-11-29 12:34:13 -05:00
match e {
2023-01-15 14:03:25 -05:00
Event::Start(c, attrs) => {
2023-02-04 14:06:01 -05:00
if c.is_block() && !self.first_line {
self.out.write_char('\n')?;
}
2023-03-12 12:03:44 -04:00
if self.img_alt_text > 0 && !matches!(c, Container::Image(..)) {
2022-12-17 12:03:06 -05:00
continue;
}
2023-01-15 14:03:25 -05:00
match &c {
Container::Blockquote => self.out.write_str("<blockquote")?,
Container::List { kind, tight } => {
self.list_tightness.push(*tight);
match kind {
ListKind::Unordered | ListKind::Task => {
self.out.write_str("<ul")?
}
2023-01-22 06:39:04 -05:00
ListKind::Ordered {
numbering, start, ..
} => {
self.out.write_str("<ol")?;
if *start > 1 {
write!(self.out, r#" start="{}""#, start)?;
}
if let Some(ty) = match numbering {
Decimal => None,
AlphaLower => Some('a'),
AlphaUpper => Some('A'),
RomanLower => Some('i'),
RomanUpper => Some('I'),
} {
write!(self.out, r#" type="{}""#, ty)?;
}
}
2023-01-22 04:41:15 -05:00
}
}
Container::ListItem | Container::TaskListItem { .. } => {
self.out.write_str("<li")?;
}
2023-01-27 13:04:01 -05:00
Container::DescriptionList => self.out.write_str("<dl")?,
2023-01-15 14:03:25 -05:00
Container::DescriptionDetails => self.out.write_str("<dd")?,
2023-01-18 16:30:24 -05:00
Container::Footnote { number, .. } => {
assert!(self.footnote_number.is_none());
self.footnote_number = Some((*number).try_into().unwrap());
if !self.encountered_footnote {
self.encountered_footnote = true;
self.out
.write_str("<section role=\"doc-endnotes\">\n<hr>\n<ol>\n")?;
}
write!(self.out, "<li id=\"fn{}\">", number)?;
self.footnote_backlink_written = false;
continue;
}
2023-01-15 14:03:25 -05:00
Container::Table => self.out.write_str("<table")?,
2023-01-25 13:27:12 -05:00
Container::TableRow { .. } => self.out.write_str("<tr")?,
2023-01-29 09:10:01 -05:00
Container::Section { .. } => self.out.write_str("<section")?,
2023-01-15 14:03:25 -05:00
Container::Div { .. } => self.out.write_str("<div")?,
2023-01-22 11:42:47 -05:00
Container::Paragraph => {
if matches!(self.list_tightness.last(), Some(true)) {
continue;
}
self.out.write_str("<p")?;
}
2023-01-29 09:10:01 -05:00
Container::Heading { level, .. } => write!(self.out, "<h{}", level)?,
2023-01-25 14:58:29 -05:00
Container::TableCell { head: false, .. } => self.out.write_str("<td")?,
Container::TableCell { head: true, .. } => self.out.write_str("<th")?,
2023-01-26 14:16:20 -05:00
Container::Caption => self.out.write_str("<caption")?,
2023-01-15 14:03:25 -05:00
Container::DescriptionTerm => self.out.write_str("<dt")?,
Container::CodeBlock { .. } => self.out.write_str("<pre")?,
Container::Span | Container::Math { .. } => self.out.write_str("<span")?,
2023-01-16 17:31:47 -05:00
Container::Link(dst, ..) => {
if dst.is_empty() {
self.out.write_str("<a")?;
} else {
self.out.write_str(r#"<a href=""#)?;
self.write_attr(dst)?;
self.out.write_char('"')?;
2023-01-16 17:31:47 -05:00
}
}
2022-12-17 12:03:06 -05:00
Container::Image(..) => {
2023-03-12 12:03:44 -04:00
self.img_alt_text += 1;
if self.img_alt_text == 1 {
self.out.write_str("<img")?;
} else {
continue;
}
2022-12-17 12:03:06 -05:00
}
2023-01-15 14:03:25 -05:00
Container::Verbatim => self.out.write_str("<code")?,
2022-12-11 04:45:05 -05:00
Container::RawBlock { format } | Container::RawInline { format } => {
2023-01-15 14:03:25 -05:00
self.raw = if format == &"html" {
2022-12-11 04:45:05 -05:00
Raw::Html
} else {
Raw::Other
2023-01-15 14:03:25 -05:00
};
continue;
2022-12-11 04:45:05 -05:00
}
2023-01-15 14:03:25 -05:00
Container::Subscript => self.out.write_str("<sub")?,
Container::Superscript => self.out.write_str("<sup")?,
Container::Insert => self.out.write_str("<ins")?,
Container::Delete => self.out.write_str("<del")?,
Container::Strong => self.out.write_str("<strong")?,
Container::Emphasis => self.out.write_str("<em")?,
Container::Mark => self.out.write_str("<mark")?,
2023-01-15 14:06:51 -05:00
}
2023-01-22 04:41:15 -05:00
for (a, v) in attrs.iter().filter(|(a, _)| *a != "class") {
write!(self.out, r#" {}=""#, a)?;
v.parts().try_for_each(|part| self.write_attr(part))?;
self.out.write_char('"')?;
2023-01-22 04:41:15 -05:00
}
2023-01-29 09:10:01 -05:00
if let Container::Heading {
id,
has_section: false,
..
}
| Container::Section { id } = &c
{
if !attrs.iter().any(|(a, _)| a == "id") {
self.out.write_str(r#" id=""#)?;
self.write_attr(id)?;
self.out.write_char('"')?;
2023-01-29 09:10:01 -05:00
}
}
2023-01-15 14:03:25 -05:00
if attrs.iter().any(|(a, _)| a == "class")
|| matches!(
c,
2023-01-22 04:41:15 -05:00
Container::Div { class: Some(_) }
| Container::Math { .. }
2023-01-22 06:39:04 -05:00
| Container::List {
kind: ListKind::Task,
..
}
2023-01-22 04:41:15 -05:00
| Container::TaskListItem { .. }
2023-01-15 14:03:25 -05:00
)
{
self.out.write_str(r#" class=""#)?;
2023-01-22 04:41:15 -05:00
let mut first_written = false;
if let Some(cls) = match c {
2023-01-22 06:39:04 -05:00
Container::List {
kind: ListKind::Task,
..
} => Some("task-list"),
2023-01-22 04:41:15 -05:00
Container::TaskListItem { checked: false } => Some("unchecked"),
Container::TaskListItem { checked: true } => Some("checked"),
Container::Math { display: false } => Some("math inline"),
Container::Math { display: true } => Some("math display"),
_ => None,
} {
first_written = true;
self.out.write_str(cls)?;
}
for cls in attrs
2023-01-15 14:03:25 -05:00
.iter()
.filter(|(a, _)| a == &"class")
2023-01-22 04:41:15 -05:00
.map(|(_, cls)| cls)
{
if first_written {
2023-01-15 14:03:25 -05:00
self.out.write_char(' ')?;
}
2023-01-22 04:41:15 -05:00
first_written = true;
cls.parts().try_for_each(|part| self.write_attr(part))?;
2023-01-22 04:41:15 -05:00
}
// div class goes after classes from attrs
2023-01-15 14:03:25 -05:00
if let Container::Div { class: Some(cls) } = c {
2023-01-22 04:41:15 -05:00
if first_written {
2023-01-15 14:03:25 -05:00
self.out.write_char(' ')?;
}
self.out.write_str(cls)?;
}
self.out.write_char('"')?;
}
match c {
2023-01-25 14:58:29 -05:00
Container::TableCell { alignment, .. }
if !matches!(alignment, Alignment::Unspecified) =>
{
let a = match alignment {
Alignment::Unspecified => unreachable!(),
Alignment::Left => "left",
Alignment::Center => "center",
Alignment::Right => "right",
};
write!(self.out, r#" style="text-align: {};">"#, a)?;
}
2023-01-15 14:03:25 -05:00
Container::CodeBlock { lang } => {
if let Some(l) = lang {
self.out.write_str(r#"><code class="language-"#)?;
self.write_attr(l)?;
self.out.write_str(r#"">"#)?;
2023-01-15 14:03:25 -05:00
} else {
self.out.write_str("><code>")?;
}
}
Container::Image(..) => {
2023-03-12 12:03:44 -04:00
if self.img_alt_text == 1 {
self.out.write_str(r#" alt=""#)?;
}
2023-01-15 14:03:25 -05:00
}
Container::Math { display } => {
self.out
.write_str(if display { r#">\["# } else { r#">\("# })?;
}
_ => self.out.write_char('>')?,
}
2022-11-29 12:34:13 -05:00
}
Event::End(c) => {
if c.is_block_container() && !matches!(c, Container::Footnote { .. }) {
self.out.write_char('\n')?;
}
2023-03-12 12:03:44 -04:00
if self.img_alt_text > 0 && !matches!(c, Container::Image(..)) {
2022-12-17 12:03:06 -05:00
continue;
}
2022-11-29 12:34:13 -05:00
match c {
Container::Blockquote => self.out.write_str("</blockquote>")?,
2023-01-22 06:39:04 -05:00
Container::List {
kind: ListKind::Unordered | ListKind::Task,
..
} => {
2023-01-22 11:42:47 -05:00
self.list_tightness.pop();
2023-01-22 04:41:15 -05:00
self.out.write_str("</ul>")?;
}
2023-01-22 06:39:04 -05:00
Container::List {
kind: ListKind::Ordered { .. },
..
} => self.out.write_str("</ol>")?,
2023-01-22 04:41:15 -05:00
Container::ListItem | Container::TaskListItem { .. } => {
self.out.write_str("</li>")?;
}
2023-01-27 13:04:01 -05:00
Container::DescriptionList => self.out.write_str("</dl>")?,
2022-11-29 12:34:13 -05:00
Container::DescriptionDetails => self.out.write_str("</dd>")?,
2023-01-18 16:30:24 -05:00
Container::Footnote { number, .. } => {
if !self.footnote_backlink_written {
write!(
self.out,
"\n<p><a href=\"#fnref{}\" role=\"doc-backlink\">↩︎︎</a></p>",
number,
)?;
}
self.out.write_str("\n</li>")?;
self.footnote_number = None;
}
2022-11-29 12:34:13 -05:00
Container::Table => self.out.write_str("</table>")?,
2023-01-25 13:27:12 -05:00
Container::TableRow { .. } => self.out.write_str("</tr>")?,
2023-01-29 09:10:01 -05:00
Container::Section { .. } => self.out.write_str("</section>")?,
2022-12-07 13:32:42 -05:00
Container::Div { .. } => self.out.write_str("</div>")?,
2023-01-18 16:30:24 -05:00
Container::Paragraph => {
2023-01-22 11:42:47 -05:00
if matches!(self.list_tightness.last(), Some(true)) {
continue;
}
2023-01-18 16:30:24 -05:00
if let Some(num) = self.footnote_number {
if matches!(
self.events.peek(),
Some(Event::End(Container::Footnote { .. }))
) {
write!(
self.out,
r##"<a href="#fnref{}" role="doc-backlink">↩︎︎</a>"##,
num
)?;
self.footnote_backlink_written = true;
}
}
self.out.write_str("</p>")?;
}
2023-01-29 09:10:01 -05:00
Container::Heading { level, .. } => write!(self.out, "</h{}>", level)?,
2023-01-25 14:58:29 -05:00
Container::TableCell { head: false, .. } => self.out.write_str("</td>")?,
Container::TableCell { head: true, .. } => self.out.write_str("</th>")?,
2023-01-26 14:16:20 -05:00
Container::Caption => self.out.write_str("</caption>")?,
Container::DescriptionTerm => self.out.write_str("</dt>")?,
Container::CodeBlock { .. } => self.out.write_str("</code></pre>")?,
2023-01-15 14:03:25 -05:00
Container::Span => self.out.write_str("</span>")?,
2022-12-17 12:03:06 -05:00
Container::Link(..) => self.out.write_str("</a>")?,
Container::Image(src, ..) => {
2023-03-12 12:03:44 -04:00
if self.img_alt_text == 1 {
if !src.is_empty() {
self.out.write_str(r#"" src=""#)?;
self.write_attr(&src)?;
}
self.out.write_str(r#"">"#)?;
2022-12-17 12:03:06 -05:00
}
2023-03-12 12:03:44 -04:00
self.img_alt_text -= 1;
2022-12-17 12:03:06 -05:00
}
2022-12-08 11:42:54 -05:00
Container::Verbatim => self.out.write_str("</code>")?,
2023-01-15 14:03:25 -05:00
Container::Math { display } => {
self.out.write_str(if display {
r#"\]</span>"#
} else {
r#"\)</span>"#
})?;
}
2022-12-11 04:45:05 -05:00
Container::RawBlock { .. } | Container::RawInline { .. } => {
2022-12-17 12:03:06 -05:00
self.raw = Raw::None;
2022-12-11 04:45:05 -05:00
}
2022-11-29 12:34:13 -05:00
Container::Subscript => self.out.write_str("</sub>")?,
Container::Superscript => self.out.write_str("</sup>")?,
Container::Insert => self.out.write_str("</ins>")?,
Container::Delete => self.out.write_str("</del>")?,
Container::Strong => self.out.write_str("</strong>")?,
Container::Emphasis => self.out.write_str("</em>")?,
Container::Mark => self.out.write_str("</mark>")?,
}
}
Event::Str(s) => match self.raw {
2023-03-12 12:03:44 -04:00
Raw::None if self.img_alt_text > 0 => self.write_attr(&s)?,
Raw::None => self.write_text(&s)?,
Raw::Html => self.out.write_str(&s)?,
Raw::Other => {}
},
Event::FootnoteReference(_tag, number) => {
if self.img_alt_text == 0 {
write!(
self.out,
r##"<a id="fnref{}" href="#fn{}" role="doc-noteref"><sup>{}</sup></a>"##,
number, number, number
)?;
}
}
Event::Symbol(sym) => write!(self.out, ":{}:", sym)?,
Event::LeftSingleQuote => self.out.write_str("&lsquo;")?,
Event::RightSingleQuote => self.out.write_str("&rsquo;")?,
Event::LeftDoubleQuote => self.out.write_str("&ldquo;")?,
Event::RightDoubleQuote => self.out.write_str("&rdquo;")?,
Event::Ellipsis => self.out.write_str("&hellip;")?,
Event::EnDash => self.out.write_str("&ndash;")?,
Event::EmDash => self.out.write_str("&mdash;")?,
Event::NonBreakingSpace => self.out.write_str("&nbsp;")?,
Event::Hardbreak => self.out.write_str("<br>\n")?,
Event::Softbreak => self.out.write_char('\n')?,
Event::Escape | Event::Blankline => unreachable!("filtered out"),
2023-02-01 16:32:22 -05:00
Event::ThematicBreak(attrs) => {
self.out.write_str("\n<hr")?;
for (a, v) in attrs.iter() {
write!(self.out, r#" {}=""#, a)?;
v.parts().try_for_each(|part| self.write_attr(part))?;
2023-02-01 16:32:22 -05:00
self.out.write_char('"')?;
}
self.out.write_str(">")?;
}
2022-11-29 12:34:13 -05:00
}
2023-02-04 14:06:01 -05:00
self.first_line = false;
2022-11-29 12:34:13 -05:00
}
2023-01-18 16:30:24 -05:00
if self.encountered_footnote {
self.out.write_str("\n</ol>\n</section>")?;
}
self.out.write_char('\n')?;
2022-11-29 12:34:13 -05:00
Ok(())
}
/// Write `s` to the output, replacing HTML-special characters by entities.
///
/// `<`, `>` and `&` are always replaced; `"` is replaced by `&quot;` only
/// when `escape_quotes` is set. Everything else is copied through unchanged.
fn write_escape(&mut self, s: &str, escape_quotes: bool) -> std::fmt::Result {
    // Byte offset of the first character that has not been written yet.
    let mut pending = 0;
    for (idx, ch) in s.char_indices() {
        let entity = match ch {
            '<' => "&lt;",
            '>' => "&gt;",
            '&' => "&amp;",
            '"' if escape_quotes => "&quot;",
            _ => continue,
        };
        // Flush the untouched run before the special character, then the
        // entity that stands in for it.
        self.out.write_str(&s[pending..idx])?;
        self.out.write_str(entity)?;
        // Every escaped character is ASCII, i.e. exactly one byte wide.
        pending = idx + 1;
    }
    self.out.write_str(&s[pending..])
}
/// Write `s` as element content: `<`, `>` and `&` are escaped, double quotes
/// are left untouched.
fn write_text(&mut self, s: &str) -> std::fmt::Result {
    let escape_quotes = false;
    self.write_escape(s, escape_quotes)
}
/// Write `s` as part of a double-quoted attribute value: `<`, `>`, `&` and
/// `"` are all escaped.
fn write_attr(&mut self, s: &str) -> std::fmt::Result {
    let escape_quotes = true;
    self.write_escape(s, escape_quotes)
}
2022-11-27 18:53:49 -05:00
}