jotdown/src/html.rs

420 lines
19 KiB
Rust
Raw Normal View History

2022-11-29 12:34:13 -05:00
use crate::Atom;
use crate::Container;
2022-11-27 18:53:49 -05:00
use crate::Event;
2023-01-22 06:39:04 -05:00
use crate::ListKind;
2023-01-22 04:41:15 -05:00
use crate::OrderedListNumbering::*;
2022-11-27 18:53:49 -05:00
2022-11-29 12:34:13 -05:00
/// Render parsed events as HTML into a unicode-accepting buffer or stream.
///
/// The events are consumed and the resulting markup is appended to `out` through its
/// [`std::fmt::Write`] implementation.
///
/// # Panics
///
/// Panics if writing to `out` fails.
pub fn push<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write>(out: W, events: I) {
    let mut writer = Writer::new(events, out);
    writer.write().unwrap();
}
2022-11-29 12:34:13 -05:00
/// Generate HTML from parsed events and write it to a byte sink, encoded as UTF-8.
///
/// NOTE: This performs many small writes, so IO writes should be buffered with e.g.
/// [`std::io::BufWriter`].
pub fn write<'s, I: Iterator<Item = Event<'s>>, W: std::io::Write>(
mut out: W,
2022-11-27 18:53:49 -05:00
events: I,
2022-11-29 12:34:13 -05:00
) -> std::io::Result<()> {
struct Adapter<'a, T: ?Sized + 'a> {
inner: &'a mut T,
error: std::io::Result<()>,
}
impl<T: std::io::Write + ?Sized> std::fmt::Write for Adapter<'_, T> {
fn write_str(&mut self, s: &str) -> std::fmt::Result {
match self.inner.write_all(s.as_bytes()) {
Ok(()) => Ok(()),
Err(e) => {
self.error = Err(e);
Err(std::fmt::Error)
}
}
}
}
let mut output = Adapter {
inner: &mut out,
error: Ok(()),
};
Writer::new(events, &mut output)
.write()
.map_err(|_| output.error.unwrap_err())
2022-11-27 18:53:49 -05:00
}
2022-12-11 04:45:05 -05:00
/// Raw-content state of the writer, set when a raw block or raw inline starts and reset
/// to [`Raw::None`] when it ends.
enum Raw {
    /// Not inside raw content; strings are HTML-escaped before being written.
    None,
    /// Inside raw content whose format is `html`; strings are written verbatim.
    Html,
    /// Inside raw content of any other format; strings are dropped.
    Other,
}
/// Wrapper around an event iterator; its `Iterator` implementation skips the events that
/// have no HTML representation.
struct FilteredEvents<I> {
    /// The underlying, unfiltered event source.
    events: I,
}
impl<'s, I: Iterator<Item = Event<'s>>> Iterator for FilteredEvents<I> {
    type Item = Event<'s>;

    /// Yields the next event that is neither a blank line nor an escape atom, or `None`
    /// once the underlying iterator is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        self.events
            .find(|event| !matches!(event, Event::Atom(Atom::Blankline | Atom::Escape)))
    }
}
struct Writer<'s, I: Iterator<Item = Event<'s>>, W> {
events: std::iter::Peekable<FilteredEvents<I>>,
2022-11-29 12:34:13 -05:00
out: W,
2022-12-11 04:45:05 -05:00
raw: Raw,
2022-12-17 12:03:06 -05:00
text_only: bool,
2023-01-22 11:42:47 -05:00
list_tightness: Vec<bool>,
2023-01-18 16:30:24 -05:00
encountered_footnote: bool,
footnote_number: Option<std::num::NonZeroUsize>,
footnote_backlink_written: bool,
2022-11-29 12:34:13 -05:00
}
impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
2022-11-29 12:34:13 -05:00
fn new(events: I, out: W) -> Self {
2022-12-11 04:45:05 -05:00
Self {
events: FilteredEvents { events }.peekable(),
2022-12-11 04:45:05 -05:00
out,
raw: Raw::None,
2022-12-17 12:03:06 -05:00
text_only: false,
2023-01-22 11:42:47 -05:00
list_tightness: Vec::new(),
2023-01-18 16:30:24 -05:00
encountered_footnote: false,
footnote_number: None,
footnote_backlink_written: false,
2022-12-11 04:45:05 -05:00
}
2022-11-27 18:53:49 -05:00
}
2022-11-29 12:34:13 -05:00
fn write(&mut self) -> std::fmt::Result {
2023-01-18 16:30:24 -05:00
while let Some(e) = self.events.next() {
2022-11-29 12:34:13 -05:00
match e {
2023-01-15 14:03:25 -05:00
Event::Start(c, attrs) => {
if c.is_block() {
self.out.write_char('\n')?;
}
2022-12-17 12:03:06 -05:00
if self.text_only && !matches!(c, Container::Image(..)) {
continue;
}
2023-01-15 14:03:25 -05:00
match &c {
Container::Blockquote => self.out.write_str("<blockquote")?,
Container::List { kind, tight } => {
self.list_tightness.push(*tight);
match kind {
ListKind::Unordered | ListKind::Task => {
self.out.write_str("<ul")?
}
2023-01-22 06:39:04 -05:00
ListKind::Ordered {
numbering, start, ..
} => {
self.out.write_str("<ol")?;
if *start > 1 {
write!(self.out, r#" start="{}""#, start)?;
}
if let Some(ty) = match numbering {
Decimal => None,
AlphaLower => Some('a'),
AlphaUpper => Some('A'),
RomanLower => Some('i'),
RomanUpper => Some('I'),
} {
write!(self.out, r#" type="{}""#, ty)?;
}
}
2023-01-22 04:41:15 -05:00
}
}
Container::ListItem | Container::TaskListItem { .. } => {
self.out.write_str("<li")?;
}
2023-01-15 14:03:25 -05:00
Container::DescriptionDetails => self.out.write_str("<dd")?,
2023-01-18 16:30:24 -05:00
Container::Footnote { number, .. } => {
assert!(self.footnote_number.is_none());
self.footnote_number = Some((*number).try_into().unwrap());
if !self.encountered_footnote {
self.encountered_footnote = true;
self.out
.write_str("<section role=\"doc-endnotes\">\n<hr>\n<ol>\n")?;
}
write!(self.out, "<li id=\"fn{}\">", number)?;
self.footnote_backlink_written = false;
continue;
}
2023-01-15 14:03:25 -05:00
Container::Table => self.out.write_str("<table")?,
2023-01-25 13:27:12 -05:00
Container::TableRow { .. } => self.out.write_str("<tr")?,
2023-01-15 14:03:25 -05:00
Container::Div { .. } => self.out.write_str("<div")?,
2023-01-22 11:42:47 -05:00
Container::Paragraph => {
if matches!(self.list_tightness.last(), Some(true)) {
continue;
}
self.out.write_str("<p")?;
}
2023-01-15 14:03:25 -05:00
Container::Heading { level } => write!(self.out, "<h{}", level)?,
2023-01-25 13:27:12 -05:00
Container::TableCell { .. } => self.out.write_str("<td")?,
2023-01-15 14:03:25 -05:00
Container::DescriptionTerm => self.out.write_str("<dt")?,
Container::CodeBlock { .. } => self.out.write_str("<pre")?,
Container::Span | Container::Math { .. } => self.out.write_str("<span")?,
2023-01-16 17:31:47 -05:00
Container::Link(dst, ..) => {
if dst.is_empty() {
self.out.write_str("<a")?;
} else {
write!(self.out, r#"<a href="{}""#, dst)?;
}
}
2022-12-17 12:03:06 -05:00
Container::Image(..) => {
self.text_only = true;
self.out.write_str("<img")?;
}
2023-01-15 14:03:25 -05:00
Container::Verbatim => self.out.write_str("<code")?,
2022-12-11 04:45:05 -05:00
Container::RawBlock { format } | Container::RawInline { format } => {
2023-01-15 14:03:25 -05:00
self.raw = if format == &"html" {
2022-12-11 04:45:05 -05:00
Raw::Html
} else {
Raw::Other
2023-01-15 14:03:25 -05:00
};
continue;
2022-12-11 04:45:05 -05:00
}
2023-01-15 14:03:25 -05:00
Container::Subscript => self.out.write_str("<sub")?,
Container::Superscript => self.out.write_str("<sup")?,
Container::Insert => self.out.write_str("<ins")?,
Container::Delete => self.out.write_str("<del")?,
Container::Strong => self.out.write_str("<strong")?,
Container::Emphasis => self.out.write_str("<em")?,
Container::Mark => self.out.write_str("<mark")?,
2022-11-29 12:34:13 -05:00
Container::SingleQuoted => self.out.write_str("&lsquo;")?,
Container::DoubleQuoted => self.out.write_str("&ldquo;")?,
2023-01-22 04:41:15 -05:00
_ => panic!(),
2022-11-29 12:34:13 -05:00
}
2023-01-15 14:03:25 -05:00
2023-01-15 14:06:51 -05:00
if matches!(c, Container::SingleQuoted | Container::DoubleQuoted) {
continue; // TODO add span to allow attributes?
}
2023-01-22 04:41:15 -05:00
for (a, v) in attrs.iter().filter(|(a, _)| *a != "class") {
write!(self.out, r#" {}="{}""#, a, v)?;
}
2023-01-15 14:03:25 -05:00
if attrs.iter().any(|(a, _)| a == "class")
|| matches!(
c,
2023-01-22 04:41:15 -05:00
Container::Div { class: Some(_) }
| Container::Math { .. }
2023-01-22 06:39:04 -05:00
| Container::List {
kind: ListKind::Task,
..
}
2023-01-22 04:41:15 -05:00
| Container::TaskListItem { .. }
2023-01-15 14:03:25 -05:00
)
{
self.out.write_str(r#" class=""#)?;
2023-01-22 04:41:15 -05:00
let mut first_written = false;
if let Some(cls) = match c {
2023-01-22 06:39:04 -05:00
Container::List {
kind: ListKind::Task,
..
} => Some("task-list"),
2023-01-22 04:41:15 -05:00
Container::TaskListItem { checked: false } => Some("unchecked"),
Container::TaskListItem { checked: true } => Some("checked"),
Container::Math { display: false } => Some("math inline"),
Container::Math { display: true } => Some("math display"),
_ => None,
} {
first_written = true;
self.out.write_str(cls)?;
}
for cls in attrs
2023-01-15 14:03:25 -05:00
.iter()
.filter(|(a, _)| a == &"class")
2023-01-22 04:41:15 -05:00
.map(|(_, cls)| cls)
{
if first_written {
2023-01-15 14:03:25 -05:00
self.out.write_char(' ')?;
}
2023-01-22 04:41:15 -05:00
first_written = true;
self.out.write_str(cls)?;
}
// div class goes after classes from attrs
2023-01-15 14:03:25 -05:00
if let Container::Div { class: Some(cls) } = c {
2023-01-22 04:41:15 -05:00
if first_written {
2023-01-15 14:03:25 -05:00
self.out.write_char(' ')?;
}
self.out.write_str(cls)?;
}
self.out.write_char('"')?;
}
match c {
Container::CodeBlock { lang } => {
if let Some(l) = lang {
write!(self.out, r#"><code class="language-{}">"#, l)?;
} else {
self.out.write_str("><code>")?;
}
}
Container::Image(..) => {
self.out.write_str(r#" alt=""#)?;
}
Container::Math { display } => {
self.out
.write_str(if display { r#">\["# } else { r#">\("# })?;
}
_ => self.out.write_char('>')?,
}
2022-11-29 12:34:13 -05:00
}
Event::End(c) => {
if c.is_block_container() && !matches!(c, Container::Footnote { .. }) {
self.out.write_char('\n')?;
}
2022-12-17 12:03:06 -05:00
if self.text_only && !matches!(c, Container::Image(..)) {
continue;
}
2022-11-29 12:34:13 -05:00
match c {
Container::Blockquote => self.out.write_str("</blockquote>")?,
2023-01-22 06:39:04 -05:00
Container::List {
kind: ListKind::Unordered | ListKind::Task,
..
} => {
2023-01-22 11:42:47 -05:00
self.list_tightness.pop();
2023-01-22 04:41:15 -05:00
self.out.write_str("</ul>")?;
}
2023-01-22 06:39:04 -05:00
Container::List {
kind: ListKind::Ordered { .. },
..
} => self.out.write_str("</ol>")?,
2023-01-22 04:41:15 -05:00
Container::ListItem | Container::TaskListItem { .. } => {
self.out.write_str("</li>")?;
}
2022-11-29 12:34:13 -05:00
Container::DescriptionDetails => self.out.write_str("</dd>")?,
2023-01-18 16:30:24 -05:00
Container::Footnote { number, .. } => {
if !self.footnote_backlink_written {
write!(
self.out,
"\n<p><a href=\"#fnref{}\" role=\"doc-backlink\">↩︎︎</a></p>",
number,
)?;
}
self.out.write_str("\n</li>")?;
self.footnote_number = None;
}
2022-11-29 12:34:13 -05:00
Container::Table => self.out.write_str("</table>")?,
2023-01-25 13:27:12 -05:00
Container::TableRow { .. } => self.out.write_str("</tr>")?,
2022-12-07 13:32:42 -05:00
Container::Div { .. } => self.out.write_str("</div>")?,
2023-01-18 16:30:24 -05:00
Container::Paragraph => {
2023-01-22 11:42:47 -05:00
if matches!(self.list_tightness.last(), Some(true)) {
continue;
}
2023-01-18 16:30:24 -05:00
if let Some(num) = self.footnote_number {
if matches!(
self.events.peek(),
Some(Event::End(Container::Footnote { .. }))
) {
write!(
self.out,
r##"<a href="#fnref{}" role="doc-backlink">↩︎︎</a>"##,
num
)?;
self.footnote_backlink_written = true;
}
}
self.out.write_str("</p>")?;
}
2022-11-29 12:34:13 -05:00
Container::Heading { level } => write!(self.out, "</h{}>", level)?,
2023-01-25 13:27:12 -05:00
Container::TableCell { .. } => self.out.write_str("</td>")?,
Container::DescriptionTerm => self.out.write_str("</dt>")?,
Container::CodeBlock { .. } => self.out.write_str("</code></pre>")?,
2023-01-15 14:03:25 -05:00
Container::Span => self.out.write_str("</span>")?,
2022-12-17 12:03:06 -05:00
Container::Link(..) => self.out.write_str("</a>")?,
Container::Image(src, ..) => {
self.text_only = false;
if src.is_empty() {
self.out.write_str(r#"">"#)?;
} else {
write!(self.out, r#"" src="{}">"#, src)?;
}
}
2022-12-08 11:42:54 -05:00
Container::Verbatim => self.out.write_str("</code>")?,
2023-01-15 14:03:25 -05:00
Container::Math { display } => {
self.out.write_str(if display {
r#"\]</span>"#
} else {
r#"\)</span>"#
})?;
}
2022-12-11 04:45:05 -05:00
Container::RawBlock { .. } | Container::RawInline { .. } => {
2022-12-17 12:03:06 -05:00
self.raw = Raw::None;
2022-12-11 04:45:05 -05:00
}
2022-11-29 12:34:13 -05:00
Container::Subscript => self.out.write_str("</sub>")?,
Container::Superscript => self.out.write_str("</sup>")?,
Container::Insert => self.out.write_str("</ins>")?,
Container::Delete => self.out.write_str("</del>")?,
Container::Strong => self.out.write_str("</strong>")?,
Container::Emphasis => self.out.write_str("</em>")?,
Container::Mark => self.out.write_str("</mark>")?,
Container::SingleQuoted => self.out.write_str("&rsquo;")?,
Container::DoubleQuoted => self.out.write_str("&rdquo;")?,
2023-01-22 04:41:15 -05:00
_ => panic!(),
2022-11-29 12:34:13 -05:00
}
}
2022-12-13 15:19:16 -05:00
Event::Str(s) => {
let mut s: &str = s.as_ref();
match self.raw {
Raw::None => {
let mut ent = "";
while let Some(i) = s.chars().position(|c| {
if let Some(s) = match c {
'<' => Some("&lt;"),
'>' => Some("&gt;"),
'&' => Some("&amp;"),
'"' => Some("&quot;"),
_ => None,
} {
ent = s;
true
} else {
false
}
}) {
self.out.write_str(&s[..i])?;
self.out.write_str(ent)?;
s = &s[i + 1..];
2022-12-11 04:45:05 -05:00
}
2022-12-13 15:19:16 -05:00
self.out.write_str(s)?;
2022-12-08 12:25:24 -05:00
}
2022-12-13 15:19:16 -05:00
Raw::Html => {
self.out.write_str(s)?;
}
Raw::Other => {}
2022-12-11 04:45:05 -05:00
}
2022-12-13 15:19:16 -05:00
}
2022-12-08 12:25:24 -05:00
2022-11-29 12:34:13 -05:00
Event::Atom(a) => match a {
2023-01-18 16:30:24 -05:00
Atom::FootnoteReference(_tag, number) => {
write!(
self.out,
r##"<a id="fnref{}" href="#fn{}" role="doc-noteref"><sup>{}</sup></a>"##,
number, number, number
)?;
}
2022-11-29 12:34:13 -05:00
Atom::Ellipsis => self.out.write_str("&hellip;")?,
Atom::EnDash => self.out.write_str("&ndash;")?,
Atom::EmDash => self.out.write_str("&mdash;")?,
Atom::ThematicBreak => self.out.write_str("\n<hr>")?,
Atom::NonBreakingSpace => self.out.write_str("&nbsp;")?,
Atom::Hardbreak => self.out.write_str("<br>\n")?,
Atom::Softbreak => self.out.write_char('\n')?,
Atom::Escape | Atom::Blankline => unreachable!("filtered out"),
2022-11-29 12:34:13 -05:00
},
}
}
2023-01-18 16:30:24 -05:00
if self.encountered_footnote {
self.out.write_str("\n</ol>\n</section>")?;
}
self.out.write_char('\n')?;
2022-11-29 12:34:13 -05:00
Ok(())
}
2022-11-27 18:53:49 -05:00
}