wip djot -> html

This commit is contained in:
Noah Hellman 2022-11-29 18:34:13 +01:00
parent e84e7dd50b
commit 8ada539709
3 changed files with 224 additions and 85 deletions

View file

@ -1,17 +1,149 @@
use crate::Atom;
use crate::Container;
use crate::Event; use crate::Event;
pub fn push_html<'s, I: Iterator<Item = Event<'s>>>(s: &mut String, events: I) { /// Generate HTML from parsed events and push it to a unicode-accepting buffer or stream.
Writer::new(events).write() pub fn push<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write>(out: W, events: I) {
Writer::new(events, out).write().unwrap();
} }
struct Writer<I> { /// Generate HTML from parsed events and write it to a byte sink, encoded as UTF-8.
///
/// NOTE: This performs many small writes, so IO writes should be buffered with e.g.
/// [`std::io::BufWriter`].
pub fn write<'s, I: Iterator<Item = Event<'s>>, W: std::io::Write>(
mut out: W,
events: I, events: I,
} ) -> std::io::Result<()> {
struct Adapter<'a, T: ?Sized + 'a> {
impl<'s, I: Iterator<Item = Event<'s>>> Writer<I> { inner: &'a mut T,
fn new(events: I) -> Self { error: std::io::Result<()>,
Self { events }
} }
fn write(self) {} impl<T: std::io::Write + ?Sized> std::fmt::Write for Adapter<'_, T> {
fn write_str(&mut self, s: &str) -> std::fmt::Result {
match self.inner.write_all(s.as_bytes()) {
Ok(()) => Ok(()),
Err(e) => {
self.error = Err(e);
Err(std::fmt::Error)
}
}
}
}
let mut output = Adapter {
inner: &mut out,
error: Ok(()),
};
Writer::new(events, &mut output)
.write()
.map_err(|_| output.error.unwrap_err())
}
/// Pairs a source of events with the sink that receives their HTML rendering.
struct Writer<I, W> {
    /// Events still waiting to be rendered.
    events: I,
    /// Destination for the generated markup.
    out: W,
}
impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<I, W> {
fn new(events: I, out: W) -> Self {
Self { events, out }
}
fn write(&mut self) -> std::fmt::Result {
for e in &mut self.events {
match e {
Event::Start(c, _attrs) => {
match c {
Container::Blockquote => self.out.write_str("<blockquote>")?,
Container::List(..) => todo!(),
Container::ListItem => self.out.write_str("<li>")?,
Container::DescriptionList => self.out.write_str("<dl>")?,
Container::DescriptionDetails => self.out.write_str("<dd>")?,
Container::Footnote { .. } => todo!(),
Container::Table => self.out.write_str("<table>")?,
Container::TableRow => self.out.write_str("<tr>")?,
Container::Div => self.out.write_str("<div>")?,
Container::Span => self.out.write_str("<span>")?,
Container::Paragraph => self.out.write_str("<p>")?,
Container::Heading { level } => write!(self.out, "<h{}>", level)?,
Container::Link(..) => todo!(),
Container::Image(..) => todo!(),
Container::TableCell => self.out.write_str("<td>")?,
Container::RawBlock { .. } => todo!(),
Container::CodeBlock { .. } => todo!(),
Container::Subscript => self.out.write_str("<sub>")?,
Container::Superscript => self.out.write_str("<sup>")?,
Container::Insert => self.out.write_str("<ins>")?,
Container::Delete => self.out.write_str("<del>")?,
Container::Strong => self.out.write_str("<strong>")?,
Container::Emphasis => self.out.write_str("<em>")?,
Container::Mark => self.out.write_str("<mark>")?,
Container::SingleQuoted => self.out.write_str("&lsquo;")?,
Container::DoubleQuoted => self.out.write_str("&ldquo;")?,
}
}
Event::End(c) => {
match c {
Container::Blockquote => self.out.write_str("</blockquote>")?,
Container::List(..) => todo!(),
Container::ListItem => self.out.write_str("</li>")?,
Container::DescriptionList => self.out.write_str("</dl>")?,
Container::DescriptionDetails => self.out.write_str("</dd>")?,
Container::Footnote { .. } => todo!(),
Container::Table => self.out.write_str("</table>")?,
Container::TableRow => self.out.write_str("</tr>")?,
Container::Div => self.out.write_str("</div>")?,
Container::Span => self.out.write_str("</span>")?,
Container::Paragraph => self.out.write_str("</p>")?,
Container::Heading { level } => write!(self.out, "</h{}>", level)?,
Container::TableCell => self.out.write_str("</td>")?,
Container::RawBlock { .. } => todo!(),
Container::CodeBlock { .. } => todo!(),
Container::Link(..) => todo!(),
Container::Image(..) => todo!(),
Container::Subscript => self.out.write_str("</sub>")?,
Container::Superscript => self.out.write_str("</sup>")?,
Container::Insert => self.out.write_str("</ins>")?,
Container::Delete => self.out.write_str("</del>")?,
Container::Strong => self.out.write_str("</strong>")?,
Container::Emphasis => self.out.write_str("</em>")?,
Container::Mark => self.out.write_str("</mark>")?,
Container::SingleQuoted => self.out.write_str("&rsquo;")?,
Container::DoubleQuoted => self.out.write_str("&rdquo;")?,
}
}
Event::Str(s) => self.out.write_str(s)?,
Event::Verbatim(s) => write!(self.out, "<code>{}</code>", s)?,
Event::Math { content, display } => {
if display {
write!(
self.out,
r#"<span class="math display">\[{}\]</span>"#,
content,
)?;
} else {
write!(
self.out,
r#"<span class="math inline">\({}\)</span>"#,
content,
)?;
}
}
Event::Atom(a) => match a {
Atom::Ellipsis => self.out.write_str("&hellip;")?,
Atom::EnDash => self.out.write_str("&ndash;")?,
Atom::EmDash => self.out.write_str("&mdash;")?,
Atom::ThematicBreak => self.out.write_str("\n<hr>")?,
Atom::NonBreakingSpace => self.out.write_str("&nbsp;")?,
Atom::Hardbreak => self.out.write_str("<br>\n")?,
Atom::Softbreak => self.out.write_char('\n')?,
Atom::Blankline | Atom::Escape => {}
},
}
}
Ok(())
}
} }

View file

@ -1,5 +1,6 @@
pub mod html;
mod block; mod block;
mod html;
mod inline; mod inline;
mod lex; mod lex;
mod span; mod span;
@ -13,60 +14,22 @@ const EOF: char = '\0';
#[derive(Debug, PartialEq, Eq)] #[derive(Debug, PartialEq, Eq)]
pub enum Event<'s> { pub enum Event<'s> {
/// Start of a tag. /// Start of a container.
Start(Tag<'s>, Attributes<'s>), Start(Container<'s>, Attributes<'s>),
/// End of a tag. /// End of a container.
End(Tag<'s>), End(Container<'s>),
/// A string object, text only. /// A string object, text only.
Str(&'s str), Str(&'s str),
/// An atomic element.
Atom(Atom),
/// A verbatim string. /// A verbatim string.
Verbatim(&'s str), Verbatim(&'s str),
/// An inline or display math element. /// An inline or display math element.
Math { content: &'s str, display: bool }, Math { content: &'s str, display: bool },
/// An ellipsis, i.e. a set of three periods.
Ellipsis,
/// An en dash.
EnDash,
/// An em dash.
EmDash,
/// A thematic break, typically a horizontal rule.
ThematicBreak,
/// A blank line.
Blankline,
/// A space that may not break a line.
NonBreakingSpace,
/// A newline that may or may not break a line in the output format.
Softbreak,
/// A newline that must break a line.
Hardbreak,
/// An escape character, not visible in output.
Escape,
} }
#[derive(Debug, PartialEq, Eq)] #[derive(Debug, PartialEq, Eq)]
pub enum Tag<'s> { pub enum Container<'s> {
/// A paragraph.
Paragraph,
/// A heading.
Heading { level: u8 },
/// A link with a destination URL.
Link(&'s str, LinkType),
/// An image.
Image(&'s str),
/// A divider element.
Div,
/// An inline divider element.
Span,
/// A table element.
Table,
/// A row element of a table.
TableRow,
/// A cell element of row within a table.
TableCell,
/// A block with raw markup for a specific output format.
RawBlock { format: &'s str },
/// A block with code in a specific language.
CodeBlock { language: Option<&'s str> },
/// A blockquote element. /// A blockquote element.
Blockquote, Blockquote,
/// A list. /// A list.
@ -75,10 +38,32 @@ pub enum Tag<'s> {
ListItem, ListItem,
/// A description list element. /// A description list element.
DescriptionList, DescriptionList,
/// A item of a description list. /// Details describing a term within a description list.
DescriptionItem, DescriptionDetails,
/// A footnote definition. /// A footnote definition.
Footnote { tag: &'s str }, Footnote { tag: &'s str },
/// A table element.
Table,
/// A row element of a table.
TableRow,
/// A block-level divider element.
Div,
/// A paragraph.
Paragraph,
/// A heading.
Heading { level: u8 },
/// A link with a destination URL.
Link(&'s str, LinkType),
/// An image.
Image(&'s str),
/// An inline divider element.
Span,
/// A cell element of a row within a table.
TableCell,
/// A block with raw markup for a specific output format.
RawBlock { format: &'s str },
/// A block with code in a specific language.
CodeBlock { language: Option<&'s str> },
/// A subscripted element. /// A subscripted element.
Subscript, Subscript,
/// A superscripted element. /// A superscripted element.
@ -143,22 +128,44 @@ pub enum OrderedListFormat {
ParenParen, ParenParen,
} }
#[derive(Debug, PartialEq, Eq)]
pub enum Atom {
/// An ellipsis, i.e. a set of three periods.
Ellipsis,
/// An en dash.
EnDash,
/// An em dash.
EmDash,
/// A thematic break, typically a horizontal rule.
ThematicBreak,
/// A blank line.
Blankline,
/// A space that may not break a line.
NonBreakingSpace,
/// A newline that may or may not break a line in the output format.
Softbreak,
/// A newline that must break a line.
Hardbreak,
/// An escape character, not visible in output.
Escape,
}
impl<'s> Event<'s> { impl<'s> Event<'s> {
fn from_inline(src: &'s str, inline: inline::Event) -> Self { fn from_inline(src: &'s str, inline: inline::Event) -> Self {
let content = inline.span.of(src); let content = inline.span.of(src);
match inline.kind { match inline.kind {
inline::EventKind::Enter(c) | inline::EventKind::Exit(c) => { inline::EventKind::Enter(c) | inline::EventKind::Exit(c) => {
let t = match c { let t = match c {
inline::Container::Span => Tag::Span, inline::Container::Span => Container::Span,
inline::Container::Subscript => Tag::Subscript, inline::Container::Subscript => Container::Subscript,
inline::Container::Superscript => Tag::Superscript, inline::Container::Superscript => Container::Superscript,
inline::Container::Insert => Tag::Insert, inline::Container::Insert => Container::Insert,
inline::Container::Delete => Tag::Delete, inline::Container::Delete => Container::Delete,
inline::Container::Emphasis => Tag::Emphasis, inline::Container::Emphasis => Container::Emphasis,
inline::Container::Strong => Tag::Strong, inline::Container::Strong => Container::Strong,
inline::Container::Mark => Tag::Mark, inline::Container::Mark => Container::Mark,
inline::Container::SingleQuoted => Tag::SingleQuoted, inline::Container::SingleQuoted => Container::SingleQuoted,
inline::Container::DoubleQuoted => Tag::DoubleQuoted, inline::Container::DoubleQuoted => Container::DoubleQuoted,
_ => todo!(), _ => todo!(),
}; };
if matches!(inline.kind, inline::EventKind::Enter(_)) { if matches!(inline.kind, inline::EventKind::Enter(_)) {
@ -167,16 +174,16 @@ impl<'s> Event<'s> {
Self::End(t) Self::End(t)
} }
} }
inline::EventKind::Atom(a) => match a { inline::EventKind::Atom(a) => Event::Atom(match a {
inline::Atom::Ellipsis => Self::Ellipsis, inline::Atom::Ellipsis => Atom::Ellipsis,
inline::Atom::EnDash => Self::EnDash, inline::Atom::EnDash => Atom::EnDash,
inline::Atom::EmDash => Self::EmDash, inline::Atom::EmDash => Atom::EmDash,
inline::Atom::Nbsp => Self::NonBreakingSpace, inline::Atom::Nbsp => Atom::NonBreakingSpace,
inline::Atom::Softbreak => Self::Softbreak, inline::Atom::Softbreak => Atom::Softbreak,
inline::Atom::Hardbreak => Self::Hardbreak, inline::Atom::Hardbreak => Atom::Hardbreak,
inline::Atom::Escape => Self::Escape, inline::Atom::Escape => Atom::Escape,
_ => todo!(), _ => todo!(),
}, }),
inline::EventKind::Node(n) => match n { inline::EventKind::Node(n) => match n {
inline::Node::Str => Self::Str(content), inline::Node::Str => Self::Str(content),
inline::Node::Verbatim => Self::Verbatim(content), inline::Node::Verbatim => Self::Verbatim(content),
@ -194,7 +201,7 @@ impl<'s> Event<'s> {
} }
} }
impl<'s> Tag<'s> { impl<'s> Container<'s> {
fn from_block(src: &'s str, block: block::Block) -> Self { fn from_block(src: &'s str, block: block::Block) -> Self {
match block { match block {
block::Block::Leaf(l) => match l { block::Block::Leaf(l) => match l {
@ -272,7 +279,7 @@ impl<'s> Iterator for Parser<'s> {
} }
tree::EventKind::Exit(block) => { tree::EventKind::Exit(block) => {
self.parser = None; self.parser = None;
return Some(Event::End(Tag::from_block(self.src, block))); return Some(Event::End(Container::from_block(self.src, block)));
} }
tree::EventKind::Enter(..) => unreachable!(), tree::EventKind::Enter(..) => unreachable!(),
} }
@ -282,24 +289,25 @@ impl<'s> Iterator for Parser<'s> {
self.tree.next().map(|ev| match ev.kind { self.tree.next().map(|ev| match ev.kind {
tree::EventKind::Element(atom) => { tree::EventKind::Element(atom) => {
assert_eq!(atom, block::Atom::Blankline); assert_eq!(atom, block::Atom::Blankline);
Event::Blankline Event::Atom(Atom::Blankline)
} }
tree::EventKind::Enter(block) => { tree::EventKind::Enter(block) => {
if matches!(block, block::Block::Leaf(..)) { if matches!(block, block::Block::Leaf(..)) {
self.parser = Some(inline::Parser::new()); self.parser = Some(inline::Parser::new());
} }
Event::Start(Tag::from_block(self.src, block), Attributes::none()) Event::Start(Container::from_block(self.src, block), Attributes::none())
} }
tree::EventKind::Exit(block) => Event::End(Tag::from_block(self.src, block)), tree::EventKind::Exit(block) => Event::End(Container::from_block(self.src, block)),
}) })
} }
} }
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::Atom::*;
use super::Attributes; use super::Attributes;
use super::Container::*;
use super::Event::*; use super::Event::*;
use super::Tag::*;
macro_rules! test_parse { macro_rules! test_parse {
($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => { ($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => {
@ -364,7 +372,7 @@ mod test {
Start(Paragraph, Attributes::none()), Start(Paragraph, Attributes::none()),
Str("para0\n"), Str("para0\n"),
End(Paragraph), End(Paragraph),
Blankline, Atom(Blankline),
Start(Paragraph, Attributes::none()), Start(Paragraph, Attributes::none()),
Str("para1"), Str("para1"),
End(Paragraph), End(Paragraph),

View file

@ -7,6 +7,5 @@ fn main() {
.expect("failed to read unicode file"); .expect("failed to read unicode file");
let p = jotdown::Parser::new(&src); let p = jotdown::Parser::new(&src);
//let v = p.parse().collect::<Vec<_>>(); jotdown::html::write(std::io::stdout(), p).unwrap();
//print!("{:?}", v);
} }