wip djot -> html

This commit is contained in:
Noah Hellman 2022-11-29 18:34:13 +01:00
parent e84e7dd50b
commit 8ada539709
3 changed files with 224 additions and 85 deletions

View file

@ -1,17 +1,149 @@
use crate::Atom;
use crate::Container;
use crate::Event;
pub fn push_html<'s, I: Iterator<Item = Event<'s>>>(s: &mut String, events: I) {
Writer::new(events).write()
/// Render parsed events as HTML into `out`, any sink that accepts unicode text
/// (for example a `String` or another [`std::fmt::Write`] implementor).
///
/// # Panics
///
/// Panics if the sink reports a formatting error while being written to.
pub fn push<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write>(out: W, events: I) {
    let mut writer = Writer::new(events, out);
    writer.write().unwrap();
}
struct Writer<I> {
/// Generate HTML from parsed events and write it to a byte sink, encoded as UTF-8.
///
/// NOTE: This performs many small writes, so IO writes should be buffered with e.g.
/// [`std::io::BufWriter`].
pub fn write<'s, I: Iterator<Item = Event<'s>>, W: std::io::Write>(
mut out: W,
events: I,
}
impl<'s, I: Iterator<Item = Event<'s>>> Writer<I> {
fn new(events: I) -> Self {
Self { events }
) -> std::io::Result<()> {
/// Lets a byte sink be driven through `std::fmt::Write` while remembering the
/// underlying I/O error, which `std::fmt::Error` itself cannot carry.
struct Adapter<'a, T: ?Sized + 'a> {
/// The wrapped byte sink that receives the written strings as bytes.
inner: &'a mut T,
/// `Ok(())` until a write fails; then holds the I/O error of that failed write.
error: std::io::Result<()>,
}
fn write(self) {}
impl<T: std::io::Write + ?Sized> std::fmt::Write for Adapter<'_, T> {
    /// Forwards `s` to the wrapped byte sink as raw bytes.
    ///
    /// On failure the I/O error is stored in `self.error` and the generic
    /// `std::fmt::Error` is returned in its place.
    fn write_str(&mut self, s: &str) -> std::fmt::Result {
        let bytes = s.as_bytes();
        self.inner.write_all(bytes).map_err(|io_err| {
            self.error = Err(io_err);
            std::fmt::Error
        })
    }
}
let mut output = Adapter {
inner: &mut out,
error: Ok(()),
};
Writer::new(events, &mut output)
.write()
.map_err(|_| output.error.unwrap_err())
}
/// Streams HTML for a sequence of [`Event`]s into a [`std::fmt::Write`] sink.
struct Writer<I, W> {
    /// Source of parsed events; drained by [`Writer::write`].
    events: I,
    /// Destination that receives the generated markup.
    out: W,
}

impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<I, W> {
    /// Creates a writer that renders `events` into `out`.
    fn new(events: I, out: W) -> Self {
        Self { events, out }
    }

    /// Writes `text` to `out`, replacing the characters that are significant in
    /// HTML text content (`&`, `<`, `>`) with character references.
    ///
    /// Takes the sink explicitly instead of `&mut self` so it can be called while
    /// `self.events` is mutably borrowed by the rendering loop.
    fn write_escaped(out: &mut W, text: &str) -> std::fmt::Result {
        let mut rest = text;
        while let Some(idx) = rest.find(|c: char| matches!(c, '&' | '<' | '>')) {
            out.write_str(&rest[..idx])?;
            let entity = match rest.as_bytes()[idx] {
                b'&' => "&amp;",
                b'<' => "&lt;",
                _ => "&gt;",
            };
            out.write_str(entity)?;
            // All escaped characters are ASCII, so `idx + 1` is a char boundary.
            rest = &rest[idx + 1..];
        }
        out.write_str(rest)
    }

    /// Consumes all remaining events and writes their HTML representation.
    ///
    /// Text, verbatim and math content is HTML-escaped; containers and atoms map
    /// to fixed tags or entities.
    ///
    /// # Errors
    ///
    /// Returns the first error reported by the output sink.
    ///
    /// # Panics
    ///
    /// Panics (`todo!`) on containers that are not supported yet: lists,
    /// footnotes, links, images, raw blocks and code blocks.
    fn write(&mut self) -> std::fmt::Result {
        for e in &mut self.events {
            match e {
                Event::Start(c, _attrs) => match c {
                    Container::Blockquote => self.out.write_str("<blockquote>")?,
                    Container::List(..) => todo!(),
                    Container::ListItem => self.out.write_str("<li>")?,
                    Container::DescriptionList => self.out.write_str("<dl>")?,
                    Container::DescriptionDetails => self.out.write_str("<dd>")?,
                    Container::Footnote { .. } => todo!(),
                    Container::Table => self.out.write_str("<table>")?,
                    Container::TableRow => self.out.write_str("<tr>")?,
                    Container::Div => self.out.write_str("<div>")?,
                    Container::Span => self.out.write_str("<span>")?,
                    Container::Paragraph => self.out.write_str("<p>")?,
                    Container::Heading { level } => write!(self.out, "<h{}>", level)?,
                    Container::Link(..) => todo!(),
                    Container::Image(..) => todo!(),
                    Container::TableCell => self.out.write_str("<td>")?,
                    Container::RawBlock { .. } => todo!(),
                    Container::CodeBlock { .. } => todo!(),
                    Container::Subscript => self.out.write_str("<sub>")?,
                    Container::Superscript => self.out.write_str("<sup>")?,
                    Container::Insert => self.out.write_str("<ins>")?,
                    Container::Delete => self.out.write_str("<del>")?,
                    Container::Strong => self.out.write_str("<strong>")?,
                    Container::Emphasis => self.out.write_str("<em>")?,
                    Container::Mark => self.out.write_str("<mark>")?,
                    Container::SingleQuoted => self.out.write_str("&lsquo;")?,
                    Container::DoubleQuoted => self.out.write_str("&ldquo;")?,
                },
                Event::End(c) => match c {
                    Container::Blockquote => self.out.write_str("</blockquote>")?,
                    Container::List(..) => todo!(),
                    Container::ListItem => self.out.write_str("</li>")?,
                    Container::DescriptionList => self.out.write_str("</dl>")?,
                    Container::DescriptionDetails => self.out.write_str("</dd>")?,
                    Container::Footnote { .. } => todo!(),
                    Container::Table => self.out.write_str("</table>")?,
                    Container::TableRow => self.out.write_str("</tr>")?,
                    Container::Div => self.out.write_str("</div>")?,
                    Container::Span => self.out.write_str("</span>")?,
                    Container::Paragraph => self.out.write_str("</p>")?,
                    Container::Heading { level } => write!(self.out, "</h{}>", level)?,
                    Container::TableCell => self.out.write_str("</td>")?,
                    Container::RawBlock { .. } => todo!(),
                    Container::CodeBlock { .. } => todo!(),
                    Container::Link(..) => todo!(),
                    Container::Image(..) => todo!(),
                    Container::Subscript => self.out.write_str("</sub>")?,
                    Container::Superscript => self.out.write_str("</sup>")?,
                    Container::Insert => self.out.write_str("</ins>")?,
                    Container::Delete => self.out.write_str("</del>")?,
                    Container::Strong => self.out.write_str("</strong>")?,
                    Container::Emphasis => self.out.write_str("</em>")?,
                    Container::Mark => self.out.write_str("</mark>")?,
                    Container::SingleQuoted => self.out.write_str("&rsquo;")?,
                    Container::DoubleQuoted => self.out.write_str("&rdquo;")?,
                },
                // Source text is untrusted with respect to markup: escape it so
                // that `<`, `>` and `&` cannot open tags or entities.
                Event::Str(s) => Self::write_escaped(&mut self.out, s)?,
                Event::Verbatim(s) => {
                    self.out.write_str("<code>")?;
                    Self::write_escaped(&mut self.out, s)?;
                    self.out.write_str("</code>")?;
                }
                Event::Math { content, display } => {
                    let (open, close) = if display {
                        (r#"<span class="math display">\["#, r#"\]</span>"#)
                    } else {
                        (r#"<span class="math inline">\("#, r#"\)</span>"#)
                    };
                    self.out.write_str(open)?;
                    Self::write_escaped(&mut self.out, content)?;
                    self.out.write_str(close)?;
                }
                Event::Atom(a) => match a {
                    Atom::Ellipsis => self.out.write_str("&hellip;")?,
                    Atom::EnDash => self.out.write_str("&ndash;")?,
                    Atom::EmDash => self.out.write_str("&mdash;")?,
                    Atom::ThematicBreak => self.out.write_str("\n<hr>")?,
                    Atom::NonBreakingSpace => self.out.write_str("&nbsp;")?,
                    Atom::Hardbreak => self.out.write_str("<br>\n")?,
                    Atom::Softbreak => self.out.write_char('\n')?,
                    // Blank lines and escape markers produce no output.
                    Atom::Blankline | Atom::Escape => {}
                },
            }
        }
        Ok(())
    }
}

View file

@ -1,5 +1,6 @@
pub mod html;
mod block;
mod html;
mod inline;
mod lex;
mod span;
@ -13,60 +14,22 @@ const EOF: char = '\0';
#[derive(Debug, PartialEq, Eq)]
pub enum Event<'s> {
/// Start of a tag.
Start(Tag<'s>, Attributes<'s>),
/// End of a tag.
End(Tag<'s>),
/// Start of a container.
Start(Container<'s>, Attributes<'s>),
/// End of a container.
End(Container<'s>),
/// A string object, text only.
Str(&'s str),
/// An atomic element.
Atom(Atom),
/// A verbatim string.
Verbatim(&'s str),
/// An inline or display math element.
Math { content: &'s str, display: bool },
/// An ellipsis, i.e. a set of three periods.
Ellipsis,
/// An en dash.
EnDash,
/// An em dash.
EmDash,
/// A thematic break, typically a horizontal rule.
ThematicBreak,
/// A blank line.
Blankline,
/// A space that may not break a line.
NonBreakingSpace,
/// A newline that may or may not break a line in the output format.
Softbreak,
/// A newline that must break a line.
Hardbreak,
/// An escape character, not visible in output.
Escape,
}
#[derive(Debug, PartialEq, Eq)]
pub enum Tag<'s> {
/// A paragraph.
Paragraph,
/// A heading.
Heading { level: u8 },
/// A link with a destination URL.
Link(&'s str, LinkType),
/// An image.
Image(&'s str),
/// A divider element.
Div,
/// An inline divider element.
Span,
/// A table element.
Table,
/// A row element of a table.
TableRow,
/// A cell element of row within a table.
TableCell,
/// A block with raw markup for a specific output format.
RawBlock { format: &'s str },
/// A block with code in a specific language.
CodeBlock { language: Option<&'s str> },
pub enum Container<'s> {
/// A blockquote element.
Blockquote,
/// A list.
@ -75,10 +38,32 @@ pub enum Tag<'s> {
ListItem,
/// A description list element.
DescriptionList,
/// A item of a description list.
DescriptionItem,
/// Details describing a term within a description list.
DescriptionDetails,
/// A footnote definition.
Footnote { tag: &'s str },
/// A table element.
Table,
/// A row element of a table.
TableRow,
/// A block-level divider element.
Div,
/// A paragraph.
Paragraph,
/// A heading.
Heading { level: u8 },
/// A link with a destination URL.
Link(&'s str, LinkType),
/// An image.
Image(&'s str),
/// An inline divider element.
Span,
/// A cell element of row within a table.
TableCell,
/// A block with raw markup for a specific output format.
RawBlock { format: &'s str },
/// A block with code in a specific language.
CodeBlock { language: Option<&'s str> },
/// A subscripted element.
Subscript,
/// A superscripted element.
@ -143,22 +128,44 @@ pub enum OrderedListFormat {
ParenParen,
}
/// An element that stands on its own, without content or children; carried by
/// `Event::Atom`.
#[derive(Debug, PartialEq, Eq)]
pub enum Atom {
/// An ellipsis, i.e. a set of three periods.
Ellipsis,
/// An en dash.
EnDash,
/// An em dash.
EmDash,
/// A thematic break, typically rendered as a horizontal rule.
ThematicBreak,
/// A blank line.
Blankline,
/// A space that may not break a line.
NonBreakingSpace,
/// A newline that may or may not break a line in the output format.
Softbreak,
/// A newline that must break a line.
Hardbreak,
/// An escape character, not visible in output.
Escape,
}
impl<'s> Event<'s> {
fn from_inline(src: &'s str, inline: inline::Event) -> Self {
let content = inline.span.of(src);
match inline.kind {
inline::EventKind::Enter(c) | inline::EventKind::Exit(c) => {
let t = match c {
inline::Container::Span => Tag::Span,
inline::Container::Subscript => Tag::Subscript,
inline::Container::Superscript => Tag::Superscript,
inline::Container::Insert => Tag::Insert,
inline::Container::Delete => Tag::Delete,
inline::Container::Emphasis => Tag::Emphasis,
inline::Container::Strong => Tag::Strong,
inline::Container::Mark => Tag::Mark,
inline::Container::SingleQuoted => Tag::SingleQuoted,
inline::Container::DoubleQuoted => Tag::DoubleQuoted,
inline::Container::Span => Container::Span,
inline::Container::Subscript => Container::Subscript,
inline::Container::Superscript => Container::Superscript,
inline::Container::Insert => Container::Insert,
inline::Container::Delete => Container::Delete,
inline::Container::Emphasis => Container::Emphasis,
inline::Container::Strong => Container::Strong,
inline::Container::Mark => Container::Mark,
inline::Container::SingleQuoted => Container::SingleQuoted,
inline::Container::DoubleQuoted => Container::DoubleQuoted,
_ => todo!(),
};
if matches!(inline.kind, inline::EventKind::Enter(_)) {
@ -167,16 +174,16 @@ impl<'s> Event<'s> {
Self::End(t)
}
}
inline::EventKind::Atom(a) => match a {
inline::Atom::Ellipsis => Self::Ellipsis,
inline::Atom::EnDash => Self::EnDash,
inline::Atom::EmDash => Self::EmDash,
inline::Atom::Nbsp => Self::NonBreakingSpace,
inline::Atom::Softbreak => Self::Softbreak,
inline::Atom::Hardbreak => Self::Hardbreak,
inline::Atom::Escape => Self::Escape,
inline::EventKind::Atom(a) => Event::Atom(match a {
inline::Atom::Ellipsis => Atom::Ellipsis,
inline::Atom::EnDash => Atom::EnDash,
inline::Atom::EmDash => Atom::EmDash,
inline::Atom::Nbsp => Atom::NonBreakingSpace,
inline::Atom::Softbreak => Atom::Softbreak,
inline::Atom::Hardbreak => Atom::Hardbreak,
inline::Atom::Escape => Atom::Escape,
_ => todo!(),
},
}),
inline::EventKind::Node(n) => match n {
inline::Node::Str => Self::Str(content),
inline::Node::Verbatim => Self::Verbatim(content),
@ -194,7 +201,7 @@ impl<'s> Event<'s> {
}
}
impl<'s> Tag<'s> {
impl<'s> Container<'s> {
fn from_block(src: &'s str, block: block::Block) -> Self {
match block {
block::Block::Leaf(l) => match l {
@ -272,7 +279,7 @@ impl<'s> Iterator for Parser<'s> {
}
tree::EventKind::Exit(block) => {
self.parser = None;
return Some(Event::End(Tag::from_block(self.src, block)));
return Some(Event::End(Container::from_block(self.src, block)));
}
tree::EventKind::Enter(..) => unreachable!(),
}
@ -282,24 +289,25 @@ impl<'s> Iterator for Parser<'s> {
self.tree.next().map(|ev| match ev.kind {
tree::EventKind::Element(atom) => {
assert_eq!(atom, block::Atom::Blankline);
Event::Blankline
Event::Atom(Atom::Blankline)
}
tree::EventKind::Enter(block) => {
if matches!(block, block::Block::Leaf(..)) {
self.parser = Some(inline::Parser::new());
}
Event::Start(Tag::from_block(self.src, block), Attributes::none())
Event::Start(Container::from_block(self.src, block), Attributes::none())
}
tree::EventKind::Exit(block) => Event::End(Tag::from_block(self.src, block)),
tree::EventKind::Exit(block) => Event::End(Container::from_block(self.src, block)),
})
}
}
#[cfg(test)]
mod test {
use super::Atom::*;
use super::Attributes;
use super::Container::*;
use super::Event::*;
use super::Tag::*;
macro_rules! test_parse {
($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => {
@ -364,7 +372,7 @@ mod test {
Start(Paragraph, Attributes::none()),
Str("para0\n"),
End(Paragraph),
Blankline,
Atom(Blankline),
Start(Paragraph, Attributes::none()),
Str("para1"),
End(Paragraph),

View file

@ -7,6 +7,5 @@ fn main() {
.expect("failed to read unicode file");
let p = jotdown::Parser::new(&src);
//let v = p.parse().collect::<Vec<_>>();
//print!("{:?}", v);
jotdown::html::write(std::io::stdout(), p).unwrap();
}