From 8ada539709539679ccde4bb78bf2f0739ad5633e Mon Sep 17 00:00:00 2001
From: Noah Hellman
Date: Tue, 29 Nov 2022 18:34:13 +0100
Subject: [PATCH] wip djot -> html
---
src/html.rs | 150 +++++++++++++++++++++++++++++++++++++++++++++++---
src/lib.rs | 156 +++++++++++++++++++++++++++-------------------------
src/main.rs | 3 +-
3 files changed, 224 insertions(+), 85 deletions(-)
diff --git a/src/html.rs b/src/html.rs
index 2d86be1..79d6c75 100644
--- a/src/html.rs
+++ b/src/html.rs
@@ -1,17 +1,149 @@
+use crate::Atom;
+use crate::Container;
use crate::Event;
-pub fn push_html<'s, I: Iterator- >>(s: &mut String, events: I) {
- Writer::new(events).write()
+/// Generate HTML from parsed events and push it to a unicode-accepting buffer or stream.
+pub fn push<'s, I: Iterator
- >, W: std::fmt::Write>(out: W, events: I) {
+ Writer::new(events, out).write().unwrap();
}
-struct Writer {
+/// Generate HTML from parsed events and write it to a byte sink, encoded as UTF-8.
+///
+/// NOTE: This performs many small writes, so IO writes should be buffered with e.g.
+/// [`std::io::BufWriter`].
+pub fn write<'s, I: Iterator
- >, W: std::io::Write>(
+ mut out: W,
events: I,
-}
-
-impl<'s, I: Iterator
- >> Writer {
- fn new(events: I) -> Self {
- Self { events }
+) -> std::io::Result<()> {
+ struct Adapter<'a, T: ?Sized + 'a> {
+ inner: &'a mut T,
+ error: std::io::Result<()>,
}
- fn write(self) {}
+ impl std::fmt::Write for Adapter<'_, T> {
+ fn write_str(&mut self, s: &str) -> std::fmt::Result {
+ match self.inner.write_all(s.as_bytes()) {
+ Ok(()) => Ok(()),
+ Err(e) => {
+ self.error = Err(e);
+ Err(std::fmt::Error)
+ }
+ }
+ }
+ }
+
+ let mut output = Adapter {
+ inner: &mut out,
+ error: Ok(()),
+ };
+
+ Writer::new(events, &mut output)
+ .write()
+ .map_err(|_| output.error.unwrap_err())
+}
+
+struct Writer {
+ events: I,
+ out: W,
+}
+
+impl<'s, I: Iterator
- >, W: std::fmt::Write> Writer {
+ fn new(events: I, out: W) -> Self {
+ Self { events, out }
+ }
+
+ fn write(&mut self) -> std::fmt::Result {
+ for e in &mut self.events {
+ match e {
+ Event::Start(c, _attrs) => {
+ match c {
+ Container::Blockquote => self.out.write_str("
")?,
+ Container::List(..) => todo!(),
+ Container::ListItem => self.out.write_str("")?,
+ Container::DescriptionList => self.out.write_str("")?,
+ Container::DescriptionDetails => self.out.write_str("- ")?,
+ Container::Footnote { .. } => todo!(),
+ Container::Table => self.out.write_str("
")?,
+ Container::TableRow => self.out.write_str("")?,
+ Container::Div => self.out.write_str("")?,
+ Container::Span => self.out.write_str("
")?,
+ Container::Paragraph => self.out.write_str("")?,
+ Container::Heading { level } => write!(self.out, "", level)?,
+ Container::Link(..) => todo!(),
+ Container::Image(..) => todo!(),
+ Container::TableCell => self.out.write_str("")?,
+ Container::RawBlock { .. } => todo!(),
+ Container::CodeBlock { .. } => todo!(),
+ Container::Subscript => self.out.write_str("")?,
+ Container::Superscript => self.out.write_str("")?,
+ Container::Insert => self.out.write_str("")?,
+ Container::Delete => self.out.write_str("")?,
+ Container::Strong => self.out.write_str("")?,
+ Container::Emphasis => self.out.write_str("")?,
+ Container::Mark => self.out.write_str("")?,
+ Container::SingleQuoted => self.out.write_str("‘")?,
+ Container::DoubleQuoted => self.out.write_str("“")?,
+ }
+ }
+ Event::End(c) => {
+ match c {
+ Container::Blockquote => self.out.write_str("")?,
+ Container::List(..) => todo!(),
+ Container::ListItem => self.out.write_str("")?,
+ Container::DescriptionList => self.out.write_str("")?,
+ Container::DescriptionDetails => self.out.write_str("")?,
+ Container::Footnote { .. } => todo!(),
+ Container::Table => self.out.write_str(" |
")?,
+ Container::TableRow => self.out.write_str("")?,
+ Container::Div => self.out.write_str("")?,
+ Container::Span => self.out.write_str("")?,
+ Container::Paragraph => self.out.write_str("
")?,
+ Container::Heading { level } => write!(self.out, "", level)?,
+ Container::TableCell => self.out.write_str("")?,
+ Container::RawBlock { .. } => todo!(),
+ Container::CodeBlock { .. } => todo!(),
+ Container::Link(..) => todo!(),
+ Container::Image(..) => todo!(),
+ Container::Subscript => self.out.write_str("")?,
+ Container::Superscript => self.out.write_str("")?,
+ Container::Insert => self.out.write_str("")?,
+ Container::Delete => self.out.write_str("")?,
+ Container::Strong => self.out.write_str("")?,
+ Container::Emphasis => self.out.write_str("")?,
+ Container::Mark => self.out.write_str("")?,
+ Container::SingleQuoted => self.out.write_str("’")?,
+ Container::DoubleQuoted => self.out.write_str("”")?,
+ }
+ }
+ Event::Str(s) => self.out.write_str(s)?,
+ Event::Verbatim(s) => write!(self.out, "{}
", s)?,
+ Event::Math { content, display } => {
+ if display {
+ write!(
+ self.out,
+ r#"\[{}\]"#,
+ content,
+ )?;
+ } else {
+ write!(
+ self.out,
+ r#"\({}\)"#,
+ content,
+ )?;
+ }
+ }
+ Event::Atom(a) => match a {
+ Atom::Ellipsis => self.out.write_str("…")?,
+ Atom::EnDash => self.out.write_str("–")?,
+ Atom::EmDash => self.out.write_str("—")?,
+ Atom::ThematicBreak => self.out.write_str("\n
")?,
+ Atom::NonBreakingSpace => self.out.write_str(" ")?,
+ Atom::Hardbreak => self.out.write_str("
\n")?,
+ Atom::Softbreak => self.out.write_char('\n')?,
+ Atom::Blankline | Atom::Escape => {}
+ },
+ }
+ }
+ Ok(())
+ }
}
diff --git a/src/lib.rs b/src/lib.rs
index 91ecc3f..3112f68 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,5 +1,6 @@
+pub mod html;
+
mod block;
-mod html;
mod inline;
mod lex;
mod span;
@@ -13,60 +14,22 @@ const EOF: char = '\0';
#[derive(Debug, PartialEq, Eq)]
pub enum Event<'s> {
- /// Start of a tag.
- Start(Tag<'s>, Attributes<'s>),
- /// End of a tag.
- End(Tag<'s>),
+ /// Start of a container.
+ Start(Container<'s>, Attributes<'s>),
+ /// End of a container.
+ End(Container<'s>),
/// A string object, text only.
Str(&'s str),
+ /// An atomic element.
+ Atom(Atom),
/// A verbatim string.
Verbatim(&'s str),
/// An inline or display math element.
Math { content: &'s str, display: bool },
- /// An ellipsis, i.e. a set of three periods.
- Ellipsis,
- /// An en dash.
- EnDash,
- /// An em dash.
- EmDash,
- /// A thematic break, typically a horizontal rule.
- ThematicBreak,
- /// A blank line.
- Blankline,
- /// A space that may not break a line.
- NonBreakingSpace,
- /// A newline that may or may not break a line in the output format.
- Softbreak,
- /// A newline that must break a line.
- Hardbreak,
- /// An escape character, not visible in output.
- Escape,
}
#[derive(Debug, PartialEq, Eq)]
-pub enum Tag<'s> {
- /// A paragraph.
- Paragraph,
- /// A heading.
- Heading { level: u8 },
- /// A link with a destination URL.
- Link(&'s str, LinkType),
- /// An image.
- Image(&'s str),
- /// A divider element.
- Div,
- /// An inline divider element.
- Span,
- /// A table element.
- Table,
- /// A row element of a table.
- TableRow,
- /// A cell element of row within a table.
- TableCell,
- /// A block with raw markup for a specific output format.
- RawBlock { format: &'s str },
- /// A block with code in a specific language.
- CodeBlock { language: Option<&'s str> },
+pub enum Container<'s> {
/// A blockquote element.
Blockquote,
/// A list.
@@ -75,10 +38,32 @@ pub enum Tag<'s> {
ListItem,
/// A description list element.
DescriptionList,
- /// A item of a description list.
- DescriptionItem,
+ /// Details describing a term within a description list.
+ DescriptionDetails,
/// A footnote definition.
Footnote { tag: &'s str },
+ /// A table element.
+ Table,
+ /// A row element of a table.
+ TableRow,
+ /// A block-level divider element.
+ Div,
+ /// A paragraph.
+ Paragraph,
+ /// A heading.
+ Heading { level: u8 },
+ /// A link with a destination URL.
+ Link(&'s str, LinkType),
+ /// An image.
+ Image(&'s str),
+ /// An inline divider element.
+ Span,
+ /// A cell element of row within a table.
+ TableCell,
+ /// A block with raw markup for a specific output format.
+ RawBlock { format: &'s str },
+ /// A block with code in a specific language.
+ CodeBlock { language: Option<&'s str> },
/// A subscripted element.
Subscript,
/// A superscripted element.
@@ -143,22 +128,44 @@ pub enum OrderedListFormat {
ParenParen,
}
+#[derive(Debug, PartialEq, Eq)]
+pub enum Atom {
+ /// An ellipsis, i.e. a set of three periods.
+ Ellipsis,
+ /// An en dash.
+ EnDash,
+ /// An em dash.
+ EmDash,
+ /// A thematic break, typically a horizontal rule.
+ ThematicBreak,
+ /// A blank line.
+ Blankline,
+ /// A space that may not break a line.
+ NonBreakingSpace,
+ /// A newline that may or may not break a line in the output format.
+ Softbreak,
+ /// A newline that must break a line.
+ Hardbreak,
+ /// An escape character, not visible in output.
+ Escape,
+}
+
impl<'s> Event<'s> {
fn from_inline(src: &'s str, inline: inline::Event) -> Self {
let content = inline.span.of(src);
match inline.kind {
inline::EventKind::Enter(c) | inline::EventKind::Exit(c) => {
let t = match c {
- inline::Container::Span => Tag::Span,
- inline::Container::Subscript => Tag::Subscript,
- inline::Container::Superscript => Tag::Superscript,
- inline::Container::Insert => Tag::Insert,
- inline::Container::Delete => Tag::Delete,
- inline::Container::Emphasis => Tag::Emphasis,
- inline::Container::Strong => Tag::Strong,
- inline::Container::Mark => Tag::Mark,
- inline::Container::SingleQuoted => Tag::SingleQuoted,
- inline::Container::DoubleQuoted => Tag::DoubleQuoted,
+ inline::Container::Span => Container::Span,
+ inline::Container::Subscript => Container::Subscript,
+ inline::Container::Superscript => Container::Superscript,
+ inline::Container::Insert => Container::Insert,
+ inline::Container::Delete => Container::Delete,
+ inline::Container::Emphasis => Container::Emphasis,
+ inline::Container::Strong => Container::Strong,
+ inline::Container::Mark => Container::Mark,
+ inline::Container::SingleQuoted => Container::SingleQuoted,
+ inline::Container::DoubleQuoted => Container::DoubleQuoted,
_ => todo!(),
};
if matches!(inline.kind, inline::EventKind::Enter(_)) {
@@ -167,16 +174,16 @@ impl<'s> Event<'s> {
Self::End(t)
}
}
- inline::EventKind::Atom(a) => match a {
- inline::Atom::Ellipsis => Self::Ellipsis,
- inline::Atom::EnDash => Self::EnDash,
- inline::Atom::EmDash => Self::EmDash,
- inline::Atom::Nbsp => Self::NonBreakingSpace,
- inline::Atom::Softbreak => Self::Softbreak,
- inline::Atom::Hardbreak => Self::Hardbreak,
- inline::Atom::Escape => Self::Escape,
+ inline::EventKind::Atom(a) => Event::Atom(match a {
+ inline::Atom::Ellipsis => Atom::Ellipsis,
+ inline::Atom::EnDash => Atom::EnDash,
+ inline::Atom::EmDash => Atom::EmDash,
+ inline::Atom::Nbsp => Atom::NonBreakingSpace,
+ inline::Atom::Softbreak => Atom::Softbreak,
+ inline::Atom::Hardbreak => Atom::Hardbreak,
+ inline::Atom::Escape => Atom::Escape,
_ => todo!(),
- },
+ }),
inline::EventKind::Node(n) => match n {
inline::Node::Str => Self::Str(content),
inline::Node::Verbatim => Self::Verbatim(content),
@@ -194,7 +201,7 @@ impl<'s> Event<'s> {
}
}
-impl<'s> Tag<'s> {
+impl<'s> Container<'s> {
fn from_block(src: &'s str, block: block::Block) -> Self {
match block {
block::Block::Leaf(l) => match l {
@@ -272,7 +279,7 @@ impl<'s> Iterator for Parser<'s> {
}
tree::EventKind::Exit(block) => {
self.parser = None;
- return Some(Event::End(Tag::from_block(self.src, block)));
+ return Some(Event::End(Container::from_block(self.src, block)));
}
tree::EventKind::Enter(..) => unreachable!(),
}
@@ -282,24 +289,25 @@ impl<'s> Iterator for Parser<'s> {
self.tree.next().map(|ev| match ev.kind {
tree::EventKind::Element(atom) => {
assert_eq!(atom, block::Atom::Blankline);
- Event::Blankline
+ Event::Atom(Atom::Blankline)
}
tree::EventKind::Enter(block) => {
if matches!(block, block::Block::Leaf(..)) {
self.parser = Some(inline::Parser::new());
}
- Event::Start(Tag::from_block(self.src, block), Attributes::none())
+ Event::Start(Container::from_block(self.src, block), Attributes::none())
}
- tree::EventKind::Exit(block) => Event::End(Tag::from_block(self.src, block)),
+ tree::EventKind::Exit(block) => Event::End(Container::from_block(self.src, block)),
})
}
}
#[cfg(test)]
mod test {
+ use super::Atom::*;
use super::Attributes;
+ use super::Container::*;
use super::Event::*;
- use super::Tag::*;
macro_rules! test_parse {
($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => {
@@ -364,7 +372,7 @@ mod test {
Start(Paragraph, Attributes::none()),
Str("para0\n"),
End(Paragraph),
- Blankline,
+ Atom(Blankline),
Start(Paragraph, Attributes::none()),
Str("para1"),
End(Paragraph),
diff --git a/src/main.rs b/src/main.rs
index 4771392..babdaed 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -7,6 +7,5 @@ fn main() {
.expect("failed to read unicode file");
let p = jotdown::Parser::new(&src);
- //let v = p.parse().collect::>();
- //print!("{:?}", v);
+ jotdown::html::write(std::io::stdout(), p).unwrap();
}