wip djot -> html
This commit is contained in:
parent
e84e7dd50b
commit
8ada539709
3 changed files with 224 additions and 85 deletions
146
src/html.rs
146
src/html.rs
|
@ -1,17 +1,149 @@
|
|||
use crate::Atom;
|
||||
use crate::Container;
|
||||
use crate::Event;
|
||||
|
||||
pub fn push_html<'s, I: Iterator<Item = Event<'s>>>(s: &mut String, events: I) {
|
||||
Writer::new(events).write()
|
||||
/// Generate HTML from parsed events and push it to a unicode-accepting buffer or stream.
|
||||
pub fn push<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write>(out: W, events: I) {
|
||||
Writer::new(events, out).write().unwrap();
|
||||
}
|
||||
|
||||
struct Writer<I> {
|
||||
/// Generate HTML from parsed events and write it to a byte sink, encoded as UTF-8.
|
||||
///
|
||||
/// NOTE: This performs many small writes, so IO writes should be buffered with e.g.
|
||||
/// [`std::io::BufWriter`].
///
/// # Errors
///
/// Returns the I/O error reported by `out` when one of the underlying writes fails.
|
||||
pub fn write<'s, I: Iterator<Item = Event<'s>>, W: std::io::Write>(
|
||||
mut out: W,
|
||||
events: I,
|
||||
) -> std::io::Result<()> {
|
||||
// Bridges the byte-oriented `std::io::Write` sink to the `std::fmt::Write` interface.
// `std::fmt::Error` carries no payload, so the real I/O error is kept in `error`.
struct Adapter<'a, T: ?Sized + 'a> {
|
||||
inner: &'a mut T,
|
||||
error: std::io::Result<()>,
|
||||
}
|
||||
|
||||
// NOTE(review): the next three code lines (`impl ... Writer<I>`, `fn new(events: I)`,
// `Self { events }`) and the later `fn write(self) {}` look like removed-side lines of the
// diff woven into this function, not part of it -- confirm against the repository.
impl<'s, I: Iterator<Item = Event<'s>>> Writer<I> {
|
||||
fn new(events: I) -> Self {
|
||||
Self { events }
|
||||
impl<T: std::io::Write + ?Sized> std::fmt::Write for Adapter<'_, T> {
|
||||
fn write_str(&mut self, s: &str) -> std::fmt::Result {
|
||||
// Forward the UTF-8 bytes; on failure remember the I/O error and signal a generic
// formatting error to the caller.
match self.inner.write_all(s.as_bytes()) {
|
||||
Ok(()) => Ok(()),
|
||||
Err(e) => {
|
||||
self.error = Err(e);
|
||||
Err(std::fmt::Error)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn write(self) {}
|
||||
let mut output = Adapter {
|
||||
inner: &mut out,
|
||||
error: Ok(()),
|
||||
};
|
||||
|
||||
Writer::new(events, &mut output)
|
||||
.write()
|
||||
// Swap the payload-free formatting error for the I/O error stored by the adapter.
// `unwrap_err` assumes every formatting failure originated in `Adapter::write_str`.
.map_err(|_| output.error.unwrap_err())
|
||||
}
|
||||
|
||||
struct Writer<I, W> {
|
||||
events: I,
|
||||
out: W,
|
||||
}
|
||||
|
||||
impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<I, W> {
|
||||
fn new(events: I, out: W) -> Self {
|
||||
Self { events, out }
|
||||
}
|
||||
|
||||
fn write(&mut self) -> std::fmt::Result {
|
||||
for e in &mut self.events {
|
||||
match e {
|
||||
Event::Start(c, _attrs) => {
|
||||
match c {
|
||||
Container::Blockquote => self.out.write_str("<blockquote>")?,
|
||||
Container::List(..) => todo!(),
|
||||
Container::ListItem => self.out.write_str("<li>")?,
|
||||
Container::DescriptionList => self.out.write_str("<dl>")?,
|
||||
Container::DescriptionDetails => self.out.write_str("<dd>")?,
|
||||
Container::Footnote { .. } => todo!(),
|
||||
Container::Table => self.out.write_str("<table>")?,
|
||||
Container::TableRow => self.out.write_str("<tr>")?,
|
||||
Container::Div => self.out.write_str("<div>")?,
|
||||
Container::Span => self.out.write_str("<span>")?,
|
||||
Container::Paragraph => self.out.write_str("<p>")?,
|
||||
Container::Heading { level } => write!(self.out, "<h{}>", level)?,
|
||||
Container::Link(..) => todo!(),
|
||||
Container::Image(..) => todo!(),
|
||||
Container::TableCell => self.out.write_str("<td>")?,
|
||||
Container::RawBlock { .. } => todo!(),
|
||||
Container::CodeBlock { .. } => todo!(),
|
||||
Container::Subscript => self.out.write_str("<sub>")?,
|
||||
Container::Superscript => self.out.write_str("<sup>")?,
|
||||
Container::Insert => self.out.write_str("<ins>")?,
|
||||
Container::Delete => self.out.write_str("<del>")?,
|
||||
Container::Strong => self.out.write_str("<strong>")?,
|
||||
Container::Emphasis => self.out.write_str("<em>")?,
|
||||
Container::Mark => self.out.write_str("<mark>")?,
|
||||
Container::SingleQuoted => self.out.write_str("‘")?,
|
||||
Container::DoubleQuoted => self.out.write_str("“")?,
|
||||
}
|
||||
}
|
||||
Event::End(c) => {
|
||||
match c {
|
||||
Container::Blockquote => self.out.write_str("</blockquote>")?,
|
||||
Container::List(..) => todo!(),
|
||||
Container::ListItem => self.out.write_str("</li>")?,
|
||||
Container::DescriptionList => self.out.write_str("</dl>")?,
|
||||
Container::DescriptionDetails => self.out.write_str("</dd>")?,
|
||||
Container::Footnote { .. } => todo!(),
|
||||
Container::Table => self.out.write_str("</table>")?,
|
||||
Container::TableRow => self.out.write_str("</tr>")?,
|
||||
Container::Div => self.out.write_str("</div>")?,
|
||||
Container::Span => self.out.write_str("</span>")?,
|
||||
Container::Paragraph => self.out.write_str("</p>")?,
|
||||
Container::Heading { level } => write!(self.out, "</h{}>", level)?,
|
||||
Container::TableCell => self.out.write_str("</td>")?,
|
||||
Container::RawBlock { .. } => todo!(),
|
||||
Container::CodeBlock { .. } => todo!(),
|
||||
Container::Link(..) => todo!(),
|
||||
Container::Image(..) => todo!(),
|
||||
Container::Subscript => self.out.write_str("</sub>")?,
|
||||
Container::Superscript => self.out.write_str("</sup>")?,
|
||||
Container::Insert => self.out.write_str("</ins>")?,
|
||||
Container::Delete => self.out.write_str("</del>")?,
|
||||
Container::Strong => self.out.write_str("</strong>")?,
|
||||
Container::Emphasis => self.out.write_str("</em>")?,
|
||||
Container::Mark => self.out.write_str("</mark>")?,
|
||||
Container::SingleQuoted => self.out.write_str("’")?,
|
||||
Container::DoubleQuoted => self.out.write_str("”")?,
|
||||
}
|
||||
}
|
||||
Event::Str(s) => self.out.write_str(s)?,
|
||||
Event::Verbatim(s) => write!(self.out, "<code>{}</code>", s)?,
|
||||
Event::Math { content, display } => {
|
||||
if display {
|
||||
write!(
|
||||
self.out,
|
||||
r#"<span class="math display">\[{}\]</span>"#,
|
||||
content,
|
||||
)?;
|
||||
} else {
|
||||
write!(
|
||||
self.out,
|
||||
r#"<span class="math inline">\({}\)</span>"#,
|
||||
content,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
Event::Atom(a) => match a {
|
||||
Atom::Ellipsis => self.out.write_str("…")?,
|
||||
Atom::EnDash => self.out.write_str("–")?,
|
||||
Atom::EmDash => self.out.write_str("—")?,
|
||||
Atom::ThematicBreak => self.out.write_str("\n<hr>")?,
|
||||
Atom::NonBreakingSpace => self.out.write_str(" ")?,
|
||||
Atom::Hardbreak => self.out.write_str("<br>\n")?,
|
||||
Atom::Softbreak => self.out.write_char('\n')?,
|
||||
Atom::Blankline | Atom::Escape => {}
|
||||
},
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
|
156
src/lib.rs
156
src/lib.rs
|
@ -1,5 +1,6 @@
|
|||
pub mod html;
|
||||
|
||||
mod block;
|
||||
mod html;
|
||||
mod inline;
|
||||
mod lex;
|
||||
mod span;
|
||||
|
@ -13,60 +14,22 @@ const EOF: char = '\0';
|
|||
|
||||
/// A single unit of parsed output: a container boundary, a piece of text, or an atomic element.
// NOTE(review): this listing appears to weave together two revisions of the enum -- the
// `Tag`-based `Start`/`End` and standalone `Ellipsis`..`Escape` variants alongside the
// `Container`-based `Start`/`End` and the `Atom` variant. Confirm which set is current.
#[derive(Debug, PartialEq, Eq)]
|
||||
pub enum Event<'s> {
|
||||
/// Start of a tag.
|
||||
Start(Tag<'s>, Attributes<'s>),
|
||||
/// End of a tag.
|
||||
End(Tag<'s>),
|
||||
/// Start of a container.
|
||||
Start(Container<'s>, Attributes<'s>),
|
||||
/// End of a container.
|
||||
End(Container<'s>),
|
||||
/// A string object, text only.
|
||||
Str(&'s str),
|
||||
/// An atomic element.
|
||||
Atom(Atom),
|
||||
/// A verbatim string.
|
||||
Verbatim(&'s str),
|
||||
/// An inline or display math element.
|
||||
Math { content: &'s str, display: bool },
|
||||
/// An ellipsis, i.e. a set of three periods.
|
||||
Ellipsis,
|
||||
/// An en dash.
|
||||
EnDash,
|
||||
/// An em dash.
|
||||
EmDash,
|
||||
/// A thematic break, typically a horizontal rule.
|
||||
ThematicBreak,
|
||||
/// A blank line.
|
||||
Blankline,
|
||||
/// A space that may not break a line.
|
||||
NonBreakingSpace,
|
||||
/// A newline that may or may not break a line in the output format.
|
||||
Softbreak,
|
||||
/// A newline that must break a line.
|
||||
Hardbreak,
|
||||
/// An escape character, not visible in output.
|
||||
Escape,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub enum Tag<'s> {
|
||||
/// A paragraph.
|
||||
Paragraph,
|
||||
/// A heading.
|
||||
Heading { level: u8 },
|
||||
/// A link with a destination URL.
|
||||
Link(&'s str, LinkType),
|
||||
/// An image.
|
||||
Image(&'s str),
|
||||
/// A divider element.
|
||||
Div,
|
||||
/// An inline divider element.
|
||||
Span,
|
||||
/// A table element.
|
||||
Table,
|
||||
/// A row element of a table.
|
||||
TableRow,
|
||||
/// A cell element of a row within a table.
|
||||
TableCell,
|
||||
/// A block with raw markup for a specific output format.
|
||||
RawBlock { format: &'s str },
|
||||
/// A block with code in a specific language.
|
||||
CodeBlock { language: Option<&'s str> },
|
||||
pub enum Container<'s> {
|
||||
/// A blockquote element.
|
||||
Blockquote,
|
||||
/// A list.
|
||||
|
@ -75,10 +38,32 @@ pub enum Tag<'s> {
|
|||
ListItem,
|
||||
/// A description list element.
|
||||
DescriptionList,
|
||||
/// An item of a description list.
|
||||
DescriptionItem,
|
||||
/// Details describing a term within a description list.
|
||||
DescriptionDetails,
|
||||
/// A footnote definition.
|
||||
Footnote { tag: &'s str },
|
||||
/// A table element.
|
||||
Table,
|
||||
/// A row element of a table.
|
||||
TableRow,
|
||||
/// A block-level divider element.
|
||||
Div,
|
||||
/// A paragraph.
|
||||
Paragraph,
|
||||
/// A heading.
|
||||
Heading { level: u8 },
|
||||
/// A link with a destination URL.
|
||||
Link(&'s str, LinkType),
|
||||
/// An image.
|
||||
Image(&'s str),
|
||||
/// An inline divider element.
|
||||
Span,
|
||||
/// A cell element of a row within a table.
|
||||
TableCell,
|
||||
/// A block with raw markup for a specific output format.
|
||||
RawBlock { format: &'s str },
|
||||
/// A block with code in a specific language.
|
||||
CodeBlock { language: Option<&'s str> },
|
||||
/// A subscripted element.
|
||||
Subscript,
|
||||
/// A superscripted element.
|
||||
|
@ -143,22 +128,44 @@ pub enum OrderedListFormat {
|
|||
ParenParen,
|
||||
}
|
||||
|
||||
/// An indivisible element that carries no content of its own.
///
/// Every variant is fieldless, so the type is `Copy` and can be passed around by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Atom {
    /// An ellipsis, i.e. a set of three periods.
    Ellipsis,
    /// An en dash.
    EnDash,
    /// An em dash.
    EmDash,
    /// A thematic break, typically a horizontal rule.
    ThematicBreak,
    /// A blank line.
    Blankline,
    /// A space that may not break a line.
    NonBreakingSpace,
    /// A newline that may or may not break a line in the output format.
    Softbreak,
    /// A newline that must break a line.
    Hardbreak,
    /// An escape character, not visible in output.
    Escape,
}
|
||||
|
||||
impl<'s> Event<'s> {
|
||||
fn from_inline(src: &'s str, inline: inline::Event) -> Self {
|
||||
let content = inline.span.of(src);
|
||||
match inline.kind {
|
||||
inline::EventKind::Enter(c) | inline::EventKind::Exit(c) => {
|
||||
let t = match c {
|
||||
inline::Container::Span => Tag::Span,
|
||||
inline::Container::Subscript => Tag::Subscript,
|
||||
inline::Container::Superscript => Tag::Superscript,
|
||||
inline::Container::Insert => Tag::Insert,
|
||||
inline::Container::Delete => Tag::Delete,
|
||||
inline::Container::Emphasis => Tag::Emphasis,
|
||||
inline::Container::Strong => Tag::Strong,
|
||||
inline::Container::Mark => Tag::Mark,
|
||||
inline::Container::SingleQuoted => Tag::SingleQuoted,
|
||||
inline::Container::DoubleQuoted => Tag::DoubleQuoted,
|
||||
inline::Container::Span => Container::Span,
|
||||
inline::Container::Subscript => Container::Subscript,
|
||||
inline::Container::Superscript => Container::Superscript,
|
||||
inline::Container::Insert => Container::Insert,
|
||||
inline::Container::Delete => Container::Delete,
|
||||
inline::Container::Emphasis => Container::Emphasis,
|
||||
inline::Container::Strong => Container::Strong,
|
||||
inline::Container::Mark => Container::Mark,
|
||||
inline::Container::SingleQuoted => Container::SingleQuoted,
|
||||
inline::Container::DoubleQuoted => Container::DoubleQuoted,
|
||||
_ => todo!(),
|
||||
};
|
||||
if matches!(inline.kind, inline::EventKind::Enter(_)) {
|
||||
|
@ -167,16 +174,16 @@ impl<'s> Event<'s> {
|
|||
Self::End(t)
|
||||
}
|
||||
}
|
||||
inline::EventKind::Atom(a) => match a {
|
||||
inline::Atom::Ellipsis => Self::Ellipsis,
|
||||
inline::Atom::EnDash => Self::EnDash,
|
||||
inline::Atom::EmDash => Self::EmDash,
|
||||
inline::Atom::Nbsp => Self::NonBreakingSpace,
|
||||
inline::Atom::Softbreak => Self::Softbreak,
|
||||
inline::Atom::Hardbreak => Self::Hardbreak,
|
||||
inline::Atom::Escape => Self::Escape,
|
||||
inline::EventKind::Atom(a) => Event::Atom(match a {
|
||||
inline::Atom::Ellipsis => Atom::Ellipsis,
|
||||
inline::Atom::EnDash => Atom::EnDash,
|
||||
inline::Atom::EmDash => Atom::EmDash,
|
||||
inline::Atom::Nbsp => Atom::NonBreakingSpace,
|
||||
inline::Atom::Softbreak => Atom::Softbreak,
|
||||
inline::Atom::Hardbreak => Atom::Hardbreak,
|
||||
inline::Atom::Escape => Atom::Escape,
|
||||
_ => todo!(),
|
||||
},
|
||||
}),
|
||||
inline::EventKind::Node(n) => match n {
|
||||
inline::Node::Str => Self::Str(content),
|
||||
inline::Node::Verbatim => Self::Verbatim(content),
|
||||
|
@ -194,7 +201,7 @@ impl<'s> Event<'s> {
|
|||
}
|
||||
}
|
||||
|
||||
impl<'s> Tag<'s> {
|
||||
impl<'s> Container<'s> {
|
||||
fn from_block(src: &'s str, block: block::Block) -> Self {
|
||||
match block {
|
||||
block::Block::Leaf(l) => match l {
|
||||
|
@ -272,7 +279,7 @@ impl<'s> Iterator for Parser<'s> {
|
|||
}
|
||||
tree::EventKind::Exit(block) => {
|
||||
self.parser = None;
|
||||
return Some(Event::End(Tag::from_block(self.src, block)));
|
||||
return Some(Event::End(Container::from_block(self.src, block)));
|
||||
}
|
||||
tree::EventKind::Enter(..) => unreachable!(),
|
||||
}
|
||||
|
@ -282,24 +289,25 @@ impl<'s> Iterator for Parser<'s> {
|
|||
self.tree.next().map(|ev| match ev.kind {
|
||||
tree::EventKind::Element(atom) => {
|
||||
assert_eq!(atom, block::Atom::Blankline);
|
||||
Event::Blankline
|
||||
Event::Atom(Atom::Blankline)
|
||||
}
|
||||
tree::EventKind::Enter(block) => {
|
||||
if matches!(block, block::Block::Leaf(..)) {
|
||||
self.parser = Some(inline::Parser::new());
|
||||
}
|
||||
Event::Start(Tag::from_block(self.src, block), Attributes::none())
|
||||
Event::Start(Container::from_block(self.src, block), Attributes::none())
|
||||
}
|
||||
tree::EventKind::Exit(block) => Event::End(Tag::from_block(self.src, block)),
|
||||
tree::EventKind::Exit(block) => Event::End(Container::from_block(self.src, block)),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::Atom::*;
|
||||
use super::Attributes;
|
||||
use super::Container::*;
|
||||
use super::Event::*;
|
||||
use super::Tag::*;
|
||||
|
||||
macro_rules! test_parse {
|
||||
($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => {
|
||||
|
@ -364,7 +372,7 @@ mod test {
|
|||
Start(Paragraph, Attributes::none()),
|
||||
Str("para0\n"),
|
||||
End(Paragraph),
|
||||
Blankline,
|
||||
Atom(Blankline),
|
||||
Start(Paragraph, Attributes::none()),
|
||||
Str("para1"),
|
||||
End(Paragraph),
|
||||
|
|
|
@ -7,6 +7,5 @@ fn main() {
|
|||
.expect("failed to read unicode file");
|
||||
|
||||
let p = jotdown::Parser::new(&src);
|
||||
//let v = p.parse().collect::<Vec<_>>();
|
||||
//print!("{:?}", v);
|
||||
jotdown::html::write(std::io::stdout(), p).unwrap();
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue