wip djot -> html
This commit is contained in:
parent
e84e7dd50b
commit
8ada539709
3 changed files with 224 additions and 85 deletions
150
src/html.rs
150
src/html.rs
|
@ -1,17 +1,149 @@
|
||||||
|
use crate::Atom;
|
||||||
|
use crate::Container;
|
||||||
use crate::Event;
|
use crate::Event;
|
||||||
|
|
||||||
pub fn push_html<'s, I: Iterator<Item = Event<'s>>>(s: &mut String, events: I) {
|
/// Generate HTML from parsed events and push it to a unicode-accepting buffer or stream.
|
||||||
Writer::new(events).write()
|
pub fn push<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write>(out: W, events: I) {
|
||||||
|
Writer::new(events, out).write().unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
struct Writer<I> {
|
/// Generate HTML from parsed events and write it to a byte sink, encoded as UTF-8.
|
||||||
|
///
|
||||||
|
/// NOTE: This performs many small writes, so IO writes should be buffered with e.g.
|
||||||
|
/// [`std::io::BufWriter`].
|
||||||
|
pub fn write<'s, I: Iterator<Item = Event<'s>>, W: std::io::Write>(
|
||||||
|
mut out: W,
|
||||||
events: I,
|
events: I,
|
||||||
}
|
) -> std::io::Result<()> {
|
||||||
|
struct Adapter<'a, T: ?Sized + 'a> {
|
||||||
impl<'s, I: Iterator<Item = Event<'s>>> Writer<I> {
|
inner: &'a mut T,
|
||||||
fn new(events: I) -> Self {
|
error: std::io::Result<()>,
|
||||||
Self { events }
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write(self) {}
|
impl<T: std::io::Write + ?Sized> std::fmt::Write for Adapter<'_, T> {
|
||||||
|
fn write_str(&mut self, s: &str) -> std::fmt::Result {
|
||||||
|
match self.inner.write_all(s.as_bytes()) {
|
||||||
|
Ok(()) => Ok(()),
|
||||||
|
Err(e) => {
|
||||||
|
self.error = Err(e);
|
||||||
|
Err(std::fmt::Error)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut output = Adapter {
|
||||||
|
inner: &mut out,
|
||||||
|
error: Ok(()),
|
||||||
|
};
|
||||||
|
|
||||||
|
Writer::new(events, &mut output)
|
||||||
|
.write()
|
||||||
|
.map_err(|_| output.error.unwrap_err())
|
||||||
|
}
|
||||||
|
|
||||||
|
struct Writer<I, W> {
|
||||||
|
events: I,
|
||||||
|
out: W,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<I, W> {
|
||||||
|
fn new(events: I, out: W) -> Self {
|
||||||
|
Self { events, out }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn write(&mut self) -> std::fmt::Result {
|
||||||
|
for e in &mut self.events {
|
||||||
|
match e {
|
||||||
|
Event::Start(c, _attrs) => {
|
||||||
|
match c {
|
||||||
|
Container::Blockquote => self.out.write_str("<blockquote>")?,
|
||||||
|
Container::List(..) => todo!(),
|
||||||
|
Container::ListItem => self.out.write_str("<li>")?,
|
||||||
|
Container::DescriptionList => self.out.write_str("<dl>")?,
|
||||||
|
Container::DescriptionDetails => self.out.write_str("<dd>")?,
|
||||||
|
Container::Footnote { .. } => todo!(),
|
||||||
|
Container::Table => self.out.write_str("<table>")?,
|
||||||
|
Container::TableRow => self.out.write_str("<tr>")?,
|
||||||
|
Container::Div => self.out.write_str("<div>")?,
|
||||||
|
Container::Span => self.out.write_str("<span>")?,
|
||||||
|
Container::Paragraph => self.out.write_str("<p>")?,
|
||||||
|
Container::Heading { level } => write!(self.out, "<h{}>", level)?,
|
||||||
|
Container::Link(..) => todo!(),
|
||||||
|
Container::Image(..) => todo!(),
|
||||||
|
Container::TableCell => self.out.write_str("<td>")?,
|
||||||
|
Container::RawBlock { .. } => todo!(),
|
||||||
|
Container::CodeBlock { .. } => todo!(),
|
||||||
|
Container::Subscript => self.out.write_str("<sub>")?,
|
||||||
|
Container::Superscript => self.out.write_str("<sup>")?,
|
||||||
|
Container::Insert => self.out.write_str("<ins>")?,
|
||||||
|
Container::Delete => self.out.write_str("<del>")?,
|
||||||
|
Container::Strong => self.out.write_str("<strong>")?,
|
||||||
|
Container::Emphasis => self.out.write_str("<em>")?,
|
||||||
|
Container::Mark => self.out.write_str("<mark>")?,
|
||||||
|
Container::SingleQuoted => self.out.write_str("‘")?,
|
||||||
|
Container::DoubleQuoted => self.out.write_str("“")?,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Event::End(c) => {
|
||||||
|
match c {
|
||||||
|
Container::Blockquote => self.out.write_str("</blockquote>")?,
|
||||||
|
Container::List(..) => todo!(),
|
||||||
|
Container::ListItem => self.out.write_str("</li>")?,
|
||||||
|
Container::DescriptionList => self.out.write_str("</dl>")?,
|
||||||
|
Container::DescriptionDetails => self.out.write_str("</dd>")?,
|
||||||
|
Container::Footnote { .. } => todo!(),
|
||||||
|
Container::Table => self.out.write_str("</table>")?,
|
||||||
|
Container::TableRow => self.out.write_str("</tr>")?,
|
||||||
|
Container::Div => self.out.write_str("</div>")?,
|
||||||
|
Container::Span => self.out.write_str("</span>")?,
|
||||||
|
Container::Paragraph => self.out.write_str("</p>")?,
|
||||||
|
Container::Heading { level } => write!(self.out, "</h{}>", level)?,
|
||||||
|
Container::TableCell => self.out.write_str("</td>")?,
|
||||||
|
Container::RawBlock { .. } => todo!(),
|
||||||
|
Container::CodeBlock { .. } => todo!(),
|
||||||
|
Container::Link(..) => todo!(),
|
||||||
|
Container::Image(..) => todo!(),
|
||||||
|
Container::Subscript => self.out.write_str("</sub>")?,
|
||||||
|
Container::Superscript => self.out.write_str("</sup>")?,
|
||||||
|
Container::Insert => self.out.write_str("</ins>")?,
|
||||||
|
Container::Delete => self.out.write_str("</del>")?,
|
||||||
|
Container::Strong => self.out.write_str("</strong>")?,
|
||||||
|
Container::Emphasis => self.out.write_str("</em>")?,
|
||||||
|
Container::Mark => self.out.write_str("</mark>")?,
|
||||||
|
Container::SingleQuoted => self.out.write_str("’")?,
|
||||||
|
Container::DoubleQuoted => self.out.write_str("”")?,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Event::Str(s) => self.out.write_str(s)?,
|
||||||
|
Event::Verbatim(s) => write!(self.out, "<code>{}</code>", s)?,
|
||||||
|
Event::Math { content, display } => {
|
||||||
|
if display {
|
||||||
|
write!(
|
||||||
|
self.out,
|
||||||
|
r#"<span class="math display">\[{}\]</span>"#,
|
||||||
|
content,
|
||||||
|
)?;
|
||||||
|
} else {
|
||||||
|
write!(
|
||||||
|
self.out,
|
||||||
|
r#"<span class="math inline">\({}\)</span>"#,
|
||||||
|
content,
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Event::Atom(a) => match a {
|
||||||
|
Atom::Ellipsis => self.out.write_str("…")?,
|
||||||
|
Atom::EnDash => self.out.write_str("–")?,
|
||||||
|
Atom::EmDash => self.out.write_str("—")?,
|
||||||
|
Atom::ThematicBreak => self.out.write_str("\n<hr>")?,
|
||||||
|
Atom::NonBreakingSpace => self.out.write_str(" ")?,
|
||||||
|
Atom::Hardbreak => self.out.write_str("<br>\n")?,
|
||||||
|
Atom::Softbreak => self.out.write_char('\n')?,
|
||||||
|
Atom::Blankline | Atom::Escape => {}
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
156
src/lib.rs
156
src/lib.rs
|
@ -1,5 +1,6 @@
|
||||||
|
pub mod html;
|
||||||
|
|
||||||
mod block;
|
mod block;
|
||||||
mod html;
|
|
||||||
mod inline;
|
mod inline;
|
||||||
mod lex;
|
mod lex;
|
||||||
mod span;
|
mod span;
|
||||||
|
@ -13,60 +14,22 @@ const EOF: char = '\0';
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Eq)]
|
#[derive(Debug, PartialEq, Eq)]
|
||||||
pub enum Event<'s> {
|
pub enum Event<'s> {
|
||||||
/// Start of a tag.
|
/// Start of a container.
|
||||||
Start(Tag<'s>, Attributes<'s>),
|
Start(Container<'s>, Attributes<'s>),
|
||||||
/// End of a tag.
|
/// End of a container.
|
||||||
End(Tag<'s>),
|
End(Container<'s>),
|
||||||
/// A string object, text only.
|
/// A string object, text only.
|
||||||
Str(&'s str),
|
Str(&'s str),
|
||||||
|
/// An atomic element.
|
||||||
|
Atom(Atom),
|
||||||
/// A verbatim string.
|
/// A verbatim string.
|
||||||
Verbatim(&'s str),
|
Verbatim(&'s str),
|
||||||
/// An inline or display math element.
|
/// An inline or display math element.
|
||||||
Math { content: &'s str, display: bool },
|
Math { content: &'s str, display: bool },
|
||||||
/// An ellipsis, i.e. a set of three periods.
|
|
||||||
Ellipsis,
|
|
||||||
/// An en dash.
|
|
||||||
EnDash,
|
|
||||||
/// An em dash.
|
|
||||||
EmDash,
|
|
||||||
/// A thematic break, typically a horizontal rule.
|
|
||||||
ThematicBreak,
|
|
||||||
/// A blank line.
|
|
||||||
Blankline,
|
|
||||||
/// A space that may not break a line.
|
|
||||||
NonBreakingSpace,
|
|
||||||
/// A newline that may or may not break a line in the output format.
|
|
||||||
Softbreak,
|
|
||||||
/// A newline that must break a line.
|
|
||||||
Hardbreak,
|
|
||||||
/// An escape character, not visible in output.
|
|
||||||
Escape,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq, Eq)]
|
#[derive(Debug, PartialEq, Eq)]
|
||||||
pub enum Tag<'s> {
|
pub enum Container<'s> {
|
||||||
/// A paragraph.
|
|
||||||
Paragraph,
|
|
||||||
/// A heading.
|
|
||||||
Heading { level: u8 },
|
|
||||||
/// A link with a destination URL.
|
|
||||||
Link(&'s str, LinkType),
|
|
||||||
/// An image.
|
|
||||||
Image(&'s str),
|
|
||||||
/// A divider element.
|
|
||||||
Div,
|
|
||||||
/// An inline divider element.
|
|
||||||
Span,
|
|
||||||
/// A table element.
|
|
||||||
Table,
|
|
||||||
/// A row element of a table.
|
|
||||||
TableRow,
|
|
||||||
/// A cell element of row within a table.
|
|
||||||
TableCell,
|
|
||||||
/// A block with raw markup for a specific output format.
|
|
||||||
RawBlock { format: &'s str },
|
|
||||||
/// A block with code in a specific language.
|
|
||||||
CodeBlock { language: Option<&'s str> },
|
|
||||||
/// A blockquote element.
|
/// A blockquote element.
|
||||||
Blockquote,
|
Blockquote,
|
||||||
/// A list.
|
/// A list.
|
||||||
|
@ -75,10 +38,32 @@ pub enum Tag<'s> {
|
||||||
ListItem,
|
ListItem,
|
||||||
/// A description list element.
|
/// A description list element.
|
||||||
DescriptionList,
|
DescriptionList,
|
||||||
/// A item of a description list.
|
/// Details describing a term within a description list.
|
||||||
DescriptionItem,
|
DescriptionDetails,
|
||||||
/// A footnote definition.
|
/// A footnote definition.
|
||||||
Footnote { tag: &'s str },
|
Footnote { tag: &'s str },
|
||||||
|
/// A table element.
|
||||||
|
Table,
|
||||||
|
/// A row element of a table.
|
||||||
|
TableRow,
|
||||||
|
/// A block-level divider element.
|
||||||
|
Div,
|
||||||
|
/// A paragraph.
|
||||||
|
Paragraph,
|
||||||
|
/// A heading.
|
||||||
|
Heading { level: u8 },
|
||||||
|
/// A link with a destination URL.
|
||||||
|
Link(&'s str, LinkType),
|
||||||
|
/// An image.
|
||||||
|
Image(&'s str),
|
||||||
|
/// An inline divider element.
|
||||||
|
Span,
|
||||||
|
/// A cell element of row within a table.
|
||||||
|
TableCell,
|
||||||
|
/// A block with raw markup for a specific output format.
|
||||||
|
RawBlock { format: &'s str },
|
||||||
|
/// A block with code in a specific language.
|
||||||
|
CodeBlock { language: Option<&'s str> },
|
||||||
/// A subscripted element.
|
/// A subscripted element.
|
||||||
Subscript,
|
Subscript,
|
||||||
/// A superscripted element.
|
/// A superscripted element.
|
||||||
|
@ -143,22 +128,44 @@ pub enum OrderedListFormat {
|
||||||
ParenParen,
|
ParenParen,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// An atomic element: an event that stands on its own and carries no content.
///
/// The enum is fieldless, so it is `Copy` and can be passed around by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Atom {
    /// An ellipsis, i.e. a set of three periods.
    Ellipsis,
    /// An en dash.
    EnDash,
    /// An em dash.
    EmDash,
    /// A thematic break, typically a horizontal rule.
    ThematicBreak,
    /// A blank line.
    Blankline,
    /// A space that may not break a line.
    NonBreakingSpace,
    /// A newline that may or may not break a line in the output format.
    Softbreak,
    /// A newline that must break a line.
    Hardbreak,
    /// An escape character, not visible in output.
    Escape,
}
|
||||||
|
|
||||||
impl<'s> Event<'s> {
|
impl<'s> Event<'s> {
|
||||||
fn from_inline(src: &'s str, inline: inline::Event) -> Self {
|
fn from_inline(src: &'s str, inline: inline::Event) -> Self {
|
||||||
let content = inline.span.of(src);
|
let content = inline.span.of(src);
|
||||||
match inline.kind {
|
match inline.kind {
|
||||||
inline::EventKind::Enter(c) | inline::EventKind::Exit(c) => {
|
inline::EventKind::Enter(c) | inline::EventKind::Exit(c) => {
|
||||||
let t = match c {
|
let t = match c {
|
||||||
inline::Container::Span => Tag::Span,
|
inline::Container::Span => Container::Span,
|
||||||
inline::Container::Subscript => Tag::Subscript,
|
inline::Container::Subscript => Container::Subscript,
|
||||||
inline::Container::Superscript => Tag::Superscript,
|
inline::Container::Superscript => Container::Superscript,
|
||||||
inline::Container::Insert => Tag::Insert,
|
inline::Container::Insert => Container::Insert,
|
||||||
inline::Container::Delete => Tag::Delete,
|
inline::Container::Delete => Container::Delete,
|
||||||
inline::Container::Emphasis => Tag::Emphasis,
|
inline::Container::Emphasis => Container::Emphasis,
|
||||||
inline::Container::Strong => Tag::Strong,
|
inline::Container::Strong => Container::Strong,
|
||||||
inline::Container::Mark => Tag::Mark,
|
inline::Container::Mark => Container::Mark,
|
||||||
inline::Container::SingleQuoted => Tag::SingleQuoted,
|
inline::Container::SingleQuoted => Container::SingleQuoted,
|
||||||
inline::Container::DoubleQuoted => Tag::DoubleQuoted,
|
inline::Container::DoubleQuoted => Container::DoubleQuoted,
|
||||||
_ => todo!(),
|
_ => todo!(),
|
||||||
};
|
};
|
||||||
if matches!(inline.kind, inline::EventKind::Enter(_)) {
|
if matches!(inline.kind, inline::EventKind::Enter(_)) {
|
||||||
|
@ -167,16 +174,16 @@ impl<'s> Event<'s> {
|
||||||
Self::End(t)
|
Self::End(t)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
inline::EventKind::Atom(a) => match a {
|
inline::EventKind::Atom(a) => Event::Atom(match a {
|
||||||
inline::Atom::Ellipsis => Self::Ellipsis,
|
inline::Atom::Ellipsis => Atom::Ellipsis,
|
||||||
inline::Atom::EnDash => Self::EnDash,
|
inline::Atom::EnDash => Atom::EnDash,
|
||||||
inline::Atom::EmDash => Self::EmDash,
|
inline::Atom::EmDash => Atom::EmDash,
|
||||||
inline::Atom::Nbsp => Self::NonBreakingSpace,
|
inline::Atom::Nbsp => Atom::NonBreakingSpace,
|
||||||
inline::Atom::Softbreak => Self::Softbreak,
|
inline::Atom::Softbreak => Atom::Softbreak,
|
||||||
inline::Atom::Hardbreak => Self::Hardbreak,
|
inline::Atom::Hardbreak => Atom::Hardbreak,
|
||||||
inline::Atom::Escape => Self::Escape,
|
inline::Atom::Escape => Atom::Escape,
|
||||||
_ => todo!(),
|
_ => todo!(),
|
||||||
},
|
}),
|
||||||
inline::EventKind::Node(n) => match n {
|
inline::EventKind::Node(n) => match n {
|
||||||
inline::Node::Str => Self::Str(content),
|
inline::Node::Str => Self::Str(content),
|
||||||
inline::Node::Verbatim => Self::Verbatim(content),
|
inline::Node::Verbatim => Self::Verbatim(content),
|
||||||
|
@ -194,7 +201,7 @@ impl<'s> Event<'s> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'s> Tag<'s> {
|
impl<'s> Container<'s> {
|
||||||
fn from_block(src: &'s str, block: block::Block) -> Self {
|
fn from_block(src: &'s str, block: block::Block) -> Self {
|
||||||
match block {
|
match block {
|
||||||
block::Block::Leaf(l) => match l {
|
block::Block::Leaf(l) => match l {
|
||||||
|
@ -272,7 +279,7 @@ impl<'s> Iterator for Parser<'s> {
|
||||||
}
|
}
|
||||||
tree::EventKind::Exit(block) => {
|
tree::EventKind::Exit(block) => {
|
||||||
self.parser = None;
|
self.parser = None;
|
||||||
return Some(Event::End(Tag::from_block(self.src, block)));
|
return Some(Event::End(Container::from_block(self.src, block)));
|
||||||
}
|
}
|
||||||
tree::EventKind::Enter(..) => unreachable!(),
|
tree::EventKind::Enter(..) => unreachable!(),
|
||||||
}
|
}
|
||||||
|
@ -282,24 +289,25 @@ impl<'s> Iterator for Parser<'s> {
|
||||||
self.tree.next().map(|ev| match ev.kind {
|
self.tree.next().map(|ev| match ev.kind {
|
||||||
tree::EventKind::Element(atom) => {
|
tree::EventKind::Element(atom) => {
|
||||||
assert_eq!(atom, block::Atom::Blankline);
|
assert_eq!(atom, block::Atom::Blankline);
|
||||||
Event::Blankline
|
Event::Atom(Atom::Blankline)
|
||||||
}
|
}
|
||||||
tree::EventKind::Enter(block) => {
|
tree::EventKind::Enter(block) => {
|
||||||
if matches!(block, block::Block::Leaf(..)) {
|
if matches!(block, block::Block::Leaf(..)) {
|
||||||
self.parser = Some(inline::Parser::new());
|
self.parser = Some(inline::Parser::new());
|
||||||
}
|
}
|
||||||
Event::Start(Tag::from_block(self.src, block), Attributes::none())
|
Event::Start(Container::from_block(self.src, block), Attributes::none())
|
||||||
}
|
}
|
||||||
tree::EventKind::Exit(block) => Event::End(Tag::from_block(self.src, block)),
|
tree::EventKind::Exit(block) => Event::End(Container::from_block(self.src, block)),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod test {
|
mod test {
|
||||||
|
use super::Atom::*;
|
||||||
use super::Attributes;
|
use super::Attributes;
|
||||||
|
use super::Container::*;
|
||||||
use super::Event::*;
|
use super::Event::*;
|
||||||
use super::Tag::*;
|
|
||||||
|
|
||||||
macro_rules! test_parse {
|
macro_rules! test_parse {
|
||||||
($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => {
|
($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => {
|
||||||
|
@ -364,7 +372,7 @@ mod test {
|
||||||
Start(Paragraph, Attributes::none()),
|
Start(Paragraph, Attributes::none()),
|
||||||
Str("para0\n"),
|
Str("para0\n"),
|
||||||
End(Paragraph),
|
End(Paragraph),
|
||||||
Blankline,
|
Atom(Blankline),
|
||||||
Start(Paragraph, Attributes::none()),
|
Start(Paragraph, Attributes::none()),
|
||||||
Str("para1"),
|
Str("para1"),
|
||||||
End(Paragraph),
|
End(Paragraph),
|
||||||
|
|
|
@ -7,6 +7,5 @@ fn main() {
|
||||||
.expect("failed to read unicode file");
|
.expect("failed to read unicode file");
|
||||||
|
|
||||||
let p = jotdown::Parser::new(&src);
|
let p = jotdown::Parser::new(&src);
|
||||||
//let v = p.parse().collect::<Vec<_>>();
|
jotdown::html::write(std::io::stdout(), p).unwrap();
|
||||||
//print!("{:?}", v);
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue