diff --git a/src/block.rs b/src/block.rs index 7dc40ec..241dcb3 100644 --- a/src/block.rs +++ b/src/block.rs @@ -1,9 +1,11 @@ +use crate::Alignment; use crate::OrderedListNumbering::*; use crate::OrderedListStyle::*; use crate::Span; use crate::EOF; use crate::attr; +use crate::lex; use crate::tree; use Atom::*; @@ -59,9 +61,9 @@ pub enum Leaf { /// Each inline is a line. Heading, - /// Span is first `|` character. - /// Each inline is a line (row). - Table, + /// Span is '|'. + /// Has zero or one inline for the cell contents. + TableCell(Alignment), /// Span is the link tag. /// Inlines are lines of the URL. @@ -91,6 +93,12 @@ pub enum Container { /// Span is footnote tag. Footnote, + + /// Span is empty, before first '|' character. + Table, + + /// Span is first '|' character. + TableRow { head: bool }, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -121,6 +129,8 @@ struct TreeParser<'s> { prev_blankline: bool, /// Stack of currently open lists. open_lists: Vec, + /// Alignments for each column in for the current table. + alignments: Vec, } impl<'s> TreeParser<'s> { @@ -131,6 +141,7 @@ impl<'s> TreeParser<'s> { tree: TreeBuilder::new(), prev_blankline: false, open_lists: Vec::new(), + alignments: Vec::new(), } } @@ -253,6 +264,134 @@ impl<'s> TreeParser<'s> { lines.iter().for_each(|line| self.tree.inline(*line)); self.tree.exit(); } + Block::Container(Table) => { + self.alignments.clear(); + self.tree.enter(Node::Container(Table), span); + let mut last_row_node = None; + for row in lines { + let row_node = self + .tree + .enter(Node::Container(TableRow { head: false }), row.with_len(1)); + let rem = row.skip(1); + let lex = lex::Lexer::new(row.skip(1).of(self.src).chars()); + let mut pos = rem.start(); + let mut cell_start = pos; + let mut separator_row = true; + let mut verbatim = None; + let mut column_index = 0; + for lex::Token { kind, len } in lex { + if let Some(l) = verbatim { + if matches!(kind, lex::Kind::Seq(lex::Sequence::Backtick)) + && len == l + { + verbatim = None; + } + } else { + match kind { + lex::Kind::Sym(lex::Symbol::Pipe) => { + { + let span = + Span::new(cell_start, pos).trim(self.src); + let cell = span.of(self.src); + let separator_cell = match cell.len() { + 0 => false, + 1 => cell == "-", + 2 => matches!(cell, ":-" | "--" | "-:"), + l => { + matches!(cell.as_bytes()[0], b'-' | b':') + && matches!( + cell.as_bytes()[l - 1], + b'-' | b':' + ) + && cell + .chars() + .skip(1) + .take(l - 2) + .all(|c| c == '-') + } + }; + separator_row &= separator_cell; + self.tree.enter( + Node::Leaf(TableCell( + self.alignments + .get(column_index) + .copied() + .unwrap_or(Alignment::Unspecified), + )), + Span::by_len(cell_start - 1, 1), + ); + self.tree.inline(span); + self.tree.exit(); // cell + cell_start = pos + len; + column_index += 1; + } + } + lex::Kind::Seq(lex::Sequence::Backtick) => { + verbatim = Some(len); + } + _ => {} + } + } + pos += len; + } + if separator_row { + self.alignments.clear(); + self.alignments.extend( + self.tree + .children(row_node) + .filter(|(kind, _)| matches!(kind, tree::Element::Inline)) + .map(|(_, sp)| { + let cell = sp.of(self.src); + let l = cell.as_bytes()[0] == b':'; + let r = cell.as_bytes()[cell.len() - 1] == b':'; + match (l, r) { + (false, false) => Alignment::Unspecified, + (false, true) => Alignment::Right, + (true, false) => Alignment::Left, + (true, true) => Alignment::Center, + } + }), + ); + self.tree.exit_discard(); // table row + if let Some(head_row) = last_row_node { + self.tree + .children(head_row) + .filter(|(e, _sp)| { + matches!( + e, + tree::Element::Container(Node::Leaf(TableCell(..))) + ) + }) + .zip( + self.alignments + .iter() + .copied() + .chain(std::iter::repeat(Alignment::Unspecified)), + ) + .for_each(|((e, _), new_align)| { + if let tree::Element::Container(Node::Leaf( + TableCell(alignment), + )) = e + { + *alignment = new_align; + } + }); + if let tree::Element::Container(Node::Container(TableRow { + head, + })) = self.tree.elem(head_row) + { + *head = true; + } else { + panic!() + } + } + } else { + self.tree.exit(); // table row + } + last_row_node = Some(row_node); + } + self.tree.exit(); // table + } Block::Container(c) => { let line_count_inner = lines.len() - usize::from(matches!(c, Div)); @@ -270,7 +409,9 @@ impl<'s> TreeParser<'s> { let skip = match c { Blockquote => spaces + "> ".len(), ListItem(..) | Footnote | Div => spaces.min(indent), - List { .. } | DescriptionList => panic!(), + List { .. } | DescriptionList | Table | TableRow { .. } => { + panic!() + } }; let len = sp.len() - usize::from(sp.of(self.src).ends_with('\n')); *sp = sp.skip(skip.min(len)); @@ -381,9 +522,12 @@ impl BlockParser { } '{' => (attr::valid(line_t.chars()).0 == line_t.trim_end().len()) .then(|| (Block::Atom(Attributes), Span::by_len(start, line_t.len()))), - '|' => (&line_t[line_t.len() - 1..] == "|" - && &line_t[line_t.len() - 2..line_t.len() - 1] != "\\") - .then(|| (Block::Leaf(Table), Span::by_len(start, 1))), + '|' => { + let l = line_t.trim_end().len(); + // FIXME: last byte may be pipe but end of prefixed unicode char + (line_t.as_bytes()[l - 1] == b'|' && line_t.as_bytes()[l - 2] != b'\\') + .then(|| (Block::Container(Table), Span::empty_at(start))) + } '[' => chars.as_str().find("]:").map(|l| { let tag = &chars.as_str()[0..l]; let (tag, is_footnote) = if let Some(tag) = tag.strip_prefix('^') { @@ -472,7 +616,7 @@ impl BlockParser { let empty = line_t.is_empty(); match self.kind { Block::Atom(..) => false, - Block::Leaf(Paragraph | Heading | Table) => !line.trim().is_empty(), + Block::Leaf(Paragraph | Heading) => !line.trim().is_empty(), Block::Leaf(LinkDefinition) => line.starts_with(' ') && !line.trim().is_empty(), Block::Container(Blockquote) => line.trim().starts_with('>'), Block::Container(ListItem(..)) => { @@ -494,7 +638,15 @@ impl BlockParser { !((&mut c).take(fence_length).all(|c| c == fence) && c.next().map_or(true, char::is_whitespace)) } - Block::Container(List { .. } | DescriptionList) => panic!(), + Block::Container(List { .. } | DescriptionList | TableRow { .. }) + | Block::Leaf(TableCell(..)) => { + panic!() + } + Block::Container(Table) => { + let line = line.trim(); + let l = line.len(); + line.as_bytes()[l - 1] == b'|' && line.as_bytes()[l - 2] != b'\\' + } } } @@ -615,6 +767,7 @@ fn lines(src: &str) -> impl Iterator + '_ { mod test { use crate::tree::EventKind; use crate::tree::EventKind::*; + use crate::Alignment; use crate::OrderedListNumbering::*; use crate::OrderedListStyle::*; @@ -1242,6 +1395,90 @@ mod test { ); } + #[test] + fn parse_table() { + test_parse!( + concat!( + "|a|b|c|\n", // + "|-|-|-|\n", // + "|1|2|3|\n", // + ), + (Enter(Container(Table)), ""), + (Enter(Container(TableRow { head: true })), "|"), + (Enter(Leaf(TableCell(Alignment::Unspecified))), "|"), + (Inline, "a"), + (Exit(Leaf(TableCell(Alignment::Unspecified))), "|"), + (Enter(Leaf(TableCell(Alignment::Unspecified))), "|"), + (Inline, "b"), + (Exit(Leaf(TableCell(Alignment::Unspecified))), "|"), + (Enter(Leaf(TableCell(Alignment::Unspecified))), "|"), + (Inline, "c"), + (Exit(Leaf(TableCell(Alignment::Unspecified))), "|"), + (Exit(Container(TableRow { head: true })), "|"), + (Enter(Container(TableRow { head: false })), "|"), + (Enter(Leaf(TableCell(Alignment::Unspecified))), "|"), + (Inline, "1"), + (Exit(Leaf(TableCell(Alignment::Unspecified))), "|"), + (Enter(Leaf(TableCell(Alignment::Unspecified))), "|"), + (Inline, "2"), + (Exit(Leaf(TableCell(Alignment::Unspecified))), "|"), + (Enter(Leaf(TableCell(Alignment::Unspecified))), "|"), + (Inline, "3"), + (Exit(Leaf(TableCell(Alignment::Unspecified))), "|"), + (Exit(Container(TableRow { head: false })), "|"), + (Exit(Container(Table)), "") + ); + } + + #[test] + fn parse_table_post() { + test_parse!( + "|a|\npara", + (Enter(Container(Table)), ""), + (Enter(Container(TableRow { head: false })), "|"), + (Enter(Leaf(TableCell(Alignment::Unspecified))), "|"), + (Inline, "a"), + (Exit(Leaf(TableCell(Alignment::Unspecified))), "|"), + (Exit(Container(TableRow { head: false })), "|"), + (Exit(Container(Table)), ""), + (Enter(Leaf(Paragraph)), ""), + (Inline, "para"), + (Exit(Leaf(Paragraph)), ""), + ); + } + + #[test] + fn parse_table_align() { + test_parse!( + concat!( + "|:---|:----:|----:|\n", + "|left|center|right|\n", // + ), + (Enter(Container(Table)), ""), + (Enter(Container(TableRow { head: false })), "|"), + (Enter(Leaf(TableCell(Alignment::Left))), "|"), + (Inline, "left"), + (Exit(Leaf(TableCell(Alignment::Left))), "|"), + (Enter(Leaf(TableCell(Alignment::Center))), "|"), + (Inline, "center"), + (Exit(Leaf(TableCell(Alignment::Center))), "|"), + (Enter(Leaf(TableCell(Alignment::Right))), "|"), + (Inline, "right"), + (Exit(Leaf(TableCell(Alignment::Right))), "|"), + (Exit(Container(TableRow { head: false })), "|"), + (Exit(Container(Table)), "") + ); + } + + #[test] + fn parse_table_sep_row_only() { + test_parse!( + "|-|-|", + (Enter(Container(Table)), ""), + (Exit(Container(Table)), "") + ); + } + macro_rules! test_block { ($src:expr, $kind:expr, $str:expr, $len:expr $(,)?) => { let lines = super::lines($src).map(|sp| sp.of($src)); diff --git a/src/html.rs b/src/html.rs index 552be8d..ab10330 100644 --- a/src/html.rs +++ b/src/html.rs @@ -145,7 +145,7 @@ impl<'s, I: Iterator>, W: std::fmt::Write> Writer<'s, I, W> { continue; } Container::Table => self.out.write_str(" self.out.write_str(" self.out.write_str(" self.out.write_str(" { if matches!(self.list_tightness.last(), Some(true)) { @@ -154,7 +154,7 @@ impl<'s, I: Iterator>, W: std::fmt::Write> Writer<'s, I, W> { self.out.write_str(" write!(self.out, " self.out.write_str(" self.out.write_str(" self.out.write_str(" self.out.write_str(" self.out.write_str(">, W: std::fmt::Write> Writer<'s, I, W> { self.footnote_number = None; } Container::Table => self.out.write_str("")?, - Container::TableRow => self.out.write_str("")?, + Container::TableRow { .. } => self.out.write_str("")?, Container::Div { .. } => self.out.write_str("")?, Container::Paragraph => { if matches!(self.list_tightness.last(), Some(true)) { @@ -323,7 +323,7 @@ impl<'s, I: Iterator>, W: std::fmt::Write> Writer<'s, I, W> { self.out.write_str("

")?; } Container::Heading { level } => write!(self.out, "", level)?, - Container::TableCell => self.out.write_str("")?, + Container::TableCell { .. } => self.out.write_str("")?, Container::DescriptionTerm => self.out.write_str("")?, Container::CodeBlock { .. } => self.out.write_str("")?, Container::Span => self.out.write_str("")?, diff --git a/src/lib.rs b/src/lib.rs index 70a23c6..469f9c9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -47,7 +47,7 @@ pub enum Container<'s> { /// A table element. Table, /// A row element of a table. - TableRow, + TableRow { head: bool }, /// A block-level divider element. Div { class: Option<&'s str> }, /// A paragraph. @@ -55,7 +55,7 @@ pub enum Container<'s> { /// A heading. Heading { level: usize }, /// A cell element of row within a table. - TableCell, + TableCell { alignment: Alignment, head: bool }, /// A term within a description list. DescriptionTerm, /// A block with raw markup for a specific output format. @@ -106,12 +106,12 @@ impl<'s> Container<'s> { | Self::DescriptionDetails | Self::Footnote { .. } | Self::Table - | Self::TableRow + | Self::TableRow { .. } | Self::Div { .. } | Self::Paragraph | Self::Heading { .. } + | Self::TableCell { .. } | Self::DescriptionTerm - | Self::TableCell | Self::RawBlock { .. } | Self::CodeBlock { .. } => true, Self::Span @@ -143,11 +143,11 @@ impl<'s> Container<'s> { | Self::DescriptionDetails | Self::Footnote { .. } | Self::Table - | Self::TableRow + | Self::TableRow { .. } | Self::Div { .. } => true, Self::Paragraph | Self::Heading { .. } - | Self::TableCell + | Self::TableCell { .. } | Self::DescriptionTerm | Self::RawBlock { .. } | Self::CodeBlock { .. } @@ -170,6 +170,14 @@ impl<'s> Container<'s> { } } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Alignment { + Unspecified, + Left, + Center, + Right, +} + #[derive(Debug, PartialEq, Eq)] pub enum SpanLinkType { Inline, @@ -242,27 +250,6 @@ pub enum Atom<'s> { Blankline, } -impl<'s> Container<'s> { - fn from_leaf_block(content: &'s str, l: block::Leaf) -> Self { - match l { - block::Leaf::Paragraph => Self::Paragraph, - block::Leaf::Heading => Self::Heading { - level: content.len(), - }, - block::Leaf::CodeBlock => { - if let Some(format) = content.strip_prefix('=') { - Self::RawBlock { format } - } else { - Self::CodeBlock { - lang: (!content.is_empty()).then(|| content), - } - } - } - _ => todo!(), - } - } -} - impl OrderedListNumbering { fn parse_number(self, n: &str) -> u32 { match self { @@ -336,6 +323,8 @@ pub struct Parser<'s> { /// Inline parser, recreated for each new inline. inline_parser: Option>>, + table_head_row: bool, + /// Footnote references in the order they were encountered, without duplicates. footnote_references: Vec<&'s str>, /// Cache of footnotes to emit at the end. @@ -376,6 +365,7 @@ impl<'s> Parser<'s> { src, link_definitions, tree: branch, + table_head_row: false, footnote_references: Vec::new(), footnotes: std::collections::HashMap::new(), footnote_index: 0, @@ -533,7 +523,26 @@ impl<'s> Parser<'s> { self.inline_parser = Some(inline::Parser::new(self.inlines.chars())); } - Container::from_leaf_block(content, l) + match l { + block::Leaf::Paragraph => Container::Paragraph, + block::Leaf::Heading => Container::Heading { + level: content.len(), + }, + block::Leaf::CodeBlock => { + if let Some(format) = content.strip_prefix('=') { + Container::RawBlock { format } + } else { + Container::CodeBlock { + lang: (!content.is_empty()).then(|| content), + } + } + } + block::Leaf::TableCell(alignment) => Container::TableCell { + alignment, + head: self.table_head_row, + }, + block::Leaf::LinkDefinition => unreachable!(), + } } block::Node::Container(c) => match c { block::Container::Blockquote => Container::Blockquote, @@ -573,6 +582,13 @@ impl<'s> Parser<'s> { Container::ListItem } } + block::Container::Table => Container::Table, + block::Container::TableRow { head } => { + if enter { + self.table_head_row = head; + } + Container::TableRow { head } + } }, }; if enter { diff --git a/src/tree.rs b/src/tree.rs index 3266570..de049cc 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -157,7 +157,18 @@ pub struct Builder { depth: usize, } -impl Builder { +impl<'a, C, A> From<&'a mut NodeKind> for Element<'a, C, A> { + fn from(kind: &'a mut NodeKind) -> Self { + match kind { + NodeKind::Root => unreachable!(), + NodeKind::Container(c, ..) => Element::Container(c), + NodeKind::Atom(a) => Element::Atom(a), + NodeKind::Inline => Element::Inline, + } + } +} + +impl Builder { pub(super) fn new() -> Self { Builder { nodes: vec![Node { @@ -206,6 +217,19 @@ impl Builder { } } + /// Exit and discard all the contents of the current container. + pub(super) fn exit_discard(&mut self) { + self.exit(); + let exited = self.branch.pop().unwrap(); + self.nodes.drain(exited.index()..); + let (ni, has_parent) = self.relink(exited, None); + if has_parent { + self.head = Some(ni); + } else { + self.branch.push(ni); + } + } + pub(super) fn depth(&self) -> usize { self.depth } @@ -219,6 +243,23 @@ impl Builder { } } + /// Retrieve all children nodes for the specified node. Order is in the order they were added. + pub(super) fn children( + &mut self, + node: NodeIndex, + ) -> impl Iterator, Span)> { + assert!(matches!( + self.nodes[node.index()].kind, + NodeKind::Container(..) + )); + let end = self.nodes[node.index()] + .next + .map_or(self.nodes.len(), NodeIndex::index); + self.nodes[node.index()..end] + .iter_mut() + .map(|n| (Element::from(&mut n.kind), n.span)) + } + pub(super) fn finish(self) -> Tree { assert_eq!(self.depth, 0); let head = self.nodes[NodeIndex::root().index()].next; @@ -257,6 +298,25 @@ impl Builder { self.head = Some(ni); ni } + + /// Remove the link from the node that points to the specified node. Return the pointer node + /// and whether it is a container or not. + fn relink(&mut self, prev: NodeIndex, next: Option) -> (NodeIndex, bool) { + for (i, n) in self.nodes.iter_mut().enumerate().rev() { + let ni = NodeIndex::new(i); + if n.next == Some(prev) { + n.next = next; + return (ni, false); + } else if let NodeKind::Container(kind, child) = &mut n.kind { + if *child == Some(prev) { + dbg!(kind, next); + *child = next; + return (ni, true); + } + } + } + panic!() + } } impl std::fmt::Debug