block: parse tables

This commit is contained in:
Noah Hellman 2023-01-25 19:27:12 +01:00
parent 6ec5b09367
commit c288264aee
4 changed files with 355 additions and 42 deletions

View file

@ -1,9 +1,11 @@
use crate::Alignment;
use crate::OrderedListNumbering::*;
use crate::OrderedListStyle::*;
use crate::Span;
use crate::EOF;
use crate::attr;
use crate::lex;
use crate::tree;
use Atom::*;
@ -59,9 +61,9 @@ pub enum Leaf {
/// Each inline is a line.
Heading,
/// Span is first `|` character.
/// Each inline is a line (row).
Table,
/// Span is '|'.
/// Has zero or one inline for the cell contents.
TableCell(Alignment),
/// Span is the link tag.
/// Inlines are lines of the URL.
@ -91,6 +93,12 @@ pub enum Container {
/// Span is footnote tag.
Footnote,
/// Span is empty, before first '|' character.
Table,
/// Span is first '|' character.
TableRow { head: bool },
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
@ -121,6 +129,8 @@ struct TreeParser<'s> {
prev_blankline: bool,
/// Stack of currently open lists.
open_lists: Vec<OpenList>,
/// Alignments for each column in the current table.
alignments: Vec<Alignment>,
}
impl<'s> TreeParser<'s> {
@ -131,6 +141,7 @@ impl<'s> TreeParser<'s> {
tree: TreeBuilder::new(),
prev_blankline: false,
open_lists: Vec::new(),
alignments: Vec::new(),
}
}
@ -253,6 +264,134 @@ impl<'s> TreeParser<'s> {
lines.iter().for_each(|line| self.tree.inline(*line));
self.tree.exit();
}
// Build the tree for a table: every line is a row, and every `|` after the
// leading one terminates a cell. A row made up solely of separator cells
// (`-`, `:-`, `-:`, `:-:` with any number of dashes) is not emitted; it sets
// the column alignments and turns the row before it into a head row.
Block::Container(Table) => {
// Alignments are tracked per table, drop whatever a previous table left.
self.alignments.clear();
self.tree.enter(Node::Container(Table), span);
// Node of the previously processed row, candidate for becoming a head row.
let mut last_row_node = None;
for row in lines {
// Every row starts out as a non-head row; its span is the leading `|`.
let row_node = self
.tree
.enter(Node::Container(TableRow { head: false }), row.with_len(1));
// Lex the remainder of the row, i.e. everything after the leading `|`.
let rem = row.skip(1);
let lex = lex::Lexer::new(row.skip(1).of(self.src).chars());
// `pos` is the source position of the current token, `cell_start` the
// position right after the most recent cell-delimiting `|`.
let mut pos = rem.start();
let mut cell_start = pos;
// Remains true only as long as every cell seen is a separator cell.
let mut separator_row = true;
// Length of the backtick sequence that opened the current verbatim
// section, if any.
let mut verbatim = None;
let mut column_index = 0;
for lex::Token { kind, len } in lex {
if let Some(l) = verbatim {
// Within verbatim, pipes do not delimit cells. Only a backtick
// sequence of the same length as the opener ends the section.
if matches!(kind, lex::Kind::Seq(lex::Sequence::Backtick))
&& len == l
{
verbatim = None;
}
} else {
match kind {
lex::Kind::Sym(lex::Symbol::Pipe) => {
{
// Cell contents lie between the previous pipe and this one.
// NOTE(review): `trim` presumably strips surrounding
// whitespace; confirm against `Span::trim`.
let span =
Span::new(cell_start, pos).trim(self.src);
let cell = span.of(self.src);
// A separator cell is non-empty, starts and ends with `-` or
// `:`, and has only `-` in between.
let separator_cell = match cell.len() {
0 => false,
1 => cell == "-",
2 => matches!(cell, ":-" | "--" | "-:"),
l => {
matches!(cell.as_bytes()[0], b'-' | b':')
&& matches!(
cell.as_bytes()[l - 1],
b'-' | b':'
)
&& cell
.chars()
.skip(1)
.take(l - 2)
.all(|c| c == '-')
}
};
separator_row &= separator_cell;
// Use the alignment recorded for this column, if any. The cell
// span is the `|` that precedes the cell contents.
self.tree.enter(
Node::Leaf(TableCell(
self.alignments
.get(column_index)
.copied()
.unwrap_or(Alignment::Unspecified),
)),
Span::by_len(cell_start - 1, 1),
);
self.tree.inline(span);
self.tree.exit(); // cell
// The next cell begins right after this pipe.
cell_start = pos + len;
column_index += 1;
}
}
lex::Kind::Seq(lex::Sequence::Backtick) => {
// Opening backticks, remember their length to find the closer.
verbatim = Some(len);
}
_ => {}
}
}
pos += len;
}
if separator_row {
// Recompute the column alignments from the colons of each separator
// cell; the cell contents are the inline spans just added to the row.
self.alignments.clear();
self.alignments.extend(
self.tree
.children(row_node)
.filter(|(kind, _)| matches!(kind, tree::Element::Inline))
.map(|(_, sp)| {
let cell = sp.of(self.src);
let l = cell.as_bytes()[0] == b':';
let r = cell.as_bytes()[cell.len() - 1] == b':';
match (l, r) {
(false, false) => Alignment::Unspecified,
(false, true) => Alignment::Right,
(true, false) => Alignment::Left,
(true, true) => Alignment::Center,
}
}),
);
// The separator row itself is not part of the output.
self.tree.exit_discard(); // table row
if let Some(head_row) = last_row_node {
// The preceding row was emitted before the alignments were known,
// so patch its cells. Columns without a separator cell get
// `Unspecified`.
self.tree
.children(head_row)
.filter(|(e, _sp)| {
matches!(
e,
tree::Element::Container(Node::Leaf(TableCell(..)))
)
})
.zip(
self.alignments
.iter()
.copied()
.chain(std::iter::repeat(Alignment::Unspecified)),
)
.for_each(|((e, _), new_align)| {
if let tree::Element::Container(Node::Leaf(
TableCell(alignment),
)) = e
{
*alignment = new_align;
}
});
// Mark the preceding row as a head row.
if let tree::Element::Container(Node::Container(TableRow {
head,
})) = self.tree.elem(head_row)
{
*head = true;
} else {
panic!()
}
}
} else {
self.tree.exit(); // table row
}
// NOTE(review): this also runs after a separator row has been discarded,
// so `last_row_node` may then refer to a removed node. If the next row is
// a separator row as well, `children(head_row)` / `elem(head_row)` would
// be called with that stale index; confirm this cannot go out of bounds.
last_row_node = Some(row_node);
}
self.tree.exit(); // table
}
Block::Container(c) => {
let line_count_inner = lines.len() - usize::from(matches!(c, Div));
@ -270,7 +409,9 @@ impl<'s> TreeParser<'s> {
let skip = match c {
Blockquote => spaces + "> ".len(),
ListItem(..) | Footnote | Div => spaces.min(indent),
List { .. } | DescriptionList => panic!(),
List { .. } | DescriptionList | Table | TableRow { .. } => {
panic!()
}
};
let len = sp.len() - usize::from(sp.of(self.src).ends_with('\n'));
*sp = sp.skip(skip.min(len));
@ -381,9 +522,12 @@ impl BlockParser {
}
'{' => (attr::valid(line_t.chars()).0 == line_t.trim_end().len())
.then(|| (Block::Atom(Attributes), Span::by_len(start, line_t.len()))),
'|' => (&line_t[line_t.len() - 1..] == "|"
&& &line_t[line_t.len() - 2..line_t.len() - 1] != "\\")
.then(|| (Block::Leaf(Table), Span::by_len(start, 1))),
'|' => {
let l = line_t.trim_end().len();
// FIXME: last byte may be pipe but end of prefixed unicode char
(line_t.as_bytes()[l - 1] == b'|' && line_t.as_bytes()[l - 2] != b'\\')
.then(|| (Block::Container(Table), Span::empty_at(start)))
}
'[' => chars.as_str().find("]:").map(|l| {
let tag = &chars.as_str()[0..l];
let (tag, is_footnote) = if let Some(tag) = tag.strip_prefix('^') {
@ -472,7 +616,7 @@ impl BlockParser {
let empty = line_t.is_empty();
match self.kind {
Block::Atom(..) => false,
Block::Leaf(Paragraph | Heading | Table) => !line.trim().is_empty(),
Block::Leaf(Paragraph | Heading) => !line.trim().is_empty(),
Block::Leaf(LinkDefinition) => line.starts_with(' ') && !line.trim().is_empty(),
Block::Container(Blockquote) => line.trim().starts_with('>'),
Block::Container(ListItem(..)) => {
@ -494,7 +638,15 @@ impl BlockParser {
!((&mut c).take(fence_length).all(|c| c == fence)
&& c.next().map_or(true, char::is_whitespace))
}
Block::Container(List { .. } | DescriptionList) => panic!(),
Block::Container(List { .. } | DescriptionList | TableRow { .. })
| Block::Leaf(TableCell(..)) => {
panic!()
}
Block::Container(Table) => {
let line = line.trim();
let l = line.len();
line.as_bytes()[l - 1] == b'|' && line.as_bytes()[l - 2] != b'\\'
}
}
}
@ -615,6 +767,7 @@ fn lines(src: &str) -> impl Iterator<Item = Span> + '_ {
mod test {
use crate::tree::EventKind;
use crate::tree::EventKind::*;
use crate::Alignment;
use crate::OrderedListNumbering::*;
use crate::OrderedListStyle::*;
@ -1242,6 +1395,90 @@ mod test {
);
}
// A row followed by a separator row becomes a head row (`head: true`). The
// separator row itself produces no events, and with no colons in it every cell
// keeps `Alignment::Unspecified`.
#[test]
fn parse_table() {
test_parse!(
concat!(
"|a|b|c|\n", //
"|-|-|-|\n", //
"|1|2|3|\n", //
),
(Enter(Container(Table)), ""),
(Enter(Container(TableRow { head: true })), "|"),
(Enter(Leaf(TableCell(Alignment::Unspecified))), "|"),
(Inline, "a"),
(Exit(Leaf(TableCell(Alignment::Unspecified))), "|"),
(Enter(Leaf(TableCell(Alignment::Unspecified))), "|"),
(Inline, "b"),
(Exit(Leaf(TableCell(Alignment::Unspecified))), "|"),
(Enter(Leaf(TableCell(Alignment::Unspecified))), "|"),
(Inline, "c"),
(Exit(Leaf(TableCell(Alignment::Unspecified))), "|"),
(Exit(Container(TableRow { head: true })), "|"),
(Enter(Container(TableRow { head: false })), "|"),
(Enter(Leaf(TableCell(Alignment::Unspecified))), "|"),
(Inline, "1"),
(Exit(Leaf(TableCell(Alignment::Unspecified))), "|"),
(Enter(Leaf(TableCell(Alignment::Unspecified))), "|"),
(Inline, "2"),
(Exit(Leaf(TableCell(Alignment::Unspecified))), "|"),
(Enter(Leaf(TableCell(Alignment::Unspecified))), "|"),
(Inline, "3"),
(Exit(Leaf(TableCell(Alignment::Unspecified))), "|"),
(Exit(Container(TableRow { head: false })), "|"),
(Exit(Container(Table)), "")
);
}
// A line that does not end with `|` terminates the table, even without a blank
// line in between; it is parsed as a separate paragraph.
#[test]
fn parse_table_post() {
test_parse!(
"|a|\npara",
(Enter(Container(Table)), ""),
(Enter(Container(TableRow { head: false })), "|"),
(Enter(Leaf(TableCell(Alignment::Unspecified))), "|"),
(Inline, "a"),
(Exit(Leaf(TableCell(Alignment::Unspecified))), "|"),
(Exit(Container(TableRow { head: false })), "|"),
(Exit(Container(Table)), ""),
(Enter(Leaf(Paragraph)), ""),
(Inline, "para"),
(Exit(Leaf(Paragraph)), ""),
);
}
// A leading separator row sets the alignment of the columns in the rows that
// follow: `:-` is left, `:-:` is center and `-:` is right. With no row before
// the separator, no row is marked as head.
#[test]
fn parse_table_align() {
test_parse!(
concat!(
"|:---|:----:|----:|\n",
"|left|center|right|\n", //
),
(Enter(Container(Table)), ""),
(Enter(Container(TableRow { head: false })), "|"),
(Enter(Leaf(TableCell(Alignment::Left))), "|"),
(Inline, "left"),
(Exit(Leaf(TableCell(Alignment::Left))), "|"),
(Enter(Leaf(TableCell(Alignment::Center))), "|"),
(Inline, "center"),
(Exit(Leaf(TableCell(Alignment::Center))), "|"),
(Enter(Leaf(TableCell(Alignment::Right))), "|"),
(Inline, "right"),
(Exit(Leaf(TableCell(Alignment::Right))), "|"),
(Exit(Container(TableRow { head: false })), "|"),
(Exit(Container(Table)), "")
);
}
// A table that consists of nothing but a separator row is emitted as an empty
// table, since separator rows are discarded.
#[test]
fn parse_table_sep_row_only() {
test_parse!(
"|-|-|",
(Enter(Container(Table)), ""),
(Exit(Container(Table)), "")
);
}
macro_rules! test_block {
($src:expr, $kind:expr, $str:expr, $len:expr $(,)?) => {
let lines = super::lines($src).map(|sp| sp.of($src));

View file

@ -145,7 +145,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
continue;
}
Container::Table => self.out.write_str("<table")?,
Container::TableRow => self.out.write_str("<tr")?,
Container::TableRow { .. } => self.out.write_str("<tr")?,
Container::Div { .. } => self.out.write_str("<div")?,
Container::Paragraph => {
if matches!(self.list_tightness.last(), Some(true)) {
@ -154,7 +154,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
self.out.write_str("<p")?;
}
Container::Heading { level } => write!(self.out, "<h{}", level)?,
Container::TableCell => self.out.write_str("<td")?,
Container::TableCell { .. } => self.out.write_str("<td")?,
Container::DescriptionTerm => self.out.write_str("<dt")?,
Container::CodeBlock { .. } => self.out.write_str("<pre")?,
Container::Span | Container::Math { .. } => self.out.write_str("<span")?,
@ -301,7 +301,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
self.footnote_number = None;
}
Container::Table => self.out.write_str("</table>")?,
Container::TableRow => self.out.write_str("</tr>")?,
Container::TableRow { .. } => self.out.write_str("</tr>")?,
Container::Div { .. } => self.out.write_str("</div>")?,
Container::Paragraph => {
if matches!(self.list_tightness.last(), Some(true)) {
@ -323,7 +323,7 @@ impl<'s, I: Iterator<Item = Event<'s>>, W: std::fmt::Write> Writer<'s, I, W> {
self.out.write_str("</p>")?;
}
Container::Heading { level } => write!(self.out, "</h{}>", level)?,
Container::TableCell => self.out.write_str("</td>")?,
Container::TableCell { .. } => self.out.write_str("</td>")?,
Container::DescriptionTerm => self.out.write_str("</dt>")?,
Container::CodeBlock { .. } => self.out.write_str("</code></pre>")?,
Container::Span => self.out.write_str("</span>")?,

View file

@ -47,7 +47,7 @@ pub enum Container<'s> {
/// A table element.
Table,
/// A row element of a table.
TableRow,
TableRow { head: bool },
/// A block-level divider element.
Div { class: Option<&'s str> },
/// A paragraph.
@ -55,7 +55,7 @@ pub enum Container<'s> {
/// A heading.
Heading { level: usize },
/// A cell element of row within a table.
TableCell,
TableCell { alignment: Alignment, head: bool },
/// A term within a description list.
DescriptionTerm,
/// A block with raw markup for a specific output format.
@ -106,12 +106,12 @@ impl<'s> Container<'s> {
| Self::DescriptionDetails
| Self::Footnote { .. }
| Self::Table
| Self::TableRow
| Self::TableRow { .. }
| Self::Div { .. }
| Self::Paragraph
| Self::Heading { .. }
| Self::TableCell { .. }
| Self::DescriptionTerm
| Self::TableCell
| Self::RawBlock { .. }
| Self::CodeBlock { .. } => true,
Self::Span
@ -143,11 +143,11 @@ impl<'s> Container<'s> {
| Self::DescriptionDetails
| Self::Footnote { .. }
| Self::Table
| Self::TableRow
| Self::TableRow { .. }
| Self::Div { .. } => true,
Self::Paragraph
| Self::Heading { .. }
| Self::TableCell
| Self::TableCell { .. }
| Self::DescriptionTerm
| Self::RawBlock { .. }
| Self::CodeBlock { .. }
@ -170,6 +170,14 @@ impl<'s> Container<'s> {
}
}
/// Horizontal alignment of the cells in a table column, as given by the colons
/// in the column's cell of the table separator row.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Alignment {
/// No colon on either side of the separator cell, or no separator cell for
/// the column.
Unspecified,
/// Colon on the left side of the separator cell only (`:-`).
Left,
/// Colon on both sides of the separator cell (`:-:`).
Center,
/// Colon on the right side of the separator cell only (`-:`).
Right,
}
#[derive(Debug, PartialEq, Eq)]
pub enum SpanLinkType {
Inline,
@ -242,27 +250,6 @@ pub enum Atom<'s> {
Blankline,
}
impl<'s> Container<'s> {
    /// Create the container event that corresponds to a leaf block.
    ///
    /// `content` is the text associated with the leaf:
    /// - for a heading, its length is used as the heading level,
    /// - for a code block, a leading `=` marks a raw block with the rest as the
    ///   format, otherwise non-empty content is the language of the code block.
    ///
    /// Leaves other than paragraphs, headings and code blocks are not handled
    /// here and hit `todo!()`.
    fn from_leaf_block(content: &'s str, l: block::Leaf) -> Self {
        match l {
            block::Leaf::Paragraph => Self::Paragraph,
            block::Leaf::Heading => {
                let level = content.len();
                Self::Heading { level }
            }
            block::Leaf::CodeBlock => match content.strip_prefix('=') {
                Some(format) => Self::RawBlock { format },
                None => {
                    let lang = if content.is_empty() {
                        None
                    } else {
                        Some(content)
                    };
                    Self::CodeBlock { lang }
                }
            },
            _ => todo!(),
        }
    }
}
impl OrderedListNumbering {
fn parse_number(self, n: &str) -> u32 {
match self {
@ -336,6 +323,8 @@ pub struct Parser<'s> {
/// Inline parser, recreated for each new inline.
inline_parser: Option<inline::Parser<span::InlineCharsIter<'s>>>,
table_head_row: bool,
/// Footnote references in the order they were encountered, without duplicates.
footnote_references: Vec<&'s str>,
/// Cache of footnotes to emit at the end.
@ -376,6 +365,7 @@ impl<'s> Parser<'s> {
src,
link_definitions,
tree: branch,
table_head_row: false,
footnote_references: Vec::new(),
footnotes: std::collections::HashMap::new(),
footnote_index: 0,
@ -533,7 +523,26 @@ impl<'s> Parser<'s> {
self.inline_parser =
Some(inline::Parser::new(self.inlines.chars()));
}
Container::from_leaf_block(content, l)
match l {
block::Leaf::Paragraph => Container::Paragraph,
block::Leaf::Heading => Container::Heading {
level: content.len(),
},
block::Leaf::CodeBlock => {
if let Some(format) = content.strip_prefix('=') {
Container::RawBlock { format }
} else {
Container::CodeBlock {
lang: (!content.is_empty()).then(|| content),
}
}
}
block::Leaf::TableCell(alignment) => Container::TableCell {
alignment,
head: self.table_head_row,
},
block::Leaf::LinkDefinition => unreachable!(),
}
}
block::Node::Container(c) => match c {
block::Container::Blockquote => Container::Blockquote,
@ -573,6 +582,13 @@ impl<'s> Parser<'s> {
Container::ListItem
}
}
block::Container::Table => Container::Table,
block::Container::TableRow { head } => {
if enter {
self.table_head_row = head;
}
Container::TableRow { head }
}
},
};
if enter {

View file

@ -157,7 +157,18 @@ pub struct Builder<C, A> {
depth: usize,
}
impl<C: Clone, A: Clone> Builder<C, A> {
/// View a mutably borrowed node kind as a mutably borrowed tree element.
///
/// The root node has no element counterpart, converting it is a bug.
impl<'a, C, A> From<&'a mut NodeKind<C, A>> for Element<'a, C, A> {
    fn from(kind: &'a mut NodeKind<C, A>) -> Self {
        match kind {
            NodeKind::Inline => Self::Inline,
            NodeKind::Atom(atom) => Self::Atom(atom),
            NodeKind::Container(container, ..) => Self::Container(container),
            NodeKind::Root => unreachable!(),
        }
    }
}
impl<C: std::fmt::Debug, A: std::fmt::Debug> Builder<C, A> {
pub(super) fn new() -> Self {
Builder {
nodes: vec![Node {
@ -206,6 +217,19 @@ impl<C: Clone, A: Clone> Builder<C, A> {
}
}
/// Exit the current container and throw it away together with everything that
/// was added inside it.
///
/// The container's node and all nodes after it are removed, and the link that
/// pointed to the container is cleared.
pub(super) fn exit_discard(&mut self) {
    self.exit();
    let discarded = self.branch.pop().unwrap();
    self.nodes.drain(discarded.index()..);
    match self.relink(discarded, None) {
        // The discarded container was the first child of a container.
        (pointer, true) => self.head = Some(pointer),
        // The discarded container followed a sibling.
        (pointer, false) => self.branch.push(pointer),
    }
}
pub(super) fn depth(&self) -> usize {
self.depth
}
@ -219,6 +243,23 @@ impl<C: Clone, A: Clone> Builder<C, A> {
}
}
/// Iterate over the nodes that belong to the specified container, in the order
/// they were added.
///
/// The iteration starts at the container node itself and runs up to, but not
/// including, its next sibling, or to the last node if it has no sibling yet.
/// Each item is the element, mutably borrowed, together with its span.
///
/// # Panics
///
/// Panics if `node` is not a container.
pub(super) fn children(
    &mut self,
    node: NodeIndex,
) -> impl Iterator<Item = (Element<C, A>, Span)> {
    let start = node.index();
    assert!(matches!(self.nodes[start].kind, NodeKind::Container(..)));
    let end = match self.nodes[start].next {
        Some(sibling) => sibling.index(),
        None => self.nodes.len(),
    };
    let owned = &mut self.nodes[start..end];
    owned
        .iter_mut()
        .map(|n| (Element::from(&mut n.kind), n.span))
}
pub(super) fn finish(self) -> Tree<C, A> {
assert_eq!(self.depth, 0);
let head = self.nodes[NodeIndex::root().index()].next;
@ -257,6 +298,25 @@ impl<C: Clone, A: Clone> Builder<C, A> {
self.head = Some(ni);
ni
}
/// Redirect the link that points to `prev` so that it points to `next` instead.
///
/// Nodes are searched from the most recently added towards the root. Returns
/// the index of the node whose link was changed, and whether that link was the
/// child link of a container (`true`) or the `next` link of a sibling (`false`).
///
/// # Panics
///
/// Panics if no node links to `prev`.
fn relink(&mut self, prev: NodeIndex, next: Option<NodeIndex>) -> (NodeIndex, bool) {
    for (i, n) in self.nodes.iter_mut().enumerate().rev() {
        let ni = NodeIndex::new(i);
        if n.next == Some(prev) {
            n.next = next;
            return (ni, false);
        }
        if let NodeKind::Container(_, child) = &mut n.kind {
            if *child == Some(prev) {
                *child = next;
                return (ni, true);
            }
        }
    }
    panic!("relink: no node links to the specified node")
}
}
impl<C: std::fmt::Debug + Clone + 'static, A: std::fmt::Debug + Clone + 'static> std::fmt::Debug