wip block+inline

This commit is contained in:
Noah Hellman 2022-11-22 19:19:21 +01:00
parent a41673a3b6
commit f3e6db018e
4 changed files with 100 additions and 139 deletions

View file

@ -57,50 +57,49 @@ pub enum Container {
#[derive(Debug, PartialEq, Eq)] #[derive(Debug, PartialEq, Eq)]
pub enum Event { pub enum Event {
Start(Container), Enter(Container),
End(Container), Exit(Container),
Atom(Atom), Atom(Atom),
} }
/* #[derive(Debug, Clone, Copy)]
#[derive(Debug)]
pub enum OpenerState {
Unclosed,
Closed,
Discarded,
}
*/
#[derive(Debug)]
pub enum Dir { pub enum Dir {
Open, Open,
Close, Close,
Both, Both,
} }
pub struct Parser { pub struct Parser<'s> {
openers: Vec<Container>, openers: Vec<Container>,
events: Vec<Event>, events: Vec<Event>,
lexer: Option<std::iter::Peekable<lex::Lexer<'s>>>,
} }
impl Parser { impl<'s> Parser<'s> {
pub fn new() -> Self { pub fn new() -> Self {
Self { Self {
openers: Vec::new(), openers: Vec::new(),
events: Vec::new(), events: Vec::new(),
lexer: None,
} }
} }
pub fn parse<'a>(&'a mut self, src: &'a str) -> impl Iterator<Item = Event> + 'a { pub fn parse(&mut self, src: &'s str) {
let mut lexer = lex::Lexer::new(src).peekable(); self.lexer = Some(lex::Lexer::new(src).peekable());
std::iter::from_fn(move || { }
dbg!(&src); }
if self.events.is_empty() {
Parse::new(&mut lexer, &mut self.openers, &mut self.events).parse();
}
self.events.pop() impl<'s> Iterator for Parser<'s> {
}) type Item = Event;
fn next(&mut self) -> Option<Self::Item> {
if self.events.is_empty() {
if let Some(lexer) = &mut self.lexer {
Parse::new(lexer, &mut self.openers, &mut self.events).parse();
}
}
self.events.pop()
} }
} }
@ -123,26 +122,6 @@ impl<'l, 's, 'e> Parse<'l, 's, 'e> {
} }
} }
/*
fn step(&mut self) -> lex::Token {
let token = self.lexer.next_token();
dbg!(&token, self.pos);
self.pos += token.len;
std::mem::replace(&mut self.next_token, token)
}
fn eat(&mut self) -> lex::Kind {
let end = self.pos;
let token = self.step();
self.span = Span::new(end - token.len, end);
token.kind
}
fn peek(&mut self) -> &lex::Kind {
&self.next_token.kind
}
*/
fn peek(&mut self) -> Option<&lex::Kind> { fn peek(&mut self) -> Option<&lex::Kind> {
self.tokens.peek().map(|t| &t.kind) self.tokens.peek().map(|t| &t.kind)
} }
@ -154,8 +133,6 @@ impl<'l, 's, 'e> Parse<'l, 's, 'e> {
return; return;
}; };
//dbg!(&kind);
{ {
let verbatim_opt = match t.kind { let verbatim_opt = match t.kind {
lex::Kind::Seq(lex::Sequence::Dollar) => { lex::Kind::Seq(lex::Sequence::Dollar) => {
@ -216,21 +193,21 @@ impl<'l, 's, 'e> Parse<'l, 's, 'e> {
_ => None, _ => None,
}; };
if let Some((cont, ty)) = container_opt { if let Some((cont, dir)) = container_opt {
if matches!(ty, Dir::Close | Dir::Both) && self.openers.contains(&cont) { if matches!(dir, Dir::Close | Dir::Both) && self.openers.contains(&cont) {
loop { loop {
let c = self.openers.pop().unwrap(); let c = self.openers.pop().unwrap();
self.events.push(Event::End(c)); self.events.push(Event::Exit(c));
if c == cont { if c == cont {
break; break;
} }
} }
return; return;
} else if matches!(ty, Dir::Open | Dir::Both) { } else if matches!(dir, Dir::Open | Dir::Both) {
self.openers.push(cont); self.openers.push(cont);
self.events.push(Event::Start(cont)); self.events.push(Event::Enter(cont));
return;
} }
return;
} }
} }
@ -250,14 +227,16 @@ impl<'l, 's, 'e> Parse<'l, 's, 'e> {
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use super::Atom::*; use super::Atom::*;
use super::Container::*;
use super::Event::*; use super::Event::*;
#[test] #[test]
fn container_brace() { fn container_brace() {
let mut p = super::Parser::new(); let mut p = super::Parser::new();
p.parse("{_hej_}");
assert_eq!( assert_eq!(
&[Atom(Str)], p.collect::<Vec<_>>().as_slice(),
p.parse("{_hej_}").collect::<Vec<_>>().as_slice(), &[Enter(Emphasis), Atom(Str), Exit(Emphasis)],
); );
} }
} }

View file

@ -5,13 +5,13 @@ use Kind::*;
use Sequence::*; use Sequence::*;
use Symbol::*; use Symbol::*;
#[derive(Debug, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct Token { pub(crate) struct Token {
pub kind: Kind, pub kind: Kind,
pub len: usize, pub len: usize,
} }
#[derive(Debug, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub enum Kind { pub enum Kind {
Text, Text,
Whitespace, Whitespace,
@ -25,7 +25,7 @@ pub enum Kind {
Eof, Eof,
} }
#[derive(Debug, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Delimiter { pub enum Delimiter {
Brace, Brace,
BraceAsterisk, BraceAsterisk,
@ -39,7 +39,7 @@ pub enum Delimiter {
Paren, Paren,
} }
#[derive(Debug, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Symbol { pub enum Symbol {
Asterisk, Asterisk,
Caret, Caret,
@ -79,6 +79,7 @@ impl Sequence {
} }
} }
#[derive(Clone)]
pub(crate) struct Lexer<'s> { pub(crate) struct Lexer<'s> {
src: &'s str, src: &'s str,
chars: std::str::Chars<'s>, chars: std::str::Chars<'s>,
@ -254,20 +255,6 @@ mod test {
use super::Sequence::*; use super::Sequence::*;
use super::Symbol::*; use super::Symbol::*;
/*
fn tokenize(src: &str) -> impl Iterator<Item = super::Token> + '_ {
let mut lexer = super::Lexer::new(src);
std::iter::from_fn(move || {
let tok = lexer.next_token();
if matches!(tok.kind, Eof) {
None
} else {
Some(tok)
}
})
}
*/
macro_rules! test_lex { macro_rules! test_lex {
($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => { ($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => {
#[allow(unused)] #[allow(unused)]

View file

@ -4,40 +4,10 @@ mod lex;
mod span; mod span;
mod tree; mod tree;
use inline::Atom;
use inline::Container as InlineTag;
pub struct Block; pub struct Block;
const EOF: char = '\0'; const EOF: char = '\0';
type CowStr<'s> = std::borrow::Cow<'s, str>;
/*
pub enum Tag<'s> {
Paragraph,
Heading { level: u8 },
BlockQuote,
CodeBlock { info_string: CowStr<'s> },
List { start_index: Option<u64> },
ListItem,
FootnoteDefinition { label: CowStr<'s> },
Table,
Image {},
Link {},
Block(Block),
Inline(InlineTag),
}
pub struct Attributes; // TODO
pub enum Event<'s> {
Start(Tag<'s>, Attributes),
End(Tag<'s>),
Atom(Atom<'s>),
}
*/
use span::Span; use span::Span;
pub struct Parser<'s> { pub struct Parser<'s> {
@ -46,6 +16,7 @@ pub struct Parser<'s> {
} }
impl<'s> Parser<'s> { impl<'s> Parser<'s> {
#[must_use]
pub fn new(src: &'s str) -> Self { pub fn new(src: &'s str) -> Self {
Self { Self {
src, src,
@ -53,60 +24,84 @@ impl<'s> Parser<'s> {
} }
} }
pub fn parse(&mut self) {} #[must_use]
pub fn iter(&self) -> Iter { pub fn iter(&self) -> Iter {
Iter { Iter {
src: self.src, src: self.src,
tree: self.tree.iter().peekable(), tree: self.tree.iter(),
events: Vec::new(), parser: None,
} }
} }
} }
#[derive(Debug, PartialEq, Eq)]
pub enum Event {
Start(block::Block),
End,
Inline(inline::Event),
Blankline,
}
pub struct Iter<'s> { pub struct Iter<'s> {
src: &'s str, src: &'s str,
tree: std::iter::Peekable<block::TreeIter<'s>>, tree: block::TreeIter<'s>,
events: Vec<inline::Event>, parser: Option<inline::Parser<'s>>,
} }
impl<'s> Iterator for Iter<'s> { impl<'s> Iterator for Iter<'s> {
type Item = String; type Item = Event;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
self.tree.next().map(|ev| match ev { while let Some(parser) = &mut self.parser {
tree::Event::Enter(block::Block::Container(cont), _sp) => { // inside leaf block, with inline content
format!("cont {:?}", cont) if let Some(inline) = parser.next() {
} return Some(Event::Inline(inline));
tree::Event::Enter(block::Block::Leaf(leaf), _sp) => { } else if let Some(ev) = self.tree.next() {
// concatenate all inlines match ev {
let chars = (&mut self.tree) tree::Event::Element(atom, sp) => {
.take_while(|ev| matches!(ev, tree::Event::Element(..))) assert_eq!(*atom, block::Atom::Inline);
.flat_map(|ev| ev.span().of(self.src).chars()); parser.parse(sp.of(self.src));
//inline::Parser::new(chars).parse(&mut self.events);
/*
let chars = std::iter::from_fn(|| {
let mut eat = false;
let ret = if let Some(tree::Event::Element(_a, sp)) = self.tree.peek() {
eat = true;
let chars = sp.of(self.src).chars();
Some(chars)
} else {
None
};
if eat {
self.tree.next();
} }
ret tree::Event::Exit => {
}) self.parser = None;
.flatten(); return Some(Event::End);
*/ }
format!("leaf {:?} {:?}", leaf, self.events) tree::Event::Enter(..) => unreachable!(),
}
} }
}
self.tree.next().map(|ev| match ev {
tree::Event::Element(atom, _sp) => { tree::Event::Element(atom, _sp) => {
format!("atom {:?}", atom) assert_eq!(*atom, block::Atom::Blankline);
Event::Blankline
} }
tree::Event::Exit => "exit".to_string(), tree::Event::Enter(block @ block::Block::Container(..), ..) => {
Event::Start(block.clone())
}
tree::Event::Enter(block @ block::Block::Leaf(..), ..) => {
self.parser = Some(inline::Parser::new());
Event::Start(block.clone())
}
tree::Event::Exit => Event::End,
}) })
} }
} }
#[cfg(test)]
mod test {
use super::Event::*;
use crate::block::Block::*;
use crate::block::Container::*;
use crate::block::Leaf::*;
use crate::inline::Atom::*;
use crate::inline::Event::*;
#[test]
fn basic() {
assert_eq!(
super::Parser::new("abc").iter().collect::<Vec<_>>(),
&[Start(Leaf(Paragraph)), Inline(Atom(Str)), End]
);
}
}

View file

@ -7,6 +7,6 @@ fn main() {
.expect("failed to read unicode file"); .expect("failed to read unicode file");
let p = jotdown::Parser::new(&src); let p = jotdown::Parser::new(&src);
let v = p.iter().collect::<Vec<_>>(); //let v = p.parse().collect::<Vec<_>>();
print!("{:?}", v); //print!("{:?}", v);
} }