wip
This commit is contained in:
parent
fe45519ca9
commit
cc59484086
3 changed files with 137 additions and 139 deletions
240
src/inline.rs
240
src/inline.rs
|
@ -1,40 +1,32 @@
|
|||
use crate::Span;
|
||||
use crate::lex;
|
||||
|
||||
use crate::tree;
|
||||
use crate::CowStr;
|
||||
use lex::Delimiter;
|
||||
use lex::Symbol;
|
||||
|
||||
use Atom::*;
|
||||
use Container::*;
|
||||
|
||||
pub type Tree<'s> = tree::Tree<Container, Atom<'s>>;
|
||||
|
||||
/*
|
||||
pub fn parse<'s, I: Iterator<Item = Span>>(src: &'s str, inlines: I) -> Vec<Event<'s>> {
|
||||
Parser::new(src).parse(inlines)
|
||||
}
|
||||
*/
|
||||
|
||||
pub enum Inline<'s> {
|
||||
Atom(Atom<'s>),
|
||||
Container(Container),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum Atom<'s> {
|
||||
pub enum Atom {
|
||||
Str,
|
||||
Softbreak,
|
||||
Hardbreak,
|
||||
Escape,
|
||||
Nbsp, // ??
|
||||
Nbsp,
|
||||
OpenMarker, // ??
|
||||
Ellipses, // ??
|
||||
Ellipses,
|
||||
ImageMarker, // ??
|
||||
EmDash, // ??
|
||||
FootnoteReference { label: CowStr<'s> },
|
||||
ExplicitLink { label: CowStr<'s> },
|
||||
ReferenceLink { label: CowStr<'s> },
|
||||
Emoji { name: CowStr<'s> },
|
||||
RawFormat { format: CowStr<'s> },
|
||||
EmDash,
|
||||
EnDash,
|
||||
FootnoteReference,
|
||||
Link,
|
||||
ReferenceLink,
|
||||
Emoji,
|
||||
RawFormat,
|
||||
// math
|
||||
DisplayMath,
|
||||
InlineMath,
|
||||
Verbatim,
|
||||
}
|
||||
|
||||
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
|
||||
|
@ -47,18 +39,14 @@ pub enum Container {
|
|||
Superscript,
|
||||
Insert,
|
||||
Delete,
|
||||
Emph,
|
||||
Emphasis,
|
||||
Strong,
|
||||
Mark,
|
||||
Verbatim,
|
||||
// smart quoting
|
||||
SingleQuoted,
|
||||
DoubleQuoted,
|
||||
// math
|
||||
DisplayMath,
|
||||
InlineMath,
|
||||
// URLs
|
||||
Email,
|
||||
AutoUrl,
|
||||
Url,
|
||||
ImageText,
|
||||
LinkText,
|
||||
|
@ -67,124 +55,138 @@ pub enum Container {
|
|||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Event<'s> {
|
||||
Start(Container, OpenerState),
|
||||
pub enum Event {
|
||||
Start(Container),
|
||||
End(Container),
|
||||
Atom(Atom<'s>),
|
||||
Atom(Atom),
|
||||
}
|
||||
|
||||
/*
|
||||
#[derive(Debug)]
|
||||
pub enum OpenerState {
|
||||
Unclosed,
|
||||
Closed,
|
||||
Discarded,
|
||||
}
|
||||
*/
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum ContainerType {
|
||||
Opener,
|
||||
Closer,
|
||||
pub enum Dir {
|
||||
Open,
|
||||
Close,
|
||||
Both,
|
||||
}
|
||||
|
||||
pub struct Parser<'s, I: Iterator<Item = char>> {
|
||||
chars: std::iter::Peekable<I>,
|
||||
openers: Vec<(Container, usize)>,
|
||||
events: Vec<Event<'s>>,
|
||||
pub struct Parser<I: Iterator<Item = char>> {
|
||||
tokens: std::iter::Peekable<lex::Lexer<I>>,
|
||||
openers: Vec<Container>,
|
||||
//tree: tree::Builder<Container, Atom>,
|
||||
}
|
||||
|
||||
impl<'s, I: Iterator<Item = char>> Parser<'s, I> {
|
||||
impl<I: Iterator<Item = char>> Parser<I> {
|
||||
pub fn new(chars: I) -> Self {
|
||||
Self {
|
||||
chars: chars.peekable(),
|
||||
tokens: lex::Lexer::new(chars).peekable(),
|
||||
openers: Vec::new(),
|
||||
events: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
fn step(&mut self) -> lex::Token {
|
||||
let token = lex::Lexer::new(&self.src[self.pos..]).next_token();
|
||||
self.pos += token.len;
|
||||
std::mem::replace(&mut self.next_token, token)
|
||||
}
|
||||
|
||||
fn eat(&mut self) -> lex::TokenKind {
|
||||
loop {
|
||||
let end = self.pos;
|
||||
let token = self.step();
|
||||
if !matches!(token.kind, lex::TokenKind::Whitespace) {
|
||||
self.span = Span::new(end - token.len, end);
|
||||
return token.kind;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn peek(&mut self) -> &lex::TokenKind {
|
||||
if matches!(self.next_token.kind, lex::TokenKind::Whitespace) {
|
||||
let _whitespace = self.step();
|
||||
}
|
||||
&self.next_token.kind
|
||||
}
|
||||
*/
|
||||
|
||||
pub fn parse(mut self) -> Vec<(Event<'s>, u32)> {
|
||||
let mut len = 0;
|
||||
|
||||
while let Some(c) = self.chars.peek() {
|
||||
//let start = self.pos();
|
||||
|
||||
let cont = match c {
|
||||
'*' => Some((Strong, ContainerType::Both)),
|
||||
'_' => Some((Emph, ContainerType::Both)),
|
||||
'^' => Some((Superscript, ContainerType::Both)),
|
||||
'~' => Some((Subscript, ContainerType::Both)),
|
||||
'\'' => Some((SingleQuoted, ContainerType::Both)),
|
||||
'"' => Some((DoubleQuoted, ContainerType::Both)),
|
||||
'`' => todo!(),
|
||||
'{' => todo!(),
|
||||
'$' => todo!(),
|
||||
'<' => todo!(),
|
||||
'[' => todo!(),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
let ev = cont
|
||||
.and_then(|(cont, ty)| {
|
||||
self.openers
|
||||
.iter()
|
||||
.rposition(|(c, _)| *c == cont)
|
||||
.map(|i| {
|
||||
if let Event::Start(c, state) = &mut self.events[i] {
|
||||
assert_eq!(*c, cont);
|
||||
if matches!(ty, ContainerType::Closer | ContainerType::Both) {
|
||||
*state = OpenerState::Closed;
|
||||
Some(Event::End(cont))
|
||||
} else if matches!(ty, ContainerType::Opener | ContainerType::Both)
|
||||
pub fn parse(mut self, evs: &mut Vec<Event>) {
|
||||
while let Some(t) = self.tokens.next() {
|
||||
{
|
||||
*state = OpenerState::Discarded;
|
||||
Some(Event::Start(cont, OpenerState::Unclosed))
|
||||
let verbatim_opt = match t.kind {
|
||||
lex::Kind::Seq(lex::Sequence::Dollar) => {
|
||||
let math_opt = (t.len <= 2)
|
||||
.then(|| {
|
||||
if let Some(lex::Token {
|
||||
kind: lex::Kind::Seq(lex::Sequence::Backtick),
|
||||
len,
|
||||
}) = self.tokens.peek()
|
||||
{
|
||||
Some((DisplayMath, *len))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
} else {
|
||||
unreachable!()
|
||||
})
|
||||
.flatten();
|
||||
if math_opt.is_some() {
|
||||
self.tokens.next(); // backticks
|
||||
}
|
||||
})
|
||||
.unwrap_or_else(|| {
|
||||
matches!(ty, ContainerType::Opener | ContainerType::Both).then(|| {
|
||||
self.openers.push((cont, self.events.len()));
|
||||
Event::Start(cont, OpenerState::Unclosed)
|
||||
})
|
||||
})
|
||||
})
|
||||
.unwrap_or(Event::Atom(Str));
|
||||
math_opt
|
||||
}
|
||||
lex::Kind::Seq(lex::Sequence::Backtick) => Some((Verbatim, t.len)),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
self.events.push(ev);
|
||||
if let Some((atom, opener_len)) = verbatim_opt {
|
||||
for tok in self.tokens {
|
||||
if let lex::Kind::Seq(lex::Sequence::Backtick) = tok.kind {
|
||||
if tok.len >= opener_len {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
evs.push(Event::Atom(atom));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
let container_opt = match t.kind {
|
||||
lex::Kind::Sym(Symbol::Asterisk) => Some((Strong, Dir::Both)),
|
||||
lex::Kind::Sym(Symbol::Underscore) => Some((Emphasis, Dir::Both)),
|
||||
lex::Kind::Sym(Symbol::Caret) => Some((Superscript, Dir::Both)),
|
||||
lex::Kind::Sym(Symbol::Tilde) => Some((Subscript, Dir::Both)),
|
||||
lex::Kind::Sym(Symbol::Quote1) => Some((SingleQuoted, Dir::Both)),
|
||||
lex::Kind::Sym(Symbol::Quote2) => Some((DoubleQuoted, Dir::Both)),
|
||||
lex::Kind::Open(Delimiter::Bracket) => Some((LinkText, Dir::Open)),
|
||||
lex::Kind::Open(Delimiter::BraceAsterisk) => Some((Strong, Dir::Open)),
|
||||
lex::Kind::Open(Delimiter::BraceCaret) => Some((Superscript, Dir::Open)),
|
||||
lex::Kind::Open(Delimiter::BraceEqual) => Some((Mark, Dir::Open)),
|
||||
lex::Kind::Open(Delimiter::BraceHyphen) => Some((Delete, Dir::Open)),
|
||||
lex::Kind::Open(Delimiter::BracePlus) => Some((Insert, Dir::Open)),
|
||||
lex::Kind::Open(Delimiter::BraceTilde) => Some((Subscript, Dir::Open)),
|
||||
lex::Kind::Open(Delimiter::BraceUnderscore) => Some((Emphasis, Dir::Open)),
|
||||
lex::Kind::Close(Delimiter::Bracket) => Some((LinkText, Dir::Close)),
|
||||
lex::Kind::Close(Delimiter::BraceAsterisk) => Some((Strong, Dir::Close)),
|
||||
lex::Kind::Close(Delimiter::BraceCaret) => Some((Superscript, Dir::Close)),
|
||||
lex::Kind::Close(Delimiter::BraceEqual) => Some((Mark, Dir::Close)),
|
||||
lex::Kind::Close(Delimiter::BraceHyphen) => Some((Delete, Dir::Close)),
|
||||
lex::Kind::Close(Delimiter::BracePlus) => Some((Insert, Dir::Close)),
|
||||
lex::Kind::Close(Delimiter::BraceTilde) => Some((Subscript, Dir::Close)),
|
||||
lex::Kind::Close(Delimiter::BraceUnderscore) => Some((Emphasis, Dir::Close)),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
if let Some((cont, ty)) = container_opt {
|
||||
if matches!(ty, Dir::Close | Dir::Both) && self.openers.contains(&cont) {
|
||||
loop {
|
||||
let c = self.openers.pop().unwrap();
|
||||
evs.push(Event::End(c));
|
||||
if c == cont {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return;
|
||||
} else if matches!(ty, Dir::Open | Dir::Both) {
|
||||
self.openers.push(cont);
|
||||
evs.push(Event::Start(cont));
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
if let lex::Kind::Open(Delimiter::Brace) = t.kind {
|
||||
todo!(); // check for attr
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(Event::Atom(Str)) = evs.last() {
|
||||
} else {
|
||||
evs.push(Event::Atom(Str));
|
||||
}
|
||||
}
|
||||
//self.events
|
||||
todo!()
|
||||
}
|
||||
}
|
||||
|
||||
|
|
28
src/lex.rs
28
src/lex.rs
|
@ -3,16 +3,16 @@ use crate::EOF;
|
|||
use Delimiter::*;
|
||||
use Sequence::*;
|
||||
use Symbol::*;
|
||||
use TokenKind::*;
|
||||
use Kind::*;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct Token {
|
||||
pub kind: TokenKind,
|
||||
pub kind: Kind,
|
||||
pub len: usize,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub enum TokenKind {
|
||||
pub enum Kind {
|
||||
Text,
|
||||
Whitespace,
|
||||
Nbsp,
|
||||
|
@ -42,8 +42,6 @@ pub enum Delimiter {
|
|||
pub enum Symbol {
|
||||
Asterisk,
|
||||
Caret,
|
||||
Dollar1,
|
||||
Dollar2,
|
||||
Equal,
|
||||
Exclaim,
|
||||
Gt,
|
||||
|
@ -61,6 +59,7 @@ pub enum Symbol {
|
|||
pub enum Sequence {
|
||||
Backtick,
|
||||
Colon,
|
||||
Dollar,
|
||||
Hash,
|
||||
Hyphen,
|
||||
Period,
|
||||
|
@ -71,6 +70,7 @@ impl Sequence {
|
|||
match self {
|
||||
Self::Backtick => '`',
|
||||
Self::Colon => ':',
|
||||
Self::Dollar => '$',
|
||||
Self::Hash => '#',
|
||||
Self::Period => '.',
|
||||
Self::Hyphen => '-',
|
||||
|
@ -176,14 +176,6 @@ impl<I: Iterator<Item = char>> Lexer<I> {
|
|||
}
|
||||
}
|
||||
|
||||
'$' => {
|
||||
if self.peek() == '$' {
|
||||
self.eat();
|
||||
Sym(Dollar2)
|
||||
} else {
|
||||
Sym(Dollar1)
|
||||
}
|
||||
}
|
||||
'!' => Sym(Exclaim),
|
||||
'%' => Sym(Percentage),
|
||||
'<' => Sym(Lt),
|
||||
|
@ -194,6 +186,7 @@ impl<I: Iterator<Item = char>> Lexer<I> {
|
|||
|
||||
'`' => self.eat_seq(Backtick),
|
||||
':' => self.eat_seq(Colon),
|
||||
'$' => self.eat_seq(Dollar),
|
||||
'#' => self.eat_seq(Hash),
|
||||
'.' => self.eat_seq(Period),
|
||||
|
||||
|
@ -214,12 +207,12 @@ impl<I: Iterator<Item = char>> Lexer<I> {
|
|||
Some(Token { kind, len })
|
||||
}
|
||||
|
||||
fn eat_seq(&mut self, s: Sequence) -> TokenKind {
|
||||
fn eat_seq(&mut self, s: Sequence) -> Kind {
|
||||
self.eat_while(|c| c == s.ch());
|
||||
Seq(s)
|
||||
}
|
||||
|
||||
fn maybe_eat_close_brace(&mut self, s: Symbol, d: Delimiter) -> TokenKind {
|
||||
fn maybe_eat_close_brace(&mut self, s: Symbol, d: Delimiter) -> Kind {
|
||||
if self.peek() == '}' {
|
||||
self.eat();
|
||||
Close(d)
|
||||
|
@ -257,7 +250,7 @@ mod test {
|
|||
use super::Delimiter::*;
|
||||
use super::Sequence::*;
|
||||
use super::Symbol::*;
|
||||
use super::TokenKind::*;
|
||||
use super::Kind::*;
|
||||
|
||||
macro_rules! test_lex {
|
||||
($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => {
|
||||
|
@ -339,9 +332,10 @@ mod test {
|
|||
test_lex!("`", Seq(Backtick));
|
||||
test_lex!("```", Seq(Backtick));
|
||||
test_lex!(
|
||||
"`:#-.",
|
||||
"`:$#-.",
|
||||
Seq(Backtick),
|
||||
Seq(Colon),
|
||||
Seq(Dollar),
|
||||
Seq(Hash),
|
||||
Seq(Hyphen),
|
||||
Seq(Period),
|
||||
|
|
|
@ -59,6 +59,7 @@ impl<'s> Parser<'s> {
|
|||
Iter {
|
||||
src: self.src,
|
||||
tree: self.tree.iter().peekable(),
|
||||
events: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -66,6 +67,7 @@ impl<'s> Parser<'s> {
|
|||
pub struct Iter<'s> {
|
||||
src: &'s str,
|
||||
tree: std::iter::Peekable<block::TreeIter<'s>>,
|
||||
events: Vec<inline::Event>,
|
||||
}
|
||||
|
||||
impl<'s> Iterator for Iter<'s> {
|
||||
|
@ -81,7 +83,7 @@ impl<'s> Iterator for Iter<'s> {
|
|||
let chars = (&mut self.tree)
|
||||
.take_while(|ev| matches!(ev, tree::Event::Element(..)))
|
||||
.flat_map(|ev| ev.span().of(self.src).chars());
|
||||
let evs = inline::Parser::new(chars).parse();
|
||||
inline::Parser::new(chars).parse(&mut self.events);
|
||||
/*
|
||||
let chars = std::iter::from_fn(|| {
|
||||
let mut eat = false;
|
||||
|
@ -99,7 +101,7 @@ impl<'s> Iterator for Iter<'s> {
|
|||
})
|
||||
.flatten();
|
||||
*/
|
||||
format!("leaf {:?} {:?}", leaf, evs)
|
||||
format!("leaf {:?} {:?}", leaf, self.events)
|
||||
}
|
||||
tree::Event::Element(atom, _sp) => {
|
||||
format!("atom {:?}", atom)
|
||||
|
|
Loading…
Reference in a new issue