diff --git a/src/inline.rs b/src/inline.rs index dc31507..c4e7048 100644 --- a/src/inline.rs +++ b/src/inline.rs @@ -97,11 +97,11 @@ impl AtomicState { } } -pub struct Parser<'s> { +pub struct Parser { /// The last inline element has been provided, finish current events. last: bool, /// Lexer, hosting upcoming source. - lexer: lex::Lexer<'s>, + lexer: lex::Lexer, /// Span of current event. span: Span, /// State of non-recursive elements. @@ -116,11 +116,11 @@ pub struct Parser<'s> { events: std::collections::VecDeque, } -impl<'s> Parser<'s> { - pub fn new() -> Self { +impl + Clone> Parser { + pub fn new(chars: I) -> Self { Self { - last: false, - lexer: lex::Lexer::new(""), + last: true, + lexer: lex::Lexer::new(chars), span: Span::new(0, 0), atomic_state: AtomicState::None, typesets: Vec::new(), @@ -129,13 +129,15 @@ impl<'s> Parser<'s> { } } - pub fn parse(&mut self, src: &'s str, last: bool) { - self.lexer = lex::Lexer::new(src); + /* + pub fn parse(&mut self, src: &str, last: bool) { + self.lexer = lex::Lexer::new(src.chars()); if last { assert!(!self.last); } self.last = last; } + */ fn eat(&mut self) -> Option { let tok = self.lexer.next(); @@ -181,7 +183,8 @@ impl<'s> Parser<'s> { && first.len == opener_len { self.atomic_state = AtomicState::None; - let kind = + let kind = todo!(); + /* if matches!(kind, Verbatim) && self.lexer.peek_ahead().starts_with("{=") { let mut chars = self.lexer.peek_ahead()["{=".len()..].chars(); let len = chars @@ -201,6 +204,7 @@ impl<'s> Parser<'s> { } else { kind }; + */ EventKind::Exit(kind) } else { EventKind::Str @@ -261,13 +265,12 @@ impl<'s> Parser<'s> { } fn parse_span(&mut self, first: &lex::Token) -> Option { - match first.kind { + if let Some(open) = match first.kind { lex::Kind::Open(Delimiter::Bracket) => Some(true), lex::Kind::Close(Delimiter::Bracket) => Some(false), _ => None, - } - .map(|open| { - if open { + } { + Some(if open { self.spans.push(self.events.len()); // use str for now, replace if closed later Event { @@ -275,21 +278,44 @@ impl<'s> Parser<'s> { span: self.span, } } else { - if self.lexer.peek_ahead().starts_with('[') { + /* + let kind = if self.lexer.peek_ahead().starts_with('[') { let mut chars = self.lexer.peek_ahead()["[".len()..].chars(); let len = chars .clone() .take_while(|c| !c.is_whitespace() && !matches!(c, '[' | ']')) .count(); match chars.nth(len) { - Some(']') => todo!(), - None => self.atomic_state = AtomicState::ReferenceLinkTag, - _ => todo!(), + Some(']') => EventKind::Exit(ReferenceLink), + None => { + self.atomic_state = AtomicState::ReferenceLinkTag; + return None; + } + _ => EventKind::Str, } - } + } else if self.lexer.peek_ahead().starts_with('(') { + let mut chars = self.lexer.peek_ahead()["[".len()..].chars(); + let len = chars + .clone() + .take_while(|c| !c.is_whitespace() && !matches!(c, '[' | ']')) + .count(); + match chars.nth(len) { + Some(']') => EventKind::Exit(ReferenceLink), + None => { + self.atomic_state = AtomicState::Url { auto: false }; + return None; + } + _ => EventKind::Str, + } + } else { + return None; + }; + */ todo!() - } - }) + }) + } else { + None + } } fn parse_typeset(&mut self, first: &lex::Token) -> Option { @@ -365,7 +391,7 @@ impl<'s> Parser<'s> { } } -impl<'s> Iterator for Parser<'s> { +impl + Clone> Iterator for Parser { type Item = Event; fn next(&mut self) -> Option { @@ -437,8 +463,7 @@ mod test { macro_rules! test_parse { ($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => { #[allow(unused)] - let mut p = super::Parser::new(); - p.parse($src, true); + let mut p = super::Parser::new($src.chars()); let actual = p.map(|ev| (ev.kind, ev.span.of($src))).collect::>(); let expected = &[$($($token),*,)?]; assert_eq!(actual, expected, "\n\n{}\n\n", $src); diff --git a/src/lex.rs b/src/lex.rs index 333d173..e12014c 100644 --- a/src/lex.rs +++ b/src/lex.rs @@ -82,9 +82,8 @@ impl Sequence { } #[derive(Clone)] -pub(crate) struct Lexer<'s> { - pub src: &'s str, - chars: std::str::Chars<'s>, +pub(crate) struct Lexer { + chars: I, /// Next character should be escaped. escape: bool, /// Token to be peeked or next'ed. @@ -93,11 +92,10 @@ pub(crate) struct Lexer<'s> { len: usize, } -impl<'s> Lexer<'s> { - pub fn new(src: &'s str) -> Lexer<'s> { +impl + Clone> Lexer { + pub fn new(chars: I) -> Lexer { Lexer { - src, - chars: src.chars(), + chars, escape: false, next: None, len: 0, @@ -111,15 +109,19 @@ impl<'s> Lexer<'s> { self.next.as_ref() } + /* pub fn pos(&self) -> usize { self.src.len() - self.chars.as_str().len() - self.next.as_ref().map(|t| t.len).unwrap_or_default() } + */ + /* pub fn peek_ahead(&mut self) -> &'s str { &self.src[self.pos()..] } + */ fn next_token(&mut self) -> Option { let mut current = self.token(); @@ -272,7 +274,7 @@ impl<'s> Lexer<'s> { } } -impl<'s> Iterator for Lexer<'s> { +impl + Clone> Iterator for Lexer { type Item = Token; fn next(&mut self) -> Option { @@ -290,7 +292,7 @@ mod test { macro_rules! test_lex { ($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => { #[allow(unused)] - let actual = super::Lexer::new($src).collect::>(); + let actual = super::Lexer::new($src.chars()).collect::>(); let expected = vec![$($($token),*,)?]; assert_eq!(actual, expected, "{}", $src); }; diff --git a/src/lib.rs b/src/lib.rs index ced2533..483b349 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -304,20 +304,31 @@ impl<'s> Attributes<'s> { Self(self.0.take()) } - #[must_use] - pub fn valid(src: &str) -> bool { - todo!() - } - pub fn parse(&mut self, src: &'s str) { todo!() } } +#[derive(Clone)] +struct InlineChars<'t, 's> { + src: &'s str, + inlines: tree::Atoms<'t, block::Block, block::Atom>, +} + +impl<'t, 's> Iterator for InlineChars<'t, 's> { + type Item = char; + + fn next(&mut self) -> Option { + (&mut self.inlines) + .flat_map(|sp| sp.of(self.src).chars()) + .next() + } +} + pub struct Parser<'s> { src: &'s str, tree: block::Tree, - parser: Option>, + inline_parser: Option>>, inline_start: usize, block_attributes: Attributes<'s>, } @@ -328,7 +339,7 @@ impl<'s> Parser<'s> { Self { src, tree: block::parse(src), - parser: None, + inline_parser: None, inline_start: 0, block_attributes: Attributes::none(), } @@ -339,12 +350,15 @@ impl<'s> Iterator for Parser<'s> { type Item = Event<'s>; fn next(&mut self) -> Option { - while let Some(parser) = &mut self.parser { + if let Some(parser) = &mut self.inline_parser { // inside leaf block, with inline content if let Some(mut inline) = parser.next() { inline.span = inline.span.translate(self.inline_start); return Some(Event::from_inline(self.src, inline)); - } else if let Some(ev) = self.tree.next() { + } + self.inline_parser = None; + /* + else if let Some(ev) = self.tree.next() { match ev.kind { tree::EventKind::Atom(a) => { assert_eq!(a, block::Atom::Inline); @@ -352,12 +366,13 @@ impl<'s> Iterator for Parser<'s> { parser.parse(ev.span.of(self.src), last_inline); } tree::EventKind::Exit(c) => { - self.parser = None; + self.inline_parser = None; return Some(Event::End(Container::from_block(ev.span.of(self.src), c))); } tree::EventKind::Enter(..) => unreachable!(), } } + */ } for ev in &mut self.tree { @@ -372,12 +387,18 @@ impl<'s> Iterator for Parser<'s> { continue; } }, - tree::EventKind::Enter(c) => { - if matches!(c, block::Block::Leaf(_)) { - self.parser = Some(inline::Parser::new()); + tree::EventKind::Enter(b) => { + if matches!(b, block::Block::Leaf(_)) { + let chars = InlineChars { + src: self.src, + inlines: self.tree.atoms(), + }; + // TODO solve self-referential reference here without unsafe + self.inline_parser = + unsafe { Some(std::mem::transmute(inline::Parser::new(chars))) }; self.inline_start = ev.span.end(); } - let container = match c { + let container = match b { block::Block::Leaf(block::Leaf::CodeBlock { .. }) => { self.inline_start += 1; // skip newline Container::CodeBlock { diff --git a/src/tree.rs b/src/tree.rs index cd5b74b..6fa6b0c 100644 --- a/src/tree.rs +++ b/src/tree.rs @@ -20,6 +20,19 @@ pub struct Tree { head: Option, } +#[derive(Clone)] +pub struct Atoms<'t, C, A> { + iter: std::slice::Iter<'t, Node>, +} + +impl<'t, C, A> Iterator for Atoms<'t, C, A> { + type Item = Span; + + fn next(&mut self) -> Option { + self.iter.next().map(|n| n.span) + } +} + impl Tree { fn new(nodes: Vec>) -> Self { let head = nodes[NodeIndex::root().index()].next; @@ -30,7 +43,15 @@ impl Tree { } } - pub fn atoms(&self) -> impl Iterator + '_ { + pub fn atoms(&self) -> Atoms { + let start = self.nodes[self.head.unwrap().index()].next.unwrap().index(); + let end = start + self.atoms_().count(); + Atoms { + iter: self.nodes[start..end].iter(), + } + } + + pub fn atoms_(&self) -> impl Iterator + '_ { let mut head = self.head; std::iter::from_fn(move || { head.take().map(|h| {