pre remove atomic state
This commit is contained in:
		
					parent
					
						
							
								227c86f4f0
							
						
					
				
			
			
				commit
				
					
						946d88e5c0
					
				
			
		
					 4 changed files with 116 additions and 47 deletions
				
			
		|  | @ -97,11 +97,11 @@ impl AtomicState { | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| pub struct Parser<'s> { | pub struct Parser<I> { | ||||||
|     /// The last inline element has been provided, finish current events.
 |     /// The last inline element has been provided, finish current events.
 | ||||||
|     last: bool, |     last: bool, | ||||||
|     /// Lexer, hosting upcoming source.
 |     /// Lexer, hosting upcoming source.
 | ||||||
|     lexer: lex::Lexer<'s>, |     lexer: lex::Lexer<I>, | ||||||
|     /// Span of current event.
 |     /// Span of current event.
 | ||||||
|     span: Span, |     span: Span, | ||||||
|     /// State of non-recursive elements.
 |     /// State of non-recursive elements.
 | ||||||
|  | @ -116,11 +116,11 @@ pub struct Parser<'s> { | ||||||
|     events: std::collections::VecDeque<Event>, |     events: std::collections::VecDeque<Event>, | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| impl<'s> Parser<'s> { | impl<I: Iterator<Item = char> + Clone> Parser<I> { | ||||||
|     pub fn new() -> Self { |     pub fn new(chars: I) -> Self { | ||||||
|         Self { |         Self { | ||||||
|             last: false, |             last: true, | ||||||
|             lexer: lex::Lexer::new(""), |             lexer: lex::Lexer::new(chars), | ||||||
|             span: Span::new(0, 0), |             span: Span::new(0, 0), | ||||||
|             atomic_state: AtomicState::None, |             atomic_state: AtomicState::None, | ||||||
|             typesets: Vec::new(), |             typesets: Vec::new(), | ||||||
|  | @ -129,13 +129,15 @@ impl<'s> Parser<'s> { | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     pub fn parse(&mut self, src: &'s str, last: bool) { |     /* | ||||||
|         self.lexer = lex::Lexer::new(src); |     pub fn parse(&mut self, src: &str, last: bool) { | ||||||
|  |         self.lexer = lex::Lexer::new(src.chars()); | ||||||
|         if last { |         if last { | ||||||
|             assert!(!self.last); |             assert!(!self.last); | ||||||
|         } |         } | ||||||
|         self.last = last; |         self.last = last; | ||||||
|     } |     } | ||||||
|  |     */ | ||||||
| 
 | 
 | ||||||
|     fn eat(&mut self) -> Option<lex::Token> { |     fn eat(&mut self) -> Option<lex::Token> { | ||||||
|         let tok = self.lexer.next(); |         let tok = self.lexer.next(); | ||||||
|  | @ -181,7 +183,8 @@ impl<'s> Parser<'s> { | ||||||
|                     && first.len == opener_len |                     && first.len == opener_len | ||||||
|                 { |                 { | ||||||
|                     self.atomic_state = AtomicState::None; |                     self.atomic_state = AtomicState::None; | ||||||
|                     let kind = |                     let kind = todo!(); | ||||||
|  |                     /* | ||||||
|                         if matches!(kind, Verbatim) && self.lexer.peek_ahead().starts_with("{=") { |                         if matches!(kind, Verbatim) && self.lexer.peek_ahead().starts_with("{=") { | ||||||
|                             let mut chars = self.lexer.peek_ahead()["{=".len()..].chars(); |                             let mut chars = self.lexer.peek_ahead()["{=".len()..].chars(); | ||||||
|                             let len = chars |                             let len = chars | ||||||
|  | @ -201,6 +204,7 @@ impl<'s> Parser<'s> { | ||||||
|                         } else { |                         } else { | ||||||
|                             kind |                             kind | ||||||
|                         }; |                         }; | ||||||
|  |                     */ | ||||||
|                     EventKind::Exit(kind) |                     EventKind::Exit(kind) | ||||||
|                 } else { |                 } else { | ||||||
|                     EventKind::Str |                     EventKind::Str | ||||||
|  | @ -261,13 +265,12 @@ impl<'s> Parser<'s> { | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     fn parse_span(&mut self, first: &lex::Token) -> Option<Event> { |     fn parse_span(&mut self, first: &lex::Token) -> Option<Event> { | ||||||
|         match first.kind { |         if let Some(open) = match first.kind { | ||||||
|             lex::Kind::Open(Delimiter::Bracket) => Some(true), |             lex::Kind::Open(Delimiter::Bracket) => Some(true), | ||||||
|             lex::Kind::Close(Delimiter::Bracket) => Some(false), |             lex::Kind::Close(Delimiter::Bracket) => Some(false), | ||||||
|             _ => None, |             _ => None, | ||||||
|         } |         } { | ||||||
|         .map(|open| { |             Some(if open { | ||||||
|             if open { |  | ||||||
|                 self.spans.push(self.events.len()); |                 self.spans.push(self.events.len()); | ||||||
|                 // use str for now, replace if closed later
 |                 // use str for now, replace if closed later
 | ||||||
|                 Event { |                 Event { | ||||||
|  | @ -275,21 +278,44 @@ impl<'s> Parser<'s> { | ||||||
|                     span: self.span, |                     span: self.span, | ||||||
|                 } |                 } | ||||||
|             } else { |             } else { | ||||||
|                 if self.lexer.peek_ahead().starts_with('[') { |                 /* | ||||||
|  |                 let kind = if self.lexer.peek_ahead().starts_with('[') { | ||||||
|                     let mut chars = self.lexer.peek_ahead()["[".len()..].chars(); |                     let mut chars = self.lexer.peek_ahead()["[".len()..].chars(); | ||||||
|                     let len = chars |                     let len = chars | ||||||
|                         .clone() |                         .clone() | ||||||
|                         .take_while(|c| !c.is_whitespace() && !matches!(c, '[' | ']')) |                         .take_while(|c| !c.is_whitespace() && !matches!(c, '[' | ']')) | ||||||
|                         .count(); |                         .count(); | ||||||
|                     match chars.nth(len) { |                     match chars.nth(len) { | ||||||
|                         Some(']') => todo!(), |                         Some(']') => EventKind::Exit(ReferenceLink), | ||||||
|                         None => self.atomic_state = AtomicState::ReferenceLinkTag, |                         None => { | ||||||
|                         _ => todo!(), |                             self.atomic_state = AtomicState::ReferenceLinkTag; | ||||||
|  |                             return None; | ||||||
|                         } |                         } | ||||||
|  |                         _ => EventKind::Str, | ||||||
|                     } |                     } | ||||||
|  |                 } else if self.lexer.peek_ahead().starts_with('(') { | ||||||
|  |                     let mut chars = self.lexer.peek_ahead()["[".len()..].chars(); | ||||||
|  |                     let len = chars | ||||||
|  |                         .clone() | ||||||
|  |                         .take_while(|c| !c.is_whitespace() && !matches!(c, '[' | ']')) | ||||||
|  |                         .count(); | ||||||
|  |                     match chars.nth(len) { | ||||||
|  |                         Some(']') => EventKind::Exit(ReferenceLink), | ||||||
|  |                         None => { | ||||||
|  |                             self.atomic_state = AtomicState::Url { auto: false }; | ||||||
|  |                             return None; | ||||||
|  |                         } | ||||||
|  |                         _ => EventKind::Str, | ||||||
|  |                     } | ||||||
|  |                 } else { | ||||||
|  |                     return None; | ||||||
|  |                 }; | ||||||
|  |                     */ | ||||||
|                 todo!() |                 todo!() | ||||||
|             } |  | ||||||
|             }) |             }) | ||||||
|  |         } else { | ||||||
|  |             None | ||||||
|  |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     fn parse_typeset(&mut self, first: &lex::Token) -> Option<Event> { |     fn parse_typeset(&mut self, first: &lex::Token) -> Option<Event> { | ||||||
|  | @ -365,7 +391,7 @@ impl<'s> Parser<'s> { | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| impl<'s> Iterator for Parser<'s> { | impl<I: Iterator<Item = char> + Clone> Iterator for Parser<I> { | ||||||
|     type Item = Event; |     type Item = Event; | ||||||
| 
 | 
 | ||||||
|     fn next(&mut self) -> Option<Self::Item> { |     fn next(&mut self) -> Option<Self::Item> { | ||||||
|  | @ -437,8 +463,7 @@ mod test { | ||||||
|     macro_rules! test_parse { |     macro_rules! test_parse { | ||||||
|         ($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => { |         ($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => { | ||||||
|             #[allow(unused)] |             #[allow(unused)] | ||||||
|             let mut p = super::Parser::new(); |             let mut p = super::Parser::new($src.chars()); | ||||||
|             p.parse($src, true); |  | ||||||
|             let actual = p.map(|ev| (ev.kind, ev.span.of($src))).collect::<Vec<_>>(); |             let actual = p.map(|ev| (ev.kind, ev.span.of($src))).collect::<Vec<_>>(); | ||||||
|             let expected = &[$($($token),*,)?]; |             let expected = &[$($($token),*,)?]; | ||||||
|             assert_eq!(actual, expected, "\n\n{}\n\n", $src); |             assert_eq!(actual, expected, "\n\n{}\n\n", $src); | ||||||
|  |  | ||||||
							
								
								
									
										20
									
								
								src/lex.rs
									
										
									
									
									
								
							
							
						
						
									
										20
									
								
								src/lex.rs
									
										
									
									
									
								
							|  | @ -82,9 +82,8 @@ impl Sequence { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| #[derive(Clone)] | #[derive(Clone)] | ||||||
| pub(crate) struct Lexer<'s> { | pub(crate) struct Lexer<I> { | ||||||
|     pub src: &'s str, |     chars: I, | ||||||
|     chars: std::str::Chars<'s>, |  | ||||||
|     /// Next character should be escaped.
 |     /// Next character should be escaped.
 | ||||||
|     escape: bool, |     escape: bool, | ||||||
|     /// Token to be peeked or next'ed.
 |     /// Token to be peeked or next'ed.
 | ||||||
|  | @ -93,11 +92,10 @@ pub(crate) struct Lexer<'s> { | ||||||
|     len: usize, |     len: usize, | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| impl<'s> Lexer<'s> { | impl<I: Iterator<Item = char> + Clone> Lexer<I> { | ||||||
|     pub fn new(src: &'s str) -> Lexer<'s> { |     pub fn new(chars: I) -> Lexer<I> { | ||||||
|         Lexer { |         Lexer { | ||||||
|             src, |             chars, | ||||||
|             chars: src.chars(), |  | ||||||
|             escape: false, |             escape: false, | ||||||
|             next: None, |             next: None, | ||||||
|             len: 0, |             len: 0, | ||||||
|  | @ -111,15 +109,19 @@ impl<'s> Lexer<'s> { | ||||||
|         self.next.as_ref() |         self.next.as_ref() | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     /* | ||||||
|     pub fn pos(&self) -> usize { |     pub fn pos(&self) -> usize { | ||||||
|         self.src.len() |         self.src.len() | ||||||
|             - self.chars.as_str().len() |             - self.chars.as_str().len() | ||||||
|             - self.next.as_ref().map(|t| t.len).unwrap_or_default() |             - self.next.as_ref().map(|t| t.len).unwrap_or_default() | ||||||
|     } |     } | ||||||
|  |     */ | ||||||
| 
 | 
 | ||||||
|  |     /* | ||||||
|     pub fn peek_ahead(&mut self) -> &'s str { |     pub fn peek_ahead(&mut self) -> &'s str { | ||||||
|         &self.src[self.pos()..] |         &self.src[self.pos()..] | ||||||
|     } |     } | ||||||
|  |     */ | ||||||
| 
 | 
 | ||||||
|     fn next_token(&mut self) -> Option<Token> { |     fn next_token(&mut self) -> Option<Token> { | ||||||
|         let mut current = self.token(); |         let mut current = self.token(); | ||||||
|  | @ -272,7 +274,7 @@ impl<'s> Lexer<'s> { | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| impl<'s> Iterator for Lexer<'s> { | impl<I: Iterator<Item = char> + Clone> Iterator for Lexer<I> { | ||||||
|     type Item = Token; |     type Item = Token; | ||||||
| 
 | 
 | ||||||
|     fn next(&mut self) -> Option<Self::Item> { |     fn next(&mut self) -> Option<Self::Item> { | ||||||
|  | @ -290,7 +292,7 @@ mod test { | ||||||
|     macro_rules! test_lex { |     macro_rules! test_lex { | ||||||
|         ($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => { |         ($($st:ident,)? $src:expr $(,$($token:expr),* $(,)?)?) => { | ||||||
|             #[allow(unused)] |             #[allow(unused)] | ||||||
|             let actual = super::Lexer::new($src).collect::<Vec<_>>(); |             let actual = super::Lexer::new($src.chars()).collect::<Vec<_>>(); | ||||||
|             let expected = vec![$($($token),*,)?]; |             let expected = vec![$($($token),*,)?]; | ||||||
|             assert_eq!(actual, expected, "{}", $src); |             assert_eq!(actual, expected, "{}", $src); | ||||||
|         }; |         }; | ||||||
|  |  | ||||||
							
								
								
									
										49
									
								
								src/lib.rs
									
										
									
									
									
								
							
							
						
						
									
										49
									
								
								src/lib.rs
									
										
									
									
									
								
							|  | @ -304,20 +304,31 @@ impl<'s> Attributes<'s> { | ||||||
|         Self(self.0.take()) |         Self(self.0.take()) | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     #[must_use] |  | ||||||
|     pub fn valid(src: &str) -> bool { |  | ||||||
|         todo!() |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     pub fn parse(&mut self, src: &'s str) { |     pub fn parse(&mut self, src: &'s str) { | ||||||
|         todo!() |         todo!() | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | #[derive(Clone)] | ||||||
|  | struct InlineChars<'t, 's> { | ||||||
|  |     src: &'s str, | ||||||
|  |     inlines: tree::Atoms<'t, block::Block, block::Atom>, | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | impl<'t, 's> Iterator for InlineChars<'t, 's> { | ||||||
|  |     type Item = char; | ||||||
|  | 
 | ||||||
|  |     fn next(&mut self) -> Option<Self::Item> { | ||||||
|  |         (&mut self.inlines) | ||||||
|  |             .flat_map(|sp| sp.of(self.src).chars()) | ||||||
|  |             .next() | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
| pub struct Parser<'s> { | pub struct Parser<'s> { | ||||||
|     src: &'s str, |     src: &'s str, | ||||||
|     tree: block::Tree, |     tree: block::Tree, | ||||||
|     parser: Option<inline::Parser<'s>>, |     inline_parser: Option<inline::Parser<InlineChars<'s, 's>>>, | ||||||
|     inline_start: usize, |     inline_start: usize, | ||||||
|     block_attributes: Attributes<'s>, |     block_attributes: Attributes<'s>, | ||||||
| } | } | ||||||
|  | @ -328,7 +339,7 @@ impl<'s> Parser<'s> { | ||||||
|         Self { |         Self { | ||||||
|             src, |             src, | ||||||
|             tree: block::parse(src), |             tree: block::parse(src), | ||||||
|             parser: None, |             inline_parser: None, | ||||||
|             inline_start: 0, |             inline_start: 0, | ||||||
|             block_attributes: Attributes::none(), |             block_attributes: Attributes::none(), | ||||||
|         } |         } | ||||||
|  | @ -339,12 +350,15 @@ impl<'s> Iterator for Parser<'s> { | ||||||
|     type Item = Event<'s>; |     type Item = Event<'s>; | ||||||
| 
 | 
 | ||||||
|     fn next(&mut self) -> Option<Self::Item> { |     fn next(&mut self) -> Option<Self::Item> { | ||||||
|         while let Some(parser) = &mut self.parser { |         if let Some(parser) = &mut self.inline_parser { | ||||||
|             // inside leaf block, with inline content
 |             // inside leaf block, with inline content
 | ||||||
|             if let Some(mut inline) = parser.next() { |             if let Some(mut inline) = parser.next() { | ||||||
|                 inline.span = inline.span.translate(self.inline_start); |                 inline.span = inline.span.translate(self.inline_start); | ||||||
|                 return Some(Event::from_inline(self.src, inline)); |                 return Some(Event::from_inline(self.src, inline)); | ||||||
|             } else if let Some(ev) = self.tree.next() { |             } | ||||||
|  |             self.inline_parser = None; | ||||||
|  |             /* | ||||||
|  |             else if let Some(ev) = self.tree.next() { | ||||||
|                 match ev.kind { |                 match ev.kind { | ||||||
|                     tree::EventKind::Atom(a) => { |                     tree::EventKind::Atom(a) => { | ||||||
|                         assert_eq!(a, block::Atom::Inline); |                         assert_eq!(a, block::Atom::Inline); | ||||||
|  | @ -352,12 +366,13 @@ impl<'s> Iterator for Parser<'s> { | ||||||
|                         parser.parse(ev.span.of(self.src), last_inline); |                         parser.parse(ev.span.of(self.src), last_inline); | ||||||
|                     } |                     } | ||||||
|                     tree::EventKind::Exit(c) => { |                     tree::EventKind::Exit(c) => { | ||||||
|                         self.parser = None; |                         self.inline_parser = None; | ||||||
|                         return Some(Event::End(Container::from_block(ev.span.of(self.src), c))); |                         return Some(Event::End(Container::from_block(ev.span.of(self.src), c))); | ||||||
|                     } |                     } | ||||||
|                     tree::EventKind::Enter(..) => unreachable!(), |                     tree::EventKind::Enter(..) => unreachable!(), | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|  |             */ | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         for ev in &mut self.tree { |         for ev in &mut self.tree { | ||||||
|  | @ -372,12 +387,18 @@ impl<'s> Iterator for Parser<'s> { | ||||||
|                         continue; |                         continue; | ||||||
|                     } |                     } | ||||||
|                 }, |                 }, | ||||||
|                 tree::EventKind::Enter(c) => { |                 tree::EventKind::Enter(b) => { | ||||||
|                     if matches!(c, block::Block::Leaf(_)) { |                     if matches!(b, block::Block::Leaf(_)) { | ||||||
|                         self.parser = Some(inline::Parser::new()); |                         let chars = InlineChars { | ||||||
|  |                             src: self.src, | ||||||
|  |                             inlines: self.tree.atoms(), | ||||||
|  |                         }; | ||||||
|  |                         // TODO solve self-referential reference here without unsafe
 | ||||||
|  |                         self.inline_parser = | ||||||
|  |                             unsafe { Some(std::mem::transmute(inline::Parser::new(chars))) }; | ||||||
|                         self.inline_start = ev.span.end(); |                         self.inline_start = ev.span.end(); | ||||||
|                     } |                     } | ||||||
|                     let container = match c { |                     let container = match b { | ||||||
|                         block::Block::Leaf(block::Leaf::CodeBlock { .. }) => { |                         block::Block::Leaf(block::Leaf::CodeBlock { .. }) => { | ||||||
|                             self.inline_start += 1; // skip newline
 |                             self.inline_start += 1; // skip newline
 | ||||||
|                             Container::CodeBlock { |                             Container::CodeBlock { | ||||||
|  |  | ||||||
							
								
								
									
										23
									
								
								src/tree.rs
									
										
									
									
									
								
							
							
						
						
									
										23
									
								
								src/tree.rs
									
										
									
									
									
								
							|  | @ -20,6 +20,19 @@ pub struct Tree<C, A> { | ||||||
|     head: Option<NodeIndex>, |     head: Option<NodeIndex>, | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | #[derive(Clone)] | ||||||
|  | pub struct Atoms<'t, C, A> { | ||||||
|  |     iter: std::slice::Iter<'t, Node<C, A>>, | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | impl<'t, C, A> Iterator for Atoms<'t, C, A> { | ||||||
|  |     type Item = Span; | ||||||
|  | 
 | ||||||
|  |     fn next(&mut self) -> Option<Self::Item> { | ||||||
|  |         self.iter.next().map(|n| n.span) | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
| impl<C: Copy, A: Copy> Tree<C, A> { | impl<C: Copy, A: Copy> Tree<C, A> { | ||||||
|     fn new(nodes: Vec<Node<C, A>>) -> Self { |     fn new(nodes: Vec<Node<C, A>>) -> Self { | ||||||
|         let head = nodes[NodeIndex::root().index()].next; |         let head = nodes[NodeIndex::root().index()].next; | ||||||
|  | @ -30,7 +43,15 @@ impl<C: Copy, A: Copy> Tree<C, A> { | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     pub fn atoms(&self) -> impl Iterator<Item = (A, Span)> + '_ { |     pub fn atoms(&self) -> Atoms<C, A> { | ||||||
|  |         let start = self.nodes[self.head.unwrap().index()].next.unwrap().index(); | ||||||
|  |         let end = start + self.atoms_().count(); | ||||||
|  |         Atoms { | ||||||
|  |             iter: self.nodes[start..end].iter(), | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     pub fn atoms_(&self) -> impl Iterator<Item = (A, Span)> + '_ { | ||||||
|         let mut head = self.head; |         let mut head = self.head; | ||||||
|         std::iter::from_fn(move || { |         std::iter::from_fn(move || { | ||||||
|             head.take().map(|h| { |             head.take().map(|h| { | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue