From e798dc9c28e4f6e2b51abd8ba85de3e3cdf90c01 Mon Sep 17 00:00:00 2001 From: Noah Hellman Date: Sun, 11 Dec 2022 10:45:05 +0100 Subject: [PATCH] raw inline format --- src/html.rs | 73 +++++++++++++++++++----------- src/inline.rs | 122 +++++++++++++++++++++++++++++++------------------- src/lex.rs | 15 +++++++ src/lib.rs | 17 +++++-- 4 files changed, 154 insertions(+), 73 deletions(-) diff --git a/src/html.rs b/src/html.rs index 8ea8b0d..f71f2e2 100644 --- a/src/html.rs +++ b/src/html.rs @@ -42,14 +42,25 @@ pub fn write<'s, I: Iterator>, W: std::io::Write>( .map_err(|_| output.error.unwrap_err()) } +enum Raw { + None, + Html, + Other, +} + struct Writer { events: I, out: W, + raw: Raw, } impl<'s, I: Iterator>, W: std::fmt::Write> Writer { fn new(events: I, out: W) -> Self { - Self { events, out } + Self { + events, + out, + raw: Raw::None, + } } fn write(&mut self) -> std::fmt::Result { @@ -79,7 +90,6 @@ impl<'s, I: Iterator>, W: std::fmt::Write> Writer { Container::Heading { level } => write!(self.out, "", level)?, Container::TableCell => self.out.write_str("")?, Container::DescriptionTerm => self.out.write_str("
")?, - Container::RawBlock { .. } => todo!(), Container::CodeBlock { lang } => { if let Some(l) = lang { write!(self.out, r#"
"#, l)?;
@@ -96,7 +106,13 @@ impl<'s, I: Iterator>, W: std::fmt::Write> Writer {
                         } else {
                             r#"\("#
                         })?,
-                        Container::RawInline { .. } => todo!(),
+                        Container::RawBlock { format } | Container::RawInline { format } => {
+                            self.raw = if format == "html" {
+                                Raw::Html
+                            } else {
+                                Raw::Other
+                            }
+                        }
                         Container::Subscript => self.out.write_str("")?,
                         Container::Superscript => self.out.write_str("")?,
                         Container::Insert => self.out.write_str("")?,
@@ -126,14 +142,15 @@ impl<'s, I: Iterator>, W: std::fmt::Write> Writer {
                         Container::Heading { level } => write!(self.out, "", level)?,
                         Container::TableCell => self.out.write_str("")?,
                         Container::DescriptionTerm => self.out.write_str("
")?, - Container::RawBlock { .. } => todo!(), Container::CodeBlock { .. } => self.out.write_str("")?, Container::Span => self.out.write_str("")?, Container::Link(..) => todo!(), Container::Image(..) => todo!(), Container::Verbatim => self.out.write_str("")?, Container::Math { .. } => self.out.write_str("")?, - Container::RawInline { .. } => todo!(), + Container::RawBlock { .. } | Container::RawInline { .. } => { + self.raw = Raw::None + } Container::Subscript => self.out.write_str("")?, Container::Superscript => self.out.write_str("")?, Container::Insert => self.out.write_str("")?, @@ -145,28 +162,34 @@ impl<'s, I: Iterator>, W: std::fmt::Write> Writer { Container::DoubleQuoted => self.out.write_str("”")?, } } - Event::Str(mut s) => { - let mut ent = ""; - while let Some(i) = s.chars().position(|c| { - if let Some(s) = match c { - '<' => Some("<"), - '>' => Some(">"), - '&' => Some("&"), - '"' => Some("""), - _ => None, - } { - ent = s; - true - } else { - false + Event::Str(mut s) => match self.raw { + Raw::None => { + let mut ent = ""; + while let Some(i) = s.chars().position(|c| { + if let Some(s) = match c { + '<' => Some("<"), + '>' => Some(">"), + '&' => Some("&"), + '"' => Some("""), + _ => None, + } { + ent = s; + true + } else { + false + } + }) { + self.out.write_str(&s[..i])?; + self.out.write_str(ent)?; + s = &s[i + 1..]; } - }) { - self.out.write_str(&s[..i])?; - self.out.write_str(ent)?; - s = &s[i + 1..]; + self.out.write_str(s)?; } - self.out.write_str(s)?; - } + Raw::Html => { + self.out.write_str(s)?; + } + Raw::Other => {} + }, Event::Atom(a) => match a { Atom::Ellipsis => self.out.write_str("…")?, diff --git a/src/inline.rs b/src/inline.rs index 354c16e..c41d561 100644 --- a/src/inline.rs +++ b/src/inline.rs @@ -60,12 +60,14 @@ pub struct Event { /// Current parsing state of elements that are not recursive, i.e. may not contain arbitrary inline /// elements, can only be one of these at a time. +#[derive(Debug)] enum State { None, /// Within a verbatim element, e.g. '$`xxxxx' Verbatim { kind: Container, opener_len: usize, + opener_event: usize, }, /// Potentially within an attribute list, e.g. '{a=b '. Attributes { @@ -81,9 +83,14 @@ enum State { } impl State { - fn verbatim(&self) -> Option<(Container, usize)> { - if let Self::Verbatim { kind, opener_len } = self { - Some((*kind, *opener_len)) + fn verbatim(&self) -> Option<(Container, usize, usize)> { + if let Self::Verbatim { + kind, + opener_len, + opener_event, + } = self + { + Some((*kind, *opener_len, *opener_event)) } else { None } @@ -173,16 +180,34 @@ impl<'s> Parser<'s> { fn parse_verbatim(&mut self, first: &lex::Token) -> Option { self.state .verbatim() - .map(|(kind, opener_len)| { + .map(|(kind, opener_len, opener_event)| { + dbg!(&self.events, opener_event); + assert_eq!(self.events[opener_event].kind, EventKind::Enter(kind)); let kind = if matches!(first.kind, lex::Kind::Seq(lex::Sequence::Backtick)) && first.len == opener_len { self.state = State::None; - if matches!(kind, Container::Span) { - todo!() - } else { - EventKind::Exit(kind) - } + let kind = + if matches!(kind, Verbatim) && self.lexer.peek_ahead().starts_with("{=") { + let mut chars = self.lexer.peek_ahead()[2..].chars(); + let len = chars + .clone() + .take_while(|c| !c.is_whitespace() && !matches!(c, '{' | '}')) + .count(); + if len > 0 && chars.nth(len) == Some('}') { + self.lexer = lex::Lexer::new(chars.as_str()); + let span_format = Span::by_len(self.span.end() + "{=".len(), len); + self.events[opener_event].kind = EventKind::Enter(RawFormat); + self.events[opener_event].span = span_format; + self.span = span_format; + RawFormat + } else { + Verbatim + } + } else { + kind + }; + EventKind::Exit(kind) } else { EventKind::Str }; @@ -203,9 +228,9 @@ impl<'s> Parser<'s> { { Some(( if first.len == 2 { - Container::DisplayMath + DisplayMath } else { - Container::InlineMath + InlineMath }, *len, )) @@ -219,13 +244,16 @@ impl<'s> Parser<'s> { } math_opt } - lex::Kind::Seq(lex::Sequence::Backtick) => { - Some((Container::Verbatim, first.len)) - } + lex::Kind::Seq(lex::Sequence::Backtick) => Some((Verbatim, first.len)), _ => None, } .map(|(kind, opener_len)| { - self.state = State::Verbatim { kind, opener_len }; + dbg!(&self.events); + self.state = State::Verbatim { + kind, + opener_len, + opener_event: self.events.len(), + }; Event { kind: EventKind::Enter(kind), span: self.span, @@ -295,56 +323,60 @@ impl<'s> Iterator for Parser<'s> { type Item = Event; fn next(&mut self) -> Option { + let mut need_more = false; while self.events.is_empty() || !self.openers.is_empty() - || self + || !matches!(self.state, State::None) + || self // for merge .events .back() .map_or(false, |ev| matches!(ev.kind, EventKind::Str)) { if let Some(ev) = self.parse_event() { self.events.push_back(ev); + dbg!(&self.events, &self.state); } else { + need_more = true; break; } } - self.events - .pop_front() - .map(|e| { - if matches!(e.kind, EventKind::Str) { - // merge str events - let mut span = e.span; - while self - .events - .front() - .map_or(false, |ev| matches!(ev.kind, EventKind::Str)) - { - let ev = self.events.pop_front().unwrap(); - assert_eq!(span.end(), ev.span.start()); - span = span.union(ev.span); + if self.last || !need_more { + self.events + .pop_front() + .map(|e| { + if matches!(e.kind, EventKind::Str) { + // merge str events + let mut span = e.span; + while self + .events + .front() + .map_or(false, |ev| matches!(ev.kind, EventKind::Str)) + { + let ev = self.events.pop_front().unwrap(); + assert_eq!(span.end(), ev.span.start()); + span = span.union(ev.span); + } + Event { + kind: EventKind::Str, + span, + } + } else { + e } - Event { - kind: EventKind::Str, - span, - } - } else { - e - } - }) - .or_else(|| { - if self.last { - self.state.verbatim().map(|(kind, _)| { + }) + .or_else(|| { + self.state.verbatim().map(|(kind, _, _)| { self.state = State::None; Event { kind: EventKind::Exit(kind), span: self.span, } }) - } else { - None - } - }) + }) + } else { + None + } } } diff --git a/src/lex.rs b/src/lex.rs index 2fa5d86..333d173 100644 --- a/src/lex.rs +++ b/src/lex.rs @@ -83,15 +83,20 @@ impl Sequence { #[derive(Clone)] pub(crate) struct Lexer<'s> { + pub src: &'s str, chars: std::str::Chars<'s>, + /// Next character should be escaped. escape: bool, + /// Token to be peeked or next'ed. next: Option, + /// Length of current token. len: usize, } impl<'s> Lexer<'s> { pub fn new(src: &'s str) -> Lexer<'s> { Lexer { + src, chars: src.chars(), escape: false, next: None, @@ -106,6 +111,16 @@ impl<'s> Lexer<'s> { self.next.as_ref() } + pub fn pos(&self) -> usize { + self.src.len() + - self.chars.as_str().len() + - self.next.as_ref().map(|t| t.len).unwrap_or_default() + } + + pub fn peek_ahead(&mut self) -> &'s str { + &self.src[self.pos()..] + } + fn next_token(&mut self) -> Option { let mut current = self.token(); diff --git a/src/lib.rs b/src/lib.rs index 36952f4..ced2533 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -234,7 +234,7 @@ impl<'s> Event<'s> { inline::Container::Verbatim => Container::Verbatim, inline::Container::InlineMath => Container::Math { display: false }, inline::Container::DisplayMath => Container::Math { display: true }, - inline::Container::RawFormat => Container::RawInline { format: todo!() }, + inline::Container::RawFormat => Container::RawInline { format: content }, inline::Container::Subscript => Container::Subscript, inline::Container::Superscript => Container::Superscript, inline::Container::Insert => Container::Insert, @@ -483,10 +483,21 @@ mod test { "`abc\ndef", Start(Paragraph, Attributes::none()), Start(Verbatim, Attributes::none()), - Str("abc\n"), - Str("def"), + Str("abc\ndef"), End(Verbatim), End(Paragraph), ); } + + #[test] + fn raw_inline() { + test_parse!( + "`raw\nraw`{=format}", + Start(Paragraph, Attributes::none()), + Start(RawInline { format: "format" }, Attributes::none()), + Str("raw\nraw"), + End(RawInline { format: "format" }), + End(Paragraph), + ); + } }