Fix mix-ups between byte counts and char counts (span offsets and lengths are in bytes; indentation is counted in chars)
This commit is contained in:
parent
4cb9c07cfc
commit
cadf49fc53
2 changed files with 38 additions and 29 deletions
55
src/block.rs
55
src/block.rs
|
@ -254,11 +254,12 @@ impl<'s> TreeParser<'s> {
|
||||||
fn parse_leaf(&mut self, leaf: Leaf, k: &Kind, span: Span, lines: &mut [Span]) {
|
fn parse_leaf(&mut self, leaf: Leaf, k: &Kind, span: Span, lines: &mut [Span]) {
|
||||||
if let Kind::Fenced { indent, .. } = k {
|
if let Kind::Fenced { indent, .. } = k {
|
||||||
for line in lines.iter_mut() {
|
for line in lines.iter_mut() {
|
||||||
let indent_line = line.len()
|
let indent_line = line
|
||||||
- line
|
.of(self.src)
|
||||||
.trim_start_matches(self.src, |c| c != '\n' && c.is_whitespace())
|
.chars()
|
||||||
.len();
|
.take_while(|c| *c != '\n' && c.is_whitespace())
|
||||||
*line = line.skip((*indent).min(indent_line));
|
.count();
|
||||||
|
*line = line.skip_chars((*indent).min(indent_line), self.src);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// trim starting whitespace of each inline
|
// trim starting whitespace of each inline
|
||||||
|
@ -380,7 +381,9 @@ impl<'s> TreeParser<'s> {
|
||||||
.position(|sp| sp.of(self.src).trim_start().starts_with('^'))
|
.position(|sp| sp.of(self.src).trim_start().starts_with('^'))
|
||||||
.map_or(lines.len(), |caption_line| {
|
.map_or(lines.len(), |caption_line| {
|
||||||
self.tree.enter(Node::Leaf(Caption), span);
|
self.tree.enter(Node::Leaf(Caption), span);
|
||||||
lines[caption_line] = lines[caption_line].trim_start(self.src).skip("^ ".len());
|
lines[caption_line] = lines[caption_line]
|
||||||
|
.trim_start(self.src)
|
||||||
|
.skip_chars(2, self.src);
|
||||||
lines[lines.len() - 1] = lines[lines.len() - 1].trim_end(self.src);
|
lines[lines.len() - 1] = lines[lines.len() - 1].trim_end(self.src);
|
||||||
for line in &lines[caption_line..] {
|
for line in &lines[caption_line..] {
|
||||||
self.tree.inline(*line);
|
self.tree.inline(*line);
|
||||||
|
@ -577,6 +580,7 @@ impl IdentifiedBlock {
|
||||||
.take_while(|c| *c != '\n' && c.is_whitespace())
|
.take_while(|c| *c != '\n' && c.is_whitespace())
|
||||||
.count();
|
.count();
|
||||||
(&mut chars).take(indent).last();
|
(&mut chars).take(indent).last();
|
||||||
|
let indent_bytes = line.len() - chars.as_str().len();
|
||||||
let line = chars.as_str();
|
let line = chars.as_str();
|
||||||
let line_t = line.trim_end();
|
let line_t = line.trim_end();
|
||||||
let l = line.len();
|
let l = line.len();
|
||||||
|
@ -587,46 +591,47 @@ impl IdentifiedBlock {
|
||||||
} else {
|
} else {
|
||||||
return Self {
|
return Self {
|
||||||
kind: Kind::Atom(Blankline),
|
kind: Kind::Atom(Blankline),
|
||||||
span: Span::empty_at(indent),
|
span: Span::empty_at(indent_bytes),
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
match first {
|
match first {
|
||||||
'\n' => Some((Kind::Atom(Blankline), Span::by_len(indent, 1))),
|
'\n' => Some((Kind::Atom(Blankline), Span::by_len(indent_bytes, 1))),
|
||||||
'#' => chars
|
'#' => chars
|
||||||
.find(|c| *c != '#')
|
.find(|c| *c != '#')
|
||||||
.map_or(true, char::is_whitespace)
|
.map_or(true, char::is_whitespace)
|
||||||
.then(|| {
|
.then(|| {
|
||||||
let level = l - chars.as_str().len() - 1;
|
let level = line.chars().take_while(|c| *c == '#').count();
|
||||||
(Kind::Heading { level }, Span::by_len(indent, level))
|
(Kind::Heading { level }, Span::by_len(indent_bytes, level))
|
||||||
}),
|
}),
|
||||||
'>' => {
|
'>' => {
|
||||||
if chars.next().map_or(true, char::is_whitespace) {
|
if chars.next().map_or(true, char::is_whitespace) {
|
||||||
Some((Kind::Blockquote, Span::by_len(indent, 1)))
|
Some((Kind::Blockquote, Span::by_len(indent_bytes, 1)))
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
'{' => (attr::valid(line.chars()).0 == lt)
|
'{' => (attr::valid(line.chars()).0 == lt)
|
||||||
.then(|| (Kind::Atom(Attributes), Span::by_len(indent, l))),
|
.then(|| (Kind::Atom(Attributes), Span::by_len(indent_bytes, l))),
|
||||||
'|' => {
|
'|' => {
|
||||||
// FIXME: last byte may be pipe but end of prefixed unicode char
|
if lt >= 2 && line_t.ends_with('|') && !line_t.ends_with("\\|") {
|
||||||
((lt >= 2 && line.as_bytes()[lt - 1] == b'|')
|
Some((Kind::Table { caption: false }, Span::empty_at(indent_bytes)))
|
||||||
&& !((lt >= 3) && line.as_bytes()[lt - 2] == b'\\'))
|
} else {
|
||||||
.then(|| (Kind::Table { caption: false }, Span::empty_at(indent)))
|
None
|
||||||
|
}
|
||||||
}
|
}
|
||||||
'[' => chars.as_str().find("]:").map(|l| {
|
'[' => chars.as_str().find("]:").map(|l| {
|
||||||
let tag = &chars.as_str()[0..l];
|
let tag = &chars.as_str()[0..l];
|
||||||
let footnote = tag.starts_with('^');
|
let footnote = tag.starts_with('^');
|
||||||
(
|
(
|
||||||
Kind::Definition { indent, footnote },
|
Kind::Definition { indent, footnote },
|
||||||
Span::by_len(indent + 1, l).skip(usize::from(footnote)),
|
Span::by_len(indent_bytes + 1, l).skip(usize::from(footnote)),
|
||||||
)
|
)
|
||||||
}),
|
}),
|
||||||
'-' | '*' if Self::is_thematic_break(chars.clone()) => {
|
'-' | '*' if Self::is_thematic_break(chars.clone()) => {
|
||||||
Some((Kind::Atom(ThematicBreak), Span::by_len(indent, lt)))
|
Some((Kind::Atom(ThematicBreak), Span::by_len(indent_bytes, lt)))
|
||||||
}
|
}
|
||||||
b @ ('-' | '*' | '+') => chars.next().map_or(true, char::is_whitespace).then(|| {
|
b @ ('-' | '*' | '+') => chars.next().map_or(true, |c| c == ' ').then(|| {
|
||||||
let task_list = chars.next() == Some('[')
|
let task_list = chars.next() == Some('[')
|
||||||
&& matches!(chars.next(), Some('x' | 'X' | ' '))
|
&& matches!(chars.next(), Some('x' | 'X' | ' '))
|
||||||
&& chars.next() == Some(']')
|
&& chars.next() == Some(']')
|
||||||
|
@ -638,7 +643,7 @@ impl IdentifiedBlock {
|
||||||
ty: Task,
|
ty: Task,
|
||||||
last_blankline: false,
|
last_blankline: false,
|
||||||
},
|
},
|
||||||
Span::by_len(indent, 5),
|
Span::by_len(indent_bytes, 5),
|
||||||
)
|
)
|
||||||
} else {
|
} else {
|
||||||
(
|
(
|
||||||
|
@ -647,7 +652,7 @@ impl IdentifiedBlock {
|
||||||
ty: Unordered(b as u8),
|
ty: Unordered(b as u8),
|
||||||
last_blankline: false,
|
last_blankline: false,
|
||||||
},
|
},
|
||||||
Span::by_len(indent, 1),
|
Span::by_len(indent_bytes, 1),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
}),
|
}),
|
||||||
|
@ -657,7 +662,7 @@ impl IdentifiedBlock {
|
||||||
ty: Description,
|
ty: Description,
|
||||||
last_blankline: false,
|
last_blankline: false,
|
||||||
},
|
},
|
||||||
Span::by_len(indent, 1),
|
Span::by_len(indent_bytes, 1),
|
||||||
)),
|
)),
|
||||||
f @ ('`' | ':' | '~') => {
|
f @ ('`' | ':' | '~') => {
|
||||||
let fence_length = 1 + (&mut chars).take_while(|c| *c == f).count();
|
let fence_length = 1 + (&mut chars).take_while(|c| *c == f).count();
|
||||||
|
@ -681,7 +686,7 @@ impl IdentifiedBlock {
|
||||||
has_spec: !spec.is_empty(),
|
has_spec: !spec.is_empty(),
|
||||||
has_closing_fence: false,
|
has_closing_fence: false,
|
||||||
},
|
},
|
||||||
Span::by_len(indent + skip, spec.len()),
|
Span::by_len(indent_bytes + skip, spec.len()),
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
@ -692,14 +697,14 @@ impl IdentifiedBlock {
|
||||||
ty: Ordered(num, style),
|
ty: Ordered(num, style),
|
||||||
last_blankline: false,
|
last_blankline: false,
|
||||||
},
|
},
|
||||||
Span::by_len(indent, len),
|
Span::by_len(indent_bytes, len),
|
||||||
)
|
)
|
||||||
}),
|
}),
|
||||||
}
|
}
|
||||||
.map(|(kind, span)| Self { kind, span })
|
.map(|(kind, span)| Self { kind, span })
|
||||||
.unwrap_or(Self {
|
.unwrap_or(Self {
|
||||||
kind: Kind::Paragraph,
|
kind: Kind::Paragraph,
|
||||||
span: Span::empty_at(indent),
|
span: Span::empty_at(indent_bytes),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -195,7 +195,8 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
|
||||||
};
|
};
|
||||||
!end && !c.is_whitespace()
|
!end && !c.is_whitespace()
|
||||||
})
|
})
|
||||||
.count();
|
.map(char::len_utf8)
|
||||||
|
.sum();
|
||||||
if len > 0 && end {
|
if len > 0 && end {
|
||||||
let tok = self.eat();
|
let tok = self.eat();
|
||||||
debug_assert_eq!(
|
debug_assert_eq!(
|
||||||
|
@ -323,7 +324,8 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
|
||||||
}
|
}
|
||||||
!end && !c.is_whitespace()
|
!end && !c.is_whitespace()
|
||||||
})
|
})
|
||||||
.count();
|
.map(char::len_utf8)
|
||||||
|
.sum();
|
||||||
(end && is_url).then(|| {
|
(end && is_url).then(|| {
|
||||||
self.lexer = lex::Lexer::new(ahead);
|
self.lexer = lex::Lexer::new(ahead);
|
||||||
self.span = self.span.after(len);
|
self.span = self.span.after(len);
|
||||||
|
@ -376,7 +378,8 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
|
||||||
};
|
};
|
||||||
!end && *c != '\n'
|
!end && *c != '\n'
|
||||||
})
|
})
|
||||||
.count();
|
.map(char::len_utf8)
|
||||||
|
.sum();
|
||||||
end.then(|| {
|
end.then(|| {
|
||||||
self.lexer = lex::Lexer::new(ahead);
|
self.lexer = lex::Lexer::new(ahead);
|
||||||
self.span = self.span.after(len);
|
self.span = self.span.after(len);
|
||||||
|
@ -557,7 +560,8 @@ impl<I: Iterator<Item = char> + Clone> Parser<I> {
|
||||||
};
|
};
|
||||||
!end
|
!end
|
||||||
})
|
})
|
||||||
.count();
|
.map(char::len_utf8)
|
||||||
|
.sum();
|
||||||
end.then(|| {
|
end.then(|| {
|
||||||
let span = self.span.after(len).translate(1);
|
let span = self.span.after(len).translate(1);
|
||||||
(kind, span)
|
(kind, span)
|
||||||
|
|
Loading…
Reference in a new issue