Skip to content

Commit

Permalink
parser: correct tasklist spx.
Browse files Browse the repository at this point in the history
  • Loading branch information
kivikakk committed Mar 2, 2025
1 parent c8d935e commit ec9d14f
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 62 deletions.
56 changes: 4 additions & 52 deletions src/parser/autolink.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
use crate::character_set::character_set;
use crate::ctype::{isalnum, isalpha, isspace};
use crate::nodes::{AstNode, NodeLink, NodeValue, Sourcepos};
use crate::parser::inlines::make_inline;
use std::collections::VecDeque;
use crate::parser::{inlines::make_inline, Spx};
use std::str;
use typed_arena::Arena;
use unicode_categories::UnicodeCategories;
Expand All @@ -13,7 +12,7 @@ pub(crate) fn process_email_autolinks<'a>(
contents_str: &mut String,
relaxed_autolinks: bool,
sourcepos: &mut Sourcepos,
mut spx: VecDeque<(Sourcepos, usize)>,
spx: &mut Spx,
) {
let contents = contents_str.as_bytes();
let len = contents.len();
Expand Down Expand Up @@ -64,9 +63,9 @@ pub(crate) fn process_email_autolinks<'a>(
};
let initial_end_col = sourcepos.end.column;

sourcepos.end.column = consume_spx(&mut spx, i);
sourcepos.end.column = spx.consume(i);

let nsp_end_col = consume_spx(&mut spx, skip);
let nsp_end_col = spx.consume(skip);

contents_str.truncate(i);

Expand Down Expand Up @@ -112,53 +111,6 @@ pub(crate) fn process_email_autolinks<'a>(
}
}
}

// Sourcepos end column `e` of the original node (set by writing to
// `*sourcepos`) determined by advancing through `spx` until `i` bytes of input
// are seen.
//
// For each element `(sp, x)` in `spx`:
// - if remaining `i` is greater than the byte count `x`,
// set `i -= x` and continue.
// - if remaining `i` is equal to the byte count `x`,
// set `e = sp.end.column` and finish.
// - if remaining `i` is less than the byte count `x`,
// assert `sp.end.column - sp.start.column + 1 == x || rem == 0` (1),
// set `e = sp.start.column + i - 1` and finish.
//
// (1) If `x` doesn't equal the range covered between the start and end column,
// there's no way to determine sourcepos within the range. This is a bug if
// it happens; it suggests we've matched an email autolink with some smart
// punctuation in it, or worse.
//
// The one exception is if `rem == 0`. Given nothing to consume, we can
// happily restore what we popped, returning `sp.start.column - 1` for the
// end column of the original node.
fn consume_spx(spx: &mut VecDeque<(Sourcepos, usize)>, mut rem: usize) -> usize {
while let Some((sp, x)) = spx.pop_front() {
if rem > x {
rem -= x;
} else if rem == x {
return sp.end.column;
} else {
// rem < x
assert!((sp.end.column - sp.start.column + 1 == x) || rem == 0);
spx.push_front((
(
sp.start.line,
sp.start.column + rem,
sp.end.line,
sp.end.column,
)
.into(),
x - rem,
));
return sp.start.column + rem - 1;
}
}
unreachable!();
}

fn email_match<'a>(
arena: &'a Arena<AstNode<'a>>,
contents: &[u8],
Expand Down
67 changes: 60 additions & 7 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2961,22 +2961,22 @@ where
// Join adjacent text nodes together, then post-process.
// Record the original list of sourcepos and bytecounts
// for the post-processing step.
let mut spx = VecDeque::new();
spx.push_back((sourcepos, root.len()));
let mut spxv = VecDeque::new();
spxv.push_back((sourcepos, root.len()));
while let Some(ns) = n.next_sibling() {
match ns.data.borrow().value {
NodeValue::Text(ref adj) => {
root.push_str(adj);
let sp = ns.data.borrow().sourcepos;
spx.push_back((sp, adj.len()));
spxv.push_back((sp, adj.len()));
sourcepos.end.column = sp.end.column;
ns.detach();
}
_ => break,
}
}

self.postprocess_text_node(n, root, &mut sourcepos, spx);
self.postprocess_text_node(n, root, &mut sourcepos, spxv);
emptied = root.len() == 0;
}
NodeValue::Link(..) | NodeValue::Image(..) | NodeValue::WikiLink(..) => {
Expand Down Expand Up @@ -3011,10 +3011,11 @@ where
node: &'a AstNode<'a>,
text: &mut String,
sourcepos: &mut Sourcepos,
spx: VecDeque<(Sourcepos, usize)>,
spxv: VecDeque<(Sourcepos, usize)>,
) {
let mut spx = Spx(spxv);
if self.options.extension.tasklist {
self.process_tasklist(node, text, sourcepos);
self.process_tasklist(node, text, sourcepos, &mut spx);
}

if self.options.extension.autolink {
Expand All @@ -3024,7 +3025,7 @@ where
text,
self.options.parse.relaxed_autolinks,
sourcepos,
spx,
&mut spx,
);
}
}
Expand All @@ -3034,6 +3035,7 @@ where
node: &'a AstNode<'a>,
text: &mut String,
sourcepos: &mut Sourcepos,
spx: &mut Spx,
) {
let (end, symbol) = match scanners::tasklist(text.as_bytes()) {
Some(p) => p,
Expand Down Expand Up @@ -3071,6 +3073,8 @@ where
// the count thereof (i.e. "end") will precisely map to characters in
// the source document.
sourcepos.start.column += end;
let reference = spx.consume(end) + 1;
assert_eq!(reference, sourcepos.start.column);
parent.data.borrow_mut().sourcepos.start.column += end;

grandparent.data.borrow_mut().value =
Expand Down Expand Up @@ -3323,3 +3327,52 @@ pub enum ListStyleType {
/// The `*` character
Star = 42,
}

pub(crate) struct Spx(VecDeque<(Sourcepos, usize)>);

impl Spx {
// Sourcepos end column `e` of a node determined by advancing through `spx`
// until `i` bytes of input are seen.
//
// For each element `(sp, x)` in `spx`:
// - if remaining `i` is greater than the byte count `x`,
// set `i -= x` and continue.
// - if remaining `i` is equal to the byte count `x`,
// set `e = sp.end.column` and finish.
// - if remaining `i` is less than the byte count `x`,
// assert `sp.end.column - sp.start.column + 1 == x || i == 0` (1),
// set `e = sp.start.column + i - 1` and finish.
//
// (1) If `x` doesn't equal the range covered between the start and end column,
// there's no way to determine sourcepos within the range. This is a bug if
// it happens; it suggests we've matched an email autolink with some smart
// punctuation in it, or worse.
//
// The one exception is if `i == 0`. Given nothing to consume, we can
// happily restore what we popped, returning `sp.start.column - 1` for the
// end column of the original node.
pub(crate) fn consume(&mut self, mut rem: usize) -> usize {
while let Some((sp, x)) = self.0.pop_front() {
if rem > x {
rem -= x;
} else if rem == x {
return sp.end.column;
} else {
// rem < x
assert!((sp.end.column - sp.start.column + 1 == x) || rem == 0);
self.0.push_front((
(
sp.start.line,
sp.start.column + rem,
sp.end.line,
sp.end.column,
)
.into(),
x - rem,
));
return sp.start.column + rem - 1;
}
}
unreachable!();
}
}
9 changes: 6 additions & 3 deletions src/tests/fuzz.rs
Original file line number Diff line number Diff line change
Expand Up @@ -329,9 +329,12 @@ fn echaw8() {
"- [x] &Xfr;-<[email protected]",
(document (1:1-1:17) [
(list (1:1-1:17) [
(item (1:1-1:17) [
(paragraph (1:3-1:17) [
(text (1:3-1:17) "[x] V𝔛-<A@N")
(taskitem (1:1-1:17) [
(paragraph (1:7-1:17) [
(text (1:7-1:13) "𝔛-<")
(link (1:14-1:17) "mailto:[email protected]" [
(text (1:14-1:17) "[email protected]")
])
])
])
])
Expand Down

0 comments on commit ec9d14f

Please sign in to comment.