Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Track line offsets for better accuracy of inline sourcepos #453

Merged
merged 2 commits into from
Aug 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions src/cm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -544,13 +544,13 @@ impl<'a, 'o, 'c> CommonMarkFormatter<'a, 'o, 'c> {
let info = ncb.info.as_bytes();
let literal = ncb.literal.as_bytes();

if info.is_empty()
&& (literal.len() > 2
&& !isspace(literal[0])
&& !(isspace(literal[literal.len() - 1])
&& isspace(literal[literal.len() - 2])))
&& !first_in_list_item
&& !self.options.render.prefer_fenced
#[allow(clippy::len_zero)]
if !(info.len() > 0
|| literal.len() <= 2
|| isspace(literal[0])
|| first_in_list_item
|| self.options.render.prefer_fenced
|| isspace(literal[literal.len() - 1]) && isspace(literal[literal.len() - 2]))
{
write!(self, " ").unwrap();
write!(self.prefix, " ").unwrap();
Expand Down
2 changes: 2 additions & 0 deletions src/nodes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,7 @@ pub struct Ast {
pub(crate) open: bool,
pub(crate) last_line_blank: bool,
pub(crate) table_visited: bool,
pub(crate) line_offsets: Vec<usize>,
}

/// Represents the position in the source Markdown this node was rendered from.
Expand Down Expand Up @@ -609,6 +610,7 @@ impl Ast {
open: true,
last_line_blank: false,
table_visited: false,
line_offsets: Vec::with_capacity(0),
}
}
}
Expand Down
13 changes: 5 additions & 8 deletions src/parser/autolink.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,11 @@ pub(crate) fn process_autolinks<'a>(
}
}

match contents[i] {
b'@' => {
post_org = email_match(arena, contents, i, relaxed_autolinks);
if post_org.is_some() {
break;
}
if contents[i] == b'@' {
post_org = email_match(arena, contents, i, relaxed_autolinks);
if post_org.is_some() {
break;
}
_ => (),
}
i += 1;
}
Expand Down Expand Up @@ -161,7 +158,7 @@ fn check_domain(data: &[u8], allow_short: bool) -> Option<usize> {
}

fn is_valid_hostchar(ch: char) -> bool {
!ch.is_whitespace() && !(ch.is_punctuation() || ch.is_symbol())
!(ch.is_whitespace() || ch.is_punctuation() || ch.is_symbol())
}

fn autolink_delim(data: &[u8], mut link_end: usize, relaxed_autolinks: bool) -> usize {
Expand Down
31 changes: 22 additions & 9 deletions src/parser/inlines.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ pub struct Subject<'a: 'd, 'r, 'o, 'c, 'd, 'i> {
pub input: &'i [u8],
line: usize,
pub pos: usize,
block_offset: usize,
column_offset: isize,
line_offset: usize,
flags: Flags,
pub refmap: &'r mut RefMap,
delimiter_arena: &'d Arena<Delimiter<'a, 'd>>,
Expand Down Expand Up @@ -116,7 +116,6 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> {
options: &'o Options<'c>,
input: &'i [u8],
line: usize,
block_offset: usize,
refmap: &'r mut RefMap,
delimiter_arena: &'d Arena<Delimiter<'a, 'd>>,
) -> Self {
Expand All @@ -126,8 +125,8 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> {
input,
line,
pos: 0,
block_offset,
column_offset: 0,
line_offset: 0,
flags: Flags::default(),
refmap,
delimiter_arena,
Expand Down Expand Up @@ -182,6 +181,11 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> {
None => return false,
Some(ch) => *ch as char,
};

let node_ast = node.data.borrow();
let adjusted_line = self.line - node_ast.sourcepos.start.line;
self.line_offset = node_ast.line_offsets[adjusted_line];

let new_inl: Option<&'a AstNode<'a>> = match c {
'\0' => return false,
'\r' | '\n' => Some(self.handle_newline()),
Expand Down Expand Up @@ -1119,11 +1123,18 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> {
self.pos,
);
{
// if we have `___` or `***` then we need to adjust the sourcepos columns by 1
let triple_adjustment = if opener_num_chars > 0 && use_delims == 2 {
1
} else {
0
};

emph.data.borrow_mut().sourcepos = (
opener.inl.data.borrow().sourcepos.start.line,
opener.inl.data.borrow().sourcepos.start.column,
opener.inl.data.borrow().sourcepos.start.column + triple_adjustment,
closer.inl.data.borrow().sourcepos.end.line,
closer.inl.data.borrow().sourcepos.end.column,
closer.inl.data.borrow().sourcepos.end.column - triple_adjustment,
)
.into();
}
Expand Down Expand Up @@ -1604,7 +1615,7 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> {
inl.data.borrow_mut().sourcepos.start.column =
bracket_inl_text.data.borrow().sourcepos.start.column;
inl.data.borrow_mut().sourcepos.end.column = usize::try_from(
self.pos as isize + self.column_offset + self.block_offset as isize,
self.pos as isize + self.column_offset + self.line_offset as isize,
)
.unwrap();
bracket_inl_text.insert_before(inl);
Expand Down Expand Up @@ -1655,7 +1666,7 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> {
.sourcepos
.start;
inl.data.borrow_mut().sourcepos.end.column =
usize::try_from(self.pos as isize + self.column_offset + self.block_offset as isize)
usize::try_from(self.pos as isize + self.column_offset + self.line_offset as isize)
.unwrap();

self.brackets[brackets_len - 1].inl_text.insert_before(inl);
Expand Down Expand Up @@ -1847,8 +1858,8 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> {
end_column: usize,
) -> &'a AstNode<'a> {
let start_column =
start_column as isize + 1 + self.column_offset + self.block_offset as isize;
let end_column = end_column as isize + 1 + self.column_offset + self.block_offset as isize;
start_column as isize + 1 + self.column_offset + self.line_offset as isize;
let end_column = end_column as isize + 1 + self.column_offset + self.line_offset as isize;

let ast = Ast {
value,
Expand All @@ -1864,6 +1875,7 @@ impl<'a, 'r, 'o, 'c, 'd, 'i> Subject<'a, 'r, 'o, 'c, 'd, 'i> {
open: false,
last_line_blank: false,
table_visited: false,
line_offsets: Vec::with_capacity(0),
};
self.arena.alloc(Node::new(RefCell::new(ast)))
}
Expand Down Expand Up @@ -1972,6 +1984,7 @@ pub fn make_inline<'a>(
open: false,
last_line_blank: false,
table_visited: false,
line_offsets: Vec::with_capacity(0),
};
arena.alloc(Node::new(RefCell::new(ast)))
}
Expand Down
8 changes: 6 additions & 2 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ pub fn parse_document<'a>(
open: true,
last_line_blank: false,
table_visited: false,
line_offsets: Vec::with_capacity(0),
})));
let mut parser = Parser::new(arena, root, options);
let mut linebuf = Vec::with_capacity(buffer.len());
Expand Down Expand Up @@ -1998,6 +1999,11 @@ impl<'a, 'o, 'c: 'o> Parser<'a, 'o, 'c> {
}
}
if self.offset < line.len() {
// Since whitespace is stripped off the beginning of lines, we need to keep
// track of how much was stripped off. This allows us to properly calculate
// inline sourcepos during inline processing.
ast.line_offsets.push(self.offset);

ast.content
.push_str(str::from_utf8(&line[self.offset..]).unwrap());
}
Expand Down Expand Up @@ -2185,7 +2191,6 @@ impl<'a, 'o, 'c: 'o> Parser<'a, 'o, 'c> {
self.options,
content,
node_data.sourcepos.start.line,
node_data.sourcepos.start.column - 1 + node_data.internal_offset,
&mut self.refmap,
&delimiter_arena,
);
Expand Down Expand Up @@ -2439,7 +2444,6 @@ impl<'a, 'o, 'c: 'o> Parser<'a, 'o, 'c> {
self.options,
content,
0, // XXX -1 in upstream; never used?
0,
&mut self.refmap,
&delimiter_arena,
);
Expand Down
24 changes: 14 additions & 10 deletions src/parser/table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,10 @@ fn try_opening_header<'a>(
start.column_add((cell.end_offset - header_row.paragraph_offset) as isize);
ast.internal_offset = cell.internal_offset;
ast.content.clone_from(&cell.content);
ast.line_offsets.push(
start.column + cell.start_offset - 1 + cell.internal_offset
- header_row.paragraph_offset,
);

i += 1;
}
Expand Down Expand Up @@ -172,6 +176,9 @@ fn try_opening_row<'a>(
cell_ast.internal_offset = cell.internal_offset;
cell_ast.sourcepos.end.column = sourcepos.start.column + cell.end_offset;
cell_ast.content.clone_from(&cell.content);
cell_ast
.line_offsets
.push(sourcepos.start.column + cell.start_offset - 1 + cell.internal_offset);

last_column = cell_ast.sourcepos.end.column;

Expand Down Expand Up @@ -295,16 +302,13 @@ fn try_inserting_table_header_paragraph<'a>(
let mut paragraph = Ast::new(NodeValue::Paragraph, start);
paragraph.sourcepos.end.line = start.line + newlines - 1;

// XXX We don't have the last_line_length to go on by this point,
// so we have no idea what the end column should be.
// We can't track it in row() like we do paragraph_offset, because
// we've already discarded the leading whitespace for that line.
// This is hard to avoid with this backtracking approach to
// creating the pre-table paragraph — we're doing the work of
// finalize() here, but without the parser state at that time.
// Approximate by just counting the line length as it is and adding
// to the start column.
paragraph.sourcepos.end.column = start.column - 1
// copy over the line offsets related to the paragraph
for n in 0..newlines {
paragraph.line_offsets.push(container_ast.line_offsets[n]);
}

let last_line_offset = *paragraph.line_offsets.last().unwrap_or(&0);
paragraph.sourcepos.end.column = last_line_offset
+ preface
.iter()
.rev()
Expand Down
Loading