Skip to content

Commit

Permalink
Merge pull request #542 from kivikakk/sourcepos-fixes
Browse files Browse the repository at this point in the history
Inline sourcepos fixes.
  • Loading branch information
kivikakk authored Mar 4, 2025
2 parents f368cfc + 1c55e16 commit 90e2307
Show file tree
Hide file tree
Showing 13 changed files with 1,061 additions and 453 deletions.
2 changes: 2 additions & 0 deletions flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,8 @@
formatter = pkgs.alejandra;

devShells.default = pkgs.mkShell {
name = "comrak";

inputsFrom = builtins.attrValues self.checks.${system};

nativeBuildInputs = [
Expand Down
293 changes: 168 additions & 125 deletions src/parser/autolink.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
use crate::character_set::character_set;
use crate::ctype::{isalnum, isalpha, isspace};
use crate::nodes::{AstNode, NodeLink, NodeValue};
use crate::parser::inlines::make_inline;
use crate::nodes::{AstNode, NodeLink, NodeValue, Sourcepos};
use crate::parser::{inlines::make_inline, Spx};
use std::str;
use typed_arena::Arena;
use unicode_categories::UnicodeCategories;

// TODO: this can probably be cleaned up a lot. It used to handle all three of
// {url,www,email}_match, but now just the last of those.
pub(crate) fn process_autolinks<'a>(
pub(crate) fn process_email_autolinks<'a>(
arena: &'a Arena<AstNode<'a>>,
node: &'a AstNode<'a>,
contents_str: &mut String,
relaxed_autolinks: bool,
sourcepos: &mut Sourcepos,
spx: &mut Spx,
) {
let contents = contents_str.as_bytes();
let len = contents.len();
Expand Down Expand Up @@ -53,20 +53,177 @@ pub(crate) fn process_autolinks<'a>(
if let Some((post, reverse, skip)) = post_org {
i -= reverse;
node.insert_after(post);
if i + skip < len {

let remain = if i + skip < len {
let remain = str::from_utf8(&contents[i + skip..]).unwrap();
assert!(!remain.is_empty());
post.insert_after(make_inline(
Some(remain.to_string())
} else {
None
};
let initial_end_col = sourcepos.end.column;

sourcepos.end.column = spx.consume(i);

let nsp_end_col = spx.consume(skip);

contents_str.truncate(i);

let nsp: Sourcepos = (
sourcepos.end.line,
sourcepos.end.column + 1,
sourcepos.end.line,
nsp_end_col,
)
.into();
post.data.borrow_mut().sourcepos = nsp;
// Inner text gets same sourcepos as link, since there's nothing but
// the text.
post.first_child().unwrap().data.borrow_mut().sourcepos = nsp;

if let Some(remain) = remain {
let mut asp: Sourcepos = (
sourcepos.end.line,
nsp.end.column + 1,
sourcepos.end.line,
initial_end_col,
)
.into();
let after = make_inline(arena, NodeValue::Text(remain.to_string()), asp);
post.insert_after(after);

let after_ast = &mut after.data.borrow_mut();
process_email_autolinks(
arena,
NodeValue::Text(remain.to_string()),
(0, 1, 0, 1).into(),
));
after,
match after_ast.value {
NodeValue::Text(ref mut t) => t,
_ => unreachable!(),
},
relaxed_autolinks,
&mut asp,
spx,
);
after_ast.sourcepos = asp;
}
contents_str.truncate(i);

return;
}
}
}
/// Attempts to recognise an email-style autolink around `contents[i]`.
///
/// The scan runs in two directions from `i` (presumably the position of the
/// `@` sign; confirm against the caller):
///
/// * backwards over alphanumerics and the bytes `.`, `+`, `-`, `_`, optionally
///   stepping over a `:` that terminates a `mailto` or `xmpp` scheme;
/// * forwards over alphanumerics, `-`, `_`, `.` (only when followed by an
///   alphanumeric) and, for `xmpp` links, `/`.
///
/// Returns `None` when nothing precedes `i`, when a second `@` is met, when
/// the forward part contains no accepted `.`, when it does not end in a letter
/// or `.`, or when `autolink_delim` reduces the match to nothing.
///
/// On success returns `(link, rewind, rewind + link_end)`: the new link node
/// (with a single text child holding the matched text), the number of bytes
/// matched before `i`, and the total number of bytes matched. Both nodes are
/// created with the placeholder sourcepos `(0, 1, 0, 1)`.
fn email_match<'a>(
    arena: &'a Arena<AstNode<'a>>,
    contents: &[u8],
    i: usize,
    relaxed_autolinks: bool,
) -> Option<(&'a AstNode<'a>, usize, usize)> {
    const EMAIL_OK_SET: [bool; 256] = character_set!(b".+-_");

    let size = contents.len();

    // `mailto:` is prepended unless an explicit scheme is found while rewinding.
    let mut needs_mailto = true;
    let mut xmpp = false;
    let mut back = 0;

    // Walk backwards from `i` for as long as the bytes are acceptable.
    while back < i {
        let pos = i - back - 1;
        let byte = contents[pos];

        let accepted = if isalnum(byte) || EMAIL_OK_SET[byte as usize] {
            true
        } else if byte != b':' {
            false
        } else if validate_protocol("mailto", contents, pos) {
            needs_mailto = false;
            true
        } else if validate_protocol("xmpp", contents, pos) {
            xmpp = true;
            needs_mailto = false;
            true
        } else {
            false
        };

        if !accepted {
            break;
        }
        back += 1;
    }

    if back == 0 {
        return None;
    }

    // Walk forwards from the byte after `i`.
    let tail = size - i;
    let mut end = 1;
    let mut dots = 0;

    while end < tail {
        match contents[i + end] {
            b if isalnum(b) => {}
            b'@' => return None,
            // A dot only counts when an alphanumeric follows it; otherwise it
            // falls through to the catch-all arm and ends the scan.
            b'.' if end < tail - 1 && isalnum(contents[i + end + 1]) => dots += 1,
            // xmpp allows a `/` in the url
            b'/' if xmpp => {}
            b'-' | b'_' => {}
            _ => break,
        }

        end += 1;
    }

    if end < 2 || dots == 0 {
        return None;
    }

    let last = contents[i + end - 1];
    if !isalpha(last) && last != b'.' {
        return None;
    }

    // `autolink_delim` may shorten the match; zero means nothing is left.
    let end = autolink_delim(&contents[i..], end, relaxed_autolinks);
    if end == 0 {
        return None;
    }

    let text = str::from_utf8(&contents[i - back..i + end]).unwrap();
    let scheme = if needs_mailto { "mailto:" } else { "" };
    let url = [scheme, text].concat();

    let link = make_inline(
        arena,
        NodeValue::Link(NodeLink {
            url,
            title: String::new(),
        }),
        (0, 1, 0, 1).into(),
    );

    let label = make_inline(
        arena,
        NodeValue::Text(text.to_string()),
        (0, 1, 0, 1).into(),
    );
    link.append(label);

    Some((link, back, back + end))
}

/// Reports whether the run of alphabetic bytes ending immediately before
/// `cursor` is exactly `protocol`.
///
/// The run is measured backwards from `cursor` for as long as `isalpha`
/// accepts the byte, so a longer word that merely ends with `protocol`
/// (e.g. `xmailto`) does not match.
fn validate_protocol(protocol: &str, contents: &[u8], cursor: usize) -> bool {
    let head = &contents[..cursor];

    // Length of the alphabetic run that ends right before `cursor`.
    let run_len = head
        .iter()
        .rev()
        .take_while(|&&byte| isalpha(byte))
        .count();
    let start = cursor - run_len;

    // Enough bytes must remain from the start of the run to hold `protocol`.
    let fits = contents.len() - start >= protocol.len();
    fits && &head[start..] == protocol.as_bytes()
}

pub fn www_match<'a>(
arena: &'a Arena<AstNode<'a>>,
Expand Down Expand Up @@ -292,117 +449,3 @@ pub fn url_match<'a>(
));
Some((inl, rewind, rewind + link_end))
}

/// Attempts to recognise an email-style autolink around `contents[i]`.
///
/// The scan runs in two directions from `i` (presumably the position of the
/// `@` sign; confirm against the caller):
///
/// * backwards over alphanumerics and the bytes `.`, `+`, `-`, `_`, optionally
///   stepping over a `:` that terminates a `mailto` or `xmpp` scheme;
/// * forwards over alphanumerics, `-`, `_`, `.` (only when followed by an
///   alphanumeric) and, for `xmpp` links, `/`.
///
/// Returns `None` when nothing precedes `i`, when a second `@` is met, when
/// the forward part contains no accepted `.`, when it does not end in a letter
/// or `.`, or when `autolink_delim` reduces the match to nothing.
///
/// On success returns `(link, rewind, rewind + link_end)`: the new link node
/// (with a single text child holding the matched text), the number of bytes
/// matched before `i`, and the total number of bytes matched. Both nodes are
/// created with the placeholder sourcepos `(0, 1, 0, 1)`.
fn email_match<'a>(
    arena: &'a Arena<AstNode<'a>>,
    contents: &[u8],
    i: usize,
    relaxed_autolinks: bool,
) -> Option<(&'a AstNode<'a>, usize, usize)> {
    const EMAIL_OK_SET: [bool; 256] = character_set!(b".+-_");

    let size = contents.len();

    // `mailto:` is prepended unless an explicit scheme is found while rewinding.
    let mut needs_mailto = true;
    let mut xmpp = false;
    let mut back = 0;

    // Walk backwards from `i` for as long as the bytes are acceptable.
    while back < i {
        let pos = i - back - 1;
        let byte = contents[pos];

        let accepted = if isalnum(byte) || EMAIL_OK_SET[byte as usize] {
            true
        } else if byte != b':' {
            false
        } else if validate_protocol("mailto", contents, pos) {
            needs_mailto = false;
            true
        } else if validate_protocol("xmpp", contents, pos) {
            xmpp = true;
            needs_mailto = false;
            true
        } else {
            false
        };

        if !accepted {
            break;
        }
        back += 1;
    }

    if back == 0 {
        return None;
    }

    // Walk forwards from the byte after `i`.
    let tail = size - i;
    let mut end = 1;
    let mut dots = 0;

    while end < tail {
        match contents[i + end] {
            b if isalnum(b) => {}
            b'@' => return None,
            // A dot only counts when an alphanumeric follows it; otherwise it
            // falls through to the catch-all arm and ends the scan.
            b'.' if end < tail - 1 && isalnum(contents[i + end + 1]) => dots += 1,
            // xmpp allows a `/` in the url
            b'/' if xmpp => {}
            b'-' | b'_' => {}
            _ => break,
        }

        end += 1;
    }

    if end < 2 || dots == 0 {
        return None;
    }

    let last = contents[i + end - 1];
    if !isalpha(last) && last != b'.' {
        return None;
    }

    // `autolink_delim` may shorten the match; zero means nothing is left.
    let end = autolink_delim(&contents[i..], end, relaxed_autolinks);
    if end == 0 {
        return None;
    }

    let text = str::from_utf8(&contents[i - back..i + end]).unwrap();
    let scheme = if needs_mailto { "mailto:" } else { "" };
    let url = [scheme, text].concat();

    let link = make_inline(
        arena,
        NodeValue::Link(NodeLink {
            url,
            title: String::new(),
        }),
        (0, 1, 0, 1).into(),
    );

    let label = make_inline(
        arena,
        NodeValue::Text(text.to_string()),
        (0, 1, 0, 1).into(),
    );
    link.append(label);

    Some((link, back, back + end))
}

/// Reports whether the run of alphabetic bytes ending immediately before
/// `cursor` is exactly `protocol`.
///
/// The run is measured backwards from `cursor` for as long as `isalpha`
/// accepts the byte, so a longer word that merely ends with `protocol`
/// (e.g. `xmailto`) does not match.
fn validate_protocol(protocol: &str, contents: &[u8], cursor: usize) -> bool {
    let head = &contents[..cursor];

    // Length of the alphabetic run that ends right before `cursor`.
    let run_len = head
        .iter()
        .rev()
        .take_while(|&&byte| isalpha(byte))
        .count();
    let start = cursor - run_len;

    // Enough bytes must remain from the start of the run to hold `protocol`.
    let fits = contents.len() - start >= protocol.len();
    fits && &head[start..] == protocol.as_bytes()
}
Loading

0 comments on commit 90e2307

Please sign in to comment.