Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Add formatters. #105

Merged
merged 15 commits into from
Aug 22, 2023
59 changes: 59 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 7 additions & 1 deletion jaq-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,22 @@ repository = "https://github.com/01mf02/jaq"
keywords = ["json", "query", "jq"]

[features]
default = ["std", "log", "math", "regex", "time"]
default = ["std", "log", "math", "regex", "time", "format"]
std = []
math = ["libm"]
format = ["base64", "csv", "html-escape", "shell-escape", "urlencoding"]

[dependencies]
jaq-interpret = { version = "1.0.0-beta", path = "../jaq-interpret" }
time = { version = "0.3.22", optional = true, features = ["formatting", "parsing"] }
regex = { version = "1.9", optional = true }
log = { version = "0.4.17", optional = true }
libm = { version = "0.2.7", optional = true }
base64 = { version = "0.21.2", optional = true }
csv = { version = "1.2.2", optional = true }
html-escape = { version = "0.2.13", optional = true }
shell-escape = { version = "0.1.5", optional = true }
urlencoding = { version = "2.1.3", optional = true }
itertools = "0.10.3"

[dev-dependencies]
Expand Down
94 changes: 93 additions & 1 deletion jaq-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ pub fn minimal() -> impl Iterator<Item = (String, usize, Native)> {
feature = "log",
feature = "math",
feature = "regex",
feature = "time"
feature = "time",
feature = "format"
))]
pub fn core() -> impl Iterator<Item = (String, usize, Native)> {
minimal()
Expand All @@ -48,6 +49,7 @@ pub fn core() -> impl Iterator<Item = (String, usize, Native)> {
.chain(run(MATH))
.chain(run(REGEX))
.chain(run(TIME))
.chain(run(FORMAT))
}

fn run<'a>(fs: &'a [(&str, usize, RunPtr)]) -> impl Iterator<Item = (String, usize, Native)> + 'a {
Expand Down Expand Up @@ -402,3 +404,93 @@ const LOG: &[(&str, usize, RunPtr, UpdatePtr)] = &[(
|_, cv| box_once(Ok(debug(cv.1))),
|_, cv, f| f(debug(cv.1)),
)];

/// Serialize an array of strings as one delimiter-separated record.
///
/// `v` is expected to be a `Val::Arr` whose cells are all `Val::Str`;
/// `delimiter` is the field separator byte passed to the `csv` writer.
///
/// # Errors
///
/// - `Error::Arr` if `v` is not an array.
/// - `Error::Str` for the first cell that is not a string.
/// - `Error::Csv` if writing the record, recovering the output buffer, or
///   decoding that buffer as UTF-8 fails.
#[cfg(feature = "format")]
fn to_csv(v: Val, delimiter: u8) -> ValR {
use csv::{StringRecord, WriterBuilder};
// Match on a clone so that `v` itself can still be moved into `Error::Arr` below.
match v.clone() {
Val::Arr(vs) => {
// The writer serializes into an in-memory byte buffer.
let mut writer = WriterBuilder::new()
.delimiter(delimiter)
.from_writer(vec![]);
let mut record = StringRecord::new();
for cell in &*vs {
// Only string cells are accepted; any other value aborts with `Error::Str`.
match cell {
Val::Str(s) => {
record.push_field(s);
Ok(())
}
_ => Err(Error::Str(cell.clone())),
}?;
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could shorten this a lot by using cell.as_str().

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This remark of mine is probably obsolete if you consider my comment below.

}
writer
.write_record(&record)
.map_err(|e| Error::Csv(e.to_string()))?;
Ok(Val::str(
String::from_utf8(writer.into_inner().map_err(|e| Error::Csv(e.to_string()))?)
.map_or_else(
|e| Err(Error::Csv(e.to_string())),
// Drops the final byte of the serialized output -- presumably the record
// terminator appended by the writer; TODO confirm it is always one byte.
|s| Ok(s[0..s.len() - 1].to_string()),
)?,
))
}
_ => Err(Error::Arr(v)),
}
}

/// Native string-formatting filters, compiled only with the `format` feature.
///
/// Each entry is `(name, usize, run pointer)`; the `usize` is 0 for every
/// entry here (presumably the number of filter arguments -- confirm against
/// `RunPtr`'s users). Every filter reads `cv.1` (presumably the input value)
/// and, apart from `tocsv`/`totsv`, fails with `Error::Str` when it is not a
/// string.
#[cfg(feature = "format")]
const FORMAT: &[(&str, usize, RunPtr)] = &[
// Escape the input string with `html_escape::encode_safe`.
("tohtml", 0, |_, cv| {
box_once(match cv.1 {
Val::Str(s) => Ok(Val::str(html_escape::encode_safe(&*s).to_string())),
_ => Err(Error::Str(cv.1)),
})
}),
// Percent-encode the input string with `urlencoding::encode`.
("touri", 0, |_, cv| {
box_once(match cv.1 {
Val::Str(s) => Ok(Val::str(urlencoding::encode(&s).to_string())),
_ => Err(Error::Str(cv.1)),
})
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You frequently have this pattern where you check if the input value is a string, and if it is, then run some function on the string and return another string.
I would create a function in Val that encodes this pattern, similarly to mutate_str(). You could call it map_str, for example. For functions that can fail during this conversion, like fromuri, you could introduce another function like try_map_str().

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This remark of mine is probably obsolete if you consider my comment below.

}),
// Decode the input string with `urlencoding::decode`; a decoding failure is
// reported as `Error::FromUri` carrying the original input and the reason.
("fromuri", 0, |_, cv| {
box_once(match cv.1.clone() {
Val::Str(s) => urlencoding::decode(&s).map_or_else(
|e| Err(Error::FromUri(cv.1, e.to_string())),
|ds| Ok(Val::str(ds.to_string())),
),
_ => Err(Error::Str(cv.1)),
})
}),
// Both delegate to `to_csv`, differing only in the delimiter byte.
("tocsv", 0, |_, cv| box_once(to_csv(cv.1, b','))),
("totsv", 0, |_, cv| box_once(to_csv(cv.1, b'\t'))),
// Escape the input string with `shell_escape::escape`.
("tosh", 0, |_, cv| {
box_once(match cv.1 {
Val::Str(s) => Ok(Val::str(
shell_escape::escape(Into::into((*s).clone())).to_string(),
)),
_ => Err(Error::Str(cv.1)),
})
}),
// Encode the input string with the `STANDARD` base64 engine.
("tobase64", 0, |_, cv| {
use base64::{engine::general_purpose, Engine as _};
box_once(match cv.1 {
Val::Str(s) => Ok(Val::str(general_purpose::STANDARD.encode((*s).clone()))),
_ => Err(Error::Str(cv.1)),
})
}),
// Decode the input string with the `STANDARD` base64 engine, then require
// the decoded bytes to be valid UTF-8; either failure becomes
// `Error::FromBase64` carrying the original input and the reason.
("frombase64", 0, |_, cv| {
use base64::{engine::general_purpose, Engine as _};
box_once(match cv.1.clone() {
Val::Str(s) => general_purpose::STANDARD.decode((*s).clone()).map_or_else(
|e| Err(Error::FromBase64(cv.1.clone(), e.to_string())),
|d| {
std::str::from_utf8(&d).map_or_else(
|e| Err(Error::FromBase64(cv.1.clone(), e.to_string())),
|ds| Ok(Val::str(ds.to_string())),
)
},
),
_ => Err(Error::Str(cv.1)),
})
}),
];
9 changes: 9 additions & 0 deletions jaq-interpret/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ pub enum Error {
Round(Val),
/// `"[1, 2" | fromjson`
FromJson(Val, String),
/// errors from deserializing URIs
FromUri(Val, String),
/// `"not valid" | @base64d`
FromBase64(Val, String),
/// `[] | has("a")` or `{} | has(0)`
Has(Val, Val),
/// `0 | keys`
Expand Down Expand Up @@ -52,6 +56,8 @@ pub enum Error {
Regex(String),
/// `"a" | test("."; "b")`
RegexFlag(char),
/// CSV serialization errors
Csv(String),
/// arbitrary errors for custom filters
Custom(String),
}
Expand All @@ -78,6 +84,8 @@ impl fmt::Display for Error {
Self::Length(v) => write!(f, "{v} has no length"),
Self::Round(v) => write!(f, "cannot round {v}"),
Self::FromJson(v, why) => write!(f, "cannot parse {v} as JSON: {why}"),
Self::FromUri(v, why) => write!(f, "cannot parse {v} as URI: {why}"),
Self::FromBase64(v, why) => write!(f, "cannot parse {v} as base64: {why}"),
Self::Keys(v) => write!(f, "{v} has no keys"),
Self::Has(v, k) => write!(f, "cannot check whether {v} has key {k}"),
Self::Iter(v) => write!(f, "cannot iterate over {v}"),
Expand All @@ -92,6 +100,7 @@ impl fmt::Display for Error {
Self::PathExp => write!(f, "invalid path expression"),
Self::Regex(e) => write!(f, "invalid regex: {e}"),
Self::RegexFlag(c) => write!(f, "invalid regex flag '{c}'"),
Self::Csv(e) => write!(f, "csv serialization error: {e}"),
Self::Custom(e) => write!(f, "custom filter error: {e}"),
}
}
Expand Down
3 changes: 3 additions & 0 deletions jaq-parse/src/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,8 @@ pub fn token() -> impl Parser<char, Token, Error = Simple<char>> {

let var = just('$').ignore_then(text::ident());

let format_filter = just('@').chain(text::ident()).collect();
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you use just('@').or_not() to make this more general and then use it instead of text::ident() to define ident below?


// A parser for control characters (delimiters, semicolons, etc.)
let ctrl = one_of("{}()[]:;,?");

Expand Down Expand Up @@ -135,6 +137,7 @@ pub fn token() -> impl Parser<char, Token, Error = Simple<char>> {
.or(ctrl.map(Token::Ctrl))
.or(op.map(Token::Op))
.or(var.map(Token::Var))
.or(format_filter.map(Token::Ident))
.or(num().map(Token::Num))
.or(str_().map(Token::Str))
.recover_with(skip_then_retry_until([]))
Expand Down
12 changes: 12 additions & 0 deletions jaq-std/src/std.jq
Original file line number Diff line number Diff line change
Expand Up @@ -147,3 +147,15 @@ def input: first(inputs);
# Date
def todate: todateiso8601;
def fromdate: fromdateiso8601;

# Formatting
#
# `@text` and `@json` are aliases for `tostring` and `tojson`.
# Every other filter first converts its input with `tostring` and then
# applies the filter named on the right-hand side; `@csv` and `@tsv`
# convert each element of the input with `map(tostring)` instead.
def @text: tostring;
def @json: tojson;
def @html: tostring | tohtml;
def @uri: tostring | touri;
# Inverse direction of `@uri`: decodes via `fromuri`.
def @urid: tostring | fromuri;
def @csv: map(tostring) | tocsv;
def @tsv: map(tostring) | totsv;
def @sh: tostring | tosh;
def @base64: tostring | tobase64;
# Inverse direction of `@base64`: decodes via `frombase64`.
def @base64d: tostring | frombase64;
46 changes: 46 additions & 0 deletions jaq-std/tests/std.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,52 @@ yields!(flatten_obj, "{a: 1} | flatten", json!([{"a": 1}]));
// jq gives an error here
yields!(flatten_num, "0 | flatten", [0]);

// Tests for the `@...` format filters. Each `yields!` invocation appears to
// take (test name, filter source, expected output) -- the macro is defined
// outside this excerpt.

// `@text` applied to values of several types.
yields!(
format_text,
"[0, true, null, \"hello\", {}, [] | @text]",
["0", "true", "null", "hello", "{}", "[]"]
);
// `@json` on the same values; the string is the only one whose output
// differs from `@text` (it keeps its surrounding quotes).
yields!(
format_json,
"[0, true, null, \"hello\", {}, [] | @json]",
["0", "true", "null", "\"hello\"", "{}", "[]"]
);
// `@html` escapes `<`, `>`, `'` and `/` in the input.
yields!(
format_html,
"\"<p style='visibility: hidden'>sneaky</p>\" | @html",
"&lt;p style=&#x27;visibility: hidden&#x27;&gt;sneaky&lt;&#x2F;p&gt;"
);
// `@uri` percent-encodes spaces.
yields!(
format_uri,
"\"hello cruel world\" | @uri",
"hello%20cruel%20world"
);
// `@uri` followed by `@urid` round-trips the input.
yields!(
format_unformat_uri,
"\"hello cruel world\" | @uri | @urid",
"hello cruel world"
);
// `@csv`: only the field containing double quotes is quoted, with the
// inner quotes doubled.
yields!(
format_csv,
"[0, true, null, \"hello \\\"quotes\\\"\", {}, []] | @csv",
r#"0,true,null,"hello ""quotes""",{},[]"#
);
// `@tsv`: same quoting as `@csv`, with tab as the separator.
// NOTE(review): jq's own `@tsv` is documented to use backslash escapes rather
// than CSV-style quoting -- confirm this divergence is intended.
yields!(
format_tsv,
"[0, true, null, \"hello \\\"quotes\\\"\", {}, []] | @tsv",
"0\ttrue\tnull\t\"hello \"\"quotes\"\"\"\t{}\t[]"
);
// `@base64` encodes with padding.
yields!(
format_base64,
"\"hello cruel world\" | @base64",
"aGVsbG8gY3J1ZWwgd29ybGQ="
);
// `@base64` followed by `@base64d` round-trips the input.
yields!(
format_unformat_base64,
"\"hello cruel world\" | @base64 | @base64d",
"hello cruel world"
);

#[test]
fn inside() {
give(
Expand Down