mirror of
https://github.com/stedolan/jq.git
synced 2024-05-11 05:55:39 +00:00
Add support for JSON sequence MIME type
Per draft-ietf-json-text-sequence-07 (which soon will be published as an RFC).
This commit is contained in:
@@ -92,6 +92,17 @@ sections:
|
||||
|
||||
Output the jq version and exit with zero.
|
||||
|
||||
* `--seq`:
|
||||
|
||||
Use the `application/json-seq` MIME type scheme for separating
|
||||
JSON texts in jq's input and output. This means that an ASCII
|
||||
RS (record separator) character is printed before each value on
|
||||
output and an ASCII LF (line feed) is printed after every
|
||||
output. Input JSON texts that fail to parse are ignored (but
|
||||
warned about), discarding all subsequent input until the next
|
||||
RS. This mode also parses the output of jq without the `--seq`
|
||||
option.
|
||||
|
||||
* `--slurp`/`-s`:
|
||||
|
||||
Instead of running the filter for each JSON object in the
|
||||
|
||||
@@ -156,6 +156,8 @@ void jv_dump(jv, int flags);
|
||||
void jv_show(jv, int flags);
|
||||
jv jv_dump_string(jv, int flags);
|
||||
|
||||
#define JV_PARSE_SEQ 1
|
||||
|
||||
jv jv_parse(const char* string);
|
||||
jv jv_parse_sized(const char* string, int length);
|
||||
|
||||
|
||||
+63
-9
@@ -24,6 +24,8 @@ struct jv_parser {
|
||||
int curr_buf_is_partial;
|
||||
unsigned bom_strip_position;
|
||||
|
||||
int flags;
|
||||
|
||||
jv* stack;
|
||||
int stackpos;
|
||||
int stacklen;
|
||||
@@ -40,12 +42,15 @@ struct jv_parser {
|
||||
enum {
|
||||
JV_PARSER_NORMAL,
|
||||
JV_PARSER_STRING,
|
||||
JV_PARSER_STRING_ESCAPE
|
||||
JV_PARSER_STRING_ESCAPE,
|
||||
JV_PARSER_WAITING_FOR_RS // parse error, waiting for RS
|
||||
} st;
|
||||
unsigned int last_ch_was_ws:1;
|
||||
};
|
||||
|
||||
|
||||
static void parser_init(struct jv_parser* p) {
|
||||
p->flags = 0;
|
||||
p->stack = 0;
|
||||
p->stacklen = p->stackpos = 0;
|
||||
p->next = jv_invalid();
|
||||
@@ -60,10 +65,18 @@ static void parser_init(struct jv_parser* p) {
|
||||
jvp_dtoa_context_init(&p->dtoa);
|
||||
}
|
||||
|
||||
static void parser_free(struct jv_parser* p) {
|
||||
// Return the parser to a clean state so it can start on a fresh JSON text:
// drops any pending value in p->next, frees every value held on the parse
// stack, discards the partially accumulated token, and puts the state
// machine back in JV_PARSER_NORMAL. The stack and token buffers themselves
// are not freed here.
static void parser_reset(struct jv_parser* p) {
|
||||
jv_free(p->next);
|
||||
p->next = jv_invalid();
|
||||
// Only the first stackpos slots hold values.
for (int i=0; i<p->stackpos; i++)
|
||||
jv_free(p->stack[i]);
|
||||
p->stackpos = 0;
|
||||
p->tokenpos = 0;
|
||||
p->st = JV_PARSER_NORMAL;
|
||||
}
|
||||
|
||||
static void parser_free(struct jv_parser* p) {
|
||||
parser_reset(p);
|
||||
jv_mem_free(p->stack);
|
||||
jv_mem_free(p->tokenbuf);
|
||||
jvp_dtoa_context_free(&p->dtoa);
|
||||
@@ -330,9 +343,26 @@ static pfunc scan(struct jv_parser* p, char ch, jv* out) {
|
||||
p->line++;
|
||||
p->column = 0;
|
||||
}
|
||||
if (ch == '\036' /* ASCII RS; see draft-ietf-json-sequence-07 */) {
|
||||
TRY(check_literal(p));
|
||||
if (p->st == JV_PARSER_NORMAL && check_done(p, out)) {
|
||||
if ((p->flags & JV_PARSE_SEQ) && !p->last_ch_was_ws && jv_get_kind(*out) == JV_KIND_NUMBER) {
|
||||
jv_free(*out);
|
||||
*out = jv_invalid();
|
||||
return "Potentially truncated top-level numeric value";
|
||||
}
|
||||
return OK;
|
||||
}
|
||||
parser_reset(p);
|
||||
*out = jv_invalid();
|
||||
return "Truncated value";
|
||||
}
|
||||
presult answer = 0;
|
||||
p->last_ch_was_ws = 0;
|
||||
if (p->st == JV_PARSER_NORMAL) {
|
||||
chclass cls = classify(ch);
|
||||
if (cls == WHITESPACE)
|
||||
p->last_ch_was_ws = 1;
|
||||
if (cls != LITERAL) {
|
||||
TRY(check_literal(p));
|
||||
if (check_done(p, out)) answer = OK;
|
||||
@@ -373,6 +403,7 @@ static pfunc scan(struct jv_parser* p, char ch, jv* out) {
|
||||
// Allocate a new parser, initialize it with parser_init(), and record the
// caller-supplied flags (0, or JV_PARSE_SEQ to enable JSON text sequence
// handling). Returns the newly allocated parser.
struct jv_parser* jv_parser_new(int flags) {
|
||||
struct jv_parser* p = jv_mem_alloc(sizeof(struct jv_parser));
|
||||
parser_init(p);
|
||||
// parser_init() zeroes the flags, so they must be set afterwards.
p->flags = flags;
|
||||
return p;
|
||||
}
|
||||
|
||||
@@ -412,14 +443,22 @@ jv jv_parser_next(struct jv_parser* p) {
|
||||
assert(p->curr_buf && "a buffer must be provided");
|
||||
if (p->bom_strip_position == 0xff) return jv_invalid_with_msg(jv_string("Malformed BOM"));
|
||||
jv value;
|
||||
char ch;
|
||||
presult msg = 0;
|
||||
while (!msg && p->curr_buf_pos < p->curr_buf_length) {
|
||||
char ch = p->curr_buf[p->curr_buf_pos++];
|
||||
ch = p->curr_buf[p->curr_buf_pos++];
|
||||
if (ch != '\036' && p->st == JV_PARSER_WAITING_FOR_RS)
|
||||
continue; // need to resync, wait for RS
|
||||
msg = scan(p, ch, &value);
|
||||
}
|
||||
if (msg == OK) {
|
||||
return value;
|
||||
} else if (msg) {
|
||||
parser_reset(p);
|
||||
if (ch != '\036' && (p->flags & JV_PARSE_SEQ)) {
|
||||
p->st = JV_PARSER_WAITING_FOR_RS;
|
||||
return jv_invalid_with_msg(jv_string_fmt("%s at line %d, column %d (need RS to resync)", msg, p->line, p->column));
|
||||
}
|
||||
return jv_invalid_with_msg(jv_string_fmt("%s at line %d, column %d", msg, p->line, p->column));
|
||||
} else if (p->curr_buf_is_partial) {
|
||||
assert(p->curr_buf_pos == p->curr_buf_length);
|
||||
@@ -428,16 +467,31 @@ jv jv_parser_next(struct jv_parser* p) {
|
||||
} else {
|
||||
assert(p->curr_buf_pos == p->curr_buf_length);
|
||||
// at EOF
|
||||
if (p->st != JV_PARSER_NORMAL)
|
||||
return jv_invalid_with_msg(jv_string("Unfinished string"));
|
||||
if ((msg = check_literal(p)))
|
||||
return jv_invalid_with_msg(jv_string(msg));
|
||||
if (p->stackpos != 0)
|
||||
return jv_invalid_with_msg(jv_string("Unfinished JSON term"));
|
||||
if (p->st != JV_PARSER_WAITING_FOR_RS) {
|
||||
if (p->st != JV_PARSER_NORMAL) {
|
||||
parser_reset(p);
|
||||
p->st = JV_PARSER_WAITING_FOR_RS;
|
||||
return jv_invalid_with_msg(jv_string("Unfinished string"));
|
||||
}
|
||||
if ((msg = check_literal(p))) {
|
||||
parser_reset(p);
|
||||
p->st = JV_PARSER_WAITING_FOR_RS;
|
||||
return jv_invalid_with_msg(jv_string(msg));
|
||||
}
|
||||
if (p->stackpos != 0) {
|
||||
parser_reset(p);
|
||||
p->st = JV_PARSER_WAITING_FOR_RS;
|
||||
return jv_invalid_with_msg(jv_string("Unfinished JSON term"));
|
||||
}
|
||||
}
|
||||
// p->next is either invalid (nothing here but no syntax error)
|
||||
// or valid (this is the value). either way it's the thing to return
|
||||
value = p->next;
|
||||
p->next = jv_invalid();
|
||||
if ((p->flags & JV_PARSE_SEQ) && !p->last_ch_was_ws && jv_get_kind(value) == JV_KIND_NUMBER) {
|
||||
jv_free(value);
|
||||
return jv_invalid_with_msg(jv_string("Potentially truncated top-level numeric value"));
|
||||
}
|
||||
return value;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -90,8 +90,9 @@ enum {
|
||||
UNBUFFERED_OUTPUT = 2048,
|
||||
EXIT_STATUS = 4096,
|
||||
IN_PLACE = 8192,
|
||||
SEQ = 16384,
|
||||
/* debugging only */
|
||||
DUMP_DISASM = 16384,
|
||||
DUMP_DISASM = 32768,
|
||||
};
|
||||
static int options = 0;
|
||||
|
||||
@@ -122,6 +123,8 @@ static int process(jq_state *jq, jv value, int flags) {
|
||||
ret = 11;
|
||||
else
|
||||
ret = 0;
|
||||
if (options & SEQ)
|
||||
fwrite("\036", 1, 1, stdout);
|
||||
jv_dump(result, dumpopts);
|
||||
}
|
||||
if (!(options & RAW_NO_LF))
|
||||
@@ -284,6 +287,10 @@ int main(int argc, char* argv[]) {
|
||||
options |= IN_PLACE;
|
||||
if (!short_opts) continue;
|
||||
}
|
||||
if (isoption(argv[i], 0, "seq", &short_opts)) {
|
||||
options |= SEQ;
|
||||
if (!short_opts) continue;
|
||||
}
|
||||
if (isoption(argv[i], 'e', "exit-status", &short_opts)) {
|
||||
options |= EXIT_STATUS;
|
||||
if (!short_opts) continue;
|
||||
@@ -444,7 +451,7 @@ int main(int argc, char* argv[]) {
|
||||
slurped = jv_array();
|
||||
}
|
||||
}
|
||||
struct jv_parser* parser = jv_parser_new(0);
|
||||
struct jv_parser* parser = jv_parser_new((options & SEQ) ? JV_PARSE_SEQ : 0);
|
||||
char buf[4096];
|
||||
int is_last = 0;
|
||||
while (read_more(buf, sizeof(buf), &is_last)) {
|
||||
@@ -461,22 +468,28 @@ int main(int argc, char* argv[]) {
|
||||
} else {
|
||||
jv_parser_set_buf(parser, buf, strlen(buf), !is_last);
|
||||
jv value;
|
||||
while (jv_is_valid((value = jv_parser_next(parser)))) {
|
||||
while (jv_is_valid(value = jv_parser_next(parser)) || jv_invalid_has_msg(jv_copy(value))) {
|
||||
if (!jv_is_valid(value)) {
|
||||
jv msg = jv_invalid_get_msg(value);
|
||||
if (!(options & SEQ)) {
|
||||
// We used to treat parse errors as fatal...
|
||||
ret = 4;
|
||||
fprintf(stderr, "parse error: %s\n", jv_string_value(msg));
|
||||
jv_free(msg);
|
||||
break;
|
||||
}
|
||||
fprintf(stderr, "ignoring parse error: %s\n", jv_string_value(msg));
|
||||
jv_free(msg);
|
||||
// ...but with --seq we attempt to recover.
|
||||
continue;
|
||||
}
|
||||
if (options & SLURP) {
|
||||
slurped = jv_array_append(slurped, value);
|
||||
} else {
|
||||
ret = process(jq, value, jq_flags);
|
||||
value = jv_invalid();
|
||||
}
|
||||
}
|
||||
if (jv_invalid_has_msg(jv_copy(value))) {
|
||||
jv msg = jv_invalid_get_msg(value);
|
||||
fprintf(stderr, "parse error: %s\n", jv_string_value(msg));
|
||||
jv_free(msg);
|
||||
ret = 4;
|
||||
break;
|
||||
} else {
|
||||
jv_free(value);
|
||||
}
|
||||
}
|
||||
}
|
||||
jv_parser_free(parser);
|
||||
|
||||
@@ -78,6 +78,46 @@ case "$v" in
|
||||
*) true;;
|
||||
esac
|
||||
|
||||
## Test JSON sequence support
|
||||
|
||||
cat > $d/expected <<EOF
|
||||
ignoring parse error: Potentially truncated top-level numeric value at line 1, column 2
|
||||
ignoring parse error: Truncated value at line 2, column 5
|
||||
ignoring parse error: Truncated value at line 2, column 25
|
||||
ignoring parse error: Truncated value at line 2, column 41
|
||||
EOF
|
||||
printf '1\0362 3\n[0,1\036[4,5]true"ab"{"c":4\036{}{"d":5,"e":6"\036false\n'|$VALGRIND $Q ./jq -ces --seq '. == [2,3,[4,5],true,"ab",{},false]' > /dev/null 2> $d/out
|
||||
cmp $d/out $d/expected
|
||||
|
||||
cat > $d/expected <<EOF
|
||||
ignoring parse error: Potentially truncated top-level numeric value at line 1, column 2
|
||||
ignoring parse error: Truncated value at line 2, column 5
|
||||
ignoring parse error: Truncated value at line 2, column 25
|
||||
ignoring parse error: Invalid literal at line 3, column 1
|
||||
EOF
|
||||
printf '1\0362 3\n[0,1\036[4,5]true"ab"{"c":4\036{}{"d":5,"e":6"false\n\036null'|$VALGRIND $Q ./jq -ces --seq '. == [2,3,[4,5],true,"ab",{},null]' > /dev/null 2> $d/out
|
||||
cmp $d/out $d/expected
|
||||
|
||||
# Note that here jq sees no inputs at all but it still succeeds because
|
||||
# --seq ignores parse errors
|
||||
cat > $d/expected <<EOF
|
||||
ignoring parse error: Unfinished string
|
||||
EOF
|
||||
printf '"foo'|./jq -ce --seq . > $d/out 2>&1
|
||||
cmp $d/out $d/expected
|
||||
|
||||
# Numeric values truncated by EOF are ignored
|
||||
cat > $d/expected <<EOF
|
||||
ignoring parse error: Potentially truncated top-level numeric value
|
||||
EOF
|
||||
printf '1'|./jq -ce --seq . > $d/out 2>&1
|
||||
cmp $d/out $d/expected
|
||||
|
||||
cat > $d/expected <<EOF
|
||||
EOF
|
||||
printf '1\n'|./jq -ces --seq '. == [1]' >/dev/null 2> $d/out
|
||||
cmp $d/out $d/expected
|
||||
|
||||
## Test library/module system
|
||||
|
||||
mods=$PWD/tests/modules
|
||||
|
||||
Reference in New Issue
Block a user