1
0
mirror of https://github.com/stedolan/jq.git synced 2024-05-11 05:55:39 +00:00

JSON stream parser.

Allow multiple values on input as concatenated JSON objects,
possibly separated by whitespace.
This commit is contained in:
Stephen Dolan
2012-09-11 14:52:10 +01:00
parent 062a6aa6d7
commit 51a44edc63
7 changed files with 153 additions and 56 deletions

7
c/jv.c
View File

@@ -103,6 +103,13 @@ jv jv_invalid_get_msg(jv inv) {
return x; return x;
} }
// Reports whether an invalid value carries an error message.
//
// Fetches the message via jv_invalid_get_msg() and returns 0 when that
// message has kind JV_KIND_NULL, 1 otherwise. The fetched message is
// released with jv_free() before returning.
//
// NOTE(review): callers in this file pass jv_copy(...) as the argument,
// which suggests `inv` is consumed by jv_invalid_get_msg() -- confirm.
int jv_invalid_has_msg(jv inv) {
  jv errmsg = jv_invalid_get_msg(inv);
  int has_msg = 1;
  if (jv_get_kind(errmsg) == JV_KIND_NULL) {
    has_msg = 0;
  }
  jv_free(errmsg);
  return has_msg;
}
static void jvp_invalid_free(jv_complex* x) { static void jvp_invalid_free(jv_complex* x) {
if (jvp_refcnt_dec(x)) { if (jvp_refcnt_dec(x)) {
jv_free(((jvp_invalid*)x->ptr)->errmsg); jv_free(((jvp_invalid*)x->ptr)->errmsg);

2
c/jv.h
View File

@@ -52,6 +52,8 @@ int jv_equal(jv, jv);
jv jv_invalid(); jv jv_invalid();
jv jv_invalid_with_msg(jv); jv jv_invalid_with_msg(jv);
jv jv_invalid_get_msg(jv); jv jv_invalid_get_msg(jv);
int jv_invalid_has_msg(jv);
jv jv_null(); jv jv_null();
jv jv_true(); jv jv_true();

View File

@@ -1,4 +1,5 @@
#ifndef JV_DTOA_H
#define JV_DTOA_H
#define Kmax 7 #define Kmax 7
struct Bigint; struct Bigint;
@@ -18,3 +19,4 @@ void jvp_freedtoa(struct dtoa_context* C, char *s);
#define JVP_DTOA_FMT_MAX_LEN 32 #define JVP_DTOA_FMT_MAX_LEN 32
char* jvp_dtoa_fmt(struct dtoa_context* C, register char *b, double x); char* jvp_dtoa_fmt(struct dtoa_context* C, register char *b, double x);
#endif

View File

@@ -22,11 +22,15 @@ void jv_parser_init(struct jv_parser* p) {
p->tokenbuf = 0; p->tokenbuf = 0;
p->tokenlen = p->tokenpos = 0; p->tokenlen = p->tokenpos = 0;
p->st = JV_PARSER_NORMAL; p->st = JV_PARSER_NORMAL;
p->curr_buf = 0;
p->curr_buf_length = p->curr_buf_pos = p->curr_buf_is_partial = 0;
jvp_dtoa_context_init(&p->dtoa); jvp_dtoa_context_init(&p->dtoa);
} }
void jv_parser_free(struct jv_parser* p) { void jv_parser_free(struct jv_parser* p) {
jv_free(p->next); jv_free(p->next);
for (int i=0; i<p->stackpos; i++)
jv_free(p->stack[i]);
free(p->stack); free(p->stack);
free(p->tokenbuf); free(p->tokenbuf);
jvp_dtoa_context_free(&p->dtoa); jvp_dtoa_context_free(&p->dtoa);
@@ -271,13 +275,25 @@ static chclass classify(char c) {
} }
static presult OK = "output produced";
// Hands a finished top-level value to the caller, if there is one.
//
// A value is considered finished when the parser stack is empty
// (p->stackpos == 0) and p->next holds a valid value. In that case the
// value is moved into *out, p->next is reset to jv_invalid(), and 1 is
// returned. Otherwise nothing is modified and 0 is returned.
static int check_done(struct jv_parser* p, jv* out) {
  // Still inside a nested term, or no value has been produced yet.
  if (p->stackpos != 0 || !jv_is_valid(p->next))
    return 0;

  *out = p->next;
  p->next = jv_invalid();
  return 1;
}
static pfunc scan(struct jv_parser* p, char ch) { static pfunc scan(struct jv_parser* p, char ch, jv* out) {
presult answer = 0;
if (p->st == JV_PARSER_NORMAL) { if (p->st == JV_PARSER_NORMAL) {
chclass cls = classify(ch); chclass cls = classify(ch);
if (cls != LITERAL) { if (cls != LITERAL) {
TRY(check_literal(p)); TRY(check_literal(p));
if (check_done(p, out)) answer = OK;
} }
switch (cls) { switch (cls) {
case LITERAL: case LITERAL:
@@ -294,10 +310,12 @@ static pfunc scan(struct jv_parser* p, char ch) {
case INVALID: case INVALID:
return "Invalid character"; return "Invalid character";
} }
if (check_done(p, out)) answer = OK;
} else { } else {
if (ch == '"' && p->st == JV_PARSER_STRING) { if (ch == '"' && p->st == JV_PARSER_STRING) {
TRY(found_string(p)); TRY(found_string(p));
p->st = JV_PARSER_NORMAL; p->st = JV_PARSER_NORMAL;
if (check_done(p, out)) answer = OK;
} else { } else {
tokenadd(p, ch); tokenadd(p, ch);
if (ch == '\\' && p->st == JV_PARSER_STRING) { if (ch == '\\' && p->st == JV_PARSER_STRING) {
@@ -307,43 +325,87 @@ static pfunc scan(struct jv_parser* p, char ch) {
} }
} }
} }
return 0; return answer;
} }
static pfunc finish(struct jv_parser* p) { void jv_parser_set_buf(struct jv_parser* p, const char* buf, int length, int is_partial) {
if (p->st != JV_PARSER_NORMAL) assert((p->curr_buf == 0 || p->curr_buf_pos == p->curr_buf_length)
return "Unfinished string"; && "previous buffer not exhausted");
TRY(check_literal(p)); p->curr_buf = buf;
p->curr_buf_length = length;
p->curr_buf_pos = 0;
p->curr_buf_is_partial = is_partial;
}
if (p->stackpos != 0) jv jv_parser_next(struct jv_parser* p) {
return "Unfinished JSON term"; assert(p->curr_buf && "a buffer must be provided");
jv value;
// this will happen on the empty string presult msg = 0;
if (!jv_is_valid(p->next)) while (!msg && p->curr_buf_pos < p->curr_buf_length) {
return "Expected JSON value"; char ch = p->curr_buf[p->curr_buf_pos++];
msg = scan(p, ch, &value);
return 0; }
if (msg == OK) {
return value;
} else if (msg) {
return jv_invalid_with_msg(jv_string(msg));
} else if (p->curr_buf_is_partial) {
assert(p->curr_buf_pos == p->curr_buf_length);
// need another buffer
return jv_invalid();
} else {
assert(p->curr_buf_pos == p->curr_buf_length);
// at EOF
if (p->st != JV_PARSER_NORMAL)
return jv_invalid_with_msg(jv_string("Unfinished string"));
if ((msg = check_literal(p)))
return jv_invalid_with_msg(jv_string(msg));
if (p->stackpos != 0)
return jv_invalid_with_msg(jv_string("Unfinished JSON term"));
// p->next is either invalid (nothing here but no syntax error)
// or valid (this is the value). either way it's the thing to return
value = p->next;
p->next = jv_invalid();
return value;
}
} }
jv jv_parse_sized(const char* string, int length) { jv jv_parse_sized(const char* string, int length) {
struct jv_parser parser; struct jv_parser parser;
jv_parser_init(&parser); jv_parser_init(&parser);
jv_parser_set_buf(&parser, string, length, 0);
const char* p = string; jv value = jv_parser_next(&parser);
char ch; if (jv_is_valid(value)) {
presult msg = 0; jv next = jv_parser_next(&parser);
while (msg == 0 && p < string + length) { if (jv_is_valid(next)) {
ch = *p++; // multiple JSON values, we only wanted one
msg = scan(&parser, ch); jv_free(value);
} jv_free(next);
if (msg == 0) msg = finish(&parser); value = jv_invalid_with_msg(jv_string("Unexpected extra JSON values"));
jv value; } else if (jv_invalid_has_msg(jv_copy(next))) {
if (msg) { // parser error after the first JSON value
value = jv_invalid_with_msg(jv_string_fmt("%s (while parsing '%s')", msg, string)); jv_free(value);
value = next;
} else {
// a single valid JSON value
jv_free(next);
}
} else if (jv_invalid_has_msg(jv_copy(value))) {
// parse error, we'll return it
} else { } else {
value = jv_copy(parser.next); // no value at all
jv_free(value);
value = jv_invalid_with_msg(jv_string("Expected JSON value"));
} }
jv_parser_free(&parser); jv_parser_free(&parser);
if (!jv_is_valid(value) && jv_invalid_has_msg(jv_copy(value))) {
jv msg = jv_invalid_get_msg(value);
value = jv_invalid_with_msg(jv_string_fmt("%s (while parsing '%s')",
jv_string_value(msg),
string));
jv_free(msg);
}
return value; return value;
} }

View File

@@ -1,5 +1,12 @@
#ifndef JV_PARSE_H
#define JV_PARSE_H
#include "jv_dtoa.h"
struct jv_parser { struct jv_parser {
const char* curr_buf;
int curr_buf_length;
int curr_buf_pos;
int curr_buf_is_partial;
jv* stack; jv* stack;
int stackpos; int stackpos;
int stacklen; int stacklen;
@@ -17,3 +24,11 @@ struct jv_parser {
JV_PARSER_STRING_ESCAPE JV_PARSER_STRING_ESCAPE
} st; } st;
}; };
void jv_parser_init(struct jv_parser* p);
void jv_parser_free(struct jv_parser* p);
void jv_parser_set_buf(struct jv_parser* p, const char* buf, int length, int is_partial);
jv jv_parser_next(struct jv_parser* p);
#endif

View File

@@ -10,7 +10,7 @@ static void jv_dump_string(jv str, int ascii_only) {
assert(jv_get_kind(str) == JV_KIND_STRING); assert(jv_get_kind(str) == JV_KIND_STRING);
const char* i = jv_string_value(str); const char* i = jv_string_value(str);
const char* end = i + jv_string_length(jv_copy(str)); const char* end = i + jv_string_length(jv_copy(str));
int c; int c = 0;
while ((i = jvp_utf8_next(i, end, &c))) { while ((i = jvp_utf8_next(i, end, &c))) {
assert(c != -1); assert(c != -1);
int unicode_escape = 0; int unicode_escape = 0;

View File

@@ -4,6 +4,7 @@
#include "parser.tab.h" #include "parser.tab.h"
#include "builtin.h" #include "builtin.h"
#include "jv.h" #include "jv.h"
#include "jv_parse.h"
#include "locfile.h" #include "locfile.h"
int jq_parse(struct locfile* source, block* answer); int jq_parse(struct locfile* source, block* answer);
@@ -31,31 +32,6 @@ struct bytecode* jq_compile(const char* str) {
return bc; return bc;
} }
// Runs a compiled program against a single JSON value read from stdin.
//
// Reads one line (up to sizeof(buf) - 1 bytes) from stdin, parses it with
// jv_parse(), initialises the interpreter with the parsed value and prints
// every result produced by jq_next() on its own line.
//
// bc: the bytecode to execute; it is passed to jq_init() and is not freed
//     here.
//
// If the input cannot be read or parsed, a diagnostic is written to stderr
// and the function returns without running the program.
void run_program(struct bytecode* bc) {
#if JQ_DEBUG
  dump_disassembly(0, bc);
  printf("\n");
#endif
  char buf[409600];
  // fgets() leaves the buffer contents indeterminate when it returns NULL
  // (EOF or read error), so treat a failed read as empty input rather than
  // handing uninitialised memory to the parser.
  if (!fgets(buf, sizeof(buf), stdin)) buf[0] = 0;
  jv value = jv_parse(buf);
  if (!jv_is_valid(value)) {
    // Report the failure explicitly: an assert would vanish under NDEBUG
    // and let an invalid value reach jq_init().
    fprintf(stderr, "couldn't parse input\n");
    jv_free(value);
    return;
  }
  jq_init(bc, value);
  jv result;
  while (jv_is_valid(result = jq_next())) {
    jv_dump(result);
    printf("\n");
  }
  // The loop ends on the first invalid result, which still has to be freed.
  jv_free(result);
#if JQ_DEBUG
  printf("end of results\n");
#endif
  jq_teardown();
}
int skipline(const char* buf) { int skipline(const char* buf) {
int p = 0; int p = 0;
while (buf[p] == ' ' || buf[p] == '\t') p++; while (buf[p] == ' ' || buf[p] == '\t') p++;
@@ -128,7 +104,40 @@ int main(int argc, char* argv[]) {
if (argc == 1) { run_tests(); return 0; } if (argc == 1) { run_tests(); return 0; }
struct bytecode* bc = jq_compile(argv[1]); struct bytecode* bc = jq_compile(argv[1]);
if (!bc) return 1; if (!bc) return 1;
run_program(bc);
#if JQ_DEBUG
dump_disassembly(0, bc);
printf("\n");
#endif
struct jv_parser parser;
jv_parser_init(&parser);
while (!feof(stdin)) {
char buf[4096];
if (!fgets(buf, sizeof(buf), stdin)) buf[0] = 0;
jv_parser_set_buf(&parser, buf, strlen(buf), !feof(stdin));
jv value;
while (jv_is_valid((value = jv_parser_next(&parser)))) {
jq_init(bc, value);
jv result;
while (jv_is_valid(result = jq_next())) {
jv_dump(result);
printf("\n");
}
jv_free(result);
jq_teardown();
}
if (jv_invalid_has_msg(jv_copy(value))) {
jv msg = jv_invalid_get_msg(value);
fprintf(stderr, "parse error: %s\n", jv_string_value(msg));
jv_free(msg);
break;
} else {
jv_free(value);
}
}
jv_parser_free(&parser);
bytecode_free(bc); bytecode_free(bc);
return 0; return 0;
} }