Incremental JSON parsing for jq: parse a stream of values from stdin.

Summary of the changes in this patch:
  * c/jv.c, c/jv.h: add jv_invalid_has_msg(), which fetches the message of
    an invalid jv and reports whether its kind is not JV_KIND_NULL.
  * c/jv_dtoa.h, c/jv_parse.h: add include guards. jv_parse.h now includes
    jv_dtoa.h and declares the parser API.
  * c/jv_parse.c: replace finish() with jv_parser_set_buf() and
    jv_parser_next(). scan() hands a completed top-level value back through
    an out-parameter and signals it with the OK sentinel. jv_parser_free()
    now also frees the values still held on the parser stack.
    jv_parse_sized() is rebuilt on the new API and reports an error when
    more than one JSON value is present.
  * c/jv_print.c: initialise `c` in jv_dump_string().
  * c/main.c: remove run_program(). main() reads stdin with fgets() into a
    4096-byte buffer, feeds each chunk to the parser and runs the program
    once per parsed value.

NOTE(review): main() loops on !feof(stdin) only. When fgets() fails without
the EOF flag being set, the buffer is emptied and the loop repeats, so a
ferror(stdin) check looks necessary - confirm.
NOTE(review): the chunk length comes from strlen(buf), so input containing
a NUL byte would presumably be cut short - confirm.

diff --git a/c/jv.c b/c/jv.c
index 214354ce..786e7b26 100644
--- a/c/jv.c
+++ b/c/jv.c
@@ -103,6 +103,13 @@ jv jv_invalid_get_msg(jv inv) {
   return x;
 }
 
+int jv_invalid_has_msg(jv inv) {
+  jv msg = jv_invalid_get_msg(inv);
+  int r = jv_get_kind(msg) != JV_KIND_NULL;
+  jv_free(msg);
+  return r;
+}
+
 static void jvp_invalid_free(jv_complex* x) {
   if (jvp_refcnt_dec(x)) {
     jv_free(((jvp_invalid*)x->ptr)->errmsg);
diff --git a/c/jv.h b/c/jv.h
index 3c557c45..48ae0e0f 100644
--- a/c/jv.h
+++ b/c/jv.h
@@ -52,6 +52,8 @@ int jv_equal(jv, jv);
 jv jv_invalid();
 jv jv_invalid_with_msg(jv);
 jv jv_invalid_get_msg(jv);
+int jv_invalid_has_msg(jv);
+
 
 jv jv_null();
 jv jv_true();
diff --git a/c/jv_dtoa.h b/c/jv_dtoa.h
index e9346c0e..3bafcf47 100644
--- a/c/jv_dtoa.h
+++ b/c/jv_dtoa.h
@@ -1,4 +1,5 @@
-
+#ifndef JV_DTOA_H
+#define JV_DTOA_H
 #define Kmax 7
 
 struct Bigint;
@@ -18,3 +19,4 @@ void jvp_freedtoa(struct dtoa_context* C, char *s);
 
 #define JVP_DTOA_FMT_MAX_LEN 32
 char* jvp_dtoa_fmt(struct dtoa_context* C, register char *b, double x);
+#endif
diff --git a/c/jv_parse.c b/c/jv_parse.c
index 7fd4d2d2..e4565ef7 100644
--- a/c/jv_parse.c
+++ b/c/jv_parse.c
@@ -22,11 +22,15 @@ void jv_parser_init(struct jv_parser* p) {
   p->tokenbuf = 0;
   p->tokenlen = p->tokenpos = 0;
   p->st = JV_PARSER_NORMAL;
+  p->curr_buf = 0;
+  p->curr_buf_length = p->curr_buf_pos = p->curr_buf_is_partial = 0;
   jvp_dtoa_context_init(&p->dtoa);
 }
 
 void jv_parser_free(struct jv_parser* p) {
   jv_free(p->next);
+  for (int i=0; i<p->stackpos; i++)
+    jv_free(p->stack[i]);
   free(p->stack);
   free(p->tokenbuf);
   jvp_dtoa_context_free(&p->dtoa);
@@ -271,13 +275,25 @@ static chclass classify(char c) {
 }
 
 
+static presult OK = "output produced";
+static int check_done(struct jv_parser* p, jv* out) {
+  if (p->stackpos == 0 && jv_is_valid(p->next)) {
+    *out = p->next;
+    p->next = jv_invalid();
+    return 1;
+  } else {
+    return 0;
+  }
+}
 
 
-static pfunc scan(struct jv_parser* p, char ch) {
+static pfunc scan(struct jv_parser* p, char ch, jv* out) {
+  presult answer = 0;
   if (p->st == JV_PARSER_NORMAL) {
     chclass cls = classify(ch);
     if (cls != LITERAL) {
       TRY(check_literal(p));
+      if (check_done(p, out)) answer = OK;
     }
     switch (cls) {
     case LITERAL:
@@ -294,10 +310,12 @@ static pfunc scan(struct jv_parser* p, char ch) {
     case INVALID:
       return "Invalid character";
     }
+    if (check_done(p, out)) answer = OK;
   } else {
     if (ch == '"' && p->st == JV_PARSER_STRING) {
       TRY(found_string(p));
       p->st = JV_PARSER_NORMAL;
+      if (check_done(p, out)) answer = OK;
     } else {
       tokenadd(p, ch);
       if (ch == '\\' && p->st == JV_PARSER_STRING) {
@@ -307,43 +325,87 @@ static pfunc scan(struct jv_parser* p, char ch) {
       }
     }
   }
-  return 0;
+  return answer;
 }
 
-static pfunc finish(struct jv_parser* p) {
-  if (p->st != JV_PARSER_NORMAL)
-    return "Unfinished string";
-  TRY(check_literal(p));
+void jv_parser_set_buf(struct jv_parser* p, const char* buf, int length, int is_partial) {
+  assert((p->curr_buf == 0 || p->curr_buf_pos == p->curr_buf_length)
+         && "previous buffer not exhausted");
+  p->curr_buf = buf;
+  p->curr_buf_length = length;
+  p->curr_buf_pos = 0;
+  p->curr_buf_is_partial = is_partial;
+}
 
-  if (p->stackpos != 0)
-    return "Unfinished JSON term";
-
-  // this will happen on the empty string
-  if (!jv_is_valid(p->next))
-    return "Expected JSON value";
-
-  return 0;
+jv jv_parser_next(struct jv_parser* p) {
+  assert(p->curr_buf && "a buffer must be provided");
+  jv value;
+  presult msg = 0;
+  while (!msg && p->curr_buf_pos < p->curr_buf_length) {
+    char ch = p->curr_buf[p->curr_buf_pos++];
+    msg = scan(p, ch, &value);
+  }
+  if (msg == OK) {
+    return value;
+  } else if (msg) {
+    return jv_invalid_with_msg(jv_string(msg));
+  } else if (p->curr_buf_is_partial) {
+    assert(p->curr_buf_pos == p->curr_buf_length);
+    // need another buffer
+    return jv_invalid();
+  } else {
+    assert(p->curr_buf_pos == p->curr_buf_length);
+    // at EOF
+    if (p->st != JV_PARSER_NORMAL)
+      return jv_invalid_with_msg(jv_string("Unfinished string"));
+    if ((msg = check_literal(p)))
+      return jv_invalid_with_msg(jv_string(msg));
+    if (p->stackpos != 0)
+      return jv_invalid_with_msg(jv_string("Unfinished JSON term"));
+    // p->next is either invalid (nothing here but no syntax error)
+    // or valid (this is the value). either way it's the thing to return
+    value = p->next;
+    p->next = jv_invalid();
+    return value;
+  }
 }
 
 jv jv_parse_sized(const char* string, int length) {
   struct jv_parser parser;
   jv_parser_init(&parser);
-
-  const char* p = string;
-  char ch;
-  presult msg = 0;
-  while (msg == 0 && p < string + length) {
-    ch = *p++;
-    msg = scan(&parser, ch);
-  }
-  if (msg == 0) msg = finish(&parser);
-  jv value;
-  if (msg) {
-    value = jv_invalid_with_msg(jv_string_fmt("%s (while parsing '%s')", msg, string));
+  jv_parser_set_buf(&parser, string, length, 0);
+  jv value = jv_parser_next(&parser);
+  if (jv_is_valid(value)) {
+    jv next = jv_parser_next(&parser);
+    if (jv_is_valid(next)) {
+      // multiple JSON values, we only wanted one
+      jv_free(value);
+      jv_free(next);
+      value = jv_invalid_with_msg(jv_string("Unexpected extra JSON values"));
+    } else if (jv_invalid_has_msg(jv_copy(next))) {
+      // parser error after the first JSON value
+      jv_free(value);
+      value = next;
+    } else {
+      // a single valid JSON value
+      jv_free(next);
+    }
+  } else if (jv_invalid_has_msg(jv_copy(value))) {
+    // parse error, we'll return it
   } else {
-    value = jv_copy(parser.next);
+    // no value at all
+    jv_free(value);
+    value = jv_invalid_with_msg(jv_string("Expected JSON value"));
   }
   jv_parser_free(&parser);
+
+  if (!jv_is_valid(value) && jv_invalid_has_msg(jv_copy(value))) {
+    jv msg = jv_invalid_get_msg(value);
+    value = jv_invalid_with_msg(jv_string_fmt("%s (while parsing '%s')",
+                                              jv_string_value(msg),
+                                              string));
+    jv_free(msg);
+  }
   return value;
 }
 
diff --git a/c/jv_parse.h b/c/jv_parse.h
index 10270766..5b8e7cdf 100644
--- a/c/jv_parse.h
+++ b/c/jv_parse.h
@@ -1,5 +1,12 @@
-
+#ifndef JV_PARSE_H
+#define JV_PARSE_H
+#include "jv_dtoa.h"
 struct jv_parser {
+  const char* curr_buf;
+  int curr_buf_length;
+  int curr_buf_pos;
+  int curr_buf_is_partial;
+
   jv* stack;
   int stackpos;
   int stacklen;
@@ -17,3 +24,11 @@ struct jv_parser {
     JV_PARSER_STRING_ESCAPE
   } st;
 };
+
+void jv_parser_init(struct jv_parser* p);
+void jv_parser_free(struct jv_parser* p);
+
+void jv_parser_set_buf(struct jv_parser* p, const char* buf, int length, int is_partial);
+
+jv jv_parser_next(struct jv_parser* p);
+#endif
diff --git a/c/jv_print.c b/c/jv_print.c
index c2f7f58d..e122a5c8 100644
--- a/c/jv_print.c
+++ b/c/jv_print.c
@@ -10,7 +10,7 @@ static void jv_dump_string(jv str, int ascii_only) {
   assert(jv_get_kind(str) == JV_KIND_STRING);
   const char* i = jv_string_value(str);
   const char* end = i + jv_string_length(jv_copy(str));
-  int c;
+  int c = 0;
   while ((i = jvp_utf8_next(i, end, &c))) {
     assert(c != -1);
     int unicode_escape = 0;
diff --git a/c/main.c b/c/main.c
index f6d9e67f..05288d1b 100644
--- a/c/main.c
+++ b/c/main.c
@@ -4,6 +4,7 @@
 #include "parser.tab.h"
 #include "builtin.h"
 #include "jv.h"
+#include "jv_parse.h"
 #include "locfile.h"
 
 int jq_parse(struct locfile* source, block* answer);
@@ -31,31 +32,6 @@ struct bytecode* jq_compile(const char* str) {
   return bc;
 }
 
-
-void run_program(struct bytecode* bc) {
-#if JQ_DEBUG
-  dump_disassembly(0, bc);
-  printf("\n");
-#endif
-  char buf[409600];
-  fgets(buf, sizeof(buf), stdin);
-  jv value = jv_parse(buf);
-  if (!jv_is_valid(value)) {
-    assert(0 && "couldn't parse input"); //FIXME
-  }
-  jq_init(bc, value);
-  jv result;
-  while (jv_is_valid(result = jq_next())) {
-    jv_dump(result);
-    printf("\n");
-  }
-  jv_free(result);
-  #if JQ_DEBUG
-  printf("end of results\n");
-  #endif
-  jq_teardown();
-}
-
 int skipline(const char* buf) {
   int p = 0;
   while (buf[p] == ' ' || buf[p] == '\t') p++;
@@ -128,7 +104,40 @@ int main(int argc, char* argv[]) {
   if (argc == 1) { run_tests(); return 0; }
   struct bytecode* bc = jq_compile(argv[1]);
   if (!bc) return 1;
-  run_program(bc);
+
+#if JQ_DEBUG
+  dump_disassembly(0, bc);
+  printf("\n");
+#endif
+
+  struct jv_parser parser;
+  jv_parser_init(&parser);
+  while (!feof(stdin)) {
+    char buf[4096];
+    if (!fgets(buf, sizeof(buf), stdin)) buf[0] = 0;
+    jv_parser_set_buf(&parser, buf, strlen(buf), !feof(stdin));
+    jv value;
+    while (jv_is_valid((value = jv_parser_next(&parser)))) {
+      jq_init(bc, value);
+      jv result;
+      while (jv_is_valid(result = jq_next())) {
+        jv_dump(result);
+        printf("\n");
+      }
+      jv_free(result);
+      jq_teardown();
+    }
+    if (jv_invalid_has_msg(jv_copy(value))) {
+      jv msg = jv_invalid_get_msg(value);
+      fprintf(stderr, "parse error: %s\n", jv_string_value(msg));
+      jv_free(msg);
+      break;
+    } else {
+      jv_free(value);
+    }
+  }
+  jv_parser_free(&parser);
+
   bytecode_free(bc);
   return 0;
 }