1
0
mirror of https://github.com/stedolan/jq.git synced 2024-05-11 05:55:39 +00:00

Remove globals from parser, use explicit structure.

This commit is contained in:
Stephen Dolan
2012-09-03 01:12:42 +01:00
parent 22ffc0fdfc
commit 52487ff812
2 changed files with 142 additions and 111 deletions

View File

@@ -2,10 +2,7 @@
#include <stdlib.h> #include <stdlib.h>
#include "jv.h" #include "jv.h"
#include "jv_dtoa.h" #include "jv_dtoa.h"
jv stack[1000]; #include "jv_parse.h"
int stackpos = 0;
jv next;
int hasnext;
typedef const char* presult; typedef const char* presult;
@@ -16,55 +13,78 @@ typedef const char* presult;
#define pfunc presult #define pfunc presult
#endif #endif
/*
 * Put a parser into its initial, empty state.
 *
 * Nothing is allocated here: both the value stack and the token buffer start
 * out as null pointers with zero capacity. The dtoa context is initialised
 * through jvp_dtoa_context_init().
 */
void jv_parser_init(struct jv_parser* p) {
  /* value stack: no storage yet */
  p->stack = 0;
  p->stackpos = 0;
  p->stacklen = 0;

  /* token buffer: no storage yet */
  p->tokenbuf = 0;
  p->tokenpos = 0;
  p->tokenlen = 0;

  /* no completed value is pending */
  p->next = jv_null(); //FIXME: jv_invalid
  p->hasnext = 0;

  p->st = JV_PARSER_NORMAL;
  jvp_dtoa_context_init(&p->dtoa);
}
/*
 * Release everything a parser owns.
 *
 * Frees the pending value (only when hasnext is set), every value still
 * sitting on the value stack, the stack and token buffers themselves, and the
 * dtoa context. The struct itself is not freed.
 *
 * The stack entries must be released individually: a parser that is freed in
 * the middle of a document (for example after a parse error) still holds the
 * partially built arrays/objects and pending keys in stack[0..stackpos).
 */
void jv_parser_free(struct jv_parser* p) {
  if (p->hasnext) jv_free(p->next);
  for (int i = 0; i < p->stackpos; i++)
    jv_free(p->stack[i]);
  free(p->stack);
  free(p->tokenbuf);
  jvp_dtoa_context_free(&p->dtoa);
}
/*
 * Record `val` as the parser's pending value.
 *
 * Only one value may be pending at a time. If one is already waiting, the
 * input had two values with no separator between them: `val` is released
 * (the parser never takes ownership of it) and an error message is returned.
 *
 * Returns 0 on success, or a static error string.
 */
static pfunc value(struct jv_parser* p, jv val) {
  if (p->hasnext) {
    jv_free(val);
    return "Expected separator between values";
  }
  p->hasnext = 1;
  p->next = val;
  return 0;
}
/*
 * Push `v` onto the parser's value stack, growing the stack when it is full.
 *
 * Growth is done through a temporary pointer so that a failed realloc() does
 * not overwrite (and leak) the existing stack. The function has no way to
 * report failure to its caller, so running out of memory aborts the process
 * explicitly rather than dereferencing a null pointer.
 */
static void push(struct jv_parser* p, jv v) {
  assert(p->stackpos <= p->stacklen);
  if (p->stackpos == p->stacklen) {
    int newlen = p->stacklen * 2 + 10;
    jv* grown = realloc(p->stack, (size_t)newlen * sizeof(jv));
    if (!grown) abort();
    p->stack = grown;
    p->stacklen = newlen;
  }
  assert(p->stackpos < p->stacklen);
  p->stack[p->stackpos++] = v;
}
pfunc token(char ch) { static pfunc token(struct jv_parser* p, char ch) {
switch (ch) { switch (ch) {
case '[': case '[':
if (hasnext) return "Expected separator between values"; if (p->hasnext) return "Expected separator between values";
push(jv_array()); push(p, jv_array());
break; break;
case '{': case '{':
if (hasnext) return "Expected separator between values"; if (p->hasnext) return "Expected separator between values";
push(jv_object()); push(p, jv_object());
break; break;
case ':': case ':':
if (!hasnext) if (!p->hasnext)
return "Expected string key before ':'"; return "Expected string key before ':'";
if (stackpos == 0 || jv_get_kind(stack[stackpos-1]) != JV_KIND_OBJECT) if (p->stackpos == 0 || jv_get_kind(p->stack[p->stackpos-1]) != JV_KIND_OBJECT)
return "':' not as part of an object"; return "':' not as part of an object";
if (jv_get_kind(next) != JV_KIND_STRING) if (jv_get_kind(p->next) != JV_KIND_STRING)
return "Object keys must be strings"; return "Object keys must be strings";
push(next); push(p, p->next);
hasnext = 0; p->hasnext = 0;
break; break;
case ',': case ',':
if (!hasnext) if (!p->hasnext)
return "Expected value before ','"; return "Expected value before ','";
if (stackpos == 0) if (p->stackpos == 0)
return "',' not as part of an object or array"; return "',' not as part of an object or array";
if (jv_get_kind(stack[stackpos-1]) == JV_KIND_ARRAY) { if (jv_get_kind(p->stack[p->stackpos-1]) == JV_KIND_ARRAY) {
stack[stackpos-1] = jv_array_append(stack[stackpos-1], next); p->stack[p->stackpos-1] = jv_array_append(p->stack[p->stackpos-1], p->next);
hasnext = 0; p->hasnext = 0;
} else if (jv_get_kind(stack[stackpos-1]) == JV_KIND_STRING) { } else if (jv_get_kind(p->stack[p->stackpos-1]) == JV_KIND_STRING) {
assert(stackpos > 1 && jv_get_kind(stack[stackpos-2]) == JV_KIND_OBJECT); assert(p->stackpos > 1 && jv_get_kind(p->stack[p->stackpos-2]) == JV_KIND_OBJECT);
stack[stackpos-2] = jv_object_set(stack[stackpos-2], stack[stackpos-1], next); p->stack[p->stackpos-2] = jv_object_set(p->stack[p->stackpos-2],
stackpos--; p->stack[p->stackpos-1], p->next);
hasnext = 0; p->stackpos--;
p->hasnext = 0;
} else { } else {
// this case hits on input like {"a", "b"} // this case hits on input like {"a", "b"}
return "Objects must consist of key:value pairs"; return "Objects must consist of key:value pairs";
@@ -72,54 +92,57 @@ pfunc token(char ch) {
break; break;
case ']': case ']':
if (stackpos == 0 || jv_get_kind(stack[stackpos-1]) != JV_KIND_ARRAY) if (p->stackpos == 0 || jv_get_kind(p->stack[p->stackpos-1]) != JV_KIND_ARRAY)
return "Unmatched ']'"; return "Unmatched ']'";
if (hasnext) { if (p->hasnext) {
stack[stackpos-1] = jv_array_append(stack[stackpos-1], next); p->stack[p->stackpos-1] = jv_array_append(p->stack[p->stackpos-1], p->next);
hasnext = 0; p->hasnext = 0;
} else { } else {
if (jv_array_length(jv_copy(stack[stackpos-1])) != 0) { if (jv_array_length(jv_copy(p->stack[p->stackpos-1])) != 0) {
// this case hits on input like [1,2,3,] // this case hits on input like [1,2,3,]
return "Expected another array element"; return "Expected another array element";
} }
} }
hasnext = 1; p->hasnext = 1;
next = stack[--stackpos]; p->next = p->stack[--p->stackpos];
break; break;
case '}': case '}':
if (stackpos == 0) if (p->stackpos == 0)
return "Unmatched '}'"; return "Unmatched '}'";
if (hasnext) { if (p->hasnext) {
if (jv_get_kind(stack[stackpos-1]) != JV_KIND_STRING) if (jv_get_kind(p->stack[p->stackpos-1]) != JV_KIND_STRING)
return "Objects must consist of key:value pairs"; return "Objects must consist of key:value pairs";
assert(stackpos > 1 && jv_get_kind(stack[stackpos-2]) == JV_KIND_OBJECT); assert(p->stackpos > 1 && jv_get_kind(p->stack[p->stackpos-2]) == JV_KIND_OBJECT);
stack[stackpos-2] = jv_object_set(stack[stackpos-2], stack[stackpos-1], next); p->stack[p->stackpos-2] = jv_object_set(p->stack[p->stackpos-2],
stackpos--; p->stack[p->stackpos-1], p->next);
hasnext = 0; p->stackpos--;
p->hasnext = 0;
} else { } else {
if (jv_get_kind(stack[stackpos-1]) != JV_KIND_OBJECT) if (jv_get_kind(p->stack[p->stackpos-1]) != JV_KIND_OBJECT)
return "Unmatched '}'"; return "Unmatched '}'";
if (jv_object_length(jv_copy(stack[stackpos-1])) != 0) if (jv_object_length(jv_copy(p->stack[p->stackpos-1])) != 0)
return "Expected another key-value pair"; return "Expected another key-value pair";
} }
hasnext = 1; p->hasnext = 1;
next = stack[--stackpos]; p->next = p->stack[--p->stackpos];
break; break;
} }
return 0; return 0;
} }
/*
 * Append one character to the parser's token buffer, growing the buffer when
 * it is full.
 *
 * Growth is done through a temporary pointer so that a failed realloc() does
 * not overwrite (and leak) the existing buffer. The function has no way to
 * report failure to its caller, so running out of memory aborts the process
 * explicitly rather than dereferencing a null pointer.
 */
static void tokenadd(struct jv_parser* p, char c) {
  assert(p->tokenpos <= p->tokenlen);
  if (p->tokenpos == p->tokenlen) {
    int newlen = p->tokenlen*2 + 256;
    char* grown = realloc(p->tokenbuf, (size_t)newlen);
    if (!grown) abort();
    p->tokenbuf = grown;
    p->tokenlen = newlen;
  }
  assert(p->tokenpos < p->tokenlen);
  p->tokenbuf[p->tokenpos++] = c;
}
int unhex4(char* hex) { static int unhex4(char* hex) {
int r = 0; int r = 0;
for (int i=0; i<4; i++) { for (int i=0; i<4; i++) {
char c = *hex++; char c = *hex++;
@@ -133,7 +156,7 @@ int unhex4(char* hex) {
return r; return r;
} }
int utf8_encode(int codepoint, char* out) { static int utf8_encode(int codepoint, char* out) {
assert(codepoint >= 0 && codepoint <= 0x10FFFF); assert(codepoint >= 0 && codepoint <= 0x10FFFF);
char* start = out; char* start = out;
if (codepoint <= 0x7F) { if (codepoint <= 0x7F) {
@@ -154,10 +177,10 @@ int utf8_encode(int codepoint, char* out) {
return out - start; return out - start;
} }
pfunc found_string() { static pfunc found_string(struct jv_parser* p) {
char* in = tokenbuf; char* in = p->tokenbuf;
char* out = tokenbuf; char* out = p->tokenbuf;
char* end = tokenbuf + tokenpos; char* end = p->tokenbuf + p->tokenpos;
while (in < end) { while (in < end) {
char c = *in++; char c = *in++;
@@ -203,38 +226,38 @@ pfunc found_string() {
*out++ = c; *out++ = c;
} }
} }
TRY(value(jv_string_sized(tokenbuf, out - tokenbuf))); TRY(value(p, jv_string_sized(p->tokenbuf, out - p->tokenbuf)));
tokenpos=0; p->tokenpos = 0;
return 0; return 0;
} }
/*
 * Turn the characters accumulated in the token buffer into a value.
 *
 * An empty buffer is not an error: there is simply no literal to finish.
 * A token starting with 't', 'f' or 'n' must be exactly "true", "false" or
 * "null". Anything else is handed to jvp_strtod() and must be consumed in
 * full to count as a number.
 *
 * On success the value is stored through value() and the token buffer is
 * reset. Returns 0 on success, or a static error string.
 */
static pfunc check_literal(struct jv_parser* p) {
  if (p->tokenpos == 0) return 0;

  const char* pattern = 0;
  int plen = 0;
  jv v;
  switch (p->tokenbuf[0]) {
  case 't': pattern = "true"; plen = 4; v = jv_true(); break;
  case 'f': pattern = "false"; plen = 5; v = jv_false(); break;
  case 'n': pattern = "null"; plen = 4; v = jv_null(); break;
  }
  if (pattern) {
    if (p->tokenpos != plen) return "Invalid literal";
    for (int i=0; i<plen; i++)
      if (p->tokenbuf[i] != pattern[i])
        return "Invalid literal";
    TRY(value(p, v));
  } else {
    // FIXME: better parser
    // NUL-terminate the token for jvp_strtod(). Writing the terminator via
    // tokenadd() grows the buffer when it is exactly full, so the byte always
    // lands inside the allocation; tokenpos is then stepped back so the
    // terminator is not counted as part of the token.
    tokenadd(p, 0);
    p->tokenpos--;
    char* end = 0;
    double d = jvp_strtod(&p->dtoa, p->tokenbuf, &end);
    if (end == 0 || *end != 0)
      return "Invalid numeric literal";
    TRY(value(p, jv_number(d)));
  }
  p->tokenpos = 0;
  return 0;
}
@@ -246,7 +269,7 @@ typedef enum {
INVALID INVALID
} chclass; } chclass;
chclass classify(char c) { static chclass classify(char c) {
switch (c) { switch (c) {
case ' ': case ' ':
case '\t': case '\t':
@@ -268,91 +291,79 @@ chclass classify(char c) {
} }
// Scanner states; scan() dispatches on the current value.
enum state {
// outside any string literal: characters are classified one by one
NORMAL,
// inside a string literal: an unescaped '"' ends the string
STRING,
// the previous character inside a string was a backslash, so the next
// character is stored as-is and cannot end the string
STRING_ESCAPE
};
// Current scanner state; starts outside any string.
enum state st = NORMAL;
/*
 * Feed one input character to the parser.
 *
 * Inside a string literal, characters are appended to the token buffer until
 * an unescaped '"' completes the string. Outside a string, the character is
 * classified: literal characters are accumulated, and any other class first
 * finishes the literal collected so far before being handled.
 *
 * Returns 0 on success, or a static error string.
 */
static pfunc scan(struct jv_parser* p, char ch) {
  if (p->st != JV_PARSER_NORMAL) {
    /* string body: only the plain (non-escape) state reacts to '"' and '\\' */
    int plain = (p->st == JV_PARSER_STRING);
    if (plain && ch == '"') {
      TRY(found_string(p));
      p->st = JV_PARSER_NORMAL;
      return 0;
    }
    tokenadd(p, ch);
    p->st = (plain && ch == '\\') ? JV_PARSER_STRING_ESCAPE
                                  : JV_PARSER_STRING;
    return 0;
  }

  chclass kind = classify(ch);
  if (kind != LITERAL) {
    /* anything that is not part of a literal terminates the pending one */
    TRY(check_literal(p));
  }

  if (kind == LITERAL) {
    tokenadd(p, ch);
  } else if (kind == QUOTE) {
    p->st = JV_PARSER_STRING;
  } else if (kind == STRUCTURE) {
    TRY(token(p, ch));
  } else if (kind == INVALID) {
    return "Invalid character";
  }
  /* WHITESPACE needs no further action */
  return 0;
}
/*
 * Signal end of input and check that exactly one complete value was read.
 *
 * Returns 0 when a finished value is pending in the parser, or a static
 * error string describing what is missing.
 */
static pfunc finish(struct jv_parser* p) {
  /* input ended while still inside a string literal */
  if (p->st != JV_PARSER_NORMAL) {
    return "Unfinished string";
  }

  /* a trailing literal has no following character to terminate it */
  TRY(check_literal(p));

  /* an array or object was opened but never closed */
  if (p->stackpos) {
    return "Unfinished JSON term";
  }

  /* with no pending value the input was empty (e.g. the empty string) */
  return p->hasnext ? 0 : "Expected JSON value";
}
jv jv_parse(const char* string) { jv jv_parse(const char* string) {
jvp_dtoa_context_init(&dtoa); struct jv_parser parser;
jv_parser_init(&parser);
const char* p = string; const char* p = string;
char ch; char ch;
while ((ch = *p++)) { while ((ch = *p++)) {
presult msg = scan(ch); presult msg = scan(&parser, ch);
if (msg){ if (msg){
printf("ERROR: %s (parsing [%s])\n", msg, string); printf("ERROR: %s (parsing [%s])\n", msg, string);
return jv_null(); return jv_null();
} }
} }
presult msg = finish(); presult msg = finish(&parser);
if (msg) { if (msg) {
printf("ERROR: %s (parsing [%s])\n", msg, string); printf("ERROR: %s (parsing [%s])\n", msg, string);
return jv_null(); return jv_null();
} }
jvp_dtoa_context_free(&dtoa); jv value = jv_copy(parser.next);
hasnext = 0; jv_parser_free(&parser);
return next; return value;
} }
#if JV_PARSE_MAIN
/*
 * Stand-alone driver, built only when JV_PARSE_MAIN is set: parses the single
 * command-line argument as JSON and dumps the result followed by a newline.
 *
 * The argument count is checked explicitly rather than with assert(), which
 * is compiled out under NDEBUG and would let a missing argument reach
 * jv_parse() as a null pointer.
 *
 * Returns 0 after dumping the value, 1 on incorrect usage.
 */
int main(int argc, char* argv[]) {
  if (argc != 2) {
    printf("Usage: %s JSON\n", argc > 0 ? argv[0] : "jv_parse");
    return 1;
  }
  jv_dump(jv_parse(argv[1]));
  printf("\n");
  return 0;
}
#endif

20
c/jv_parse.h Normal file
View File

@@ -0,0 +1,20 @@
// State of one JSON parse in progress. Set up with jv_parser_init() and
// released with jv_parser_free().
struct jv_parser {
// Stack of values under construction: open arrays and objects, plus the
// pending key of an object member. Heap-allocated and grown on demand.
jv* stack;
// Number of stack entries currently in use.
int stackpos;
// Number of entries the stack allocation can hold.
int stacklen;
// Most recently completed value; meaningful only while hasnext is nonzero.
jv next;
// Nonzero when `next` holds a completed value not yet consumed.
int hasnext;
// Characters of the literal or string being read. Heap-allocated and grown
// on demand.
char* tokenbuf;
// Number of bytes of tokenbuf currently in use.
int tokenpos;
// Size of the tokenbuf allocation in bytes.
int tokenlen;
// Context handed to jvp_strtod() when converting numeric literals.
struct dtoa_context dtoa;
// Scanner state: outside a string, inside a string, or directly after a
// backslash inside a string.
enum {
JV_PARSER_NORMAL,
JV_PARSER_STRING,
JV_PARSER_STRING_ESCAPE
} st;
};