1
0
mirror of https://github.com/stedolan/jq.git synced 2024-05-11 05:55:39 +00:00

Most of a C implementation of jq

This commit is contained in:
Stephen
2012-08-16 01:00:30 +01:00
parent eca89acee0
commit 2002dc1a2f
16 changed files with 1270 additions and 0 deletions

21
c/Makefile Normal file
View File

@@ -0,0 +1,21 @@
# Compiler command used for the final link/compile step: GNU C99, all warnings,
# gdb debug info.
CC=gcc -Wall -std=gnu99 -ggdb
.PHONY: all clean
# Default target: build the parsertest binary.
all: parsertest
# clean: run a forced dry-run build with debug output (-B -n -d), extract the
# target names from the "Must remake target" lines, drop "all", and rm the rest.
clean:
make -Bnd | grep 'Must remake target' | \
sed 's/.*`\(.*\)'\''.*/\1/' | grep -v '^all$$' | \
xargs rm
# The flex run produces both the scanner source and its header.
lexer.yy.c: lexer.l
flex -o lexer.yy.c --header-file=lexer.yy.h lexer.l
lexer.yy.h: lexer.yy.c
# The bison run (-d) produces parser.tab.c and parser.tab.h; it depends on the
# generated lexer header.
parser.tab.c: parser.y lexer.yy.h
bison -W -d parser.y
parser.tab.h: parser.tab.c
# Single-step compile and link of every source file against libjansson.
parsertest: parser.tab.c lexer.yy.c main.c opcode.c bytecode.c compile.c execute.c builtin.c
$(CC) -o $@ $^ -ljansson

18
c/builtin.c Normal file
View File

@@ -0,0 +1,18 @@
#include "builtin.h"
#include <jansson.h>
// Builtin "false": ignores its input and stores the JSON false value in
// output slot 0.
void f_false(json_t* input[], json_t* output[]) {
  *output = json_false();
}
// Builtin "true": ignores its input and stores the JSON true value in
// output slot 0.
void f_true(json_t* input[], json_t* output[]) {
  *output = json_true();
}
// Table of C-implemented builtins: function pointer, the name it is looked up
// by, and the opcode used to call it.
struct cfunction function_list[] = {
{f_true, "true", CALL_BUILTIN_1_1},
{f_false, "false", CALL_BUILTIN_1_1},
};
// Symbol table exported to the compiler; the count is derived from the array
// size so it follows the table automatically.
struct symbol_table builtins = {function_list, sizeof(function_list)/sizeof(function_list[0])};

8
c/builtin.h Normal file
View File

@@ -0,0 +1,8 @@
#ifndef BUILTIN_H
#define BUILTIN_H
#include "bytecode.h"
extern struct symbol_table builtins;
#endif

35
c/bytecode.c Normal file
View File

@@ -0,0 +1,35 @@
#include <stdio.h>
#include <stdint.h>
#include <jansson.h>
#include "bytecode.h"
#include "opcode.h"
// Prints every instruction of `bc` to stdout, one per line, advancing by each
// opcode's encoded length.
void dump_disassembly(struct bytecode* bc) {
  for (int offset = 0; offset < bc->codelen;
       offset += opcode_length(bc->code[offset])) {
    dump_operation(bc, &bc->code[offset]);
    printf("\n");
  }
}
void dump_operation(struct bytecode* bc, uint16_t* codeptr) {
int pc = codeptr - bc->code;
printf("%04d ", pc);
const struct opcode_description* op = opcode_describe(bc->code[pc++]);
printf("%s", op->name);
if (op->flags & OP_HAS_IMMEDIATE) {
uint16_t imm = bc->code[pc++];
printf(" ");
if (op->flags & OP_HAS_BRANCH) {
printf("%04d", pc + imm);
} else if (op->flags & OP_HAS_CONSTANT) {
json_dumpf(json_array_get(bc->constants, imm),
stdout, JSON_ENCODE_ANY);
} else if (op->flags & OP_HAS_VARIABLE) {
printf("v%d", imm);
} else {
printf("%d", imm);
}
}
}

33
c/bytecode.h Normal file
View File

@@ -0,0 +1,33 @@
#ifndef BYTECODE_H
#define BYTECODE_H
#include <jansson.h>
#include <stdint.h>
#include "opcode.h"
// Signature of a C-implemented builtin: reads JSON values from input[] and
// writes results to output[].
typedef void (*cfunction_ptr)(json_t* input[], json_t* output[]);
// One C builtin: implementation, lookup name, and the opcode that calls it.
struct cfunction {
cfunction_ptr fptr;
const char* name;
opcode callop;
};
// Size of the input/output argument arrays handed to C builtins.
#define MAX_CFUNCTION_ARGS 10
// Table of C builtins that compiled code refers to by index.
struct symbol_table {
struct cfunction* cfunctions;
int ncfunctions;
};
// A compiled program.
struct bytecode {
// Instruction stream of 16-bit words, and its length in words.
uint16_t* code;
int codelen;
// Number of data-stack slots reserved for the variable frame.
int framesize;
// JSON array of constants, indexed by the immediates of constant opcodes.
json_t* constants;
// Builtin table that CFUNC immediates index into.
struct symbol_table* globals;
};
// Print the whole program / a single instruction to stdout.
void dump_disassembly(struct bytecode* code);
void dump_operation(struct bytecode* bc, uint16_t* op);
#endif

288
c/compile.c Normal file
View File

@@ -0,0 +1,288 @@
#include <assert.h>
#include <string.h>
#include "opcode.h"
#include "compile.h"
// One not-yet-assembled instruction; instructions form a doubly linked list
// delimited by a `block` (first/last).
struct inst {
struct inst* next;
struct inst* prev;
opcode op;
// Immediate operand; which member is valid depends on the opcode's flags.
union {
uint16_t intval;
struct inst* target;
json_t* constant;
char* symbol;
} imm;
// For variable opcodes: the instruction that binds the variable. A binder
// points at itself (see block_bind); 0 means still unbound.
struct inst* var_binding;
// Frame slot assigned to a binder by block_compile.
int var_frame_idx;
int bytecode_pos; // position just after this insn
};
// Allocates a fresh, unlinked instruction for `op`.
//
// Every field, including the immediate union, starts out zeroed;
// bytecode_pos is -1 until block_compile assigns a position. Aborts the
// process if memory cannot be allocated, so the result is never NULL.
static inst* inst_new(opcode op) {
  inst* i = calloc(1, sizeof *i);
  if (!i) {
    abort();
  }
  i->next = i->prev = 0;
  i->op = op;
  i->imm.target = 0;
  i->bytecode_pos = -1;
  i->var_binding = 0;
  i->var_frame_idx = 0;
  return i;
}
static void inst_free(struct inst* i) {
if (opcode_describe(i->op)->flags &
(OP_HAS_SYMBOL | OP_HAS_VARIABLE)) {
free(i->imm.symbol);
}
free(i);
}
// Wraps a single instruction in a block whose first and last are both `i`.
static block inst_block(inst* i) {
  block single;
  single.first = i;
  single.last = i;
  return single;
}
block gen_noop() {
block b = {0,0};
return b;
}
// Builds a one-instruction block for an opcode that takes no immediate.
block gen_op_simple(opcode op) {
  assert(!(opcode_describe(op)->flags & OP_HAS_IMMEDIATE));
  inst* plain = inst_new(op);
  return inst_block(plain);
}
// Builds a one-instruction block for a constant-carrying opcode; the
// instruction stores the `constant` pointer as its immediate.
block gen_op_const(opcode op, json_t* constant) {
  assert(opcode_describe(op)->flags & OP_HAS_CONSTANT);
  inst* loader = inst_new(op);
  loader->imm.constant = constant;
  block result = inst_block(loader);
  return result;
}
// Builds a branch instruction whose target is the last instruction of
// `target` (i.e. execution resumes just after that block).
block gen_op_target(opcode op, block target) {
  assert(opcode_describe(op)->flags & OP_HAS_BRANCH);
  assert(target.last);
  inst* branch = inst_new(op);
  branch->imm.target = target.last;
  block result = inst_block(branch);
  return result;
}
// Builds a branch instruction with no target yet; the target is filled in
// afterwards with inst_set_target.
block gen_op_targetlater(opcode op) {
  assert(opcode_describe(op)->flags & OP_HAS_BRANCH);
  inst* branch = inst_new(op);
  branch->imm.target = 0;
  block result = inst_block(branch);
  return result;
}
// Points the single branch instruction in `b` at the last instruction of
// `target`. `b` must hold exactly one instruction.
void inst_set_target(block b, block target) {
  assert(b.first);
  assert(b.first == b.last);
  assert(opcode_describe(b.first->op)->flags & OP_HAS_BRANCH);
  assert(target.last);
  inst* branch = b.first;
  branch->imm.target = target.last;
}
// Builds a variable instruction that carries a private copy of `name` and is
// not yet bound to any binder.
block gen_op_var_unbound(opcode op, const char* name) {
  assert(opcode_describe(op)->flags & OP_HAS_VARIABLE);
  inst* ref = inst_new(op);
  ref->imm.symbol = strdup(name);
  block result = inst_block(ref);
  return result;
}
// Builds a variable instruction already bound to the single instruction in
// `binder`, reusing the binder's variable name.
block gen_op_var_bound(opcode op, block binder) {
  assert(binder.first);
  assert(binder.first == binder.last);
  block ref = gen_op_var_unbound(op, binder.first->imm.symbol);
  ref.first->var_binding = binder.first;
  return ref;
}
// Builds a symbol-carrying instruction holding a private copy of `sym`; the
// symbol is resolved to a builtin index by block_compile.
block gen_op_symbol(opcode op, const char* sym) {
  assert(opcode_describe(op)->flags & OP_HAS_SYMBOL);
  inst* call = inst_new(op);
  call->imm.symbol = strdup(sym);
  block result = inst_block(call);
  return result;
}
// Links `b` directly after `a`. `a` must currently be a list tail and `b` a
// list head.
static void inst_join(inst* a, inst* b) {
  assert(a && b);
  assert(!a->next);
  assert(!b->prev);
  b->prev = a;
  a->next = b;
}
// Appends the instructions of `b2` to the end of `*b` in place.
void block_append(block* b, block b2) {
  // Appending an empty block changes nothing.
  if (!b2.first) {
    return;
  }
  if (!b->last) {
    // `*b` was empty: it simply becomes `b2`.
    b->first = b2.first;
  } else {
    inst_join(b->last, b2.first);
  }
  b->last = b2.last;
}
// Returns the block formed by `a` followed by `b`.
block block_join(block a, block b) {
  // `a` is a by-value copy, so extending it does not touch the caller's
  // block struct.
  block_append(&a, b);
  return a;
}
// Makes the single variable instruction in `binder` a binder (it binds to
// itself), binds every still-unbound variable reference in `body` with the
// same name to it, and returns binder followed by body.
block block_bind(block binder, block body) {
  assert(binder.first);
  assert(binder.first == binder.last);
  assert(opcode_describe(binder.first->op)->flags & OP_HAS_VARIABLE);
  assert(binder.first->imm.symbol);
  assert(binder.first->var_binding == 0);

  inst* binding = binder.first;
  binding->var_binding = binding;

  inst* cursor = body.first;
  while (cursor) {
    int takes_var = opcode_describe(cursor->op)->flags & OP_HAS_VARIABLE;
    // Only unbound references with a matching name are captured; references
    // already bound elsewhere are left alone.
    if (takes_var &&
        cursor->var_binding == 0 &&
        strcmp(cursor->imm.symbol, binding->imm.symbol) == 0) {
      cursor->var_binding = binding;
    }
    cursor = cursor->next;
  }
  return block_join(binder, body);
}
// Wraps `a` as DUP, a, SWAP: the input is duplicated, `a` runs on the copy,
// and the result is swapped beneath the preserved input.
block gen_subexp(block a) {
  block seq = gen_op_simple(DUP);
  block_append(&seq, a);
  block_append(&seq, gen_op_simple(SWAP));
  return seq;
}
// Emits code producing the results of `a` and then of `b`:
//   FORK -> (start of b); a; JUMP -> (end of b); b
// Each branch target is captured as "the last instruction appended so far",
// so the two inst_set_target calls must stay exactly where they are.
block gen_both(block a, block b) {
  block jump = gen_op_targetlater(JUMP);
  block fork = gen_op_targetlater(FORK);

  block seq = fork;
  block_append(&seq, a);
  block_append(&seq, jump);
  inst_set_target(fork, seq);  // seq ends at JUMP: the fork resumes at b
  block_append(&seq, b);
  inst_set_target(jump, seq);  // seq ends at b: the jump skips over b
  return seq;
}
// Emits code that collects every result of `expr` into one array:
//   DUP; LOADK []; STOREV collect; FORK -> after tail; expr; tail; LOADV collect
// where `tail` appends the current result to the "collect" variable and then
// backtracks to ask `expr` for its next result.
block gen_collect(block expr) {
  // Prologue: stash an empty array in a fresh variable.
  block c = gen_op_simple(DUP);
  block_append(&c, gen_op_const(LOADK, json_array()));
  block array_var = block_bind(gen_op_var_unbound(STOREV, "collect"),
                               gen_noop());
  block_append(&c, array_var);

  // Per-result tail: collect = collect + [result]; then backtrack.
  block tail = gen_op_simple(DUP);
  block_append(&tail, gen_op_var_bound(LOADV, array_var));
  block_append(&tail, gen_op_simple(SWAP));
  block_append(&tail, gen_op_simple(APPEND));
  block_append(&tail, gen_op_var_bound(STOREV, array_var));
  block_append(&tail, gen_op_simple(BACKTRACK));

  // The fork must be created after `tail` is complete so that it targets the
  // tail's final instruction.
  block_append(&c, gen_op_target(FORK, tail));
  block_append(&c, expr);
  block_append(&c, tail);
  block_append(&c, gen_op_var_bound(LOADV, array_var));
  return c;
}
// Placeholder: not implemented, so any call trips the assertion.
// NOTE(review): if assertions are compiled out (NDEBUG) this falls off the
// end of a non-void function.
block gen_else(block a, block b) {
assert(0);
}
// Assembles the instruction list `b` into a newly allocated struct bytecode.
// `syms` supplies the C builtins that CFUNC instructions are resolved
// against, and is stored in the result as `globals`. The block itself is not
// freed here.
struct bytecode* block_compile(struct symbol_table* syms, block b) {
inst* curr = b.first;
int pos = 0;
int var_frame_idx = 0;
// Pass 1: record, for each instruction, the position just after it, and
// hand out a frame slot to every binder (an instruction bound to itself).
for (; curr; curr = curr->next) {
if (!curr->next) assert(curr == b.last);
pos += opcode_length(curr->op);
curr->bytecode_pos = pos;
if (opcode_describe(curr->op)->flags & OP_HAS_VARIABLE) {
assert(curr->var_binding && "unbound variable");
if (curr->var_binding == curr) {
curr->var_frame_idx = var_frame_idx++;
}
}
}
struct bytecode* bc = malloc(sizeof(struct bytecode));
bc->codelen = pos;
uint16_t* code = malloc(sizeof(uint16_t) * bc->codelen);
bc->code = code;
// stack_height[p] is the stack height recorded for a branch landing at
// position p, or -1 if no branch targets p.
int* stack_height = malloc(sizeof(int) * (bc->codelen + 1));
for (int i = 0; i<bc->codelen + 1; i++) stack_height[i] = -1;
pos = 0;
json_t* constant_pool = json_array();
int maxvar = -1;
int curr_stack_height = 1;
// Pass 2: emit opcode words and immediates while tracking the stack height.
// Stack problems are only reported on stdout; compilation continues.
for (curr = b.first; curr; curr = curr->next) {
const struct opcode_description* op = opcode_describe(curr->op);
if (curr_stack_height < op->stack_in) {
printf("Stack underflow at %04d\n", curr->bytecode_pos);
}
if (stack_height[curr->bytecode_pos] != -1 &&
stack_height[curr->bytecode_pos] != curr_stack_height) {
// FIXME: not sure this is right at all :(
printf("Inconsistent stack heights at %04d %s\n", curr->bytecode_pos, op->name);
}
curr_stack_height -= op->stack_in;
curr_stack_height += op->stack_out;
code[pos++] = curr->op;
int opflags = op->flags;
if (opflags & OP_HAS_CONSTANT) {
// Immediate is the constant's index in the pool. json_array_append takes
// its own reference to the value.
code[pos++] = json_array_size(constant_pool);
json_array_append(constant_pool, curr->imm.constant);
} else if (opflags & OP_HAS_VARIABLE) {
// Immediate is the frame slot of the binder this reference is bound to.
uint16_t var = (uint16_t)curr->var_binding->var_frame_idx;
code[pos++] = var;
if (var > maxvar) maxvar = var;
} else if (opflags & OP_HAS_BRANCH) {
assert(curr->imm.target->bytecode_pos != -1);
assert(curr->imm.target->bytecode_pos > pos); // only forward branches
// Offset is relative to the word following the immediate.
code[pos] = curr->imm.target->bytecode_pos - (pos + 1);
stack_height[curr->imm.target->bytecode_pos] = curr_stack_height;
pos++;
} else if (opflags & OP_HAS_CFUNC) {
// Immediate is the index of the named builtin in `syms`.
assert(curr->imm.symbol);
int found = 0;
for (int i=0; i<syms->ncfunctions; i++) {
if (!strcmp(curr->imm.symbol, syms->cfunctions[i].name)) {
code[pos++] = i;
found = 1;
break;
}
}
assert(found);
} else if (opflags & OP_HAS_IMMEDIATE) {
code[pos++] = curr->imm.intval;
}
}
free(stack_height);
bc->constants = constant_pool;
bc->framesize = maxvar + 2; // FIXME: frames of size zero?
bc->globals = syms;
return bc;
}
// Frees every instruction in `b`, walking the list from first to end.
void block_free(block b) {
  struct inst* cursor = b.first;
  while (cursor) {
    // Read the successor before the node is released.
    struct inst* following = cursor->next;
    inst_free(cursor);
    cursor = following;
  }
}

34
c/compile.h Normal file
View File

@@ -0,0 +1,34 @@
#include <stdint.h>
#include "bytecode.h"
#include "opcode.h"
// Opaque instruction type; defined in compile.c.
struct inst;
typedef struct inst inst;
// A sequence of instructions, identified by its first and last element.
// Both pointers are 0 for the empty block.
typedef struct block {
inst* first;
inst* last;
} block;
// Single-instruction (or empty) block constructors.
block gen_noop();
block gen_op_simple(opcode op);
block gen_op_const(opcode op, json_t* constant);
block gen_op_target(opcode op, block target);
block gen_op_var_unbound(opcode op, const char* name);
block gen_op_var_bound(opcode op, block binder);
block gen_op_symbol(opcode op, const char* name);
// Higher-level code generators built from the constructors above.
block gen_subexp(block a);
block gen_both(block a, block b);
block gen_collect(block expr);
block gen_else(block a, block b);
// Block composition: in-place append, concatenation, and variable binding.
void block_append(block* b, block b2);
block block_join(block a, block b);
block block_bind(block binder, block body);
// Assemble a block into bytecode / release a block's instructions.
struct bytecode* block_compile(struct symbol_table*, block);
void block_free(block);

376
c/execute.c Normal file
View File

@@ -0,0 +1,376 @@
#include <jansson.h>
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include "opcode.h"
#include "bytecode.h"
#include "compile.h"
#include "forkable_stack.h"
// A value on the data stack: the JSON value plus the number of entries in
// pathbuf that describe how it was reached.
typedef struct {
json_t* value;
int pathidx;
} stackval;
// Shared, growable buffer of path components written by path_push.
json_t** pathbuf;
int pathsize; // number of allocated elements
// Records `val` as the next path component after the path of `sv` and
// returns the length of the extended path.
//
// The component is written at index sv.pathidx of the shared pathbuf,
// overwriting whatever was there. The buffer is grown (100 entries at first,
// then doubled) when the index reaches its end. Aborts the process if the
// buffer cannot be grown, leaving the old buffer intact up to that point.
int path_push(stackval sv, json_t* val) {
  int pos = sv.pathidx;
  assert(pos <= pathsize);
  assert(pos >= 0);
  if (pos == pathsize) {
    int newsize = pathsize ? pathsize * 2 : 100;
    // Keep the old pointer until the reallocation is known to have worked.
    json_t** grown = realloc(pathbuf, sizeof(pathbuf[0]) * newsize);
    if (!grown) {
      fprintf(stderr, "path_push: out of memory\n");
      abort();
    }
    pathbuf = grown;
    pathsize = newsize;
  }
  pathbuf[pos] = val;
  return pos + 1;
}
// Returns a stack value holding `newjs` with the same path index as `value`.
stackval stackval_replace(stackval value, json_t* newjs) {
  // `value` is a by-value copy, so it can be edited and returned directly.
  value.value = newjs;
  return value;
}
// Probably all uses of this function are bugs
// Returns a stack value holding `v` with an empty path (path index 0).
stackval stackval_root(json_t* v) {
  stackval rooted;
  rooted.value = v;
  rooted.pathidx = 0;
  return rooted;
}
// The interpreter's data stack and the layout of one of its elements: the
// forkable-stack header followed by the stack value.
struct forkable_stack data_stk;
typedef struct {
FORKABLE_STACK_HEADER;
stackval sv;
} data_stk_elem;
// Pushes one data-stack item large enough for `n` elements and returns a
// pointer to its start.
data_stk_elem* stk_push_frame(int n) {
  size_t bytes = sizeof(data_stk_elem) * n;
  return forkable_stack_push(&data_stk, bytes);
}
// Pops the `n`-element item from the top of the data stack.
void stk_pop_frame(int n) {
  size_t bytes = sizeof(data_stk_elem) * n;
  forkable_stack_pop(&data_stk, bytes);
}
// Pushes a single value onto the data stack.
void stk_push(stackval val) {
  stk_push_frame(1)->sv = val;
}
stackval stk_pop() {
data_stk_elem* s = forkable_stack_peek(&data_stk, sizeof(data_stk_elem));
stackval sv = s->sv;
forkable_stack_pop(&data_stk, sizeof(data_stk_elem));
return sv;
}
// A saved execution context: which bytecode, which variable frame, and the
// program counter to resume at.
typedef struct {
FORKABLE_STACK_HEADER;
struct bytecode* bc;
data_stk_elem* fp;
uint16_t* pc;
} call_stk_elem;
struct forkable_stack call_stk;
// A backtracking point: the saved states of the data and call stacks.
struct forkpoint {
FORKABLE_STACK_HEADER;
struct forkable_stack_state saved_data_stack;
struct forkable_stack_state saved_call_stack;
};
struct forkable_stack fork_stk;
// Pushes a new fork point and saves the current state of the data and call
// stacks into it.
void stack_save(){
  struct forkpoint* point =
      forkable_stack_push(&fork_stk, sizeof(struct forkpoint));
  forkable_stack_save(&data_stk, &point->saved_data_stack);
  forkable_stack_save(&call_stk, &point->saved_call_stack);
}
// Swaps the current data/call stack positions with the ones stored in the
// top fork point.
void stack_switch() {
  struct forkpoint* point =
      forkable_stack_peek(&fork_stk, sizeof(struct forkpoint));
  forkable_stack_switch(&data_stk, &point->saved_data_stack);
  forkable_stack_switch(&call_stk, &point->saved_call_stack);
}
// Restores the data and call stacks from the top fork point, then discards
// that fork point.
void stack_restore(){
  const size_t point_size = sizeof(struct forkpoint);
  struct forkpoint* point = forkable_stack_peek(&fork_stk, point_size);
  forkable_stack_restore(&data_stk, &point->saved_data_stack);
  forkable_stack_restore(&call_stk, &point->saved_call_stack);
  // The fork point is only popped after both states have been read from it.
  forkable_stack_pop(&fork_stk, point_size);
}
// Aliases used by the interpreter loop for the data-stack push/pop helpers.
#define stack_push stk_push
#define stack_pop stk_pop
// Case label for "this opcode, re-entered after a backtrack": the opcode
// value shifted past the range of real opcodes.
#define ON_BACKTRACK(op) ((op)+NUM_OPCODES)
// Resumes the interpreter from the context on top of call_stk and runs until
// the program yields a value (returned) or a backtrack finds no fork points
// left (returns 0). Every executed instruction and its stack inputs are
// traced to stdout.
json_t* jq_next() {
assert(!forkable_stack_empty(&call_stk));
call_stk_elem* ctx = forkable_stack_peek(&call_stk, sizeof(call_stk_elem));
struct bytecode* bc = ctx->bc;
uint16_t* pc = ctx->pc;
data_stk_elem* fp = ctx->fp;
json_t* cpool = bc->constants;
json_t* cfunc_input[MAX_CFUNCTION_ARGS] = {0};
json_t* cfunc_output[MAX_CFUNCTION_ARGS] = {0};
// Set by BACKTRACK so that the next instruction is dispatched to its
// ON_BACKTRACK case instead of its normal one.
int backtracking = 0;
while (1) {
// Trace: the instruction, then the stack values it is about to consume.
dump_operation(bc, pc);
uint16_t opcode = *pc++;
printf("\t");
const struct opcode_description* opdesc = opcode_describe(opcode);
data_stk_elem* param = forkable_stack_peek(&data_stk, sizeof(data_stk_elem));
for (int i=0; i<opdesc->stack_in; i++) {
json_dumpf(param->sv.value, stdout, JSON_ENCODE_ANY);
if (i < opdesc->stack_in-1) printf(" | ");
param = forkable_stack_peek_next(&data_stk, param, sizeof(data_stk_elem));
}
if (backtracking) {
printf("\t<backtracking>");
opcode = ON_BACKTRACK(opcode);
backtracking = 0;
}
printf("\n");
switch (opcode) {
default: assert(0 && "invalid instruction");
// Replace the top of stack with a constant from the pool, keeping its path.
case LOADK: {
json_t* v = json_array_get(cpool, *pc++);
assert(v);
stack_push(stackval_replace(stack_pop(), v));
break;
}
case DUP: {
stackval v = stack_pop();
stack_push(v);
stack_push(v);
break;
}
case SWAP: {
stackval a = stack_pop();
stackval b = stack_pop();
stack_push(a);
stack_push(b);
break;
}
case POP: {
stack_pop();
break;
}
// Pop a value and an array; push a shallow copy of the array with the value
// appended.
case APPEND: {
// FIXME paths
json_t* v = stack_pop().value;
json_t* array = stack_pop().value;
array = json_copy(array);
json_array_append(array, v);
stack_push(stackval_root(array));
break;
}
// Stack (top first): saved top, value, key, object. Pushes a shallow copy of
// the object with key=value set, then the saved top again.
case INSERT: {
stackval stktop = stack_pop();
json_t* v = stack_pop().value;
json_t* k = stack_pop().value;
stackval objv = stack_pop();
assert(json_is_string(k));
assert(json_is_object(objv.value));
json_t* obj = json_copy(objv.value);
json_object_set(obj, json_string_value(k), v);
assert(json_is_object(obj));
stack_push(stackval_replace(objv, obj));
stack_push(stktop);
break;
}
// Replace the top of stack with the value of frame variable v.
case LOADV: {
uint16_t v = *pc++;
stack_push(stackval_replace(stack_pop(), fp[v].sv.value));
break;
}
// Pop the top of stack into frame variable v (and trace the assignment).
case STOREV: {
uint16_t v = *pc++;
stackval val = stack_pop();
printf("V%d = ", v);
json_dumpf(val.value, stdout, JSON_ENCODE_ANY);
printf("\n");
fp[v].sv.value = val.value;
break;
}
#if 0
case DISPLAY: {
stackval sv = stack_pop();
if (sv.value) {
json_dumpf(sv.value, stdout, JSON_ENCODE_ANY);
} else {
printf("#ERROR");
}
printf(" - ");
for (int i = 0; i < sv.pathidx; i++) {
printf("/");
json_dumpf(pathbuf[i], stdout, JSON_ENCODE_ANY);
}
printf("\n");
return;
}
#endif
// Pop a container and a key; push container[key] with the key appended to
// the container's path. A bad key type or a missing entry trips an assert.
case INDEX: {
stackval t = stack_pop();
json_t* k = stack_pop().value;
stackval v;
if (json_is_string(k)) {
v.value = json_object_get(t.value, json_string_value(k));
} else if (json_is_integer(k)) {
v.value = json_array_get(t.value, json_integer_value(k));
} else {
assert(0 && "key neither string nor int");
}
if (v.value) {
v.pathidx = path_push(t, k);
stack_push(v);
} else {
assert(0 && "bad lookup");
}
break;
}
case JUMP: {
uint16_t offset = *pc++;
pc += offset;
break;
}
// Iterate over an array. The first entry pushes index 0; every entry
// (including re-entry after a backtrack) pops the index and the array, and
// either backtracks when the index is past the end or saves a fork point
// holding the array and index+1 and pushes the current element.
case EACH:
stack_push(stackval_root(json_integer(0)));
// fallthrough
case ON_BACKTRACK(EACH): {
json_t* idxj = stack_pop().value;
int idx = json_integer_value(idxj);
stackval array = stack_pop();
if (idx >= json_array_size(array.value)) {
goto do_backtrack;
} else {
stack_save();
stack_push(array);
stack_push(stackval_root(json_integer(idx+1)));
call_stk_elem* ctx = forkable_stack_push(&call_stk, sizeof(call_stk_elem));
ctx->bc = bc;
ctx->fp = fp;
// Resume at this same EACH instruction when backtracked into.
ctx->pc = pc - 1;
stack_switch();
stackval sv = {json_array_get(array.value, idx),
path_push(array, json_integer(idx))};
stack_push(sv);
}
break;
}
// Return to the most recent fork point, or report exhaustion by returning 0.
do_backtrack:
case BACKTRACK: {
if (forkable_stack_empty(&fork_stk)) {
return 0;
}
stack_restore();
call_stk_elem* ctx = forkable_stack_peek(&call_stk, sizeof(call_stk_elem));
bc = ctx->bc;
pc = ctx->pc;
fp = ctx->fp;
cpool = bc->constants;
forkable_stack_pop(&call_stk, sizeof(call_stk_elem));
backtracking = 1;
break;
}
// First visit: save a fork point that resumes at this FORK, then continue
// with the instruction after the offset word.
case FORK: {
stack_save();
call_stk_elem* ctx = forkable_stack_push(&call_stk, sizeof(call_stk_elem));
ctx->bc = bc;
ctx->fp = fp;
ctx->pc = pc - 1;
stack_switch();
pc++; // skip offset this time
break;
}
// Re-entered after a backtrack: take the branch.
case ON_BACKTRACK(FORK): {
uint16_t offset = *pc++;
pc += offset;
break;
}
// Pop the result, save the current context on call_stk so the next
// jq_next() call resumes after this instruction, and return the result.
case YIELD: {
json_t* value = stack_pop().value;
call_stk_elem* ctx = forkable_stack_push(&call_stk, sizeof(call_stk_elem));
ctx->bc = bc;
ctx->fp = fp;
ctx->pc = pc;
return value;
}
// Call the C builtin selected by the immediate with the top of stack as its
// only input, and replace the top of stack with its first output.
case CALL_BUILTIN_1_1: {
stackval top = stack_pop();
cfunc_input[0] = top.value;
struct cfunction* func = &bc->globals->cfunctions[*pc++];
printf(" call %s\n", func->name);
func->fptr(cfunc_input, cfunc_output);
stack_push(stackval_replace(top, cfunc_output[0]));
break;
}
}
}
}
// Prepares the interpreter to run `bc` on `input`: (re)allocates the three
// stacks, reserves the variable frame, pushes the input value, and records an
// initial context that starts at the first instruction.
void jq_init(struct bytecode* bc, json_t* input) {
  forkable_stack_init(&data_stk, sizeof(stackval) * 100); // FIXME: lower this number, see if it breaks
  forkable_stack_init(&call_stk, 1024); // FIXME: lower this number, see if it breaks
  forkable_stack_init(&fork_stk, 1024); // FIXME: lower this number, see if it breaks

  // The frame goes on the data stack first, beneath the input value.
  data_stk_elem* locals = stk_push_frame(bc->framesize);
  stack_push(stackval_root(input));

  call_stk_elem* entry = forkable_stack_push(&call_stk, sizeof(call_stk_elem));
  entry->bc = bc;
  entry->fp = locals;
  entry->pc = bc->code;
}
void run_program(struct bytecode* bc) {
jq_init(bc, json_loadf(stdin, 0, 0));
json_t* result;
while ((result = jq_next())) {
json_dumpf(result, stdout, JSON_ENCODE_ANY);
printf("\n");
}
printf("end of results\n");
//assert(frame == stack_top_frame(bc->framesize));
//stk_pop_frame(bc->framesize);
//assert(stackpos == 0);
}

14
c/execute.h Normal file
View File

@@ -0,0 +1,14 @@
#include "opcode.h"
#define MAX_CFUNCTION_PARAM 10
// NOTE(review): this header duplicates names that the visible sources define
// elsewhere -- `stackval` in execute.c, and `cfunction_ptr` / `struct
// cfunction` in bytecode.h (there with a json_t* array signature). None of
// the visible sources include this file; confirm whether it is still needed.
typedef struct {
json_t* value;
int pathidx;
} stackval;
// Builtin signature operating on stack values rather than bare JSON values.
typedef void (*cfunction_ptr)(stackval* input, stackval* output);
struct cfunction {
cfunction_ptr fptr;
const char* name;
opcode callop;
};

94
c/forkable_stack.h Normal file
View File

@@ -0,0 +1,94 @@
#include <stdlib.h>
#include <stddef.h>
#include <assert.h>
// A byte stack that can be "forked": a saved state protects everything below
// a limit from being overwritten, so execution can later return to it.
//
// Every element starts with a forkable_stack_header whose `next` field is
// the value s->pos had just before the element was pushed, i.e. the offset
// just past the element logically beneath it.
//
// Caveat: a push may grow the buffer with realloc, which can move it.
// Pointers obtained from an earlier push/peek must not be relied upon after
// a later push.
struct forkable_stack_header {
  int next;
};

// Place as the first member of any struct stored on a forkable stack.
#define FORKABLE_STACK_HEADER struct forkable_stack_header fk_header_

struct forkable_stack {
  char* stk;

  // stk+length is just past end of allocated area
  int length;

  // stk+pos is just past top-of-stack item
  int pos;

  // everything before stk+savedlimit must be preserved
  int savedlimit;
};

// Asserts the structural invariants of an initialised stack.
static void forkable_stack_check(struct forkable_stack* s) {
  assert(s->stk);
  assert(s->length > 0);
  assert(s->pos >= 0 && s->pos <= s->length);
  assert(s->savedlimit >= 0 && s->savedlimit <= s->length);
}

// Returns non-zero when the stack holds no elements.
static int forkable_stack_empty(struct forkable_stack* s) {
  return s->pos == 0;
}

// Initialises `s` with a buffer of `sz` bytes (sz must be > 0).
// Aborts the process if the buffer cannot be allocated.
static void forkable_stack_init(struct forkable_stack* s, size_t sz) {
  s->stk = malloc(sz);
  if (!s->stk) {
    abort();
  }
  s->length = sz;
  s->pos = 0;
  s->savedlimit = 0;
  forkable_stack_check(s);
}

// Pushes an element of `size` bytes and returns a pointer to it. Only the
// header is initialised. The element is placed above both the current top
// and the saved limit, so preserved data is never overwritten. Aborts the
// process if the buffer cannot be grown.
static void* forkable_stack_push(struct forkable_stack* s, size_t size) {
  forkable_stack_check(s);
  int curr = s->pos > s->savedlimit ? s->pos : s->savedlimit;
  if ((size_t)curr + size > (size_t)s->length) {
    int newlen = (size + s->length + 1024) * 2;
    // Keep the old buffer reachable until the reallocation has succeeded.
    char* grown = realloc(s->stk, newlen);
    if (!grown) {
      abort();
    }
    s->stk = grown;
    s->length = newlen;
  }
  void* ret = (void*)(s->stk + curr);
  ((struct forkable_stack_header*)ret)->next = s->pos;
  s->pos = curr + size;
  return ret;
}

// Returns the top element, which must be `size` bytes long.
static void* forkable_stack_peek(struct forkable_stack* s, size_t size) {
  assert(!forkable_stack_empty(s));
  assert((size_t)s->pos >= size);
  return (void*)(s->stk + s->pos - size);
}

// Returns the element logically beneath `top`, assuming it is `size` bytes
// long. The result must not be dereferenced when `top` is the bottom element.
static void* forkable_stack_peek_next(struct forkable_stack* s, void* top, size_t size) {
  struct forkable_stack_header* elem = top;
  return (void*)(s->stk + elem->next - size);
}

// Removes the top element, which must be `size` bytes long.
static void forkable_stack_pop(struct forkable_stack* s, size_t size) {
  struct forkable_stack_header* elem = forkable_stack_peek(s, size);
  s->pos = elem->next;
}

// Snapshot of a stack's position and preservation limit.
struct forkable_stack_state {
  int prevpos, prevlimit;
};

// Records the current state in `state` and protects everything currently on
// the stack from being overwritten.
static void forkable_stack_save(struct forkable_stack* s, struct forkable_stack_state* state) {
  state->prevpos = s->pos;
  state->prevlimit = s->savedlimit;
  if (s->pos > s->savedlimit) s->savedlimit = s->pos;
}

// Exchanges the current position with the one in `state`, raising the
// preservation limit to cover the position being left. `state` receives the
// position and limit that were current on entry.
static void forkable_stack_switch(struct forkable_stack* s, struct forkable_stack_state* state) {
  int curr_pos = s->pos;
  s->pos = state->prevpos;
  state->prevpos = curr_pos;

  int curr_limit = s->savedlimit;
  if (curr_pos > curr_limit) s->savedlimit = curr_pos;
  state->prevlimit = curr_limit;
}

// Returns the stack to the position and limit recorded in `state`.
static void forkable_stack_restore(struct forkable_stack* s, struct forkable_stack_state* state) {
  s->pos = state->prevpos;
  s->savedlimit = state->prevlimit;
}

25
c/lexer.l Normal file
View File

@@ -0,0 +1,25 @@
%{
/* Tokenizer for the jq expression language. Token values are passed to the
   parser through yylval: `num` for NUMBER and `str` for IDENT. */
#include "compile.h"
#include "parser.tab.h" /* Generated by bison. */
%}
/* NOTE(review): `nodefault` is set and no rule below matches arbitrary
   characters, so input outside these patterns presumably aborts the scanner
   -- confirm. */
%option noyywrap nounput noinput nodefault
%option reentrant
%option bison-bridge bison-locations
%%
"==" { return EQ; }
"as" { return AS; }
"."|"="|";"|"["|"]"|","|":"|"("|")"|"{"|"}"|"|"|"+"|"\$" { return yytext[0];}
[[:digit:]]+ { yylval->num = atoi(yytext); return NUMBER;}
[[:alnum:]]+ { yylval->str = strdup(yytext); return IDENT;}
[ \n\t]+ {}
%%
/* Rule notes: single-character punctuation is returned as its own character
   code; whitespace is discarded. IDENT text is a strdup'ed copy.
   NOTE(review): no code visible here or in parser.y frees that copy. */
/* perhaps these should be calls... */
/*
"true" { return TRUE; }
"false" { return FALSE; }
"null" { return NULL; }
*/

82
c/main.c Normal file
View File

@@ -0,0 +1,82 @@
#include <stdio.h>
#include "compile.h"
#include "parser.tab.h"
#include "builtin.h"
// Declarations of functions defined in parser.y (compile) and execute.c
// (jq_init, jq_next, run_program); no header declares them.
block compile(const char* str);
void jq_init(struct bytecode* bc, json_t* value);
json_t* jq_next();
void run_program(struct bytecode* bc);
// Returns 1 if `buf` is a line to skip -- after leading spaces and tabs it is
// empty, ends immediately with a newline, or starts with '#' -- and 0
// otherwise.
int skipline(const char* buf) {
  const char* cur = buf;
  while (*cur == ' ' || *cur == '\t') {
    cur++;
  }
  switch (*cur) {
  case '#':
  case '\n':
  case '\0':
    return 1;
  default:
    return 0;
  }
}
// Runs the test cases in the file "testdata" and prints a pass/fail summary.
//
// File format: each test case is a program line, then an input JSON line,
// then zero or more expected-output JSON lines. Test cases are separated by
// blank or '#' comment lines (see skipline).
//
// Changes from the earlier version:
//  - a missing "testdata" file is reported instead of dereferencing NULL;
//  - a missing or unparsable input line fails the test instead of running
//    the interpreter on a NULL value;
//  - after a failed comparison the rest of that test case is skipped, so its
//    remaining expected lines are no longer parsed as the next program;
//  - each parsed expected value is released once it has been compared.
// The compiled bytecode and the input value are not released here; no
// function for freeing bytecode is visible in this code base.
void run_tests() {
  FILE* testdata = fopen("testdata","r");
  if (!testdata) {
    perror("testdata");
    return;
  }
  char buf[4096];
  int tests = 0, passed = 0;
  while (fgets(buf, sizeof(buf), testdata)) {
    if (skipline(buf)) continue;
    printf("Testing %s\n", buf);
    int pass = 1;
    // Set once the separator line that ends this test case has been consumed.
    int at_separator = 0;

    block program = compile(buf);
    block_append(&program, gen_op_simple(YIELD));
    block_append(&program, gen_op_simple(BACKTRACK));
    struct bytecode* bc = block_compile(&builtins, program);
    block_free(program);

    json_t* input = 0;
    if (fgets(buf, sizeof(buf), testdata)) {
      if (skipline(buf)) {
        at_separator = 1;
      } else {
        input = json_loads(buf, JSON_DECODE_ANY, 0);
      }
    } else {
      // End of file: there is nothing left to skip.
      at_separator = 1;
    }

    if (!input) {
      printf("Missing or invalid input\n");
      pass = 0;
    } else {
      jq_init(bc, input);
      while (fgets(buf, sizeof(buf), testdata)) {
        if (skipline(buf)) {
          at_separator = 1;
          break;
        }
        json_t* expected = json_loads(buf, JSON_DECODE_ANY, 0);
        json_t* actual = jq_next();
        if (!actual) {
          printf("Insufficient results\n");
          pass = 0;
        } else if (!json_equal(expected, actual)) {
          printf("Expected ");
          json_dumpf(expected, stdout, JSON_ENCODE_ANY);
          printf(", but got ");
          json_dumpf(actual, stdout, JSON_ENCODE_ANY);
          printf("\n");
          pass = 0;
        }
        json_decref(expected);
        if (!pass) break;
      }
      if (pass) {
        json_t* extra = jq_next();
        if (extra) {
          printf("Superfluous result: ");
          json_dumpf(extra, stdout, JSON_ENCODE_ANY);
          printf("\n");
          pass = 0;
        }
      }
    }

    // Resynchronise: discard whatever remains of this test case so the next
    // iteration starts at a program line.
    if (!at_separator) {
      while (fgets(buf, sizeof(buf), testdata) && !skipline(buf)) {
      }
    }
    tests++;
    passed+=pass;
  }
  fclose(testdata);
  printf("%d of %d tests passed\n", passed,tests);
}
// With no arguments, runs the test suite. Otherwise compiles argv[1] as a
// program, prints its disassembly, and runs it on JSON read from stdin.
int main(int argc, char* argv[]) {
  if (argc == 1) {
    run_tests();
    return 0;
  }

  block blk = compile(argv[1]);
  // Every program ends by yielding its result and then backtracking for more.
  block epilogue = block_join(gen_op_simple(YIELD), gen_op_simple(BACKTRACK));
  block_append(&blk, epilogue);

  struct bytecode* bc = block_compile(&builtins, blk);
  block_free(blk);

  dump_disassembly(bc);
  printf("\n");
  run_program(bc);
  return 0;
}

27
c/opcode.c Normal file
View File

@@ -0,0 +1,27 @@
#include "opcode.h"
// Flag combinations used in the "imm" column of opcode_list.h.
#define NONE 0
#define CONSTANT (OP_HAS_IMMEDIATE | OP_HAS_CONSTANT)
#define VARIABLE (OP_HAS_IMMEDIATE | OP_HAS_VARIABLE)
#define BRANCH (OP_HAS_IMMEDIATE | OP_HAS_BRANCH)
#define CFUNC (OP_HAS_IMMEDIATE | OP_HAS_SYMBOL | OP_HAS_CFUNC)
// Expands each OP(...) line of opcode_list.h into one table initializer:
// {opcode, "name", flags, stack_in, stack_out}.
#define OP(name, imm, in, out) \
{name, #name, imm, in, out},
// Description table, indexed by opcode value.
static const struct opcode_description opcode_descriptions[] = {
#include "opcode_list.h"
};
// Returned by opcode_describe for values outside the opcode range.
static const struct opcode_description invalid_opcode_description = {
-1, "#INVALID", 0, 0, 0
};
// Returns the description of `op`, or the "#INVALID" description when `op` is
// outside the range of defined opcodes. Never returns NULL.
const struct opcode_description* opcode_describe(opcode op) {
  int code = (int)op;
  if (code < 0 || code >= NUM_OPCODES) {
    return &invalid_opcode_description;
  }
  return &opcode_descriptions[code];
}

36
c/opcode.h Normal file
View File

@@ -0,0 +1,36 @@
#ifndef OPCODE_H
#define OPCODE_H
// The opcode enumeration: one enumerator per OP(...) line in opcode_list.h,
// in file order.
typedef enum {
#define OP(name, imm, in, out) name,
#include "opcode_list.h"
#undef OP
} opcode;
// NUM_OPCODES is computed by expanding every OP(...) line to "+1".
enum {
NUM_OPCODES =
#define OP(name, imm, in, out) +1
#include "opcode_list.h"
#undef OP
};
// Bit flags describing an opcode's immediate operand.
enum {
OP_HAS_IMMEDIATE = 1,
OP_HAS_CONSTANT = 2,
OP_HAS_VARIABLE = 4,
OP_HAS_BRANCH = 8,
OP_HAS_SYMBOL = 16,
OP_HAS_CFUNC = 32
};
// Static description of one opcode: its name, OP_HAS_* flags, and how many
// stack values it consumes (stack_in) and produces (stack_out).
struct opcode_description {
opcode op;
const char* name;
int flags;
int stack_in, stack_out;
};
// Never returns NULL; unknown values map to an "#INVALID" description.
const struct opcode_description* opcode_describe(opcode op);
// Returns the encoded length of `op` in 16-bit words: one for the opcode plus
// one if it carries an immediate.
static inline int opcode_length(opcode op) {
  int length = 1;
  if (opcode_describe(op)->flags & OP_HAS_IMMEDIATE) {
    length++;
  }
  return length;
}
#endif

17
c/opcode_list.h Normal file
View File

@@ -0,0 +1,17 @@
// X-macro list of all opcodes. This file is included several times with
// different definitions of OP; the columns are
//   OP(name, immediate kind, values popped, values pushed)
// The order of the lines defines the numeric opcode values.
OP(LOADK, CONSTANT, 1, 1)
OP(DUP, NONE, 1, 2)
OP(SWAP, NONE, 2, 2)
OP(POP, NONE, 1, 0)
OP(LOADV, VARIABLE, 1, 1)
OP(STOREV, VARIABLE, 1, 0)
OP(INDEX, NONE, 2, 1)
//OP(DISPLAY, NONE, 1, 0)
OP(YIELD, NONE, 1, 0)
OP(EACH, NONE, 1, 1)
OP(FORK, BRANCH, 0, 0)
OP(JUMP, BRANCH, 0, 0)
OP(BACKTRACK, NONE, 0, 0)
OP(APPEND, NONE, 2, 1)
OP(INSERT, NONE, 4, 2)
OP(CALL_BUILTIN_1_1, CFUNC, 1, 1)

162
c/parser.y Normal file
View File

@@ -0,0 +1,162 @@
%{
#include <stdio.h>
#include <string.h>
#include "compile.h"
%}
/* Pure (reentrant) parser with location tracking. */
%locations
%define api.pure
/* Semantic values: integers from NUMBER, strings from IDENT, and code blocks
   for every grammar nonterminal. */
%union {
int num;
char* str;
block blk;
}
/* yyparse(answer, lexer): the compiled program is stored through `answer`;
   `lexer` is the reentrant scanner, which is also passed on to yylex. */
%parse-param {block* answer}
%parse-param {yyscan_t lexer}
%lex-param {yyscan_t lexer}
%token <str> IDENT
%token <num> NUMBER
/* Precedence, lowest first: '|' binds loosest, then ',', then "==", then '+'. */
%left '|'
%left ','
%token EQ "=="
%token AS "as"
%nonassoc EQ
%left '+'
%type <blk> Exp Term MkDict MkDictPair ExpD
%{
#include "lexer.yy.h"
// Parser error callback: prints the message to stdout. The location, result
// pointer and scanner are not used.
void yyerror(YYLTYPE* loc, block* answer, yyscan_t lexer, const char *s){
  fprintf(stdout, "ERROR: %s\n", s);
}
// Emits code for one object entry: evaluate the key and the value as
// subexpressions, then INSERT the pair into the object under construction.
static block gen_dictpair(block k, block v) {
  block pair = gen_subexp(k);
  block value_code = gen_subexp(v);
  block_append(&pair, value_code);
  block_append(&pair, gen_op_simple(INSERT));
  return pair;
}
static block gen_string(const char* str) {
return gen_op_const(LOADK, json_string(str));
}
// Emits code that evaluates `obj`, then `key` as a subexpression, then INDEX.
static block gen_index(block obj, block key) {
  block lookup = gen_subexp(key);
  block_append(&lookup, gen_op_simple(INDEX));
  // `obj` is a by-value copy, so it can be extended and returned directly.
  block_append(&obj, lookup);
  return obj;
}
%}
%%
/* The whole program is one expression; its code is handed back via `answer`. */
program: Exp { *answer = $1; }
/* Expressions: variable binding, pipe, comma, or a single term.
   NOTE(review): the IDENT strings ($n of type <str>) are strdup'ed by the
   lexer, and the generators called here copy them again; nothing visible in
   these actions frees the originals. */
Exp:
/* Term as $name | body: DUP the input, run Term, store its result in a
   variable bound over the body. */
Term "as" '$' IDENT '|' Exp {
$$ = gen_op_simple(DUP);
block_append(&$$, $1);
block_append(&$$, block_bind(gen_op_var_unbound(STOREV, $4), $6));
} |
/* a | b: plain concatenation of the two code blocks. */
Exp '|' Exp {
$$ = block_join($1, $3);
} |
/* a , b: results of a followed by results of b. */
Exp ',' Exp {
$$ = gen_both($1, $3);
} |
Term {
$$ = $1;
}
/* Restricted expression used for object values: pipes and terms only, so a
   ',' ends the value and separates object entries. */
ExpD:
ExpD '|' ExpD {
$$ = block_join($1, $3);
} |
Term {
$$ = $1;
}
Term:
/* Identity. */
'.' {
$$ = gen_noop();
} |
/* Field access on a term: t.name */
Term '.' IDENT {
$$ = gen_index($1, gen_string($3));
} |
/* Field access on the input: .name */
'.' IDENT {
$$ = gen_index(gen_noop(), gen_string($2));
} |
/* FIXME: string literals */
/* Computed index: t[expr] */
Term '[' Exp ']' {
$$ = gen_index($1, $3);
} |
/* Iteration over all elements: t[] */
Term '[' ']' {
$$ = block_join($1, gen_op_simple(EACH));
} |
/* Integer literal. */
NUMBER {
$$ = gen_op_const(LOADK, json_integer($1));
} |
'(' Exp ')' {
$$ = $2;
} |
/* Array construction: collect all results of the expression. */
'[' Exp ']' {
$$ = gen_collect($2);
} |
'[' ']' {
$$ = gen_op_const(LOADK, json_array());
} |
/* Object construction: start from an empty object as a subexpression, run
   the entry code, then POP the value left on top. */
'{' MkDict '}' {
$$ = gen_subexp(gen_op_const(LOADK, json_object()));
block_append(&$$, $2);
block_append(&$$, gen_op_simple(POP));
} |
/* A bare identifier is a call to a builtin of that name. */
IDENT {
$$ = gen_op_symbol(CALL_BUILTIN_1_1, $1);
} |
/* Variable reference; bound later by an enclosing "as" via block_bind. */
'$' IDENT {
$$ = gen_op_var_unbound(LOADV, $2);
}
/* Zero or more comma-separated object entries. */
MkDict:
{
$$=gen_noop();
}
|
MkDictPair
{ $$ = $1; }
| MkDictPair ',' MkDict { $$=block_join($1, $3); }
MkDictPair
/* name: value */
: IDENT ':' ExpD {
$$ = gen_dictpair(gen_string($1), $3);
}
/* name alone is shorthand for name: .name */
| IDENT {
$$ = gen_dictpair(gen_string($1),
gen_index(gen_noop(), gen_string($1)));
}
/* (key expression): value */
| '(' Exp ')' ':' ExpD {
$$ = gen_dictpair($2, $5);
}
%%
// Parses the program text `str` and returns its code block. The result of
// yyparse is not inspected; the returned block is whatever the parser stored,
// or the empty block if it stored nothing.
block compile(const char* str) {
  block answer = gen_noop();

  yyscan_t scanner;
  yylex_init(&scanner);
  YY_BUFFER_STATE input = yy_scan_string(str, scanner);

  yyparse(&answer, scanner);

  // Tear down the scanner in the reverse order of its setup.
  yy_delete_buffer(input, scanner);
  yylex_destroy(scanner);
  return answer;
}