mirror of
https://github.com/stedolan/jq.git
synced 2024-05-11 05:55:39 +00:00
First pass at functions + tests
This commit is contained in:
@@ -14,7 +14,7 @@ lexer.yy.c: lexer.l
|
||||
lexer.yy.h: lexer.yy.c
|
||||
|
||||
parser.tab.c: parser.y lexer.yy.h
|
||||
bison -W -d parser.y
|
||||
bison -W -d parser.y -v --report-file=parser.info
|
||||
parser.tab.h: parser.tab.c
|
||||
|
||||
parsertest: parser.tab.c lexer.yy.c main.c opcode.c bytecode.c compile.c execute.c builtin.c
|
||||
|
43
c/bytecode.c
43
c/bytecode.c
@@ -4,12 +4,29 @@
|
||||
#include "bytecode.h"
|
||||
#include "opcode.h"
|
||||
|
||||
void dump_disassembly(struct bytecode* bc) {
|
||||
static int bytecode_operation_length(uint16_t* codeptr) {
|
||||
if (opcode_describe(*codeptr)->flags & OP_HAS_VARIABLE_LENGTH_ARGLIST) {
|
||||
return 2 + codeptr[1] * 2;
|
||||
} else {
|
||||
return opcode_length(*codeptr);
|
||||
}
|
||||
}
|
||||
|
||||
void dump_disassembly(int indent, struct bytecode* bc) {
|
||||
dump_code(indent, bc);
|
||||
for (int i=0; i<bc->nsubfunctions; i++) {
|
||||
printf("%*ssubfn[%d]:\n", indent, "", i);
|
||||
dump_disassembly(indent+2, bc->subfunctions[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void dump_code(int indent, struct bytecode* bc) {
|
||||
int pc = 0;
|
||||
while (pc < bc->codelen) {
|
||||
printf("%*s", indent, "");
|
||||
dump_operation(bc, bc->code + pc);
|
||||
printf("\n");
|
||||
pc += opcode_length(bc->code[pc]);
|
||||
pc += bytecode_operation_length(bc->code + pc);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -20,16 +37,28 @@ void dump_operation(struct bytecode* bc, uint16_t* codeptr) {
|
||||
printf("%s", op->name);
|
||||
if (op->flags & OP_HAS_IMMEDIATE) {
|
||||
uint16_t imm = bc->code[pc++];
|
||||
printf(" ");
|
||||
if (op->flags & OP_HAS_BRANCH) {
|
||||
printf("%04d", pc + imm);
|
||||
if (op->flags & OP_HAS_VARIABLE_LENGTH_ARGLIST) {
|
||||
for (int i=0; i<imm; i++) {
|
||||
uint16_t level = bc->code[pc++];
|
||||
uint16_t idx = bc->code[pc++];
|
||||
if (idx & ARG_NEWCLOSURE) {
|
||||
printf(" subfn[%d]", idx & ~ARG_NEWCLOSURE);
|
||||
} else {
|
||||
printf(" param[%d]", idx);
|
||||
}
|
||||
if (level) {
|
||||
printf("^%d", level);
|
||||
}
|
||||
}
|
||||
} else if (op->flags & OP_HAS_BRANCH) {
|
||||
printf(" %04d", pc + imm);
|
||||
} else if (op->flags & OP_HAS_CONSTANT) {
|
||||
json_dumpf(json_array_get(bc->constants, imm),
|
||||
stdout, JSON_ENCODE_ANY);
|
||||
} else if (op->flags & OP_HAS_VARIABLE) {
|
||||
printf("v%d", imm);
|
||||
printf(" v%d", imm);
|
||||
} else {
|
||||
printf("%d", imm);
|
||||
printf(" %d", imm);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
13
c/bytecode.h
13
c/bytecode.h
@@ -19,6 +19,12 @@ struct symbol_table {
|
||||
int ncfunctions;
|
||||
};
|
||||
|
||||
// The bytecode format matters in:
|
||||
// execute.c - interpreter
|
||||
// compile.c - compiler
|
||||
// bytecode.c - disassembler
|
||||
|
||||
#define ARG_NEWCLOSURE 0x1000
|
||||
|
||||
struct bytecode {
|
||||
uint16_t* code;
|
||||
@@ -30,11 +36,14 @@ struct bytecode {
|
||||
json_t* constants;
|
||||
struct symbol_table* globals;
|
||||
|
||||
struct bytecode* subfunctions;
|
||||
struct bytecode** subfunctions;
|
||||
int nsubfunctions;
|
||||
|
||||
struct bytecode* parent;
|
||||
};
|
||||
|
||||
void dump_disassembly(struct bytecode* code);
|
||||
void dump_disassembly(int, struct bytecode* code);
|
||||
void dump_code(int, struct bytecode* code);
|
||||
void dump_operation(struct bytecode* bc, uint16_t* op);
|
||||
|
||||
#endif
|
||||
|
208
c/compile.c
208
c/compile.c
@@ -14,11 +14,23 @@ struct inst {
|
||||
uint16_t intval;
|
||||
struct inst* target;
|
||||
json_t* constant;
|
||||
char* symbol;
|
||||
} imm;
|
||||
|
||||
struct inst* var_binding;
|
||||
int var_frame_idx;
|
||||
// Binding
|
||||
// An instruction requiring binding (for parameters/variables)
|
||||
// is in one of three states:
|
||||
// bound_by = NULL - Unbound free variable
|
||||
// bound_by = self - This instruction binds a variable
|
||||
// bound_by = other - Uses variable bound by other instruction
|
||||
// The immediate field is generally not meaningful until instructions
|
||||
// are bound, and even then only for instructions which bind.
|
||||
struct inst* bound_by;
|
||||
char* symbol;
|
||||
block subfn;
|
||||
|
||||
// This instruction is compiled as part of which function?
|
||||
// (only used during block_compile)
|
||||
struct bytecode* compiled;
|
||||
|
||||
int bytecode_pos; // position just after this insn
|
||||
};
|
||||
@@ -28,15 +40,16 @@ static inst* inst_new(opcode op) {
|
||||
i->next = i->prev = 0;
|
||||
i->op = op;
|
||||
i->bytecode_pos = -1;
|
||||
i->var_binding = 0;
|
||||
i->var_frame_idx = 0;
|
||||
i->bound_by = 0;
|
||||
i->symbol = 0;
|
||||
i->subfn = gen_noop();
|
||||
return i;
|
||||
}
|
||||
|
||||
static void inst_free(struct inst* i) {
|
||||
if (opcode_describe(i->op)->flags &
|
||||
(OP_HAS_SYMBOL | OP_HAS_VARIABLE)) {
|
||||
free(i->imm.symbol);
|
||||
free(i->symbol);
|
||||
if (opcode_describe(i->op)->flags & OP_HAS_BLOCK) {
|
||||
block_free(i->subfn);
|
||||
}
|
||||
free(i);
|
||||
}
|
||||
@@ -89,25 +102,56 @@ void inst_set_target(block b, block target) {
|
||||
block gen_op_var_unbound(opcode op, const char* name) {
|
||||
assert(opcode_describe(op)->flags & OP_HAS_VARIABLE);
|
||||
inst* i = inst_new(op);
|
||||
i->imm.symbol = strdup(name);
|
||||
i->symbol = strdup(name);
|
||||
return inst_block(i);
|
||||
}
|
||||
|
||||
block gen_op_var_bound(opcode op, block binder) {
|
||||
assert(opcode_describe(op)->flags & OP_HAS_VARIABLE);
|
||||
assert(binder.first);
|
||||
assert(binder.first == binder.last);
|
||||
block b = gen_op_var_unbound(op, binder.first->imm.symbol);
|
||||
b.first->var_binding = binder.first;
|
||||
block b = gen_op_var_unbound(op, binder.first->symbol);
|
||||
b.first->bound_by = binder.first;
|
||||
return b;
|
||||
}
|
||||
|
||||
block gen_op_symbol(opcode op, const char* sym) {
|
||||
assert(opcode_describe(op)->flags & OP_HAS_SYMBOL);
|
||||
inst* i = inst_new(op);
|
||||
i->imm.symbol = strdup(sym);
|
||||
i->symbol = strdup(sym);
|
||||
return inst_block(i);
|
||||
}
|
||||
|
||||
/*
 * Build a one-instruction block that defines a named sub-block.
 *
 * op must be a call pseudo-instruction that carries a block (both flags are
 * asserted). The new instruction stores a private copy of name as its symbol
 * and holds the given block as its subfn.
 */
block gen_op_block_defn(opcode op, const char* name, block block) {
  assert(opcode_describe(op)->flags & OP_IS_CALL_PSEUDO);
  assert(opcode_describe(op)->flags & OP_HAS_BLOCK);
  inst* defn = inst_new(op);
  defn->symbol = strdup(name);
  defn->subfn = block;
  return inst_block(defn);
}
|
||||
|
||||
/*
 * Build a one-instruction block that refers to a sub-block by name only.
 *
 * op must be a call pseudo-instruction (asserted). The instruction keeps a
 * private copy of name as its symbol; bound_by is left as set by inst_new.
 */
block gen_op_block_unbound(opcode op, const char* name) {
  assert(opcode_describe(op)->flags & OP_IS_CALL_PSEUDO);
  inst* ref = inst_new(op);
  ref->symbol = strdup(name);
  block result = inst_block(ref);
  return result;
}
|
||||
|
||||
|
||||
/*
 * Build a call: one instruction of kind op followed by its argument list.
 *
 * op must take a variable-length argument list, and every instruction in
 * arglist must be a call pseudo-instruction (both asserted). The number of
 * arguments is stored in the call instruction's integer immediate.
 */
block gen_op_call(opcode op, block arglist) {
  assert(opcode_describe(op)->flags & OP_HAS_VARIABLE_LENGTH_ARGLIST);
  inst* call = inst_new(op);

  // Count the arguments while validating each one.
  int nargs = 0;
  inst* curr = arglist.first;
  while (curr) {
    assert(opcode_describe(curr->op)->flags & OP_IS_CALL_PSEUDO);
    nargs++;
    curr = curr->next;
  }
  assert(nargs < 100); //FIXME

  call->imm.intval = nargs;
  return block_join(inst_block(call), arglist);
}
|
||||
|
||||
static void inst_join(inst* a, inst* b) {
|
||||
assert(a && b);
|
||||
assert(!a->next);
|
||||
@@ -133,22 +177,31 @@ block block_join(block a, block b) {
|
||||
return c;
|
||||
}
|
||||
|
||||
block block_bind(block binder, block body) {
|
||||
static void block_bind_subblock(block binder, block body, int bindflags) {
|
||||
assert(binder.first);
|
||||
assert(binder.first == binder.last);
|
||||
assert(opcode_describe(binder.first->op)->flags & OP_HAS_VARIABLE);
|
||||
assert(binder.first->imm.symbol);
|
||||
assert(binder.first->var_binding == 0);
|
||||
assert((opcode_describe(binder.first->op)->flags & bindflags) == bindflags);
|
||||
assert(binder.first->symbol);
|
||||
assert(binder.first->bound_by == 0 || binder.first->bound_by == binder.first);
|
||||
|
||||
binder.first->var_binding = binder.first;
|
||||
binder.first->bound_by = binder.first;
|
||||
for (inst* i = body.first; i; i = i->next) {
|
||||
if (opcode_describe(i->op)->flags & OP_HAS_VARIABLE &&
|
||||
i->var_binding == 0 &&
|
||||
!strcmp(i->imm.symbol, binder.first->imm.symbol)) {
|
||||
// bind this variable
|
||||
i->var_binding = binder.first;
|
||||
int flags = opcode_describe(i->op)->flags;
|
||||
if ((flags & bindflags) == bindflags &&
|
||||
i->bound_by == 0 &&
|
||||
!strcmp(i->symbol, binder.first->symbol)) {
|
||||
// bind this instruction
|
||||
i->bound_by = binder.first;
|
||||
}
|
||||
if (flags & OP_HAS_BLOCK) {
|
||||
block_bind_subblock(binder, i->subfn, bindflags);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Bind matching instructions in body to binder and return binder followed
 * by body as one block.
 *
 * bindflags selects which instructions are candidates; OP_HAS_BINDING is
 * always added to it before the body is searched.
 */
block block_bind(block binder, block body, int bindflags) {
  block_bind_subblock(binder, body, bindflags | OP_HAS_BINDING);
  return block_join(binder, body);
}
|
||||
|
||||
@@ -180,7 +233,7 @@ block gen_collect(block expr) {
|
||||
block_append(&c, gen_op_simple(DUP));
|
||||
block_append(&c, gen_op_const(LOADK, json_array()));
|
||||
block array_var = block_bind(gen_op_var_unbound(STOREV, "collect"),
|
||||
gen_noop());
|
||||
gen_noop(), OP_HAS_VARIABLE);
|
||||
block_append(&c, array_var);
|
||||
|
||||
block tail = {0};
|
||||
@@ -204,64 +257,108 @@ block gen_else(block a, block b) {
|
||||
assert(0);
|
||||
}
|
||||
|
||||
/*
 * Count how many parent links must be followed from bc to reach the
 * bytecode object that target was compiled into (0 when it is bc itself).
 *
 * Asserts that target has been assigned to a bytecode object and that this
 * object is found somewhere on bc's parent chain.
 */
static uint16_t nesting_level(struct bytecode* bc, inst* target) {
  assert(bc && target->compiled);
  uint16_t level;
  for (level = 0; bc && target->compiled != bc; bc = bc->parent) {
    level++;
  }
  assert(bc && bc == target->compiled);
  return level;
}
|
||||
|
||||
struct bytecode* block_compile(struct symbol_table* syms, block b) {
|
||||
inst* curr = b.first;
|
||||
static void compile(struct bytecode* bc, block b) {
|
||||
int pos = 0;
|
||||
int var_frame_idx = 0;
|
||||
for (; curr; curr = curr->next) {
|
||||
bc->nsubfunctions = 0;
|
||||
for (inst* curr = b.first; curr; curr = curr->next) {
|
||||
if (!curr->next) assert(curr == b.last);
|
||||
pos += opcode_length(curr->op);
|
||||
curr->bytecode_pos = pos;
|
||||
if (opcode_describe(curr->op)->flags & OP_HAS_VARIABLE) {
|
||||
assert(curr->var_binding && "unbound variable");
|
||||
if (curr->var_binding == curr) {
|
||||
curr->var_frame_idx = var_frame_idx++;
|
||||
}
|
||||
curr->compiled = bc;
|
||||
|
||||
int opflags = opcode_describe(curr->op)->flags;
|
||||
if (opflags & OP_HAS_BINDING) {
|
||||
assert(curr->bound_by && "unbound term");
|
||||
}
|
||||
if ((opflags & OP_HAS_VARIABLE) &&
|
||||
curr->bound_by == curr) {
|
||||
curr->imm.intval = var_frame_idx++;
|
||||
}
|
||||
if (opflags & OP_HAS_BLOCK) {
|
||||
assert(curr->bound_by == curr);
|
||||
curr->imm.intval = bc->nsubfunctions++;
|
||||
}
|
||||
}
|
||||
struct bytecode* bc = malloc(sizeof(struct bytecode));
|
||||
if (bc->nsubfunctions) {
|
||||
bc->subfunctions = malloc(sizeof(struct bytecode*) * bc->nsubfunctions);
|
||||
for (inst* curr = b.first; curr; curr = curr->next) {
|
||||
if (!(opcode_describe(curr->op)->flags & OP_HAS_BLOCK))
|
||||
continue;
|
||||
struct bytecode* subfn = malloc(sizeof(struct bytecode));
|
||||
bc->subfunctions[curr->imm.intval] = subfn;
|
||||
subfn->globals = bc->globals;
|
||||
subfn->parent = bc;
|
||||
compile(subfn, curr->subfn);
|
||||
}
|
||||
} else {
|
||||
bc->subfunctions = 0;
|
||||
}
|
||||
bc->codelen = pos;
|
||||
uint16_t* code = malloc(sizeof(uint16_t) * bc->codelen);
|
||||
bc->code = code;
|
||||
int* stack_height = malloc(sizeof(int) * (bc->codelen + 1));
|
||||
for (int i = 0; i<bc->codelen + 1; i++) stack_height[i] = -1;
|
||||
pos = 0;
|
||||
json_t* constant_pool = json_array();
|
||||
int maxvar = -1;
|
||||
int curr_stack_height = 1;
|
||||
for (curr = b.first; curr; curr = curr->next) {
|
||||
for (inst* curr = b.first; curr; curr = curr->next) {
|
||||
if (curr->op == CLOSURE_CREATE) {
|
||||
// CLOSURE_CREATE opcodes define closures for use later in the
|
||||
// codestream. They generate no code.
|
||||
|
||||
// FIXME: make the above true :)
|
||||
code[pos++] = DUP;
|
||||
code[pos++] = POP;
|
||||
continue;
|
||||
}
|
||||
const struct opcode_description* op = opcode_describe(curr->op);
|
||||
if (curr_stack_height < op->stack_in) {
|
||||
printf("Stack underflow at %04d\n", curr->bytecode_pos);
|
||||
}
|
||||
if (stack_height[curr->bytecode_pos] != -1 &&
|
||||
stack_height[curr->bytecode_pos] != curr_stack_height) {
|
||||
// FIXME: not sure this is right at all :(
|
||||
printf("Inconsistent stack heights at %04d %s\n", curr->bytecode_pos, op->name);
|
||||
}
|
||||
curr_stack_height -= op->stack_in;
|
||||
curr_stack_height += op->stack_out;
|
||||
code[pos++] = curr->op;
|
||||
int opflags = op->flags;
|
||||
if (opflags & OP_HAS_CONSTANT) {
|
||||
assert(!(op->flags & OP_IS_CALL_PSEUDO));
|
||||
if (opflags & OP_HAS_VARIABLE_LENGTH_ARGLIST) {
|
||||
int nargs = curr->imm.intval;
|
||||
assert(nargs > 0);
|
||||
code[pos++] = (uint16_t)nargs;
|
||||
for (int i=0; i<nargs; i++) {
|
||||
curr = curr->next;
|
||||
assert(curr && opcode_describe(curr->op)->flags & OP_IS_CALL_PSEUDO);
|
||||
code[pos++] = nesting_level(bc, curr->bound_by);
|
||||
switch (curr->bound_by->op) {
|
||||
default: assert(0 && "Unknown type of argument");
|
||||
case CLOSURE_CREATE:
|
||||
code[pos++] = curr->bound_by->imm.intval | ARG_NEWCLOSURE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else if (opflags & OP_HAS_CONSTANT) {
|
||||
code[pos++] = json_array_size(constant_pool);
|
||||
json_array_append(constant_pool, curr->imm.constant);
|
||||
} else if (opflags & OP_HAS_VARIABLE) {
|
||||
uint16_t var = (uint16_t)curr->var_binding->var_frame_idx;
|
||||
// no closing over variables yet
|
||||
assert(curr->bound_by->compiled == bc);
|
||||
uint16_t var = (uint16_t)curr->bound_by->imm.intval;
|
||||
code[pos++] = var;
|
||||
if (var > maxvar) maxvar = var;
|
||||
} else if (opflags & OP_HAS_BRANCH) {
|
||||
assert(curr->imm.target->bytecode_pos != -1);
|
||||
assert(curr->imm.target->bytecode_pos > pos); // only forward branches
|
||||
code[pos] = curr->imm.target->bytecode_pos - (pos + 1);
|
||||
stack_height[curr->imm.target->bytecode_pos] = curr_stack_height;
|
||||
pos++;
|
||||
} else if (opflags & OP_HAS_CFUNC) {
|
||||
assert(curr->imm.symbol);
|
||||
assert(curr->symbol);
|
||||
int found = 0;
|
||||
for (int i=0; i<syms->ncfunctions; i++) {
|
||||
if (!strcmp(curr->imm.symbol, syms->cfunctions[i].name)) {
|
||||
for (int i=0; i<bc->globals->ncfunctions; i++) {
|
||||
if (!strcmp(curr->symbol, bc->globals->cfunctions[i].name)) {
|
||||
code[pos++] = i;
|
||||
found = 1;
|
||||
break;
|
||||
@@ -272,11 +369,16 @@ struct bytecode* block_compile(struct symbol_table* syms, block b) {
|
||||
code[pos++] = curr->imm.intval;
|
||||
}
|
||||
}
|
||||
free(stack_height);
|
||||
bc->constants = constant_pool;
|
||||
bc->nlocals = maxvar + 2; // FIXME: frames of size zero?
|
||||
bc->nclosures = 0;
|
||||
}
|
||||
|
||||
/*
 * Compile the instruction block b into a newly allocated top-level
 * bytecode object.
 *
 * syms is stored as the object's globals table; the object has no parent.
 * The remaining fields are filled in by compile().
 *
 * Returns the new bytecode object, or a null pointer if it could not be
 * allocated.
 */
struct bytecode* block_compile(struct symbol_table* syms, block b) {
  struct bytecode* bc = malloc(sizeof *bc);
  if (!bc) {
    // Out of memory: report failure instead of writing through a null pointer.
    return 0;
  }
  bc->parent = 0;
  bc->globals = syms;
  compile(bc, b);
  return bc;
}
|
||||
|
||||
|
@@ -17,6 +17,9 @@ block gen_op_const(opcode op, json_t* constant);
|
||||
block gen_op_target(opcode op, block target);
|
||||
block gen_op_var_unbound(opcode op, const char* name);
|
||||
block gen_op_var_bound(opcode op, block binder);
|
||||
block gen_op_block_defn(opcode op, const char* name, block block);
|
||||
block gen_op_block_unbound(opcode op, const char* name);
|
||||
block gen_op_call(opcode op, block arglist);
|
||||
block gen_op_symbol(opcode op, const char* name);
|
||||
|
||||
block gen_subexp(block a);
|
||||
@@ -27,7 +30,7 @@ block gen_else(block a, block b);
|
||||
|
||||
void block_append(block* b, block b2);
|
||||
block block_join(block a, block b);
|
||||
block block_bind(block binder, block body);
|
||||
block block_bind(block binder, block body, int bindflags);
|
||||
|
||||
struct bytecode* block_compile(struct symbol_table*, block);
|
||||
|
||||
|
42
c/execute.c
42
c/execute.c
@@ -95,6 +95,18 @@ void stack_restore(){
|
||||
forkable_stack_pop(&fork_stk);
|
||||
}
|
||||
|
||||
/*
 * Decode one closure argument from the code stream and produce the closure
 * it denotes.
 *
 * pc points at a (level, idx) pair of code words. level selects a frame by
 * walking that many steps up from fr via frame_get_level(). If idx has
 * ARG_NEWCLOSURE set, the remaining bits index that frame's bytecode
 * subfunctions and a new closure is created for it with closure_new();
 * otherwise idx names a closure argument already stored in that frame,
 * which is returned by value.
 */
static struct closure make_closure(struct forkable_stack* stk, frame_ptr fr, uint16_t* pc) {
  const uint16_t level = pc[0];
  const uint16_t idx = pc[1];
  fr = frame_get_level(stk, fr, level);

  if (!(idx & ARG_NEWCLOSURE)) {
    return *frame_closure_arg(fr, idx);
  }

  int subfn_idx = idx & ~ARG_NEWCLOSURE;
  assert(subfn_idx < frame_self(fr)->bc->nsubfunctions);
  struct bytecode* subfn = frame_self(fr)->bc->subfunctions[subfn_idx];
  return closure_new(stk, subfn);
}
|
||||
#define stack_push stk_push
|
||||
#define stack_pop stk_pop
|
||||
|
||||
@@ -267,7 +279,7 @@ json_t* jq_next() {
|
||||
stack_save();
|
||||
stack_push(array);
|
||||
stack_push(stackval_root(json_integer(idx+1)));
|
||||
frame_push_backtrack(&frame_stk, frame_current_bytecode(&frame_stk), pc - 1);
|
||||
frame_push_backtrack(&frame_stk, pc - 1);
|
||||
stack_switch();
|
||||
|
||||
stackval sv = {json_array_get(array.value, idx),
|
||||
@@ -291,7 +303,7 @@ json_t* jq_next() {
|
||||
|
||||
case FORK: {
|
||||
stack_save();
|
||||
frame_push_backtrack(&frame_stk, frame_current_bytecode(&frame_stk), pc - 1);
|
||||
frame_push_backtrack(&frame_stk, pc - 1);
|
||||
stack_switch();
|
||||
pc++; // skip offset this time
|
||||
break;
|
||||
@@ -332,13 +344,27 @@ json_t* jq_next() {
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
case CALL_1_1: {
|
||||
uint16_t nargs = *pc++;
|
||||
|
||||
uint16_t nclosures = *pc++;
|
||||
*frame_current_pc(&frame_stk) = pc + nclosures * 2;
|
||||
frame_ptr new_frame = frame_push(&frame_stk,
|
||||
make_closure(&frame_stk, frame_current(&frame_stk), pc));
|
||||
pc += 2;
|
||||
frame_ptr old_frame = forkable_stack_peek_next(&frame_stk, new_frame);
|
||||
for (int i=0; i<nclosures-1; i++) {
|
||||
*frame_closure_arg(new_frame, i) = make_closure(&frame_stk, old_frame, pc);
|
||||
pc += 2;
|
||||
}
|
||||
|
||||
pc = *frame_current_pc(&frame_stk);
|
||||
break;
|
||||
}
|
||||
|
||||
case RET: {
|
||||
frame_pop(&frame_stk);
|
||||
pc = *frame_current_pc(&frame_stk);
|
||||
break;
|
||||
}
|
||||
*/
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -350,7 +376,7 @@ void jq_init(struct bytecode* bc, json_t* input) {
|
||||
forkable_stack_init(&fork_stk, 1024); // FIXME: lower this number, see if it breaks
|
||||
|
||||
stack_push(stackval_root(input));
|
||||
frame_push(&frame_stk, bc);
|
||||
frame_push(&frame_stk, closure_new_toplevel(bc));
|
||||
}
|
||||
|
||||
void run_program(struct bytecode* bc) {
|
||||
|
@@ -116,4 +116,19 @@ static void forkable_stack_restore(struct forkable_stack* s, struct forkable_sta
|
||||
s->savedlimit = state->prevlimit;
|
||||
forkable_stack_check(s);
|
||||
}
|
||||
|
||||
typedef int stack_idx;
|
||||
|
||||
static stack_idx forkable_stack_to_idx(struct forkable_stack* s, void* ptr) {
|
||||
char* item = ptr;
|
||||
int pos = item - s->stk;
|
||||
assert(pos >= 0 && pos < s->length);
|
||||
return s->length - pos;
|
||||
}
|
||||
|
||||
/*
 * Convert a stack_idx back to a pointer into the stack's buffer.
 *
 * idx must lie in 1..length (asserted); the result is the address
 * length - idx bytes from the start of the buffer.
 */
static void* forkable_stack_from_idx(struct forkable_stack* s, stack_idx idx) {
  assert(idx >= 1 && idx <= s->length);
  return s->stk + (s->length - idx);
}
|
||||
|
||||
#endif
|
||||
|
@@ -6,6 +6,7 @@
|
||||
// A function value paired with the frame it was created in.
// Field order matters: closures are built with positional initialisers.
struct closure {
|
||||
struct bytecode* bc; // bytecode object this closure executes
|
||||
uint16_t* pc; // code position; freshly made closures start at bc->code
|
||||
stack_idx env; // stack index of the defining frame (-1 for the top level)
|
||||
};
|
||||
|
||||
typedef union frame_elem {
|
||||
@@ -57,23 +58,41 @@ static uint16_t** frame_current_pc(struct forkable_stack* stk) {
|
||||
return &frame_self(frame_current(stk))->pc;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
static void frame_push(struct forkable_stack* stk, struct bytecode* bc) {
|
||||
frame_ptr fp = forkable_stack_push(stk, frame_size(bc));
|
||||
frame_self(fp)->bc = bc;
|
||||
frame_self(fp)->pc = bc->code;
|
||||
/*
 * Return the frame recorded as the environment (env) of the closure that
 * fr is running, resolved from its stack index.
 */
static frame_ptr frame_get_parent(struct forkable_stack* stk, frame_ptr fr) {
  stack_idx parent = frame_self(fr)->env;
  return forkable_stack_from_idx(stk, parent);
}
|
||||
|
||||
static void frame_push_backtrack(struct forkable_stack* stk,
|
||||
struct bytecode* bc, uint16_t* pc) {
|
||||
/*
 * Follow the parent link of fr `level` times and return the frame reached.
 * A level of zero (or less) returns fr unchanged.
 */
static frame_ptr frame_get_level(struct forkable_stack* stk, frame_ptr fr, int level) {
  int remaining = level;
  while (remaining > 0) {
    fr = frame_get_parent(stk, fr);
    remaining--;
  }
  return fr;
}
|
||||
|
||||
static struct closure closure_new_toplevel(struct bytecode* bc) {
|
||||
struct closure cl = {bc, bc->code, -1};
|
||||
return cl;
|
||||
}
|
||||
static struct closure closure_new(struct forkable_stack* stk, struct bytecode* bc) {
|
||||
struct closure cl = {bc, bc->code,
|
||||
forkable_stack_to_idx(stk, frame_current(stk))};
|
||||
return cl;
|
||||
}
|
||||
|
||||
static frame_ptr frame_push(struct forkable_stack* stk, struct closure cl) {
|
||||
frame_ptr fp = forkable_stack_push(stk, frame_size(cl.bc));
|
||||
*frame_self(fp) = cl;
|
||||
return fp;
|
||||
}
|
||||
|
||||
static frame_ptr frame_push_backtrack(struct forkable_stack* stk, uint16_t* pc) {
|
||||
struct closure curr = *frame_self(frame_current(stk));
|
||||
frame_ptr fp = forkable_stack_push(stk, sizeof(union frame_elem) * 2);
|
||||
frame_self(fp)->bc = bc;
|
||||
frame_self(fp)->pc = pc;
|
||||
curr.pc = pc;
|
||||
*frame_self(fp) = curr;
|
||||
return fp;
|
||||
}
|
||||
|
||||
|
||||
// Discard the topmost frame by popping one entry from the forkable stack.
static void frame_pop(struct forkable_stack* stk) {
|
||||
forkable_stack_pop(stk);
|
||||
}
|
||||
|
@@ -11,6 +11,7 @@
|
||||
|
||||
"==" { return EQ; }
|
||||
"as" { return AS; }
|
||||
"def" { return DEF; }
|
||||
"."|"="|";"|"["|"]"|","|":"|"("|")"|"{"|"}"|"|"|"+"|"\$" { return yytext[0];}
|
||||
|
||||
[[:digit:]]+ { yylval->num = atoi(yytext); return NUMBER;}
|
||||
|
5
c/main.c
5
c/main.c
@@ -32,6 +32,9 @@ void run_tests() {
|
||||
block_append(&program, gen_op_simple(BACKTRACK));
|
||||
struct bytecode* bc = block_compile(&builtins, program);
|
||||
block_free(program);
|
||||
printf("Disassembly:\n");
|
||||
dump_disassembly(2, bc);
|
||||
printf("\n");
|
||||
fgets(buf, sizeof(buf), testdata);
|
||||
json_t* input = json_loads(buf, JSON_DECODE_ANY, 0);
|
||||
jq_init(bc, input);
|
||||
@@ -76,7 +79,7 @@ int main(int argc, char* argv[]) {
|
||||
block_append(&blk, block_join(gen_op_simple(YIELD), gen_op_simple(BACKTRACK)));
|
||||
struct bytecode* bc = block_compile(&builtins, blk);
|
||||
block_free(blk);
|
||||
dump_disassembly(bc);
|
||||
dump_disassembly(0, bc);
|
||||
printf("\n");
|
||||
run_program(bc);
|
||||
}
|
||||
|
@@ -2,10 +2,12 @@
|
||||
|
||||
#define NONE 0
|
||||
#define CONSTANT (OP_HAS_IMMEDIATE | OP_HAS_CONSTANT)
|
||||
#define VARIABLE (OP_HAS_IMMEDIATE | OP_HAS_VARIABLE)
|
||||
#define VARIABLE (OP_HAS_IMMEDIATE | OP_HAS_VARIABLE | OP_HAS_BINDING)
|
||||
#define BRANCH (OP_HAS_IMMEDIATE | OP_HAS_BRANCH)
|
||||
#define CFUNC (OP_HAS_IMMEDIATE | OP_HAS_SYMBOL | OP_HAS_CFUNC)
|
||||
#define UFUNC (OP_HAS_IMMEDIATE | OP_HAS_UFUNC)
|
||||
#define UFUNC (OP_HAS_IMMEDIATE | OP_HAS_UFUNC | OP_HAS_VARIABLE_LENGTH_ARGLIST)
|
||||
#define CLOSURE_DEFINE (OP_HAS_IMMEDIATE | OP_HAS_BLOCK | OP_IS_CALL_PSEUDO | OP_HAS_BINDING)
|
||||
#define CLOSURE_REF (OP_HAS_IMMEDIATE | OP_IS_CALL_PSEUDO | OP_HAS_BINDING)
|
||||
|
||||
#define OP(name, imm, in, out) \
|
||||
{name, #name, imm, in, out},
|
||||
|
@@ -21,6 +21,10 @@ enum {
|
||||
OP_HAS_SYMBOL = 16,
|
||||
OP_HAS_CFUNC = 32,
|
||||
OP_HAS_UFUNC = 64,
|
||||
OP_IS_CALL_PSEUDO = 128,
|
||||
OP_HAS_VARIABLE_LENGTH_ARGLIST = 256,
|
||||
OP_HAS_BLOCK = 512,
|
||||
OP_HAS_BINDING = 1024,
|
||||
};
|
||||
struct opcode_description {
|
||||
opcode op;
|
||||
|
@@ -18,3 +18,8 @@ OP(CALL_BUILTIN_1_1, CFUNC, 1, 1)
|
||||
OP(CALL_BUILTIN_3_1, CFUNC, 3, 1)
|
||||
|
||||
OP(CALL_1_1, UFUNC, 1, 1)
|
||||
OP(RET, NONE, 1, 1)
|
||||
|
||||
OP(CLOSURE_PARAM, CLOSURE_REF, 0, 0)
|
||||
OP(CLOSURE_REF, CLOSURE_REF, 0, 0)
|
||||
OP(CLOSURE_CREATE, CLOSURE_DEFINE, 0, 0)
|
||||
|
15
c/parser.y
15
c/parser.y
@@ -20,10 +20,14 @@
|
||||
%token <str> IDENT
|
||||
%token <num> NUMBER
|
||||
|
||||
/* revolting hack */
|
||||
%left ';'
|
||||
|
||||
%left '|'
|
||||
%left ','
|
||||
%token EQ "=="
|
||||
%token AS "as"
|
||||
%token DEF "def"
|
||||
%nonassoc EQ
|
||||
%left '+'
|
||||
|
||||
@@ -55,12 +59,16 @@ static block gen_index(block obj, block key) {
|
||||
%%
|
||||
program: Exp { *answer = $1; }
|
||||
|
||||
|
||||
Exp:
|
||||
"def" IDENT ':' Exp ';' Exp {
|
||||
block body = block_join($4, gen_op_simple(RET));
|
||||
$$ = block_bind(gen_op_block_defn(CLOSURE_CREATE, $2, body), $6, OP_IS_CALL_PSEUDO);
|
||||
} |
|
||||
|
||||
Term "as" '$' IDENT '|' Exp {
|
||||
$$ = gen_op_simple(DUP);
|
||||
block_append(&$$, $1);
|
||||
block_append(&$$, block_bind(gen_op_var_unbound(STOREV, $4), $6));
|
||||
block_append(&$$, block_bind(gen_op_var_unbound(STOREV, $4), $6, OP_HAS_VARIABLE));
|
||||
} |
|
||||
|
||||
Exp '|' Exp {
|
||||
@@ -132,6 +140,9 @@ IDENT {
|
||||
} |
|
||||
'$' IDENT {
|
||||
$$ = gen_op_var_unbound(LOADV, $2);
|
||||
} |
|
||||
'$' '$' IDENT {
|
||||
$$ = gen_op_call(CALL_1_1, gen_op_block_unbound(CLOSURE_REF, $3));
|
||||
}
|
||||
|
||||
MkDict:
|
||||
|
143
c/testdata
Normal file
143
c/testdata
Normal file
@@ -0,0 +1,143 @@
|
||||
# Tests are groups of three lines: program, input, expected output
|
||||
# Blank lines and lines starting with # are ignored
|
||||
|
||||
#
|
||||
# Simple value tests to check parser. Input is irrelevant
|
||||
#
|
||||
|
||||
true
|
||||
null
|
||||
true
|
||||
|
||||
false
|
||||
null
|
||||
false
|
||||
|
||||
# null
|
||||
# 42
|
||||
# null
|
||||
|
||||
1
|
||||
null
|
||||
1
|
||||
|
||||
# FIXME: much more number testing needed
|
||||
|
||||
{}
|
||||
null
|
||||
{}
|
||||
|
||||
[]
|
||||
null
|
||||
[]
|
||||
|
||||
# FIXME: string literals
|
||||
|
||||
#
|
||||
# Dictionary construction syntax
|
||||
#
|
||||
|
||||
{a: 1}
|
||||
null
|
||||
{"a":1}
|
||||
|
||||
# FIXME: string literals
|
||||
|
||||
#
|
||||
# Field access, piping
|
||||
#
|
||||
|
||||
.foo
|
||||
{"foo": 42, "bar": 43}
|
||||
42
|
||||
|
||||
.foo | .bar
|
||||
{"foo": {"bar": 42}, "bar": "badvalue"}
|
||||
42
|
||||
|
||||
.foo.bar
|
||||
{"foo": {"bar": 42}, "bar": "badvalue"}
|
||||
42
|
||||
|
||||
|
||||
# FIXME strings
|
||||
# .["foo"].bar
|
||||
# {"foo": {"bar": 42}, "bar": "badvalue"}
|
||||
# 42
|
||||
|
||||
|
||||
#
|
||||
# Multiple outputs, iteration
|
||||
#
|
||||
|
||||
.[]
|
||||
[1,2,3]
|
||||
1
|
||||
2
|
||||
3
|
||||
|
||||
[(.,1),((.,.[]),(2,3))]
|
||||
["a","b"]
|
||||
[["a","b"],1,["a","b"],"a","b",2,3]
|
||||
|
||||
[([5,5][]),.,.[]]
|
||||
[1,2,3]
|
||||
[5,5,[1,2,3],1,2,3]
|
||||
|
||||
{x: (1,2)},{x:3} | .x
|
||||
null
|
||||
1
|
||||
2
|
||||
3
|
||||
|
||||
#
|
||||
# Variables
|
||||
#
|
||||
|
||||
1 as $x | 2 as $y | [$x,$y,$x]
|
||||
null
|
||||
[1,2,1]
|
||||
|
||||
[1,2,3][] as $x | [[4,5,6,7][$x]]
|
||||
null
|
||||
[5]
|
||||
[6]
|
||||
[7]
|
||||
|
||||
1 as $x | [$x,$x,$x as $x | $x]
|
||||
null
|
||||
[1,1,1]
|
||||
|
||||
# [.,(.[] | {x:.},.),.,.[]]
|
||||
|
||||
#
|
||||
# Builtin functions
|
||||
#
|
||||
|
||||
# FIXME: floats vs. integer
|
||||
|
||||
1+1
|
||||
null
|
||||
2.0
|
||||
|
||||
.+4
|
||||
15
|
||||
19.0
|
||||
|
||||
[1,2,3] + [.]
|
||||
null
|
||||
[1,2,3,null]
|
||||
|
||||
#
|
||||
# User-defined functions
|
||||
# Oh god.
|
||||
#
|
||||
|
||||
def f: . + 1; def g: def g: . + 100; $$f | $$g | $$f; ($$f | $$g), $$g
|
||||
3.0
|
||||
106.0
|
||||
105.0
|
||||
|
||||
[[100,200][] as $x | def f: . + $x; $$f | $$f | $$f]
|
||||
1
|
||||
[300.0, 600.0]
|
Reference in New Issue
Block a user