diff --git a/bytecode.c b/bytecode.c
index 1277fdc1..b03671b6 100644
--- a/bytecode.c
+++ b/bytecode.c
@@ -36,7 +36,7 @@ const struct opcode_description* opcode_describe(opcode op) {
 }
 
-static int bytecode_operation_length(uint16_t* codeptr) {
+int bytecode_operation_length(uint16_t* codeptr) {
   int length = opcode_describe(*codeptr)->length;
   if (*codeptr == CALL_JQ) {
     length += codeptr[1] * 2;
   }
diff --git a/bytecode.h b/bytecode.h
index b81e1478..ee50d2cb 100644
--- a/bytecode.h
+++ b/bytecode.h
@@ -83,6 +83,7 @@ struct bytecode {
 void dump_disassembly(int, struct bytecode* code);
 void dump_operation(struct bytecode* bc, uint16_t* op);
+int bytecode_operation_length(uint16_t* codeptr);
 
 void bytecode_free(struct bytecode* bc);
 
 #endif
diff --git a/execute.c b/execute.c
index 5b583e50..974707f5 100644
--- a/execute.c
+++ b/execute.c
@@ -95,12 +95,16 @@ static struct closure make_closure(struct jq_state* jq, uint16_t* pc) {
   stack_ptr fridx = frame_get_level(jq, level);
   struct frame* fr = stack_block(&jq->stk, fridx);
   if (idx & ARG_NEWCLOSURE) {
+    // A new closure closing the frame identified by level, and with
+    // the bytecode body of the idx'th subfunction of that frame
     int subfn_idx = idx & ~ARG_NEWCLOSURE;
     assert(subfn_idx < fr->bc->nsubfunctions);
     struct closure cl = {fr->bc->subfunctions[subfn_idx], fridx};
     return cl;
   } else {
+    // A reference to a closure from the frame identified by level; copy
+    // it as-is
     int closure = idx;
     assert(closure >= 0);
     assert(closure < fr->bc->nclosures);
@@ -652,6 +656,7 @@ jv jq_next(jq_state *jq) {
       break;
     }
 
+    case TAIL_CALL_JQ:
     case CALL_JQ: {
       /*
        * Bytecode layout here:
@@ -683,23 +688,7 @@ jv jq_next(jq_state *jq) {
       stack_ptr retdata = jq->stk_top;
       struct frame* new_frame;
       struct closure cl = make_closure(jq, pc);
-      if (nclosures == 0 && *(pc + 2) == RET && *pc >= 1) {
-        /*
-         * TCO: Tail call with no references left to the current frame.
-         *
-         * The *pc >= 1 portion of the conditional checks that the
-         * callee does not close over this frame. The nclosures == 0
-         * potion checks that there are no arguments (which could close
-         * over this frame).
-         *
-         * TODO: nclosures > 0 can still allow TCO if none of those
-         * closures refers to the current frame.
-         *
-         * TODO: chase JUMP chains to see if they end in RET.
-         *
-         * These additions will make us want to memoize here or in an
-         * optimizer pass preceding interpretation.
-         */
+      if (opcode == TAIL_CALL_JQ) {
        retaddr = frame_current(jq)->retaddr;
        retdata = frame_current(jq)->retdata;
        frame_pop(jq);
@@ -807,6 +796,76 @@ void jq_teardown(jq_state **jq) {
   jv_mem_free(old_jq);
 }
 
+static int ret_follows(uint16_t *pc) {
+  if (*pc == RET)
+    return 1;
+  if (*pc++ != JUMP)
+    return 0;
+  return ret_follows(pc + *pc); // FIXME, might be ironic
+}
+
+/*
+ * Look for tail calls that can be optimized: tail calls with no
+ * references left to the current frame.
+ *
+ * We're staring at this bytecode layout:
+ *
+ *    CALL_JQ
+ *    <nclosures>
+ *    <callee closure>        (2 units)
+ *    <nclosures closures>    (2 units each)
+ *    <next instruction>
+ *
+ * A closure is:
+ *
+ *    <level>    (a relative frame count chased via the current frame's env)
+ *    <idx>      (an index of a subfunction or closure in that frame)
+ *
+ * We're looking for:
+ *
+ * a) the next instruction is a RET or a chain of unconditional JUMPs
+ * that ends in a RET, and
+ *
+ * b) none of the closures -callee included- have level == 0.
+ */
+static uint16_t tail_call_analyze(uint16_t *pc) {
+  assert(*pc == CALL_JQ);
+  pc++;
+  // + 1 for the callee closure
+  for (uint16_t nclosures = *pc++ + 1; nclosures > 0; pc++, nclosures--) {
+    if (*pc++ == 0)
+      return CALL_JQ;
+  }
+  if (ret_follows(pc))
+    return TAIL_CALL_JQ;
+  return CALL_JQ;
+}
+
+static struct bytecode *optimize_code(struct bytecode *bc) {
+  uint16_t *pc = bc->code;
+  // FIXME: Don't mutate bc->code...
+  while (pc < bc->code + bc->codelen) {
+    switch (*pc) {
+    case CALL_JQ:
+      *pc = tail_call_analyze(pc);
+      break;
+
+    // Other bytecode optimizations here. A peephole optimizer would
+    // fit right in.
+    default: break;
+    }
+    pc += bytecode_operation_length(pc);
+  }
+  return bc;
+}
+
+static struct bytecode *optimize(struct bytecode *bc) {
+  for (int i=0; i<bc->nsubfunctions; i++) {
+    bc->subfunctions[i] = optimize(bc->subfunctions[i]);
+  }
+  return optimize_code(bc);
+}
+
 int jq_compile_args(jq_state *jq, const char* str, jv args) {
   jv_nomem_handler(jq->nomem_handler, jq->nomem_handler_data);
   assert(jv_get_kind(args) == JV_KIND_ARRAY);
@@ -844,6 +903,8 @@ int jq_compile_args(jq_state *jq, const char* str, jv args) {
     fprintf(stderr, "%s\n", jv_string_value(s));
     jv_free(s);
   }
+  if (jq->bc)
+    jq->bc = optimize(jq->bc);
   locfile_free(&locations);
   return jq->bc != NULL;
 }
diff --git a/opcode_list.h b/opcode_list.h
index 2b27f78f..c4c48deb 100644
--- a/opcode_list.h
+++ b/opcode_list.h
@@ -27,6 +27,7 @@ OP(CALL_BUILTIN, CFUNC, -1, 1)
 
 OP(CALL_JQ, UFUNC, 1, 1)
 OP(RET, NONE, 1, 1)
+OP(TAIL_CALL_JQ, UFUNC, 1, 1)
 
 OP(CLOSURE_PARAM, DEFINITION, 0, 0)
 OP(CLOSURE_REF, CLOSURE_REF_IMM, 0, 0)
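
For illustration only, not part of the patch above: the new optimize_code() pass is a plain linear scan over the bytecode array that rewrites opcodes in place, stepping forward by each operation's encoded length. The self-contained C sketch below shows that scan-and-rewrite shape under simplified assumptions; the opcodes OP_NOP/OP_CALL/OP_TAIL_CALL/OP_RET, the op_length() helper, and the one-operand call encoding are invented for this example and are not jq's actual bytecode format.

/* Hypothetical sketch of a scan-and-rewrite bytecode pass.
 * Opcode names, lengths, and encoding are invented for illustration;
 * they are not jq's real opcode set. */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

enum { OP_NOP = 0, OP_CALL = 1, OP_TAIL_CALL = 2, OP_RET = 3 };

/* Length of an operation in 16-bit units: the opcode plus its operands. */
static int op_length(const uint16_t *pc) {
  switch (*pc) {
  case OP_CALL:
  case OP_TAIL_CALL:
    return 2;   /* opcode + one callee-index operand */
  default:
    return 1;   /* opcode only */
  }
}

/* Rewrite OP_CALL to OP_TAIL_CALL whenever a RET immediately follows,
 * mutating the code buffer in place, the same shape as optimize_code(). */
static void optimize(uint16_t *code, size_t codelen) {
  uint16_t *pc = code;
  while (pc < code + codelen) {
    int len = op_length(pc);
    if (*pc == OP_CALL && pc + len < code + codelen && pc[len] == OP_RET)
      *pc = OP_TAIL_CALL;
    pc += len;
  }
}

int main(void) {
  /* The second call is followed by a RET, so it is in tail position. */
  uint16_t code[] = { OP_CALL, 7, OP_NOP, OP_CALL, 7, OP_RET };
  optimize(code, sizeof(code) / sizeof(code[0]));
  assert(code[0] == OP_CALL);       /* unchanged: not followed by RET */
  assert(code[3] == OP_TAIL_CALL);  /* rewritten: tail call */
  printf("optimized opcodes: %u %u\n", code[0], code[3]);
  return 0;
}

In the actual patch the rewrite decision is made by tail_call_analyze(), which also requires that none of the call's closures (callee included) refer to the current frame (level == 0), since the TAIL_CALL_JQ path pops that frame before entering the callee.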