author    Maxime Chevalier-Boisvert <maxime.chevalierboisvert@shopify.com>  2021-02-09 16:24:06 -0500
committer Alan Wu <XrXr@users.noreply.github.com>  2021-10-20 18:19:28 -0400
commit    9d8cc01b758f9385bd4c806f3daff9719e07faa0 (patch)
tree      2e5eca04a45f899655f79a6cfb49e7573fb12f4c
parent    6341fc21b2e15dca82253da41047ce37409508fc (diff)
WIP JIT-to-JIT returns
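
This wires up direct returns from JITted callee code back to JITted caller
code, instead of always bouncing through the interpreter on leave. Three
pieces, visible in the diff below: a new jit_return field on
rb_control_frame_t (initialized to NULL in vm_push_frame), send sites in
gen_opt_swb_iseq that record the address of the compiled continuation via
gen_branch()/gen_return_branch(), and gen_leave, which jumps through
jit_return when it is set. The new leave fast path is, in effect
(pseudocode, not the emitted machine code):

    if (cfp->jit_return != NULL)
        jump to cfp->jit_return;   // JIT-to-JIT return
    else
        re-enter the interpreter   // cb_write_post_call_bytes path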
-rw-r--r--  ujit_codegen.c   | 79
-rw-r--r--  ujit_core.c      | 22
-rw-r--r--  ujit_core.h      |  5
-rw-r--r--  vm.c             |  2
-rw-r--r--  vm_core.h        |  2
-rw-r--r--  vm_insnhelper.c  |  1
6 files changed, 91 insertions, 20 deletions
diff --git a/ujit_codegen.c b/ujit_codegen.c
index b9d2a0a7cb..4a2a4c865e 100644
--- a/ujit_codegen.c
+++ b/ujit_codegen.c
@@ -114,7 +114,7 @@ Compile an interpreter entry block to be inserted into an iseq
Returns `NULL` if compilation fails.
*/
uint8_t*
-ujit_entry_prologue()
+ujit_entry_prologue(void)
{
RUBY_ASSERT(cb != NULL);
@@ -248,9 +248,9 @@ gen_dup(jitstate_t* jit, ctx_t* ctx)
x86opnd_t dup_val = ctx_stack_pop(ctx, 1);
x86opnd_t loc0 = ctx_stack_push(ctx, T_NONE);
x86opnd_t loc1 = ctx_stack_push(ctx, T_NONE);
- mov(cb, RAX, dup_val);
- mov(cb, loc0, RAX);
- mov(cb, loc1, RAX);
+ mov(cb, REG0, dup_val);
+ mov(cb, loc0, REG0);
+ mov(cb, loc1, REG0);
return true;
}
@@ -1191,6 +1191,23 @@ gen_opt_swb_cfunc(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const r
bool rb_simple_iseq_p(const rb_iseq_t *iseq);
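+/*
+Branch-generation callback for JIT-to-JIT returns. Instead of emitting a
+jump, it stores the address of the compiled continuation (target0) into
+cfp->jit_return, for gen_leave() to jump through later. The NEXT shapes
+are asserted unreachable: this pseudo-branch must never be laid out as a
+fallthrough.
+*/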
+void
+gen_return_branch(codeblock_t* cb, uint8_t* target0, uint8_t* target1, uint8_t shape)
+{
+ switch (shape)
+ {
+ case SHAPE_NEXT0:
+ case SHAPE_NEXT1:
+ RUBY_ASSERT(false);
+ break;
+
+ case SHAPE_DEFAULT:
+ mov(cb, REG0, const_ptr_opnd(target0));
+ mov(cb, member_opnd(REG_CFP, rb_control_frame_t, jit_return), REG0);
+ break;
+ }
+}
+
static bool
gen_opt_swb_iseq(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const rb_callable_method_entry_t *cme, int32_t argc)
{
@@ -1251,13 +1268,32 @@ gen_opt_swb_iseq(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const rb
cmp(cb, klass_opnd, REG1);
jne_ptr(cb, side_exit);
- // Store incremented PC into current control frame in case callee raises.
+ // Store the updated SP on the current frame (pop arguments and receiver)
+ lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * -(argc + 1)));
+ mov(cb, member_opnd(REG_CFP, rb_control_frame_t, sp), REG0);
+
+ // Store the next PC in the current frame
mov(cb, REG0, const_ptr_opnd(jit->pc + insn_len(BIN(opt_send_without_block))));
mov(cb, mem_opnd(64, REG_CFP, offsetof(rb_control_frame_t, pc)), REG0);
- // Store the updated SP on the CFP (pop arguments and receiver)
- lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * -(argc + 1)));
- mov(cb, member_opnd(REG_CFP, rb_control_frame_t, sp), REG0);
+ // Block for the call continuation, so the callee can return to JITted code
+ blockid_t return_block = { jit->iseq, jit_next_insn_idx(jit) };
+
+ // Pop arguments and receiver in return context, push the return value
+ // After the return, the JIT and interpreter SP will match up
+ ctx_t return_ctx = *ctx;
+ ctx_stack_pop(&return_ctx, argc);
+ return_ctx.sp_offset = 0;
+
+ // Write the JIT return address on the current frame
+ gen_branch(
+ ctx,
+ return_block,
+ &return_ctx,
+ return_block,
+ &return_ctx,
+ gen_return_branch
+ );
// Stack overflow check
// #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin)
@@ -1327,7 +1363,6 @@ gen_opt_swb_iseq(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const rb
&DEFAULT_CTX,
(blockid_t){ iseq, 0 }
);
-
// TODO: create stub for call continuation
@@ -1432,7 +1467,31 @@ gen_leave(jitstate_t* jit, ctx_t* ctx)
mov(cb, REG_SP, member_opnd(REG_CFP, rb_control_frame_t, sp));
mov(cb, mem_opnd(64, REG_SP, -SIZEOF_VALUE), REG0);
- // Write the post call bytes
+
+ // Load the JIT return address
+ mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, jit_return));
+
+ // If the return address is NULL, fall back to the interpreter
+ int fallback_label = cb_new_label(cb, "FALLBACK");
+ cmp(cb, REG0, imm_opnd(0));
+ jz(cb, fallback_label);
+
+ // Jump to the JIT return address
+ jmp_rm(cb, REG0);
+
+ // Fall back to the interpreter
+ cb_write_label(cb, fallback_label);
+ cb_link_labels(cb);
+
cb_write_post_call_bytes(cb);
return true;
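
Note the handshake the hunks above set up: at the send site the caller
writes its own continuation address into its own control frame (REG_CFP
still points at the caller there, since the callee frame is pushed later in
gen_opt_swb_iseq), and gen_leave() reads jit_return after the frame pop,
once REG_CFP has moved back to the caller. Reconstructed from the hunks as
pseudocode; the frame push/pop itself is outside this diff:

    caller, at the send site:  cfp->sp = sp_minus_args_and_receiver;
                               cfp->pc = next_pc;              // in case callee raises
                               cfp->jit_return = continuation; // a stub at first
    callee, in gen_leave:      pop frame;
                               if (cfp->jit_return) jump to cfp->jit_return;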
diff --git a/ujit_core.c b/ujit_core.c
index d7b0d65e69..105769955c 100644
--- a/ujit_core.c
+++ b/ujit_core.c
@@ -32,7 +32,7 @@ Get an operand for the adjusted stack pointer address
x86opnd_t
ctx_sp_opnd(ctx_t* ctx, int32_t offset_bytes)
{
- int32_t offset = (ctx->stack_size) * sizeof(VALUE) + offset_bytes;
+ int32_t offset = (ctx->sp_offset * sizeof(VALUE)) + offset_bytes;
return mem_opnd(64, REG_SP, offset);
}
@@ -49,9 +49,10 @@ ctx_stack_push(ctx_t* ctx, int type)
ctx->temp_types[ctx->stack_size] = type;
ctx->stack_size += 1;
+ ctx->sp_offset += 1;
// SP points just above the topmost value
- int32_t offset = (ctx->stack_size - 1) * sizeof(VALUE);
+ int32_t offset = (ctx->sp_offset - 1) * sizeof(VALUE);
return mem_opnd(64, REG_SP, offset);
}
@@ -65,7 +66,7 @@ ctx_stack_pop(ctx_t* ctx, size_t n)
RUBY_ASSERT(n <= ctx->stack_size);
// SP points just above the topmost value
- int32_t offset = (ctx->stack_size - 1) * sizeof(VALUE);
+ int32_t offset = (ctx->sp_offset - 1) * sizeof(VALUE);
x86opnd_t top = mem_opnd(64, REG_SP, offset);
// Clear the types of the popped values
@@ -77,6 +78,7 @@ ctx_stack_pop(ctx_t* ctx, size_t n)
}
ctx->stack_size -= n;
+ ctx->sp_offset -= n;
return top;
}
@@ -88,7 +90,7 @@ x86opnd_t
ctx_stack_opnd(ctx_t* ctx, int32_t idx)
{
// SP points just above the topmost value
- int32_t offset = (ctx->stack_size - 1 - idx) * sizeof(VALUE);
+ int32_t offset = (ctx->sp_offset - 1 - idx) * sizeof(VALUE);
x86opnd_t opnd = mem_opnd(64, REG_SP, offset);
return opnd;
@@ -120,6 +122,9 @@ int ctx_diff(const ctx_t* src, const ctx_t* dst)
if (dst->stack_size != src->stack_size)
return INT_MAX;
+ if (dst->sp_offset != src->sp_offset)
+ return INT_MAX;
+
if (dst->self_is_object != src->self_is_object)
return INT_MAX;
@@ -345,6 +350,7 @@ uint8_t* branch_stub_hit(uint32_t branch_idx, uint32_t target_idx)
// Limit the number of block versions
ctx_t generic_ctx = DEFAULT_CTX;
generic_ctx.stack_size = target_ctx->stack_size;
+ generic_ctx.sp_offset = target_ctx->sp_offset;
if (count_block_versions(target) >= MAX_VERSIONS - 1)
{
fprintf(stderr, "version limit hit in branch_stub_hit\n");
@@ -383,7 +389,6 @@ uint8_t* branch_stub_hit(uint32_t branch_idx, uint32_t target_idx)
}
// Get a version or stub corresponding to a branch target
-// TODO: need incoming and target contexts
uint8_t* get_branch_target(
blockid_t target,
const ctx_t* ctx,
@@ -440,13 +445,13 @@ void gen_branch(
)
{
RUBY_ASSERT(target0.iseq != NULL);
- RUBY_ASSERT(target1.iseq != NULL);
+ //RUBY_ASSERT(target1.iseq != NULL);
RUBY_ASSERT(num_branches < MAX_BRANCHES);
uint32_t branch_idx = num_branches++;
// Get the branch targets or stubs
uint8_t* dst_addr0 = get_branch_target(target0, ctx0, branch_idx, 0);
- uint8_t* dst_addr1 = get_branch_target(target1, ctx1, branch_idx, 1);
+ uint8_t* dst_addr1 = ctx1 ? get_branch_target(target1, ctx1, branch_idx, 1) : NULL;
// Call the branch generation function
uint32_t start_pos = cb->write_pos;
@@ -459,7 +464,7 @@ void gen_branch(
end_pos,
*src_ctx,
{ target0, target1 },
- { *ctx0, *ctx1 },
+ { *ctx0, ctx1 ? *ctx1 : DEFAULT_CTX },
{ dst_addr0, dst_addr1 },
gen_fn,
SHAPE_DEFAULT
@@ -508,6 +513,7 @@ void gen_direct_jump(
// Limit the number of block versions
ctx_t generic_ctx = DEFAULT_CTX;
generic_ctx.stack_size = ctx->stack_size;
+ generic_ctx.sp_offset = ctx->sp_offset;
if (count_block_versions(target0) >= MAX_VERSIONS - 1)
{
fprintf(stderr, "version limit hit in branch_stub_hit\n");
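
In ujit_core.c, every SP-relative operand now uses the new sp_offset field
instead of stack_size, sp_offset takes part in block-version matching
(ctx_diff) and in the generic fallback contexts, and gen_branch() now
tolerates a missing second context. With the relaxed assertion, a branch
whose second target has no context yet could be registered along these
lines (hypothetical call, for illustration only):

    gen_branch(ctx, target0, &ctx0, target0, NULL, gen_return_branch);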
diff --git a/ujit_core.h b/ujit_core.h
index 08fdd4d779..9430269438 100644
--- a/ujit_core.h
+++ b/ujit_core.h
@@ -31,9 +31,12 @@ typedef struct CtxStruct
// T_NONE==0 is the unknown type
uint8_t temp_types[MAX_TEMP_TYPES];
- // Number of values pushed on the temporary stack
+ // Number of values currently on the temporary stack
uint16_t stack_size;
+ // Offset of the JIT SP relative to the interpreter SP
+ int16_t sp_offset;
+
// Whether we know self is a heap object
bool self_is_object : 1;
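
The key new invariant in this struct: stack_size counts the values the JIT
is tracking, while sp_offset says where the JIT's virtual SP sits relative
to the SP last written back to cfp->sp, in VALUE-sized slots. The two were
previously implicitly equal, which made it impossible to write a popped SP
back to the frame mid-block the way gen_opt_swb_iseq now does. The
relationship, as a sketch (hypothetical helper, name invented for
illustration):

    // REG_SP holds the interpreter SP as last written back to cfp->sp;
    // the JIT's virtual stack top is sp_offset slots above that point.
    static x86opnd_t virtual_stack_top(ctx_t* ctx)
    {
        return mem_opnd(64, REG_SP, (ctx->sp_offset - 1) * sizeof(VALUE));
    }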
diff --git a/vm.c b/vm.c
index e98f89864b..86a78bc8d5 100644
--- a/vm.c
+++ b/vm.c
@@ -202,7 +202,7 @@ VM_CAPTURED_BLOCK_TO_CFP(const struct rb_captured_block *captured)
{
rb_control_frame_t *cfp = ((rb_control_frame_t *)((VALUE *)(captured) - 3));
VM_ASSERT(!VM_CFP_IN_HEAP_P(GET_EC(), cfp));
- VM_ASSERT(sizeof(rb_control_frame_t)/sizeof(VALUE) == 7 + VM_DEBUG_BP_CHECK ? 1 : 0);
+ VM_ASSERT(sizeof(rb_control_frame_t)/sizeof(VALUE) == 8 + VM_DEBUG_BP_CHECK ? 1 : 0);
return cfp;
}
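
The expected control frame size goes from 7 to 8 VALUE-sized slots because
of the new jit_return pointer (vm_core.h, below). A compile-time version of
this check could look like the following (a sketch using CRuby's
STATIC_ASSERT; not part of this patch):

    STATIC_ASSERT(control_frame_size,
                  sizeof(rb_control_frame_t) ==
                  (8 + VM_DEBUG_BP_CHECK) * sizeof(VALUE));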
diff --git a/vm_core.h b/vm_core.h
index 6b627f4a82..23ebf37b1a 100644
--- a/vm_core.h
+++ b/vm_core.h
@@ -790,6 +790,8 @@ typedef struct rb_control_frame_struct {
#if VM_DEBUG_BP_CHECK
VALUE *bp_check; /* cfp[7] */
#endif
+ // Return address for uJIT code
+ void *jit_return;
} rb_control_frame_t;
extern const rb_data_type_t ruby_threadptr_data_type;
diff --git a/vm_insnhelper.c b/vm_insnhelper.c
index 7075f7c0f2..f743e07e03 100644
--- a/vm_insnhelper.c
+++ b/vm_insnhelper.c
@@ -390,6 +390,7 @@ vm_push_frame(rb_execution_context_t *ec,
#if VM_DEBUG_BP_CHECK
.bp_check = sp,
#endif
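+ // New frames have no JIT return address until a JITted call site sets one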
+ .jit_return = NULL
};
ec->cfp = cfp;