From 4b3250c5073149c59c5c11e06c2c0d93b6a9f5ff Mon Sep 17 00:00:00 2001 From: Timothy Pearson Date: Fri, 29 Nov 2019 19:00:14 -0600 Subject: Initial overlay of HQEMU 2.5.2 changes onto underlying 2.5.1 QEMU GIT tree --- tcg/ppc/tcg-target.c | 123 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 118 insertions(+), 5 deletions(-) (limited to 'tcg/ppc') diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c index 2c72565..ca5c7a4 100644 --- a/tcg/ppc/tcg-target.c +++ b/tcg/ppc/tcg-target.c @@ -78,7 +78,8 @@ #define TCG_CT_CONST_ZERO 0x1000 #define TCG_CT_CONST_MONE 0x2000 -static tcg_insn_unit *tb_ret_addr; +tcg_insn_unit *tb_ret_addr; +tcg_insn_unit *ibtc_ret_addr; /* address of the IBTC exit entry; assigned in tcg_out_epilogue() */ #include "elf.h" static bool have_isa_2_06; @@ -1785,8 +1786,14 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) #define CPU_TEMP_BUF_SIZE (CPU_TEMP_BUF_NLONGS * (int)sizeof(long)) #define REG_SAVE_SIZE ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR) +#if defined(CONFIG_LLVM) +#define STACK_SIZE 0x800 /* 2048 bytes; used in FRAME_SIZE in place of TCG_STATIC_CALL_ARGS_SIZE */ +#else +#define STACK_SIZE TCG_STATIC_CALL_ARGS_SIZE +#endif + #define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET \ - + TCG_STATIC_CALL_ARGS_SIZE \ + + STACK_SIZE \ + CPU_TEMP_BUF_SIZE \ + REG_SAVE_SIZE \ + TCG_TARGET_STACK_ALIGN - 1) \ @@ -1794,6 +1801,14 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) #define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE) +static unsigned num_epilogue_insns = 1; /* number of instructions tcg_out_epilogue() emits; the prologue adds it to its tb_ret_addr offsets */ +static void tcg_out_epilogue(TCGContext *s) +{ + /* IBTC exit entry: remember its address, then load 0 into R3. */ + ibtc_ret_addr = s->code_ptr; + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, 0); +} + static void tcg_target_qemu_prologue(TCGContext *s) { int i; @@ -1832,27 +1847,29 @@ static void tcg_target_qemu_prologue(TCGContext *s) if (USE_REG_RA) { #ifdef _CALL_AIX /* Make the caller load the value as the TOC into R2. 
*/ - tb_ret_addr = s->code_ptr + 2; + tb_ret_addr = s->code_ptr + 2 + num_epilogue_insns; desc[1] = tb_ret_addr; tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_RA, TCG_REG_R2); tcg_out32(s, BCCTR | BO_ALWAYS); #elif defined(_CALL_ELF) && _CALL_ELF == 2 /* Compute from the incoming R12 value. */ - tb_ret_addr = s->code_ptr + 2; + tb_ret_addr = s->code_ptr + 2 + num_epilogue_insns; tcg_out32(s, ADDI | TAI(TCG_REG_RA, TCG_REG_R12, tcg_ptr_byte_diff(tb_ret_addr, s->code_buf))); tcg_out32(s, BCCTR | BO_ALWAYS); #else /* Reserve max 5 insns for the constant load. */ - tb_ret_addr = s->code_ptr + 6; + tb_ret_addr = s->code_ptr + 6 + num_epilogue_insns; tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (intptr_t)tb_ret_addr); tcg_out32(s, BCCTR | BO_ALWAYS); while (s->code_ptr < tb_ret_addr) { tcg_out32(s, NOP); } #endif + tcg_out_epilogue(s); /* NOTE(review): in the generic #else path above, the NOP padding runs until code_ptr == tb_ret_addr, so the epilogue instruction is emitted at tb_ret_addr rather than just before it as in the other paths -- confirm this is intended. */ } else { tcg_out32(s, BCCTR | BO_ALWAYS); + tcg_out_epilogue(s); tb_ret_addr = s->code_ptr; } @@ -1869,6 +1886,85 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_out32(s, BCLR | BO_ALWAYS); } +static void tcg_out_jmp_short(uintptr_t jmp_addr, uintptr_t addr) +{ /* Overwrite the 8 bytes at jmp_addr with a direct branch to addr followed by a NOP; calls tcg_abort() when in_range_b() rejects the displacement. */ + tcg_insn_unit i1, i2; + uint64_t pair; + intptr_t diff = addr - jmp_addr; + + if (!in_range_b(diff)) + tcg_abort(); + + i1 = B | (diff & 0x3fffffc); + i2 = NOP; /* in both byte orders below, the branch is placed at the lower address and the NOP after it */ +#ifdef HOST_WORDS_BIGENDIAN + pair = (uint64_t)i1 << 32 | i2; +#else + pair = (uint64_t)i2 << 32 | i1; +#endif + *(uint64_t *)jmp_addr = pair; /* both instruction slots are written with one 64-bit store */ +} + +/* + * Emit trace profiling/prediction stubs. The code sequence is as follows: + * S1: direct jump (the patched branch/nop pair is kept 8-byte aligned) + * S2: trace profiling stub + * S3: trace prediction stub + * S4: beginning of QEMU emulation code + * + * The jump inst of S1 is initially set to jump to S3 (i.e. skipping S2). + * Remember the offset of S3 (patch_next) which is used to turn the + * trace profiling off. Also remember the offset of S4 (patch_skip) + * so that the trace stubs can be skipped quickly while searching pc. 
+ */ +static void tcg_out_hotpatch(TCGContext *s, int is_user, int emit_helper) +{ + tcg_insn_unit *label_ptr[2]; + TranslationBlock *tb = s->tb; + + /* S1: direct jump. Ensure the next insns are 8-byte aligned. */ + if ((uintptr_t)s->code_ptr & 7) + tcg_out32(s, NOP); + + tb->patch_jmp = (uint16_t)((intptr_t)s->code_ptr - (intptr_t)s->code_buf); /* byte offset of S1 from s->code_buf, truncated to 16 bits */ + + /* S1: Direct Jump */ + if (is_user == 0 || emit_helper == 0) { + tcg_out_jmp_short((uintptr_t)s->code_ptr, (uintptr_t)(s->code_ptr + 4)); /* no stubs are emitted: branch over the whole 4-insn S1 sequence */ + s->code_ptr += 2; /* step past the branch/nop pair just stored */ + tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR); + tcg_out32(s, BCCTR | BO_ALWAYS); + tb->patch_next = (uint16_t)((intptr_t)s->code_ptr - (intptr_t)s->code_buf); + return; + } + + label_ptr[0] = s->code_ptr; + s->code_ptr += 2; /* reserve the branch/nop slots; they are filled in after S2 has been emitted */ + tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR); + tcg_out32(s, BCCTR | BO_ALWAYS); + + /* S2: Trace Profiling Stub: call helper_NET_profile with TCG_AREG0 and tb->id as arguments */ + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[1], tb->id); + tcg_out_call(s, (tcg_insn_unit *)helper_NET_profile); + tcg_out_jmp_short((uintptr_t)label_ptr[0], (uintptr_t)s->code_ptr); /* point the S1 branch at S3, which starts here */ + + /* S3: Trace Prediction stub */ + tb->patch_next = (uint16_t)((intptr_t)s->code_ptr - (intptr_t)s->code_buf); + + tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_AREG0, + offsetof(CPUArchState, start_trace_prediction)); + tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_TMP1, 0, 1, 7, TCG_TYPE_I32); + label_ptr[1] = s->code_ptr; + tcg_out_bc_noaddr(s, tcg_to_bc[TCG_COND_EQ]); + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[1], tb->id); + tcg_out_call(s, (tcg_insn_unit *)helper_NET_predict); + reloc_pc14(label_ptr[1], s->code_ptr); /* the EQ branch above (start_trace_prediction compared with 0) skips the helper call and lands on S4 */ + + /* S4: QEMU emulation code */ +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *const_args) { @@ -1906,6 +2002,17 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, tcg_out32(s, BCCTR | 
BO_ALWAYS); s->tb_next_offset[args[0]] = tcg_current_code_size(s); break; + case INDEX_op_hotpatch: /* args[0] is passed as is_user, args[1] as emit_helper */ + tcg_out_hotpatch(s, args[0], args[1]); + break; + case INDEX_op_jmp: /* constant target: direct branch; register target: move it to CTR and branch through CTR */ + if (const_args[0]) { + tcg_out_b(s, 0, (tcg_insn_unit *)args[0]); + } else { + tcg_out32(s, MTSPR | RS(args[0]) | CTR); + tcg_out32(s, BCCTR | BO_ALWAYS); + } + break; case INDEX_op_br: { TCGLabel *l = arg_label(args[0]); @@ -2436,6 +2543,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, } static const TCGTargetOpDef ppc_op_defs[] = { + { INDEX_op_hotpatch, { "i", "i" } }, + { INDEX_op_jmp, { "ri" } }, { INDEX_op_exit_tb, { } }, { INDEX_op_goto_tb, { } }, { INDEX_op_br, { } }, @@ -2572,6 +2681,10 @@ static const TCGTargetOpDef ppc_op_defs[] = { { INDEX_op_qemu_st_i64, { "S", "S", "S", "S" } }, #endif +#define DEF(name,a1,a2,a3,a4) { INDEX_op_##name, {} }, /* each DEF() use in the included header expands to a table entry with empty constraints */ +#include "tcg-opc-vector.h" +#undef DEF + { -1 }, }; -- cgit v1.1