author | Timothy Pearson <tpearson@raptorengineering.com> | 2019-11-29 19:25:14 -0600
---|---|---
committer | Timothy Pearson <tpearson@raptorengineering.com> | 2019-11-29 19:25:14 -0600
commit | d7b5e291ffb166239c44682af2363080c72888aa (patch) |
tree | 226ff4668e6e5fd9309d792181bb54e256032338 | /tcg
parent | a8c40fa2d667e585382080db36ac44e216b37a1c (diff) |
download | hqemu-2.5_overlay.zip | hqemu-2.5_overlay.tar.gz
Initial overlay of HQEMU 2.5.2 changes onto underlying 2.5.0 QEMU GIT tree (2.5_overlay)
Diffstat (limited to 'tcg')
-rw-r--r-- | tcg/aarch64/tcg-target.c | 82
-rw-r--r-- | tcg/i386/tcg-target.c | 197
-rw-r--r-- | tcg/i386/tcg-target.h | 2
-rw-r--r-- | tcg/ppc/tcg-target.c | 123
-rw-r--r-- | tcg/tcg-op.c | 2
-rw-r--r-- | tcg/tcg-op.h | 11
-rw-r--r-- | tcg/tcg-opc.h | 6
-rw-r--r-- | tcg/tcg.c | 270
-rw-r--r-- | tcg/tcg.h | 58
9 files changed, 703 insertions, 48 deletions
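
The diff below adds a per-target tcg_out_hotpatch() routine that emits the four-part sequence described in its comments (S1 direct jump, S2 trace profiling stub, S3 trace prediction stub, S4 QEMU emulation code) and records the offsets of S1 and S3 in the TranslationBlock as patch_jmp and patch_next. As a rough sketch of how those offsets could be consumed at run time, here is a hypothetical toggle helper assuming the x86 `jmp rel32` encoding that the i386 backend emits; neither this helper nor the HotpatchTB struct is part of this commit.

```c
/* Hypothetical sketch, not part of this diff: repoint the S1 jump recorded
 * by tcg_out_hotpatch() so the trace profiling stub (S2) is either executed
 * or skipped. The struct mirrors only the fields relevant to the sketch. */
#include <stdint.h>
#include <stdbool.h>

typedef struct {
    uint8_t  *tc_ptr;      /* start of this TB's generated host code        */
    uint16_t  patch_jmp;   /* offset of S1: the patchable 'jmp rel32'       */
    uint16_t  patch_next;  /* offset of S3: the trace prediction stub       */
} HotpatchTB;

/* Point S1 at S2 (profiling on) or at S3 (profiling off). */
static void hotpatch_set_profiling(HotpatchTB *tb, bool enable)
{
    uint8_t *jmp = tb->tc_ptr + tb->patch_jmp;   /* OPC_JMP_long opcode byte */
    uint8_t *s2  = jmp + 5;                      /* S2 starts after the jump */
    uint8_t *s3  = tb->tc_ptr + tb->patch_next;
    uint8_t *target = enable ? s2 : s3;

    /* Rewrite the rel32 displacement, which S1 keeps 4-byte aligned. */
    *(int32_t *)(jmp + 1) = (int32_t)(target - (jmp + 5));
}
```

Leaving S1 pointed at patch_next (the state in which the stub is emitted) skips the helper_NET_profile call entirely, which matches the comment's note that patch_next is used to turn trace profiling off.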
diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c index 0ed10a9..05e26af 100644 --- a/tcg/aarch64/tcg-target.c +++ b/tcg/aarch64/tcg-target.c @@ -1264,7 +1264,56 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, #endif /* CONFIG_SOFTMMU */ } -static tcg_insn_unit *tb_ret_addr; +tcg_insn_unit *tb_ret_addr; +tcg_insn_unit *ibtc_ret_addr; + +/* + * Emit trace profiling/prediction stubs. The code sequence is as following: + * S1: direct jump (the reloc part requires 4-byte alignment) + * S2: trace profiling stub + * S3: trace prediction stub + * S4: beginning of QEMU emulation code + * + * The jump inst of S1 is initially set to jump to S3 (i.e. skipping S2). + * Remember the offset of S3 (patch_next) which is used to turn the + * trace profiling off. Also remember the offset of S4 (patch_skip) + * so that the trace stubs can be skipped quickly while searching pc. + */ +static void tcg_out_hotpatch(TCGContext *s, int is_user, int emit_helper) +{ + tcg_insn_unit *label_ptr[2]; + TranslationBlock *tb = s->tb; + + tb->patch_jmp = (uint16_t)((intptr_t)s->code_ptr - (intptr_t)s->code_buf); + + /* S1: Direct Jump */ + if (is_user == 0 || emit_helper == 0) { + tcg_out_goto(s, s->code_ptr + 1); + tb->patch_next = (uint16_t)((intptr_t)s->code_ptr - (intptr_t)s->code_buf); + return; + } + + label_ptr[0] = s->code_ptr; + tcg_out_goto_noaddr(s); + /* S2: Trace Profiling Stub */ + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[1], tb->id); + tcg_out_call(s, (tcg_insn_unit *)helper_NET_profile); + reloc_pc26(label_ptr[0], s->code_ptr); + + /* S3: Trace Prediction stub */ + tb->patch_next = (uint16_t)((intptr_t)s->code_ptr - (intptr_t)s->code_buf); + + tcg_out_ld(s, TCG_TYPE_I32, tcg_target_reg_alloc_order[0], + TCG_AREG0, offsetof(CPUArchState, start_trace_prediction)); + tcg_out_cmp(s, 0, tcg_target_reg_alloc_order[0], 0, 1); + label_ptr[1] = s->code_ptr; + tcg_out_goto_cond_noaddr(s, TCG_COND_EQ); + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[1], tb->id); + tcg_out_call(s, (tcg_insn_unit *)helper_NET_predict); + reloc_pc19(label_ptr[1], s->code_ptr); +} static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], @@ -1302,6 +1351,16 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, s->tb_next_offset[a0] = tcg_current_code_size(s); break; + case INDEX_op_hotpatch: + tcg_out_hotpatch(s, args[0], args[1]); + break; + case INDEX_op_jmp: + if (const_args[0]) { + tcg_out_goto(s, (tcg_insn_unit *)args[0]); + } else { + tcg_out_insn(s, 3207, BR, args[0]); + } + break; case INDEX_op_br: tcg_out_goto_label(s, arg_label(a0)); break; @@ -1637,6 +1696,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, } static const TCGTargetOpDef aarch64_op_defs[] = { + { INDEX_op_hotpatch, { "i", "i" } }, + { INDEX_op_jmp, { "ri" } }, { INDEX_op_exit_tb, { } }, { INDEX_op_goto_tb, { } }, { INDEX_op_br, { } }, @@ -1748,6 +1809,10 @@ static const TCGTargetOpDef aarch64_op_defs[] = { { INDEX_op_muluh_i64, { "r", "r", "r" } }, { INDEX_op_mulsh_i64, { "r", "r", "r" } }, +#define DEF(name,a1,a2,a3,a4) { INDEX_op_##name, {} }, +#include "tcg-opc-vector.h" +#undef DEF + { -1 }, }; @@ -1777,12 +1842,24 @@ static void tcg_target_init(TCGContext *s) tcg_add_target_add_op_defs(aarch64_op_defs); } +static void tcg_out_epilogue(TCGContext *s) +{ + /* IBTC exit entry */ + ibtc_ret_addr = 
s->code_ptr; + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_X0, 0); +} + +#if defined(CONFIG_LLVM) +#define STACK_SIZE 0x800 +#else +#define STACK_SIZE TCG_STATIC_CALL_ARGS_SIZE +#endif /* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */ #define PUSH_SIZE ((30 - 19 + 1) * 8) #define FRAME_SIZE \ ((PUSH_SIZE \ - + TCG_STATIC_CALL_ARGS_SIZE \ + + STACK_SIZE \ + CPU_TEMP_BUF_NLONGS * sizeof(long) \ + TCG_TARGET_STACK_ALIGN - 1) \ & ~(TCG_TARGET_STACK_ALIGN - 1)) @@ -1828,6 +1905,7 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]); + tcg_out_epilogue(s); tb_ret_addr = s->code_ptr; /* Remove TCG locals stack space. */ diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c index 9187d34..b95f5fb 100644 --- a/tcg/i386/tcg-target.c +++ b/tcg/i386/tcg-target.c @@ -139,7 +139,8 @@ static bool have_bmi2; # define have_bmi2 0 #endif -static tcg_insn_unit *tb_ret_addr; +tcg_insn_unit *tb_ret_addr; +tcg_insn_unit *ibtc_ret_addr; static void patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend) @@ -323,6 +324,7 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type, #define OPC_MOVB_EvGv (0x88) /* stores, more or less */ #define OPC_MOVL_EvGv (0x89) /* stores, more or less */ #define OPC_MOVL_GvEv (0x8b) /* loads, more or less */ +#define OPC_NOP (0x90) #define OPC_MOVB_EvIz (0xc6) #define OPC_MOVL_EvIz (0xc7) #define OPC_MOVL_Iv (0xb8) @@ -1150,6 +1152,62 @@ static void * const qemu_st_helpers[16] = { [MO_BEQ] = helper_be_stq_mmu, }; +/* helpers for LLVM */ +void * const llvm_ld_helpers[16] = { + [MO_UB] = llvm_ret_ldub_mmu, + [MO_LEUW] = llvm_le_lduw_mmu, + [MO_LEUL] = llvm_le_ldul_mmu, + [MO_LEQ] = llvm_le_ldq_mmu, + [MO_BEUW] = llvm_be_lduw_mmu, + [MO_BEUL] = llvm_be_ldul_mmu, + [MO_BEQ] = llvm_be_ldq_mmu, +}; + +void * const llvm_st_helpers[16] = { + [MO_UB] = llvm_ret_stb_mmu, + [MO_LEUW] = llvm_le_stw_mmu, + [MO_LEUL] = llvm_le_stl_mmu, + [MO_LEQ] = llvm_le_stq_mmu, + [MO_BEUW] = llvm_be_stw_mmu, + [MO_BEUL] = llvm_be_stl_mmu, + [MO_BEQ] = llvm_be_stq_mmu, +}; + +static inline void tcg_out_compute_gva(TCGContext *s, TCGReg addrlo, + TCGMemOp opc, int trexw, int tv_hrexw) +{ + const TCGReg r1 = TCG_REG_L1; + int s_mask = (1 << (opc & MO_SIZE)) - 1; + +#if defined(ALIGNED_ONLY) + TCGType ttype = TCG_TYPE_I32; + bool aligned = (opc & MO_AMASK) == MO_ALIGN || s_mask == 0; + if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 64) + ttype = TCG_TYPE_I64; + if (aligned) { + tcg_out_mov(s, ttype, r1, addrlo); + } else { + /* For unaligned access check that we don't cross pages using + the page address of the last byte. */ + tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask); + } + tgen_arithi(s, ARITH_AND + trexw, r1, + TARGET_PAGE_MASK | (aligned ? 
s_mask : 0), 0); +#elif defined(ENABLE_TLBVERSION) + /* the following code is as equivalent to + * (((addr + (size - 1)) & TARGET_PAGE_MASK) | env->tlb_version) */ + tcg_out_modrm_sib_offset(s, OPC_LEA + trexw, r1, addrlo, -1, 0, s_mask); + tgen_arithi(s, ARITH_AND + trexw, r1, TARGET_PAGE_MASK, 0); + tcg_out_modrm_offset(s, (OPC_ARITH_GvEv | (ARITH_OR << 3)) + trexw + tv_hrexw, + r1, TCG_AREG0, offsetof(CPUArchState, tlb_version)); +#else + /* the following code is as equivalent to + * ((addr + (size - 1)) & TARGET_PAGE_MASK) */ + tcg_out_modrm_sib_offset(s, OPC_LEA + trexw, r1, addrlo, -1, 0, s_mask); + tgen_arithi(s, ARITH_AND + trexw, r1, TARGET_PAGE_MASK, 0); +#endif +} + /* Perform the TLB load and compare. Inputs: @@ -1179,9 +1237,7 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, const TCGReg r1 = TCG_REG_L1; TCGType ttype = TCG_TYPE_I32; TCGType tlbtype = TCG_TYPE_I32; - int trexw = 0, hrexw = 0, tlbrexw = 0; - int s_mask = (1 << (opc & MO_SIZE)) - 1; - bool aligned = (opc & MO_AMASK) == MO_ALIGN || s_mask == 0; + int trexw = 0, hrexw = 0, tlbrexw = 0, tv_hrexw = 0; if (TCG_TARGET_REG_BITS == 64) { if (TARGET_LONG_BITS == 64) { @@ -1197,20 +1253,18 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, } } +#if defined(ENABLE_TLBVERSION_EXT) + trexw = 0; + tv_hrexw = P_REXW; +#endif + tcg_out_mov(s, tlbtype, r0, addrlo); - if (aligned) { - tcg_out_mov(s, ttype, r1, addrlo); - } else { - /* For unaligned access check that we don't cross pages using - the page address of the last byte. */ - tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask); - } tcg_out_shifti(s, SHIFT_SHR + tlbrexw, r0, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - tgen_arithi(s, ARITH_AND + trexw, r1, - TARGET_PAGE_MASK | (aligned ? s_mask : 0), 0); + tcg_out_compute_gva(s, addrlo, opc, trexw, tv_hrexw); + tgen_arithi(s, ARITH_AND + tlbrexw, r0, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0); @@ -1219,7 +1273,7 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, + which); /* cmp 0(r0), r1 */ - tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0); + tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw + tv_hrexw, r1, r0, 0); /* Prepare for both the fast path add of the tlb addend, and the slow path function argument setup. There are two cases worth note: @@ -1754,6 +1808,73 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) #endif } +/* + * Emit trace profiling/prediction stubs. The code sequence is as following: + * S1: direct jump (the reloc part requires 4-byte alignment) + * S2: trace profiling stub + * S3: trace prediction stub + * S4: beginning of QEMU emulation code + * + * The jump inst of S1 is initially set to jump to S3 (i.e. skipping S2). + * Remember the offset of S3 (patch_next) which is used to turn the + * trace profiling off. Also remember the offset of S4 (patch_skip) + * so that the trace stubs can be skipped quickly while searching pc. 
+ */ +static void tcg_out_hotpatch(TCGContext *s, uint32_t is_user, uint32_t emit_helper) +{ + uint8_t *label_ptr[2]; + TranslationBlock *tb = s->tb; + + /* S1: direct jump */ + while (((uintptr_t)s->code_ptr + 1) % 4) + tcg_out8(s, OPC_NOP); + + tb->patch_jmp = (uint16_t)(s->code_ptr - s->code_buf); + + tcg_out8(s, OPC_JMP_long); + label_ptr[0] = s->code_ptr; + s->code_ptr += 4; + + if (is_user == 0 || emit_helper == 0) { + *(uint32_t *)label_ptr[0] = s->code_ptr - label_ptr[0] - 4; + tb->patch_next = (uint16_t)(s->code_ptr - s->code_buf); + return; + } + + /* S2: trace profiling stub */ + if (TCG_TARGET_REG_BITS == 32) { + tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, 0); + tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, 4, tb->id); + } else { + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[1], tb->id); + } + tcg_out_call(s, (tcg_insn_unit *)helper_NET_profile); + *(uint32_t *)label_ptr[0] = s->code_ptr - label_ptr[0] - 4; + + /* S3: trace prediction stub */ + tb->patch_next = (uint16_t)(s->code_ptr - s->code_buf); + + tcg_out_ld(s, TCG_TYPE_I32, tcg_target_reg_alloc_order[0], + TCG_AREG0, offsetof(CPUArchState, start_trace_prediction)); + tcg_out_cmp(s, tcg_target_reg_alloc_order[0], 0, 1, 0); + tcg_out_opc(s, OPC_JCC_long + JCC_JE, 0, 0, 0); + label_ptr[1] = s->code_ptr; + s->code_ptr += 4; + + if (TCG_TARGET_REG_BITS == 32) { + tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, 0); + tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, 4, tb->id); + } else { + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[1], tb->id); + } + tcg_out_call(s, (tcg_insn_unit *)helper_NET_predict); + *(uint32_t *)label_ptr[1] = s->code_ptr - label_ptr[1] - 4; + + /* S4: QEMU emulation code */ +} + static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *const_args) { @@ -1777,6 +1898,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_goto_tb: if (s->tb_jmp_offset) { /* direct jump method */ +#if defined(CONFIG_USER_ONLY) + while (((uintptr_t)s->code_ptr + 1) % 4) /* need 4-byte aligned */ + tcg_out8(s, OPC_NOP); +#endif tcg_out8(s, OPC_JMP_long); /* jmp im */ s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); tcg_out32(s, 0); @@ -1787,6 +1912,17 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, } s->tb_next_offset[args[0]] = tcg_current_code_size(s); break; + case INDEX_op_hotpatch: + tcg_out_hotpatch(s, args[0], args[1]); + break; + case INDEX_op_jmp: + if (const_args[0]) { + tcg_out_jmp(s, (tcg_insn_unit *)args[0]); + } else { + /* jmp *reg */ + tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, args[0]); + } + break; case INDEX_op_br: tcg_out_jxx(s, JCC_JMP, arg_label(args[0]), 0); break; @@ -2110,6 +2246,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, } static const TCGTargetOpDef x86_op_defs[] = { + { INDEX_op_hotpatch, { "i", "i" } }, + { INDEX_op_jmp, { "ri" } }, { INDEX_op_exit_tb, { } }, { INDEX_op_goto_tb, { } }, { INDEX_op_br, { } }, @@ -2238,6 +2376,11 @@ static const TCGTargetOpDef x86_op_defs[] = { { INDEX_op_qemu_ld_i64, { "r", "r", "L", "L" } }, { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } }, #endif + +#define DEF(name,a1,a2,a3,a4) { INDEX_op_##name, {} }, +#include "tcg-opc-vector.h" +#undef DEF + { -1 }, }; @@ -2261,16 +2404,29 @@ static int tcg_target_callee_save_regs[] = { #endif }; +static void tcg_out_epilogue(TCGContext *s) +{ + /* IBTC exit 
entry */ + ibtc_ret_addr = s->code_ptr; + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, 0); +} + /* Compute frame size via macros, to share between tcg_target_qemu_prologue and tcg_register_jit. */ +#if defined(CONFIG_LLVM) +#define STACK_SIZE 0x2000 +#else +#define STACK_SIZE TCG_STATIC_CALL_ARGS_SIZE +#endif + #define PUSH_SIZE \ ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \ * (TCG_TARGET_REG_BITS / 8)) #define FRAME_SIZE \ ((PUSH_SIZE \ - + TCG_STATIC_CALL_ARGS_SIZE \ + + STACK_SIZE \ + CPU_TEMP_BUF_NLONGS * sizeof(long) \ + TCG_TARGET_STACK_ALIGN - 1) \ & ~(TCG_TARGET_STACK_ALIGN - 1)) @@ -2279,10 +2435,12 @@ static int tcg_target_callee_save_regs[] = { static void tcg_target_qemu_prologue(TCGContext *s) { int i, stack_addend; + tcg_target_long stack_align_mask; /* TB prologue */ /* Reserve some stack space, also for TCG temps. */ + stack_align_mask = ~(TCG_TARGET_STACK_ALIGN - 1); stack_addend = FRAME_SIZE - PUSH_SIZE; tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE, CPU_TEMP_BUF_NLONGS * sizeof(long)); @@ -2296,6 +2454,9 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4); tcg_out_addi(s, TCG_REG_ESP, -stack_addend); + tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_ESP, TCG_AREG0, + offsetof(CPUArchState, sp)); + tgen_arithi(s, ARITH_AND, TCG_REG_ESP, stack_align_mask, 0); /* jmp *tb. */ tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP, (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4 @@ -2303,13 +2464,19 @@ static void tcg_target_qemu_prologue(TCGContext *s) #else tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); tcg_out_addi(s, TCG_REG_ESP, -stack_addend); + tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_ESP, TCG_AREG0, + offsetof(CPUArchState, sp)); + tgen_arithi(s, ARITH_AND + P_REXW, TCG_REG_ESP, stack_align_mask, 0); /* jmp *tb. 
*/ tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]); #endif /* TB epilogue */ + tcg_out_epilogue(s); tb_ret_addr = s->code_ptr; + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_ESP, TCG_AREG0, + offsetof(CPUArchState, sp)); tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend); for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) { diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 92be341..c5715dc 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -67,7 +67,7 @@ typedef enum { /* used for function call generation */ #define TCG_REG_CALL_STACK TCG_REG_ESP -#define TCG_TARGET_STACK_ALIGN 16 +#define TCG_TARGET_STACK_ALIGN 32 #if defined(_WIN64) #define TCG_TARGET_CALL_STACK_OFFSET 32 #else diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c index 2c72565..ca5c7a4 100644 --- a/tcg/ppc/tcg-target.c +++ b/tcg/ppc/tcg-target.c @@ -78,7 +78,8 @@ #define TCG_CT_CONST_ZERO 0x1000 #define TCG_CT_CONST_MONE 0x2000 -static tcg_insn_unit *tb_ret_addr; +tcg_insn_unit *tb_ret_addr; +tcg_insn_unit *ibtc_ret_addr; #include "elf.h" static bool have_isa_2_06; @@ -1785,8 +1786,14 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) #define CPU_TEMP_BUF_SIZE (CPU_TEMP_BUF_NLONGS * (int)sizeof(long)) #define REG_SAVE_SIZE ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR) +#if defined(CONFIG_LLVM) +#define STACK_SIZE 0x800 +#else +#define STACK_SIZE TCG_STATIC_CALL_ARGS_SIZE +#endif + #define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET \ - + TCG_STATIC_CALL_ARGS_SIZE \ + + STACK_SIZE \ + CPU_TEMP_BUF_SIZE \ + REG_SAVE_SIZE \ + TCG_TARGET_STACK_ALIGN - 1) \ @@ -1794,6 +1801,14 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) #define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE) +static unsigned num_epilogue_insns = 1; +static void tcg_out_epilogue(TCGContext *s) +{ + /* IBTC exit entry */ + ibtc_ret_addr = s->code_ptr; + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, 0); +} + static void tcg_target_qemu_prologue(TCGContext *s) { int i; @@ -1832,27 +1847,29 @@ static void tcg_target_qemu_prologue(TCGContext *s) if (USE_REG_RA) { #ifdef _CALL_AIX /* Make the caller load the value as the TOC into R2. */ - tb_ret_addr = s->code_ptr + 2; + tb_ret_addr = s->code_ptr + 2 + num_epilogue_insns; desc[1] = tb_ret_addr; tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_RA, TCG_REG_R2); tcg_out32(s, BCCTR | BO_ALWAYS); #elif defined(_CALL_ELF) && _CALL_ELF == 2 /* Compute from the incoming R12 value. */ - tb_ret_addr = s->code_ptr + 2; + tb_ret_addr = s->code_ptr + 2 + num_epilogue_insns; tcg_out32(s, ADDI | TAI(TCG_REG_RA, TCG_REG_R12, tcg_ptr_byte_diff(tb_ret_addr, s->code_buf))); tcg_out32(s, BCCTR | BO_ALWAYS); #else /* Reserve max 5 insns for the constant load. 
*/ - tb_ret_addr = s->code_ptr + 6; + tb_ret_addr = s->code_ptr + 6 + num_epilogue_insns; tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (intptr_t)tb_ret_addr); tcg_out32(s, BCCTR | BO_ALWAYS); while (s->code_ptr < tb_ret_addr) { tcg_out32(s, NOP); } #endif + tcg_out_epilogue(s); } else { tcg_out32(s, BCCTR | BO_ALWAYS); + tcg_out_epilogue(s); tb_ret_addr = s->code_ptr; } @@ -1869,6 +1886,85 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_out32(s, BCLR | BO_ALWAYS); } +static void tcg_out_jmp_short(uintptr_t jmp_addr, uintptr_t addr) +{ + tcg_insn_unit i1, i2; + uint64_t pair; + intptr_t diff = addr - jmp_addr; + + if (!in_range_b(diff)) + tcg_abort(); + + i1 = B | (diff & 0x3fffffc); + i2 = NOP; +#ifdef HOST_WORDS_BIGENDIAN + pair = (uint64_t)i1 << 32 | i2; +#else + pair = (uint64_t)i2 << 32 | i1; +#endif + *(uint64_t *)jmp_addr = pair; +} + +/* + * Emit trace profiling/prediction stubs. The code sequence is as following: + * S1: direct jump (the reloc part requires 4-byte alignment) + * S2: trace profiling stub + * S3: trace prediction stub + * S4: beginning of QEMU emulation code + * + * The jump inst of S1 is initially set to jump to S3 (i.e. skipping S2). + * Remember the offset of S3 (patch_next) which is used to turn the + * trace profiling off. Also remember the offset of S4 (patch_skip) + * so that the trace stubs can be skipped quickly while searching pc. + */ +static void tcg_out_hotpatch(TCGContext *s, int is_user, int emit_helper) +{ + tcg_insn_unit *label_ptr[2]; + TranslationBlock *tb = s->tb; + + /* S1: direct jump. Ensure the next insns are 8-byte aligned. */ + if ((uintptr_t)s->code_ptr & 7) + tcg_out32(s, NOP); + + tb->patch_jmp = (uint16_t)((intptr_t)s->code_ptr - (intptr_t)s->code_buf); + + /* S1: Direct Jump */ + if (is_user == 0 || emit_helper == 0) { + tcg_out_jmp_short((uintptr_t)s->code_ptr, (uintptr_t)(s->code_ptr + 4)); + s->code_ptr += 2; + tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR); + tcg_out32(s, BCCTR | BO_ALWAYS); + tb->patch_next = (uint16_t)((intptr_t)s->code_ptr - (intptr_t)s->code_buf); + return; + } + + label_ptr[0] = s->code_ptr; + s->code_ptr += 2; + tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR); + tcg_out32(s, BCCTR | BO_ALWAYS); + + /* S2: Trace Profiling Stub */ + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[1], tb->id); + tcg_out_call(s, (tcg_insn_unit *)helper_NET_profile); + tcg_out_jmp_short((uintptr_t)label_ptr[0], (uintptr_t)s->code_ptr); + + /* S3: Trace Prediction stub */ + tb->patch_next = (uint16_t)((intptr_t)s->code_ptr - (intptr_t)s->code_buf); + + tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_AREG0, + offsetof(CPUArchState, start_trace_prediction)); + tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_TMP1, 0, 1, 7, TCG_TYPE_I32); + label_ptr[1] = s->code_ptr; + tcg_out_bc_noaddr(s, tcg_to_bc[TCG_COND_EQ]); + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[1], tb->id); + tcg_out_call(s, (tcg_insn_unit *)helper_NET_predict); + reloc_pc14(label_ptr[1], s->code_ptr); + + /* S4: QEMU emulation code */ +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *const_args) { @@ -1906,6 +2002,17 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, tcg_out32(s, BCCTR | BO_ALWAYS); s->tb_next_offset[args[0]] = tcg_current_code_size(s); break; + case INDEX_op_hotpatch: + tcg_out_hotpatch(s, args[0], args[1]); + break; + case INDEX_op_jmp: 
+ if (const_args[0]) { + tcg_out_b(s, 0, (tcg_insn_unit *)args[0]); + } else { + tcg_out32(s, MTSPR | RS(args[0]) | CTR); + tcg_out32(s, BCCTR | BO_ALWAYS); + } + break; case INDEX_op_br: { TCGLabel *l = arg_label(args[0]); @@ -2436,6 +2543,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, } static const TCGTargetOpDef ppc_op_defs[] = { + { INDEX_op_hotpatch, { "i", "i" } }, + { INDEX_op_jmp, { "ri" } }, { INDEX_op_exit_tb, { } }, { INDEX_op_goto_tb, { } }, { INDEX_op_br, { } }, @@ -2572,6 +2681,10 @@ static const TCGTargetOpDef ppc_op_defs[] = { { INDEX_op_qemu_st_i64, { "S", "S", "S", "S" } }, #endif +#define DEF(name,a1,a2,a3,a4) { INDEX_op_##name, {} }, +#include "tcg-opc-vector.h" +#undef DEF + { -1 }, }; diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 0b9dd8f..3773253 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -39,7 +39,7 @@ extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64); Up to and including filling in the forward link immediately. We'll do proper termination of the end of the list after we finish translation. */ -static void tcg_emit_op(TCGContext *ctx, TCGOpcode opc, int args) +void tcg_emit_op(TCGContext *ctx, TCGOpcode opc, int args) { int oi = ctx->gen_next_op_idx; int ni = oi + 1; diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h index 4e20dc1..17d31df 100644 --- a/tcg/tcg-op.h +++ b/tcg/tcg-op.h @@ -28,6 +28,7 @@ /* Basic output routines. Not for general consumption. */ +void tcg_emit_op(TCGContext *ctx, TCGOpcode opc, int args); void tcg_gen_op1(TCGContext *, TCGOpcode, TCGArg); void tcg_gen_op2(TCGContext *, TCGOpcode, TCGArg, TCGArg); void tcg_gen_op3(TCGContext *, TCGOpcode, TCGArg, TCGArg, TCGArg); @@ -311,6 +312,16 @@ void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg); void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg); void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg); +static inline void tcg_gen_hotpatch(uint32_t arg1, uint32_t arg2) +{ + tcg_gen_op2(&tcg_ctx, INDEX_op_hotpatch, arg1, arg2); +} + +static inline void tcg_gen_annotate(uint32_t arg) +{ + tcg_gen_op1(&tcg_ctx, INDEX_op_annotate, arg); +} + static inline void tcg_gen_discard_i32(TCGv_i32 arg) { tcg_gen_op1_i32(INDEX_op_discard, arg); diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h index 6d0410c..5ba1e05 100644 --- a/tcg/tcg-opc.h +++ b/tcg/tcg-opc.h @@ -26,12 +26,16 @@ * DEF(name, oargs, iargs, cargs, flags) */ +DEF(hotpatch, 0, 0, 2, 0) +DEF(annotate, 0, 0, 1, TCG_OPF_NOT_PRESENT) + /* predefined ops */ DEF(discard, 1, 0, 0, TCG_OPF_NOT_PRESENT) DEF(set_label, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_NOT_PRESENT) /* variable number of parameters */ DEF(call, 0, 0, 3, TCG_OPF_CALL_CLOBBER | TCG_OPF_NOT_PRESENT) +DEF(jmp, 0, 1, 0, TCG_OPF_BB_END) DEF(br, 0, 0, 1, TCG_OPF_BB_END) @@ -191,6 +195,8 @@ DEF(qemu_ld_i64, DATA64_ARGS, TLADDR_ARGS, 1, DEF(qemu_st_i64, 0, TLADDR_ARGS + DATA64_ARGS, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT) +#include "tcg-opc-vector.h" + #undef TLADDR_ARGS #undef DATA64_ARGS #undef IMPL @@ -304,19 +304,22 @@ void tcg_pool_reset(TCGContext *s) s->pool_current = NULL; } -typedef struct TCGHelperInfo { - void *func; - const char *name; - unsigned flags; - unsigned sizemask; -} TCGHelperInfo; - #include "exec/helper-proto.h" -static const TCGHelperInfo all_helpers[] = { +const TCGHelperInfo all_helpers[] = { #include "exec/helper-tcg.h" }; +int tcg_num_helpers(void) +{ + return ARRAY_SIZE(all_helpers); +} + +const TCGHelperInfo *get_tcg_helpers(void) +{ + return all_helpers; +} + void tcg_context_init(TCGContext *s) { int op, total_args, n, i; @@ 
-413,7 +416,7 @@ void tcg_set_frame(TCGContext *s, int reg, intptr_t start, intptr_t size) s->frame_reg = reg; } -void tcg_func_start(TCGContext *s) +void tcg_func_start(TCGContext *s, TranslationBlock *tb) { tcg_pool_reset(s); s->nb_temps = s->nb_globals; @@ -432,8 +435,10 @@ void tcg_func_start(TCGContext *s) s->gen_last_op_idx = -1; s->gen_next_op_idx = 0; s->gen_next_parm_idx = 0; + s->vec_opparam_ptr = s->vec_opparam_buf; s->be = tcg_malloc(sizeof(TCGBackendData)); + s->tb = tb; } static inline void tcg_temp_alloc(TCGContext *s, int n) @@ -1004,6 +1009,7 @@ void tcg_dump_ops(TCGContext *s) char buf[128]; TCGOp *op; int oi; + const TCGArg *vec_args = s->vec_opparam_buf; for (oi = s->gen_first_op_idx; oi >= 0; oi = op->next) { int i, k, nb_oargs, nb_iargs, nb_cargs; @@ -1051,8 +1057,29 @@ void tcg_dump_ops(TCGContext *s) qemu_log(",%s", t); } } else { + int is_vec = 0; qemu_log(" %s ", def->name); + /* print vector opc */ + switch (c) { + case INDEX_op_vector_start ... INDEX_op_vector_end: + is_vec = 1; + break; + default: + break; + } + if (is_vec) { + qemu_log("$0x%" TCG_PRIlx, vec_args[0]); + if (c == INDEX_op_vload_128 || c == INDEX_op_vstore_128) + qemu_log(",%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), vec_args[1])); + else + qemu_log(",$0x%" TCG_PRIlx, vec_args[1]); + qemu_log(",$0x%" TCG_PRIlx, vec_args[2]); + qemu_log("\n"); + vec_args += 3; + continue; + } + nb_oargs = def->nb_oargs; nb_iargs = def->nb_iargs; nb_cargs = def->nb_cargs; @@ -1138,6 +1165,172 @@ void tcg_dump_ops(TCGContext *s) } } +void tcg_dump_ops_fn(TCGContext *s, void (*fn)(const char *)) +{ + char buf[128]; + char outbuf[128]; + TCGOp *op; + int oi; + const TCGArg *vec_args = s->vec_opparam_buf; + +#define printops(args...) \ + do { snprintf(outbuf, 128, ##args); (*fn)(outbuf); } while(0) + + for (oi = s->gen_first_op_idx; oi >= 0; oi = op->next) { + int i, k, nb_oargs, nb_iargs, nb_cargs; + const TCGOpDef *def; + const TCGArg *args; + TCGOpcode c; + + op = &s->gen_op_buf[oi]; + c = op->opc; + def = &tcg_op_defs[c]; + args = &s->gen_opparam_buf[op->args]; + + if (c == INDEX_op_insn_start) { + printops("%s ----", oi != s->gen_first_op_idx ? "\n" : ""); + + for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { + target_ulong a; +#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS + a = ((target_ulong)args[i * 2 + 1] << 32) | args[i * 2]; +#else + a = args[i]; +#endif + printops(" " TARGET_FMT_lx, a); + } + } else if (c == INDEX_op_call) { + /* variable number of arguments */ + nb_oargs = op->callo; + nb_iargs = op->calli; + nb_cargs = def->nb_cargs; + + /* function name, flags, out args */ + printops(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name, + tcg_find_helper(s, args[nb_oargs + nb_iargs]), + args[nb_oargs + nb_iargs + 1], nb_oargs); + for (i = 0; i < nb_oargs; i++) { + printops(",%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), + args[i])); + } + for (i = 0; i < nb_iargs; i++) { + TCGArg arg = args[nb_oargs + i]; + const char *t = "<dummy>"; + if (arg != TCG_CALL_DUMMY_ARG) { + t = tcg_get_arg_str_idx(s, buf, sizeof(buf), arg); + } + printops(",%s", t); + } + } else { + int is_vec = 0; + printops(" %s ", def->name); + + /* print vector opc */ + switch (c) { + case INDEX_op_vector_start ... 
INDEX_op_vector_end: + is_vec = 1; + break; + default: + break; + } + if (is_vec) { + printops("$0x%" TCG_PRIlx, vec_args[0]); + if (c == INDEX_op_vload_128 || c == INDEX_op_vstore_128) + printops(",%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), vec_args[1])); + else + printops(",$0x%" TCG_PRIlx, vec_args[1]); + printops(",$0x%" TCG_PRIlx, vec_args[2]); + printops("\n"); + vec_args += 3; + continue; + } + + nb_oargs = def->nb_oargs; + nb_iargs = def->nb_iargs; + nb_cargs = def->nb_cargs; + + k = 0; + for (i = 0; i < nb_oargs; i++) { + if (k != 0) { + printops(","); + } + printops("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), + args[k++])); + } + for (i = 0; i < nb_iargs; i++) { + if (k != 0) { + printops(","); + } + printops("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), + args[k++])); + } + switch (c) { + case INDEX_op_brcond_i32: + case INDEX_op_setcond_i32: + case INDEX_op_movcond_i32: + case INDEX_op_brcond2_i32: + case INDEX_op_setcond2_i32: + case INDEX_op_brcond_i64: + case INDEX_op_setcond_i64: + case INDEX_op_movcond_i64: + if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]]) { + printops(",%s", cond_name[args[k++]]); + } else { + printops(",$0x%" TCG_PRIlx, args[k++]); + } + i = 1; + break; + case INDEX_op_qemu_ld_i32: + case INDEX_op_qemu_st_i32: + case INDEX_op_qemu_ld_i64: + case INDEX_op_qemu_st_i64: + { + TCGMemOpIdx oi = args[k++]; + TCGMemOp op = get_memop(oi); + unsigned ix = get_mmuidx(oi); + + if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) { + printops(",$0x%x,%u", op, ix); + } else { + const char *s_al = "", *s_op; + if (op & MO_AMASK) { + if ((op & MO_AMASK) == MO_ALIGN) { + s_al = "al+"; + } else { + s_al = "un+"; + } + } + s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)]; + printops(",%s%s,%u", s_al, s_op, ix); + } + i = 1; + } + break; + default: + i = 0; + break; + } + switch (c) { + case INDEX_op_set_label: + case INDEX_op_br: + case INDEX_op_brcond_i32: + case INDEX_op_brcond_i64: + case INDEX_op_brcond2_i32: + printops("%s$L%d", k ? "," : "", arg_label(args[k])->id); + i++, k++; + break; + default: + break; + } + for (; i < nb_cargs; i++, k++) { + printops("%s$0x%" TCG_PRIlx, k ? "," : "", args[k]); + } + } + printops("\n"); + } +#undef printops +} + /* we give more priority to constraints with less registers */ static int get_constraint_priority(const TCGOpDef *def, int k) { @@ -1334,10 +1527,11 @@ static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps, /* Liveness analysis : update the opc_dead_args array to tell if a given input arguments is dead. Instructions updating dead temporaries are removed. */ -static void tcg_liveness_analysis(TCGContext *s) +void tcg_liveness_analysis(TCGContext *s) { uint8_t *dead_temps, *mem_temps; int oi, oi_prev, nb_ops; + TCGArg *vec_args = s->vec_opparam_ptr; nb_ops = s->gen_next_op_idx; s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t)); @@ -1427,6 +1621,7 @@ static void tcg_liveness_analysis(TCGContext *s) } } break; + case INDEX_op_annotate: case INDEX_op_insn_start: break; case INDEX_op_discard: @@ -1434,7 +1629,11 @@ static void tcg_liveness_analysis(TCGContext *s) dead_temps[args[0]] = 1; mem_temps[args[0]] = 0; break; - + case INDEX_op_vector_start ... 
INDEX_op_vector_end: + vec_args -= 3; + if (opc == INDEX_op_vload_128 || opc == INDEX_op_vstore_128) + dead_temps[vec_args[1]] = 0; + break; case INDEX_op_add2_i32: opc_new = INDEX_op_add_i32; goto do_addsub2; @@ -1577,7 +1776,7 @@ static void tcg_liveness_analysis(TCGContext *s) } #else /* dummy liveness analysis */ -static void tcg_liveness_analysis(TCGContext *s) +void tcg_liveness_analysis(TCGContext *s) { int nb_ops; nb_ops = s->gen_opc_ptr - s->gen_opc_buf; @@ -2418,6 +2617,8 @@ int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf) s->gen_insn_data[num_insns][i] = a; } break; + case INDEX_op_annotate: + break; case INDEX_op_discard: temp_dead(s, args[0]); break; @@ -2554,15 +2755,15 @@ struct jit_descriptor { struct jit_code_entry *first_entry; }; -void __jit_debug_register_code(void) __attribute__((noinline)); -void __jit_debug_register_code(void) +void qemu_jit_debug_register_code(void) __attribute__((noinline)); +void qemu_jit_debug_register_code(void) { asm(""); } /* Must statically initialize the version, because GDB may check the version before we can set it. */ -struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 }; +struct jit_descriptor qemu_jit_debug_descriptor = { 1, 0, 0, 0 }; /* End GDB interface. */ @@ -2771,10 +2972,10 @@ static void tcg_register_jit_int(void *buf_ptr, size_t buf_size, one_entry.symfile_addr = img; one_entry.symfile_size = img_size; - __jit_debug_descriptor.action_flag = JIT_REGISTER_FN; - __jit_debug_descriptor.relevant_entry = &one_entry; - __jit_debug_descriptor.first_entry = &one_entry; - __jit_debug_register_code(); + qemu_jit_debug_descriptor.action_flag = JIT_REGISTER_FN; + qemu_jit_debug_descriptor.relevant_entry = &one_entry; + qemu_jit_debug_descriptor.first_entry = &one_entry; + qemu_jit_debug_register_code(); } #else /* No support for the feature. Provide the entry point expected by exec.c, @@ -2790,3 +2991,34 @@ void tcg_register_jit(void *buf, size_t buf_size) { } #endif /* ELF_HOST_MACHINE */ + + +/* + * copy_tcg_context_global() + * Copy thread's local TCG context to the global TCG context. + * + * We first initialize main thread's tcg_ctx and copy it to tcg_ctx_global + * at this point. The tcg_ctx_global is copied to each thread's local + * tcg_ctx later using copy_tcg_context(). + * + * Note: This copy must be done after tcg_ctx is completely initialized + * and should be setup by the main thread. + */ +void copy_tcg_context_global(void) +{ + static int init_once = 0; + if (init_once == 1) + return; + + memcpy(&tcg_ctx_global, &tcg_ctx, sizeof(TCGContext)); + init_once = 1; +} + +/* + * copy_tcg_context() + * Copy the global TCG context to the thread's local TCG context. + */ +void copy_tcg_context(void) +{ + memcpy(&tcg_ctx, &tcg_ctx_global, sizeof(TCGContext)); +} @@ -193,6 +193,7 @@ typedef struct TCGPool { #define TCG_POOL_CHUNK_SIZE 32768 +#define TCG_MAX_LABELS 512 #define TCG_MAX_TEMPS 512 #define TCG_MAX_INSNS 512 @@ -564,7 +565,7 @@ struct TCGContext { /* Threshold to flush the translated code buffer. */ void *code_gen_highwater; - TBContext tb_ctx; + TBContext *tb_ctx; /* The TCGBackendData structure is private to tcg-target.c. 
*/ struct TCGBackendData *be; @@ -578,12 +579,33 @@ struct TCGContext { TCGOp gen_op_buf[OPC_BUF_SIZE]; TCGArg gen_opparam_buf[OPPARAM_BUF_SIZE]; + TCGArg vec_opparam_buf[OPPARAM_BUF_SIZE]; + TCGArg *vec_opparam_ptr; uint16_t gen_insn_end_off[TCG_MAX_INSNS]; target_ulong gen_insn_data[TCG_MAX_INSNS][TARGET_INSN_START_WORDS]; + + TranslationBlock *tb; }; -extern TCGContext tcg_ctx; +extern TCGContext tcg_ctx_global; +extern __thread TCGContext tcg_ctx; + +typedef struct TCGHelperInfo { + void *func; + const char *name; + unsigned flags; + unsigned sizemask; +} TCGHelperInfo; + +void copy_tcg_context_global(void); +void copy_tcg_context(void); +int tcg_num_helpers(void); +const TCGHelperInfo *get_tcg_helpers(void); +void tcg_liveness_analysis(TCGContext *s); +void tcg_dump_ops_fn(TCGContext *s, void (*fn)(const char *)); +target_long decode_sleb128(uint8_t **pp); + /* The number of opcodes emitted so far. */ static inline int tcg_op_buf_count(void) @@ -624,7 +646,7 @@ static inline void *tcg_malloc(int size) void tcg_context_init(TCGContext *s); void tcg_prologue_init(TCGContext *s); -void tcg_func_start(TCGContext *s); +void tcg_func_start(TCGContext *s, TranslationBlock *tb); int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf); @@ -822,7 +844,7 @@ static inline TCGLabel *arg_label(TCGArg i) static inline ptrdiff_t tcg_ptr_byte_diff(void *a, void *b) { - return a - b; + return (ptrdiff_t)a - (ptrdiff_t)b; } /** @@ -876,7 +898,7 @@ static inline TCGMemOpIdx make_memop_idx(TCGMemOp op, unsigned idx) */ static inline TCGMemOp get_memop(TCGMemOpIdx oi) { - return oi >> 4; + return (TCGMemOp)(oi >> 4); } /** @@ -939,6 +961,7 @@ static inline unsigned get_mmuidx(TCGMemOpIdx oi) #define TB_EXIT_IDX1 1 #define TB_EXIT_ICOUNT_EXPIRED 2 #define TB_EXIT_REQUESTED 3 +#define TB_EXIT_LLVM TB_EXIT_ICOUNT_EXPIRED #ifdef HAVE_TCG_QEMU_TB_EXEC uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr); @@ -1011,6 +1034,31 @@ uint32_t helper_be_ldl_cmmu(CPUArchState *env, target_ulong addr, uint64_t helper_be_ldq_cmmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, uintptr_t retaddr); + +/* Value zero-extended to tcg register size. */ +tcg_target_ulong llvm_ret_ldub_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi); +tcg_target_ulong llvm_le_lduw_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi); +tcg_target_ulong llvm_le_ldul_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi); +uint64_t llvm_le_ldq_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi); +tcg_target_ulong llvm_be_lduw_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi); +tcg_target_ulong llvm_be_ldul_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi); +uint64_t llvm_be_ldq_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi); + +/* Value sign-extended to tcg register size. 
*/ +tcg_target_ulong llvm_ret_ldsb_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi); +tcg_target_ulong llvm_le_ldsw_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi); +tcg_target_ulong llvm_le_ldsl_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi); +tcg_target_ulong llvm_be_ldsw_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi); +tcg_target_ulong llvm_be_ldsl_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi); + +void llvm_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val, TCGMemOpIdx oi); +void llvm_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, TCGMemOpIdx oi); +void llvm_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, TCGMemOpIdx oi); +void llvm_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, TCGMemOpIdx oi); +void llvm_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, TCGMemOpIdx oi); +void llvm_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, TCGMemOpIdx oi); +void llvm_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, TCGMemOpIdx oi); + /* Temporary aliases until backends are converted. */ #ifdef TARGET_WORDS_BIGENDIAN # define helper_ret_ldsw_mmu helper_be_ldsw_mmu |
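
The tcg.c/tcg.h changes above split the TCG context into a global template (tcg_ctx_global) and a per-thread copy (__thread tcg_ctx): copy_tcg_context_global() publishes the main thread's fully initialized context, and copy_tcg_context() clones it into each translation thread. A minimal, hypothetical usage sketch follows; the setup and worker functions are illustrative and not part of this commit, while the copy helpers, tcg_ctx and tcg_ctx_global come from the patch.

```c
/* Hypothetical usage of the context-copy helpers added by this commit. */
#include <pthread.h>
#include "tcg.h"

static void main_thread_tcg_setup(void)
{
    tcg_context_init(&tcg_ctx);    /* fully initialize the main thread's context */
    tcg_prologue_init(&tcg_ctx);
    copy_tcg_context_global();     /* publish it as the tcg_ctx_global template  */
}

static void *translation_worker(void *arg)
{
    copy_tcg_context();            /* clone tcg_ctx_global into this thread's
                                      __thread tcg_ctx before generating code    */
    /* ... per-TB work, e.g. tcg_func_start(&tcg_ctx, tb) followed by op
       generation and tcg_gen_code(&tcg_ctx, buf), using the local context ... */
    return NULL;
}
```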