diff options
Diffstat (limited to 'tcg')
-rw-r--r-- | tcg/README | 22 | ||||
-rw-r--r-- | tcg/i386/tcg-target.c | 218 | ||||
-rw-r--r-- | tcg/mips/tcg-target.c | 7 | ||||
-rw-r--r-- | tcg/optimize.c | 3 | ||||
-rw-r--r-- | tcg/tcg-op.h | 18 | ||||
-rw-r--r-- | tcg/tcg-opc.h | 29 | ||||
-rw-r--r-- | tcg/tcg.c | 449 | ||||
-rw-r--r-- | tcg/tcg.h | 29 |
8 files changed, 467 insertions, 308 deletions
@@ -77,11 +77,20 @@ destroyed, but local temporaries and globals are preserved. Using the tcg_gen_helper_x_y it is possible to call any function taking i32, i64 or pointer types. By default, before calling a helper, all globals are stored at their canonical location and it is assumed -that the function can modify them. This can be overridden by the -TCG_CALL_CONST function modifier. By default, the helper is allowed to -modify the CPU state or raise an exception. This can be overridden by -the TCG_CALL_PURE function modifier, in which case the call to the -function is removed if the return value is not used. +that the function can modify them. By default, the helper is allowed to +modify the CPU state or raise an exception. + +This can be overridden using the following function modifiers: +- TCG_CALL_NO_READ_GLOBALS means that the helper does not read globals, + either directly or via an exception. They will not be saved to their + canonical locations before calling the helper. +- TCG_CALL_NO_WRITE_GLOBALS means that the helper does not modify any globals. + They will only be saved to their canonical location before calling helpers, + but they won't be reloaded afterwards. +- TCG_CALL_NO_SIDE_EFFECTS means that the call to the function is removed if + the return value is not used. + +Note that TCG_CALL_NO_READ_GLOBALS implies TCG_CALL_NO_WRITE_GLOBALS. On some TCG targets (e.g. x86), several calling conventions are supported. @@ -349,6 +358,9 @@ st32_i64 t0, t1, offset write(t0, t1 + offset) Write 8, 16, 32 or 64 bits to host memory. +All these opcodes assume that the pointed host memory doesn't correspond +to a global. In the latter case the behaviour is unpredictable. + ********* 64-bit target on 32-bit host support The following opcodes are internal to TCG. 
Thus they are to be implemented by diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c index 4952c05..e45a5a0 100644 --- a/tcg/i386/tcg-target.c +++ b/tcg/i386/tcg-target.c @@ -92,7 +92,6 @@ static const int tcg_target_call_oarg_regs[] = { #if TCG_TARGET_REG_BITS == 64 # define TCG_REG_L0 tcg_target_call_iarg_regs[0] # define TCG_REG_L1 tcg_target_call_iarg_regs[1] -# define TCG_REG_L2 tcg_target_call_iarg_regs[2] #else # define TCG_REG_L0 TCG_REG_EAX # define TCG_REG_L1 TCG_REG_EDX @@ -181,14 +180,11 @@ static int target_parse_constraint(TCGArgConstraint *ct, const char **pct_str) ct->ct |= TCG_CT_REG; #if TCG_TARGET_REG_BITS == 64 tcg_regset_set32(ct->u.regs, 0, 0xffff); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_L2); #else tcg_regset_set32(ct->u.regs, 0, 0xff); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0); - tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1); #endif + tcg_regset_reset_reg(ct->u.regs, TCG_REG_L0); + tcg_regset_reset_reg(ct->u.regs, TCG_REG_L1); break; case 'e': @@ -236,11 +232,13 @@ static inline int tcg_target_const_match(tcg_target_long val, # define P_REXW 0x800 /* Set REX.W = 1 */ # define P_REXB_R 0x1000 /* REG field as byte register */ # define P_REXB_RM 0x2000 /* R/M field as byte register */ +# define P_GS 0x4000 /* gs segment override */ #else # define P_ADDR32 0 # define P_REXW 0 # define P_REXB_R 0 # define P_REXB_RM 0 +# define P_GS 0 #endif #define OPC_ARITH_EvIz (0x81) @@ -356,6 +354,9 @@ static void tcg_out_opc(TCGContext *s, int opc, int r, int rm, int x) { int rex; + if (opc & P_GS) { + tcg_out8(s, 0x65); + } if (opc & P_DATA16) { /* We should never be asking for both 16 and 64-bit operation. */ assert((opc & P_REXW) == 0); @@ -1016,12 +1017,12 @@ static const void *qemu_st_helpers[4] = { LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses) positions of the displacements of forward jumps to the TLB miss case. 
- First argument register is loaded with the low part of the address. + Second argument register is loaded with the low part of the address. In the TLB hit case, it has been adjusted as indicated by the TLB and so is a host address. In the TLB miss case, it continues to hold a guest address. - Second argument register is clobbered. */ + First argument register is clobbered. */ static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx, int mem_index, int s_bits, @@ -1039,25 +1040,25 @@ static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx, rexw = P_REXW; } - tcg_out_mov(s, type, r1, addrlo); tcg_out_mov(s, type, r0, addrlo); + tcg_out_mov(s, type, r1, addrlo); - tcg_out_shifti(s, SHIFT_SHR + rexw, r1, + tcg_out_shifti(s, SHIFT_SHR + rexw, r0, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - tgen_arithi(s, ARITH_AND + rexw, r0, - TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0); tgen_arithi(s, ARITH_AND + rexw, r1, + TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0); + tgen_arithi(s, ARITH_AND + rexw, r0, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0); - tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, r1, TCG_AREG0, r1, 0, + tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, r0, TCG_AREG0, r0, 0, offsetof(CPUArchState, tlb_table[mem_index][0]) + which); - /* cmp 0(r1), r0 */ - tcg_out_modrm_offset(s, OPC_CMP_GvEv + rexw, r0, r1, 0); + /* cmp 0(r0), r1 */ + tcg_out_modrm_offset(s, OPC_CMP_GvEv + rexw, r1, r0, 0); - tcg_out_mov(s, type, r0, addrlo); + tcg_out_mov(s, type, r1, addrlo); /* jne label1 */ tcg_out8(s, OPC_JCC_short + JCC_JNE); @@ -1065,8 +1066,8 @@ static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx, s->code_ptr++; if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { - /* cmp 4(r1), addrhi */ - tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r1, 4); + /* cmp 4(r0), addrhi */ + tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r0, 4); /* jne label1 */ tcg_out8(s, OPC_JCC_short + JCC_JNE); @@ -1076,14 +1077,31 @@ static inline void 
tcg_out_tlb_load(TCGContext *s, int addrlo_idx, /* TLB Hit. */ - /* add addend(r1), r0 */ - tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r0, r1, + /* add addend(r0), r1 */ + tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r1, r0, offsetof(CPUTLBEntry, addend) - which); } -#endif +#elif defined(__x86_64__) && defined(__linux__) +# include <asm/prctl.h> +# include <sys/prctl.h> + +int arch_prctl(int code, unsigned long addr); + +static int guest_base_flags; +static inline void setup_guest_base_seg(void) +{ + if (arch_prctl(ARCH_SET_GS, GUEST_BASE) == 0) { + guest_base_flags = P_GS; + } +} +#else +# define guest_base_flags 0 +static inline void setup_guest_base_seg(void) { } +#endif /* SOFTMMU */ static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi, - int base, tcg_target_long ofs, int sizeop) + int base, tcg_target_long ofs, int seg, + int sizeop) { #ifdef TARGET_WORDS_BIGENDIAN const int bswap = 1; @@ -1092,28 +1110,29 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi, #endif switch (sizeop) { case 0: - tcg_out_modrm_offset(s, OPC_MOVZBL, datalo, base, ofs); + tcg_out_modrm_offset(s, OPC_MOVZBL + seg, datalo, base, ofs); break; case 0 | 4: - tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW, datalo, base, ofs); + tcg_out_modrm_offset(s, OPC_MOVSBL + P_REXW + seg, datalo, base, ofs); break; case 1: - tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs); + tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs); if (bswap) { tcg_out_rolw_8(s, datalo); } break; case 1 | 4: if (bswap) { - tcg_out_modrm_offset(s, OPC_MOVZWL, datalo, base, ofs); + tcg_out_modrm_offset(s, OPC_MOVZWL + seg, datalo, base, ofs); tcg_out_rolw_8(s, datalo); tcg_out_modrm(s, OPC_MOVSWL + P_REXW, datalo, datalo); } else { - tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW, datalo, base, ofs); + tcg_out_modrm_offset(s, OPC_MOVSWL + P_REXW + seg, + datalo, base, ofs); } break; case 2: - tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs); + 
tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs); if (bswap) { tcg_out_bswap32(s, datalo); } @@ -1121,17 +1140,18 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi, #if TCG_TARGET_REG_BITS == 64 case 2 | 4: if (bswap) { - tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs); + tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, datalo, base, ofs); tcg_out_bswap32(s, datalo); tcg_out_ext32s(s, datalo, datalo); } else { - tcg_out_modrm_offset(s, OPC_MOVSLQ, datalo, base, ofs); + tcg_out_modrm_offset(s, OPC_MOVSLQ + seg, datalo, base, ofs); } break; #endif case 3: if (TCG_TARGET_REG_BITS == 64) { - tcg_out_ld(s, TCG_TYPE_I64, datalo, base, ofs); + tcg_out_modrm_offset(s, OPC_MOVL_GvEv + P_REXW + seg, + datalo, base, ofs); if (bswap) { tcg_out_bswap64(s, datalo); } @@ -1142,11 +1162,15 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo, int datahi, datahi = t; } if (base != datalo) { - tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs); - tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4); + tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, + datalo, base, ofs); + tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, + datahi, base, ofs + 4); } else { - tcg_out_ld(s, TCG_TYPE_I32, datahi, base, ofs + 4); - tcg_out_ld(s, TCG_TYPE_I32, datalo, base, ofs); + tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, + datahi, base, ofs + 4); + tcg_out_modrm_offset(s, OPC_MOVL_GvEv + seg, + datalo, base, ofs); } if (bswap) { tcg_out_bswap32(s, datalo); @@ -1169,9 +1193,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int addrlo_idx; #if defined(CONFIG_SOFTMMU) int mem_index, s_bits; -#if TCG_TARGET_REG_BITS == 64 - int arg_idx; -#else +#if TCG_TARGET_REG_BITS == 32 int stack_adjust; #endif uint8_t *label_ptr[3]; @@ -1192,7 +1214,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, label_ptr, offsetof(CPUTLBEntry, addr_read)); /* TLB Hit. 
*/ - tcg_out_qemu_ld_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, opc); + tcg_out_qemu_ld_direct(s, data_reg, data_reg2, TCG_REG_L1, 0, 0, opc); /* jmp label2 */ tcg_out8(s, OPC_JMP_short); @@ -1220,15 +1242,9 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, tcg_out_push(s, TCG_AREG0); stack_adjust += 4; #else - /* The first argument is already loaded with addrlo. */ - arg_idx = 1; - tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx], - mem_index); - /* XXX/FIXME: suboptimal */ - tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3], TCG_REG_L2); - tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2], TCG_REG_L1); - tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1], TCG_REG_L0); tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0); + /* The second argument is already loaded with addrlo. */ + tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], mem_index); #endif tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]); @@ -1285,29 +1301,31 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, { int32_t offset = GUEST_BASE; int base = args[addrlo_idx]; - - if (TCG_TARGET_REG_BITS == 64) { - /* ??? We assume all operations have left us with register - contents that are zero extended. So far this appears to - be true. If we want to enforce this, we can either do - an explicit zero-extension here, or (if GUEST_BASE == 0) - use the ADDR32 prefix. For now, do nothing. */ - - if (offset != GUEST_BASE) { - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE); - tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base); - base = TCG_REG_L0; - offset = 0; - } + int seg = 0; + + /* ??? We assume all operations have left us with register contents + that are zero extended. So far this appears to be true. If we + want to enforce this, we can either do an explicit zero-extension + here, or (if GUEST_BASE == 0, or a segment register is in use) + use the ADDR32 prefix. For now, do nothing. 
*/ + if (GUEST_BASE && guest_base_flags) { + seg = guest_base_flags; + offset = 0; + } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE); + tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base); + base = TCG_REG_L1; + offset = 0; } - tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, opc); + tcg_out_qemu_ld_direct(s, data_reg, data_reg2, base, offset, seg, opc); } #endif } static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi, - int base, tcg_target_long ofs, int sizeop) + int base, tcg_target_long ofs, int seg, + int sizeop) { #ifdef TARGET_WORDS_BIGENDIAN const int bswap = 1; @@ -1317,12 +1335,13 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi, /* ??? Ideally we wouldn't need a scratch register. For user-only, we could perform the bswap twice to restore the original value instead of moving to the scratch. But as it is, the L constraint - means that TCG_REG_L1 is definitely free here. */ - const int scratch = TCG_REG_L1; + means that TCG_REG_L0 is definitely free here. 
*/ + const int scratch = TCG_REG_L0; switch (sizeop) { case 0: - tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R, datalo, base, ofs); + tcg_out_modrm_offset(s, OPC_MOVB_EvGv + P_REXB_R + seg, + datalo, base, ofs); break; case 1: if (bswap) { @@ -1330,7 +1349,8 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi, tcg_out_rolw_8(s, scratch); datalo = scratch; } - tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16, datalo, base, ofs); + tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_DATA16 + seg, + datalo, base, ofs); break; case 2: if (bswap) { @@ -1338,7 +1358,7 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi, tcg_out_bswap32(s, scratch); datalo = scratch; } - tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs); + tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs); break; case 3: if (TCG_TARGET_REG_BITS == 64) { @@ -1347,17 +1367,18 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi, tcg_out_bswap64(s, scratch); datalo = scratch; } - tcg_out_st(s, TCG_TYPE_I64, datalo, base, ofs); + tcg_out_modrm_offset(s, OPC_MOVL_EvGv + P_REXW + seg, + datalo, base, ofs); } else if (bswap) { tcg_out_mov(s, TCG_TYPE_I32, scratch, datahi); tcg_out_bswap32(s, scratch); - tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs); + tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs); tcg_out_mov(s, TCG_TYPE_I32, scratch, datalo); tcg_out_bswap32(s, scratch); - tcg_out_st(s, TCG_TYPE_I32, scratch, base, ofs + 4); + tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, scratch, base, ofs+4); } else { - tcg_out_st(s, TCG_TYPE_I32, datalo, base, ofs); - tcg_out_st(s, TCG_TYPE_I32, datahi, base, ofs + 4); + tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datalo, base, ofs); + tcg_out_modrm_offset(s, OPC_MOVL_EvGv + seg, datahi, base, ofs+4); } break; default: @@ -1391,7 +1412,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, label_ptr, offsetof(CPUTLBEntry, addr_write)); /* TLB Hit. 
*/ - tcg_out_qemu_st_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, opc); + tcg_out_qemu_st_direct(s, data_reg, data_reg2, TCG_REG_L1, 0, 0, opc); /* jmp label2 */ tcg_out8(s, OPC_JMP_short); @@ -1425,15 +1446,12 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, tcg_out_push(s, TCG_AREG0); stack_adjust += 4; #else + tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0); + /* The second argument is already loaded with addrlo. */ tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32), - TCG_REG_L1, data_reg); - tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_L2, mem_index); + tcg_target_call_iarg_regs[2], data_reg); + tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], mem_index); stack_adjust = 0; - /* XXX/FIXME: suboptimal */ - tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3], TCG_REG_L2); - tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2], TCG_REG_L1); - tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1], TCG_REG_L0); - tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0); #endif tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]); @@ -1451,23 +1469,24 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, { int32_t offset = GUEST_BASE; int base = args[addrlo_idx]; - - if (TCG_TARGET_REG_BITS == 64) { - /* ??? We assume all operations have left us with register - contents that are zero extended. So far this appears to - be true. If we want to enforce this, we can either do - an explicit zero-extension here, or (if GUEST_BASE == 0) - use the ADDR32 prefix. For now, do nothing. */ - - if (offset != GUEST_BASE) { - tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE); - tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base); - base = TCG_REG_L0; - offset = 0; - } + int seg = 0; + + /* ??? We assume all operations have left us with register contents + that are zero extended. So far this appears to be true. 
If we + want to enforce this, we can either do an explicit zero-extension + here, or (if GUEST_BASE == 0, or a segment register is in use) + use the ADDR32 prefix. For now, do nothing. */ + if (GUEST_BASE && guest_base_flags) { + seg = guest_base_flags; + offset = 0; + } else if (TCG_TARGET_REG_BITS == 64 && offset != GUEST_BASE) { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE); + tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base); + base = TCG_REG_L1; + offset = 0; } - tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, opc); + tcg_out_qemu_st_direct(s, data_reg, data_reg2, base, offset, seg, opc); } #endif } @@ -2061,6 +2080,13 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_out_pop(s, tcg_target_callee_save_regs[i]); } tcg_out_opc(s, OPC_RET, 0, 0, 0); + +#if !defined(CONFIG_SOFTMMU) + /* Try to set up a segment register to point to GUEST_BASE. */ + if (GUEST_BASE) { + setup_guest_base_seg(); + } +#endif } static void tcg_target_init(TCGContext *s) diff --git a/tcg/mips/tcg-target.c b/tcg/mips/tcg-target.c index 7e4013e..ae2b274 100644 --- a/tcg/mips/tcg-target.c +++ b/tcg/mips/tcg-target.c @@ -323,6 +323,9 @@ enum { OPC_BLTZ = OPC_REGIMM | (0x00 << 16), OPC_BGEZ = OPC_REGIMM | (0x01 << 16), + OPC_SPECIAL2 = 0x1c << 26, + OPC_MUL = OPC_SPECIAL2 | 0x002, + OPC_SPECIAL3 = 0x1f << 26, OPC_INS = OPC_SPECIAL3 | 0x004, OPC_WSBH = OPC_SPECIAL3 | 0x0a0, @@ -1403,8 +1406,12 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, tcg_out_mov(s, TCG_TYPE_I32, args[0], TCG_REG_AT); break; case INDEX_op_mul_i32: +#if defined(__mips_isa_rev) && (__mips_isa_rev >= 1) + tcg_out_opc_reg(s, OPC_MUL, args[0], args[1], args[2]); +#else tcg_out_opc_reg(s, OPC_MULT, 0, args[1], args[2]); tcg_out_opc_reg(s, OPC_MFLO, args[0], 0, 0); +#endif break; case INDEX_op_mulu2_i32: tcg_out_opc_reg(s, OPC_MULTU, 0, args[2], args[3]); diff --git a/tcg/optimize.c b/tcg/optimize.c index a06c8eb..8e5d918 100644 --- a/tcg/optimize.c +++ b/tcg/optimize.c @@ -915,7 
+915,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr, case INDEX_op_call: nb_call_args = (args[0] >> 16) + (args[0] & 0xffff); - if (!(args[nb_call_args + 1] & (TCG_CALL_CONST | TCG_CALL_PURE))) { + if (!(args[nb_call_args + 1] & (TCG_CALL_NO_READ_GLOBALS | + TCG_CALL_NO_WRITE_GLOBALS))) { for (i = 0; i < nb_globals; i++) { reset_temp(i); } diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h index 8100a5a..8d1da2b 100644 --- a/tcg/tcg-op.h +++ b/tcg/tcg-op.h @@ -401,10 +401,10 @@ static inline void tcg_gen_helperN(void *func, int flags, int sizemask, } /* Note: Both tcg_gen_helper32() and tcg_gen_helper64() are currently - reserved for helpers in tcg-runtime.c. These helpers are all const - and pure, hence the call to tcg_gen_callN() with TCG_CALL_CONST | - TCG_CALL_PURE. This may need to be adjusted if these functions - start to be used with other helpers. */ + reserved for helpers in tcg-runtime.c. These helpers all do not read + globals and do not have side effects, hence the call to tcg_gen_callN() + with TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_SIDE_EFFECTS. This may need + to be adjusted if these functions start to be used with other helpers. 
*/ static inline void tcg_gen_helper32(void *func, int sizemask, TCGv_i32 ret, TCGv_i32 a, TCGv_i32 b) { @@ -413,8 +413,9 @@ static inline void tcg_gen_helper32(void *func, int sizemask, TCGv_i32 ret, fn = tcg_const_ptr(func); args[0] = GET_TCGV_I32(a); args[1] = GET_TCGV_I32(b); - tcg_gen_callN(&tcg_ctx, fn, TCG_CALL_CONST | TCG_CALL_PURE, sizemask, - GET_TCGV_I32(ret), 2, args); + tcg_gen_callN(&tcg_ctx, fn, + TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_SIDE_EFFECTS, + sizemask, GET_TCGV_I32(ret), 2, args); tcg_temp_free_ptr(fn); } @@ -426,8 +427,9 @@ static inline void tcg_gen_helper64(void *func, int sizemask, TCGv_i64 ret, fn = tcg_const_ptr(func); args[0] = GET_TCGV_I64(a); args[1] = GET_TCGV_I64(b); - tcg_gen_callN(&tcg_ctx, fn, TCG_CALL_CONST | TCG_CALL_PURE, sizemask, - GET_TCGV_I64(ret), 2, args); + tcg_gen_callN(&tcg_ctx, fn, + TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_SIDE_EFFECTS, + sizemask, GET_TCGV_I64(ret), 2, args); tcg_temp_free_ptr(fn); } diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h index 04cb7ca..9651063 100644 --- a/tcg/tcg-opc.h +++ b/tcg/tcg-opc.h @@ -37,8 +37,8 @@ DEF(nopn, 0, 0, 1, 0) /* variable number of parameters */ DEF(discard, 1, 0, 0, 0) DEF(set_label, 0, 0, 1, TCG_OPF_BB_END) -DEF(call, 0, 1, 2, TCG_OPF_SIDE_EFFECTS) /* variable number of parameters */ -DEF(br, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS) +DEF(call, 0, 1, 2, TCG_OPF_CALL_CLOBBER) /* variable number of parameters */ +DEF(br, 0, 0, 1, TCG_OPF_BB_END) #define IMPL(X) (X ? 
0 : TCG_OPF_NOT_PRESENT) #if TCG_TARGET_REG_BITS == 32 @@ -57,9 +57,9 @@ DEF(ld8s_i32, 1, 1, 1, 0) DEF(ld16u_i32, 1, 1, 1, 0) DEF(ld16s_i32, 1, 1, 1, 0) DEF(ld_i32, 1, 1, 1, 0) -DEF(st8_i32, 0, 2, 1, TCG_OPF_SIDE_EFFECTS) -DEF(st16_i32, 0, 2, 1, TCG_OPF_SIDE_EFFECTS) -DEF(st_i32, 0, 2, 1, TCG_OPF_SIDE_EFFECTS) +DEF(st8_i32, 0, 2, 1, 0) +DEF(st16_i32, 0, 2, 1, 0) +DEF(st_i32, 0, 2, 1, 0) /* arith */ DEF(add_i32, 1, 2, 0, 0) DEF(sub_i32, 1, 2, 0, 0) @@ -81,12 +81,11 @@ DEF(rotl_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rot_i32)) DEF(rotr_i32, 1, 2, 0, IMPL(TCG_TARGET_HAS_rot_i32)) DEF(deposit_i32, 1, 2, 2, IMPL(TCG_TARGET_HAS_deposit_i32)) -DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS) +DEF(brcond_i32, 0, 2, 2, TCG_OPF_BB_END) DEF(add2_i32, 2, 4, 0, IMPL(TCG_TARGET_REG_BITS == 32)) DEF(sub2_i32, 2, 4, 0, IMPL(TCG_TARGET_REG_BITS == 32)) -DEF(brcond2_i32, 0, 4, 2, - TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS | IMPL(TCG_TARGET_REG_BITS == 32)) +DEF(brcond2_i32, 0, 4, 2, TCG_OPF_BB_END | IMPL(TCG_TARGET_REG_BITS == 32)) DEF(mulu2_i32, 2, 2, 0, IMPL(TCG_TARGET_REG_BITS == 32)) DEF(setcond2_i32, 1, 4, 1, IMPL(TCG_TARGET_REG_BITS == 32)) @@ -116,10 +115,10 @@ DEF(ld16s_i64, 1, 1, 1, IMPL64) DEF(ld32u_i64, 1, 1, 1, IMPL64) DEF(ld32s_i64, 1, 1, 1, IMPL64) DEF(ld_i64, 1, 1, 1, IMPL64) -DEF(st8_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS | IMPL64) -DEF(st16_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS | IMPL64) -DEF(st32_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS | IMPL64) -DEF(st_i64, 0, 2, 1, TCG_OPF_SIDE_EFFECTS | IMPL64) +DEF(st8_i64, 0, 2, 1, IMPL64) +DEF(st16_i64, 0, 2, 1, IMPL64) +DEF(st32_i64, 0, 2, 1, IMPL64) +DEF(st_i64, 0, 2, 1, IMPL64) /* arith */ DEF(add_i64, 1, 2, 0, IMPL64) DEF(sub_i64, 1, 2, 0, IMPL64) @@ -141,7 +140,7 @@ DEF(rotl_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64)) DEF(rotr_i64, 1, 2, 0, IMPL64 | IMPL(TCG_TARGET_HAS_rot_i64)) DEF(deposit_i64, 1, 2, 2, IMPL64 | IMPL(TCG_TARGET_HAS_deposit_i64)) -DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS | 
IMPL64) +DEF(brcond_i64, 0, 2, 2, TCG_OPF_BB_END | IMPL64) DEF(ext8s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext8s_i64)) DEF(ext16s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext16s_i64)) DEF(ext32s_i64, 1, 1, 0, IMPL64 | IMPL(TCG_TARGET_HAS_ext32s_i64)) @@ -165,8 +164,8 @@ DEF(debug_insn_start, 0, 0, 2, 0) #else DEF(debug_insn_start, 0, 0, 1, 0) #endif -DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS) -DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_SIDE_EFFECTS) +DEF(exit_tb, 0, 0, 1, TCG_OPF_BB_END) +DEF(goto_tb, 0, 0, 1, TCG_OPF_BB_END) /* Note: even if TARGET_LONG_BITS is not defined, the INDEX_op constants must be defined */ #if TCG_TARGET_REG_BITS == 32 @@ -776,7 +776,11 @@ static void tcg_reg_alloc_start(TCGContext *s) } for(i = s->nb_globals; i < s->nb_temps; i++) { ts = &s->temps[i]; - ts->val_type = TEMP_VAL_DEAD; + if (ts->temp_local) { + ts->val_type = TEMP_VAL_MEM; + } else { + ts->val_type = TEMP_VAL_DEAD; + } ts->mem_allocated = 0; ts->fixed_reg = 0; } @@ -1180,31 +1184,27 @@ static inline void tcg_set_nop(TCGContext *s, uint16_t *opc_ptr, } } -/* liveness analysis: end of function: globals are live, temps are - dead. */ -/* XXX: at this stage, not used as there would be little gains because - most TBs end with a conditional jump. */ -static inline void tcg_la_func_end(TCGContext *s, uint8_t *dead_temps) +/* liveness analysis: end of function: all temps are dead, and globals + should be in memory. */ +static inline void tcg_la_func_end(TCGContext *s, uint8_t *dead_temps, + uint8_t *mem_temps) { - memset(dead_temps, 0, s->nb_globals); - memset(dead_temps + s->nb_globals, 1, s->nb_temps - s->nb_globals); + memset(dead_temps, 1, s->nb_temps); + memset(mem_temps, 1, s->nb_globals); + memset(mem_temps + s->nb_globals, 0, s->nb_temps - s->nb_globals); } -/* liveness analysis: end of basic block: globals are live, temps are - dead, local temps are live. 
*/ -static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps) +/* liveness analysis: end of basic block: all temps are dead, globals + and local temps should be in memory. */ +static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps, + uint8_t *mem_temps) { int i; - TCGTemp *ts; - memset(dead_temps, 0, s->nb_globals); - ts = &s->temps[s->nb_globals]; + memset(dead_temps, 1, s->nb_temps); + memset(mem_temps, 1, s->nb_globals); for(i = s->nb_globals; i < s->nb_temps; i++) { - if (ts->temp_local) - dead_temps[i] = 0; - else - dead_temps[i] = 1; - ts++; + mem_temps[i] = s->temps[i].temp_local; } } @@ -1217,17 +1217,20 @@ static void tcg_liveness_analysis(TCGContext *s) TCGOpcode op; TCGArg *args; const TCGOpDef *def; - uint8_t *dead_temps; - unsigned int dead_args; + uint8_t *dead_temps, *mem_temps; + uint16_t dead_args; + uint8_t sync_args; gen_opc_ptr++; /* skip end */ nb_ops = gen_opc_ptr - gen_opc_buf; s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t)); + s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t)); dead_temps = tcg_malloc(s->nb_temps); - memset(dead_temps, 1, s->nb_temps); + mem_temps = tcg_malloc(s->nb_temps); + tcg_la_func_end(s, dead_temps, mem_temps); args = gen_opparam_ptr; op_index = nb_ops - 1; @@ -1248,11 +1251,12 @@ static void tcg_liveness_analysis(TCGContext *s) /* pure functions can be removed if their result is not used */ - if (call_flags & TCG_CALL_PURE) { + if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) { for(i = 0; i < nb_oargs; i++) { arg = args[i]; - if (!dead_temps[arg]) + if (!dead_temps[arg] || mem_temps[arg]) { goto do_not_remove_call; + } } tcg_set_nop(s, gen_opc_buf + op_index, args - 1, nb_args); @@ -1261,17 +1265,27 @@ static void tcg_liveness_analysis(TCGContext *s) /* output args are dead */ dead_args = 0; + sync_args = 0; for(i = 0; i < nb_oargs; i++) { arg = args[i]; if (dead_temps[arg]) { dead_args |= (1 << i); } + if (mem_temps[arg]) { + sync_args |= (1 << i); + } dead_temps[arg] = 1; + 
mem_temps[arg] = 0; + } + + if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) { + /* globals should be synced to memory */ + memset(mem_temps, 1, s->nb_globals); } - - if (!(call_flags & TCG_CALL_CONST)) { - /* globals are live (they may be used by the call) */ - memset(dead_temps, 0, s->nb_globals); + if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS | + TCG_CALL_NO_READ_GLOBALS))) { + /* globals should go back to memory */ + memset(dead_temps, 1, s->nb_globals); } /* input args are live */ @@ -1285,6 +1299,7 @@ static void tcg_liveness_analysis(TCGContext *s) } } s->op_dead_args[op_index] = dead_args; + s->op_sync_args[op_index] = sync_args; } args--; } @@ -1300,6 +1315,7 @@ static void tcg_liveness_analysis(TCGContext *s) args--; /* mark the temporary as dead */ dead_temps[args[0]] = 1; + mem_temps[args[0]] = 0; break; case INDEX_op_end: break; @@ -1365,8 +1381,9 @@ static void tcg_liveness_analysis(TCGContext *s) if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) { for(i = 0; i < nb_oargs; i++) { arg = args[i]; - if (!dead_temps[arg]) + if (!dead_temps[arg] || mem_temps[arg]) { goto do_not_remove; + } } do_remove: tcg_set_nop(s, gen_opc_buf + op_index, args, def->nb_args); @@ -1378,20 +1395,25 @@ static void tcg_liveness_analysis(TCGContext *s) /* output args are dead */ dead_args = 0; + sync_args = 0; for(i = 0; i < nb_oargs; i++) { arg = args[i]; if (dead_temps[arg]) { dead_args |= (1 << i); } + if (mem_temps[arg]) { + sync_args |= (1 << i); + } dead_temps[arg] = 1; + mem_temps[arg] = 0; } /* if end of basic block, update */ if (def->flags & TCG_OPF_BB_END) { - tcg_la_bb_end(s, dead_temps); - } else if (def->flags & TCG_OPF_CALL_CLOBBER) { - /* globals are live */ - memset(dead_temps, 0, s->nb_globals); + tcg_la_bb_end(s, dead_temps, mem_temps); + } else if (def->flags & TCG_OPF_SIDE_EFFECTS) { + /* globals should be synced to memory */ + memset(mem_temps, 1, s->nb_globals); } /* input args are live */ @@ -1403,6 +1425,7 @@ static void 
tcg_liveness_analysis(TCGContext *s) dead_temps[arg] = 0; } s->op_dead_args[op_index] = dead_args; + s->op_sync_args[op_index] = sync_args; } break; } @@ -1421,6 +1444,8 @@ static void tcg_liveness_analysis(TCGContext *s) s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t)); memset(s->op_dead_args, 0, nb_ops * sizeof(uint16_t)); + s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t)); + memset(s->op_sync_args, 0, nb_ops * sizeof(uint8_t)); } #endif @@ -1517,22 +1542,33 @@ static void temp_allocate_frame(TCGContext *s, int temp) s->current_frame_offset += (tcg_target_long)sizeof(tcg_target_long); } +/* sync register 'reg' by saving it to the corresponding temporary */ +static inline void tcg_reg_sync(TCGContext *s, int reg) +{ + TCGTemp *ts; + int temp; + + temp = s->reg_to_temp[reg]; + ts = &s->temps[temp]; + assert(ts->val_type == TEMP_VAL_REG); + if (!ts->mem_coherent && !ts->fixed_reg) { + if (!ts->mem_allocated) { + temp_allocate_frame(s, temp); + } + tcg_out_st(s, ts->type, reg, ts->mem_reg, ts->mem_offset); + } + ts->mem_coherent = 1; +} + /* free register 'reg' by spilling the corresponding temporary if necessary */ static void tcg_reg_free(TCGContext *s, int reg) { - TCGTemp *ts; int temp; temp = s->reg_to_temp[reg]; if (temp != -1) { - ts = &s->temps[temp]; - assert(ts->val_type == TEMP_VAL_REG); - if (!ts->mem_coherent) { - if (!ts->mem_allocated) - temp_allocate_frame(s, temp); - tcg_out_st(s, ts->type, reg, ts->mem_reg, ts->mem_offset); - } - ts->val_type = TEMP_VAL_MEM; + tcg_reg_sync(s, reg); + s->temps[temp].val_type = TEMP_VAL_MEM; s->reg_to_temp[reg] = -1; } } @@ -1564,31 +1600,45 @@ static int tcg_reg_alloc(TCGContext *s, TCGRegSet reg1, TCGRegSet reg2) tcg_abort(); } -/* save a temporary to memory. 'allocated_regs' is used in case a +/* mark a temporary as dead. 
*/ +static inline void temp_dead(TCGContext *s, int temp) +{ + TCGTemp *ts; + + ts = &s->temps[temp]; + if (!ts->fixed_reg) { + if (ts->val_type == TEMP_VAL_REG) { + s->reg_to_temp[ts->reg] = -1; + } + if (temp < s->nb_globals || (ts->temp_local && ts->mem_allocated)) { + ts->val_type = TEMP_VAL_MEM; + } else { + ts->val_type = TEMP_VAL_DEAD; + } + } +} + +/* sync a temporary to memory. 'allocated_regs' is used in case a temporary registers needs to be allocated to store a constant. */ -static void temp_save(TCGContext *s, int temp, TCGRegSet allocated_regs) +static inline void temp_sync(TCGContext *s, int temp, TCGRegSet allocated_regs) { TCGTemp *ts; - int reg; ts = &s->temps[temp]; if (!ts->fixed_reg) { switch(ts->val_type) { + case TEMP_VAL_CONST: + ts->reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], + allocated_regs); + ts->val_type = TEMP_VAL_REG; + s->reg_to_temp[ts->reg] = temp; + ts->mem_coherent = 0; + tcg_out_movi(s, ts->type, ts->reg, ts->val); + /* fallthrough*/ case TEMP_VAL_REG: - tcg_reg_free(s, ts->reg); + tcg_reg_sync(s, ts->reg); break; case TEMP_VAL_DEAD: - ts->val_type = TEMP_VAL_MEM; - break; - case TEMP_VAL_CONST: - reg = tcg_reg_alloc(s, tcg_target_available_regs[ts->type], - allocated_regs); - if (!ts->mem_allocated) - temp_allocate_frame(s, temp); - tcg_out_movi(s, ts->type, reg, ts->val); - tcg_out_st(s, ts->type, reg, ts->mem_reg, ts->mem_offset); - ts->val_type = TEMP_VAL_MEM; - break; case TEMP_VAL_MEM: break; default: @@ -1597,6 +1647,20 @@ static void temp_save(TCGContext *s, int temp, TCGRegSet allocated_regs) } } +/* save a temporary to memory. 'allocated_regs' is used in case a + temporary registers needs to be allocated to store a constant. */ +static inline void temp_save(TCGContext *s, int temp, TCGRegSet allocated_regs) +{ +#ifdef USE_LIVENESS_ANALYSIS + /* The liveness analysis already ensures that globals are back + in memory. Keep an assert for safety. 
*/ + assert(s->temps[temp].val_type == TEMP_VAL_MEM || s->temps[temp].fixed_reg); +#else + temp_sync(s, temp, allocated_regs); + temp_dead(s, temp); +#endif +} + /* save globals to their canonical location and assume they can be modified be the following code. 'allocated_regs' is used in case a temporary registers needs to be allocated to store a constant. */ @@ -1609,6 +1673,23 @@ static void save_globals(TCGContext *s, TCGRegSet allocated_regs) } } +/* sync globals to their canonical location and assume they can be + read by the following code. 'allocated_regs' is used in case a + temporary registers needs to be allocated to store a constant. */ +static void sync_globals(TCGContext *s, TCGRegSet allocated_regs) +{ + int i; + + for (i = 0; i < s->nb_globals; i++) { +#ifdef USE_LIVENESS_ANALYSIS + assert(s->temps[i].val_type != TEMP_VAL_REG || s->temps[i].fixed_reg || + s->temps[i].mem_coherent); +#else + temp_sync(s, i, allocated_regs); +#endif + } +} + /* at the end of a basic block, we assume all temporaries are dead and all globals are stored at their canonical location. */ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) @@ -1621,10 +1702,13 @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) if (ts->temp_local) { temp_save(s, i, allocated_regs); } else { - if (ts->val_type == TEMP_VAL_REG) { - s->reg_to_temp[ts->reg] = -1; - } - ts->val_type = TEMP_VAL_DEAD; +#ifdef USE_LIVENESS_ANALYSIS + /* The liveness analysis already ensures that temps are dead. + Keep an assert for safety. 
*/ + assert(ts->val_type == TEMP_VAL_DEAD); +#else + temp_dead(s, i); +#endif } } @@ -1632,8 +1716,10 @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs) } #define IS_DEAD_ARG(n) ((dead_args >> (n)) & 1) +#define NEED_SYNC_ARG(n) ((sync_args >> (n)) & 1) -static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args) +static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args, + uint16_t dead_args, uint8_t sync_args) { TCGTemp *ots; tcg_target_ulong val; @@ -1652,71 +1738,99 @@ static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args) ots->val_type = TEMP_VAL_CONST; ots->val = val; } + if (NEED_SYNC_ARG(0)) { + temp_sync(s, args[0], s->reserved_regs); + } + if (IS_DEAD_ARG(0)) { + temp_dead(s, args[0]); + } } static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def, - const TCGArg *args, - unsigned int dead_args) + const TCGArg *args, uint16_t dead_args, + uint8_t sync_args) { + TCGRegSet allocated_regs; TCGTemp *ts, *ots; - int reg; - const TCGArgConstraint *arg_ct; + const TCGArgConstraint *arg_ct, *oarg_ct; + tcg_regset_set(allocated_regs, s->reserved_regs); ots = &s->temps[args[0]]; ts = &s->temps[args[1]]; - arg_ct = &def->args_ct[0]; + oarg_ct = &def->args_ct[0]; + arg_ct = &def->args_ct[1]; + + /* If the source value is not in a register, and we're going to be + forced to have it in a register in order to perform the copy, + then copy the SOURCE value into its own register first. That way + we don't have to reload SOURCE the next time it is used. 
*/ + if (((NEED_SYNC_ARG(0) || ots->fixed_reg) && ts->val_type != TEMP_VAL_REG) + || ts->val_type == TEMP_VAL_MEM) { + ts->reg = tcg_reg_alloc(s, arg_ct->u.regs, allocated_regs); + if (ts->val_type == TEMP_VAL_MEM) { + tcg_out_ld(s, ts->type, ts->reg, ts->mem_reg, ts->mem_offset); + ts->mem_coherent = 1; + } else if (ts->val_type == TEMP_VAL_CONST) { + tcg_out_movi(s, ts->type, ts->reg, ts->val); + } + s->reg_to_temp[ts->reg] = args[1]; + ts->val_type = TEMP_VAL_REG; + } - /* XXX: always mark arg dead if IS_DEAD_ARG(1) */ - if (ts->val_type == TEMP_VAL_REG) { + if (IS_DEAD_ARG(0) && !ots->fixed_reg) { + /* mov to a non-saved dead register makes no sense (even with + liveness analysis disabled). */ + assert(NEED_SYNC_ARG(0)); + /* The code above should have moved the temp to a register. */ + assert(ts->val_type == TEMP_VAL_REG); + if (!ots->mem_allocated) { + temp_allocate_frame(s, args[0]); + } + tcg_out_st(s, ots->type, ts->reg, ots->mem_reg, ots->mem_offset); + if (IS_DEAD_ARG(1)) { + temp_dead(s, args[1]); + } + temp_dead(s, args[0]); + } else if (ts->val_type == TEMP_VAL_CONST) { + /* propagate constant */ + if (ots->val_type == TEMP_VAL_REG) { + s->reg_to_temp[ots->reg] = -1; + } + ots->val_type = TEMP_VAL_CONST; + ots->val = ts->val; + } else { + /* The code in the first if block should have moved the + temp to a register. 
*/ + assert(ts->val_type == TEMP_VAL_REG); if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) { /* the mov can be suppressed */ - if (ots->val_type == TEMP_VAL_REG) - s->reg_to_temp[ots->reg] = -1; - reg = ts->reg; - s->reg_to_temp[reg] = -1; - ts->val_type = TEMP_VAL_DEAD; - } else { if (ots->val_type == TEMP_VAL_REG) { - reg = ots->reg; - } else { - reg = tcg_reg_alloc(s, arg_ct->u.regs, s->reserved_regs); - } - if (ts->reg != reg) { - tcg_out_mov(s, ots->type, reg, ts->reg); + s->reg_to_temp[ots->reg] = -1; } - } - } else if (ts->val_type == TEMP_VAL_MEM) { - if (ots->val_type == TEMP_VAL_REG) { - reg = ots->reg; + ots->reg = ts->reg; + temp_dead(s, args[1]); } else { - reg = tcg_reg_alloc(s, arg_ct->u.regs, s->reserved_regs); + if (ots->val_type != TEMP_VAL_REG) { + /* When allocating a new register, make sure to not spill the + input one. */ + tcg_regset_set_reg(allocated_regs, ts->reg); + ots->reg = tcg_reg_alloc(s, oarg_ct->u.regs, allocated_regs); + } + tcg_out_mov(s, ots->type, ots->reg, ts->reg); } - tcg_out_ld(s, ts->type, reg, ts->mem_reg, ts->mem_offset); - } else if (ts->val_type == TEMP_VAL_CONST) { - if (ots->fixed_reg) { - reg = ots->reg; - tcg_out_movi(s, ots->type, reg, ts->val); - } else { - /* propagate constant */ - if (ots->val_type == TEMP_VAL_REG) - s->reg_to_temp[ots->reg] = -1; - ots->val_type = TEMP_VAL_CONST; - ots->val = ts->val; - return; + ots->val_type = TEMP_VAL_REG; + ots->mem_coherent = 0; + s->reg_to_temp[ots->reg] = args[0]; + if (NEED_SYNC_ARG(0)) { + tcg_reg_sync(s, ots->reg); } - } else { - tcg_abort(); } - s->reg_to_temp[reg] = args[0]; - ots->reg = reg; - ots->val_type = TEMP_VAL_REG; - ots->mem_coherent = 0; } static void tcg_reg_alloc_op(TCGContext *s, const TCGOpDef *def, TCGOpcode opc, - const TCGArg *args, - unsigned int dead_args) + const TCGArg *args, uint16_t dead_args, + uint8_t sync_args) { TCGRegSet allocated_regs; int i, k, nb_iargs, nb_oargs, reg; @@ -1796,22 +1910,16 @@ static void 
tcg_reg_alloc_op(TCGContext *s, iarg_end: ; } + /* mark dead temporaries and free the associated registers */ + for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) { + if (IS_DEAD_ARG(i)) { + temp_dead(s, args[i]); + } + } + if (def->flags & TCG_OPF_BB_END) { tcg_reg_alloc_bb_end(s, allocated_regs); } else { - /* mark dead temporaries and free the associated registers */ - for(i = nb_oargs; i < nb_oargs + nb_iargs; i++) { - arg = args[i]; - if (IS_DEAD_ARG(i)) { - ts = &s->temps[arg]; - if (!ts->fixed_reg) { - if (ts->val_type == TEMP_VAL_REG) - s->reg_to_temp[ts->reg] = -1; - ts->val_type = TEMP_VAL_DEAD; - } - } - } - if (def->flags & TCG_OPF_CALL_CLOBBER) { /* XXX: permit generic clobber register list ? */ for(reg = 0; reg < TCG_TARGET_NB_REGS; reg++) { @@ -1819,12 +1927,11 @@ static void tcg_reg_alloc_op(TCGContext *s, tcg_reg_free(s, reg); } } - /* XXX: for load/store we could do that only for the slow path - (i.e. when a memory callback is called) */ - - /* store globals and free associated registers (we assume the insn - can modify any global. */ - save_globals(s, allocated_regs); + } + if (def->flags & TCG_OPF_SIDE_EFFECTS) { + /* sync globals if the op has side effects and might trigger + an exception. 
*/ + sync_globals(s, allocated_regs); } /* satisfy the output constraints */ @@ -1848,18 +1955,15 @@ static void tcg_reg_alloc_op(TCGContext *s, tcg_regset_set_reg(allocated_regs, reg); /* if a fixed register is used, then a move will be done afterwards */ if (!ts->fixed_reg) { - if (ts->val_type == TEMP_VAL_REG) + if (ts->val_type == TEMP_VAL_REG) { s->reg_to_temp[ts->reg] = -1; - if (IS_DEAD_ARG(i)) { - ts->val_type = TEMP_VAL_DEAD; - } else { - ts->val_type = TEMP_VAL_REG; - ts->reg = reg; - /* temp value is modified, so the value kept in memory is - potentially not the same */ - ts->mem_coherent = 0; - s->reg_to_temp[reg] = arg; - } + } + ts->val_type = TEMP_VAL_REG; + ts->reg = reg; + /* temp value is modified, so the value kept in memory is + potentially not the same */ + ts->mem_coherent = 0; + s->reg_to_temp[reg] = arg; } oarg_end: new_args[i] = reg; @@ -1876,6 +1980,12 @@ static void tcg_reg_alloc_op(TCGContext *s, if (ts->fixed_reg && ts->reg != reg) { tcg_out_mov(s, ts->type, ts->reg, reg); } + if (NEED_SYNC_ARG(i)) { + tcg_reg_sync(s, reg); + } + if (IS_DEAD_ARG(i)) { + temp_dead(s, args[i]); + } } } @@ -1887,7 +1997,7 @@ static void tcg_reg_alloc_op(TCGContext *s, static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def, TCGOpcode opc, const TCGArg *args, - unsigned int dead_args) + uint16_t dead_args, uint8_t sync_args) { int nb_iargs, nb_oargs, flags, nb_regs, i, reg, nb_params; TCGArg arg, func_arg; @@ -2011,14 +2121,8 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def, /* mark dead temporaries and free the associated registers */ for(i = nb_oargs; i < nb_iargs + nb_oargs; i++) { - arg = args[i]; if (IS_DEAD_ARG(i)) { - ts = &s->temps[arg]; - if (!ts->fixed_reg) { - if (ts->val_type == TEMP_VAL_REG) - s->reg_to_temp[ts->reg] = -1; - ts->val_type = TEMP_VAL_DEAD; - } + temp_dead(s, args[i]); } } @@ -2028,10 +2132,14 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def, tcg_reg_free(s, reg); } } - - /* store 
globals and free associated registers (we assume the call - can modify any global. */ - if (!(flags & TCG_CALL_CONST)) { + + /* Save globals if they might be written by the helper, sync them if + they might be read. */ + if (flags & TCG_CALL_NO_READ_GLOBALS) { + /* Nothing to do */ + } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) { + sync_globals(s, allocated_regs); + } else { save_globals(s, allocated_regs); } @@ -2048,15 +2156,18 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def, tcg_out_mov(s, ts->type, ts->reg, reg); } } else { - if (ts->val_type == TEMP_VAL_REG) + if (ts->val_type == TEMP_VAL_REG) { s->reg_to_temp[ts->reg] = -1; + } + ts->val_type = TEMP_VAL_REG; + ts->reg = reg; + ts->mem_coherent = 0; + s->reg_to_temp[reg] = arg; + if (NEED_SYNC_ARG(i)) { + tcg_reg_sync(s, reg); + } if (IS_DEAD_ARG(i)) { - ts->val_type = TEMP_VAL_DEAD; - } else { - ts->val_type = TEMP_VAL_REG; - ts->reg = reg; - ts->mem_coherent = 0; - s->reg_to_temp[reg] = arg; + temp_dead(s, args[i]); } } } @@ -2087,7 +2198,6 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf, TCGOpcode opc; int op_index; const TCGOpDef *def; - unsigned int dead_args; const TCGArg *args; #ifdef DEBUG_DISAS @@ -2148,12 +2258,13 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf, switch(opc) { case INDEX_op_mov_i32: case INDEX_op_mov_i64: - dead_args = s->op_dead_args[op_index]; - tcg_reg_alloc_mov(s, def, args, dead_args); + tcg_reg_alloc_mov(s, def, args, s->op_dead_args[op_index], + s->op_sync_args[op_index]); break; case INDEX_op_movi_i32: case INDEX_op_movi_i64: - tcg_reg_alloc_movi(s, args); + tcg_reg_alloc_movi(s, args, s->op_dead_args[op_index], + s->op_sync_args[op_index]); break; case INDEX_op_debug_insn_start: /* debug instruction */ @@ -2167,24 +2278,16 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf, args += args[0]; goto next; case INDEX_op_discard: - { - TCGTemp *ts; - ts = &s->temps[args[0]]; 
- /* mark the temporary as dead */ - if (!ts->fixed_reg) { - if (ts->val_type == TEMP_VAL_REG) - s->reg_to_temp[ts->reg] = -1; - ts->val_type = TEMP_VAL_DEAD; - } - } + temp_dead(s, args[0]); break; case INDEX_op_set_label: tcg_reg_alloc_bb_end(s, s->reserved_regs); tcg_out_label(s, args[0], s->code_ptr); break; case INDEX_op_call: - dead_args = s->op_dead_args[op_index]; - args += tcg_reg_alloc_call(s, def, opc, args, dead_args); + args += tcg_reg_alloc_call(s, def, opc, args, + s->op_dead_args[op_index], + s->op_sync_args[op_index]); goto next; case INDEX_op_end: goto the_end; @@ -2196,8 +2299,8 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf, /* Note: in order to speed up the code, it would be much faster to have specialized register allocator functions for some common argument patterns */ - dead_args = s->op_dead_args[op_index]; - tcg_reg_alloc_op(s, def, opc, args, dead_args); + tcg_reg_alloc_op(s, def, opc, args, s->op_dead_args[op_index], + s->op_sync_args[op_index]); break; } args += def->nb_args; @@ -253,14 +253,20 @@ typedef int TCGv_i64; #define TCGV_UNUSED_I64(x) x = MAKE_TCGV_I64(-1) /* call flags */ -/* A pure function only reads its arguments and TCG global variables - and cannot raise exceptions. Hence a call to a pure function can be - safely suppressed if the return value is not used. */ -#define TCG_CALL_PURE 0x0010 -/* A const function only reads its arguments and does not use TCG - global variables. Hence a call to such a function does not - save TCG global variables back to their canonical location. */ -#define TCG_CALL_CONST 0x0020 +/* Helper does not read globals (either directly or through an exception). It + implies TCG_CALL_NO_WRITE_GLOBALS. */ +#define TCG_CALL_NO_READ_GLOBALS 0x0010 +/* Helper does not write globals */ +#define TCG_CALL_NO_WRITE_GLOBALS 0x0020 +/* Helper can be safely suppressed if the return value is not used. 
*/ +#define TCG_CALL_NO_SIDE_EFFECTS 0x0040 + +/* convenience version of most used call flags */ +#define TCG_CALL_NO_RWG TCG_CALL_NO_READ_GLOBALS +#define TCG_CALL_NO_WG TCG_CALL_NO_WRITE_GLOBALS +#define TCG_CALL_NO_SE TCG_CALL_NO_SIDE_EFFECTS +#define TCG_CALL_NO_RWG_SE (TCG_CALL_NO_RWG | TCG_CALL_NO_SE) +#define TCG_CALL_NO_WG_SE (TCG_CALL_NO_WG | TCG_CALL_NO_SE) /* used to align parameters */ #define TCG_CALL_DUMMY_TCGV MAKE_TCGV_I32(-1) @@ -381,6 +387,9 @@ struct TCGContext { /* liveness analysis */ uint16_t *op_dead_args; /* for each operation, each bit tells if the corresponding argument is dead */ + uint8_t *op_sync_args; /* for each operation, each bit tells if the + corresponding output argument needs to be + sync to memory. */ /* tells in which temporary a given register is. It does not take into account fixed registers */ @@ -527,8 +536,8 @@ enum { TCG_OPF_BB_END = 0x01, /* Instruction clobbers call registers and potentially update globals. */ TCG_OPF_CALL_CLOBBER = 0x02, - /* Instruction has side effects: it cannot be removed - if its outputs are not used. */ + /* Instruction has side effects: it cannot be removed if its outputs + are not used, and might trigger exceptions. */ TCG_OPF_SIDE_EFFECTS = 0x04, /* Instruction operands are 64-bits (otherwise 32-bits). */ TCG_OPF_64BIT = 0x08, |