From 91c45a38f282b970f443f8e9d6bdb6ffaa00dfbf Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 23 May 2015 15:06:53 -0700 Subject: linux-user: Default sh4 to sh7785 Signed-off-by: Richard Henderson Signed-off-by: Aurelien Jarno --- linux-user/main.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/linux-user/main.c b/linux-user/main.c index 6989b82..a0d3e58 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -3925,6 +3925,8 @@ int main(int argc, char **argv, char **envp) # else cpu_model = "750"; # endif +#elif defined TARGET_SH4 + cpu_model = TYPE_SH7785_CPU; #else cpu_model = "any"; #endif -- cgit v1.1 From e42fd944f02dda893fc8773959d6db75f2a49367 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Sat, 23 May 2015 15:06:54 -0700 Subject: linux-user: Add HWCAP for SH4 Only exposing FPU and LLSC as the only features supported by the translator. Signed-off-by: Richard Henderson Signed-off-by: Aurelien Jarno --- linux-user/elfload.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 0ba9706..b71e866 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -1075,6 +1075,35 @@ static inline void elf_core_copy_regs(target_elf_gregset_t *regs, #define USE_ELF_CORE_DUMP #define ELF_EXEC_PAGESIZE 4096 +enum { + SH_CPU_HAS_FPU = 0x0001, /* Hardware FPU support */ + SH_CPU_HAS_P2_FLUSH_BUG = 0x0002, /* Need to flush the cache in P2 area */ + SH_CPU_HAS_MMU_PAGE_ASSOC = 0x0004, /* SH3: TLB way selection bit support */ + SH_CPU_HAS_DSP = 0x0008, /* SH-DSP: DSP support */ + SH_CPU_HAS_PERF_COUNTER = 0x0010, /* Hardware performance counters */ + SH_CPU_HAS_PTEA = 0x0020, /* PTEA register */ + SH_CPU_HAS_LLSC = 0x0040, /* movli.l/movco.l */ + SH_CPU_HAS_L2_CACHE = 0x0080, /* Secondary cache / URAM */ + SH_CPU_HAS_OP32 = 0x0100, /* 32-bit instruction support */ + SH_CPU_HAS_PTEAEX = 0x0200, /* PTE ASID Extension support */ +}; + +#define ELF_HWCAP get_elf_hwcap() + 
+static uint32_t get_elf_hwcap(void) +{ + SuperHCPU *cpu = SUPERH_CPU(thread_cpu); + uint32_t hwcap = 0; + + hwcap |= SH_CPU_HAS_FPU; + + if (cpu->env.features & SH_FEATURE_SH4A) { + hwcap |= SH_CPU_HAS_LLSC; + } + + return hwcap; +} + #endif #ifdef TARGET_CRIS -- cgit v1.1 From 563807520ff19e6ed2d40695f543f1fba7ba432f Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Wed, 3 Jun 2015 23:16:43 +0200 Subject: sh4/r2d: convert to new MMIO accessor style The documentation is clear to use 16-bit accesses for all registers. Signed-off-by: Aurelien Jarno --- hw/sh4/r2d.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/hw/sh4/r2d.c b/hw/sh4/r2d.c index 4221060..5e22ed7 100644 --- a/hw/sh4/r2d.c +++ b/hw/sh4/r2d.c @@ -127,7 +127,7 @@ static void r2d_fpga_irq_set(void *opaque, int n, int level) update_irl(fpga); } -static uint32_t r2d_fpga_read(void *opaque, hwaddr addr) +static uint64_t r2d_fpga_read(void *opaque, hwaddr addr, unsigned int size) { r2d_fpga_t *s = opaque; @@ -146,7 +146,7 @@ static uint32_t r2d_fpga_read(void *opaque, hwaddr addr) } static void -r2d_fpga_write(void *opaque, hwaddr addr, uint32_t value) +r2d_fpga_write(void *opaque, hwaddr addr, uint64_t value, unsigned int size) { r2d_fpga_t *s = opaque; @@ -170,10 +170,10 @@ r2d_fpga_write(void *opaque, hwaddr addr, uint32_t value) } static const MemoryRegionOps r2d_fpga_ops = { - .old_mmio = { - .read = { r2d_fpga_read, r2d_fpga_read, NULL, }, - .write = { r2d_fpga_write, r2d_fpga_write, NULL, }, - }, + .read = r2d_fpga_read, + .write = r2d_fpga_write, + .impl.min_access_size = 2, + .impl.max_access_size = 2, .endianness = DEVICE_NATIVE_ENDIAN, }; -- cgit v1.1 From 5ed9a259c164bb9fd2a6fe8a363a4bda2e4a5461 Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Mon, 25 May 2015 01:28:56 +0200 Subject: target-sh4: use bit number for SR constants Use the bit number for SR constants instead of using a bit mask. This makes it possible to also use the constants for shifts. 
Reviewed-by: Richard Henderson Signed-off-by: Aurelien Jarno --- target-sh4/cpu.c | 3 +- target-sh4/cpu.h | 30 ++++++++++---------- target-sh4/gdbstub.c | 4 +-- target-sh4/helper.c | 27 +++++++++--------- target-sh4/op_helper.c | 26 ++++++++--------- target-sh4/translate.c | 75 ++++++++++++++++++++++++++------------------------ 6 files changed, 85 insertions(+), 80 deletions(-) diff --git a/target-sh4/cpu.c b/target-sh4/cpu.c index d187a2b..cccb14f 100644 --- a/target-sh4/cpu.c +++ b/target-sh4/cpu.c @@ -61,7 +61,8 @@ static void superh_cpu_reset(CPUState *s) env->fpscr = FPSCR_PR; /* value for userspace according to the kernel */ set_float_rounding_mode(float_round_nearest_even, &env->fp_status); /* ?! */ #else - env->sr = SR_MD | SR_RB | SR_BL | SR_I3 | SR_I2 | SR_I1 | SR_I0; + env->sr = (1u << SR_MD) | (1u << SR_RB) | (1u << SR_BL) | + (1u << SR_I3) | (1u << SR_I2) | (1u << SR_I1) | (1u << SR_I0); env->fpscr = FPSCR_DN | FPSCR_RM_ZERO; /* CPU reset value according to SH4 manual */ set_float_rounding_mode(float_round_to_zero, &env->fp_status); set_flush_to_zero(1, &env->fp_status); diff --git a/target-sh4/cpu.h b/target-sh4/cpu.h index c8dea6c..76fda35 100644 --- a/target-sh4/cpu.h +++ b/target-sh4/cpu.h @@ -47,18 +47,18 @@ #define TARGET_PHYS_ADDR_SPACE_BITS 32 #define TARGET_VIRT_ADDR_SPACE_BITS 32 -#define SR_MD (1 << 30) -#define SR_RB (1 << 29) -#define SR_BL (1 << 28) -#define SR_FD (1 << 15) -#define SR_M (1 << 9) -#define SR_Q (1 << 8) -#define SR_I3 (1 << 7) -#define SR_I2 (1 << 6) -#define SR_I1 (1 << 5) -#define SR_I0 (1 << 4) -#define SR_S (1 << 1) -#define SR_T (1 << 0) +#define SR_MD 30 +#define SR_RB 29 +#define SR_BL 28 +#define SR_FD 15 +#define SR_M 9 +#define SR_Q 8 +#define SR_I3 7 +#define SR_I2 6 +#define SR_I1 5 +#define SR_I0 4 +#define SR_S 1 +#define SR_T 0 #define FPSCR_MASK (0x003fffff) #define FPSCR_FR (1 << 21) @@ -234,7 +234,7 @@ void cpu_load_tlb(CPUSH4State * env); #define MMU_USER_IDX 1 static inline int cpu_mmu_index 
(CPUSH4State *env) { - return (env->sr & SR_MD) == 0 ? 1 : 0; + return (env->sr & (1u << SR_MD)) == 0 ? 1 : 0; } #include "exec/cpu-all.h" @@ -339,8 +339,8 @@ static inline void cpu_get_tb_cpu_state(CPUSH4State *env, target_ulong *pc, *flags = (env->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL | DELAY_SLOT_TRUE | DELAY_SLOT_CLEARME)) /* Bits 0- 3 */ | (env->fpscr & (FPSCR_FR | FPSCR_SZ | FPSCR_PR)) /* Bits 19-21 */ - | (env->sr & (SR_MD | SR_RB)) /* Bits 29-30 */ - | (env->sr & SR_FD) /* Bit 15 */ + | (env->sr & ((1u << SR_MD) | (1u << SR_RB))) /* Bits 29-30 */ + | (env->sr & (1u << SR_FD)) /* Bit 15 */ | (env->movcal_backup ? TB_FLAG_PENDING_MOVCA : 0); /* Bit 4 */ } diff --git a/target-sh4/gdbstub.c b/target-sh4/gdbstub.c index df4fa2a..05ba728 100644 --- a/target-sh4/gdbstub.c +++ b/target-sh4/gdbstub.c @@ -31,7 +31,7 @@ int superh_cpu_gdb_read_register(CPUState *cs, uint8_t *mem_buf, int n) switch (n) { case 0 ... 7: - if ((env->sr & (SR_MD | SR_RB)) == (SR_MD | SR_RB)) { + if ((env->sr & (1u << SR_MD)) && (env->sr & (1u << SR_RB))) { return gdb_get_regl(mem_buf, env->gregs[n + 16]); } else { return gdb_get_regl(mem_buf, env->gregs[n]); @@ -83,7 +83,7 @@ int superh_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) switch (n) { case 0 ... 
7: - if ((env->sr & (SR_MD | SR_RB)) == (SR_MD | SR_RB)) { + if ((env->sr & (1u << SR_MD)) && (env->sr & (1u << SR_RB))) { env->gregs[n + 16] = ldl_p(mem_buf); } else { env->gregs[n] = ldl_p(mem_buf); diff --git a/target-sh4/helper.c b/target-sh4/helper.c index 5811360..1cb0e8d 100644 --- a/target-sh4/helper.c +++ b/target-sh4/helper.c @@ -93,7 +93,7 @@ void superh_cpu_do_interrupt(CPUState *cs) do_exp = cs->exception_index != -1; do_irq = do_irq && (cs->exception_index == -1); - if (env->sr & SR_BL) { + if (env->sr & (1u << SR_BL)) { if (do_exp && cs->exception_index != 0x1e0) { cs->exception_index = 0x000; /* masked exception -> reset */ } @@ -165,7 +165,7 @@ void superh_cpu_do_interrupt(CPUState *cs) env->ssr = env->sr; env->spc = env->pc; env->sgr = env->gregs[15]; - env->sr |= SR_BL | SR_MD | SR_RB; + env->sr |= (1u << SR_BL) | (1u << SR_MD) | (1u << SR_RB); if (env->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { /* Branch instruction should be executed again before delay slot. */ @@ -182,7 +182,7 @@ void superh_cpu_do_interrupt(CPUState *cs) case 0x000: case 0x020: case 0x140: - env->sr &= ~SR_FD; + env->sr &= ~(1u << SR_FD); env->sr |= 0xf << 4; /* IMASK */ env->pc = 0xa0000000; break; @@ -355,23 +355,24 @@ static int get_mmu_address(CPUSH4State * env, target_ulong * physical, int use_asid, n; tlb_t *matching = NULL; - use_asid = (env->mmucr & MMUCR_SV) == 0 || (env->sr & SR_MD) == 0; + use_asid = !(env->mmucr & MMUCR_SV) || !(env->sr & (1u << SR_MD)); if (rw == 2) { n = find_itlb_entry(env, address, use_asid); if (n >= 0) { matching = &env->itlb[n]; - if (!(env->sr & SR_MD) && !(matching->pr & 2)) + if (!(env->sr & (1u << SR_MD)) && !(matching->pr & 2)) { n = MMU_ITLB_VIOLATION; - else + } else { *prot = PAGE_EXEC; + } } else { n = find_utlb_entry(env, address, use_asid); if (n >= 0) { n = copy_utlb_entry_itlb(env, n); matching = &env->itlb[n]; - if (!(env->sr & SR_MD) && !(matching->pr & 2)) { - n = MMU_ITLB_VIOLATION; + if (!(env->sr & (1u << SR_MD)) && 
!(matching->pr & 2)) { + n = MMU_ITLB_VIOLATION; } else { *prot = PAGE_READ | PAGE_EXEC; if ((matching->pr & 1) && matching->d) { @@ -388,7 +389,7 @@ static int get_mmu_address(CPUSH4State * env, target_ulong * physical, n = find_utlb_entry(env, address, use_asid); if (n >= 0) { matching = &env->utlb[n]; - if (!(env->sr & SR_MD) && !(matching->pr & 2)) { + if (!(env->sr & (1u << SR_MD)) && !(matching->pr & 2)) { n = (rw == 1) ? MMU_DTLB_VIOLATION_WRITE : MMU_DTLB_VIOLATION_READ; } else if ((rw == 1) && !(matching->pr & 1)) { @@ -421,7 +422,7 @@ static int get_physical_address(CPUSH4State * env, target_ulong * physical, /* P1, P2 and P4 areas do not use translation */ if ((address >= 0x80000000 && address < 0xc0000000) || address >= 0xe0000000) { - if (!(env->sr & SR_MD) + if (!(env->sr & (1u << SR_MD)) && (address < 0xe0000000 || address >= 0xe4000000)) { /* Unauthorized access in user mode (only store queues are available) */ fprintf(stderr, "Unauthorized access\n"); @@ -690,7 +691,7 @@ void cpu_sh4_write_mmaped_utlb_addr(CPUSH4State *s, hwaddr addr, uint8_t d = (uint8_t)((mem_value & 0x00000200) >> 9); uint8_t v = (uint8_t)((mem_value & 0x00000100) >> 8); uint8_t asid = (uint8_t)(mem_value & 0x000000ff); - int use_asid = (s->mmucr & MMUCR_SV) == 0 || (s->sr & SR_MD) == 0; + int use_asid = !(s->mmucr & MMUCR_SV) || !(s->sr & (1u << SR_MD)); if (associate) { int i; @@ -821,10 +822,10 @@ void cpu_sh4_write_mmaped_utlb_data(CPUSH4State *s, hwaddr addr, int cpu_sh4_is_cached(CPUSH4State * env, target_ulong addr) { int n; - int use_asid = (env->mmucr & MMUCR_SV) == 0 || (env->sr & SR_MD) == 0; + int use_asid = !(env->mmucr & MMUCR_SV) || !(env->sr & (1u << SR_MD)); /* check area */ - if (env->sr & SR_MD) { + if (env->sr & (1u << SR_MD)) { /* For previledged mode, P2 and P4 area is not cachable. 
*/ if ((0xA0000000 <= addr && addr < 0xC0000000) || 0xE0000000 <= addr) return 0; diff --git a/target-sh4/op_helper.c b/target-sh4/op_helper.c index 74a5c4e..6f34292 100644 --- a/target-sh4/op_helper.c +++ b/target-sh4/op_helper.c @@ -156,15 +156,15 @@ void helper_ocbi(CPUSH4State *env, uint32_t address) } } -#define T (env->sr & SR_T) -#define Q (env->sr & SR_Q ? 1 : 0) -#define M (env->sr & SR_M ? 1 : 0) -#define SETT env->sr |= SR_T -#define CLRT env->sr &= ~SR_T -#define SETQ env->sr |= SR_Q -#define CLRQ env->sr &= ~SR_Q -#define SETM env->sr |= SR_M -#define CLRM env->sr &= ~SR_M +#define T (env->sr & (1u << SR_T)) +#define Q (env->sr & (1u << SR_Q) ? 1 : 0) +#define M (env->sr & (1u << SR_M) ? 1 : 0) +#define SETT (env->sr |= (1u << SR_T)) +#define CLRT (env->sr &= ~(1u << SR_T)) +#define SETQ (env->sr |= (1u << SR_Q)) +#define CLRQ (env->sr &= ~(1u << SR_Q)) +#define SETM (env->sr |= (1u << SR_M)) +#define CLRM (env->sr &= ~(1u << SR_M)) uint32_t helper_div1(CPUSH4State *env, uint32_t arg0, uint32_t arg1) { @@ -282,7 +282,7 @@ void helper_macl(CPUSH4State *env, uint32_t arg0, uint32_t arg1) res += (int64_t) (int32_t) arg0 *(int64_t) (int32_t) arg1; env->mach = (res >> 32) & 0xffffffff; env->macl = res & 0xffffffff; - if (env->sr & SR_S) { + if (env->sr & (1u << SR_S)) { if (res < 0) env->mach |= 0xffff0000; else @@ -298,7 +298,7 @@ void helper_macw(CPUSH4State *env, uint32_t arg0, uint32_t arg1) res += (int64_t) (int16_t) arg0 *(int64_t) (int16_t) arg1; env->mach = (res >> 32) & 0xffffffff; env->macl = res & 0xffffffff; - if (env->sr & SR_S) { + if (env->sr & (1u << SR_S)) { if (res < -0x80000000) { env->mach = 1; env->macl = 0x80000000; @@ -311,12 +311,12 @@ void helper_macw(CPUSH4State *env, uint32_t arg0, uint32_t arg1) static inline void set_t(CPUSH4State *env) { - env->sr |= SR_T; + env->sr |= (1u << SR_T); } static inline void clr_t(CPUSH4State *env) { - env->sr &= ~SR_T; + env->sr &= ~(1u << SR_T); } void helper_ld_fpscr(CPUSH4State *env, uint32_t 
val) diff --git a/target-sh4/translate.c b/target-sh4/translate.c index 41aa928..b25f79c 100644 --- a/target-sh4/translate.c +++ b/target-sh4/translate.c @@ -47,7 +47,7 @@ typedef struct DisasContext { #if defined(CONFIG_USER_ONLY) #define IS_USER(ctx) 1 #else -#define IS_USER(ctx) (!(ctx->flags & SR_MD)) +#define IS_USER(ctx) (!(ctx->flags & (1u << SR_MD))) #endif enum { @@ -214,7 +214,7 @@ static inline void gen_branch_slot(uint32_t delayed_pc, int t) TCGLabel *label = gen_new_label(); tcg_gen_movi_i32(cpu_delayed_pc, delayed_pc); sr = tcg_temp_new(); - tcg_gen_andi_i32(sr, cpu_sr, SR_T); + tcg_gen_andi_i32(sr, cpu_sr, (1u << SR_T)); tcg_gen_brcondi_i32(t ? TCG_COND_EQ:TCG_COND_NE, sr, 0, label); tcg_gen_ori_i32(cpu_flags, cpu_flags, DELAY_SLOT_TRUE); gen_set_label(label); @@ -229,7 +229,7 @@ static void gen_conditional_jump(DisasContext * ctx, l1 = gen_new_label(); sr = tcg_temp_new(); - tcg_gen_andi_i32(sr, cpu_sr, SR_T); + tcg_gen_andi_i32(sr, cpu_sr, (1u << SR_T)); tcg_gen_brcondi_i32(TCG_COND_NE, sr, 0, l1); gen_goto_tb(ctx, 0, ifnott); gen_set_label(l1); @@ -258,7 +258,7 @@ static inline void gen_cmp(int cond, TCGv t0, TCGv t1) t = tcg_temp_new(); tcg_gen_setcond_i32(cond, t, t1, t0); - tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T); + tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_T)); tcg_gen_or_i32(cpu_sr, cpu_sr, t); tcg_temp_free(t); @@ -270,7 +270,7 @@ static inline void gen_cmp_imm(int cond, TCGv t0, int32_t imm) t = tcg_temp_new(); tcg_gen_setcondi_i32(cond, t, t0, imm); - tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T); + tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_T)); tcg_gen_or_i32(cpu_sr, cpu_sr, t); tcg_temp_free(t); @@ -326,10 +326,12 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg) #define B11_8 ((ctx->opcode >> 8) & 0xf) #define B15_12 ((ctx->opcode >> 12) & 0xf) -#define REG(x) ((x) < 8 && (ctx->flags & (SR_MD | SR_RB)) == (SR_MD | SR_RB) \ +#define REG(x) ((x) < 8 && (ctx->flags & (1u << SR_MD))\ + && (ctx->flags & (1u << SR_RB))\ ? 
(cpu_gregs[x + 16]) : (cpu_gregs[x])) -#define ALTREG(x) ((x) < 8 && (ctx->flags & (SR_MD | SR_RB)) != (SR_MD | SR_RB)\ +#define ALTREG(x) ((x) < 8 && (!(ctx->flags & (1u << SR_MD))\ + || !(ctx->flags & (1u << SR_RB)))\ ? (cpu_gregs[x + 16]) : (cpu_gregs[x])) #define FREG(x) (ctx->flags & FPSCR_FR ? (x) ^ 0x10 : (x)) @@ -359,7 +361,7 @@ static inline void gen_store_fpr64 (TCGv_i64 t, int reg) } #define CHECK_FPU_ENABLED \ - if (ctx->flags & SR_FD) { \ + if (ctx->flags & (1u << SR_FD)) { \ tcg_gen_movi_i32(cpu_pc, ctx->pc); \ if (ctx->flags & (DELAY_SLOT | DELAY_SLOT_CONDITIONAL)) { \ gen_helper_raise_slot_fpu_disable(cpu_env); \ @@ -409,7 +411,8 @@ static void _decode_opc(DisasContext * ctx) switch (ctx->opcode) { case 0x0019: /* div0u */ - tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(SR_M | SR_Q | SR_T)); + tcg_gen_andi_i32(cpu_sr, cpu_sr, + ~((1u << SR_M) | (1u << SR_Q) | (1u << SR_T))); return; case 0x000b: /* rts */ CHECK_NOT_DELAY_SLOT @@ -422,10 +425,10 @@ static void _decode_opc(DisasContext * ctx) tcg_gen_movi_i32(cpu_macl, 0); return; case 0x0048: /* clrs */ - tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_S); + tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_S)); return; case 0x0008: /* clrt */ - tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T); + tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_T)); return; case 0x0038: /* ldtlb */ CHECK_PRIVILEGED @@ -440,10 +443,10 @@ static void _decode_opc(DisasContext * ctx) ctx->delayed_pc = (uint32_t) - 1; return; case 0x0058: /* sets */ - tcg_gen_ori_i32(cpu_sr, cpu_sr, SR_S); + tcg_gen_ori_i32(cpu_sr, cpu_sr, (1u << SR_S)); return; case 0x0018: /* sett */ - tcg_gen_ori_i32(cpu_sr, cpu_sr, SR_T); + tcg_gen_ori_i32(cpu_sr, cpu_sr, (1u << SR_T)); return; case 0xfbfd: /* frchg */ tcg_gen_xori_i32(cpu_fpscr, cpu_fpscr, FPSCR_FR); @@ -661,7 +664,7 @@ static void _decode_opc(DisasContext * ctx) { TCGv t0, t1, t2; t0 = tcg_temp_new(); - tcg_gen_andi_i32(t0, cpu_sr, SR_T); + tcg_gen_andi_i32(t0, cpu_sr, (1u << SR_T)); t1 = tcg_temp_new(); tcg_gen_add_i32(t1, 
REG(B7_4), REG(B11_8)); tcg_gen_add_i32(t0, t0, t1); @@ -670,7 +673,7 @@ static void _decode_opc(DisasContext * ctx) tcg_gen_setcond_i32(TCG_COND_GTU, t1, t1, t0); tcg_gen_or_i32(t1, t1, t2); tcg_temp_free(t2); - tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T); + tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_T)); tcg_gen_or_i32(cpu_sr, cpu_sr, t1); tcg_temp_free(t1); tcg_gen_mov_i32(REG(B11_8), t0); @@ -689,7 +692,7 @@ static void _decode_opc(DisasContext * ctx) tcg_gen_andc_i32(t1, t1, t2); tcg_temp_free(t2); tcg_gen_shri_i32(t1, t1, 31); - tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T); + tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_T)); tcg_gen_or_i32(cpu_sr, cpu_sr, t1); tcg_temp_free(t1); tcg_gen_mov_i32(REG(B7_4), t0); @@ -718,7 +721,7 @@ static void _decode_opc(DisasContext * ctx) { TCGv cmp1 = tcg_temp_new(); TCGv cmp2 = tcg_temp_new(); - tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T); + tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_T)); tcg_gen_xor_i32(cmp1, REG(B7_4), REG(B11_8)); tcg_gen_andi_i32(cmp2, cmp1, 0xff000000); tcg_gen_setcondi_i32(TCG_COND_EQ, cmp2, cmp2, 0); @@ -738,11 +741,11 @@ static void _decode_opc(DisasContext * ctx) return; case 0x2007: /* div0s Rm,Rn */ { - gen_copy_bit_i32(cpu_sr, 8, REG(B11_8), 31); /* SR_Q */ - gen_copy_bit_i32(cpu_sr, 9, REG(B7_4), 31); /* SR_M */ + gen_copy_bit_i32(cpu_sr, SR_Q, REG(B11_8), 31); /* SR_Q */ + gen_copy_bit_i32(cpu_sr, SR_M, REG(B7_4), 31); /* SR_M */ TCGv val = tcg_temp_new(); tcg_gen_xor_i32(val, REG(B7_4), REG(B11_8)); - gen_copy_bit_i32(cpu_sr, 0, val, 31); /* SR_T */ + gen_copy_bit_i32(cpu_sr, SR_T, val, 31); /* SR_T */ tcg_temp_free(val); } return; @@ -831,9 +834,9 @@ static void _decode_opc(DisasContext * ctx) t0 = tcg_temp_new(); tcg_gen_neg_i32(t0, REG(B7_4)); t1 = tcg_temp_new(); - tcg_gen_andi_i32(t1, cpu_sr, SR_T); + tcg_gen_andi_i32(t1, cpu_sr, (1u << SR_T)); tcg_gen_sub_i32(REG(B11_8), t0, t1); - tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T); + tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_T)); 
tcg_gen_setcondi_i32(TCG_COND_GTU, t1, t0, 0); tcg_gen_or_i32(cpu_sr, cpu_sr, t1); tcg_gen_setcond_i32(TCG_COND_GTU, t1, REG(B11_8), t0); @@ -920,7 +923,7 @@ static void _decode_opc(DisasContext * ctx) { TCGv t0, t1, t2; t0 = tcg_temp_new(); - tcg_gen_andi_i32(t0, cpu_sr, SR_T); + tcg_gen_andi_i32(t0, cpu_sr, (1u << SR_T)); t1 = tcg_temp_new(); tcg_gen_sub_i32(t1, REG(B11_8), REG(B7_4)); tcg_gen_sub_i32(t0, t1, t0); @@ -929,7 +932,7 @@ static void _decode_opc(DisasContext * ctx) tcg_gen_setcond_i32(TCG_COND_LTU, t1, t1, t0); tcg_gen_or_i32(t1, t1, t2); tcg_temp_free(t2); - tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T); + tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_T)); tcg_gen_or_i32(cpu_sr, cpu_sr, t1); tcg_temp_free(t1); tcg_gen_mov_i32(REG(B11_8), t0); @@ -948,7 +951,7 @@ static void _decode_opc(DisasContext * ctx) tcg_gen_and_i32(t1, t1, t2); tcg_temp_free(t2); tcg_gen_shri_i32(t1, t1, 31); - tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T); + tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_T)); tcg_gen_or_i32(cpu_sr, cpu_sr, t1); tcg_temp_free(t1); tcg_gen_mov_i32(REG(B11_8), t0); @@ -1545,7 +1548,7 @@ static void _decode_opc(DisasContext * ctx) tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4); return; case 0x0029: /* movt Rn */ - tcg_gen_andi_i32(REG(B11_8), cpu_sr, SR_T); + tcg_gen_andi_i32(REG(B11_8), cpu_sr, (1u << SR_T)); return; case 0x0073: /* MOVCO.L @@ -1555,7 +1558,7 @@ static void _decode_opc(DisasContext * ctx) */ if (ctx->features & SH_FEATURE_SH4A) { TCGLabel *label = gen_new_label(); - tcg_gen_andi_i32(cpu_sr, cpu_sr, ~SR_T); + tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_T)); tcg_gen_or_i32(cpu_sr, cpu_sr, cpu_ldst); tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ldst, 0, label); tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL); @@ -1610,9 +1613,9 @@ static void _decode_opc(DisasContext * ctx) { TCGv tmp = tcg_temp_new(); tcg_gen_mov_i32(tmp, cpu_sr); - gen_copy_bit_i32(cpu_sr, 0, REG(B11_8), 31); + gen_copy_bit_i32(cpu_sr, SR_T, REG(B11_8), 31); 
tcg_gen_shli_i32(REG(B11_8), REG(B11_8), 1); - gen_copy_bit_i32(REG(B11_8), 0, tmp, 0); + gen_copy_bit_i32(REG(B11_8), SR_T, tmp, 0); tcg_temp_free(tmp); } return; @@ -1620,7 +1623,7 @@ static void _decode_opc(DisasContext * ctx) { TCGv tmp = tcg_temp_new(); tcg_gen_mov_i32(tmp, cpu_sr); - gen_copy_bit_i32(cpu_sr, 0, REG(B11_8), 0); + gen_copy_bit_i32(cpu_sr, SR_T, REG(B11_8), 0); tcg_gen_shri_i32(REG(B11_8), REG(B11_8), 1); gen_copy_bit_i32(REG(B11_8), 31, tmp, 0); tcg_temp_free(tmp); @@ -1628,23 +1631,23 @@ static void _decode_opc(DisasContext * ctx) return; case 0x4004: /* rotl Rn */ tcg_gen_rotli_i32(REG(B11_8), REG(B11_8), 1); - gen_copy_bit_i32(cpu_sr, 0, REG(B11_8), 0); + gen_copy_bit_i32(cpu_sr, SR_T, REG(B11_8), 0); return; case 0x4005: /* rotr Rn */ - gen_copy_bit_i32(cpu_sr, 0, REG(B11_8), 0); + gen_copy_bit_i32(cpu_sr, SR_T, REG(B11_8), 0); tcg_gen_rotri_i32(REG(B11_8), REG(B11_8), 1); return; case 0x4000: /* shll Rn */ case 0x4020: /* shal Rn */ - gen_copy_bit_i32(cpu_sr, 0, REG(B11_8), 31); + gen_copy_bit_i32(cpu_sr, SR_T, REG(B11_8), 31); tcg_gen_shli_i32(REG(B11_8), REG(B11_8), 1); return; case 0x4021: /* shar Rn */ - gen_copy_bit_i32(cpu_sr, 0, REG(B11_8), 0); + gen_copy_bit_i32(cpu_sr, SR_T, REG(B11_8), 0); tcg_gen_sari_i32(REG(B11_8), REG(B11_8), 1); return; case 0x4001: /* shlr Rn */ - gen_copy_bit_i32(cpu_sr, 0, REG(B11_8), 0); + gen_copy_bit_i32(cpu_sr, SR_T, REG(B11_8), 0); tcg_gen_shri_i32(REG(B11_8), REG(B11_8), 1); return; case 0x4008: /* shll2 Rn */ @@ -1874,7 +1877,7 @@ gen_intermediate_code_internal(SuperHCPU *cpu, TranslationBlock *tb, ctx.pc = pc_start; ctx.flags = (uint32_t)tb->flags; ctx.bstate = BS_NONE; - ctx.memidx = (ctx.flags & SR_MD) == 0 ? 1 : 0; + ctx.memidx = (ctx.flags & (1u << SR_MD)) == 0 ? 1 : 0; /* We don't know if the delayed pc came from a dynamic or static branch, so assume it is a dynamic branch. 
*/ ctx.delayed_pc = -1; /* use delayed pc from env pointer */ -- cgit v1.1 From 34086945254c035a03e01e472d99e4524a2f2416 Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Mon, 25 May 2015 01:28:56 +0200 Subject: target-sh4: Split out T from SR In preparation for more efficient setting of this field. Reviewed-by: Richard Henderson Signed-off-by: Aurelien Jarno --- target-sh4/cpu.h | 14 +++- target-sh4/gdbstub.c | 4 +- target-sh4/helper.c | 2 +- target-sh4/op_helper.c | 32 ++------ target-sh4/translate.c | 213 +++++++++++++++++++++---------------------------- 5 files changed, 112 insertions(+), 153 deletions(-) diff --git a/target-sh4/cpu.h b/target-sh4/cpu.h index 76fda35..a308c53 100644 --- a/target-sh4/cpu.h +++ b/target-sh4/cpu.h @@ -138,7 +138,8 @@ typedef struct CPUSH4State { uint32_t flags; /* general execution flags */ uint32_t gregs[24]; /* general registers */ float32 fregs[32]; /* floating point registers */ - uint32_t sr; /* status register */ + uint32_t sr; /* status register (with T split out) */ + uint32_t sr_t; /* T bit of status register */ uint32_t ssr; /* saved status register */ uint32_t spc; /* saved program counter */ uint32_t gbr; /* global base register */ @@ -331,6 +332,17 @@ static inline int cpu_ptel_pr (uint32_t ptel) #define TB_FLAG_PENDING_MOVCA (1 << 4) +static inline target_ulong cpu_read_sr(CPUSH4State *env) +{ + return env->sr | (env->sr_t << SR_T); +} + +static inline void cpu_write_sr(CPUSH4State *env, target_ulong sr) +{ + env->sr_t = sr & (1u << SR_T); + env->sr = sr & ~(1u << SR_T); +} + static inline void cpu_get_tb_cpu_state(CPUSH4State *env, target_ulong *pc, target_ulong *cs_base, int *flags) { diff --git a/target-sh4/gdbstub.c b/target-sh4/gdbstub.c index 05ba728..a365a27 100644 --- a/target-sh4/gdbstub.c +++ b/target-sh4/gdbstub.c @@ -51,7 +51,7 @@ int superh_cpu_gdb_read_register(CPUState *cs, uint8_t *mem_buf, int n) case 21: return gdb_get_regl(mem_buf, env->macl); case 22: - return gdb_get_regl(mem_buf, env->sr); + 
return gdb_get_regl(mem_buf, cpu_read_sr(env)); case 23: return gdb_get_regl(mem_buf, env->fpul); case 24: @@ -111,7 +111,7 @@ int superh_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) env->macl = ldl_p(mem_buf); break; case 22: - env->sr = ldl_p(mem_buf); + cpu_write_sr(env, ldl_p(mem_buf)); break; case 23: env->fpul = ldl_p(mem_buf); diff --git a/target-sh4/helper.c b/target-sh4/helper.c index 1cb0e8d..a533f08 100644 --- a/target-sh4/helper.c +++ b/target-sh4/helper.c @@ -162,7 +162,7 @@ void superh_cpu_do_interrupt(CPUState *cs) log_cpu_state(cs, 0); } - env->ssr = env->sr; + env->ssr = cpu_read_sr(env); env->spc = env->pc; env->sgr = env->gregs[15]; env->sr |= (1u << SR_BL) | (1u << SR_MD) | (1u << SR_RB); diff --git a/target-sh4/op_helper.c b/target-sh4/op_helper.c index 6f34292..524d7f6 100644 --- a/target-sh4/op_helper.c +++ b/target-sh4/op_helper.c @@ -156,11 +156,11 @@ void helper_ocbi(CPUSH4State *env, uint32_t address) } } -#define T (env->sr & (1u << SR_T)) +#define T (env->sr_t) #define Q (env->sr & (1u << SR_Q) ? 1 : 0) #define M (env->sr & (1u << SR_M) ? 
1 : 0) -#define SETT (env->sr |= (1u << SR_T)) -#define CLRT (env->sr &= ~(1u << SR_T)) +#define SETT (env->sr_t = 1) +#define CLRT (env->sr_t = 0) #define SETQ (env->sr |= (1u << SR_Q)) #define CLRQ (env->sr &= ~(1u << SR_Q)) #define SETM (env->sr |= (1u << SR_M)) @@ -309,16 +309,6 @@ void helper_macw(CPUSH4State *env, uint32_t arg0, uint32_t arg1) } } -static inline void set_t(CPUSH4State *env) -{ - env->sr |= (1u << SR_T); -} - -static inline void clr_t(CPUSH4State *env) -{ - env->sr &= ~(1u << SR_T); -} - void helper_ld_fpscr(CPUSH4State *env, uint32_t val) { env->fpscr = val & FPSCR_MASK; @@ -403,10 +393,8 @@ void helper_fcmp_eq_FT(CPUSH4State *env, float32 t0, float32 t1) relation = float32_compare(t0, t1, &env->fp_status); if (unlikely(relation == float_relation_unordered)) { update_fpscr(env, GETPC()); - } else if (relation == float_relation_equal) { - set_t(env); } else { - clr_t(env); + env->sr_t = (relation == float_relation_equal); } } @@ -418,10 +406,8 @@ void helper_fcmp_eq_DT(CPUSH4State *env, float64 t0, float64 t1) relation = float64_compare(t0, t1, &env->fp_status); if (unlikely(relation == float_relation_unordered)) { update_fpscr(env, GETPC()); - } else if (relation == float_relation_equal) { - set_t(env); } else { - clr_t(env); + env->sr_t = (relation == float_relation_equal); } } @@ -433,10 +419,8 @@ void helper_fcmp_gt_FT(CPUSH4State *env, float32 t0, float32 t1) relation = float32_compare(t0, t1, &env->fp_status); if (unlikely(relation == float_relation_unordered)) { update_fpscr(env, GETPC()); - } else if (relation == float_relation_greater) { - set_t(env); } else { - clr_t(env); + env->sr_t = (relation == float_relation_greater); } } @@ -448,10 +432,8 @@ void helper_fcmp_gt_DT(CPUSH4State *env, float64 t0, float64 t1) relation = float64_compare(t0, t1, &env->fp_status); if (unlikely(relation == float_relation_unordered)) { update_fpscr(env, GETPC()); - } else if (relation == float_relation_greater) { - set_t(env); } else { - clr_t(env); + 
env->sr_t = (relation == float_relation_greater); } } diff --git a/target-sh4/translate.c b/target-sh4/translate.c index b25f79c..bcdf4f3 100644 --- a/target-sh4/translate.c +++ b/target-sh4/translate.c @@ -62,7 +62,7 @@ enum { /* global register indexes */ static TCGv_ptr cpu_env; static TCGv cpu_gregs[24]; -static TCGv cpu_pc, cpu_sr, cpu_ssr, cpu_spc, cpu_gbr; +static TCGv cpu_pc, cpu_sr, cpu_sr_t, cpu_ssr, cpu_spc, cpu_gbr; static TCGv cpu_vbr, cpu_sgr, cpu_dbr, cpu_mach, cpu_macl; static TCGv cpu_pr, cpu_fpscr, cpu_fpul, cpu_ldst; static TCGv cpu_fregs[32]; @@ -110,6 +110,8 @@ void sh4_translate_init(void) offsetof(CPUSH4State, pc), "PC"); cpu_sr = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUSH4State, sr), "SR"); + cpu_sr_t = tcg_global_mem_new_i32(TCG_AREG0, + offsetof(CPUSH4State, sr_t), "SR_T"); cpu_ssr = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUSH4State, ssr), "SSR"); cpu_spc = tcg_global_mem_new_i32(TCG_AREG0, @@ -156,7 +158,7 @@ void superh_cpu_dump_state(CPUState *cs, FILE *f, CPUSH4State *env = &cpu->env; int i; cpu_fprintf(f, "pc=0x%08x sr=0x%08x pr=0x%08x fpscr=0x%08x\n", - env->pc, env->sr, env->pr, env->fpscr); + env->pc, cpu_read_sr(env), env->pr, env->fpscr); cpu_fprintf(f, "spc=0x%08x ssr=0x%08x gbr=0x%08x vbr=0x%08x\n", env->spc, env->ssr, env->gbr, env->vbr); cpu_fprintf(f, "sgr=0x%08x dbr=0x%08x delayed_pc=0x%08x fpul=0x%08x\n", @@ -175,6 +177,17 @@ void superh_cpu_dump_state(CPUState *cs, FILE *f, } } +static void gen_read_sr(TCGv dst) +{ + tcg_gen_or_i32(dst, cpu_sr, cpu_sr_t); +} + +static void gen_write_sr(TCGv src) +{ + tcg_gen_andi_i32(cpu_sr, src, ~(1u << SR_T)); + tcg_gen_andi_i32(cpu_sr_t, src, (1u << SR_T)); +} + static void gen_goto_tb(DisasContext * ctx, int n, target_ulong dest) { TranslationBlock *tb; @@ -210,12 +223,9 @@ static void gen_jump(DisasContext * ctx) static inline void gen_branch_slot(uint32_t delayed_pc, int t) { - TCGv sr; TCGLabel *label = gen_new_label(); tcg_gen_movi_i32(cpu_delayed_pc, delayed_pc); - sr 
= tcg_temp_new(); - tcg_gen_andi_i32(sr, cpu_sr, (1u << SR_T)); - tcg_gen_brcondi_i32(t ? TCG_COND_EQ:TCG_COND_NE, sr, 0, label); + tcg_gen_brcondi_i32(t ? TCG_COND_EQ : TCG_COND_NE, cpu_sr_t, 0, label); tcg_gen_ori_i32(cpu_flags, cpu_flags, DELAY_SLOT_TRUE); gen_set_label(label); } @@ -224,13 +234,8 @@ static inline void gen_branch_slot(uint32_t delayed_pc, int t) static void gen_conditional_jump(DisasContext * ctx, target_ulong ift, target_ulong ifnott) { - TCGLabel *l1; - TCGv sr; - - l1 = gen_new_label(); - sr = tcg_temp_new(); - tcg_gen_andi_i32(sr, cpu_sr, (1u << SR_T)); - tcg_gen_brcondi_i32(TCG_COND_NE, sr, 0, l1); + TCGLabel *l1 = gen_new_label(); + tcg_gen_brcondi_i32(TCG_COND_NE, cpu_sr_t, 0, l1); gen_goto_tb(ctx, 0, ifnott); gen_set_label(l1); gen_goto_tb(ctx, 1, ift); @@ -252,30 +257,6 @@ static void gen_delayed_conditional_jump(DisasContext * ctx) gen_jump(ctx); } -static inline void gen_cmp(int cond, TCGv t0, TCGv t1) -{ - TCGv t; - - t = tcg_temp_new(); - tcg_gen_setcond_i32(cond, t, t1, t0); - tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_T)); - tcg_gen_or_i32(cpu_sr, cpu_sr, t); - - tcg_temp_free(t); -} - -static inline void gen_cmp_imm(int cond, TCGv t0, int32_t imm) -{ - TCGv t; - - t = tcg_temp_new(); - tcg_gen_setcondi_i32(cond, t, t0, imm); - tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_T)); - tcg_gen_or_i32(cpu_sr, cpu_sr, t); - - tcg_temp_free(t); -} - static inline void gen_store_flags(uint32_t flags) { tcg_gen_andi_i32(cpu_flags, cpu_flags, DELAY_SLOT_TRUE); @@ -411,8 +392,8 @@ static void _decode_opc(DisasContext * ctx) switch (ctx->opcode) { case 0x0019: /* div0u */ - tcg_gen_andi_i32(cpu_sr, cpu_sr, - ~((1u << SR_M) | (1u << SR_Q) | (1u << SR_T))); + tcg_gen_andi_i32(cpu_sr, cpu_sr, ~((1u << SR_M) | (1u << SR_Q))); + tcg_gen_movi_i32(cpu_sr_t, 0); return; case 0x000b: /* rts */ CHECK_NOT_DELAY_SLOT @@ -428,7 +409,7 @@ static void _decode_opc(DisasContext * ctx) tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_S)); return; case 0x0008: /* clrt 
*/ - tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_T)); + tcg_gen_movi_i32(cpu_sr_t, 0); return; case 0x0038: /* ldtlb */ CHECK_PRIVILEGED @@ -437,7 +418,7 @@ static void _decode_opc(DisasContext * ctx) case 0x002b: /* rte */ CHECK_PRIVILEGED CHECK_NOT_DELAY_SLOT - tcg_gen_mov_i32(cpu_sr, cpu_ssr); + gen_write_sr(cpu_ssr); tcg_gen_mov_i32(cpu_delayed_pc, cpu_spc); ctx->flags |= DELAY_SLOT; ctx->delayed_pc = (uint32_t) - 1; @@ -446,7 +427,7 @@ static void _decode_opc(DisasContext * ctx) tcg_gen_ori_i32(cpu_sr, cpu_sr, (1u << SR_S)); return; case 0x0018: /* sett */ - tcg_gen_ori_i32(cpu_sr, cpu_sr, (1u << SR_T)); + tcg_gen_movi_i32(cpu_sr_t, 1); return; case 0xfbfd: /* frchg */ tcg_gen_xori_i32(cpu_fpscr, cpu_fpscr, FPSCR_FR); @@ -662,22 +643,17 @@ static void _decode_opc(DisasContext * ctx) return; case 0x300e: /* addc Rm,Rn */ { - TCGv t0, t1, t2; + TCGv t0, t1; t0 = tcg_temp_new(); - tcg_gen_andi_i32(t0, cpu_sr, (1u << SR_T)); t1 = tcg_temp_new(); - tcg_gen_add_i32(t1, REG(B7_4), REG(B11_8)); - tcg_gen_add_i32(t0, t0, t1); - t2 = tcg_temp_new(); - tcg_gen_setcond_i32(TCG_COND_GTU, t2, REG(B11_8), t1); - tcg_gen_setcond_i32(TCG_COND_GTU, t1, t1, t0); - tcg_gen_or_i32(t1, t1, t2); - tcg_temp_free(t2); - tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_T)); - tcg_gen_or_i32(cpu_sr, cpu_sr, t1); - tcg_temp_free(t1); - tcg_gen_mov_i32(REG(B11_8), t0); + tcg_gen_add_i32(t0, REG(B7_4), REG(B11_8)); + tcg_gen_add_i32(t1, cpu_sr_t, t0); + tcg_gen_setcond_i32(TCG_COND_GTU, cpu_sr_t, REG(B11_8), t0); + tcg_gen_setcond_i32(TCG_COND_GTU, t0, t0, t1); + tcg_gen_or_i32(cpu_sr_t, cpu_sr_t, t0); tcg_temp_free(t0); + tcg_gen_mov_i32(REG(B11_8), t1); + tcg_temp_free(t1); } return; case 0x300f: /* addv Rm,Rn */ @@ -689,11 +665,9 @@ static void _decode_opc(DisasContext * ctx) tcg_gen_xor_i32(t1, t0, REG(B11_8)); t2 = tcg_temp_new(); tcg_gen_xor_i32(t2, REG(B7_4), REG(B11_8)); - tcg_gen_andc_i32(t1, t1, t2); + tcg_gen_andc_i32(cpu_sr_t, t1, t2); tcg_temp_free(t2); - tcg_gen_shri_i32(t1, t1, 
31); - tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_T)); - tcg_gen_or_i32(cpu_sr, cpu_sr, t1); + tcg_gen_shri_i32(cpu_sr_t, cpu_sr_t, 31); tcg_temp_free(t1); tcg_gen_mov_i32(REG(B7_4), t0); tcg_temp_free(t0); @@ -703,51 +677,45 @@ static void _decode_opc(DisasContext * ctx) tcg_gen_and_i32(REG(B11_8), REG(B11_8), REG(B7_4)); return; case 0x3000: /* cmp/eq Rm,Rn */ - gen_cmp(TCG_COND_EQ, REG(B7_4), REG(B11_8)); + tcg_gen_setcond_i32(TCG_COND_EQ, cpu_sr_t, REG(B11_8), REG(B7_4)); return; case 0x3003: /* cmp/ge Rm,Rn */ - gen_cmp(TCG_COND_GE, REG(B7_4), REG(B11_8)); + tcg_gen_setcond_i32(TCG_COND_GE, cpu_sr_t, REG(B11_8), REG(B7_4)); return; case 0x3007: /* cmp/gt Rm,Rn */ - gen_cmp(TCG_COND_GT, REG(B7_4), REG(B11_8)); + tcg_gen_setcond_i32(TCG_COND_GT, cpu_sr_t, REG(B11_8), REG(B7_4)); return; case 0x3006: /* cmp/hi Rm,Rn */ - gen_cmp(TCG_COND_GTU, REG(B7_4), REG(B11_8)); + tcg_gen_setcond_i32(TCG_COND_GTU, cpu_sr_t, REG(B11_8), REG(B7_4)); return; case 0x3002: /* cmp/hs Rm,Rn */ - gen_cmp(TCG_COND_GEU, REG(B7_4), REG(B11_8)); + tcg_gen_setcond_i32(TCG_COND_GEU, cpu_sr_t, REG(B11_8), REG(B7_4)); return; case 0x200c: /* cmp/str Rm,Rn */ { TCGv cmp1 = tcg_temp_new(); TCGv cmp2 = tcg_temp_new(); - tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_T)); tcg_gen_xor_i32(cmp1, REG(B7_4), REG(B11_8)); tcg_gen_andi_i32(cmp2, cmp1, 0xff000000); - tcg_gen_setcondi_i32(TCG_COND_EQ, cmp2, cmp2, 0); - tcg_gen_or_i32(cpu_sr, cpu_sr, cmp2); + tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, cmp2, 0); tcg_gen_andi_i32(cmp2, cmp1, 0x00ff0000); tcg_gen_setcondi_i32(TCG_COND_EQ, cmp2, cmp2, 0); - tcg_gen_or_i32(cpu_sr, cpu_sr, cmp2); + tcg_gen_or_i32(cpu_sr_t, cpu_sr_t, cmp2); tcg_gen_andi_i32(cmp2, cmp1, 0x0000ff00); tcg_gen_setcondi_i32(TCG_COND_EQ, cmp2, cmp2, 0); - tcg_gen_or_i32(cpu_sr, cpu_sr, cmp2); + tcg_gen_or_i32(cpu_sr_t, cpu_sr_t, cmp2); tcg_gen_andi_i32(cmp2, cmp1, 0x000000ff); tcg_gen_setcondi_i32(TCG_COND_EQ, cmp2, cmp2, 0); - tcg_gen_or_i32(cpu_sr, cpu_sr, cmp2); + 
tcg_gen_or_i32(cpu_sr_t, cpu_sr_t, cmp2); tcg_temp_free(cmp2); tcg_temp_free(cmp1); } return; case 0x2007: /* div0s Rm,Rn */ - { - gen_copy_bit_i32(cpu_sr, SR_Q, REG(B11_8), 31); /* SR_Q */ - gen_copy_bit_i32(cpu_sr, SR_M, REG(B7_4), 31); /* SR_M */ - TCGv val = tcg_temp_new(); - tcg_gen_xor_i32(val, REG(B7_4), REG(B11_8)); - gen_copy_bit_i32(cpu_sr, SR_T, val, 31); /* SR_T */ - tcg_temp_free(val); - } + gen_copy_bit_i32(cpu_sr, SR_Q, REG(B11_8), 31); /* SR_Q */ + gen_copy_bit_i32(cpu_sr, SR_M, REG(B7_4), 31); /* SR_M */ + tcg_gen_xor_i32(cpu_sr_t, REG(B7_4), REG(B11_8)); + tcg_gen_shri_i32(cpu_sr_t, cpu_sr_t, 31); /* SR_T */ return; case 0x3004: /* div1 Rm,Rn */ gen_helper_div1(REG(B11_8), cpu_env, REG(B7_4), REG(B11_8)); @@ -830,19 +798,13 @@ static void _decode_opc(DisasContext * ctx) return; case 0x600a: /* negc Rm,Rn */ { - TCGv t0, t1; - t0 = tcg_temp_new(); + TCGv t0 = tcg_temp_new(); tcg_gen_neg_i32(t0, REG(B7_4)); - t1 = tcg_temp_new(); - tcg_gen_andi_i32(t1, cpu_sr, (1u << SR_T)); - tcg_gen_sub_i32(REG(B11_8), t0, t1); - tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_T)); - tcg_gen_setcondi_i32(TCG_COND_GTU, t1, t0, 0); - tcg_gen_or_i32(cpu_sr, cpu_sr, t1); - tcg_gen_setcond_i32(TCG_COND_GTU, t1, REG(B11_8), t0); - tcg_gen_or_i32(cpu_sr, cpu_sr, t1); + tcg_gen_sub_i32(REG(B11_8), t0, cpu_sr_t); + tcg_gen_setcondi_i32(TCG_COND_GTU, cpu_sr_t, t0, 0); + tcg_gen_setcond_i32(TCG_COND_GTU, t0, REG(B11_8), t0); + tcg_gen_or_i32(cpu_sr_t, cpu_sr_t, t0); tcg_temp_free(t0); - tcg_temp_free(t1); } return; case 0x6007: /* not Rm,Rn */ @@ -923,17 +885,14 @@ static void _decode_opc(DisasContext * ctx) { TCGv t0, t1, t2; t0 = tcg_temp_new(); - tcg_gen_andi_i32(t0, cpu_sr, (1u << SR_T)); t1 = tcg_temp_new(); tcg_gen_sub_i32(t1, REG(B11_8), REG(B7_4)); - tcg_gen_sub_i32(t0, t1, t0); + tcg_gen_sub_i32(t0, t1, cpu_sr_t); t2 = tcg_temp_new(); tcg_gen_setcond_i32(TCG_COND_LTU, t2, REG(B11_8), t1); tcg_gen_setcond_i32(TCG_COND_LTU, t1, t1, t0); - tcg_gen_or_i32(t1, t1, t2); + 
tcg_gen_or_i32(cpu_sr_t, t1, t2); tcg_temp_free(t2); - tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_T)); - tcg_gen_or_i32(cpu_sr, cpu_sr, t1); tcg_temp_free(t1); tcg_gen_mov_i32(REG(B11_8), t0); tcg_temp_free(t0); @@ -950,9 +909,7 @@ static void _decode_opc(DisasContext * ctx) tcg_gen_xor_i32(t2, REG(B11_8), REG(B7_4)); tcg_gen_and_i32(t1, t1, t2); tcg_temp_free(t2); - tcg_gen_shri_i32(t1, t1, 31); - tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_T)); - tcg_gen_or_i32(cpu_sr, cpu_sr, t1); + tcg_gen_shri_i32(cpu_sr_t, t1, 31); tcg_temp_free(t1); tcg_gen_mov_i32(REG(B11_8), t0); tcg_temp_free(t0); @@ -962,7 +919,7 @@ static void _decode_opc(DisasContext * ctx) { TCGv val = tcg_temp_new(); tcg_gen_and_i32(val, REG(B7_4), REG(B11_8)); - gen_cmp_imm(TCG_COND_EQ, val, 0); + tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, val, 0); tcg_temp_free(val); } return; @@ -1213,7 +1170,7 @@ static void _decode_opc(DisasContext * ctx) ctx->flags |= DELAY_SLOT_CONDITIONAL; return; case 0x8800: /* cmp/eq #imm,R0 */ - gen_cmp_imm(TCG_COND_EQ, REG(0), B7_0s); + tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, REG(0), B7_0s); return; case 0xc400: /* mov.b @(disp,GBR),R0 */ { @@ -1329,7 +1286,7 @@ static void _decode_opc(DisasContext * ctx) { TCGv val = tcg_temp_new(); tcg_gen_andi_i32(val, REG(0), B7_0); - gen_cmp_imm(TCG_COND_EQ, val, 0); + tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, val, 0); tcg_temp_free(val); } return; @@ -1339,7 +1296,7 @@ static void _decode_opc(DisasContext * ctx) tcg_gen_add_i32(val, REG(0), cpu_gbr); tcg_gen_qemu_ld_i32(val, val, ctx->memidx, MO_UB); tcg_gen_andi_i32(val, val, B7_0); - gen_cmp_imm(TCG_COND_EQ, val, 0); + tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, val, 0); tcg_temp_free(val); } return; @@ -1402,14 +1359,14 @@ static void _decode_opc(DisasContext * ctx) ctx->delayed_pc = (uint32_t) - 1; return; case 0x4015: /* cmp/pl Rn */ - gen_cmp_imm(TCG_COND_GT, REG(B11_8), 0); + tcg_gen_setcondi_i32(TCG_COND_GT, cpu_sr_t, REG(B11_8), 0); return; case 0x4011: /* cmp/pz 
Rn */ - gen_cmp_imm(TCG_COND_GE, REG(B11_8), 0); + tcg_gen_setcondi_i32(TCG_COND_GE, cpu_sr_t, REG(B11_8), 0); return; case 0x4010: /* dt Rn */ tcg_gen_subi_i32(REG(B11_8), REG(B11_8), 1); - gen_cmp_imm(TCG_COND_EQ, REG(B11_8), 0); + tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, REG(B11_8), 0); return; case 0x402b: /* jmp @Rn */ CHECK_NOT_DELAY_SLOT @@ -1426,15 +1383,21 @@ static void _decode_opc(DisasContext * ctx) return; case 0x400e: /* ldc Rm,SR */ CHECK_PRIVILEGED - tcg_gen_andi_i32(cpu_sr, REG(B11_8), 0x700083f3); - ctx->bstate = BS_STOP; + { + TCGv val = tcg_temp_new(); + tcg_gen_andi_i32(val, REG(B11_8), 0x700083f3); + gen_write_sr(val); + tcg_temp_free(val); + ctx->bstate = BS_STOP; + } return; case 0x4007: /* ldc.l @Rm+,SR */ CHECK_PRIVILEGED { TCGv val = tcg_temp_new(); tcg_gen_qemu_ld_i32(val, REG(B11_8), ctx->memidx, MO_TESL); - tcg_gen_andi_i32(cpu_sr, val, 0x700083f3); + tcg_gen_andi_i32(val, val, 0x700083f3); + gen_write_sr(val); tcg_temp_free(val); tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4); ctx->bstate = BS_STOP; @@ -1442,15 +1405,18 @@ static void _decode_opc(DisasContext * ctx) return; case 0x0002: /* stc SR,Rn */ CHECK_PRIVILEGED - tcg_gen_mov_i32(REG(B11_8), cpu_sr); + gen_read_sr(REG(B11_8)); return; case 0x4003: /* stc SR,@-Rn */ CHECK_PRIVILEGED { TCGv addr = tcg_temp_new(); + TCGv val = tcg_temp_new(); tcg_gen_subi_i32(addr, REG(B11_8), 4); - tcg_gen_qemu_st_i32(cpu_sr, addr, ctx->memidx, MO_TEUL); + gen_read_sr(val); + tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_TEUL); tcg_gen_mov_i32(REG(B11_8), addr); + tcg_temp_free(val); tcg_temp_free(addr); } return; @@ -1548,7 +1514,7 @@ static void _decode_opc(DisasContext * ctx) tcg_gen_addi_i32(REG(B11_8), REG(B11_8), 4); return; case 0x0029: /* movt Rn */ - tcg_gen_andi_i32(REG(B11_8), cpu_sr, (1u << SR_T)); + tcg_gen_mov_i32(REG(B11_8), cpu_sr_t); return; case 0x0073: /* MOVCO.L @@ -1558,8 +1524,7 @@ static void _decode_opc(DisasContext * ctx) */ if (ctx->features & SH_FEATURE_SH4A) { 
TCGLabel *label = gen_new_label(); - tcg_gen_andi_i32(cpu_sr, cpu_sr, ~(1u << SR_T)); - tcg_gen_or_i32(cpu_sr, cpu_sr, cpu_ldst); + tcg_gen_mov_i32(cpu_sr_t, cpu_ldst); tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_ldst, 0, label); tcg_gen_qemu_st_i32(REG(0), REG(B11_8), ctx->memidx, MO_TEUL); gen_set_label(label); @@ -1612,42 +1577,42 @@ static void _decode_opc(DisasContext * ctx) case 0x4024: /* rotcl Rn */ { TCGv tmp = tcg_temp_new(); - tcg_gen_mov_i32(tmp, cpu_sr); - gen_copy_bit_i32(cpu_sr, SR_T, REG(B11_8), 31); + tcg_gen_mov_i32(tmp, cpu_sr_t); + tcg_gen_shri_i32(cpu_sr_t, REG(B11_8), 31); tcg_gen_shli_i32(REG(B11_8), REG(B11_8), 1); - gen_copy_bit_i32(REG(B11_8), SR_T, tmp, 0); + tcg_gen_or_i32(REG(B11_8), REG(B11_8), tmp); tcg_temp_free(tmp); } return; case 0x4025: /* rotcr Rn */ { TCGv tmp = tcg_temp_new(); - tcg_gen_mov_i32(tmp, cpu_sr); - gen_copy_bit_i32(cpu_sr, SR_T, REG(B11_8), 0); + tcg_gen_shli_i32(tmp, cpu_sr_t, 31); + tcg_gen_andi_i32(cpu_sr_t, REG(B11_8), 1); tcg_gen_shri_i32(REG(B11_8), REG(B11_8), 1); - gen_copy_bit_i32(REG(B11_8), 31, tmp, 0); + tcg_gen_or_i32(REG(B11_8), REG(B11_8), tmp); tcg_temp_free(tmp); } return; case 0x4004: /* rotl Rn */ tcg_gen_rotli_i32(REG(B11_8), REG(B11_8), 1); - gen_copy_bit_i32(cpu_sr, SR_T, REG(B11_8), 0); + tcg_gen_andi_i32(cpu_sr_t, REG(B11_8), 0); return; case 0x4005: /* rotr Rn */ - gen_copy_bit_i32(cpu_sr, SR_T, REG(B11_8), 0); + tcg_gen_andi_i32(cpu_sr_t, REG(B11_8), 0); tcg_gen_rotri_i32(REG(B11_8), REG(B11_8), 1); return; case 0x4000: /* shll Rn */ case 0x4020: /* shal Rn */ - gen_copy_bit_i32(cpu_sr, SR_T, REG(B11_8), 31); + tcg_gen_shri_i32(cpu_sr_t, REG(B11_8), 31); tcg_gen_shli_i32(REG(B11_8), REG(B11_8), 1); return; case 0x4021: /* shar Rn */ - gen_copy_bit_i32(cpu_sr, SR_T, REG(B11_8), 0); + tcg_gen_andi_i32(cpu_sr_t, REG(B11_8), 1); tcg_gen_sari_i32(REG(B11_8), REG(B11_8), 1); return; case 0x4001: /* shlr Rn */ - gen_copy_bit_i32(cpu_sr, SR_T, REG(B11_8), 0); + tcg_gen_andi_i32(cpu_sr_t, REG(B11_8), 1); 
tcg_gen_shri_i32(REG(B11_8), REG(B11_8), 1); return; case 0x4008: /* shll2 Rn */ @@ -1675,7 +1640,7 @@ static void _decode_opc(DisasContext * ctx) tcg_gen_mov_i32(addr, REG(B11_8)); val = tcg_temp_local_new(); tcg_gen_qemu_ld_i32(val, addr, ctx->memidx, MO_UB); - gen_cmp_imm(TCG_COND_EQ, val, 0); + tcg_gen_setcondi_i32(TCG_COND_EQ, cpu_sr_t, val, 0); tcg_gen_ori_i32(val, val, 0x80); tcg_gen_qemu_st_i32(val, addr, ctx->memidx, MO_UB); tcg_temp_free(val); -- cgit v1.1 From a2368e01c95a093d250a0e5d3cef53dddf642f1e Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Mon, 25 May 2015 01:28:56 +0200 Subject: target-sh4: optimize addc using add2 Reviewed-by: Richard Henderson Signed-off-by: Aurelien Jarno --- target-sh4/translate.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/target-sh4/translate.c b/target-sh4/translate.c index bcdf4f3..5c90fe3 100644 --- a/target-sh4/translate.c +++ b/target-sh4/translate.c @@ -644,15 +644,12 @@ static void _decode_opc(DisasContext * ctx) case 0x300e: /* addc Rm,Rn */ { TCGv t0, t1; - t0 = tcg_temp_new(); + t0 = tcg_const_tl(0); t1 = tcg_temp_new(); - tcg_gen_add_i32(t0, REG(B7_4), REG(B11_8)); - tcg_gen_add_i32(t1, cpu_sr_t, t0); - tcg_gen_setcond_i32(TCG_COND_GTU, cpu_sr_t, REG(B11_8), t0); - tcg_gen_setcond_i32(TCG_COND_GTU, t0, t0, t1); - tcg_gen_or_i32(cpu_sr_t, cpu_sr_t, t0); + tcg_gen_add2_i32(t1, cpu_sr_t, cpu_sr_t, t0, REG(B7_4), t0); + tcg_gen_add2_i32(REG(B11_8), cpu_sr_t, + REG(B11_8), t0, t1, cpu_sr_t); tcg_temp_free(t0); - tcg_gen_mov_i32(REG(B11_8), t1); tcg_temp_free(t1); } return; -- cgit v1.1 From d0f44a55fa321e042bd6b2a0fa25ac48864b7a25 Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Mon, 25 May 2015 01:28:56 +0200 Subject: target-sh4: optimize subc using sub2 Reviewed-by: Richard Henderson Signed-off-by: Aurelien Jarno --- target-sh4/translate.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/target-sh4/translate.c b/target-sh4/translate.c 
index 5c90fe3..b8abfd5 100644 --- a/target-sh4/translate.c +++ b/target-sh4/translate.c @@ -880,19 +880,15 @@ static void _decode_opc(DisasContext * ctx) return; case 0x300a: /* subc Rm,Rn */ { - TCGv t0, t1, t2; - t0 = tcg_temp_new(); + TCGv t0, t1; + t0 = tcg_const_tl(0); t1 = tcg_temp_new(); - tcg_gen_sub_i32(t1, REG(B11_8), REG(B7_4)); - tcg_gen_sub_i32(t0, t1, cpu_sr_t); - t2 = tcg_temp_new(); - tcg_gen_setcond_i32(TCG_COND_LTU, t2, REG(B11_8), t1); - tcg_gen_setcond_i32(TCG_COND_LTU, t1, t1, t0); - tcg_gen_or_i32(cpu_sr_t, t1, t2); - tcg_temp_free(t2); - tcg_temp_free(t1); - tcg_gen_mov_i32(REG(B11_8), t0); + tcg_gen_add2_i32(t1, cpu_sr_t, cpu_sr_t, t0, REG(B7_4), t0); + tcg_gen_sub2_i32(REG(B11_8), cpu_sr_t, + REG(B11_8), t0, t1, cpu_sr_t); + tcg_gen_andi_i32(cpu_sr_t, cpu_sr_t, 1); tcg_temp_free(t0); + tcg_temp_free(t1); } return; case 0x300b: /* subv Rm,Rn */ -- cgit v1.1 From 60eb27fe4951fbe6cf5e24cc3d6df7e97c43a909 Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Mon, 25 May 2015 01:28:56 +0200 Subject: target-sh4: optimize negc using add2 and sub2 Reviewed-by: Richard Henderson Signed-off-by: Aurelien Jarno --- target-sh4/translate.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/target-sh4/translate.c b/target-sh4/translate.c index b8abfd5..9ab3ba0 100644 --- a/target-sh4/translate.c +++ b/target-sh4/translate.c @@ -795,12 +795,12 @@ static void _decode_opc(DisasContext * ctx) return; case 0x600a: /* negc Rm,Rn */ { - TCGv t0 = tcg_temp_new(); - tcg_gen_neg_i32(t0, REG(B7_4)); - tcg_gen_sub_i32(REG(B11_8), t0, cpu_sr_t); - tcg_gen_setcondi_i32(TCG_COND_GTU, cpu_sr_t, t0, 0); - tcg_gen_setcond_i32(TCG_COND_GTU, t0, REG(B11_8), t0); - tcg_gen_or_i32(cpu_sr_t, cpu_sr_t, t0); + TCGv t0 = tcg_const_i32(0); + tcg_gen_add2_i32(REG(B11_8), cpu_sr_t, + REG(B7_4), t0, cpu_sr_t, t0); + tcg_gen_sub2_i32(REG(B11_8), cpu_sr_t, + t0, t0, REG(B11_8), cpu_sr_t); + tcg_gen_andi_i32(cpu_sr_t, cpu_sr_t, 1); tcg_temp_free(t0); } return; 
-- cgit v1.1 From 1d565b21e1aecbb0da6589f3c4ea83c9c788ad63 Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Mon, 25 May 2015 01:28:56 +0200 Subject: target-sh4: split out Q and M from SR and optimize div1 With Q and M split out of SR, it's possible to optimize div1 by using TCG code instead of a helper. At the same time, remove the now unused gen_copy_bit_i32 function. Reviewed-by: Richard Henderson Signed-off-by: Aurelien Jarno --- target-sh4/cpu.h | 12 +++-- target-sh4/helper.h | 1 - target-sh4/op_helper.c | 118 ------------------------------------------------- target-sh4/translate.c | 88 ++++++++++++++++++++++++------------ 4 files changed, 69 insertions(+), 150 deletions(-) diff --git a/target-sh4/cpu.h b/target-sh4/cpu.h index a308c53..4a027a6 100644 --- a/target-sh4/cpu.h +++ b/target-sh4/cpu.h @@ -139,6 +139,8 @@ typedef struct CPUSH4State { uint32_t gregs[24]; /* general registers */ float32 fregs[32]; /* floating point registers */ uint32_t sr; /* status register (with T split out) */ + uint32_t sr_m; /* M bit of status register */ + uint32_t sr_q; /* Q bit of status register */ uint32_t sr_t; /* T bit of status register */ uint32_t ssr; /* saved status register */ uint32_t spc; /* saved program counter */ @@ -334,13 +336,17 @@ static inline int cpu_ptel_pr (uint32_t ptel) static inline target_ulong cpu_read_sr(CPUSH4State *env) { - return env->sr | (env->sr_t << SR_T); + return env->sr | (env->sr_m << SR_M) | + (env->sr_q << SR_Q) | + (env->sr_t << SR_T); } static inline void cpu_write_sr(CPUSH4State *env, target_ulong sr) { - env->sr_t = sr & (1u << SR_T); - env->sr = sr & ~(1u << SR_T); + env->sr_m = (sr >> SR_M) & 1; + env->sr_q = (sr >> SR_Q) & 1; + env->sr_t = (sr >> SR_T) & 1; + env->sr = sr & ~((1u << SR_M) | (1u << SR_Q) | (1u << SR_T)); } static inline void cpu_get_tb_cpu_state(CPUSH4State *env, target_ulong *pc, diff --git a/target-sh4/helper.h b/target-sh4/helper.h index 3b5c436..c9bc407 100644 --- a/target-sh4/helper.h +++
b/target-sh4/helper.h @@ -11,7 +11,6 @@ DEF_HELPER_3(movcal, void, env, i32, i32) DEF_HELPER_1(discard_movcal_backup, void, env) DEF_HELPER_2(ocbi, void, env, i32) -DEF_HELPER_3(div1, i32, env, i32, i32) DEF_HELPER_3(macl, void, env, i32, i32) DEF_HELPER_3(macw, void, env, i32, i32) diff --git a/target-sh4/op_helper.c b/target-sh4/op_helper.c index 524d7f6..cbc11ae 100644 --- a/target-sh4/op_helper.c +++ b/target-sh4/op_helper.c @@ -156,124 +156,6 @@ void helper_ocbi(CPUSH4State *env, uint32_t address) } } -#define T (env->sr_t) -#define Q (env->sr & (1u << SR_Q) ? 1 : 0) -#define M (env->sr & (1u << SR_M) ? 1 : 0) -#define SETT (env->sr_t = 1) -#define CLRT (env->sr_t = 0) -#define SETQ (env->sr |= (1u << SR_Q)) -#define CLRQ (env->sr &= ~(1u << SR_Q)) -#define SETM (env->sr |= (1u << SR_M)) -#define CLRM (env->sr &= ~(1u << SR_M)) - -uint32_t helper_div1(CPUSH4State *env, uint32_t arg0, uint32_t arg1) -{ - uint32_t tmp0, tmp2; - uint8_t old_q, tmp1 = 0xff; - - //printf("div1 arg0=0x%08x arg1=0x%08x M=%d Q=%d T=%d\n", arg0, arg1, M, Q, T); - old_q = Q; - if ((0x80000000 & arg1) != 0) - SETQ; - else - CLRQ; - tmp2 = arg0; - arg1 <<= 1; - arg1 |= T; - switch (old_q) { - case 0: - switch (M) { - case 0: - tmp0 = arg1; - arg1 -= tmp2; - tmp1 = arg1 > tmp0; - switch (Q) { - case 0: - if (tmp1) - SETQ; - else - CLRQ; - break; - case 1: - if (tmp1 == 0) - SETQ; - else - CLRQ; - break; - } - break; - case 1: - tmp0 = arg1; - arg1 += tmp2; - tmp1 = arg1 < tmp0; - switch (Q) { - case 0: - if (tmp1 == 0) - SETQ; - else - CLRQ; - break; - case 1: - if (tmp1) - SETQ; - else - CLRQ; - break; - } - break; - } - break; - case 1: - switch (M) { - case 0: - tmp0 = arg1; - arg1 += tmp2; - tmp1 = arg1 < tmp0; - switch (Q) { - case 0: - if (tmp1) - SETQ; - else - CLRQ; - break; - case 1: - if (tmp1 == 0) - SETQ; - else - CLRQ; - break; - } - break; - case 1: - tmp0 = arg1; - arg1 -= tmp2; - tmp1 = arg1 > tmp0; - switch (Q) { - case 0: - if (tmp1 == 0) - SETQ; - else - CLRQ; - break; - 
case 1: - if (tmp1) - SETQ; - else - CLRQ; - break; - } - break; - } - break; - } - if (Q == M) - SETT; - else - CLRT; - //printf("Output: arg1=0x%08x M=%d Q=%d T=%d\n", arg1, M, Q, T); - return arg1; -} - void helper_macl(CPUSH4State *env, uint32_t arg0, uint32_t arg1) { int64_t res; diff --git a/target-sh4/translate.c b/target-sh4/translate.c index 9ab3ba0..44d0e94 100644 --- a/target-sh4/translate.c +++ b/target-sh4/translate.c @@ -62,7 +62,8 @@ enum { /* global register indexes */ static TCGv_ptr cpu_env; static TCGv cpu_gregs[24]; -static TCGv cpu_pc, cpu_sr, cpu_sr_t, cpu_ssr, cpu_spc, cpu_gbr; +static TCGv cpu_sr, cpu_sr_m, cpu_sr_q, cpu_sr_t; +static TCGv cpu_pc, cpu_ssr, cpu_spc, cpu_gbr; static TCGv cpu_vbr, cpu_sgr, cpu_dbr, cpu_mach, cpu_macl; static TCGv cpu_pr, cpu_fpscr, cpu_fpul, cpu_ldst; static TCGv cpu_fregs[32]; @@ -110,6 +111,10 @@ void sh4_translate_init(void) offsetof(CPUSH4State, pc), "PC"); cpu_sr = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUSH4State, sr), "SR"); + cpu_sr_m = tcg_global_mem_new_i32(TCG_AREG0, + offsetof(CPUSH4State, sr_m), "SR_M"); + cpu_sr_q = tcg_global_mem_new_i32(TCG_AREG0, + offsetof(CPUSH4State, sr_q), "SR_Q"); cpu_sr_t = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUSH4State, sr_t), "SR_T"); cpu_ssr = tcg_global_mem_new_i32(TCG_AREG0, @@ -179,13 +184,26 @@ void superh_cpu_dump_state(CPUState *cs, FILE *f, static void gen_read_sr(TCGv dst) { - tcg_gen_or_i32(dst, cpu_sr, cpu_sr_t); + TCGv t0 = tcg_temp_new(); + tcg_gen_shli_i32(t0, cpu_sr_q, SR_Q); + tcg_gen_or_i32(dst, dst, t0); + tcg_gen_shli_i32(t0, cpu_sr_m, SR_M); + tcg_gen_or_i32(dst, dst, t0); + tcg_gen_shli_i32(t0, cpu_sr_t, SR_T); + tcg_gen_or_i32(dst, cpu_sr, t0); + tcg_temp_free_i32(t0); } static void gen_write_sr(TCGv src) { - tcg_gen_andi_i32(cpu_sr, src, ~(1u << SR_T)); - tcg_gen_andi_i32(cpu_sr_t, src, (1u << SR_T)); + tcg_gen_andi_i32(cpu_sr, src, + ~((1u << SR_Q) | (1u << SR_M) | (1u << SR_T))); + tcg_gen_shri_i32(cpu_sr_q, src, SR_Q); + 
tcg_gen_andi_i32(cpu_sr_q, cpu_sr_q, 1); + tcg_gen_shri_i32(cpu_sr_m, src, SR_M); + tcg_gen_andi_i32(cpu_sr_m, cpu_sr_m, 1); + tcg_gen_shri_i32(cpu_sr_t, src, SR_T); + tcg_gen_andi_i32(cpu_sr_t, cpu_sr_t, 1); } static void gen_goto_tb(DisasContext * ctx, int n, target_ulong dest) @@ -263,24 +281,6 @@ static inline void gen_store_flags(uint32_t flags) tcg_gen_ori_i32(cpu_flags, cpu_flags, flags); } -static inline void gen_copy_bit_i32(TCGv t0, int p0, TCGv t1, int p1) -{ - TCGv tmp = tcg_temp_new(); - - p0 &= 0x1f; - p1 &= 0x1f; - - tcg_gen_andi_i32(tmp, t1, (1 << p1)); - tcg_gen_andi_i32(t0, t0, ~(1 << p0)); - if (p0 < p1) - tcg_gen_shri_i32(tmp, tmp, p1 - p0); - else if (p0 > p1) - tcg_gen_shli_i32(tmp, tmp, p0 - p1); - tcg_gen_or_i32(t0, t0, tmp); - - tcg_temp_free(tmp); -} - static inline void gen_load_fpr64(TCGv_i64 t, int reg) { tcg_gen_concat_i32_i64(t, cpu_fregs[reg + 1], cpu_fregs[reg]); @@ -392,7 +392,8 @@ static void _decode_opc(DisasContext * ctx) switch (ctx->opcode) { case 0x0019: /* div0u */ - tcg_gen_andi_i32(cpu_sr, cpu_sr, ~((1u << SR_M) | (1u << SR_Q))); + tcg_gen_movi_i32(cpu_sr_m, 0); + tcg_gen_movi_i32(cpu_sr_q, 0); tcg_gen_movi_i32(cpu_sr_t, 0); return; case 0x000b: /* rts */ @@ -709,13 +710,44 @@ static void _decode_opc(DisasContext * ctx) } return; case 0x2007: /* div0s Rm,Rn */ - gen_copy_bit_i32(cpu_sr, SR_Q, REG(B11_8), 31); /* SR_Q */ - gen_copy_bit_i32(cpu_sr, SR_M, REG(B7_4), 31); /* SR_M */ - tcg_gen_xor_i32(cpu_sr_t, REG(B7_4), REG(B11_8)); - tcg_gen_shri_i32(cpu_sr_t, cpu_sr_t, 31); /* SR_T */ + tcg_gen_shri_i32(cpu_sr_q, REG(B11_8), 31); /* SR_Q */ + tcg_gen_shri_i32(cpu_sr_m, REG(B7_4), 31); /* SR_M */ + tcg_gen_xor_i32(cpu_sr_t, cpu_sr_q, cpu_sr_m); /* SR_T */ return; case 0x3004: /* div1 Rm,Rn */ - gen_helper_div1(REG(B11_8), cpu_env, REG(B7_4), REG(B11_8)); + { + TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); + TCGv t2 = tcg_temp_new(); + TCGv zero = tcg_const_i32(0); + + /* shift left arg1, saving the bit being pushed 
out and inserting + T on the right */ + tcg_gen_shri_i32(t0, REG(B11_8), 31); + tcg_gen_shli_i32(REG(B11_8), REG(B11_8), 1); + tcg_gen_or_i32(REG(B11_8), REG(B11_8), cpu_sr_t); + + /* Add or subtract arg0 from arg1 depending if Q == M. To avoid + using 64-bit temps, we compute arg0's high part from q ^ m, so + that it is 0x00000000 when adding the value or 0xffffffff when + subtracting it. */ + tcg_gen_xor_i32(t1, cpu_sr_q, cpu_sr_m); + tcg_gen_subi_i32(t1, t1, 1); + tcg_gen_neg_i32(t2, REG(B7_4)); + tcg_gen_movcond_i32(TCG_COND_EQ, t2, t1, zero, REG(B7_4), t2); + tcg_gen_add2_i32(REG(B11_8), t1, REG(B11_8), zero, t2, t1); + + /* compute T and Q depending on carry */ + tcg_gen_andi_i32(t1, t1, 1); + tcg_gen_xor_i32(t1, t1, t0); + tcg_gen_xori_i32(cpu_sr_t, t1, 1); + tcg_gen_xor_i32(cpu_sr_q, cpu_sr_m, t1); + + tcg_temp_free(zero); + tcg_temp_free(t2); + tcg_temp_free(t1); + tcg_temp_free(t0); + } return; case 0x300d: /* dmuls.l Rm,Rn */ tcg_gen_muls2_i32(cpu_macl, cpu_mach, REG(B7_4), REG(B11_8)); -- cgit v1.1 From 91b4d29f4eecab14c5f8888ecd7b3a740ad80b7c Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Mon, 25 May 2015 01:28:56 +0200 Subject: target-sh4: factorize fmov implementation Reviewed-by: Richard Henderson Signed-off-by: Aurelien Jarno --- target-sh4/translate.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/target-sh4/translate.c b/target-sh4/translate.c index 44d0e94..e8b9217 100644 --- a/target-sh4/translate.c +++ b/target-sh4/translate.c @@ -1010,24 +1010,19 @@ static void _decode_opc(DisasContext * ctx) return; case 0xf00b: /* fmov {F,D,X}Rm,@-Rn - FPSCR: Nothing */ CHECK_FPU_ENABLED + TCGv addr = tcg_temp_new_i32(); + tcg_gen_subi_i32(addr, REG(B11_8), 4); if (ctx->flags & FPSCR_SZ) { - TCGv addr = tcg_temp_new_i32(); int fr = XREG(B7_4); - tcg_gen_subi_i32(addr, REG(B11_8), 4); tcg_gen_qemu_st_i32(cpu_fregs[fr+1], addr, ctx->memidx, MO_TEUL); tcg_gen_subi_i32(addr, addr, 4); tcg_gen_qemu_st_i32(cpu_fregs[fr], 
addr, ctx->memidx, MO_TEUL); - tcg_gen_mov_i32(REG(B11_8), addr); - tcg_temp_free(addr); } else { - TCGv addr; - addr = tcg_temp_new_i32(); - tcg_gen_subi_i32(addr, REG(B11_8), 4); tcg_gen_qemu_st_i32(cpu_fregs[FREG(B7_4)], addr, ctx->memidx, MO_TEUL); - tcg_gen_mov_i32(REG(B11_8), addr); - tcg_temp_free(addr); } + tcg_gen_mov_i32(REG(B11_8), addr); + tcg_temp_free(addr); return; case 0xf006: /* fmov @(R0,Rm),{F,D,X}Rm - FPSCR: Nothing */ CHECK_FPU_ENABLED -- cgit v1.1 From d218b28d28b8f4de297bfd35c082b22f153cf0df Mon Sep 17 00:00:00 2001 From: Aurelien Jarno Date: Mon, 25 May 2015 01:28:56 +0200 Subject: target-sh4: remove dead code Reviewed-by: Richard Henderson Signed-off-by: Aurelien Jarno --- target-sh4/translate.c | 1 - 1 file changed, 1 deletion(-) diff --git a/target-sh4/translate.c b/target-sh4/translate.c index e8b9217..28259f9 100644 --- a/target-sh4/translate.c +++ b/target-sh4/translate.c @@ -18,7 +18,6 @@ */ #define DEBUG_DISAS -//#define SH4_SINGLE_STEP #include "cpu.h" #include "disas/disas.h" -- cgit v1.1