diff options
Diffstat (limited to 'target-arm')
-rw-r--r-- | target-arm/cpu.h | 64 | ||||
-rw-r--r-- | target-arm/helper.c | 46 | ||||
-rw-r--r-- | target-arm/helper.h | 11 | ||||
-rw-r--r-- | target-arm/op_helper.c | 10 | ||||
-rw-r--r-- | target-arm/translate-a64.c | 318 | ||||
-rw-r--r-- | target-arm/translate.c | 324 | ||||
-rw-r--r-- | target-arm/translate.h | 2 |
7 files changed, 731 insertions, 44 deletions
diff --git a/target-arm/cpu.h b/target-arm/cpu.h index 815fef8..1087075 100644 --- a/target-arm/cpu.h +++ b/target-arm/cpu.h @@ -437,7 +437,7 @@ typedef struct CPUARMState { * the two execution states, and means we do not need to explicitly * map these registers when changing states. */ - float64 regs[64]; + float64 regs[64] __attribute__((aligned(16))); uint32_t xregs[16]; /* We store these fpcsr fields separately for convenience. */ @@ -496,6 +496,8 @@ typedef struct CPUARMState { /* Internal CPU feature flags. */ uint64_t features; + CPU_OPTIMIZATION_COMMON + /* PMSAv7 MPU */ struct { uint32_t *drbar; @@ -1509,7 +1511,7 @@ bool write_cpustate_to_list(ARMCPU *cpu); /* The ARM MMU allows 1k pages. */ /* ??? Linux doesn't actually use these, and they're deprecated in recent architecture revisions. Maybe a configure option to disable them. */ -#define TARGET_PAGE_BITS 10 +#define TARGET_PAGE_BITS 12 #endif #if defined(TARGET_AARCH64) @@ -1523,7 +1525,7 @@ bool write_cpustate_to_list(ARMCPU *cpu); static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx, unsigned int target_el) { - CPUARMState *env = cs->env_ptr; + CPUARMState *env = (CPUARMState *)cs->env_ptr; unsigned int cur_el = arm_current_el(env); bool secure = arm_is_secure(env); bool pstate_unmasked; @@ -1983,6 +1985,62 @@ static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, *cs_base = 0; } +static inline target_ulong cpu_get_pc(CPUARMState *env) +{ +#if defined(TARGET_AARCH64) + return env->pc; +#else + return env->regs[15]; +#endif +} + +static inline int cpu_check_state(CPUARMState *env, + target_ulong cs_base, int flags) +{ + int f; + if (is_a64(env)) { + f = ARM_TBFLAG_AARCH64_STATE_MASK; + } else { + f = (env->thumb << ARM_TBFLAG_THUMB_SHIFT) + | (env->vfp.vec_len << ARM_TBFLAG_VECLEN_SHIFT) + | (env->vfp.vec_stride << ARM_TBFLAG_VECSTRIDE_SHIFT) + | (env->condexec_bits << ARM_TBFLAG_CONDEXEC_SHIFT) + | (env->bswap_code << ARM_TBFLAG_BSWAP_CODE_SHIFT); + if (!(access_secure_reg(env))) { + f |= ARM_TBFLAG_NS_MASK; + } + if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30) + || arm_el_is_aa64(env, 1)) { + f |= ARM_TBFLAG_VFPEN_MASK; + } + f |= (extract32(env->cp15.c15_cpar, 0, 2) + << ARM_TBFLAG_XSCALE_CPAR_SHIFT); + } + + f |= (cpu_mmu_index(env, false) << ARM_TBFLAG_MMUIDX_SHIFT); + /* The SS_ACTIVE and PSTATE_SS bits correspond to the state machine + * states defined in the ARM ARM for software singlestep: + * SS_ACTIVE PSTATE.SS State + * 0 x Inactive (the TB flag for SS is always 0) + * 1 0 Active-pending + * 1 1 Active-not-pending + */ + if (arm_singlestep_active(env)) { + f |= ARM_TBFLAG_SS_ACTIVE_MASK; + if (is_a64(env)) { + if (env->pstate & PSTATE_SS) { + f |= ARM_TBFLAG_PSTATE_SS_MASK; + } + } else { + if (env->uncached_cpsr & PSTATE_SS) { + f |= ARM_TBFLAG_PSTATE_SS_MASK; + } + } + } + f |= fp_exception_el(env) << ARM_TBFLAG_FPEXC_EL_SHIFT; + return f == flags; +} + #include "exec/exec-all.h" enum { diff --git a/target-arm/helper.c b/target-arm/helper.c index afc4163..302e88c 100644 --- a/target-arm/helper.c +++ b/target-arm/helper.c @@ -11,6 +11,7 @@ #include "arm_ldst.h" #include <zlib.h> /* For crc32 */ #include "exec/semihost.h" +#include "hqemu.h" #define ARM_CPU_FREQ 1000000000 /* FIXME: 1 GHz, should be configurable */ @@ -2225,6 +2226,8 @@ static void vmsa_ttbr_write(CPUARMState *env, const ARMCPRegInfo *ri, tlb_flush(CPU(cpu), 1); } raw_write(env, ri, value); + + pcid = (target_ulong)value >> 12; } static void vttbr_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -7977,29 +7980,23 @@ float64 VFP_HELPER(sqrt, d)(float64 a, CPUARMState *env) /* XXX: check quiet/signaling case */ #define DO_VFP_cmp(p, type) \ -void VFP_HELPER(cmp, p)(type a, type b, CPUARMState *env) \ +uint32_t VFP_HELPER(cmp, p)(type a, type b, CPUARMState *env) \ { \ - uint32_t flags; \ - switch(type ## _compare_quiet(a, b, &env->vfp.fp_status)) { \ - case 0: flags = 0x6; break; \ - case -1: flags = 0x8; break; \ - case 1: flags = 0x2; break; \ - default: case 2: flags = 0x3; break; \ - } \ - env->vfp.xregs[ARM_VFP_FPSCR] = (flags << 28) \ - | (env->vfp.xregs[ARM_VFP_FPSCR] & 0x0fffffff); \ + uint32_t flags = 0x3; \ + int ret = type ## _compare_quiet(a, b, &env->vfp.fp_status); \ + if (ret == 0) flags = 0x6; \ + else if (ret == -1) flags = 0x8; \ + else if (ret == 1) flags = 0x2; \ + return flags << 28; \ } \ -void VFP_HELPER(cmpe, p)(type a, type b, CPUARMState *env) \ +uint32_t VFP_HELPER(cmpe, p)(type a, type b, CPUARMState *env) \ { \ - uint32_t flags; \ - switch(type ## _compare(a, b, &env->vfp.fp_status)) { \ - case 0: flags = 0x6; break; \ - case -1: flags = 0x8; break; \ - case 1: flags = 0x2; break; \ - default: case 2: flags = 0x3; break; \ - } \ - env->vfp.xregs[ARM_VFP_FPSCR] = (flags << 28) \ - | (env->vfp.xregs[ARM_VFP_FPSCR] & 0x0fffffff); \ + uint32_t flags = 0x3; \ + int ret = type ## _compare(a, b, &env->vfp.fp_status); \ + if (ret == 0) flags = 0x6; \ + else if (ret == -1) flags = 0x8; \ + else if (ret == 1) flags = 0x2; \ + return flags << 28; \ } DO_VFP_cmp(s, float32) DO_VFP_cmp(d, float64) @@ -8777,3 +8774,12 @@ uint32_t HELPER(crc32c)(uint32_t acc, uint32_t val, uint32_t bytes) /* Linux crc32c converts the output to one's complement. */ return crc32c(acc, buf, bytes) ^ 0xffffffff; } + +CPUState *cpu_create(void) +{ + ARMCPU *cpu = g_malloc0(sizeof(ARMCPU)); + CPUState *cs = CPU(cpu); + memcpy(cpu, ARM_CPU(first_cpu), sizeof(ARMCPU)); + cs->env_ptr = &cpu->env; + return cs; +} diff --git a/target-arm/helper.h b/target-arm/helper.h index c2a85c7..41c2c6d 100644 --- a/target-arm/helper.h +++ b/target-arm/helper.h @@ -56,6 +56,7 @@ DEF_HELPER_2(pre_smc, void, env, i32) DEF_HELPER_1(check_breakpoints, void, env) +DEF_HELPER_3(cpsr_write_nzcv, void, env, i32, i32) DEF_HELPER_3(cpsr_write, void, env, i32, i32) DEF_HELPER_1(cpsr_read, i32, env) @@ -103,10 +104,10 @@ DEF_HELPER_1(vfp_abss, f32, f32) DEF_HELPER_1(vfp_absd, f64, f64) DEF_HELPER_2(vfp_sqrts, f32, f32, env) DEF_HELPER_2(vfp_sqrtd, f64, f64, env) -DEF_HELPER_3(vfp_cmps, void, f32, f32, env) -DEF_HELPER_3(vfp_cmpd, void, f64, f64, env) -DEF_HELPER_3(vfp_cmpes, void, f32, f32, env) -DEF_HELPER_3(vfp_cmped, void, f64, f64, env) +DEF_HELPER_3(vfp_cmps, i32, f32, f32, env) +DEF_HELPER_3(vfp_cmpd, i32, f64, f64, env) +DEF_HELPER_3(vfp_cmpes, i32, f32, f32, env) +DEF_HELPER_3(vfp_cmped, i32, f64, f64, env) DEF_HELPER_2(vfp_fcvtds, f64, f32, env) DEF_HELPER_2(vfp_fcvtsd, f32, f64, env) @@ -535,3 +536,5 @@ DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64) #ifdef TARGET_AARCH64 #include "helper-a64.h" #endif + +#include "hqemu-helper.h" diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c index 6cd54c8..fdea907 100644 --- a/target-arm/op_helper.c +++ b/target-arm/op_helper.c @@ -386,6 +386,16 @@ void HELPER(cpsr_write)(CPUARMState *env, uint32_t val, uint32_t mask) cpsr_write(env, val, mask); } +void HELPER(cpsr_write_nzcv)(CPUARMState *env, uint32_t val, uint32_t mask) +{ + if (mask & CPSR_NZCV) { + env->ZF = (~val) & CPSR_Z; + env->NF = val; + env->CF = (val >> 29) & 1; + env->VF = (val << 3) & 0x80000000; + } +} + /* Access to user mode registers from privileged modes. */ uint32_t HELPER(get_user_reg)(CPUARMState *env, uint32_t regno) { diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c index 14e8131..21cf214 100644 --- a/target-arm/translate-a64.c +++ b/target-arm/translate-a64.c @@ -37,10 +37,17 @@ #include "exec/helper-gen.h" #include "trace-tcg.h" +#include "hqemu.h" static TCGv_i64 cpu_X[32]; static TCGv_i64 cpu_pc; +#if defined(CONFIG_USER_ONLY) +#define IS_USER(s) 1 +#else +#define IS_USER(s) (s->user) +#endif + /* Load/store exclusive handling */ static TCGv_i64 cpu_exclusive_high; @@ -119,6 +126,31 @@ static inline ARMMMUIdx get_a64_user_mem_index(DisasContext *s) } } +static inline void gen_ibtc_stub(DisasContext *s) +{ +#ifdef ENABLE_IBTC + if (!build_llvm(s->env)) { + TCGv_ptr ibtc_host_pc = tcg_temp_new_ptr(); + gen_helper_lookup_ibtc(ibtc_host_pc, cpu_env); + tcg_gen_op1i(INDEX_op_jmp, GET_TCGV_PTR(ibtc_host_pc)); + tcg_temp_free_ptr(ibtc_host_pc); + s->gen_ibtc = 0; + } +#endif +} + +static inline void gen_cpbl_stub(DisasContext *s) +{ +#ifdef ENABLE_CPBL + if (!build_llvm(s->env)) { + TCGv_ptr cpbl_host_pc = tcg_temp_new_ptr(); + gen_helper_lookup_cpbl(cpbl_host_pc, cpu_env); + tcg_gen_op1i(INDEX_op_jmp, GET_TCGV_PTR(cpbl_host_pc)); + tcg_temp_free_ptr(cpbl_host_pc); + } +#endif +} + void aarch64_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf, int flags) { @@ -285,12 +317,38 @@ static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest) return true; } +#if defined(CONFIG_USER_ONLY) +static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest) +{ + TranslationBlock *tb; + + tb = s->tb; + tcg_gen_goto_tb(n); + gen_a64_set_pc_im(dest); + tcg_gen_exit_tb((intptr_t)tb + n); + s->is_jmp = DISAS_TB_JUMP; + tb->jmp_pc[n] = dest; +} +#else +static int try_link_pages(DisasContext *s, TranslationBlock *tb, target_ulong dest) +{ +#ifdef ENABLE_LPAGE + if (!build_llvm(s->env)) { + target_ulong addr, size; + int ret = lpt_search_page(s->env, dest, &addr, &size); + if (ret == 1 && (tb->pc & ~(size - 1)) == addr) + return 1; + } +#endif + return 0; +} + static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest) { TranslationBlock *tb; tb = s->tb; - if (use_goto_tb(s, n, dest)) { + if (use_goto_tb(s, n, dest) || try_link_pages(s, tb, dest) == 1) { tcg_gen_goto_tb(n); gen_a64_set_pc_im(dest); tcg_gen_exit_tb((intptr_t)tb + n); @@ -302,11 +360,14 @@ static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest) } else if (s->singlestep_enabled) { gen_exception_internal(EXCP_DEBUG); } else { + gen_cpbl_stub(s); tcg_gen_exit_tb(0); s->is_jmp = DISAS_TB_JUMP; } } + tb->jmp_pc[n] = dest; } +#endif static void unallocated_encoding(DisasContext *s) { @@ -568,6 +629,7 @@ static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) tcg_gen_movi_i64(tmp, 0); tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp); + tcg_gen_annotate(A_SetCC); tcg_gen_extrl_i64_i32(cpu_CF, flag); @@ -614,6 +676,7 @@ static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) result = tcg_temp_new_i64(); flag = tcg_temp_new_i64(); tcg_gen_sub_i64(result, t0, t1); + tcg_gen_annotate(A_SetCC); gen_set_NZ64(result); @@ -764,11 +827,51 @@ static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr, get_mem_index(s)); } +#ifdef ENABLE_TCG_VECTOR +#include "simd_helper.h" + +#define VFP_DREG(reg) \ +do { \ + reg = reg * 2; \ +} while (0) +#define tcg_vector_abort() \ +do {\ + fprintf(stderr, "%s:%d: tcg fatal error - unhandled vector op.\n", __FILE__, __LINE__);\ + exit(0);\ +} while (0) + +/* + * disas_neon_ls_vector() + * return true if the neon instruction is successfully translated to tcg vector opc. + */ +static int disas_neon_ls_vector(DisasContext *s, int reg, int is_load, + TCGv_i64 tcg_addr) +{ + TCGArg vop, alignment = 32; + + if (!build_llvm(s->env)) + return 0; + + VFP_DREG(reg); + vop = (is_load) ? INDEX_op_vload_128 : INDEX_op_vstore_128; + gen_vector_op3(vop, + offsetof(CPUARMState, vfp.regs[reg]), + GET_TCGV_I64(tcg_addr), + alignment); + return 1; +} +#endif + /* * Store from FP register to memory */ static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size) { +#ifdef ENABLE_TCG_VECTOR + if (size >= 4 && disas_neon_ls_vector(s, srcidx, 0, tcg_addr) == 1) + return; +#endif + /* This writes the bottom N bits of a 128 bit wide vector to memory */ TCGv_i64 tmp = tcg_temp_new_i64(); tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64)); @@ -791,6 +894,11 @@ static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size) */ static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size) { +#ifdef ENABLE_TCG_VECTOR + if (size >= 4 && disas_neon_ls_vector(s, destidx, 1, tcg_addr) == 1) + return; +#endif + /* This always zero-extends and writes to a full 128 bit wide vector */ TCGv_i64 tmplo = tcg_temp_new_i64(); TCGv_i64 tmphi; @@ -1653,6 +1761,7 @@ static void disas_uncond_b_reg(DisasContext *s, uint32_t insn) } s->is_jmp = DISAS_JUMP; + s->gen_ibtc = 1; } /* C3.2 Branches, exception generating and system instructions */ @@ -3624,6 +3733,8 @@ static void disas_cc(DisasContext *s, uint32_t insn) TCGv_i64 tcg_tmp, tcg_y, tcg_rn; DisasCompare c; + tcg_gen_annotate(A_NoSIMDization); + if (!extract32(insn, 29, 1)) { unallocated_encoding(s); return; @@ -8854,6 +8965,153 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn) } } +#ifdef ENABLE_TCG_VECTOR +static int disas_neon_misc(DisasContext *s, uint32_t insn) +{ + if (!build_llvm(s->env)) + return 0; + + int size = extract32(insn, 22, 2); + int opcode = extract32(insn, 12, 5); + bool u = extract32(insn, 29, 1); + bool is_q = extract32(insn, 30, 1); + int rm = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + + VFP_DREG(rm); + VFP_DREG(rd); + + switch (opcode) { + case 0xc ... 0xf: + case 0x16 ... 0x1d: + case 0x1f: + { + /* Floating point: U, size[1] and opcode indicate operation; + * size[0] indicates single or double precision. + */ + int is_double = extract32(size, 0, 1); + opcode |= (extract32(size, 1, 1) << 5) | (u << 6); + size = is_double ? 64 : 32; + + switch (opcode) { + case 0x1d: /* SCVTF */ + case 0x5d: /* UCVTF */ + { + if (is_double && !is_q) { + unallocated_encoding(s); + return 0; + } + if (!fp_access_check(s)) { + return 0; + } + if (opcode == 0x1d) + gen_vector_cvt(vsitofp, size); + else + gen_vector_cvt(vuitofp, size); + break; + } + case 0x1a: /* FCVTNS */ + case 0x1b: /* FCVTMS */ + case 0x1c: /* FCVTAS */ + case 0x3a: /* FCVTPS */ + case 0x3b: /* FCVTZS */ + if (is_double && !is_q) { + unallocated_encoding(s); + return 0; + } + gen_vector_cvt(vfptosi, size); + break; + case 0x5a: /* FCVTNU */ + case 0x5b: /* FCVTMU */ + case 0x5c: /* FCVTAU */ + case 0x7a: /* FCVTPU */ + case 0x7b: /* FCVTZU */ + if (is_double && !is_q) { + unallocated_encoding(s); + return 0; + } + gen_vector_cvt(vfptoui, size); + break; + default: + return 0; + } + break; + } + default: + return 0; + } + + return 1; +} + +/* + * disas_neon_data_vector() + * return true if the neon instruction is successfully translated to tcg vector opc. + */ +static int disas_neon_data_vector(DisasContext *s, uint32_t insn) +{ + if (!build_llvm(s->env)) + return 0; + + int q = extract32(insn, 30, 1); + int u = extract32(insn, 29, 1); + int size = extract32(insn, 22, 2); + int op = extract32(insn, 11, 5); + int rm = extract32(insn, 16, 5); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + + VFP_DREG(rm); + VFP_DREG(rn); + VFP_DREG(rd); + + switch(op) { + case 0x10: /* ADD, SUB */ + if(!u) /* ADD */ + gen_vector_arith(vadd, i, size); + else /* SUB */ + gen_vector_arith(vsub, i, size); + break; + case 0x3: /* logic ops */ + switch ((u << 2) | size) { + case 0: gen_vector_logical(vand); break; /* AND */ + case 1: gen_vector_logical(vbic); break; /* BIC rd = rn&(~rm)*/ + case 2: gen_vector_logical(vorr); break; /* ORR */ + case 3: gen_vector_logical(vorn); break; /* ORN */ + case 4: gen_vector_logical(veor); break; /* EOR */ + case 5: gen_vector_logical(vbsl); break; /* BSL */ + case 6: gen_vector_logical(vbit); break; /* BIT */ + case 7: gen_vector_logical(vbif); break; /* BIF */ + default: + return 0; + } + break; + case 0x18 ... 0x31: + { + int fpopcode = extract32(insn, 11, 5) + | (extract32(insn, 23, 1) << 5) + | (extract32(insn, 29, 1) << 6); + int size = extract32(insn, 22, 1); + switch (fpopcode) { + case 0x1a: gen_vector_fop2(vadd); break; /* FADD */ + case 0x3a: gen_vector_fop2(vsub); break; /* FSUB */ + case 0x5b: gen_vector_fop2(vmul); break; /* FMUL */ + case 0x5f: gen_vector_fop2(vdiv); break; /* FDIV */ + case 0x19: gen_vector_fop2(vmla); break; /* FMLA */ + case 0x39: gen_vector_fop2(vmls); break; /* FMLS */ + default: + return 0; + } + break; + } + default: + return 0; + } + + return 1; +} +#endif + /* Logic op (opcode == 3) subgroup of C3.6.16. */ static void disas_simd_3same_logic(DisasContext *s, uint32_t insn) { @@ -8870,6 +9128,11 @@ static void disas_simd_3same_logic(DisasContext *s, uint32_t insn) return; } +#ifdef ENABLE_TCG_VECTOR + if (disas_neon_data_vector(s, insn) == 1) + return; +#endif + tcg_op1 = tcg_temp_new_i64(); tcg_op2 = tcg_temp_new_i64(); tcg_res[0] = tcg_temp_new_i64(); @@ -9138,6 +9401,11 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn) return; } +#ifdef ENABLE_TCG_VECTOR + if (disas_neon_data_vector(s, insn) == 1) + return; +#endif + switch (fpopcode) { case 0x58: /* FMAXNMP */ case 0x5a: /* FADDP */ @@ -9232,6 +9500,11 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) return; } +#ifdef ENABLE_TCG_VECTOR + if (disas_neon_data_vector(s, insn) == 1) + return; +#endif + if (size == 3) { assert(is_q); for (pass = 0; pass < 2; pass++) { @@ -9778,6 +10051,11 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) TCGv_i32 tcg_rmode; TCGv_ptr tcg_fpstatus; +#ifdef ENABLE_TCG_VECTOR + if (disas_neon_misc(s, insn) == 1) + return; +#endif + switch (opcode) { case 0x0: /* REV64, REV32 */ case 0x1: /* REV16 */ @@ -11018,6 +11296,8 @@ void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb) pc_start = tb->pc; + dc->gen_ibtc = 0; + dc->env = env; dc->tb = tb; dc->is_jmp = DISAS_NEXT; @@ -11078,7 +11358,12 @@ void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb) max_insns = TCG_MAX_INSNS; } - gen_tb_start(tb); + if (!build_llvm(env)) { + gen_tb_start(tb); + if (tracer_mode != TRANS_MODE_NONE) + tcg_gen_hotpatch(IS_USER(dc), tracer_mode == TRANS_MODE_HYBRIDS || + tracer_mode == TRANS_MODE_HYBRIDM); + } tcg_clear_temp_count(); @@ -11144,6 +11429,9 @@ void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb) * Also stop translation when a page boundary is reached. This * ensures prefetch aborts occur at the right place. */ + + if (build_llvm(env) && num_insns == tb->icount) + break; } while (!dc->is_jmp && !tcg_op_buf_full() && !cs->singlestep_enabled && !singlestep && @@ -11155,6 +11443,15 @@ void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb) gen_io_end(); } + if (build_llvm(env) && tb->size != dc->pc - pc_start) { + /* consistency check with tb info. we must make sure + * guest basic blocks are the same. skip this trace if inconsistent */ + fprintf(stderr, "inconsistent block with pc 0x"TARGET_FMT_lx" size=%d" + " icount=%d (error size="TARGET_FMT_ld")\n", + tb->pc, tb->size, tb->icount, dc->pc - pc_start); + exit(0); + } + if (unlikely(cs->singlestep_enabled || dc->ss_active) && dc->is_jmp != DISAS_EXC) { /* Note that this means single stepping WFI doesn't halt the CPU. @@ -11182,6 +11479,8 @@ void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb) /* fall through */ case DISAS_JUMP: /* indicate that the hash table must be used to find the next TB */ + if (dc->gen_ibtc == 1) + gen_ibtc_stub(dc); tcg_gen_exit_tb(0); break; case DISAS_TB_JUMP: @@ -11211,10 +11510,15 @@ void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb) } done_generating: - gen_tb_end(tb, num_insns); + if (build_llvm(env)) { + /* Terminate the linked list. */ + tcg_ctx.gen_op_buf[tcg_ctx.gen_last_op_idx].next = -1; + } else { + gen_tb_end(tb, num_insns); + } #ifdef DEBUG_DISAS - if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) { + if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) && !build_llvm(env)) { qemu_log("----------------\n"); qemu_log("IN: %s\n", lookup_symbol(pc_start)); log_target_disas(cs, pc_start, dc->pc - pc_start, @@ -11222,6 +11526,8 @@ done_generating: qemu_log("\n"); } #endif - tb->size = dc->pc - pc_start; - tb->icount = num_insns; + if (!build_llvm(env)) { + tb->size = dc->pc - pc_start; + tb->icount = num_insns; + } } diff --git a/target-arm/translate.c b/target-arm/translate.c index 5d22879..256227b 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -36,6 +36,7 @@ #include "exec/helper-gen.h" #include "trace-tcg.h" +#include "hqemu.h" #define ENABLE_ARCH_4T arm_dc_feature(s, ARM_FEATURE_V4T) @@ -110,6 +111,33 @@ void arm_translate_init(void) #endif a64_translate_init(); + + copy_tcg_context_global(); +} + +static inline void gen_ibtc_stub(DisasContext *s) +{ +#ifdef ENABLE_IBTC + if (!build_llvm(s->env)) { + TCGv_ptr ibtc_host_pc = tcg_temp_new_ptr(); + gen_helper_lookup_ibtc(ibtc_host_pc, cpu_env); + tcg_gen_op1i(INDEX_op_jmp, GET_TCGV_PTR(ibtc_host_pc)); + tcg_temp_free_ptr(ibtc_host_pc); + s->gen_ibtc = 0; + } +#endif +} + +static inline void gen_cpbl_stub(DisasContext *s) +{ +#ifdef ENABLE_CPBL + if (!build_llvm(s->env)) { + TCGv_ptr cpbl_host_pc = tcg_temp_new_ptr(); + gen_helper_lookup_cpbl(cpbl_host_pc, cpu_env); + tcg_gen_op1i(INDEX_op_jmp, GET_TCGV_PTR(cpbl_host_pc)); + tcg_temp_free_ptr(cpbl_host_pc); + } +#endif } static inline ARMMMUIdx get_a32_user_mem_index(DisasContext *s) @@ -201,7 +229,10 @@ static void store_reg(DisasContext *s, int reg, TCGv_i32 var) static inline void gen_set_cpsr(TCGv_i32 var, uint32_t mask) { TCGv_i32 tmp_mask = tcg_const_i32(mask); - gen_helper_cpsr_write(cpu_env, var, tmp_mask); + if (mask & ~CPSR_NZCV) + gen_helper_cpsr_write(cpu_env, var, tmp_mask); + else + gen_helper_cpsr_write_nzcv(cpu_env, var, tmp_mask); tcg_temp_free_i32(tmp_mask); } /* Set NZCV flags from the high 4 bits of var. */ @@ -493,6 +524,7 @@ static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) { TCGv_i32 tmp; tcg_gen_sub_i32(cpu_NF, t0, t1); + tcg_gen_annotate(A_SetCC); tcg_gen_mov_i32(cpu_ZF, cpu_NF); tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1); tcg_gen_xor_i32(cpu_VF, cpu_NF, t0); @@ -878,6 +910,7 @@ static inline void gen_bx_im(DisasContext *s, uint32_t addr) tcg_temp_free_i32(tmp); } tcg_gen_movi_i32(cpu_R[15], addr & ~1); + s->gen_ibtc = 1; } /* Set PC and Thumb state from var. var is marked as dead. */ @@ -887,6 +920,7 @@ static inline void gen_bx(DisasContext *s, TCGv_i32 var) tcg_gen_andi_i32(cpu_R[15], var, ~1); tcg_gen_andi_i32(var, var, 1); store_cpu_field(var, thumb); + s->gen_ibtc = 1; } /* Variant of store_reg which uses branch&exchange logic when storing @@ -1199,20 +1233,38 @@ static inline void gen_vfp_sqrt(int dp) gen_helper_vfp_sqrts(cpu_F0s, cpu_F0s, cpu_env); } +static inline void gen_update_fpscr(TCGv_i32 flags) +{ + TCGv_i32 tmp; + tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]); + tcg_gen_andi_i32(tmp, tmp, 0x0fffffff); + tcg_gen_or_i32(tmp, tmp, flags); + store_cpu_field(tmp, vfp.xregs[ARM_VFP_FPSCR]); + tcg_temp_free_i32(tmp); +} + static inline void gen_vfp_cmp(int dp) { + TCGv_i32 flags = tcg_temp_new_i32(); if (dp) - gen_helper_vfp_cmpd(cpu_F0d, cpu_F1d, cpu_env); + gen_helper_vfp_cmpd(flags, cpu_F0d, cpu_F1d, cpu_env); else - gen_helper_vfp_cmps(cpu_F0s, cpu_F1s, cpu_env); + gen_helper_vfp_cmps(flags, cpu_F0s, cpu_F1s, cpu_env); + + gen_update_fpscr(flags); + tcg_temp_free_i32(flags); } static inline void gen_vfp_cmpe(int dp) { + TCGv_i32 flags = tcg_temp_new_i32(); if (dp) - gen_helper_vfp_cmped(cpu_F0d, cpu_F1d, cpu_env); + gen_helper_vfp_cmped(flags, cpu_F0d, cpu_F1d, cpu_env); else - gen_helper_vfp_cmpes(cpu_F0s, cpu_F1s, cpu_env); + gen_helper_vfp_cmpes(flags, cpu_F0s, cpu_F1s, cpu_env); + + gen_update_fpscr(flags); + tcg_temp_free_i32(flags); } static inline void gen_vfp_F1_ld0(int dp) @@ -3977,20 +4029,49 @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn) return 0; } +#if defined(CONFIG_USER_ONLY) +static inline void gen_goto_tb(DisasContext *s, int n, target_ulong dest) +{ + TranslationBlock *tb; + + tb = s->tb; + tcg_gen_goto_tb(n); + gen_set_pc_im(s, dest); + tcg_gen_exit_tb((uintptr_t)tb + n); + tb->jmp_pc[n] = dest; +} +#else +static int try_link_pages(DisasContext *s, TranslationBlock *tb, target_ulong dest) +{ +#ifdef ENABLE_LPAGE + if (!build_llvm(s->env)) { + target_ulong addr, size; + int ret = lpt_search_page(s->env, dest, &addr, &size); + if (ret == 1 && (tb->pc & ~(size - 1)) == addr) + return 1; + } +#endif + return 0; +} + static inline void gen_goto_tb(DisasContext *s, int n, target_ulong dest) { TranslationBlock *tb; tb = s->tb; - if ((tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK)) { + if ((tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK) || + try_link_pages(s, tb, dest) == 1) { tcg_gen_goto_tb(n); gen_set_pc_im(s, dest); tcg_gen_exit_tb((uintptr_t)tb + n); } else { gen_set_pc_im(s, dest); + gen_cpbl_stub(s); tcg_gen_exit_tb(0); } + tb->jmp_pc[n] = dest; } +#endif static inline void gen_jmp (DisasContext *s, uint32_t dest) { @@ -4372,6 +4453,54 @@ static struct { {2, 1, 1} }; +#ifdef ENABLE_TCG_VECTOR +#include "simd_helper.h" + +#define tcg_vector_abort() \ +do {\ + fprintf(stderr, "%s:%d: tcg fatal error - unhandled vector op.\n", __FILE__, __LINE__);\ + exit(0);\ +} while (0) + +/* + * disas_neon_ls_vector() + * return true if the neon instruction is successfully translated to tcg vector opc. + */ +static int disas_neon_ls_vector(DisasContext *s, uint32_t insn, TCGv_i32 addr) +{ + int rd, op, load; + int nregs, reg; + int interleave, spacing; + TCGArg vop, alignment = 32; + + if (!build_llvm(s->env)) + return 0; + + /* Load store all elements. */ + op = (insn >> 8) & 0xf; + nregs = neon_ls_element_type[op].nregs; + interleave = neon_ls_element_type[op].interleave; + spacing = neon_ls_element_type[op].spacing; + + if (interleave != 1 || nregs % 2 != 0) + return 0; + + VFP_DREG_D(rd, insn); + load = (insn & (1 << 21)) != 0; + vop = (load) ? INDEX_op_vload_128 : INDEX_op_vstore_128; + + for (reg = 0; reg < nregs; reg += 2) { + gen_vector_op3(vop, + offsetof(CPUARMState, vfp.regs[rd]), + GET_TCGV_I32(addr), + alignment); + rd += spacing * 2; + tcg_gen_addi_i32(addr, addr, 16); + } + return 1; +} +#endif + /* Translate a NEON load/store element instruction. Return nonzero if the instruction is invalid. */ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) @@ -4438,6 +4567,11 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) addr = tcg_temp_new_i32(); load_reg_var(s, addr, rn); stride = (1 << size) * interleave; + +#ifdef ENABLE_TCG_VECTOR + if (disas_neon_ls_vector(s, insn, addr) == 1) + goto vector_done; +#endif for (reg = 0; reg < nregs; reg++) { if (interleave > 2 || (interleave == 2 && nregs == 2)) { load_reg_var(s, addr, rn); @@ -4529,6 +4663,9 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) } rd += spacing; } +#ifdef ENABLE_TCG_VECTOR +vector_done: +#endif tcg_temp_free_i32(addr); stride = nregs * 8; } else { @@ -5111,6 +5248,131 @@ static const uint8_t neon_2rm_sizes[] = { [NEON_2RM_VCVT_UF] = 0x4, }; +#ifdef ENABLE_TCG_VECTOR +static int disas_neon_misc(DisasContext *s, uint32_t insn) +{ + int op, rd, rm; + + if (!build_llvm(s->env)) + return 0; + + op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf); + VFP_DREG_D(rd, insn); + VFP_DREG_M(rm, insn); + + switch (op) { + case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */ + gen_vector_cvt(vsitofp, 32); + break; + case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */ + gen_vector_cvt(vuitofp, 32); + break; + case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */ + gen_vector_cvt(vfptosi, 32); + break; + case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */ + gen_vector_cvt(vfptoui, 32); + break; + default: + return 0; + } + + return 1; +} + +/* + * disas_neon_data_vector() + * return true if the neon instruction is successfully translated to tcg vector opc. + */ +static int disas_neon_data_vector(DisasContext *s, uint32_t insn) +{ + int op, q, u, size; + int rd, rn, rm; + + if (!build_llvm(s->env)) + return 0; + + /* Three register same length. */ + q = (insn & (1 << 6)) != 0; + u = (insn >> 24) & 1; + VFP_DREG_D(rd, insn); + VFP_DREG_N(rn, insn); + VFP_DREG_M(rm, insn); + size = (insn >> 20) & 3; + op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1); + + switch (op) { + case NEON_3R_VSHL: + case NEON_3R_VQSHL: + case NEON_3R_VRSHL: + case NEON_3R_VQRSHL: + { + int rtmp; + /* Shift instruction operands are reversed. */ + rtmp = rn; + rn = rm; + rm = rtmp; + } + break; + default: + break; + } + + switch(op) { + case NEON_3R_VADD_VSUB: + if(!u) /* VADD */ + gen_vector_arith(vadd, i, size); + else /* VSUB */ + gen_vector_arith(vsub, i, size); + break; + case NEON_3R_LOGIC: + switch ((u << 2) | size) { + case 0: gen_vector_logical(vand); break; /* VAND */ + case 1: gen_vector_logical(vbic); break; /* BIC rd = rn&(~rm)*/ + case 2: gen_vector_logical(vorr); break; /* VORR */ + case 3: gen_vector_logical(vorn); break; /* VORN OR NOT */ + case 4: gen_vector_logical(veor); break; /* VEOR Vector Bitwise Exclusive OR*/ + case 5: gen_vector_logical(vbsl); break; /* VBSL */ + case 6: gen_vector_logical(vbit); break; /* VBIT */ + case 7: gen_vector_logical(vbif); break; /* VBIF */ + } + break; + case NEON_3R_VFM: + if (size) /* VFMS */ + gen_vector_fop(vfms); + else /* VFMA */ + gen_vector_fop(vfma); + break; + case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. */ + switch ((u << 2) | size) { + case 0: gen_vector_fop(vadd); break; /* VADD */ + case 4: gen_vector_fop(vpadd); break; /* VPADD */ + case 2: gen_vector_fop(vsub); break; /* VSUB */ + case 6: gen_vector_fop(vabd); break; /* VABD */ + default: + tcg_vector_abort(); + break; + } + break; + case NEON_3R_FLOAT_MULTIPLY: /* float VMLA, VMLS, VMUL */ + if(u) + gen_vector_fop(vmul); + else if (!u) { + if (size == 0) + gen_vector_fop(vmla); + else + gen_vector_fop(vmls); + } else + tcg_vector_abort(); + break; + default: + return 0; + } + + return 1; +} +#endif + /* Translate a NEON data processing instruction. Return nonzero if the instruction is invalid. We process data in a mixture of 32-bit and 64-bit chunks. @@ -5341,6 +5603,11 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) return 1; } +#ifdef ENABLE_TCG_VECTOR + if (!pairwise && disas_neon_data_vector(s, insn) == 1) + return 0; +#endif + for (pass = 0; pass < (q ? 4 : 2); pass++) { if (pairwise) { @@ -6741,6 +7008,10 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) break; default: elementwise: +#ifdef ENABLE_TCG_VECTOR + if (disas_neon_misc(s, insn) == 1) + return 0; +#endif for (pass = 0; pass < (q ? 4 : 2); pass++) { if (neon_2rm_is_float_op(op)) { tcg_gen_ld_f32(cpu_F0s, cpu_env, @@ -11234,6 +11505,8 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb) pc_start = tb->pc; + dc->gen_ibtc = 0; + dc->env = env; dc->tb = tb; dc->is_jmp = DISAS_NEXT; @@ -11303,7 +11576,12 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb) max_insns = TCG_MAX_INSNS; } - gen_tb_start(tb); + if (!build_llvm(env)) { + gen_tb_start(tb); + if (tracer_mode != TRANS_MODE_NONE) + tcg_gen_hotpatch(IS_USER(dc), tracer_mode == TRANS_MODE_HYBRIDS || + tracer_mode == TRANS_MODE_HYBRIDM); + } tcg_clear_temp_count(); @@ -11460,6 +11738,12 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb) end_of_page = (dc->pc >= next_page_start) || ((dc->pc >= next_page_start - 3) && insn_crosses_page(env, dc)); +#if defined(CONFIG_LLVM) && defined(CONFIG_USER_ONLY) + if (llvm_has_annotation(dc->pc, ANNOTATION_LOOP)) + break; +#endif + if (build_llvm(env) && num_insns == tb->icount) + break; } while (!dc->is_jmp && !tcg_op_buf_full() && !cs->singlestep_enabled && !singlestep && @@ -11476,6 +11760,15 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb) gen_io_end(); } + if (build_llvm(env) && tb->size != dc->pc - pc_start) { + /* consistency check with tb info. we must make sure + * guest basic blocks are the same. skip this trace if inconsistent */ + fprintf(stderr, "inconsistent block with pc 0x"TARGET_FMT_lx" size=%d" + " icount=%d (error size="TARGET_FMT_ld")\n", + tb->pc, tb->size, tb->icount, dc->pc - pc_start); + exit(0); + } + /* At this stage dc->condjmp will only be set when the skipped instruction was a conditional branch or trap, and the PC has already been written. */ @@ -11543,6 +11836,8 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb) case DISAS_JUMP: default: /* indicate that the hash table must be used to find the next TB */ + if (dc->gen_ibtc == 1) + gen_ibtc_stub(dc); tcg_gen_exit_tb(0); break; case DISAS_TB_JUMP: @@ -11581,10 +11876,15 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb) } done_generating: - gen_tb_end(tb, num_insns); + if (build_llvm(env)) { + /* Terminate the linked list. */ + tcg_ctx.gen_op_buf[tcg_ctx.gen_last_op_idx].next = -1; + } else { + gen_tb_end(tb, num_insns); + } #ifdef DEBUG_DISAS - if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) { + if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) && !build_llvm(env)) { qemu_log("----------------\n"); qemu_log("IN: %s\n", lookup_symbol(pc_start)); log_target_disas(cs, pc_start, dc->pc - pc_start, @@ -11592,8 +11892,10 @@ done_generating: qemu_log("\n"); } #endif - tb->size = dc->pc - pc_start; - tb->icount = num_insns; + if (!build_llvm(env)) { + tb->size = dc->pc - pc_start; + tb->icount = num_insns; + } } static const char *cpu_mode_names[16] = { diff --git a/target-arm/translate.h b/target-arm/translate.h index 53ef971..10f6a05 100644 --- a/target-arm/translate.h +++ b/target-arm/translate.h @@ -61,6 +61,8 @@ typedef struct DisasContext { #define TMP_A64_MAX 16 int tmp_a64_count; TCGv_i64 tmp_a64[TMP_A64_MAX]; + int gen_ibtc; + CPUArchState *env; } DisasContext; typedef struct DisasCompare { |