diff options
64 files changed, 2719 insertions, 333 deletions
diff --git a/Makefile.target b/Makefile.target index 962d004..4e4b1fe 100644 --- a/Makefile.target +++ b/Makefile.target @@ -158,6 +158,8 @@ GENERATED_HEADERS += hmp-commands.h hmp-commands-info.h qmp-commands-old.h endif # CONFIG_SOFTMMU +include $(SRC_PATH)/llvm/hqemu.mk + # Workaround for http://gcc.gnu.org/PR55489, see configure. %/translate.o: QEMU_CFLAGS += $(TRANSLATE_OPT_CFLAGS) @@ -189,8 +191,8 @@ all-obj-$(CONFIG_SOFTMMU) += $(crypto-obj-y) $(QEMU_PROG_BUILD): config-devices.mak # build either PROG or PROGW -$(QEMU_PROG_BUILD): $(all-obj-y) ../libqemuutil.a ../libqemustub.a - $(call LINK, $(filter-out %.mak, $^)) +$(QEMU_PROG_BUILD): $(all-obj-y) ../libqemuutil.a ../libqemustub.a $(LLVM_BITCODE) + $(call LINK, $(filter-out %.mak %.bc, $^)) ifdef CONFIG_DARWIN $(call quiet-command,Rez -append $(SRC_PATH)/pc-bios/qemu.rsrc -o $@," REZ $(TARGET_DIR)$@") $(call quiet-command,SetFile -a C $@," SETFILE $(TARGET_DIR)$@") @@ -225,6 +227,9 @@ ifdef CONFIG_TRACE_SYSTEMTAP $(INSTALL_DATA) $(QEMU_PROG).stp-installed "$(DESTDIR)$(qemu_datadir)/../systemtap/tapset/$(QEMU_PROG).stp" $(INSTALL_DATA) $(QEMU_PROG)-simpletrace.stp "$(DESTDIR)$(qemu_datadir)/../systemtap/tapset/$(QEMU_PROG)-simpletrace.stp" endif +ifneq ($(LLVM_BITCODE),) + $(INSTALL) -m 644 $(LLVM_BITCODE) "$(DESTDIR)$(bindir)" +endif GENERATED_HEADERS += config-target.h Makefile: $(GENERATED_HEADERS) @@ -345,6 +345,9 @@ vhdx="" numa="" tcmalloc="no" jemalloc="no" +llvm="no" +bcflags="" +libopencsd="" # parse CC options first for opt do @@ -1169,6 +1172,12 @@ for opt do ;; --enable-jemalloc) jemalloc="yes" ;; + --enable-llvm) llvm="yes" + ;; + --clang-flags=*) bcflags="$optarg" + ;; + --with-libopencsd=*) libopencsd="$optarg" + ;; *) echo "ERROR: unknown option $opt" echo "Try '$0 --help' for more information" @@ -1391,12 +1400,26 @@ disabled with --disable-FEATURE, default is enabled if available: numa libnuma support tcmalloc tcmalloc support jemalloc jemalloc support + llvm enable LLVM optimization + 
--clang-flags flags for clang compiler + --with-libopencsd path to libopencsd library NOTE: The object files are built at the place where configure is launched EOF exit 0 fi +if test "$llvm" != "no" ; then + llvm-config --version > /dev/null 2>&1 || { echo >&2 "llvm-config is not in the PATH"; exit 1; } + llvm_major=`llvm-config --version | cut -d'.' -f1` + llvm_minor=`llvm-config --version | cut -d'.' -f2` + if test "$llvm_major" -lt "3" ; then + error_exit "LLVM version too old. Version 3.5 or later is required." + elif test "$llvm_major" -eq "3" && test "$llvm_minor" -lt "5" ; then + error_exit "LLVM version too old. Version 3.5 or later is required." + fi +fi + # Now we have handled --enable-tcg-interpreter and know we're not just # printing the help message, bail out if the host CPU isn't supported. if test "$ARCH" = "unknown"; then @@ -1469,6 +1492,7 @@ gcc_flags="-Wmissing-include-dirs -Wempty-body -Wnested-externs $gcc_flags" gcc_flags="-Wendif-labels $gcc_flags" gcc_flags="-Wno-initializer-overrides $gcc_flags" gcc_flags="-Wno-string-plus-int $gcc_flags" +gcc_flags="-Wno-format-truncation $gcc_flags" # Note that we do not add -Werror to gcc_flags here, because that would # enable it for all configure tests. 
If a configure test failed due # to -Werror this would just silently disable some features, @@ -4843,6 +4867,11 @@ echo "bzip2 support $bzip2" echo "NUMA host support $numa" echo "tcmalloc support $tcmalloc" echo "jemalloc support $jemalloc" +echo "LLVM enabled $llvm (version `llvm-config --version`)" + +if test "$libopencsd" != ""; then + echo "libopencsd $libopencsd" +fi if test "$sdl_too_old" = "yes"; then echo "-> Your SDL version is too old - please upgrade to have SDL support" @@ -5248,6 +5277,21 @@ if test "$seccomp" = "yes"; then echo "CONFIG_SECCOMP=y" >> $config_host_mak fi +if test "$llvm" != "no" ; then + echo "CONFIG_LLVM=y" >> $config_host_mak + echo "BCFLAGS=$bcflags" >> $config_host_mak + echo "LLVM_VERSION=LLVM_V`llvm-config --version | sed -e "s/\.//g" | cut -c 1-2`" >> $config_host_mak + echo "LLVM_CFLAGS=`llvm-config --cflags`" >> $config_host_mak + echo "LLVM_CXXFLAGS=`llvm-config --cxxflags`" >> $config_host_mak + echo "LLVM_LDFLAGS=`llvm-config --ldflags`" >> $config_host_mak + echo "LLVM_LIBS=`llvm-config --libs`" >> $config_host_mak +fi + +if test "$libopencsd" != "" ; then + echo "CONFIG_LIBOPENCSD=y" >> $config_host_mak + echo "LIBOPENCSD=$libopencsd" >> $config_host_mak +fi + # XXX: suppress that if [ "$bsd" = "yes" ] ; then echo "CONFIG_BSD=y" >> $config_host_mak @@ -5848,6 +5892,23 @@ fi echo "LDFLAGS+=$ldflags" >> $config_target_mak echo "QEMU_CFLAGS+=$cflags" >> $config_target_mak +if test "$cpu" = "i386" -o "$cpu" = "x86_64" -o "$cpu" = "arm" ; then + case "$target_name" in + i386|x86_64) + echo "CONFIG_COREMU=y" >> $config_target_mak + ;; + esac +fi + +if test "$llvm" != "no" ; then + bitcode="llvm_helper_$target_name" + if test "$target_softmmu" = "yes" ; then + bitcode=$bitcode"_softmmu" + fi + echo "LLVM_EXTRA_FLAGS+=-I. 
-I\$(SRC_PATH) $cflags $LLVM_EXTRA_FLAGS" >> $config_target_mak + echo "CONFIG_LLVM_BITCODE=\"$prefix/bin/$bitcode.bc\"" >> $config_target_mak +fi + done # for target in $targets if [ "$pixman" = "internal" ]; then @@ -31,6 +31,7 @@ #include "hw/i386/apic.h" #endif #include "sysemu/replay.h" +#include "hqemu.h" /* -icount align implementation. */ @@ -104,6 +105,7 @@ static void print_delay(const SyncClocks *sc) static void init_delay_params(SyncClocks *sc, const CPUState *cpu) { + memset(sc, 0, sizeof(SyncClocks)); if (!icount_align_option) { return; } @@ -159,6 +161,10 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, uint8_t *tb_ptr) trace_exec_tb_exit((void *) (next_tb & ~TB_EXIT_MASK), next_tb & TB_EXIT_MASK); +#if defined(CONFIG_LLVM) + if ((next_tb & TB_EXIT_MASK) == TB_EXIT_LLVM) + return next_tb; +#endif if ((next_tb & TB_EXIT_MASK) > TB_EXIT_IDX1) { /* We didn't start executing this TB (eg because the instruction * counter hit zero); we must restore the guest PC to the address @@ -197,7 +203,7 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles, tb = tb_gen_code(cpu, orig_tb->pc, orig_tb->cs_base, orig_tb->flags, max_cycles | CF_NOCACHE | (ignore_icount ? CF_IGNORE_ICOUNT : 0)); - tb->orig_tb = tcg_ctx.tb_ctx.tb_invalidated_flag ? NULL : orig_tb; + tb->orig_tb = tcg_ctx.tb_ctx->tb_invalidated_flag ? 
NULL : orig_tb; cpu->current_tb = tb; /* execute the generated code */ trace_exec_tb_nocache(tb, tb->pc); @@ -218,13 +224,13 @@ static TranslationBlock *tb_find_physical(CPUState *cpu, tb_page_addr_t phys_pc, phys_page1; target_ulong virt_page2; - tcg_ctx.tb_ctx.tb_invalidated_flag = 0; + tcg_ctx.tb_ctx->tb_invalidated_flag = 0; /* find translated block using physical mappings */ phys_pc = get_page_addr_code(env, pc); phys_page1 = phys_pc & TARGET_PAGE_MASK; - h = tb_phys_hash_func(phys_pc); - ptb1 = &tcg_ctx.tb_ctx.tb_phys_hash[h]; + h = tb_phys_hash_func(pc); + ptb1 = &tcg_ctx.tb_ctx->tb_phys_hash[h]; for(;;) { tb = *ptb1; if (!tb) { @@ -253,8 +259,8 @@ static TranslationBlock *tb_find_physical(CPUState *cpu, /* Move the TB to the head of the list */ *ptb1 = tb->phys_hash_next; - tb->phys_hash_next = tcg_ctx.tb_ctx.tb_phys_hash[h]; - tcg_ctx.tb_ctx.tb_phys_hash[h] = tb; + tb->phys_hash_next = tcg_ctx.tb_ctx->tb_phys_hash[h]; + tcg_ctx.tb_ctx->tb_phys_hash[h] = tb; return tb; } @@ -315,6 +321,10 @@ static inline TranslationBlock *tb_find_fast(CPUState *cpu) tb->flags != flags)) { tb = tb_find_slow(cpu, pc, cs_base, flags); } + + itlb_update_entry(env, tb); + ibtc_update_entry(env, tb); + return tb; } @@ -492,29 +502,23 @@ int cpu_exec(CPUState *cpu) tb = tb_find_fast(cpu); /* Note: we do it here to avoid a gcc bug on Mac OS X when doing it in tb_find_slow */ - if (tcg_ctx.tb_ctx.tb_invalidated_flag) { + if (tcg_ctx.tb_ctx->tb_invalidated_flag) { /* as some TB could have been invalidated because of memory exceptions while generating the code, we must recompute the hash index here */ next_tb = 0; - tcg_ctx.tb_ctx.tb_invalidated_flag = 0; + tcg_ctx.tb_ctx->tb_invalidated_flag = 0; } if (qemu_loglevel_mask(CPU_LOG_EXEC)) { qemu_log("Trace %p [" TARGET_FMT_lx "] %s\n", tb->tc_ptr, tb->pc, lookup_symbol(tb->pc)); } - /* see if we can patch the calling TB. When the TB - spans two pages, we cannot safely do a direct - jump. 
*/ - if (next_tb != 0 && tb->page_addr[1] == -1 - && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) { - tb_add_jump((TranslationBlock *)(next_tb & ~TB_EXIT_MASK), - next_tb & TB_EXIT_MASK, tb); - } + + tracer_exec_tb(cpu->env_ptr, next_tb, tb); tb_unlock(); if (likely(!cpu->exit_request)) { trace_exec_tb(tb, tb->pc); - tc_ptr = tb->tc_ptr; + tc_ptr = tb->opt_ptr; /* execute the generated code */ cpu->current_tb = tb; next_tb = cpu_tb_exec(cpu, tc_ptr); @@ -533,9 +537,14 @@ int cpu_exec(CPUState *cpu) */ smp_rmb(); next_tb = 0; + + tracer_reset(cpu->env_ptr); break; case TB_EXIT_ICOUNT_EXPIRED: { +#if defined(CONFIG_LLVM) + break; +#endif /* Instruction counter expired. */ int insns_left = cpu->icount_decr.u32; if (cpu->icount_extra && insns_left >= 0) { @@ -590,6 +599,8 @@ int cpu_exec(CPUState *cpu) #endif /* buggy compiler */ cpu->can_do_io = 1; tb_lock_reset(); + + tracer_reset(cpu->env_ptr); } } /* for(;;) */ @@ -66,6 +66,9 @@ #endif /* CONFIG_LINUX */ +#include "tcg.h" +#include "hqemu.h" + static CPUState *next_cpu; int64_t max_delay; int64_t max_advance; @@ -892,6 +895,18 @@ void qemu_init_cpu_loop(void) qemu_thread_get_self(&io_thread); } +void qemu_end_cpu_loop(void) +{ + CPUState *cpu; + + CPU_FOREACH(cpu) + optimization_finalize(cpu->env_ptr); + +#if defined(CONFIG_LLVM) + llvm_finalize(); +#endif +} + void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data) { struct qemu_work_item wi; @@ -1134,6 +1149,16 @@ static void *qemu_tcg_cpu_thread_fn(void *arg) /* process any pending work */ atomic_mb_set(&exit_request, 1); +#if defined(CONFIG_LLVM) + llvm_init(); +#endif + /* we can safely initialize optimization resources after + * the setup of CPUArchState is completed. 
*/ + CPU_FOREACH(cpu) { + copy_tcg_context(); + optimization_init(cpu->env_ptr); + } + while (1) { tcg_exec_all(); @@ -19,6 +19,7 @@ #include "config.h" #include "cpu.h" +#include "exec/tb-hash.h" #include "exec/exec-all.h" #include "exec/memory.h" #include "exec/address-spaces.h" @@ -30,12 +31,38 @@ #include "exec/ram_addr.h" #include "tcg/tcg.h" +#include "hqemu.h" + +#if defined(ENABLE_TLBVERSION) +#define TLB_NONIO_MASK (TARGET_PAGE_MASK | TLB_INVALID_MASK | TLB_VERSION_MASK) +#define page_val(addr, env) (((tlbaddr_t)addr & TARGET_PAGE_MASK) | tlb_version(env)) +#else +#define TLB_NONIO_MASK (TARGET_PAGE_MASK | TLB_INVALID_MASK) +#define page_val(addr, env) (addr & TARGET_PAGE_MASK) +#endif + //#define DEBUG_TLB //#define DEBUG_TLB_CHECK /* statistics */ int tlb_flush_count; +static inline void tlb_reset(CPUArchState *env) +{ +#if defined(ENABLE_TLBVERSION) + tlbaddr_t version = env->tlb_version >> TLB_VERSION_SHIFT; + if (++version == TLB_VERSION_SIZE) { + version = 0; + memset(env->tlb_table, -1, sizeof(env->tlb_table)); + memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table)); + } + env->tlb_version = version << TLB_VERSION_SHIFT; +#else + memset(env->tlb_table, -1, sizeof(env->tlb_table)); + memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table)); +#endif +} + /* NOTE: * If flush_global is true (the usual case), flush all tlb entries. * If flush_global is false, flush (at least) all tlb entries not @@ -59,10 +86,12 @@ void tlb_flush(CPUState *cpu, int flush_global) links while we are modifying them */ cpu->current_tb = NULL; - memset(env->tlb_table, -1, sizeof(env->tlb_table)); - memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table)); + tlb_reset(env); memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache)); + optimization_reset(env, 0); + lpt_reset(env); + env->vtlb_index = 0; env->tlb_flush_addr = -1; env->tlb_flush_mask = 0; @@ -110,18 +139,67 @@ void tlb_flush_by_mmuidx(CPUState *cpu, ...) 
va_end(argp); } -static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr) +static inline void tlb_flush_entry(CPUArchState *env, CPUTLBEntry *tlb_entry, + target_ulong addr) { - if (addr == (tlb_entry->addr_read & - (TARGET_PAGE_MASK | TLB_INVALID_MASK)) || - addr == (tlb_entry->addr_write & - (TARGET_PAGE_MASK | TLB_INVALID_MASK)) || - addr == (tlb_entry->addr_code & - (TARGET_PAGE_MASK | TLB_INVALID_MASK))) { + if (page_val(addr, env) == (tlb_entry->addr_read & TLB_NONIO_MASK) || + page_val(addr, env) == (tlb_entry->addr_write & TLB_NONIO_MASK) || + page_val(addr, env) == (tlb_entry->addr_code & TLB_NONIO_MASK)) { memset(tlb_entry, -1, sizeof(*tlb_entry)); } } +#ifdef ENABLE_LPAGE +static int tlb_flush_large_page(CPUState *cpu, target_ulong addr) +{ + int i, j, k, ret, mmu_idx, num_base_pages, max_flush_pages; + target_ulong page_addr, page_size, flush_addr; + CPUArchState *env = cpu->env_ptr; + +#if defined(DEBUG_TLB) + printf("tlb_flush:\n"); +#endif + /* must reset current TB so that interrupts cannot modify the + links while we are modifying them */ + cpu->current_tb = NULL; + + ret = lpt_flush_page(env, addr, &page_addr, &page_size); + if (ret == 0) + return 0; + + /* If the large page occupies a small set of the tlb, do a partial flush + * optimization, otherwise, do a full flush. */ + num_base_pages = page_size / TARGET_PAGE_SIZE; + max_flush_pages = (CPU_TLB_SIZE / 4 < 1024) ? 
CPU_TLB_SIZE / 4 : 1024; + if (num_base_pages > max_flush_pages) { + tlb_flush(cpu, 1); + return 1; + } + + for (i = 0; i < num_base_pages; i++) { + flush_addr = addr + i * TARGET_PAGE_SIZE; + j = (flush_addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); + for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) + tlb_flush_entry(env, &env->tlb_table[mmu_idx][j], flush_addr); + + /* check whether there are entries that need to be flushed in the vtlb */ + for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { + for (k = 0; k < CPU_VTLB_SIZE; k++) + tlb_flush_entry(env, &env->tlb_v_table[mmu_idx][k], flush_addr); + } + } + + for (i = -1; i < num_base_pages; i++) { + j = tb_jmp_cache_hash_page(addr + i * TARGET_PAGE_SIZE); + memset(&cpu->tb_jmp_cache[j], 0, + TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *)); + } + optimization_reset(env, 0); + + return 1; +} +#endif + void tlb_flush_page(CPUState *cpu, target_ulong addr) { CPUArchState *env = cpu->env_ptr; @@ -138,8 +216,14 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr) TARGET_FMT_lx "/" TARGET_FMT_lx ")\n", env->tlb_flush_addr, env->tlb_flush_mask); #endif + +#ifdef ENABLE_LPAGE + if (tlb_flush_large_page(cpu, addr)) + return; +#else tlb_flush(cpu, 1); return; +#endif } /* must reset current TB so that interrupts cannot modify the links while we are modifying them */ @@ -148,18 +232,19 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr) addr &= TARGET_PAGE_MASK; i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { - tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr); + tlb_flush_entry(env, &env->tlb_table[mmu_idx][i], addr); } /* check whether there are entries that need to be flushed in the vtlb */ for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { int k; for (k = 0; k < CPU_VTLB_SIZE; k++) { - tlb_flush_entry(&env->tlb_v_table[mmu_idx][k], addr); + tlb_flush_entry(env, &env->tlb_v_table[mmu_idx][k], addr); } } tb_flush_jmp_cache(cpu, addr); + 
optimization_flush_page(env, addr); } void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, ...) @@ -202,11 +287,11 @@ void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, ...) printf(" %d", mmu_idx); #endif - tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr); + tlb_flush_entry(env, &env->tlb_table[mmu_idx][i], addr); /* check whether there are vltb entries that need to be flushed */ for (k = 0; k < CPU_VTLB_SIZE; k++) { - tlb_flush_entry(&env->tlb_v_table[mmu_idx][k], addr); + tlb_flush_entry(env, &env->tlb_v_table[mmu_idx][k], addr); } } va_end(argp); @@ -284,10 +369,11 @@ void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length) } } -static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr) +static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr, + tlbaddr_t version) { - if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) { - tlb_entry->addr_write = vaddr; + if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY | version)) { + tlb_entry->addr_write = vaddr | version; } } @@ -302,13 +388,13 @@ void tlb_set_dirty(CPUState *cpu, target_ulong vaddr) vaddr &= TARGET_PAGE_MASK; i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { - tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr); + tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr, tlb_version(env)); } for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { int k; for (k = 0; k < CPU_VTLB_SIZE; k++) { - tlb_set_dirty1(&env->tlb_v_table[mmu_idx][k], vaddr); + tlb_set_dirty1(&env->tlb_v_table[mmu_idx][k], vaddr, tlb_version(env)); } } } @@ -360,6 +446,7 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, assert(size >= TARGET_PAGE_SIZE); if (size != TARGET_PAGE_SIZE) { tlb_add_large_page(env, vaddr, size); + lpt_add_page(env, vaddr, size); } sz = size; @@ -424,6 +511,13 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, } else { te->addr_write = -1; } + 
+#ifdef ENABLE_TLBVERSION + tlbaddr_t version = tlb_version(env); + te->addr_read |= version; + te->addr_write |= version; + te->addr_code |= version; +#endif } /* Add a new TLB entry, but without specifying the memory @@ -452,7 +546,7 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr) page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); mmu_idx = cpu_mmu_index(env1, true); if (unlikely(env1->tlb_table[mmu_idx][page_index].addr_code != - (addr & TARGET_PAGE_MASK))) { + page_val(addr, env1))) { cpu_ldub_code(env1, addr); } pd = env1->iotlb[mmu_idx][page_index].addr & ~TARGET_PAGE_MASK; @@ -471,6 +565,9 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr) return qemu_ram_addr_from_host_nofail(p); } +#undef TLB_NONIO_MASK +#undef page_val + #define MMUSUFFIX _mmu #define SHIFT 0 @@ -706,7 +706,7 @@ int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len, } wp = g_malloc(sizeof(*wp)); - wp->vaddr = addr; + wp->addr = addr; wp->len = len; wp->flags = flags; @@ -731,7 +731,7 @@ int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len, CPUWatchpoint *wp; QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) { - if (addr == wp->vaddr && len == wp->len + if (addr == wp->addr && len == wp->len && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) { cpu_watchpoint_remove_by_ref(cpu, wp); return 0; @@ -745,7 +745,7 @@ void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint) { QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry); - tlb_flush_page(cpu, watchpoint->vaddr); + tlb_flush_page(cpu, watchpoint->addr); g_free(watchpoint); } @@ -776,10 +776,10 @@ static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp, * exactly at the top of the address space and so addr + len * wraps round to zero. 
*/ - vaddr wpend = wp->vaddr + wp->len - 1; + vaddr wpend = wp->addr + wp->len - 1; vaddr addrend = addr + len - 1; - return !(addr > wpend || wp->vaddr > addrend); + return !(addr > wpend || wp->addr > addrend); } #endif @@ -1267,7 +1267,7 @@ static void gdb_vm_state_change(void *opaque, int running, RunState state) snprintf(buf, sizeof(buf), "T%02xthread:%02x;%swatch:" TARGET_FMT_lx ";", GDB_SIGNAL_TRAP, cpu_index(cpu), type, - (target_ulong)cpu->watchpoint_hit->vaddr); + (target_ulong)cpu->watchpoint_hit->addr); cpu->watchpoint_hit = NULL; goto send_packet; } diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h index 83b1781..9471dc6 100644 --- a/include/exec/cpu-all.h +++ b/include/exec/cpu-all.h @@ -271,12 +271,12 @@ CPUArchState *cpu_copy(CPUArchState *env); /* Flags stored in the low bits of the TLB virtual address. These are defined so that fast path ram access is all zeros. */ /* Zero if TLB entry is valid. */ -#define TLB_INVALID_MASK (1 << 3) +#define TLB_INVALID_MASK (1 << TLB_INVALID_SHIFT) /* Set if TLB entry references a clean RAM page. The iotlb entry will contain the page physical address. */ -#define TLB_NOTDIRTY (1 << 4) +#define TLB_NOTDIRTY (1 << TLB_NOTDIRTY_SHIFT) /* Set if TLB entry is an IO callback. 
*/ -#define TLB_MMIO (1 << 5) +#define TLB_MMIO (1 << TLB_MMIO_SHIFT) void dump_exec_info(FILE *f, fprintf_function cpu_fprintf); void dump_opcount_info(FILE *f, fprintf_function cpu_fprintf); diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index 85aa403..ce7deb9 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -76,12 +76,12 @@ void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf, static inline void cpu_physical_memory_read(hwaddr addr, void *buf, int len) { - cpu_physical_memory_rw(addr, buf, len, 0); + cpu_physical_memory_rw(addr, (uint8_t *)buf, len, 0); } static inline void cpu_physical_memory_write(hwaddr addr, const void *buf, int len) { - cpu_physical_memory_rw(addr, (void *)buf, len, 1); + cpu_physical_memory_rw(addr, (uint8_t *)buf, len, 1); } void *cpu_physical_memory_map(hwaddr addr, hwaddr *plen, diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h index 5093be2..b44e3f2 100644 --- a/include/exec/cpu-defs.h +++ b/include/exec/cpu-defs.h @@ -56,6 +56,8 @@ typedef uint64_t target_ulong; #error TARGET_LONG_SIZE undefined #endif +#include "hqemu-config.h" + #if !defined(CONFIG_USER_ONLY) /* use a fully associative victim tlb of 8 entries */ #define CPU_VTLB_SIZE 8 @@ -89,7 +91,7 @@ typedef uint64_t target_ulong; * of tlb_table inside env (which is non-trivial but not huge). */ #define CPU_TLB_BITS \ - MIN(8, \ + MIN(12, \ TCG_TARGET_TLB_DISPLACEMENT_BITS - CPU_TLB_ENTRY_BITS - \ (NB_MMU_MODES <= 1 ? 0 : \ NB_MMU_MODES <= 2 ? 1 : \ @@ -107,9 +109,9 @@ typedef struct CPUTLBEntry { */ union { struct { - target_ulong addr_read; - target_ulong addr_write; - target_ulong addr_code; + tlbaddr_t addr_read; + tlbaddr_t addr_write; + tlbaddr_t addr_code; /* Addend to virtual address to get host address. IO accesses use the corresponding iotlb value. 
*/ uintptr_t addend; @@ -140,6 +142,7 @@ typedef struct CPUIOTLBEntry { target_ulong tlb_flush_addr; \ target_ulong tlb_flush_mask; \ target_ulong vtlb_index; \ + tlbaddr_t tlb_version; \ #else diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h index b573df5..72acce7 100644 --- a/include/exec/cpu_ldst.h +++ b/include/exec/cpu_ldst.h @@ -405,7 +405,7 @@ static inline void *tlb_vaddr_to_host(CPUArchState *env, target_ulong addr, #else int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); CPUTLBEntry *tlbentry = &env->tlb_table[mmu_idx][index]; - target_ulong tlb_addr; + tlbaddr_t tlb_addr; uintptr_t haddr; switch (access_type) { @@ -422,13 +422,22 @@ static inline void *tlb_vaddr_to_host(CPUArchState *env, target_ulong addr, g_assert_not_reached(); } +#if defined(ENABLE_TLBVERSION) + if (tlb_version(env) != (tlb_addr & TLB_VERSION_MASK)) + return NULL; +#endif + if ((addr & TARGET_PAGE_MASK) != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) { /* TLB entry is for a different page */ return NULL; } +#if defined(ENABLE_TLBVERSION) + if (tlb_addr & (TLB_NOTDIRTY | TLB_MMIO)) { +#else if (tlb_addr & ~TARGET_PAGE_MASK) { +#endif /* IO access */ return NULL; } diff --git a/include/exec/cpu_ldst_template.h b/include/exec/cpu_ldst_template.h index 3091c00..2a01c6f 100644 --- a/include/exec/cpu_ldst_template.h +++ b/include/exec/cpu_ldst_template.h @@ -67,6 +67,14 @@ #define SRETSUFFIX glue(s, SUFFIX) #endif +#include "hqemu.h" + +#if defined(ENABLE_TLBVERSION) +#define page_val(addr, env) ((((tlbaddr_t)addr + DATA_SIZE - 1) & TARGET_PAGE_MASK) | tlb_version(env)) +#else +#define page_val(addr, env) (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))) +#endif + /* generic load/store macros */ static inline RES_TYPE @@ -80,12 +88,17 @@ glue(glue(glue(cpu_ld, USUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, int mmu_idx; TCGMemOpIdx oi; +#ifdef SOFTMMU_CODE_ACCESS + if (build_llvm_only(env)) + return glue(glue(ld, USUFFIX), _p)((uint8_t *)env->image_base + ptr); 
+#endif + addr = ptr; page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); mmu_idx = CPU_MMU_INDEX; if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ != - (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) { - oi = make_memop_idx(SHIFT, mmu_idx); + page_val(addr, env))) { + oi = make_memop_idx((TCGMemOp)SHIFT, mmu_idx); res = glue(glue(helper_ret_ld, URETSUFFIX), MMUSUFFIX)(env, addr, oi, retaddr); } else { @@ -112,12 +125,17 @@ glue(glue(glue(cpu_lds, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, int mmu_idx; TCGMemOpIdx oi; +#ifdef SOFTMMU_CODE_ACCESS + if (build_llvm_only(env)) + return glue(glue(lds, SUFFIX), _p)((uint8_t *)env->image_base + ptr); +#endif + addr = ptr; page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); mmu_idx = CPU_MMU_INDEX; if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ != - (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) { - oi = make_memop_idx(SHIFT, mmu_idx); + page_val(addr, env))) { + oi = make_memop_idx((TCGMemOp)SHIFT, mmu_idx); res = (DATA_STYPE)glue(glue(helper_ret_ld, SRETSUFFIX), MMUSUFFIX)(env, addr, oi, retaddr); } else { @@ -152,8 +170,8 @@ glue(glue(glue(cpu_st, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); mmu_idx = CPU_MMU_INDEX; if (unlikely(env->tlb_table[mmu_idx][page_index].addr_write != - (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) { - oi = make_memop_idx(SHIFT, mmu_idx); + page_val(addr, env))) { + oi = make_memop_idx((TCGMemOp)SHIFT, mmu_idx); glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)(env, addr, v, oi, retaddr); } else { @@ -171,6 +189,7 @@ glue(glue(cpu_st, SUFFIX), MEMSUFFIX)(CPUArchState *env, target_ulong ptr, #endif /* !SOFTMMU_CODE_ACCESS */ +#undef page_val #undef RES_TYPE #undef DATA_TYPE #undef DATA_STYPE diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index d900b0d..a225bea 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -21,6 +21,7 @@ #define _EXEC_ALL_H_ #include 
"qemu-common.h" +#include "hqemu-config.h" /* allow to see translation results - the slowdown should be negligible, so we leave it */ #define DEBUG_DISAS @@ -59,7 +60,7 @@ typedef struct TranslationBlock TranslationBlock; * and up to 4 + N parameters on 64-bit archs * (N = number of input arguments + output arguments). */ #define MAX_OPC_PARAM (4 + (MAX_OPC_PARAM_PER_ARG * MAX_OPC_PARAM_ARGS)) -#define OPC_BUF_SIZE 640 +#define OPC_BUF_SIZE 2048 #define OPC_MAX_SIZE (OPC_BUF_SIZE - MAX_OP_PER_INSTR) #define OPPARAM_BUF_SIZE (OPC_BUF_SIZE * MAX_OPC_PARAM) @@ -216,6 +217,8 @@ struct TranslationBlock { jmp_first */ struct TranslationBlock *jmp_next[2]; struct TranslationBlock *jmp_first; + + TB_OPTIMIZATION_COMMON }; #include "qemu/thread.h" @@ -305,7 +308,7 @@ static inline void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr) { uint16_t offset = tb->tb_jmp_offset[n]; - tb_set_jmp_target1((uintptr_t)(tb->tc_ptr + offset), addr); + tb_set_jmp_target1((uintptr_t)((uint8_t *)tb->tc_ptr + offset), addr); } #else @@ -405,4 +408,6 @@ extern int singlestep; extern CPUState *tcg_current_cpu; extern bool exit_request; +size_t get_cpu_size(void); + #endif diff --git a/include/exec/memory.h b/include/exec/memory.h index 0f07159..c2a1cd3 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -208,9 +208,9 @@ struct MemoryListener { void (*region_del)(MemoryListener *listener, MemoryRegionSection *section); void (*region_nop)(MemoryListener *listener, MemoryRegionSection *section); void (*log_start)(MemoryListener *listener, MemoryRegionSection *section, - int old, int new); + int _old, int _new); void (*log_stop)(MemoryListener *listener, MemoryRegionSection *section, - int old, int new); + int _old, int _new); void (*log_sync)(MemoryListener *listener, MemoryRegionSection *section); void (*log_global_start)(MemoryListener *listener); void (*log_global_stop)(MemoryListener *listener); diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h index 
c537969..4453e5b 100644 --- a/include/hw/qdev-core.h +++ b/include/hw/qdev-core.h @@ -10,6 +10,8 @@ #include "qapi/error.h" #include "hw/hotplug.h" +#define typename QEMUtypename + enum { DEV_NVECTORS_UNSPECIFIED = -1, }; @@ -401,4 +403,6 @@ static inline bool qbus_is_hotpluggable(BusState *bus) void device_listener_register(DeviceListener *listener); void device_listener_unregister(DeviceListener *listener); +#undef typename + #endif diff --git a/include/qemu-common.h b/include/qemu-common.h index 405364f..d0c2e20 100644 --- a/include/qemu-common.h +++ b/include/qemu-common.h @@ -454,7 +454,7 @@ int mod_utf8_codepoint(const char *s, size_t n, char **end); void qemu_hexdump(const char *buf, FILE *fp, const char *prefix, size_t size); /* vector definitions */ -#ifdef __ALTIVEC__ +#if defined(__ALTIVEC__) && !defined(__clang__) #include <altivec.h> /* The altivec.h header says we're allowed to undef these for * C++ compatibility. Here we don't care about C++, but we diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h index bd2c075..e2125bd 100644 --- a/include/qemu/atomic.h +++ b/include/qemu/atomic.h @@ -158,13 +158,13 @@ #ifndef atomic_rcu_read #ifdef __ATOMIC_CONSUME #define atomic_rcu_read(ptr) ({ \ - typeof(*ptr) _val; \ + __typeof__(*ptr) _val; \ __atomic_load(ptr, &_val, __ATOMIC_CONSUME); \ _val; \ }) #else #define atomic_rcu_read(ptr) ({ \ - typeof(*ptr) _val = atomic_read(ptr); \ + __typeof__(*ptr) _val = atomic_read(ptr); \ smp_read_barrier_depends(); \ _val; \ }) @@ -185,7 +185,7 @@ #ifndef atomic_rcu_set #ifdef __ATOMIC_RELEASE #define atomic_rcu_set(ptr, i) do { \ - typeof(*ptr) _val = (i); \ + __typeof__(*ptr) _val = (i); \ __atomic_store(ptr, &_val, __ATOMIC_RELEASE); \ } while(0) #else @@ -220,7 +220,7 @@ */ #ifndef atomic_mb_read #define atomic_mb_read(ptr) ({ \ - typeof(*ptr) _val = atomic_read(ptr); \ + __typeof__(*ptr) _val = atomic_read(ptr); \ smp_rmb(); \ _val; \ }) @@ -239,7 +239,7 @@ #define atomic_xchg(ptr, i) __sync_swap(ptr, i) 
#elif defined(__ATOMIC_SEQ_CST) #define atomic_xchg(ptr, i) ({ \ - typeof(*ptr) _new = (i), _old; \ + __typeof__(*ptr) _new = (i), _old; \ __atomic_exchange(ptr, &_new, &_old, __ATOMIC_SEQ_CST); \ _old; \ }) diff --git a/include/qemu/bitmap.h b/include/qemu/bitmap.h index 86dd9cd..b53f462 100644 --- a/include/qemu/bitmap.h +++ b/include/qemu/bitmap.h @@ -71,7 +71,7 @@ unsigned long name[BITS_TO_LONGS(bits)] #define small_nbits(nbits) \ - ((nbits) <= BITS_PER_LONG) + ((nbits) <= (long)BITS_PER_LONG) int slow_bitmap_empty(const unsigned long *bitmap, long bits); int slow_bitmap_full(const unsigned long *bitmap, long bits); @@ -97,7 +97,7 @@ int slow_bitmap_intersects(const unsigned long *bitmap1, static inline unsigned long *bitmap_try_new(long nbits) { long len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); - return g_try_malloc0(len); + return (unsigned long *)g_try_malloc0(len); } static inline unsigned long *bitmap_new(long nbits) @@ -241,9 +241,9 @@ static inline unsigned long *bitmap_zero_extend(unsigned long *old, long old_nbits, long new_nbits) { long new_len = BITS_TO_LONGS(new_nbits) * sizeof(unsigned long); - unsigned long *new = g_realloc(old, new_len); - bitmap_clear(new, old_nbits, new_nbits - old_nbits); - return new; + unsigned long *new_bitmap = (unsigned long *)g_realloc(old, new_len); + bitmap_clear(new_bitmap, old_nbits, new_nbits - old_nbits); + return new_bitmap; } #endif /* BITMAP_H */ diff --git a/include/qemu/compiler.h b/include/qemu/compiler.h index d22eb01..0abf0f8 100644 --- a/include/qemu/compiler.h +++ b/include/qemu/compiler.h @@ -60,7 +60,7 @@ #ifndef container_of #define container_of(ptr, type, member) ({ \ - const typeof(((type *) 0)->member) *__mptr = (ptr); \ + const __typeof__(((type *) 0)->member) *__mptr = (ptr); \ (type *) ((char *) __mptr - offsetof(type, member));}) #endif @@ -74,7 +74,7 @@ #define DO_UPCAST(type, field, dev) container_of(dev, type, field) #endif -#define typeof_field(type, field) typeof(((type *)0)->field) 
+#define typeof_field(type, field) __typeof__(((type *)0)->field) #define type_check(t1,t2) ((t1*)0 - (t2*)0) #ifndef always_inline diff --git a/include/qemu/queue.h b/include/qemu/queue.h index f781aa2..b56bce5 100644 --- a/include/qemu/queue.h +++ b/include/qemu/queue.h @@ -198,7 +198,7 @@ struct { \ } while (/*CONSTCOND*/0) #define QSLIST_INSERT_HEAD_ATOMIC(head, elm, field) do { \ - typeof(elm) save_sle_next; \ + __typeof__(elm) save_sle_next; \ do { \ save_sle_next = (elm)->field.sle_next = (head)->slh_first; \ } while (atomic_cmpxchg(&(head)->slh_first, save_sle_next, (elm)) != \ diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h index f6d1d56..0d9f677 100644 --- a/include/qemu/rcu.h +++ b/include/qemu/rcu.h @@ -135,8 +135,8 @@ extern void call_rcu1(struct rcu_head *head, RCUCBFunc *func); #define call_rcu(head, func, field) \ call_rcu1(({ \ char __attribute__((unused)) \ - offset_must_be_zero[-offsetof(typeof(*(head)), field)], \ - func_type_invalid = (func) - (void (*)(typeof(head)))(func); \ + offset_must_be_zero[-offsetof(__typeof__(*(head)), field)], \ + func_type_invalid = (func) - (void (*)(__typeof__(head)))(func); \ &(head)->field; \ }), \ (RCUCBFunc *)(func)) @@ -144,7 +144,7 @@ extern void call_rcu1(struct rcu_head *head, RCUCBFunc *func); #define g_free_rcu(obj, field) \ call_rcu1(({ \ char __attribute__((unused)) \ - offset_must_be_zero[-offsetof(typeof(*(obj)), field)]; \ + offset_must_be_zero[-offsetof(__typeof__(*(obj)), field)]; \ &(obj)->field; \ }), \ (RCUCBFunc *)g_free); diff --git a/include/qemu/timer.h b/include/qemu/timer.h index d0946cb..a16effa 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -523,7 +523,7 @@ static inline QEMUTimer *timer_new_tl(QEMUTimerList *timer_list, QEMUTimerCB *cb, void *opaque) { - QEMUTimer *ts = g_malloc0(sizeof(QEMUTimer)); + QEMUTimer *ts = (QEMUTimer *)g_malloc0(sizeof(QEMUTimer)); timer_init_tl(ts, timer_list, scale, cb, opaque); return ts; } @@ -965,7 +965,7 @@ static inline int64_t 
cpu_get_host_ticks (void) #define MIPS_RDHWR(rd, value) { \ __asm__ __volatile__ (".set push\n\t" \ ".set mips32r2\n\t" \ - "rdhwr %0, "rd"\n\t" \ + "rdhwr %0, " rd "\n\t" \ ".set pop" \ : "=r" (value)); \ } diff --git a/include/qom/cpu.h b/include/qom/cpu.h index 51a1323..4b005ff 100644 --- a/include/qom/cpu.h +++ b/include/qom/cpu.h @@ -30,6 +30,8 @@ #include "qemu/thread.h" #include "qemu/typedefs.h" +#define typename QEMUtypename + typedef int (*WriteCoreDumpFunction)(const void *buf, size_t size, void *opaque); @@ -196,7 +198,7 @@ typedef struct CPUBreakpoint { } CPUBreakpoint; typedef struct CPUWatchpoint { - vaddr vaddr; + vaddr addr; vaddr len; vaddr hitaddr; MemTxAttrs hitattrs; @@ -775,4 +777,7 @@ extern const struct VMStateDescription vmstate_cpu_common; .offset = 0, \ } +CPUState *cpu_create(void); +#undef typename + #endif diff --git a/include/qom/object.h b/include/qom/object.h index 4509166..118c227 100644 --- a/include/qom/object.h +++ b/include/qom/object.h @@ -20,6 +20,10 @@ #include "qemu/queue.h" #include "qapi/error.h" +#define Type QEMUType +#define class QEMUclass +#define typename QEMUtypename + struct Visitor; struct TypeImpl; @@ -1570,5 +1574,8 @@ int object_child_foreach_recursive(Object *obj, */ Object *container_get(Object *root, const char *path); +#undef Type +#undef class +#undef typename #endif diff --git a/include/sysemu/cpus.h b/include/sysemu/cpus.h index 3d1e5ba..d594ebf 100644 --- a/include/sysemu/cpus.h +++ b/include/sysemu/cpus.h @@ -4,6 +4,7 @@ /* cpus.c */ bool qemu_in_vcpu_thread(void); void qemu_init_cpu_loop(void); +void qemu_end_cpu_loop(void); void resume_all_vcpus(void); void pause_all_vcpus(void); void cpu_stop_current(void); diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 8b17c0e..7be6e71 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -2001,9 +2001,13 @@ static void load_elf_image(const char *image_name, int image_fd, info->brk = info->end_code; } +#if defined(CONFIG_LLVM) + 
load_symbols(ehdr, image_fd, load_bias); +#else if (qemu_log_enabled()) { load_symbols(ehdr, image_fd, load_bias); } +#endif close(image_fd); return; diff --git a/linux-user/main.c b/linux-user/main.c index 8acfe0f..0f67ad4 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -33,11 +33,12 @@ #include "qemu/timer.h" #include "qemu/envlist.h" #include "elf.h" +#include "hqemu.h" char *exec_path; int singlestep; -static const char *filename; +const char *filename; static const char *argv0; static int gdbstub_port; static envlist_t *envlist; @@ -105,7 +106,10 @@ static int pending_cpus; /* Make sure everything is in a consistent state for calling fork(). */ void fork_start(void) { - qemu_mutex_lock(&tcg_ctx.tb_ctx.tb_lock); +#if defined(CONFIG_LLVM) + llvm_fork_start(); +#endif + qemu_mutex_lock(&tcg_ctx.tb_ctx->tb_lock); pthread_mutex_lock(&exclusive_lock); mmap_fork_start(); } @@ -127,12 +131,15 @@ void fork_end(int child) pthread_mutex_init(&cpu_list_mutex, NULL); pthread_cond_init(&exclusive_cond, NULL); pthread_cond_init(&exclusive_resume, NULL); - qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock); + qemu_mutex_init(&tcg_ctx.tb_ctx->tb_lock); gdbserver_fork(thread_cpu); } else { pthread_mutex_unlock(&exclusive_lock); - qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock); + qemu_mutex_unlock(&tcg_ctx.tb_ctx->tb_lock); } +#if defined(CONFIG_LLVM) + llvm_fork_end(child); +#endif } /* Wait for pending exclusive operations to complete. 
The exclusive lock @@ -276,6 +283,9 @@ void cpu_loop(CPUX86State *env) abi_ulong pc; target_siginfo_t info; + copy_tcg_context(); + optimization_init(env); + for(;;) { cpu_exec_start(cs); trapnr = cpu_x86_exec(cs); @@ -670,6 +680,9 @@ void cpu_loop(CPUARMState *env) target_siginfo_t info; uint32_t addr; + copy_tcg_context(); + optimization_init(env); + for(;;) { cpu_exec_start(cs); trapnr = cpu_arm_exec(cs); @@ -1001,6 +1014,9 @@ void cpu_loop(CPUARMState *env) int trapnr, sig; target_siginfo_t info; + copy_tcg_context(); + optimization_init(env); + for (;;) { cpu_exec_start(cs); trapnr = cpu_arm_exec(cs); @@ -1083,6 +1099,9 @@ void cpu_loop(CPUUniCore32State *env) unsigned int n, insn; target_siginfo_t info; + copy_tcg_context(); + optimization_init(env); + for (;;) { cpu_exec_start(cs); trapnr = uc32_cpu_exec(cs); @@ -1284,6 +1303,9 @@ void cpu_loop (CPUSPARCState *env) abi_long ret; target_siginfo_t info; + copy_tcg_context(); + optimization_init(env); + while (1) { cpu_exec_start(cs); trapnr = cpu_sparc_exec(cs); @@ -1564,6 +1586,9 @@ void cpu_loop(CPUPPCState *env) int trapnr; target_ulong ret; + copy_tcg_context(); + optimization_init(env); + for(;;) { cpu_exec_start(cs); trapnr = cpu_ppc_exec(cs); @@ -2416,6 +2441,9 @@ void cpu_loop(CPUMIPSState *env) unsigned int syscall_num; # endif + copy_tcg_context(); + optimization_init(env); + for(;;) { cpu_exec_start(cs); trapnr = cpu_mips_exec(cs); @@ -2653,6 +2681,9 @@ void cpu_loop(CPUOpenRISCState *env) CPUState *cs = CPU(openrisc_env_get_cpu(env)); int trapnr, gdbsig; + copy_tcg_context(); + optimization_init(env); + for (;;) { cpu_exec_start(cs); trapnr = cpu_openrisc_exec(cs); @@ -2743,6 +2774,9 @@ void cpu_loop(CPUSH4State *env) int trapnr, ret; target_siginfo_t info; + copy_tcg_context(); + optimization_init(env); + while (1) { cpu_exec_start(cs); trapnr = cpu_sh4_exec(cs); @@ -2805,6 +2839,9 @@ void cpu_loop(CPUCRISState *env) int trapnr, ret; target_siginfo_t info; + copy_tcg_context(); + 
optimization_init(env); + while (1) { cpu_exec_start(cs); trapnr = cpu_cris_exec(cs); @@ -2866,6 +2903,9 @@ void cpu_loop(CPUMBState *env) int trapnr, ret; target_siginfo_t info; + copy_tcg_context(); + optimization_init(env); + while (1) { cpu_exec_start(cs); trapnr = cpu_mb_exec(cs); @@ -2971,6 +3011,9 @@ void cpu_loop(CPUM68KState *env) target_siginfo_t info; TaskState *ts = cs->opaque; + copy_tcg_context(); + optimization_init(env); + for(;;) { cpu_exec_start(cs); trapnr = cpu_m68k_exec(cs); @@ -3110,6 +3153,9 @@ void cpu_loop(CPUAlphaState *env) target_siginfo_t info; abi_long sysret; + copy_tcg_context(); + optimization_init(env); + while (1) { cpu_exec_start(cs); trapnr = cpu_alpha_exec(cs); @@ -3298,6 +3344,9 @@ void cpu_loop(CPUS390XState *env) target_siginfo_t info; target_ulong addr; + copy_tcg_context(); + optimization_init(env); + while (1) { cpu_exec_start(cs); trapnr = cpu_s390x_exec(cs); @@ -3602,6 +3651,9 @@ void cpu_loop(CPUTLGState *env) CPUState *cs = CPU(tilegx_env_get_cpu(env)); int trapnr; + copy_tcg_context(); + optimization_init(env); + while (1) { cpu_exec_start(cs); trapnr = cpu_tilegx_exec(cs); @@ -3711,7 +3763,7 @@ CPUArchState *cpu_copy(CPUArchState *env) cpu_breakpoint_insert(new_cpu, bp->pc, bp->flags, NULL); } QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) { - cpu_watchpoint_insert(new_cpu, wp->vaddr, wp->len, wp->flags, NULL); + cpu_watchpoint_insert(new_cpu, wp->addr, wp->len, wp->flags, NULL); } return new_env; @@ -4009,6 +4061,12 @@ static void usage(int exitcode) "Note that if you provide several changes to a single variable\n" "the last change will stay in effect.\n"); +#if defined(CONFIG_LLVM) + printf("\n\nHQEMU "); + fflush(stdout); + hqemu_help(); +#endif + exit(exitcode); } @@ -4324,7 +4382,11 @@ int main(int argc, char **argv, char **envp) /* Now that we've loaded the binary, GUEST_BASE is fixed. Delay generating the prologue until now so that the prologue can take the real value of GUEST_BASE into account. 
*/ - tcg_prologue_init(&tcg_ctx); + tcg_prologue_init(&tcg_ctx_global); + +#if defined(CONFIG_LLVM) + llvm_init(); +#endif #if defined(TARGET_I386) env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK; @@ -4663,6 +4725,7 @@ int main(int argc, char **argv, char **envp) } gdb_handlesig(cpu, 0); } + cpu_loop(env); /* never exits */ return 0; diff --git a/linux-user/strace.c b/linux-user/strace.c index ea6c1d2..69d5408 100644 --- a/linux-user/strace.c +++ b/linux-user/strace.c @@ -7,6 +7,7 @@ #include <sys/types.h> #include <sys/mount.h> #include <sys/mman.h> +#include <sys/sysmacros.h> #include <unistd.h> #include <sched.h> #include "qemu.h" diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 6c64ba6..030eb2a 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -114,6 +114,7 @@ int __clone2(int (*fn)(void *), void *child_stack_base, #include "uname.h" #include "qemu.h" +#include "hqemu.h" #define CLONE_NPTL_FLAGS2 (CLONE_SETTLS | \ CLONE_PARENT_SETTID | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID) @@ -4495,7 +4496,7 @@ abi_long do_arch_prctl(CPUX86State *env, int code, abi_ulong addr) #endif /* defined(TARGET_I386) */ -#define NEW_STACK_SIZE 0x40000 +#define NEW_STACK_SIZE 0x80000 static pthread_mutex_t clone_lock = PTHREAD_MUTEX_INITIALIZER; @@ -5710,6 +5711,12 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, rcu_unregister_thread(); pthread_exit(NULL); } + + optimization_finalize((CPUArchState *)cpu_env); +#if defined(CONFIG_LLVM) + llvm_finalize(); +#endif + #ifdef TARGET_GPROF _mcleanup(); #endif @@ -7615,6 +7622,10 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #ifdef __NR_exit_group /* new thread calls */ case TARGET_NR_exit_group: + optimization_finalize((CPUArchState *)cpu_env); +#if defined(CONFIG_LLVM) + llvm_finalize(); +#endif #ifdef TARGET_GPROF _mcleanup(); #endif diff --git a/qga/commands-posix.c b/qga/commands-posix.c index c2ff970..e6c9f51 100644 --- a/qga/commands-posix.c +++ b/qga/commands-posix.c @@ 
-15,6 +15,7 @@ #include <sys/types.h> #include <sys/ioctl.h> #include <sys/wait.h> +#include <sys/sysmacros.h> #include <unistd.h> #include <errno.h> #include <fcntl.h> diff --git a/qom/object.c b/qom/object.c index d751569..deb182f 100644 --- a/qom/object.c +++ b/qom/object.c @@ -28,6 +28,10 @@ #include "qapi/qmp/qint.h" #include "qapi/qmp/qstring.h" +#define Type QEMUType +#define class QEMUclass +#define typename QEMUtypename + #define MAX_INTERFACES 32 typedef struct InterfaceImpl InterfaceImpl; @@ -2126,3 +2130,7 @@ static void register_types(void) } type_init(register_types) + +#undef Type +#undef class +#undef typename diff --git a/softmmu_template.h b/softmmu_template.h index 6803890..4574545 100644 --- a/softmmu_template.h +++ b/softmmu_template.h @@ -24,6 +24,7 @@ #include "qemu/timer.h" #include "exec/address-spaces.h" #include "exec/memory.h" +#include "hqemu-config.h" #define DATA_SIZE (1 << SHIFT) @@ -116,6 +117,16 @@ # define helper_te_st_name helper_le_st_name #endif +#if defined(ENABLE_TLBVERSION) +#define TLB_IO_MASK (TLB_NOTDIRTY | TLB_MMIO) +#define TLB_NONIO_MASK (TARGET_PAGE_MASK | TLB_INVALID_MASK | TLB_VERSION_MASK) +#define page_val(addr, env) (((tlbaddr_t)addr & TARGET_PAGE_MASK) | tlb_version(env)) +#else +#define TLB_IO_MASK (~TARGET_PAGE_MASK) +#define TLB_NONIO_MASK (TARGET_PAGE_MASK | TLB_INVALID_MASK) +#define page_val(addr, env) ((addr & TARGET_PAGE_MASK)) +#endif + /* macro to check the victim tlb */ #define VICTIM_TLB_HIT(ty) \ ({ \ @@ -126,7 +137,7 @@ CPUIOTLBEntry tmpiotlb; \ CPUTLBEntry tmptlb; \ for (vidx = CPU_VTLB_SIZE-1; vidx >= 0; --vidx) { \ - if (env->tlb_v_table[mmu_idx][vidx].ty == (addr & TARGET_PAGE_MASK)) {\ + if (env->tlb_v_table[mmu_idx][vidx].ty == page_val(addr, env)) { \ /* found entry in victim tlb, swap tlb and iotlb */ \ tmptlb = env->tlb_table[mmu_idx][index]; \ env->tlb_table[mmu_idx][index] = env->tlb_v_table[mmu_idx][vidx]; \ @@ -170,7 +181,7 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong 
addr, { unsigned mmu_idx = get_mmuidx(oi); int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; + tlbaddr_t tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; uintptr_t haddr; DATA_TYPE res; @@ -178,8 +189,7 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, retaddr -= GETPC_ADJ; /* If the TLB entry is for a different page, reload and try again. */ - if ((addr & TARGET_PAGE_MASK) - != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) { + if (page_val(addr, env) != (tlb_addr & TLB_NONIO_MASK)) { if ((addr & (DATA_SIZE - 1)) != 0 && (get_memop(oi) & MO_AMASK) == MO_ALIGN) { cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE, @@ -193,7 +203,7 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, } /* Handle an IO access. */ - if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { + if (unlikely(tlb_addr & TLB_IO_MASK)) { CPUIOTLBEntry *iotlbentry; if ((addr & (DATA_SIZE - 1)) != 0) { goto do_unaligned_access; @@ -254,7 +264,7 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, { unsigned mmu_idx = get_mmuidx(oi); int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; + tlbaddr_t tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; uintptr_t haddr; DATA_TYPE res; @@ -262,8 +272,7 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, retaddr -= GETPC_ADJ; /* If the TLB entry is for a different page, reload and try again. */ - if ((addr & TARGET_PAGE_MASK) - != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) { + if (page_val(addr, env) != (tlb_addr & TLB_NONIO_MASK)) { if ((addr & (DATA_SIZE - 1)) != 0 && (get_memop(oi) & MO_AMASK) == MO_ALIGN) { cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE, @@ -277,7 +286,7 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, } /* Handle an IO access. 
*/ - if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { + if (unlikely(tlb_addr & TLB_IO_MASK)) { CPUIOTLBEntry *iotlbentry; if ((addr & (DATA_SIZE - 1)) != 0) { goto do_unaligned_access; @@ -375,15 +384,14 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, { unsigned mmu_idx = get_mmuidx(oi); int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write; + tlbaddr_t tlb_addr = env->tlb_table[mmu_idx][index].addr_write; uintptr_t haddr; /* Adjust the given return address. */ retaddr -= GETPC_ADJ; /* If the TLB entry is for a different page, reload and try again. */ - if ((addr & TARGET_PAGE_MASK) - != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) { + if (page_val(addr, env) != (tlb_addr & TLB_NONIO_MASK)) { if ((addr & (DATA_SIZE - 1)) != 0 && (get_memop(oi) & MO_AMASK) == MO_ALIGN) { cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE, @@ -396,7 +404,7 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, } /* Handle an IO access. */ - if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { + if (unlikely(tlb_addr & TLB_IO_MASK)) { CPUIOTLBEntry *iotlbentry; if ((addr & (DATA_SIZE - 1)) != 0) { goto do_unaligned_access; @@ -455,15 +463,14 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, { unsigned mmu_idx = get_mmuidx(oi); int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write; + tlbaddr_t tlb_addr = env->tlb_table[mmu_idx][index].addr_write; uintptr_t haddr; /* Adjust the given return address. */ retaddr -= GETPC_ADJ; /* If the TLB entry is for a different page, reload and try again. 
*/ - if ((addr & TARGET_PAGE_MASK) - != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) { + if (page_val(addr, env) != (tlb_addr & TLB_NONIO_MASK)) { if ((addr & (DATA_SIZE - 1)) != 0 && (get_memop(oi) & MO_AMASK) == MO_ALIGN) { cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE, @@ -476,7 +483,7 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, } /* Handle an IO access. */ - if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { + if (unlikely(tlb_addr & TLB_IO_MASK)) { CPUIOTLBEntry *iotlbentry; if ((addr & (DATA_SIZE - 1)) != 0) { goto do_unaligned_access; @@ -537,10 +544,9 @@ void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx, uintptr_t retaddr) { int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write; + tlbaddr_t tlb_addr = env->tlb_table[mmu_idx][index].addr_write; - if ((addr & TARGET_PAGE_MASK) - != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) { + if (page_val(addr, env) != (tlb_addr & TLB_NONIO_MASK)) { /* TLB entry is for a different page */ if (!VICTIM_TLB_HIT(addr_write)) { tlb_fill(ENV_GET_CPU(env), addr, MMU_DATA_STORE, mmu_idx, retaddr); @@ -550,6 +556,11 @@ void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx, #endif #endif /* !defined(SOFTMMU_CODE_ACCESS) */ +#include "softmmu_template_llvm.h" + +#undef TLB_IO_MASK +#undef TLB_NONIO_MASK +#undef page_val #undef READ_ACCESS_TYPE #undef SHIFT #undef DATA_TYPE diff --git a/target-arm/cpu.h b/target-arm/cpu.h index 815fef8..1087075 100644 --- a/target-arm/cpu.h +++ b/target-arm/cpu.h @@ -437,7 +437,7 @@ typedef struct CPUARMState { * the two execution states, and means we do not need to explicitly * map these registers when changing states. */ - float64 regs[64]; + float64 regs[64] __attribute__((aligned(16))); uint32_t xregs[16]; /* We store these fpcsr fields separately for convenience. 
*/ @@ -496,6 +496,8 @@ typedef struct CPUARMState { /* Internal CPU feature flags. */ uint64_t features; + CPU_OPTIMIZATION_COMMON + /* PMSAv7 MPU */ struct { uint32_t *drbar; @@ -1509,7 +1511,7 @@ bool write_cpustate_to_list(ARMCPU *cpu); /* The ARM MMU allows 1k pages. */ /* ??? Linux doesn't actually use these, and they're deprecated in recent architecture revisions. Maybe a configure option to disable them. */ -#define TARGET_PAGE_BITS 10 +#define TARGET_PAGE_BITS 12 #endif #if defined(TARGET_AARCH64) @@ -1523,7 +1525,7 @@ bool write_cpustate_to_list(ARMCPU *cpu); static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx, unsigned int target_el) { - CPUARMState *env = cs->env_ptr; + CPUARMState *env = (CPUARMState *)cs->env_ptr; unsigned int cur_el = arm_current_el(env); bool secure = arm_is_secure(env); bool pstate_unmasked; @@ -1983,6 +1985,62 @@ static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, *cs_base = 0; } +static inline target_ulong cpu_get_pc(CPUARMState *env) +{ +#if defined(TARGET_AARCH64) + return env->pc; +#else + return env->regs[15]; +#endif +} + +static inline int cpu_check_state(CPUARMState *env, + target_ulong cs_base, int flags) +{ + int f; + if (is_a64(env)) { + f = ARM_TBFLAG_AARCH64_STATE_MASK; + } else { + f = (env->thumb << ARM_TBFLAG_THUMB_SHIFT) + | (env->vfp.vec_len << ARM_TBFLAG_VECLEN_SHIFT) + | (env->vfp.vec_stride << ARM_TBFLAG_VECSTRIDE_SHIFT) + | (env->condexec_bits << ARM_TBFLAG_CONDEXEC_SHIFT) + | (env->bswap_code << ARM_TBFLAG_BSWAP_CODE_SHIFT); + if (!(access_secure_reg(env))) { + f |= ARM_TBFLAG_NS_MASK; + } + if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30) + || arm_el_is_aa64(env, 1)) { + f |= ARM_TBFLAG_VFPEN_MASK; + } + f |= (extract32(env->cp15.c15_cpar, 0, 2) + << ARM_TBFLAG_XSCALE_CPAR_SHIFT); + } + + f |= (cpu_mmu_index(env, false) << ARM_TBFLAG_MMUIDX_SHIFT); + /* The SS_ACTIVE and PSTATE_SS bits correspond to the state machine + * states defined in the ARM ARM for software 
singlestep: + * SS_ACTIVE PSTATE.SS State + * 0 x Inactive (the TB flag for SS is always 0) + * 1 0 Active-pending + * 1 1 Active-not-pending + */ + if (arm_singlestep_active(env)) { + f |= ARM_TBFLAG_SS_ACTIVE_MASK; + if (is_a64(env)) { + if (env->pstate & PSTATE_SS) { + f |= ARM_TBFLAG_PSTATE_SS_MASK; + } + } else { + if (env->uncached_cpsr & PSTATE_SS) { + f |= ARM_TBFLAG_PSTATE_SS_MASK; + } + } + } + f |= fp_exception_el(env) << ARM_TBFLAG_FPEXC_EL_SHIFT; + return f == flags; +} + #include "exec/exec-all.h" enum { diff --git a/target-arm/helper.c b/target-arm/helper.c index afc4163..302e88c 100644 --- a/target-arm/helper.c +++ b/target-arm/helper.c @@ -11,6 +11,7 @@ #include "arm_ldst.h" #include <zlib.h> /* For crc32 */ #include "exec/semihost.h" +#include "hqemu.h" #define ARM_CPU_FREQ 1000000000 /* FIXME: 1 GHz, should be configurable */ @@ -2225,6 +2226,8 @@ static void vmsa_ttbr_write(CPUARMState *env, const ARMCPRegInfo *ri, tlb_flush(CPU(cpu), 1); } raw_write(env, ri, value); + + pcid = (target_ulong)value >> 12; } static void vttbr_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -7977,29 +7980,23 @@ float64 VFP_HELPER(sqrt, d)(float64 a, CPUARMState *env) /* XXX: check quiet/signaling case */ #define DO_VFP_cmp(p, type) \ -void VFP_HELPER(cmp, p)(type a, type b, CPUARMState *env) \ +uint32_t VFP_HELPER(cmp, p)(type a, type b, CPUARMState *env) \ { \ - uint32_t flags; \ - switch(type ## _compare_quiet(a, b, &env->vfp.fp_status)) { \ - case 0: flags = 0x6; break; \ - case -1: flags = 0x8; break; \ - case 1: flags = 0x2; break; \ - default: case 2: flags = 0x3; break; \ - } \ - env->vfp.xregs[ARM_VFP_FPSCR] = (flags << 28) \ - | (env->vfp.xregs[ARM_VFP_FPSCR] & 0x0fffffff); \ + uint32_t flags = 0x3; \ + int ret = type ## _compare_quiet(a, b, &env->vfp.fp_status); \ + if (ret == 0) flags = 0x6; \ + else if (ret == -1) flags = 0x8; \ + else if (ret == 1) flags = 0x2; \ + return flags << 28; \ } \ -void VFP_HELPER(cmpe, p)(type a, type b, CPUARMState *env) 
\ +uint32_t VFP_HELPER(cmpe, p)(type a, type b, CPUARMState *env) \ { \ - uint32_t flags; \ - switch(type ## _compare(a, b, &env->vfp.fp_status)) { \ - case 0: flags = 0x6; break; \ - case -1: flags = 0x8; break; \ - case 1: flags = 0x2; break; \ - default: case 2: flags = 0x3; break; \ - } \ - env->vfp.xregs[ARM_VFP_FPSCR] = (flags << 28) \ - | (env->vfp.xregs[ARM_VFP_FPSCR] & 0x0fffffff); \ + uint32_t flags = 0x3; \ + int ret = type ## _compare(a, b, &env->vfp.fp_status); \ + if (ret == 0) flags = 0x6; \ + else if (ret == -1) flags = 0x8; \ + else if (ret == 1) flags = 0x2; \ + return flags << 28; \ } DO_VFP_cmp(s, float32) DO_VFP_cmp(d, float64) @@ -8777,3 +8774,12 @@ uint32_t HELPER(crc32c)(uint32_t acc, uint32_t val, uint32_t bytes) /* Linux crc32c converts the output to one's complement. */ return crc32c(acc, buf, bytes) ^ 0xffffffff; } + +CPUState *cpu_create(void) +{ + ARMCPU *cpu = g_malloc0(sizeof(ARMCPU)); + CPUState *cs = CPU(cpu); + memcpy(cpu, ARM_CPU(first_cpu), sizeof(ARMCPU)); + cs->env_ptr = &cpu->env; + return cs; +} diff --git a/target-arm/helper.h b/target-arm/helper.h index c2a85c7..41c2c6d 100644 --- a/target-arm/helper.h +++ b/target-arm/helper.h @@ -56,6 +56,7 @@ DEF_HELPER_2(pre_smc, void, env, i32) DEF_HELPER_1(check_breakpoints, void, env) +DEF_HELPER_3(cpsr_write_nzcv, void, env, i32, i32) DEF_HELPER_3(cpsr_write, void, env, i32, i32) DEF_HELPER_1(cpsr_read, i32, env) @@ -103,10 +104,10 @@ DEF_HELPER_1(vfp_abss, f32, f32) DEF_HELPER_1(vfp_absd, f64, f64) DEF_HELPER_2(vfp_sqrts, f32, f32, env) DEF_HELPER_2(vfp_sqrtd, f64, f64, env) -DEF_HELPER_3(vfp_cmps, void, f32, f32, env) -DEF_HELPER_3(vfp_cmpd, void, f64, f64, env) -DEF_HELPER_3(vfp_cmpes, void, f32, f32, env) -DEF_HELPER_3(vfp_cmped, void, f64, f64, env) +DEF_HELPER_3(vfp_cmps, i32, f32, f32, env) +DEF_HELPER_3(vfp_cmpd, i32, f64, f64, env) +DEF_HELPER_3(vfp_cmpes, i32, f32, f32, env) +DEF_HELPER_3(vfp_cmped, i32, f64, f64, env) DEF_HELPER_2(vfp_fcvtds, f64, f32, env) 
DEF_HELPER_2(vfp_fcvtsd, f32, f64, env) @@ -535,3 +536,5 @@ DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64) #ifdef TARGET_AARCH64 #include "helper-a64.h" #endif + +#include "hqemu-helper.h" diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c index 6cd54c8..fdea907 100644 --- a/target-arm/op_helper.c +++ b/target-arm/op_helper.c @@ -386,6 +386,16 @@ void HELPER(cpsr_write)(CPUARMState *env, uint32_t val, uint32_t mask) cpsr_write(env, val, mask); } +void HELPER(cpsr_write_nzcv)(CPUARMState *env, uint32_t val, uint32_t mask) +{ + if (mask & CPSR_NZCV) { + env->ZF = (~val) & CPSR_Z; + env->NF = val; + env->CF = (val >> 29) & 1; + env->VF = (val << 3) & 0x80000000; + } +} + /* Access to user mode registers from privileged modes. */ uint32_t HELPER(get_user_reg)(CPUARMState *env, uint32_t regno) { diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c index 14e8131..21cf214 100644 --- a/target-arm/translate-a64.c +++ b/target-arm/translate-a64.c @@ -37,10 +37,17 @@ #include "exec/helper-gen.h" #include "trace-tcg.h" +#include "hqemu.h" static TCGv_i64 cpu_X[32]; static TCGv_i64 cpu_pc; +#if defined(CONFIG_USER_ONLY) +#define IS_USER(s) 1 +#else +#define IS_USER(s) (s->user) +#endif + /* Load/store exclusive handling */ static TCGv_i64 cpu_exclusive_high; @@ -119,6 +126,31 @@ static inline ARMMMUIdx get_a64_user_mem_index(DisasContext *s) } } +static inline void gen_ibtc_stub(DisasContext *s) +{ +#ifdef ENABLE_IBTC + if (!build_llvm(s->env)) { + TCGv_ptr ibtc_host_pc = tcg_temp_new_ptr(); + gen_helper_lookup_ibtc(ibtc_host_pc, cpu_env); + tcg_gen_op1i(INDEX_op_jmp, GET_TCGV_PTR(ibtc_host_pc)); + tcg_temp_free_ptr(ibtc_host_pc); + s->gen_ibtc = 0; + } +#endif +} + +static inline void gen_cpbl_stub(DisasContext *s) +{ +#ifdef ENABLE_CPBL + if (!build_llvm(s->env)) { + TCGv_ptr cpbl_host_pc = tcg_temp_new_ptr(); + gen_helper_lookup_cpbl(cpbl_host_pc, cpu_env); + tcg_gen_op1i(INDEX_op_jmp, GET_TCGV_PTR(cpbl_host_pc)); + 
tcg_temp_free_ptr(cpbl_host_pc); + } +#endif +} + void aarch64_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf, int flags) { @@ -285,12 +317,38 @@ static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest) return true; } +#if defined(CONFIG_USER_ONLY) +static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest) +{ + TranslationBlock *tb; + + tb = s->tb; + tcg_gen_goto_tb(n); + gen_a64_set_pc_im(dest); + tcg_gen_exit_tb((intptr_t)tb + n); + s->is_jmp = DISAS_TB_JUMP; + tb->jmp_pc[n] = dest; +} +#else +static int try_link_pages(DisasContext *s, TranslationBlock *tb, target_ulong dest) +{ +#ifdef ENABLE_LPAGE + if (!build_llvm(s->env)) { + target_ulong addr, size; + int ret = lpt_search_page(s->env, dest, &addr, &size); + if (ret == 1 && (tb->pc & ~(size - 1)) == addr) + return 1; + } +#endif + return 0; +} + static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest) { TranslationBlock *tb; tb = s->tb; - if (use_goto_tb(s, n, dest)) { + if (use_goto_tb(s, n, dest) || try_link_pages(s, tb, dest) == 1) { tcg_gen_goto_tb(n); gen_a64_set_pc_im(dest); tcg_gen_exit_tb((intptr_t)tb + n); @@ -302,11 +360,14 @@ static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest) } else if (s->singlestep_enabled) { gen_exception_internal(EXCP_DEBUG); } else { + gen_cpbl_stub(s); tcg_gen_exit_tb(0); s->is_jmp = DISAS_TB_JUMP; } } + tb->jmp_pc[n] = dest; } +#endif static void unallocated_encoding(DisasContext *s) { @@ -568,6 +629,7 @@ static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) tcg_gen_movi_i64(tmp, 0); tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp); + tcg_gen_annotate(A_SetCC); tcg_gen_extrl_i64_i32(cpu_CF, flag); @@ -614,6 +676,7 @@ static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) result = tcg_temp_new_i64(); flag = tcg_temp_new_i64(); tcg_gen_sub_i64(result, t0, t1); + tcg_gen_annotate(A_SetCC); gen_set_NZ64(result); @@ -764,11 +827,51 @@ static void do_gpr_ld(DisasContext 
*s, TCGv_i64 dest, TCGv_i64 tcg_addr, get_mem_index(s)); } +#ifdef ENABLE_TCG_VECTOR +#include "simd_helper.h" + +#define VFP_DREG(reg) \ +do { \ + reg = reg * 2; \ +} while (0) +#define tcg_vector_abort() \ +do {\ + fprintf(stderr, "%s:%d: tcg fatal error - unhandled vector op.\n", __FILE__, __LINE__);\ + exit(0);\ +} while (0) + +/* + * disas_neon_ls_vector() + * return true if the neon instruction is successfully translated to tcg vector opc. + */ +static int disas_neon_ls_vector(DisasContext *s, int reg, int is_load, + TCGv_i64 tcg_addr) +{ + TCGArg vop, alignment = 32; + + if (!build_llvm(s->env)) + return 0; + + VFP_DREG(reg); + vop = (is_load) ? INDEX_op_vload_128 : INDEX_op_vstore_128; + gen_vector_op3(vop, + offsetof(CPUARMState, vfp.regs[reg]), + GET_TCGV_I64(tcg_addr), + alignment); + return 1; +} +#endif + /* * Store from FP register to memory */ static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size) { +#ifdef ENABLE_TCG_VECTOR + if (size >= 4 && disas_neon_ls_vector(s, srcidx, 0, tcg_addr) == 1) + return; +#endif + /* This writes the bottom N bits of a 128 bit wide vector to memory */ TCGv_i64 tmp = tcg_temp_new_i64(); tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64)); @@ -791,6 +894,11 @@ static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size) */ static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size) { +#ifdef ENABLE_TCG_VECTOR + if (size >= 4 && disas_neon_ls_vector(s, destidx, 1, tcg_addr) == 1) + return; +#endif + /* This always zero-extends and writes to a full 128 bit wide vector */ TCGv_i64 tmplo = tcg_temp_new_i64(); TCGv_i64 tmphi; @@ -1653,6 +1761,7 @@ static void disas_uncond_b_reg(DisasContext *s, uint32_t insn) } s->is_jmp = DISAS_JUMP; + s->gen_ibtc = 1; } /* C3.2 Branches, exception generating and system instructions */ @@ -3624,6 +3733,8 @@ static void disas_cc(DisasContext *s, uint32_t insn) TCGv_i64 tcg_tmp, tcg_y, tcg_rn; DisasCompare c; + 
tcg_gen_annotate(A_NoSIMDization); + if (!extract32(insn, 29, 1)) { unallocated_encoding(s); return; @@ -8854,6 +8965,153 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn) } } +#ifdef ENABLE_TCG_VECTOR +static int disas_neon_misc(DisasContext *s, uint32_t insn) +{ + if (!build_llvm(s->env)) + return 0; + + int size = extract32(insn, 22, 2); + int opcode = extract32(insn, 12, 5); + bool u = extract32(insn, 29, 1); + bool is_q = extract32(insn, 30, 1); + int rm = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + + VFP_DREG(rm); + VFP_DREG(rd); + + switch (opcode) { + case 0xc ... 0xf: + case 0x16 ... 0x1d: + case 0x1f: + { + /* Floating point: U, size[1] and opcode indicate operation; + * size[0] indicates single or double precision. + */ + int is_double = extract32(size, 0, 1); + opcode |= (extract32(size, 1, 1) << 5) | (u << 6); + size = is_double ? 64 : 32; + + switch (opcode) { + case 0x1d: /* SCVTF */ + case 0x5d: /* UCVTF */ + { + if (is_double && !is_q) { + unallocated_encoding(s); + return 0; + } + if (!fp_access_check(s)) { + return 0; + } + if (opcode == 0x1d) + gen_vector_cvt(vsitofp, size); + else + gen_vector_cvt(vuitofp, size); + break; + } + case 0x1a: /* FCVTNS */ + case 0x1b: /* FCVTMS */ + case 0x1c: /* FCVTAS */ + case 0x3a: /* FCVTPS */ + case 0x3b: /* FCVTZS */ + if (is_double && !is_q) { + unallocated_encoding(s); + return 0; + } + gen_vector_cvt(vfptosi, size); + break; + case 0x5a: /* FCVTNU */ + case 0x5b: /* FCVTMU */ + case 0x5c: /* FCVTAU */ + case 0x7a: /* FCVTPU */ + case 0x7b: /* FCVTZU */ + if (is_double && !is_q) { + unallocated_encoding(s); + return 0; + } + gen_vector_cvt(vfptoui, size); + break; + default: + return 0; + } + break; + } + default: + return 0; + } + + return 1; +} + +/* + * disas_neon_data_vector() + * return true if the neon instruction is successfully translated to tcg vector opc. 
+ */ +static int disas_neon_data_vector(DisasContext *s, uint32_t insn) +{ + if (!build_llvm(s->env)) + return 0; + + int q = extract32(insn, 30, 1); + int u = extract32(insn, 29, 1); + int size = extract32(insn, 22, 2); + int op = extract32(insn, 11, 5); + int rm = extract32(insn, 16, 5); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + + VFP_DREG(rm); + VFP_DREG(rn); + VFP_DREG(rd); + + switch(op) { + case 0x10: /* ADD, SUB */ + if(!u) /* ADD */ + gen_vector_arith(vadd, i, size); + else /* SUB */ + gen_vector_arith(vsub, i, size); + break; + case 0x3: /* logic ops */ + switch ((u << 2) | size) { + case 0: gen_vector_logical(vand); break; /* AND */ + case 1: gen_vector_logical(vbic); break; /* BIC rd = rn&(~rm)*/ + case 2: gen_vector_logical(vorr); break; /* ORR */ + case 3: gen_vector_logical(vorn); break; /* ORN */ + case 4: gen_vector_logical(veor); break; /* EOR */ + case 5: gen_vector_logical(vbsl); break; /* BSL */ + case 6: gen_vector_logical(vbit); break; /* BIT */ + case 7: gen_vector_logical(vbif); break; /* BIF */ + default: + return 0; + } + break; + case 0x18 ... 0x31: + { + int fpopcode = extract32(insn, 11, 5) + | (extract32(insn, 23, 1) << 5) + | (extract32(insn, 29, 1) << 6); + int size = extract32(insn, 22, 1); + switch (fpopcode) { + case 0x1a: gen_vector_fop2(vadd); break; /* FADD */ + case 0x3a: gen_vector_fop2(vsub); break; /* FSUB */ + case 0x5b: gen_vector_fop2(vmul); break; /* FMUL */ + case 0x5f: gen_vector_fop2(vdiv); break; /* FDIV */ + case 0x19: gen_vector_fop2(vmla); break; /* FMLA */ + case 0x39: gen_vector_fop2(vmls); break; /* FMLS */ + default: + return 0; + } + break; + } + default: + return 0; + } + + return 1; +} +#endif + /* Logic op (opcode == 3) subgroup of C3.6.16. 
*/ static void disas_simd_3same_logic(DisasContext *s, uint32_t insn) { @@ -8870,6 +9128,11 @@ static void disas_simd_3same_logic(DisasContext *s, uint32_t insn) return; } +#ifdef ENABLE_TCG_VECTOR + if (disas_neon_data_vector(s, insn) == 1) + return; +#endif + tcg_op1 = tcg_temp_new_i64(); tcg_op2 = tcg_temp_new_i64(); tcg_res[0] = tcg_temp_new_i64(); @@ -9138,6 +9401,11 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn) return; } +#ifdef ENABLE_TCG_VECTOR + if (disas_neon_data_vector(s, insn) == 1) + return; +#endif + switch (fpopcode) { case 0x58: /* FMAXNMP */ case 0x5a: /* FADDP */ @@ -9232,6 +9500,11 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) return; } +#ifdef ENABLE_TCG_VECTOR + if (disas_neon_data_vector(s, insn) == 1) + return; +#endif + if (size == 3) { assert(is_q); for (pass = 0; pass < 2; pass++) { @@ -9778,6 +10051,11 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) TCGv_i32 tcg_rmode; TCGv_ptr tcg_fpstatus; +#ifdef ENABLE_TCG_VECTOR + if (disas_neon_misc(s, insn) == 1) + return; +#endif + switch (opcode) { case 0x0: /* REV64, REV32 */ case 0x1: /* REV16 */ @@ -11018,6 +11296,8 @@ void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb) pc_start = tb->pc; + dc->gen_ibtc = 0; + dc->env = env; dc->tb = tb; dc->is_jmp = DISAS_NEXT; @@ -11078,7 +11358,12 @@ void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb) max_insns = TCG_MAX_INSNS; } - gen_tb_start(tb); + if (!build_llvm(env)) { + gen_tb_start(tb); + if (tracer_mode != TRANS_MODE_NONE) + tcg_gen_hotpatch(IS_USER(dc), tracer_mode == TRANS_MODE_HYBRIDS || + tracer_mode == TRANS_MODE_HYBRIDM); + } tcg_clear_temp_count(); @@ -11144,6 +11429,9 @@ void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb) * Also stop translation when a page boundary is reached. This * ensures prefetch aborts occur at the right place. 
*/ + + if (build_llvm(env) && num_insns == tb->icount) + break; } while (!dc->is_jmp && !tcg_op_buf_full() && !cs->singlestep_enabled && !singlestep && @@ -11155,6 +11443,15 @@ void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb) gen_io_end(); } + if (build_llvm(env) && tb->size != dc->pc - pc_start) { + /* consistency check with tb info. we must make sure + * guest basic blocks are the same. skip this trace if inconsistent */ + fprintf(stderr, "inconsistent block with pc 0x"TARGET_FMT_lx" size=%d" + " icount=%d (error size="TARGET_FMT_ld")\n", + tb->pc, tb->size, tb->icount, dc->pc - pc_start); + exit(0); + } + if (unlikely(cs->singlestep_enabled || dc->ss_active) && dc->is_jmp != DISAS_EXC) { /* Note that this means single stepping WFI doesn't halt the CPU. @@ -11182,6 +11479,8 @@ void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb) /* fall through */ case DISAS_JUMP: /* indicate that the hash table must be used to find the next TB */ + if (dc->gen_ibtc == 1) + gen_ibtc_stub(dc); tcg_gen_exit_tb(0); break; case DISAS_TB_JUMP: @@ -11211,10 +11510,15 @@ void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb) } done_generating: - gen_tb_end(tb, num_insns); + if (build_llvm(env)) { + /* Terminate the linked list. 
*/ + tcg_ctx.gen_op_buf[tcg_ctx.gen_last_op_idx].next = -1; + } else { + gen_tb_end(tb, num_insns); + } #ifdef DEBUG_DISAS - if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) { + if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) && !build_llvm(env)) { qemu_log("----------------\n"); qemu_log("IN: %s\n", lookup_symbol(pc_start)); log_target_disas(cs, pc_start, dc->pc - pc_start, @@ -11222,6 +11526,8 @@ done_generating: qemu_log("\n"); } #endif - tb->size = dc->pc - pc_start; - tb->icount = num_insns; + if (!build_llvm(env)) { + tb->size = dc->pc - pc_start; + tb->icount = num_insns; + } } diff --git a/target-arm/translate.c b/target-arm/translate.c index 5d22879..256227b 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -36,6 +36,7 @@ #include "exec/helper-gen.h" #include "trace-tcg.h" +#include "hqemu.h" #define ENABLE_ARCH_4T arm_dc_feature(s, ARM_FEATURE_V4T) @@ -110,6 +111,33 @@ void arm_translate_init(void) #endif a64_translate_init(); + + copy_tcg_context_global(); +} + +static inline void gen_ibtc_stub(DisasContext *s) +{ +#ifdef ENABLE_IBTC + if (!build_llvm(s->env)) { + TCGv_ptr ibtc_host_pc = tcg_temp_new_ptr(); + gen_helper_lookup_ibtc(ibtc_host_pc, cpu_env); + tcg_gen_op1i(INDEX_op_jmp, GET_TCGV_PTR(ibtc_host_pc)); + tcg_temp_free_ptr(ibtc_host_pc); + s->gen_ibtc = 0; + } +#endif +} + +static inline void gen_cpbl_stub(DisasContext *s) +{ +#ifdef ENABLE_CPBL + if (!build_llvm(s->env)) { + TCGv_ptr cpbl_host_pc = tcg_temp_new_ptr(); + gen_helper_lookup_cpbl(cpbl_host_pc, cpu_env); + tcg_gen_op1i(INDEX_op_jmp, GET_TCGV_PTR(cpbl_host_pc)); + tcg_temp_free_ptr(cpbl_host_pc); + } +#endif } static inline ARMMMUIdx get_a32_user_mem_index(DisasContext *s) @@ -201,7 +229,10 @@ static void store_reg(DisasContext *s, int reg, TCGv_i32 var) static inline void gen_set_cpsr(TCGv_i32 var, uint32_t mask) { TCGv_i32 tmp_mask = tcg_const_i32(mask); - gen_helper_cpsr_write(cpu_env, var, tmp_mask); + if (mask & ~CPSR_NZCV) + gen_helper_cpsr_write(cpu_env, var, tmp_mask); 
+ else + gen_helper_cpsr_write_nzcv(cpu_env, var, tmp_mask); tcg_temp_free_i32(tmp_mask); } /* Set NZCV flags from the high 4 bits of var. */ @@ -493,6 +524,7 @@ static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) { TCGv_i32 tmp; tcg_gen_sub_i32(cpu_NF, t0, t1); + tcg_gen_annotate(A_SetCC); tcg_gen_mov_i32(cpu_ZF, cpu_NF); tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1); tcg_gen_xor_i32(cpu_VF, cpu_NF, t0); @@ -878,6 +910,7 @@ static inline void gen_bx_im(DisasContext *s, uint32_t addr) tcg_temp_free_i32(tmp); } tcg_gen_movi_i32(cpu_R[15], addr & ~1); + s->gen_ibtc = 1; } /* Set PC and Thumb state from var. var is marked as dead. */ @@ -887,6 +920,7 @@ static inline void gen_bx(DisasContext *s, TCGv_i32 var) tcg_gen_andi_i32(cpu_R[15], var, ~1); tcg_gen_andi_i32(var, var, 1); store_cpu_field(var, thumb); + s->gen_ibtc = 1; } /* Variant of store_reg which uses branch&exchange logic when storing @@ -1199,20 +1233,38 @@ static inline void gen_vfp_sqrt(int dp) gen_helper_vfp_sqrts(cpu_F0s, cpu_F0s, cpu_env); } +static inline void gen_update_fpscr(TCGv_i32 flags) +{ + TCGv_i32 tmp; + tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]); + tcg_gen_andi_i32(tmp, tmp, 0x0fffffff); + tcg_gen_or_i32(tmp, tmp, flags); + store_cpu_field(tmp, vfp.xregs[ARM_VFP_FPSCR]); + tcg_temp_free_i32(tmp); +} + static inline void gen_vfp_cmp(int dp) { + TCGv_i32 flags = tcg_temp_new_i32(); if (dp) - gen_helper_vfp_cmpd(cpu_F0d, cpu_F1d, cpu_env); + gen_helper_vfp_cmpd(flags, cpu_F0d, cpu_F1d, cpu_env); else - gen_helper_vfp_cmps(cpu_F0s, cpu_F1s, cpu_env); + gen_helper_vfp_cmps(flags, cpu_F0s, cpu_F1s, cpu_env); + + gen_update_fpscr(flags); + tcg_temp_free_i32(flags); } static inline void gen_vfp_cmpe(int dp) { + TCGv_i32 flags = tcg_temp_new_i32(); if (dp) - gen_helper_vfp_cmped(cpu_F0d, cpu_F1d, cpu_env); + gen_helper_vfp_cmped(flags, cpu_F0d, cpu_F1d, cpu_env); else - gen_helper_vfp_cmpes(cpu_F0s, cpu_F1s, cpu_env); + gen_helper_vfp_cmpes(flags, cpu_F0s, cpu_F1s, cpu_env); + + 
gen_update_fpscr(flags); + tcg_temp_free_i32(flags); } static inline void gen_vfp_F1_ld0(int dp) @@ -3977,20 +4029,49 @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn) return 0; } +#if defined(CONFIG_USER_ONLY) +static inline void gen_goto_tb(DisasContext *s, int n, target_ulong dest) +{ + TranslationBlock *tb; + + tb = s->tb; + tcg_gen_goto_tb(n); + gen_set_pc_im(s, dest); + tcg_gen_exit_tb((uintptr_t)tb + n); + tb->jmp_pc[n] = dest; +} +#else +static int try_link_pages(DisasContext *s, TranslationBlock *tb, target_ulong dest) +{ +#ifdef ENABLE_LPAGE + if (!build_llvm(s->env)) { + target_ulong addr, size; + int ret = lpt_search_page(s->env, dest, &addr, &size); + if (ret == 1 && (tb->pc & ~(size - 1)) == addr) + return 1; + } +#endif + return 0; +} + static inline void gen_goto_tb(DisasContext *s, int n, target_ulong dest) { TranslationBlock *tb; tb = s->tb; - if ((tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK)) { + if ((tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK) || + try_link_pages(s, tb, dest) == 1) { tcg_gen_goto_tb(n); gen_set_pc_im(s, dest); tcg_gen_exit_tb((uintptr_t)tb + n); } else { gen_set_pc_im(s, dest); + gen_cpbl_stub(s); tcg_gen_exit_tb(0); } + tb->jmp_pc[n] = dest; } +#endif static inline void gen_jmp (DisasContext *s, uint32_t dest) { @@ -4372,6 +4453,54 @@ static struct { {2, 1, 1} }; +#ifdef ENABLE_TCG_VECTOR +#include "simd_helper.h" + +#define tcg_vector_abort() \ +do {\ + fprintf(stderr, "%s:%d: tcg fatal error - unhandled vector op.\n", __FILE__, __LINE__);\ + exit(0);\ +} while (0) + +/* + * disas_neon_ls_vector() + * return true if the neon instruction is successfully translated to tcg vector opc. + */ +static int disas_neon_ls_vector(DisasContext *s, uint32_t insn, TCGv_i32 addr) +{ + int rd, op, load; + int nregs, reg; + int interleave, spacing; + TCGArg vop, alignment = 32; + + if (!build_llvm(s->env)) + return 0; + + /* Load store all elements. 
*/ + op = (insn >> 8) & 0xf; + nregs = neon_ls_element_type[op].nregs; + interleave = neon_ls_element_type[op].interleave; + spacing = neon_ls_element_type[op].spacing; + + if (interleave != 1 || nregs % 2 != 0) + return 0; + + VFP_DREG_D(rd, insn); + load = (insn & (1 << 21)) != 0; + vop = (load) ? INDEX_op_vload_128 : INDEX_op_vstore_128; + + for (reg = 0; reg < nregs; reg += 2) { + gen_vector_op3(vop, + offsetof(CPUARMState, vfp.regs[rd]), + GET_TCGV_I32(addr), + alignment); + rd += spacing * 2; + tcg_gen_addi_i32(addr, addr, 16); + } + return 1; +} +#endif + /* Translate a NEON load/store element instruction. Return nonzero if the instruction is invalid. */ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) @@ -4438,6 +4567,11 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) addr = tcg_temp_new_i32(); load_reg_var(s, addr, rn); stride = (1 << size) * interleave; + +#ifdef ENABLE_TCG_VECTOR + if (disas_neon_ls_vector(s, insn, addr) == 1) + goto vector_done; +#endif for (reg = 0; reg < nregs; reg++) { if (interleave > 2 || (interleave == 2 && nregs == 2)) { load_reg_var(s, addr, rn); @@ -4529,6 +4663,9 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) } rd += spacing; } +#ifdef ENABLE_TCG_VECTOR +vector_done: +#endif tcg_temp_free_i32(addr); stride = nregs * 8; } else { @@ -5111,6 +5248,131 @@ static const uint8_t neon_2rm_sizes[] = { [NEON_2RM_VCVT_UF] = 0x4, }; +#ifdef ENABLE_TCG_VECTOR +static int disas_neon_misc(DisasContext *s, uint32_t insn) +{ + int op, rd, rm; + + if (!build_llvm(s->env)) + return 0; + + op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf); + VFP_DREG_D(rd, insn); + VFP_DREG_M(rm, insn); + + switch (op) { + case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */ + gen_vector_cvt(vsitofp, 32); + break; + case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */ + gen_vector_cvt(vuitofp, 32); + break; + case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */ + gen_vector_cvt(vfptosi, 32); + break; + case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */ + 
gen_vector_cvt(vfptoui, 32); + break; + default: + return 0; + } + + return 1; +} + +/* + * disas_neon_data_vector() + * return true if the neon instruction is successfully translated to tcg vector opc. + */ +static int disas_neon_data_vector(DisasContext *s, uint32_t insn) +{ + int op, q, u, size; + int rd, rn, rm; + + if (!build_llvm(s->env)) + return 0; + + /* Three register same length. */ + q = (insn & (1 << 6)) != 0; + u = (insn >> 24) & 1; + VFP_DREG_D(rd, insn); + VFP_DREG_N(rn, insn); + VFP_DREG_M(rm, insn); + size = (insn >> 20) & 3; + op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1); + + switch (op) { + case NEON_3R_VSHL: + case NEON_3R_VQSHL: + case NEON_3R_VRSHL: + case NEON_3R_VQRSHL: + { + int rtmp; + /* Shift instruction operands are reversed. */ + rtmp = rn; + rn = rm; + rm = rtmp; + } + break; + default: + break; + } + + switch(op) { + case NEON_3R_VADD_VSUB: + if(!u) /* VADD */ + gen_vector_arith(vadd, i, size); + else /* VSUB */ + gen_vector_arith(vsub, i, size); + break; + case NEON_3R_LOGIC: + switch ((u << 2) | size) { + case 0: gen_vector_logical(vand); break; /* VAND */ + case 1: gen_vector_logical(vbic); break; /* BIC rd = rn&(~rm)*/ + case 2: gen_vector_logical(vorr); break; /* VORR */ + case 3: gen_vector_logical(vorn); break; /* VORN OR NOT */ + case 4: gen_vector_logical(veor); break; /* VEOR Vector Bitwise Exclusive OR*/ + case 5: gen_vector_logical(vbsl); break; /* VBSL */ + case 6: gen_vector_logical(vbit); break; /* VBIT */ + case 7: gen_vector_logical(vbif); break; /* VBIF */ + } + break; + case NEON_3R_VFM: + if (size) /* VFMS */ + gen_vector_fop(vfms); + else /* VFMA */ + gen_vector_fop(vfma); + break; + case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. 
*/ + switch ((u << 2) | size) { + case 0: gen_vector_fop(vadd); break; /* VADD */ + case 4: gen_vector_fop(vpadd); break; /* VPADD */ + case 2: gen_vector_fop(vsub); break; /* VSUB */ + case 6: gen_vector_fop(vabd); break; /* VABD */ + default: + tcg_vector_abort(); + break; + } + break; + case NEON_3R_FLOAT_MULTIPLY: /* float VMLA, VMLS, VMUL */ + if(u) + gen_vector_fop(vmul); + else if (!u) { + if (size == 0) + gen_vector_fop(vmla); + else + gen_vector_fop(vmls); + } else + tcg_vector_abort(); + break; + default: + return 0; + } + + return 1; +} +#endif + /* Translate a NEON data processing instruction. Return nonzero if the instruction is invalid. We process data in a mixture of 32-bit and 64-bit chunks. @@ -5341,6 +5603,11 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) return 1; } +#ifdef ENABLE_TCG_VECTOR + if (!pairwise && disas_neon_data_vector(s, insn) == 1) + return 0; +#endif + for (pass = 0; pass < (q ? 4 : 2); pass++) { if (pairwise) { @@ -6741,6 +7008,10 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) break; default: elementwise: +#ifdef ENABLE_TCG_VECTOR + if (disas_neon_misc(s, insn) == 1) + return 0; +#endif for (pass = 0; pass < (q ? 
4 : 2); pass++) { if (neon_2rm_is_float_op(op)) { tcg_gen_ld_f32(cpu_F0s, cpu_env, @@ -11234,6 +11505,8 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb) pc_start = tb->pc; + dc->gen_ibtc = 0; + dc->env = env; dc->tb = tb; dc->is_jmp = DISAS_NEXT; @@ -11303,7 +11576,12 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb) max_insns = TCG_MAX_INSNS; } - gen_tb_start(tb); + if (!build_llvm(env)) { + gen_tb_start(tb); + if (tracer_mode != TRANS_MODE_NONE) + tcg_gen_hotpatch(IS_USER(dc), tracer_mode == TRANS_MODE_HYBRIDS || + tracer_mode == TRANS_MODE_HYBRIDM); + } tcg_clear_temp_count(); @@ -11460,6 +11738,12 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb) end_of_page = (dc->pc >= next_page_start) || ((dc->pc >= next_page_start - 3) && insn_crosses_page(env, dc)); +#if defined(CONFIG_LLVM) && defined(CONFIG_USER_ONLY) + if (llvm_has_annotation(dc->pc, ANNOTATION_LOOP)) + break; +#endif + if (build_llvm(env) && num_insns == tb->icount) + break; } while (!dc->is_jmp && !tcg_op_buf_full() && !cs->singlestep_enabled && !singlestep && @@ -11476,6 +11760,15 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb) gen_io_end(); } + if (build_llvm(env) && tb->size != dc->pc - pc_start) { + /* consistency check with tb info. we must make sure + * guest basic blocks are the same. skip this trace if inconsistent */ + fprintf(stderr, "inconsistent block with pc 0x"TARGET_FMT_lx" size=%d" + " icount=%d (error size="TARGET_FMT_ld")\n", + tb->pc, tb->size, tb->icount, dc->pc - pc_start); + exit(0); + } + /* At this stage dc->condjmp will only be set when the skipped instruction was a conditional branch or trap, and the PC has already been written. 
*/ @@ -11543,6 +11836,8 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb) case DISAS_JUMP: default: /* indicate that the hash table must be used to find the next TB */ + if (dc->gen_ibtc == 1) + gen_ibtc_stub(dc); tcg_gen_exit_tb(0); break; case DISAS_TB_JUMP: @@ -11581,10 +11876,15 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb) } done_generating: - gen_tb_end(tb, num_insns); + if (build_llvm(env)) { + /* Terminate the linked list. */ + tcg_ctx.gen_op_buf[tcg_ctx.gen_last_op_idx].next = -1; + } else { + gen_tb_end(tb, num_insns); + } #ifdef DEBUG_DISAS - if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) { + if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) && !build_llvm(env)) { qemu_log("----------------\n"); qemu_log("IN: %s\n", lookup_symbol(pc_start)); log_target_disas(cs, pc_start, dc->pc - pc_start, @@ -11592,8 +11892,10 @@ done_generating: qemu_log("\n"); } #endif - tb->size = dc->pc - pc_start; - tb->icount = num_insns; + if (!build_llvm(env)) { + tb->size = dc->pc - pc_start; + tb->icount = num_insns; + } } static const char *cpu_mode_names[16] = { diff --git a/target-arm/translate.h b/target-arm/translate.h index 53ef971..10f6a05 100644 --- a/target-arm/translate.h +++ b/target-arm/translate.h @@ -61,6 +61,8 @@ typedef struct DisasContext { #define TMP_A64_MAX 16 int tmp_a64_count; TCGv_i64 tmp_a64[TMP_A64_MAX]; + int gen_ibtc; + CPUArchState *env; } DisasContext; typedef struct DisasCompare { diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 84edfd0..cbd8b2a 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -845,7 +845,7 @@ typedef struct CPUX86State { uint64_t efer; /* Beginning of state preserved by INIT (dummy marker). */ - struct {} start_init_save; + struct { int dummy; } start_init_save; /* FPU state */ unsigned int fpstt; /* top of stack index */ @@ -865,8 +865,8 @@ typedef struct CPUX86State { float_status mmx_status; /* for 3DNow! 
float ops */ float_status sse_status; uint32_t mxcsr; - XMMReg xmm_regs[CPU_NB_REGS == 8 ? 8 : 32]; - XMMReg xmm_t0; + XMMReg xmm_regs[CPU_NB_REGS == 8 ? 8 : 32] __attribute__((aligned(64))); + XMMReg xmm_t0 __attribute__((aligned(64))); MMXReg mmx_t0; uint64_t opmask_regs[NB_OPMASK_REGS]; @@ -906,7 +906,7 @@ typedef struct CPUX86State { uint32_t smbase; /* End of state preserved by INIT (dummy marker). */ - struct {} end_init_save; + struct { int dummy; } end_init_save; uint64_t system_time_msr; uint64_t wall_clock_msr; @@ -966,6 +966,8 @@ typedef struct CPUX86State { uint64_t mtrr_deftype; MTRRVar mtrr_var[MSR_MTRRcap_VCNT]; + CPU_OPTIMIZATION_COMMON + /* For KVM */ uint32_t mp_state; int32_t exception_injected; @@ -1237,6 +1239,19 @@ static inline void cpu_get_tb_cpu_state(CPUX86State *env, target_ulong *pc, (env->eflags & (IOPL_MASK | TF_MASK | RF_MASK | VM_MASK | AC_MASK)); } +static inline target_ulong cpu_get_pc(CPUX86State *env) +{ + return env->eip + env->segs[R_CS].base; +} + +static inline int cpu_check_state(CPUX86State *env, + target_ulong cs_base, int flags) +{ + int mask = IOPL_MASK | TF_MASK | RF_MASK | VM_MASK | AC_MASK; + return (cs_base == env->segs[R_CS].base) && + ((uint32_t)flags == (env->hflags | (env->eflags & mask))); +} + void do_cpu_init(X86CPU *cpu); void do_cpu_sipi(X86CPU *cpu); @@ -1297,7 +1312,9 @@ static inline void cpu_load_efer(CPUX86State *env, uint64_t val) static inline MemTxAttrs cpu_get_mem_attrs(CPUX86State *env) { - return ((MemTxAttrs) { .secure = (env->hflags & HF_SMM_MASK) != 0 }); + MemTxAttrs attrs = { 0 }; + attrs.secure = (env->hflags & HF_SMM_MASK) != 0; + return attrs; } /* fpu_helper.c */ diff --git a/target-i386/fpu_helper.c b/target-i386/fpu_helper.c index d421a47..4f50cd9 100644 --- a/target-i386/fpu_helper.c +++ b/target-i386/fpu_helper.c @@ -385,7 +385,7 @@ void helper_fxchg_ST0_STN(CPUX86State *env, int st_index) /* FPU operations */ -static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500}; +const 
int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500}; void helper_fcom_ST0_FT0(CPUX86State *env) { diff --git a/target-i386/helper.c b/target-i386/helper.c index d18be95..4bc1e13 100644 --- a/target-i386/helper.c +++ b/target-i386/helper.c @@ -25,6 +25,7 @@ #include "monitor/monitor.h" #include "hw/i386/apic_internal.h" #endif +#include "hqemu.h" static void cpu_x86_version(CPUX86State *env, int *family, int *model) { @@ -641,6 +642,8 @@ void cpu_x86_update_cr3(CPUX86State *env, target_ulong new_cr3) "CR3 update: CR3=" TARGET_FMT_lx "\n", new_cr3); tlb_flush(CPU(cpu), 0); } + + pcid = new_cr3 >> 12; } void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4) @@ -1432,3 +1435,12 @@ void x86_stq_phys(CPUState *cs, hwaddr addr, uint64_t val) NULL); } #endif + +CPUState *cpu_create(void) +{ + X86CPU *cpu = g_malloc0(sizeof(X86CPU)); + CPUState *cs = CPU(cpu); + memcpy(cpu, X86_CPU(first_cpu), sizeof(X86CPU)); + cs->env_ptr = &cpu->env; + return cs; +} diff --git a/target-i386/helper.h b/target-i386/helper.h index ecfcfd1..8fbdde6 100644 --- a/target-i386/helper.h +++ b/target-i386/helper.h @@ -219,3 +219,6 @@ DEF_HELPER_3(rcrl, tl, env, tl, tl) DEF_HELPER_3(rclq, tl, env, tl, tl) DEF_HELPER_3(rcrq, tl, env, tl, tl) #endif + +#include "hqemu-helper.h" +#include "atomic-helper.h" diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 6dc9846..4a98890 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -2785,13 +2785,13 @@ static int kvm_handle_debug(X86CPU *cpu, case 0x1: ret = EXCP_DEBUG; cs->watchpoint_hit = &hw_watchpoint; - hw_watchpoint.vaddr = hw_breakpoint[n].addr; + hw_watchpoint.addr = hw_breakpoint[n].addr; hw_watchpoint.flags = BP_MEM_WRITE; break; case 0x3: ret = EXCP_DEBUG; cs->watchpoint_hit = &hw_watchpoint; - hw_watchpoint.vaddr = hw_breakpoint[n].addr; + hw_watchpoint.addr = hw_breakpoint[n].addr; hw_watchpoint.flags = BP_MEM_ACCESS; break; } diff --git a/target-i386/misc_helper.c b/target-i386/misc_helper.c index 13bd4f5..b446daa 100644 --- 
a/target-i386/misc_helper.c +++ b/target-i386/misc_helper.c @@ -599,3 +599,7 @@ void helper_debug(CPUX86State *env) cs->exception_index = EXCP_DEBUG; cpu_loop_exit(cs); } + +#ifdef CONFIG_COREMU +#include "atomic-x86.c" +#endif diff --git a/target-i386/ops_sse.h b/target-i386/ops_sse.h index 1780d1d..4a96ed7 100644 --- a/target-i386/ops_sse.h +++ b/target-i386/ops_sse.h @@ -995,7 +995,7 @@ SSE_HELPER_CMP(cmpnlt, FPU_CMPNLT) SSE_HELPER_CMP(cmpnle, FPU_CMPNLE) SSE_HELPER_CMP(cmpord, FPU_CMPORD) -static const int comis_eflags[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C}; +const int comis_eflags[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C}; void helper_ucomiss(CPUX86State *env, Reg *d, Reg *s) { diff --git a/target-i386/translate.c b/target-i386/translate.c index a3dd167..7204635 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -32,7 +32,13 @@ #include "exec/helper-gen.h" #include "trace-tcg.h" +#include "hqemu.h" +#if defined(CONFIG_USER_ONLY) +#define IS_USER(s) 1 +#else +#define IS_USER(s) (s->cpl == 3) +#endif #define PREFIX_REPZ 0x01 #define PREFIX_REPNZ 0x02 @@ -59,26 +65,35 @@ # define clztl clz32 #endif +#ifdef CONFIG_COREMU +#ifdef TARGET_X86_64 +#define X86_64_HREGS x86_64_hregs +#else +#define X86_64_HREGS 0 +#endif +#endif + //#define MACRO_TEST 1 /* global register indexes */ static TCGv_ptr cpu_env; -static TCGv cpu_A0; -static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT; +static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2; static TCGv_i32 cpu_cc_op; static TCGv cpu_regs[CPU_NB_REGS]; /* local temps */ -static TCGv cpu_T[2]; +static __thread TCGv cpu_T[2]; /* local register indexes (only used inside old micro ops) */ -static TCGv cpu_tmp0, cpu_tmp4; -static TCGv_ptr cpu_ptr0, cpu_ptr1; -static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32; -static TCGv_i64 cpu_tmp1_i64; +static __thread TCGv cpu_A0; +static __thread TCGv cpu_tmp0, cpu_tmp4; +static __thread TCGv_ptr cpu_ptr0, cpu_ptr1; +static __thread TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32; 
+static __thread TCGv_i64 cpu_tmp1_i64; +static __thread TCGv cpu_cc_srcT; #include "exec/gen-icount.h" #ifdef TARGET_X86_64 -static int x86_64_hregs; +static __thread int x86_64_hregs; #endif typedef struct DisasContext { @@ -123,6 +138,10 @@ typedef struct DisasContext { int cpuid_ext2_features; int cpuid_ext3_features; int cpuid_7_0_ebx_features; + int fallthrough; + int gen_ibtc; + int gen_cpbl; + CPUX86State *env; } DisasContext; static void gen_eob(DisasContext *s); @@ -209,6 +228,36 @@ static const uint8_t cc_op_live[CC_OP_NB] = { [CC_OP_CLR] = 0, }; +static inline void gen_ibtc_stub(DisasContext *s) +{ +#ifdef ENABLE_IBTC + if (!build_llvm(s->env)) { + TCGv_ptr ibtc_host_pc = tcg_temp_new_ptr(); + if (s->fallthrough) { + tcg_gen_st_i32(tcg_const_i32(1), cpu_env, offsetof(CPUX86State, fallthrough)); + s->fallthrough = 0; + } + gen_helper_lookup_ibtc(ibtc_host_pc, cpu_env); + tcg_gen_op1i(INDEX_op_jmp, GET_TCGV_PTR(ibtc_host_pc)); + tcg_temp_free_ptr(ibtc_host_pc); + s->gen_ibtc = 0; + } +#endif +} + +static inline void gen_cpbl_stub(DisasContext *s) +{ +#ifdef ENABLE_CPBL + if (!build_llvm(s->env)) { + TCGv_ptr cpbl_host_pc = tcg_temp_new_ptr(); + gen_helper_lookup_cpbl(cpbl_host_pc, cpu_env); + tcg_gen_op1i(INDEX_op_jmp, GET_TCGV_PTR(cpbl_host_pc)); + tcg_temp_free_ptr(cpbl_host_pc); + s->gen_cpbl = 0; + } +#endif +} + static void set_cc_op(DisasContext *s, CCOp op) { int dead; @@ -1312,6 +1361,30 @@ static void gen_helper_fp_arith_STN_ST0(int op, int opreg) /* if d == OR_TMP0, it means memory operand (address in A0) */ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d) { +#ifdef CONFIG_COREMU + if (s1->prefix & PREFIX_LOCK) { + gen_update_cc_op(s1); + + switch (ot & 3) { + case 0: + gen_helper_atomic_opb(cpu_env, cpu_A0, cpu_T[1], tcg_const_i32(op)); + break; + case 1: + gen_helper_atomic_opw(cpu_env, cpu_A0, cpu_T[1], tcg_const_i32(op)); + break; + case 2: + gen_helper_atomic_opl(cpu_env, cpu_A0, cpu_T[1], tcg_const_i32(op)); + break; 
+#ifdef TARGET_X86_64 + case 3: + gen_helper_atomic_opq(cpu_env, cpu_A0, cpu_T[1], tcg_const_i32(op)); +#endif + } + set_cc_op(s1, CC_OP_EFLAGS); + return; + } +#endif + if (d != OR_TMP0) { gen_op_mov_v_reg(ot, cpu_T[0], d); } else { @@ -1378,6 +1451,35 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d) /* if d == OR_TMP0, it means memory operand (address in A0) */ static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c) { +#ifdef CONFIG_COREMU + /* with lock prefix */ + if (s1->prefix & PREFIX_LOCK) { + assert(d == OR_TMP0); + + /* The helper will use CAS1 as a unified way to + implement atomic inc (locked inc) */ + gen_update_cc_op(s1); + + switch(ot & 3) { + case 0: + gen_helper_atomic_incb(cpu_env, cpu_A0, tcg_const_i32(c)); + break; + case 1: + gen_helper_atomic_incw(cpu_env, cpu_A0, tcg_const_i32(c)); + break; + case 2: + gen_helper_atomic_incl(cpu_env, cpu_A0, tcg_const_i32(c)); + break; +#ifdef TARGET_X86_64 + case 3: + gen_helper_atomic_incq(cpu_env, cpu_A0, tcg_const_i32(c)); +#endif + } + set_cc_op(s1, CC_OP_EFLAGS); + return; + } +#endif + if (d != OR_TMP0) { gen_op_mov_v_reg(ot, cpu_T[0], d); } else { @@ -2205,6 +2307,31 @@ static inline int insn_const_size(TCGMemOp ot) } } +#if defined(CONFIG_USER_ONLY) +static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip) +{ + TranslationBlock *tb; + + tb = s->tb; + tcg_gen_goto_tb(tb_num); + gen_jmp_im(eip); + tcg_gen_exit_tb((uintptr_t)tb + tb_num); + tb->jmp_pc[tb_num] = tb->cs_base + eip; +} +#else +static int try_link_pages(DisasContext *s, TranslationBlock *tb, target_ulong dest) +{ +#ifdef ENABLE_LPAGE + if (!build_llvm(s->env)) { + target_ulong addr, size; + int ret = lpt_search_page(s->env, dest, &addr, &size); + if (ret == 1 && (tb->pc & ~(size - 1)) == addr) + return 1; + } +#endif + return 0; +} + static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip) { TranslationBlock *tb; @@ -2214,7 +2341,8 @@ static inline void 
gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip) tb = s->tb; /* NOTE: we handle the case where the TB spans two pages here */ if ((pc & TARGET_PAGE_MASK) == (tb->pc & TARGET_PAGE_MASK) || - (pc & TARGET_PAGE_MASK) == ((s->pc - 1) & TARGET_PAGE_MASK)) { + (pc & TARGET_PAGE_MASK) == ((s->pc - 1) & TARGET_PAGE_MASK) || + try_link_pages(s, tb, pc) == 1) { /* jump to same page: we can use a direct jump */ tcg_gen_goto_tb(tb_num); gen_jmp_im(eip); @@ -2222,9 +2350,12 @@ static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip) } else { /* jump to another page: currently not optimized */ gen_jmp_im(eip); + s->gen_cpbl = 1; gen_eob(s); } + tb->jmp_pc[tb_num] = pc; } +#endif static inline void gen_jcc(DisasContext *s, int b, target_ulong val, target_ulong next_eip) @@ -2561,6 +2692,10 @@ static void gen_eob(DisasContext *s) } else if (s->tf) { gen_helper_single_step(cpu_env); } else { + if (s->gen_ibtc == 1) + gen_ibtc_stub(s); + if (s->gen_cpbl == 1) + gen_cpbl_stub(s); tcg_gen_exit_tb(0); } s->is_jmp = DISAS_TB_JUMP; @@ -2974,6 +3109,192 @@ static const struct SSEOpHelper_eppi sse_op_table7[256] = { [0xdf] = AESNI_OP(aeskeygenassist), }; +#ifdef ENABLE_TCG_VECTOR +#include "simd_helper.h" + +#define tcg_vector_abort() \ +do {\ + fprintf(stderr, "%s:%d: tcg fatal error - unhandled vector op.\n", __FILE__, __LINE__);\ + exit(0);\ +} while (0) + +static int gen_vload(DisasContext *s, int op, int mod, int modrm, int reg) +{ + int rm; + TCGArg alignment = 128; + CPUX86State *env = s->env; + + if (!build_llvm(env)) + return 0; + + switch (op) { + case 0x010: /* movups */ + case 0x110: /* movupd */ + case 0x26f: /* movdqu xmm, ea */ + alignment = (TCGArg)-1; + break; + default: + break; + } + + if (mod != 3) { + gen_lea_modrm(env, s, modrm); + gen_vector_op3(INDEX_op_vload_128, + offsetof(CPUX86State, xmm_regs[reg]), + (TCGArg)cpu_A0, + alignment); + } else { + rm = (modrm & 7) | REX_B(s); + gen_vector_op3(INDEX_op_vmov_128, + offsetof(CPUX86State, 
xmm_regs[reg]), + offsetof(CPUX86State, xmm_regs[rm]), + alignment); + } + + return 1; +} + +static int gen_vstore(DisasContext *s, int op, int mod, int modrm, int reg) +{ + int rm; + TCGArg alignment = 128; + CPUX86State *env = s->env; + + if (!build_llvm(env)) + return 0; + + switch (op) { + case 0x011: /* movups */ + case 0x111: /* movupd */ + case 0x27f: /* movdqu ea, xmm */ + alignment = (TCGArg)-1; + break; + default: + break; + } + + if (mod != 3) { + gen_lea_modrm(env, s, modrm); + gen_vector_op3(INDEX_op_vstore_128, + offsetof(CPUX86State, xmm_regs[reg]), + (TCGArg)cpu_A0, + alignment); + } else { + rm = (modrm & 7) | REX_B(s); + gen_vector_op3(INDEX_op_vmov_128, + offsetof(CPUX86State, xmm_regs[rm]), + offsetof(CPUX86State, xmm_regs[reg]), + alignment); + } + + return 1; +} + +static int gen_tcg_vector(DisasContext *s, int op, int b1, int mod, int modrm, int reg) +{ + int rd, rm, rn; + TCGArg alignment = 128; + CPUX86State *env = s->env; + + if (!build_llvm(env)) + return 0; + + switch(op) { + case 0x54 ... 0x59: + case 0x5b: /* cvtdq2ps cvtps2dq cvttps2dq */ + case 0x5c: + case 0x5e: + case 0xd4: + case 0xdb: + case 0xdf: + case 0xeb: + case 0xef: + case 0xf8 ... 0xfe: + break; + default: /* unhandled op */ + return 0; + } + + switch (op) { + case 0x50 ... 0x5a: + case 0x5c ... 0x5f: + case 0xc2: + /* Most sse scalar operations. 
*/ + if (b1 == 2 || b1 == 3) + return 0; + break; + } + + rd = rn = reg; + if (mod != 3) { + gen_lea_modrm(env, s, modrm); + gen_vector_op3(INDEX_op_vload_128, + offsetof(CPUX86State, xmm_t0), + (TCGArg)cpu_A0, + alignment); + rm = -1; + } else { + rm = (modrm & 7) | REX_B(s); + } + + switch(op) { + case 0x54: /* andps, andpd */ + case 0xdb: /* MMX_OP2(pand) */ + gen_vector_logical(vand); break; + case 0x55: /* andnps, andnpd */ + case 0xdf: /* MMX_OP2(pandn) */ + { + int rtmp = rn; + rn = rm; + rm = rtmp; + gen_vector_logical(vbic); break; + } + case 0x56: /* orps, orpd */ + case 0xeb: /* por */ + gen_vector_logical(vorr); break; + case 0x57: /* xorps, xorpd */ + case 0xef: /* pxor */ + gen_vector_logical(veor); break; + case 0x58: /* SSE_FOP(add) */ + gen_vector_fop(vadd, b1); break; + case 0x59: /* SSE_FOP(mul) */ + gen_vector_fop(vmul, b1); break; + case 0x5c: /* SSE_FOP(sub) */ + gen_vector_fop(vsub, b1); break; + case 0x5e: /* SSE_FOP(div) */ + gen_vector_fop(vdiv, b1); break; + case 0x5b: /* cvtdq2ps cvtps2dq cvttps2dq */ + if(b1 == 0) + gen_vector_cvt(vsitofp, 32); + else if(b1 == 1) + gen_vector_cvt(vfptosi, 32); + else if(b1 == 2) + gen_vector_cvt(vfptosi, 32); + else + tcg_vector_abort(); + break; + case 0xd4: /* MMX_OP2(paddq) */ + if (b1 != 1) + tcg_vector_abort(); + gen_vector_arith(vadd, i, 3); break; + case 0xf8 ... 0xfb: /* MMX_OP2(psubb ... psubq) */ + if (b1 != 1) + tcg_vector_abort(); + gen_vector_arith(vsub, i, (op-0xf8)); break; + case 0xfc ... 0xfe: /* MMX_OP2(paddb ... 
paddl) */ + if (b1 != 1) + tcg_vector_abort(); + gen_vector_arith(vadd, i, (op-0xfc)); break; + default: + tcg_vector_abort(); + break; + } + + return 1; +} + +#endif + static void gen_sse(CPUX86State *env, DisasContext *s, int b, target_ulong pc_start, int rex_r) { @@ -3131,6 +3452,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, case 0x128: /* movapd */ case 0x16f: /* movdqa xmm, ea */ case 0x26f: /* movdqu xmm, ea */ +#ifdef ENABLE_TCG_VECTOR + if (gen_vload(s, b, mod, modrm, reg) == 1) + break; +#endif if (mod != 3) { gen_lea_modrm(env, s, modrm); gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg])); @@ -3317,6 +3642,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, case 0x129: /* movapd */ case 0x17f: /* movdqa ea, xmm */ case 0x27f: /* movdqu ea, xmm */ +#ifdef ENABLE_TCG_VECTOR + if (gen_vstore(s, b, mod, modrm, reg) == 1) + break; +#endif if (mod != 3) { gen_lea_modrm(env, s, modrm); gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg])); @@ -4283,6 +4612,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, default: break; } +#ifdef ENABLE_TCG_VECTOR + if (is_xmm && gen_tcg_vector(s, b, b1, mod, modrm, reg) == 1) + return; +#endif if (is_xmm) { op1_offset = offsetof(CPUX86State,xmm_regs[reg]); if (mod != 3) { @@ -4565,9 +4898,11 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, s->aflag = aflag; s->dflag = dflag; +#ifndef CONFIG_COREMU /* lock generation */ if (prefixes & PREFIX_LOCK) gen_helper_lock(); +#endif /* now check op code */ reswitch: @@ -4719,6 +5054,29 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, set_cc_op(s, CC_OP_LOGICB + ot); break; case 2: /* not */ +#ifdef CONFIG_COREMU + if (s->prefix & PREFIX_LOCK) { + if (mod == 3) + goto illegal_op; + + switch(ot & 3) { + case 0: + gen_helper_atomic_notb(cpu_env, cpu_A0); + break; + case 1: + gen_helper_atomic_notw(cpu_env, cpu_A0); + break; + case 2: + gen_helper_atomic_notl(cpu_env, cpu_A0); + break; 
+#ifdef TARGET_X86_64 + case 3: + gen_helper_atomic_notq(cpu_env, cpu_A0); +#endif + } + break; + } +#endif tcg_gen_not_tl(cpu_T[0], cpu_T[0]); if (mod != 3) { gen_op_st_v(s, ot, cpu_T[0], cpu_A0); @@ -4727,6 +5085,32 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } break; case 3: /* neg */ +#ifdef CONFIG_COREMU + if (s->prefix & PREFIX_LOCK) { + if (mod == 3) + goto illegal_op; + + gen_update_cc_op(s); + + switch(ot & 3) { + case 0: + gen_helper_atomic_negb(cpu_env, cpu_A0); + break; + case 1: + gen_helper_atomic_negw(cpu_env, cpu_A0); + break; + case 2: + gen_helper_atomic_negl(cpu_env, cpu_A0); + break; +#ifdef TARGET_X86_64 + case 3: + gen_helper_atomic_negq(cpu_env, cpu_A0); +#endif + } + set_cc_op(s, CC_OP_EFLAGS); + break; + } +#endif tcg_gen_neg_tl(cpu_T[0], cpu_T[0]); if (mod != 3) { gen_op_st_v(s, ot, cpu_T[0], cpu_A0); @@ -4936,6 +5320,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, tcg_gen_movi_tl(cpu_T[1], next_eip); gen_push_v(s, cpu_T[1]); gen_op_jmp_v(cpu_T[0]); + s->gen_ibtc = 1; gen_eob(s); break; case 3: /* lcall Ev */ @@ -4954,6 +5339,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, tcg_const_i32(dflag - 1), tcg_const_i32(s->pc - s->cs_base)); } + s->gen_ibtc = 1; gen_eob(s); break; case 4: /* jmp Ev */ @@ -4961,6 +5347,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]); } gen_op_jmp_v(cpu_T[0]); + s->gen_ibtc = 1; gen_eob(s); break; case 5: /* ljmp Ev */ @@ -4976,6 +5363,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_op_movl_seg_T0_vm(R_CS); gen_op_jmp_v(cpu_T[1]); } + s->gen_ibtc = 1; gen_eob(s); break; case 6: /* push Ev */ @@ -5124,7 +5512,36 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]); gen_op_mov_reg_v(ot, reg, cpu_T[1]); gen_op_mov_reg_v(ot, rm, cpu_T[0]); - } else { + } else +#ifdef CONFIG_COREMU + if (s->prefix & 
PREFIX_LOCK) { + gen_lea_modrm(env, s, modrm); + gen_update_cc_op(s); + + switch (ot & 3) { + case 0: + gen_helper_atomic_xaddb(cpu_env, cpu_A0, tcg_const_i32(reg), + tcg_const_i32(X86_64_HREGS)); + break; + case 1: + gen_helper_atomic_xaddw(cpu_env, cpu_A0, tcg_const_i32(reg), + tcg_const_i32(X86_64_HREGS)); + break; + case 2: + gen_helper_atomic_xaddl(cpu_env, cpu_A0, tcg_const_i32(reg), + tcg_const_i32(X86_64_HREGS)); + break; +#ifdef TARGET_X86_64 + case 3: + gen_helper_atomic_xaddq(cpu_env, cpu_A0, tcg_const_i32(reg), + tcg_const_i32(x86_64_hregs)); +#endif + } + set_cc_op(s, CC_OP_EFLAGS); + break; + } else +#endif + { gen_lea_modrm(env, s, modrm); gen_op_mov_v_reg(ot, cpu_T[0], reg); gen_op_ld_v(s, ot, cpu_T[1], cpu_A0); @@ -5145,6 +5562,38 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, modrm = cpu_ldub_code(env, s->pc++); reg = ((modrm >> 3) & 7) | rex_r; mod = (modrm >> 6) & 3; + +#ifdef CONFIG_COREMU + if (s->prefix & PREFIX_LOCK) { + if (mod == 3) + goto illegal_op; + + gen_lea_modrm(env, s, modrm); + gen_update_cc_op(s); + + switch(ot & 3) { + case 0: + gen_helper_atomic_cmpxchgb(cpu_env, cpu_A0, tcg_const_i32(reg), + tcg_const_i32(X86_64_HREGS)); + break; + case 1: + gen_helper_atomic_cmpxchgw(cpu_env, cpu_A0, tcg_const_i32(reg), + tcg_const_i32(X86_64_HREGS)); + break; + case 2: + gen_helper_atomic_cmpxchgl(cpu_env, cpu_A0, tcg_const_i32(reg), + tcg_const_i32(X86_64_HREGS)); + break; +#ifdef TARGET_X86_64 + case 3: + gen_helper_atomic_cmpxchgq(cpu_env, cpu_A0, tcg_const_i32(reg), + tcg_const_i32(x86_64_hregs)); +#endif + } + set_cc_op(s, CC_OP_EFLAGS); + break; + } +#endif t0 = tcg_temp_local_new(); t1 = tcg_temp_local_new(); t2 = tcg_temp_local_new(); @@ -5201,6 +5650,11 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, if (!(s->cpuid_ext_features & CPUID_EXT_CX16)) goto illegal_op; gen_lea_modrm(env, s, modrm); +#ifdef CONFIG_COREMU + if (s->prefix & PREFIX_LOCK) { /* atomic helper only for LOCK-prefixed form */ + gen_helper_atomic_cmpxchg16b(cpu_env, 
cpu_A0); + } else +#endif gen_helper_cmpxchg16b(cpu_env, cpu_A0); } else #endif @@ -5208,6 +5662,11 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, if (!(s->cpuid_features & CPUID_CX8)) goto illegal_op; gen_lea_modrm(env, s, modrm); +#ifdef CONFIG_COREMU + if (s->prefix & PREFIX_LOCK) { /* atomic helper only for LOCK-prefixed form */ + gen_helper_atomic_cmpxchg8b(cpu_env, cpu_A0); + } else +#endif gen_helper_cmpxchg8b(cpu_env, cpu_A0); } set_cc_op(s, CC_OP_EFLAGS); @@ -5550,15 +6009,41 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_op_mov_reg_v(ot, reg, cpu_T[1]); } else { gen_lea_modrm(env, s, modrm); +#ifdef CONFIG_COREMU + /* for xchg, lock is implicit. + XXX: no flags are affected! */ + switch (ot & 3) { + case 0: + gen_helper_xchgb(cpu_env, cpu_A0, tcg_const_i32(reg), + tcg_const_i32(X86_64_HREGS)); + break; + case 1: + gen_helper_xchgw(cpu_env, cpu_A0, tcg_const_i32(reg), + tcg_const_i32(X86_64_HREGS)); + break; + case 2: + gen_helper_xchgl(cpu_env, cpu_A0, tcg_const_i32(reg), + tcg_const_i32(X86_64_HREGS)); + break; +#ifdef TARGET_X86_64 + case 3: + gen_helper_xchgq(cpu_env, cpu_A0, tcg_const_i32(reg), + tcg_const_i32(x86_64_hregs)); +#endif + } +#else gen_op_mov_v_reg(ot, cpu_T[0], reg); /* for xchg, lock is implicit */ if (!(prefixes & PREFIX_LOCK)) gen_helper_lock(); gen_op_ld_v(s, ot, cpu_T[1], cpu_A0); gen_op_st_v(s, ot, cpu_T[0], cpu_A0); +#ifndef CONFIG_COREMU if (!(prefixes & PREFIX_LOCK)) gen_helper_unlock(); +#endif gen_op_mov_reg_v(ot, reg, cpu_T[1]); +#endif } break; case 0xc4: /* les Gv */ @@ -6360,6 +6845,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_stack_update(s, val + (1 << ot)); /* Note that gen_pop_T0 uses a zero-extending load. */ gen_op_jmp_v(cpu_T[0]); + s->gen_ibtc = 1; gen_eob(s); break; case 0xc3: /* ret */ @@ -6367,6 +6853,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_pop_update(s, ot); /* Note that gen_pop_T0 uses a zero-extending load. 
*/ gen_op_jmp_v(cpu_T[0]); + s->gen_ibtc = 1; gen_eob(s); break; case 0xca: /* lret im */ @@ -6392,6 +6879,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, /* add stack offset */ gen_stack_update(s, val + (2 << dflag)); } + s->gen_ibtc = 1; gen_eob(s); break; case 0xcb: /* lret */ @@ -6415,6 +6903,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, tcg_const_i32(s->pc - s->cs_base)); set_cc_op(s, CC_OP_EFLAGS); } + s->gen_ibtc = 1; gen_eob(s); break; case 0xe8: /* call im */ @@ -6680,6 +7169,27 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } bt_op: tcg_gen_andi_tl(cpu_T[1], cpu_T[1], (1 << (3 + ot)) - 1); +#ifdef CONFIG_COREMU + if (s->prefix & PREFIX_LOCK) { + gen_update_cc_op(s); + + switch (op) { + case 0: + goto illegal_op; + break; + case 1: + gen_helper_atomic_bts(cpu_env, cpu_A0, cpu_T[1], tcg_const_i32(ot)); + break; + case 2: + gen_helper_atomic_btr(cpu_env, cpu_A0, cpu_T[1], tcg_const_i32(ot)); + break; + case 3: + gen_helper_atomic_btc(cpu_env, cpu_A0, cpu_T[1], tcg_const_i32(ot)); + } + set_cc_op(s, CC_OP_EFLAGS); + break; + } +#endif tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]); switch(op) { case 0: @@ -7818,12 +8328,16 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, goto illegal_op; } /* lock generation */ +#ifndef CONFIG_COREMU if (s->prefix & PREFIX_LOCK) gen_helper_unlock(); +#endif return s->pc; illegal_op: +#ifndef CONFIG_COREMU if (s->prefix & PREFIX_LOCK) gen_helper_unlock(); +#endif /* XXX: ensure that no lock was generated */ gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base); return s->pc; @@ -7879,6 +8393,8 @@ void optimize_flags_init(void) } helper_lock_init(); + + copy_tcg_context_global(); } /* generate intermediate code in gen_opc_buf and gen_opparam_buf for @@ -7900,6 +8416,10 @@ void gen_intermediate_code(CPUX86State *env, TranslationBlock *tb) cs_base = tb->cs_base; flags = tb->flags; + dc->fallthrough = 0; + dc->gen_ibtc = 0; + dc->gen_cpbl = 0; 
+ dc->env = env; dc->pe = (flags >> HF_PE_SHIFT) & 1; dc->code32 = (flags >> HF_CS32_SHIFT) & 1; dc->ss32 = (flags >> HF_SS32_SHIFT) & 1; @@ -7977,7 +8497,12 @@ void gen_intermediate_code(CPUX86State *env, TranslationBlock *tb) max_insns = TCG_MAX_INSNS; } - gen_tb_start(tb); + if (!build_llvm(env)) { + gen_tb_start(tb); + if (tracer_mode != TRANS_MODE_NONE) + tcg_gen_hotpatch(IS_USER(dc), tracer_mode == TRANS_MODE_HYBRIDS || + tracer_mode == TRANS_MODE_HYBRIDM); + } for(;;) { tcg_gen_insn_start(pc_ptr, dc->cc_op); num_insns++; @@ -8027,12 +8552,27 @@ void gen_intermediate_code(CPUX86State *env, TranslationBlock *tb) gen_eob(dc); break; } + +#if defined(CONFIG_LLVM) && defined(CONFIG_USER_ONLY) + if (llvm_has_annotation(pc_ptr, ANNOTATION_LOOP)) + break; +#endif + if (build_llvm(env) && num_insns == tb->icount) { + gen_jmp_im(pc_ptr - dc->cs_base); + gen_eob(dc); + break; + } + /* if too long translation, stop generation too */ if (tcg_op_buf_full() || (pc_ptr - pc_start) >= (TARGET_PAGE_SIZE - 32) || num_insns >= max_insns) { gen_jmp_im(pc_ptr - dc->cs_base); + dc->fallthrough = 1; + dc->gen_ibtc = 1; gen_eob(dc); + + tb->jmp_pc[0] = pc_ptr; break; } if (singlestep) { @@ -8041,13 +8581,28 @@ void gen_intermediate_code(CPUX86State *env, TranslationBlock *tb) break; } } + if (build_llvm(env) && tb->size != dc->pc - pc_start) { + /* consistency check with tb info. we must make sure + * guest basic blocks are the same. skip this trace if inconsistent */ + fprintf(stderr, "inconsistent block with pc 0x"TARGET_FMT_lx" size=%d" + " icount=%d (error size="TARGET_FMT_ld")\n", + tb->pc, tb->size, tb->icount, dc->pc - pc_start); + exit(1); /* fatal: exit with a failure status */ + } + if (tb->cflags & CF_LAST_IO) gen_io_end(); done_generating: - gen_tb_end(tb, num_insns); + + if (build_llvm(env)) { + /* Terminate the linked list. 
*/ + tcg_ctx.gen_op_buf[tcg_ctx.gen_last_op_idx].next = -1; + } else { + gen_tb_end(tb, num_insns); + } #ifdef DEBUG_DISAS - if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) { + if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) && !build_llvm(env)) { int disas_flags; qemu_log("----------------\n"); qemu_log("IN: %s\n", lookup_symbol(pc_start)); @@ -8062,8 +8617,10 @@ done_generating: } #endif - tb->size = pc_ptr - pc_start; - tb->icount = num_insns; + if (!build_llvm(env)) { + tb->size = pc_ptr - pc_start; + tb->icount = num_insns; + } } void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb, diff --git a/target-ppc/Makefile.objs b/target-ppc/Makefile.objs index e667e69..363a701 100644 --- a/target-ppc/Makefile.objs +++ b/target-ppc/Makefile.objs @@ -1,5 +1,5 @@ obj-y += cpu-models.o -obj-y += translate.o +obj-y += translate.o helper.o ifeq ($(CONFIG_SOFTMMU),y) obj-y += machine.o mmu_helper.o mmu-hash32.o monitor.o obj-$(TARGET_PPC64) += mmu-hash64.o arch_dump.o diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h index 9706000..bf1481a 100644 --- a/target-ppc/cpu.h +++ b/target-ppc/cpu.h @@ -88,7 +88,6 @@ /*****************************************************************************/ /* MMU model */ -typedef enum powerpc_mmu_t powerpc_mmu_t; enum powerpc_mmu_t { POWERPC_MMU_UNKNOWN = 0x00000000, /* Standard 32 bits PowerPC MMU */ @@ -133,10 +132,10 @@ enum powerpc_mmu_t { | 0x00000004, #endif /* defined(TARGET_PPC64) */ }; +typedef enum powerpc_mmu_t powerpc_mmu_t; /*****************************************************************************/ /* Exception model */ -typedef enum powerpc_excp_t powerpc_excp_t; enum powerpc_excp_t { POWERPC_EXCP_UNKNOWN = 0, /* Standard PowerPC exception model */ @@ -170,6 +169,7 @@ enum powerpc_excp_t { POWERPC_EXCP_POWER7, #endif /* defined(TARGET_PPC64) */ }; +typedef enum powerpc_excp_t powerpc_excp_t; /*****************************************************************************/ /* Exception vectors definitions */ @@ -298,7 +298,6 @@ 
enum { /*****************************************************************************/ /* Input pins model */ -typedef enum powerpc_input_t powerpc_input_t; enum powerpc_input_t { PPC_FLAGS_INPUT_UNKNOWN = 0, /* PowerPC 6xx bus */ @@ -316,6 +315,7 @@ enum powerpc_input_t { /* Freescale RCPU bus */ PPC_FLAGS_INPUT_RCPU, }; +typedef enum powerpc_input_t powerpc_input_t; #define PPC_INPUT(env) (env->bus_model) @@ -1168,6 +1168,8 @@ struct CPUPPCState { uint32_t tm_vscr; uint64_t tm_dscr; uint64_t tm_tar; + + CPU_OPTIMIZATION_COMMON }; #define SET_FIT_PERIOD(a_, b_, c_, d_) \ @@ -2226,6 +2228,17 @@ static inline void cpu_get_tb_cpu_state(CPUPPCState *env, target_ulong *pc, *flags = env->hflags; } +static inline target_ulong cpu_get_pc(CPUPPCState *env) +{ + return env->nip; +} + +static inline int cpu_check_state(CPUPPCState *env, + target_ulong cs_base, int flags) +{ + return cs_base == 0 && (uint32_t)flags == env->hflags; +} + #if !defined(CONFIG_USER_ONLY) static inline int booke206_tlbm_id(CPUPPCState *env, ppcmas_tlb_t *tlbm) { @@ -2311,7 +2324,7 @@ static inline uint32_t booke206_tlbnps(CPUPPCState *env, const int tlbn) uint32_t tlbncfg = env->spr[SPR_BOOKE_TLB0CFG + tlbn]; uint32_t min = (tlbncfg & TLBnCFG_MINSIZE) >> TLBnCFG_MINSIZE_SHIFT; uint32_t max = (tlbncfg & TLBnCFG_MAXSIZE) >> TLBnCFG_MAXSIZE_SHIFT; - int i; + unsigned i; for (i = min; i <= max; i++) { ret |= (1 << (i << 1)); } diff --git a/target-ppc/helper.h b/target-ppc/helper.h index 869be15..c96f51b 100644 --- a/target-ppc/helper.h +++ b/target-ppc/helper.h @@ -667,3 +667,5 @@ DEF_HELPER_4(dscli, void, env, fprp, fprp, i32) DEF_HELPER_4(dscliq, void, env, fprp, fprp, i32) DEF_HELPER_1(tbegin, void, env) + +#include "hqemu-helper.h" diff --git a/target-ppc/translate.c b/target-ppc/translate.c index 41a7258..15cedc5 100644 --- a/target-ppc/translate.c +++ b/target-ppc/translate.c @@ -28,7 +28,13 @@ #include "exec/helper-gen.h" #include "trace-tcg.h" +#include "hqemu.h" +#if defined(CONFIG_USER_ONLY) 
+#define IS_USER(s) 1 +#else +#define IS_USER(s) ((s)->mem_idx == MMU_USER_IDX) +#endif #define CPU_SINGLE_STEP 0x1 #define CPU_BRANCH_STEP 0x2 @@ -180,6 +186,8 @@ void ppc_translate_init(void) offsetof(CPUPPCState, access_type), "access_type"); done_init = 1; + + copy_tcg_context_global(); } /* internal defines */ @@ -11479,7 +11487,12 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb) max_insns = TCG_MAX_INSNS; } - gen_tb_start(tb); + if (!build_llvm(env)) { + gen_tb_start(tb); + if (tracer_mode != TRANS_MODE_NONE) + tcg_gen_hotpatch(IS_USER(ctxp), tracer_mode == TRANS_MODE_HYBRIDS || + tracer_mode == TRANS_MODE_HYBRIDM); + } tcg_clear_temp_count(); /* Set env in case of segfault during code fetch */ while (ctx.exception == POWERPC_EXCP_NONE && !tcg_op_buf_full()) { @@ -11553,6 +11566,9 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb) #if defined(DO_PPC_STATISTICS) handler->count++; #endif + if (build_llvm(env) && num_insns == tb->icount) + break; + /* Check trace mode exceptions */ if (unlikely(ctx.singlestep_enabled & CPU_SINGLE_STEP && (ctx.nip <= 0x100 || ctx.nip > 0xF00) && @@ -11576,6 +11592,16 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb) exit(1); } } + + if (build_llvm(env) && tb->size != ctx.nip - pc_start) { + /* consistency check with tb info. we must make sure + * guest basic blocks are the same */ + fprintf(stderr, "inconsistent block with pc 0x"TARGET_FMT_lx" size %d" + " icount=%d (error size="TARGET_FMT_ld")\n", + tb->pc, tb->size, tb->icount, ctx.nip - pc_start); + exit(1); /* fatal: exit with a failure status */ + } + if (tb->cflags & CF_LAST_IO) gen_io_end(); if (ctx.exception == POWERPC_EXCP_NONE) { @@ -11587,13 +11613,18 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb) /* Generate the return instruction */ tcg_gen_exit_tb(0); } - gen_tb_end(tb, num_insns); - tb->size = ctx.nip - pc_start; - tb->icount = num_insns; + if (build_llvm(env)) { + /* Terminate the linked list. 
*/ + tcg_ctx.gen_op_buf[tcg_ctx.gen_last_op_idx].next = -1; + } else { + gen_tb_end(tb, num_insns); + tb->size = ctx.nip - pc_start; + tb->icount = num_insns; + } #if defined(DEBUG_DISAS) - if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) { + if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) && !build_llvm(env)) { int flags; flags = env->bfd_mach; flags |= ctx.le_mode << 16; diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c index 0ed10a9..05e26af 100644 --- a/tcg/aarch64/tcg-target.c +++ b/tcg/aarch64/tcg-target.c @@ -1264,7 +1264,56 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, #endif /* CONFIG_SOFTMMU */ } -static tcg_insn_unit *tb_ret_addr; +tcg_insn_unit *tb_ret_addr; +tcg_insn_unit *ibtc_ret_addr; + +/* + * Emit trace profiling/prediction stubs. The code sequence is as follows: + * S1: direct jump (the reloc part requires 4-byte alignment) + * S2: trace profiling stub + * S3: trace prediction stub + * S4: beginning of QEMU emulation code + * + * The jump inst of S1 is initially set to jump to S3 (i.e. skipping S2). + * Remember the offset of S3 (patch_next) which is used to turn the + * trace profiling off. Also remember the offset of S4 (patch_skip) + * so that the trace stubs can be skipped quickly while searching pc. 
+ */ +static void tcg_out_hotpatch(TCGContext *s, int is_user, int emit_helper) +{ + tcg_insn_unit *label_ptr[2]; + TranslationBlock *tb = s->tb; + + tb->patch_jmp = (uint16_t)((intptr_t)s->code_ptr - (intptr_t)s->code_buf); + + /* S1: Direct Jump */ + if (is_user == 0 || emit_helper == 0) { + tcg_out_goto(s, s->code_ptr + 1); + tb->patch_next = (uint16_t)((intptr_t)s->code_ptr - (intptr_t)s->code_buf); + return; + } + + label_ptr[0] = s->code_ptr; + tcg_out_goto_noaddr(s); + /* S2: Trace Profiling Stub */ + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[1], tb->id); + tcg_out_call(s, (tcg_insn_unit *)helper_NET_profile); + reloc_pc26(label_ptr[0], s->code_ptr); + + /* S3: Trace Prediction stub */ + tb->patch_next = (uint16_t)((intptr_t)s->code_ptr - (intptr_t)s->code_buf); + + tcg_out_ld(s, TCG_TYPE_I32, tcg_target_reg_alloc_order[0], + TCG_AREG0, offsetof(CPUArchState, start_trace_prediction)); + tcg_out_cmp(s, 0, tcg_target_reg_alloc_order[0], 0, 1); + label_ptr[1] = s->code_ptr; + tcg_out_goto_cond_noaddr(s, TCG_COND_EQ); + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[1], tb->id); + tcg_out_call(s, (tcg_insn_unit *)helper_NET_predict); + reloc_pc19(label_ptr[1], s->code_ptr); +} static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], @@ -1302,6 +1351,16 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, s->tb_next_offset[a0] = tcg_current_code_size(s); break; + case INDEX_op_hotpatch: + tcg_out_hotpatch(s, args[0], args[1]); + break; + case INDEX_op_jmp: + if (const_args[0]) { + tcg_out_goto(s, (tcg_insn_unit *)args[0]); + } else { + tcg_out_insn(s, 3207, BR, args[0]); + } + break; case INDEX_op_br: tcg_out_goto_label(s, arg_label(a0)); break; @@ -1637,6 +1696,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, } static const TCGTargetOpDef 
aarch64_op_defs[] = { + { INDEX_op_hotpatch, { "i", "i" } }, + { INDEX_op_jmp, { "ri" } }, { INDEX_op_exit_tb, { } }, { INDEX_op_goto_tb, { } }, { INDEX_op_br, { } }, @@ -1748,6 +1809,10 @@ static const TCGTargetOpDef aarch64_op_defs[] = { { INDEX_op_muluh_i64, { "r", "r", "r" } }, { INDEX_op_mulsh_i64, { "r", "r", "r" } }, +#define DEF(name,a1,a2,a3,a4) { INDEX_op_##name, {} }, +#include "tcg-opc-vector.h" +#undef DEF + { -1 }, }; @@ -1777,12 +1842,24 @@ static void tcg_target_init(TCGContext *s) tcg_add_target_add_op_defs(aarch64_op_defs); } +static void tcg_out_epilogue(TCGContext *s) +{ + /* IBTC exit entry */ + ibtc_ret_addr = s->code_ptr; + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_X0, 0); +} + +#if defined(CONFIG_LLVM) +#define STACK_SIZE 0x800 +#else +#define STACK_SIZE TCG_STATIC_CALL_ARGS_SIZE +#endif /* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */ #define PUSH_SIZE ((30 - 19 + 1) * 8) #define FRAME_SIZE \ ((PUSH_SIZE \ - + TCG_STATIC_CALL_ARGS_SIZE \ + + STACK_SIZE \ + CPU_TEMP_BUF_NLONGS * sizeof(long) \ + TCG_TARGET_STACK_ALIGN - 1) \ & ~(TCG_TARGET_STACK_ALIGN - 1)) @@ -1828,6 +1905,7 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]); + tcg_out_epilogue(s); tb_ret_addr = s->code_ptr; /* Remove TCG locals stack space. 
*/ diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c index 9187d34..b95f5fb 100644 --- a/tcg/i386/tcg-target.c +++ b/tcg/i386/tcg-target.c @@ -139,7 +139,8 @@ static bool have_bmi2; # define have_bmi2 0 #endif -static tcg_insn_unit *tb_ret_addr; +tcg_insn_unit *tb_ret_addr; +tcg_insn_unit *ibtc_ret_addr; static void patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend) @@ -323,6 +324,7 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type, #define OPC_MOVB_EvGv (0x88) /* stores, more or less */ #define OPC_MOVL_EvGv (0x89) /* stores, more or less */ #define OPC_MOVL_GvEv (0x8b) /* loads, more or less */ +#define OPC_NOP (0x90) #define OPC_MOVB_EvIz (0xc6) #define OPC_MOVL_EvIz (0xc7) #define OPC_MOVL_Iv (0xb8) @@ -1150,6 +1152,62 @@ static void * const qemu_st_helpers[16] = { [MO_BEQ] = helper_be_stq_mmu, }; +/* helpers for LLVM */ +void * const llvm_ld_helpers[16] = { + [MO_UB] = llvm_ret_ldub_mmu, + [MO_LEUW] = llvm_le_lduw_mmu, + [MO_LEUL] = llvm_le_ldul_mmu, + [MO_LEQ] = llvm_le_ldq_mmu, + [MO_BEUW] = llvm_be_lduw_mmu, + [MO_BEUL] = llvm_be_ldul_mmu, + [MO_BEQ] = llvm_be_ldq_mmu, +}; + +void * const llvm_st_helpers[16] = { + [MO_UB] = llvm_ret_stb_mmu, + [MO_LEUW] = llvm_le_stw_mmu, + [MO_LEUL] = llvm_le_stl_mmu, + [MO_LEQ] = llvm_le_stq_mmu, + [MO_BEUW] = llvm_be_stw_mmu, + [MO_BEUL] = llvm_be_stl_mmu, + [MO_BEQ] = llvm_be_stq_mmu, +}; + +static inline void tcg_out_compute_gva(TCGContext *s, TCGReg addrlo, + TCGMemOp opc, int trexw, int tv_hrexw) +{ + const TCGReg r1 = TCG_REG_L1; + int s_mask = (1 << (opc & MO_SIZE)) - 1; + +#if defined(ALIGNED_ONLY) + TCGType ttype = TCG_TYPE_I32; + bool aligned = (opc & MO_AMASK) == MO_ALIGN || s_mask == 0; + if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 64) + ttype = TCG_TYPE_I64; + if (aligned) { + tcg_out_mov(s, ttype, r1, addrlo); + } else { + /* For unaligned access check that we don't cross pages using + the page address of the last byte. 
*/ + tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask); + } + tgen_arithi(s, ARITH_AND + trexw, r1, + TARGET_PAGE_MASK | (aligned ? s_mask : 0), 0); +#elif defined(ENABLE_TLBVERSION) + /* the following code is as equivalent to + * (((addr + (size - 1)) & TARGET_PAGE_MASK) | env->tlb_version) */ + tcg_out_modrm_sib_offset(s, OPC_LEA + trexw, r1, addrlo, -1, 0, s_mask); + tgen_arithi(s, ARITH_AND + trexw, r1, TARGET_PAGE_MASK, 0); + tcg_out_modrm_offset(s, (OPC_ARITH_GvEv | (ARITH_OR << 3)) + trexw + tv_hrexw, + r1, TCG_AREG0, offsetof(CPUArchState, tlb_version)); +#else + /* the following code is as equivalent to + * ((addr + (size - 1)) & TARGET_PAGE_MASK) */ + tcg_out_modrm_sib_offset(s, OPC_LEA + trexw, r1, addrlo, -1, 0, s_mask); + tgen_arithi(s, ARITH_AND + trexw, r1, TARGET_PAGE_MASK, 0); +#endif +} + /* Perform the TLB load and compare. Inputs: @@ -1179,9 +1237,7 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, const TCGReg r1 = TCG_REG_L1; TCGType ttype = TCG_TYPE_I32; TCGType tlbtype = TCG_TYPE_I32; - int trexw = 0, hrexw = 0, tlbrexw = 0; - int s_mask = (1 << (opc & MO_SIZE)) - 1; - bool aligned = (opc & MO_AMASK) == MO_ALIGN || s_mask == 0; + int trexw = 0, hrexw = 0, tlbrexw = 0, tv_hrexw = 0; if (TCG_TARGET_REG_BITS == 64) { if (TARGET_LONG_BITS == 64) { @@ -1197,20 +1253,18 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, } } +#if defined(ENABLE_TLBVERSION_EXT) + trexw = 0; + tv_hrexw = P_REXW; +#endif + tcg_out_mov(s, tlbtype, r0, addrlo); - if (aligned) { - tcg_out_mov(s, ttype, r1, addrlo); - } else { - /* For unaligned access check that we don't cross pages using - the page address of the last byte. */ - tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask); - } tcg_out_shifti(s, SHIFT_SHR + tlbrexw, r0, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - tgen_arithi(s, ARITH_AND + trexw, r1, - TARGET_PAGE_MASK | (aligned ? 
s_mask : 0), 0); + tcg_out_compute_gva(s, addrlo, opc, trexw, tv_hrexw); + tgen_arithi(s, ARITH_AND + tlbrexw, r0, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0); @@ -1219,7 +1273,7 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, + which); /* cmp 0(r0), r1 */ - tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0); + tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw + tv_hrexw, r1, r0, 0); /* Prepare for both the fast path add of the tlb addend, and the slow path function argument setup. There are two cases worth note: @@ -1754,6 +1808,73 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) #endif } +/* + * Emit trace profiling/prediction stubs. The code sequence is as follows: + * S1: direct jump (the reloc part requires 4-byte alignment) + * S2: trace profiling stub + * S3: trace prediction stub + * S4: beginning of QEMU emulation code + * + * The jump inst of S1 is initially set to jump to S3 (i.e. skipping S2). + * Remember the offset of S3 (patch_next) which is used to turn the + * trace profiling off. Also remember the offset of S4 (patch_skip) + * so that the trace stubs can be skipped quickly while searching pc. 
+ */ +static void tcg_out_hotpatch(TCGContext *s, uint32_t is_user, uint32_t emit_helper) +{ + uint8_t *label_ptr[2]; + TranslationBlock *tb = s->tb; + + /* S1: direct jump */ + while (((uintptr_t)s->code_ptr + 1) % 4) + tcg_out8(s, OPC_NOP); + + tb->patch_jmp = (uint16_t)(s->code_ptr - s->code_buf); + + tcg_out8(s, OPC_JMP_long); + label_ptr[0] = s->code_ptr; + s->code_ptr += 4; + + if (is_user == 0 || emit_helper == 0) { + *(uint32_t *)label_ptr[0] = s->code_ptr - label_ptr[0] - 4; + tb->patch_next = (uint16_t)(s->code_ptr - s->code_buf); + return; + } + + /* S2: trace profiling stub */ + if (TCG_TARGET_REG_BITS == 32) { + tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, 0); + tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, 4, tb->id); + } else { + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[1], tb->id); + } + tcg_out_call(s, (tcg_insn_unit *)helper_NET_profile); + *(uint32_t *)label_ptr[0] = s->code_ptr - label_ptr[0] - 4; + + /* S3: trace prediction stub */ + tb->patch_next = (uint16_t)(s->code_ptr - s->code_buf); + + tcg_out_ld(s, TCG_TYPE_I32, tcg_target_reg_alloc_order[0], + TCG_AREG0, offsetof(CPUArchState, start_trace_prediction)); + tcg_out_cmp(s, tcg_target_reg_alloc_order[0], 0, 1, 0); + tcg_out_opc(s, OPC_JCC_long + JCC_JE, 0, 0, 0); + label_ptr[1] = s->code_ptr; + s->code_ptr += 4; + + if (TCG_TARGET_REG_BITS == 32) { + tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, 0); + tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, 4, tb->id); + } else { + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[1], tb->id); + } + tcg_out_call(s, (tcg_insn_unit *)helper_NET_predict); + *(uint32_t *)label_ptr[1] = s->code_ptr - label_ptr[1] - 4; + + /* S4: QEMU emulation code */ +} + static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *const_args) { @@ -1777,6 +1898,10 @@ 
static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_goto_tb: if (s->tb_jmp_offset) { /* direct jump method */ +#if defined(CONFIG_USER_ONLY) + while (((uintptr_t)s->code_ptr + 1) % 4) /* need 4-byte aligned */ + tcg_out8(s, OPC_NOP); +#endif tcg_out8(s, OPC_JMP_long); /* jmp im */ s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); tcg_out32(s, 0); @@ -1787,6 +1912,17 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, } s->tb_next_offset[args[0]] = tcg_current_code_size(s); break; + case INDEX_op_hotpatch: + tcg_out_hotpatch(s, args[0], args[1]); + break; + case INDEX_op_jmp: + if (const_args[0]) { + tcg_out_jmp(s, (tcg_insn_unit *)args[0]); + } else { + /* jmp *reg */ + tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, args[0]); + } + break; case INDEX_op_br: tcg_out_jxx(s, JCC_JMP, arg_label(args[0]), 0); break; @@ -2110,6 +2246,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, } static const TCGTargetOpDef x86_op_defs[] = { + { INDEX_op_hotpatch, { "i", "i" } }, + { INDEX_op_jmp, { "ri" } }, { INDEX_op_exit_tb, { } }, { INDEX_op_goto_tb, { } }, { INDEX_op_br, { } }, @@ -2238,6 +2376,11 @@ static const TCGTargetOpDef x86_op_defs[] = { { INDEX_op_qemu_ld_i64, { "r", "r", "L", "L" } }, { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } }, #endif + +#define DEF(name,a1,a2,a3,a4) { INDEX_op_##name, {} }, +#include "tcg-opc-vector.h" +#undef DEF + { -1 }, }; @@ -2261,16 +2404,29 @@ static int tcg_target_callee_save_regs[] = { #endif }; +static void tcg_out_epilogue(TCGContext *s) +{ + /* IBTC exit entry */ + ibtc_ret_addr = s->code_ptr; + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, 0); +} + /* Compute frame size via macros, to share between tcg_target_qemu_prologue and tcg_register_jit. 
*/ +#if defined(CONFIG_LLVM) +#define STACK_SIZE 0x2000 +#else +#define STACK_SIZE TCG_STATIC_CALL_ARGS_SIZE +#endif + #define PUSH_SIZE \ ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \ * (TCG_TARGET_REG_BITS / 8)) #define FRAME_SIZE \ ((PUSH_SIZE \ - + TCG_STATIC_CALL_ARGS_SIZE \ + + STACK_SIZE \ + CPU_TEMP_BUF_NLONGS * sizeof(long) \ + TCG_TARGET_STACK_ALIGN - 1) \ & ~(TCG_TARGET_STACK_ALIGN - 1)) @@ -2279,10 +2435,12 @@ static int tcg_target_callee_save_regs[] = { static void tcg_target_qemu_prologue(TCGContext *s) { int i, stack_addend; + tcg_target_long stack_align_mask; /* TB prologue */ /* Reserve some stack space, also for TCG temps. */ + stack_align_mask = ~(TCG_TARGET_STACK_ALIGN - 1); stack_addend = FRAME_SIZE - PUSH_SIZE; tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE, CPU_TEMP_BUF_NLONGS * sizeof(long)); @@ -2296,6 +2454,9 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4); tcg_out_addi(s, TCG_REG_ESP, -stack_addend); + tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_ESP, TCG_AREG0, + offsetof(CPUArchState, sp)); + tgen_arithi(s, ARITH_AND, TCG_REG_ESP, stack_align_mask, 0); /* jmp *tb. */ tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP, (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4 @@ -2303,13 +2464,19 @@ static void tcg_target_qemu_prologue(TCGContext *s) #else tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); tcg_out_addi(s, TCG_REG_ESP, -stack_addend); + tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_ESP, TCG_AREG0, + offsetof(CPUArchState, sp)); + tgen_arithi(s, ARITH_AND + P_REXW, TCG_REG_ESP, stack_align_mask, 0); /* jmp *tb. 
*/ tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]); #endif /* TB epilogue */ + tcg_out_epilogue(s); tb_ret_addr = s->code_ptr; + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_ESP, TCG_AREG0, + offsetof(CPUArchState, sp)); tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend); for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) { diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 92be341..c5715dc 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -67,7 +67,7 @@ typedef enum { /* used for function call generation */ #define TCG_REG_CALL_STACK TCG_REG_ESP -#define TCG_TARGET_STACK_ALIGN 16 +#define TCG_TARGET_STACK_ALIGN 32 #if defined(_WIN64) #define TCG_TARGET_CALL_STACK_OFFSET 32 #else diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c index 2c72565..ca5c7a4 100644 --- a/tcg/ppc/tcg-target.c +++ b/tcg/ppc/tcg-target.c @@ -78,7 +78,8 @@ #define TCG_CT_CONST_ZERO 0x1000 #define TCG_CT_CONST_MONE 0x2000 -static tcg_insn_unit *tb_ret_addr; +tcg_insn_unit *tb_ret_addr; +tcg_insn_unit *ibtc_ret_addr; #include "elf.h" static bool have_isa_2_06; @@ -1785,8 +1786,14 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) #define CPU_TEMP_BUF_SIZE (CPU_TEMP_BUF_NLONGS * (int)sizeof(long)) #define REG_SAVE_SIZE ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR) +#if defined(CONFIG_LLVM) +#define STACK_SIZE 0x800 +#else +#define STACK_SIZE TCG_STATIC_CALL_ARGS_SIZE +#endif + #define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET \ - + TCG_STATIC_CALL_ARGS_SIZE \ + + STACK_SIZE \ + CPU_TEMP_BUF_SIZE \ + REG_SAVE_SIZE \ + TCG_TARGET_STACK_ALIGN - 1) \ @@ -1794,6 +1801,14 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) #define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE) +static unsigned num_epilogue_insns = 1; +static void tcg_out_epilogue(TCGContext *s) +{ + /* IBTC exit entry */ + ibtc_ret_addr = s->code_ptr; + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, 0); +} + static void 
tcg_target_qemu_prologue(TCGContext *s) { int i; @@ -1832,27 +1847,29 @@ static void tcg_target_qemu_prologue(TCGContext *s) if (USE_REG_RA) { #ifdef _CALL_AIX /* Make the caller load the value as the TOC into R2. */ - tb_ret_addr = s->code_ptr + 2; + tb_ret_addr = s->code_ptr + 2 + num_epilogue_insns; desc[1] = tb_ret_addr; tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_RA, TCG_REG_R2); tcg_out32(s, BCCTR | BO_ALWAYS); #elif defined(_CALL_ELF) && _CALL_ELF == 2 /* Compute from the incoming R12 value. */ - tb_ret_addr = s->code_ptr + 2; + tb_ret_addr = s->code_ptr + 2 + num_epilogue_insns; tcg_out32(s, ADDI | TAI(TCG_REG_RA, TCG_REG_R12, tcg_ptr_byte_diff(tb_ret_addr, s->code_buf))); tcg_out32(s, BCCTR | BO_ALWAYS); #else /* Reserve max 5 insns for the constant load. */ - tb_ret_addr = s->code_ptr + 6; + tb_ret_addr = s->code_ptr + 6 + num_epilogue_insns; tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (intptr_t)tb_ret_addr); tcg_out32(s, BCCTR | BO_ALWAYS); while (s->code_ptr < tb_ret_addr) { tcg_out32(s, NOP); } #endif + tcg_out_epilogue(s); /* NOTE(review): the AIX and ELFv2 variants emit two insns before this call, so tb_ret_addr lands after the epilogue; in the constant-load variant the NOP loop has already advanced code_ptr to tb_ret_addr, so there the epilogue is emitted at tb_ret_addr itself. Confirm this is intended. */ } else { tcg_out32(s, BCCTR | BO_ALWAYS); + tcg_out_epilogue(s); tb_ret_addr = s->code_ptr; } @@ -1869,6 +1886,85 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_out32(s, BCLR | BO_ALWAYS); } +static void tcg_out_jmp_short(uintptr_t jmp_addr, uintptr_t addr) +{ /* Write a two-instruction pair at jmp_addr with a single 64-bit store: an unconditional branch to addr followed by a NOP. Aborts when the displacement is rejected by in_range_b(). */ + tcg_insn_unit i1, i2; + uint64_t pair; + intptr_t diff = addr - jmp_addr; /* branch displacement relative to jmp_addr */ + + if (!in_range_b(diff)) + tcg_abort(); + + i1 = B | (diff & 0x3fffffc); + i2 = NOP; +#ifdef HOST_WORDS_BIGENDIAN + pair = (uint64_t)i1 << 32 | i2; /* either way the branch ends up at the lower address */ +#else + pair = (uint64_t)i2 << 32 | i1; +#endif + *(uint64_t *)jmp_addr = pair; /* one store covers both instructions; the caller in this file aligns jmp_addr to 8 bytes */ +} + +/* + * Emit trace profiling/prediction stubs. The code sequence is as follows: + * S1: direct jump (the branch/NOP pair it starts with is 8-byte aligned) + * S2: trace profiling stub + * S3: trace prediction stub + * S4: beginning of QEMU emulation code + * + * The jump instruction of S1 is initially set to jump to S3 (i.e. skipping S2). 
+ * Remember the offset of S3 (patch_next) which is used to turn the + * trace profiling off. Also remember the offset of S4 (patch_skip) + * so that the trace stubs can be skipped quickly while searching pc. + */ +static void tcg_out_hotpatch(TCGContext *s, int is_user, int emit_helper) +{ + tcg_insn_unit *label_ptr[2]; /* [0]: start of the S1 slot, [1]: conditional branch in S3 */ + TranslationBlock *tb = s->tb; + + /* S1: direct jump. Emit one NOP if needed so that the branch/NOP pair starts on an 8-byte boundary (it is written with one 64-bit store). */ + if ((uintptr_t)s->code_ptr & 7) + tcg_out32(s, NOP); + + tb->patch_jmp = (uint16_t)((intptr_t)s->code_ptr - (intptr_t)s->code_buf); /* byte offset of the S1 slot from code_buf, truncated to 16 bits */ + + /* S1 without stubs: the slot is four insns (branch/NOP pair, mtspr to CTR, bcctr) and the branch skips all four. */ + if (is_user == 0 || emit_helper == 0) { + tcg_out_jmp_short((uintptr_t)s->code_ptr, (uintptr_t)(s->code_ptr + 4)); + s->code_ptr += 2; /* step over the pair just written */ + tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR); + tcg_out32(s, BCCTR | BO_ALWAYS); + tb->patch_next = (uint16_t)((intptr_t)s->code_ptr - (intptr_t)s->code_buf); /* offset right after the S1 slot */ + return; + } + + label_ptr[0] = s->code_ptr; + s->code_ptr += 2; /* reserve the branch/NOP pair; it is filled in after S2 is emitted */ + tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR); + tcg_out32(s, BCCTR | BO_ALWAYS); + + /* S2: trace profiling stub. Call helper_NET_profile with the TCG_AREG0 value and tb->id in the first two call argument registers. */ + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[1], tb->id); + tcg_out_call(s, (tcg_insn_unit *)helper_NET_profile); + tcg_out_jmp_short((uintptr_t)label_ptr[0], (uintptr_t)s->code_ptr); /* S1 now branches over S2 to the start of S3 */ + + /* S3: trace prediction stub. Its offset is recorded in patch_next; the stub loads start_trace_prediction from the CPUArchState addressed by TCG_AREG0 and branches over the helper_NET_predict call when the EQ condition holds (presumably a compare against constant 0; confirm tcg_out_cmp's argument order). */ + tb->patch_next = (uint16_t)((intptr_t)s->code_ptr - (intptr_t)s->code_buf); + + tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_AREG0, + offsetof(CPUArchState, start_trace_prediction)); + tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_TMP1, 0, 1, 7, TCG_TYPE_I32); + label_ptr[1] = s->code_ptr; + tcg_out_bc_noaddr(s, tcg_to_bc[TCG_COND_EQ]); + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[1], tb->id); + tcg_out_call(s, (tcg_insn_unit *)helper_NET_predict); + reloc_pc14(label_ptr[1], s->code_ptr); /* resolve the conditional branch to the start of S4 */ + + /* S4: QEMU emulation code follows. NOTE(review): the header comment mentions patch_skip, but this function does not store it; confirm where it is set. */ +} + static void 
tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *const_args) { @@ -1906,6 +2002,17 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, tcg_out32(s, BCCTR | BO_ALWAYS); s->tb_next_offset[args[0]] = tcg_current_code_size(s); break; + case INDEX_op_hotpatch: + tcg_out_hotpatch(s, args[0], args[1]); + break; + case INDEX_op_jmp: + if (const_args[0]) { + tcg_out_b(s, 0, (tcg_insn_unit *)args[0]); + } else { + tcg_out32(s, MTSPR | RS(args[0]) | CTR); + tcg_out32(s, BCCTR | BO_ALWAYS); + } + break; case INDEX_op_br: { TCGLabel *l = arg_label(args[0]); @@ -2436,6 +2543,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, } static const TCGTargetOpDef ppc_op_defs[] = { + { INDEX_op_hotpatch, { "i", "i" } }, + { INDEX_op_jmp, { "ri" } }, { INDEX_op_exit_tb, { } }, { INDEX_op_goto_tb, { } }, { INDEX_op_br, { } }, @@ -2572,6 +2681,10 @@ static const TCGTargetOpDef ppc_op_defs[] = { { INDEX_op_qemu_st_i64, { "S", "S", "S", "S" } }, #endif +#define DEF(name,a1,a2,a3,a4) { INDEX_op_##name, {} }, +#include "tcg-opc-vector.h" +#undef DEF + { -1 }, }; diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 0b9dd8f..3773253 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -39,7 +39,7 @@ extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64); Up to and including filling in the forward link immediately. We'll do proper termination of the end of the list after we finish translation. */ -static void tcg_emit_op(TCGContext *ctx, TCGOpcode opc, int args) +void tcg_emit_op(TCGContext *ctx, TCGOpcode opc, int args) { int oi = ctx->gen_next_op_idx; int ni = oi + 1; diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h index 4e20dc1..17d31df 100644 --- a/tcg/tcg-op.h +++ b/tcg/tcg-op.h @@ -28,6 +28,7 @@ /* Basic output routines. Not for general consumption. 
*/ +void tcg_emit_op(TCGContext *ctx, TCGOpcode opc, int args); void tcg_gen_op1(TCGContext *, TCGOpcode, TCGArg); void tcg_gen_op2(TCGContext *, TCGOpcode, TCGArg, TCGArg); void tcg_gen_op3(TCGContext *, TCGOpcode, TCGArg, TCGArg, TCGArg); @@ -311,6 +312,16 @@ void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg); void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg); void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg); +static inline void tcg_gen_hotpatch(uint32_t arg1, uint32_t arg2) +{ + tcg_gen_op2(&tcg_ctx, INDEX_op_hotpatch, arg1, arg2); +} + +static inline void tcg_gen_annotate(uint32_t arg) +{ + tcg_gen_op1(&tcg_ctx, INDEX_op_annotate, arg); +} + static inline void tcg_gen_discard_i32(TCGv_i32 arg) { tcg_gen_op1_i32(INDEX_op_discard, arg); diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h index 6d0410c..5ba1e05 100644 --- a/tcg/tcg-opc.h +++ b/tcg/tcg-opc.h @@ -26,12 +26,16 @@ * DEF(name, oargs, iargs, cargs, flags) */ +DEF(hotpatch, 0, 0, 2, 0) +DEF(annotate, 0, 0, 1, TCG_OPF_NOT_PRESENT) + /* predefined ops */ DEF(discard, 1, 0, 0, TCG_OPF_NOT_PRESENT) DEF(set_label, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_NOT_PRESENT) /* variable number of parameters */ DEF(call, 0, 0, 3, TCG_OPF_CALL_CLOBBER | TCG_OPF_NOT_PRESENT) +DEF(jmp, 0, 1, 0, TCG_OPF_BB_END) DEF(br, 0, 0, 1, TCG_OPF_BB_END) @@ -191,6 +195,8 @@ DEF(qemu_ld_i64, DATA64_ARGS, TLADDR_ARGS, 1, DEF(qemu_st_i64, 0, TLADDR_ARGS + DATA64_ARGS, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT) +#include "tcg-opc-vector.h" + #undef TLADDR_ARGS #undef DATA64_ARGS #undef IMPL @@ -304,19 +304,22 @@ void tcg_pool_reset(TCGContext *s) s->pool_current = NULL; } -typedef struct TCGHelperInfo { - void *func; - const char *name; - unsigned flags; - unsigned sizemask; -} TCGHelperInfo; - #include "exec/helper-proto.h" -static const TCGHelperInfo all_helpers[] = { +const TCGHelperInfo all_helpers[] = { #include "exec/helper-tcg.h" }; +int tcg_num_helpers(void) +{ + return ARRAY_SIZE(all_helpers); +} + +const 
TCGHelperInfo *get_tcg_helpers(void) +{ + return all_helpers; +} + void tcg_context_init(TCGContext *s) { int op, total_args, n, i; @@ -413,7 +416,7 @@ void tcg_set_frame(TCGContext *s, int reg, intptr_t start, intptr_t size) s->frame_reg = reg; } -void tcg_func_start(TCGContext *s) +void tcg_func_start(TCGContext *s, TranslationBlock *tb) { tcg_pool_reset(s); s->nb_temps = s->nb_globals; @@ -432,8 +435,10 @@ void tcg_func_start(TCGContext *s) s->gen_last_op_idx = -1; s->gen_next_op_idx = 0; s->gen_next_parm_idx = 0; + s->vec_opparam_ptr = s->vec_opparam_buf; s->be = tcg_malloc(sizeof(TCGBackendData)); + s->tb = tb; } static inline void tcg_temp_alloc(TCGContext *s, int n) @@ -1004,6 +1009,7 @@ void tcg_dump_ops(TCGContext *s) char buf[128]; TCGOp *op; int oi; + const TCGArg *vec_args = s->vec_opparam_buf; for (oi = s->gen_first_op_idx; oi >= 0; oi = op->next) { int i, k, nb_oargs, nb_iargs, nb_cargs; @@ -1051,8 +1057,29 @@ void tcg_dump_ops(TCGContext *s) qemu_log(",%s", t); } } else { + int is_vec = 0; qemu_log(" %s ", def->name); + /* print vector opc */ + switch (c) { + case INDEX_op_vector_start ... INDEX_op_vector_end: + is_vec = 1; + break; + default: + break; + } + if (is_vec) { + qemu_log("$0x%" TCG_PRIlx, vec_args[0]); + if (c == INDEX_op_vload_128 || c == INDEX_op_vstore_128) + qemu_log(",%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), vec_args[1])); + else + qemu_log(",$0x%" TCG_PRIlx, vec_args[1]); + qemu_log(",$0x%" TCG_PRIlx, vec_args[2]); + qemu_log("\n"); + vec_args += 3; + continue; + } + nb_oargs = def->nb_oargs; nb_iargs = def->nb_iargs; nb_cargs = def->nb_cargs; @@ -1138,6 +1165,172 @@ void tcg_dump_ops(TCGContext *s) } } +void tcg_dump_ops_fn(TCGContext *s, void (*fn)(const char *)) +{ + char buf[128]; + char outbuf[128]; + TCGOp *op; + int oi; + const TCGArg *vec_args = s->vec_opparam_buf; + +#define printops(args...) 
\ + do { snprintf(outbuf, 128, ##args); (*fn)(outbuf); } while(0) + + for (oi = s->gen_first_op_idx; oi >= 0; oi = op->next) { + int i, k, nb_oargs, nb_iargs, nb_cargs; + const TCGOpDef *def; + const TCGArg *args; + TCGOpcode c; + + op = &s->gen_op_buf[oi]; + c = op->opc; + def = &tcg_op_defs[c]; + args = &s->gen_opparam_buf[op->args]; + + if (c == INDEX_op_insn_start) { + printops("%s ----", oi != s->gen_first_op_idx ? "\n" : ""); + + for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { + target_ulong a; +#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS + a = ((target_ulong)args[i * 2 + 1] << 32) | args[i * 2]; +#else + a = args[i]; +#endif + printops(" " TARGET_FMT_lx, a); + } + } else if (c == INDEX_op_call) { + /* variable number of arguments */ + nb_oargs = op->callo; + nb_iargs = op->calli; + nb_cargs = def->nb_cargs; + + /* function name, flags, out args */ + printops(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name, + tcg_find_helper(s, args[nb_oargs + nb_iargs]), + args[nb_oargs + nb_iargs + 1], nb_oargs); + for (i = 0; i < nb_oargs; i++) { + printops(",%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), + args[i])); + } + for (i = 0; i < nb_iargs; i++) { + TCGArg arg = args[nb_oargs + i]; + const char *t = "<dummy>"; + if (arg != TCG_CALL_DUMMY_ARG) { + t = tcg_get_arg_str_idx(s, buf, sizeof(buf), arg); + } + printops(",%s", t); + } + } else { + int is_vec = 0; + printops(" %s ", def->name); + + /* print vector opc */ + switch (c) { + case INDEX_op_vector_start ... 
INDEX_op_vector_end: + is_vec = 1; + break; + default: + break; + } + if (is_vec) { + printops("$0x%" TCG_PRIlx, vec_args[0]); + if (c == INDEX_op_vload_128 || c == INDEX_op_vstore_128) + printops(",%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), vec_args[1])); + else + printops(",$0x%" TCG_PRIlx, vec_args[1]); + printops(",$0x%" TCG_PRIlx, vec_args[2]); + printops("\n"); + vec_args += 3; + continue; + } + + nb_oargs = def->nb_oargs; + nb_iargs = def->nb_iargs; + nb_cargs = def->nb_cargs; + + k = 0; + for (i = 0; i < nb_oargs; i++) { + if (k != 0) { + printops(","); + } + printops("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), + args[k++])); + } + for (i = 0; i < nb_iargs; i++) { + if (k != 0) { + printops(","); + } + printops("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), + args[k++])); + } + switch (c) { + case INDEX_op_brcond_i32: + case INDEX_op_setcond_i32: + case INDEX_op_movcond_i32: + case INDEX_op_brcond2_i32: + case INDEX_op_setcond2_i32: + case INDEX_op_brcond_i64: + case INDEX_op_setcond_i64: + case INDEX_op_movcond_i64: + if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]]) { + printops(",%s", cond_name[args[k++]]); + } else { + printops(",$0x%" TCG_PRIlx, args[k++]); + } + i = 1; + break; + case INDEX_op_qemu_ld_i32: + case INDEX_op_qemu_st_i32: + case INDEX_op_qemu_ld_i64: + case INDEX_op_qemu_st_i64: + { + TCGMemOpIdx oi = args[k++]; + TCGMemOp op = get_memop(oi); + unsigned ix = get_mmuidx(oi); + + if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) { + printops(",$0x%x,%u", op, ix); + } else { + const char *s_al = "", *s_op; + if (op & MO_AMASK) { + if ((op & MO_AMASK) == MO_ALIGN) { + s_al = "al+"; + } else { + s_al = "un+"; + } + } + s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)]; + printops(",%s%s,%u", s_al, s_op, ix); + } + i = 1; + } + break; + default: + i = 0; + break; + } + switch (c) { + case INDEX_op_set_label: + case INDEX_op_br: + case INDEX_op_brcond_i32: + case INDEX_op_brcond_i64: + case INDEX_op_brcond2_i32: + printops("%s$L%d", k ? 
"," : "", arg_label(args[k])->id); + i++, k++; + break; + default: + break; + } + for (; i < nb_cargs; i++, k++) { + printops("%s$0x%" TCG_PRIlx, k ? "," : "", args[k]); + } + } + printops("\n"); + } +#undef printops +} + /* we give more priority to constraints with less registers */ static int get_constraint_priority(const TCGOpDef *def, int k) { @@ -1334,10 +1527,11 @@ static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps, /* Liveness analysis : update the opc_dead_args array to tell if a given input arguments is dead. Instructions updating dead temporaries are removed. */ -static void tcg_liveness_analysis(TCGContext *s) +void tcg_liveness_analysis(TCGContext *s) { uint8_t *dead_temps, *mem_temps; int oi, oi_prev, nb_ops; + TCGArg *vec_args = s->vec_opparam_ptr; nb_ops = s->gen_next_op_idx; s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t)); @@ -1427,6 +1621,7 @@ static void tcg_liveness_analysis(TCGContext *s) } } break; + case INDEX_op_annotate: case INDEX_op_insn_start: break; case INDEX_op_discard: @@ -1434,7 +1629,11 @@ static void tcg_liveness_analysis(TCGContext *s) dead_temps[args[0]] = 1; mem_temps[args[0]] = 0; break; - + case INDEX_op_vector_start ... 
INDEX_op_vector_end: + vec_args -= 3; + if (opc == INDEX_op_vload_128 || opc == INDEX_op_vstore_128) + dead_temps[vec_args[1]] = 0; + break; case INDEX_op_add2_i32: opc_new = INDEX_op_add_i32; goto do_addsub2; @@ -1577,7 +1776,7 @@ static void tcg_liveness_analysis(TCGContext *s) } #else /* dummy liveness analysis */ -static void tcg_liveness_analysis(TCGContext *s) +void tcg_liveness_analysis(TCGContext *s) { int nb_ops; nb_ops = s->gen_opc_ptr - s->gen_opc_buf; @@ -2418,6 +2617,8 @@ int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf) s->gen_insn_data[num_insns][i] = a; } break; + case INDEX_op_annotate: + break; case INDEX_op_discard: temp_dead(s, args[0]); break; @@ -2554,15 +2755,15 @@ struct jit_descriptor { struct jit_code_entry *first_entry; }; -void __jit_debug_register_code(void) __attribute__((noinline)); -void __jit_debug_register_code(void) +void qemu_jit_debug_register_code(void) __attribute__((noinline)); +void qemu_jit_debug_register_code(void) { asm(""); } /* Must statically initialize the version, because GDB may check the version before we can set it. */ -struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 }; +struct jit_descriptor qemu_jit_debug_descriptor = { 1, 0, 0, 0 }; /* End GDB interface. */ @@ -2771,10 +2972,10 @@ static void tcg_register_jit_int(void *buf_ptr, size_t buf_size, one_entry.symfile_addr = img; one_entry.symfile_size = img_size; - __jit_debug_descriptor.action_flag = JIT_REGISTER_FN; - __jit_debug_descriptor.relevant_entry = &one_entry; - __jit_debug_descriptor.first_entry = &one_entry; - __jit_debug_register_code(); + qemu_jit_debug_descriptor.action_flag = JIT_REGISTER_FN; + qemu_jit_debug_descriptor.relevant_entry = &one_entry; + qemu_jit_debug_descriptor.first_entry = &one_entry; + qemu_jit_debug_register_code(); } #else /* No support for the feature. 
Provide the entry point expected by exec.c, @@ -2790,3 +2991,34 @@ void tcg_register_jit(void *buf, size_t buf_size) { } #endif /* ELF_HOST_MACHINE */ + + +/* + * copy_tcg_context_global() + * Copy the calling thread's local TCG context (tcg_ctx) to the global TCG context (tcg_ctx_global). Only the first call copies; later calls return immediately. + * + * The main thread first initializes its tcg_ctx and copies it to tcg_ctx_global + * at this point. tcg_ctx_global is later copied to each thread's local + * tcg_ctx using copy_tcg_context(). + * + * Note: This copy must be done after tcg_ctx is completely initialized + * and should be set up by the main thread. + */ +void copy_tcg_context_global(void) +{ + static int init_once = 0; /* plain static flag with no synchronization; see the note above about the main thread */ + if (init_once == 1) + return; + + memcpy(&tcg_ctx_global, &tcg_ctx, sizeof(TCGContext)); /* byte-wise copy of the whole struct: pointer members such as tb_ctx are copied as pointers, not duplicated */ + init_once = 1; +} + +/* + * copy_tcg_context() + * Copy the global TCG context (tcg_ctx_global) over the calling thread's local TCG context (tcg_ctx) with a byte-wise copy of the whole struct. + */ +void copy_tcg_context(void) +{ + memcpy(&tcg_ctx, &tcg_ctx_global, sizeof(TCGContext)); +} @@ -193,6 +193,7 @@ typedef struct TCGPool { #define TCG_POOL_CHUNK_SIZE 32768 +#define TCG_MAX_LABELS 512 #define TCG_MAX_TEMPS 512 #define TCG_MAX_INSNS 512 @@ -564,7 +565,7 @@ struct TCGContext { /* Threshold to flush the translated code buffer. */ void *code_gen_highwater; - TBContext tb_ctx; + TBContext *tb_ctx; /* The TCGBackendData structure is private to tcg-target.c. 
*/ struct TCGBackendData *be; @@ -578,12 +579,33 @@ struct TCGContext { TCGOp gen_op_buf[OPC_BUF_SIZE]; TCGArg gen_opparam_buf[OPPARAM_BUF_SIZE]; + TCGArg vec_opparam_buf[OPPARAM_BUF_SIZE]; + TCGArg *vec_opparam_ptr; uint16_t gen_insn_end_off[TCG_MAX_INSNS]; target_ulong gen_insn_data[TCG_MAX_INSNS][TARGET_INSN_START_WORDS]; + + TranslationBlock *tb; }; -extern TCGContext tcg_ctx; +extern TCGContext tcg_ctx_global; +extern __thread TCGContext tcg_ctx; + +typedef struct TCGHelperInfo { + void *func; + const char *name; + unsigned flags; + unsigned sizemask; +} TCGHelperInfo; + +void copy_tcg_context_global(void); +void copy_tcg_context(void); +int tcg_num_helpers(void); +const TCGHelperInfo *get_tcg_helpers(void); +void tcg_liveness_analysis(TCGContext *s); +void tcg_dump_ops_fn(TCGContext *s, void (*fn)(const char *)); +target_long decode_sleb128(uint8_t **pp); + /* The number of opcodes emitted so far. */ static inline int tcg_op_buf_count(void) @@ -624,7 +646,7 @@ static inline void *tcg_malloc(int size) void tcg_context_init(TCGContext *s); void tcg_prologue_init(TCGContext *s); -void tcg_func_start(TCGContext *s); +void tcg_func_start(TCGContext *s, TranslationBlock *tb); int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf); @@ -822,7 +844,7 @@ static inline TCGLabel *arg_label(TCGArg i) static inline ptrdiff_t tcg_ptr_byte_diff(void *a, void *b) { - return a - b; + return (ptrdiff_t)a - (ptrdiff_t)b; } /** @@ -876,7 +898,7 @@ static inline TCGMemOpIdx make_memop_idx(TCGMemOp op, unsigned idx) */ static inline TCGMemOp get_memop(TCGMemOpIdx oi) { - return oi >> 4; + return (TCGMemOp)(oi >> 4); } /** @@ -939,6 +961,7 @@ static inline unsigned get_mmuidx(TCGMemOpIdx oi) #define TB_EXIT_IDX1 1 #define TB_EXIT_ICOUNT_EXPIRED 2 #define TB_EXIT_REQUESTED 3 +#define TB_EXIT_LLVM TB_EXIT_ICOUNT_EXPIRED #ifdef HAVE_TCG_QEMU_TB_EXEC uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr); @@ -1011,6 +1034,31 @@ uint32_t helper_be_ldl_cmmu(CPUArchState 
*env, target_ulong addr, uint64_t helper_be_ldq_cmmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, uintptr_t retaddr); + +/* Value zero-extended to tcg register size. */ +tcg_target_ulong llvm_ret_ldub_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi); +tcg_target_ulong llvm_le_lduw_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi); +tcg_target_ulong llvm_le_ldul_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi); +uint64_t llvm_le_ldq_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi); +tcg_target_ulong llvm_be_lduw_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi); +tcg_target_ulong llvm_be_ldul_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi); +uint64_t llvm_be_ldq_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi); + +/* Value sign-extended to tcg register size. */ +tcg_target_ulong llvm_ret_ldsb_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi); +tcg_target_ulong llvm_le_ldsw_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi); +tcg_target_ulong llvm_le_ldsl_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi); +tcg_target_ulong llvm_be_ldsw_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi); +tcg_target_ulong llvm_be_ldsl_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi); + +void llvm_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val, TCGMemOpIdx oi); +void llvm_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, TCGMemOpIdx oi); +void llvm_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, TCGMemOpIdx oi); +void llvm_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, TCGMemOpIdx oi); +void llvm_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, TCGMemOpIdx oi); +void llvm_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, TCGMemOpIdx oi); +void llvm_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, TCGMemOpIdx oi); + /* Temporary aliases until backends are converted. 
*/ #ifdef TARGET_WORDS_BIGENDIAN # define helper_ret_ldsw_mmu helper_be_ldsw_mmu diff --git a/translate-all.c b/translate-all.c index 042a857..bf05326 100644 --- a/translate-all.c +++ b/translate-all.c @@ -63,6 +63,10 @@ #include "qemu/bitmap.h" #include "qemu/timer.h" +#include "hqemu.h" + +size_t get_cpu_size(void) { return sizeof(CPUArchState); } + //#define DEBUG_TB_INVALIDATE //#define DEBUG_FLUSH /* make various TB consistency checks */ @@ -124,7 +128,8 @@ intptr_t qemu_host_page_mask; static void *l1_map[V_L1_SIZE]; /* code generation context */ -TCGContext tcg_ctx; +TCGContext tcg_ctx_global; +__thread TCGContext tcg_ctx; /* translation block context */ #ifdef CONFIG_USER_ONLY @@ -135,7 +140,7 @@ void tb_lock(void) { #ifdef CONFIG_USER_ONLY assert(!have_tb_lock); - qemu_mutex_lock(&tcg_ctx.tb_ctx.tb_lock); + qemu_mutex_lock(&tcg_ctx.tb_ctx->tb_lock); have_tb_lock++; #endif } @@ -145,7 +150,7 @@ void tb_unlock(void) #ifdef CONFIG_USER_ONLY assert(have_tb_lock); have_tb_lock--; - qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock); + qemu_mutex_unlock(&tcg_ctx.tb_ctx->tb_lock); #endif } @@ -153,7 +158,7 @@ void tb_lock_reset(void) { #ifdef CONFIG_USER_ONLY if (have_tb_lock) { - qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock); + qemu_mutex_unlock(&tcg_ctx.tb_ctx->tb_lock); have_tb_lock = 0; } #endif @@ -161,11 +166,12 @@ void tb_lock_reset(void) static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, tb_page_addr_t phys_page2); -static TranslationBlock *tb_find_pc(uintptr_t tc_ptr); +static TranslationBlock *tb_find_pc(CPUState *cpu, uintptr_t tc_ptr); void cpu_gen_init(void) { tcg_context_init(&tcg_ctx); + tcg_ctx.tb_ctx = g_malloc0(sizeof(TBContext)); } /* Encode VAL as a signed leb128 sequence at P. @@ -190,7 +196,7 @@ static uint8_t *encode_sleb128(uint8_t *p, target_long val) /* Decode a signed leb128 sequence at *PP; increment *PP past the decoded value. Return the decoded value. 
*/ -static target_long decode_sleb128(uint8_t **pp) +target_long decode_sleb128(uint8_t **pp) { uint8_t *p = *pp; target_long val = 0; @@ -268,6 +274,11 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, int64_t ti = profile_getclock(); #endif +#if defined(CONFIG_LLVM) + if (llvm_locate_trace(searched_pc)) + return llvm_restore_state(cpu, tb, searched_pc); +#endif + if (searched_pc < host_pc) { return -1; } @@ -297,8 +308,8 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, restore_state_to_opc(env, tb, data); #ifdef CONFIG_PROFILER - tcg_ctx.restore_time += profile_getclock() - ti; - tcg_ctx.restore_count++; + tcg_ctx_global.restore_time += profile_getclock() - ti; + tcg_ctx_global.restore_count++; #endif return 0; } @@ -307,7 +318,7 @@ bool cpu_restore_state(CPUState *cpu, uintptr_t retaddr) { TranslationBlock *tb; - tb = tb_find_pc(retaddr); + tb = tb_find_pc(cpu, retaddr); if (tb) { cpu_restore_state_from_tb(cpu, tb, retaddr); if (tb->cflags & CF_NOCACHE) { @@ -485,7 +496,13 @@ static inline PageDesc *page_find(tb_page_addr_t index) # define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1) #endif +/* Note: The size of the code buffer is doubled. We steal half of the buffer + * acting as the trace code cache. */ +#if defined(CONFIG_LLVM) +#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024 * 2) +#else #define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024) +#endif #define DEFAULT_CODE_GEN_BUFFER_SIZE \ (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \ @@ -503,6 +520,9 @@ static inline size_t size_code_gen_buffer(size_t tb_size) static buffer, we could size this on RESERVED_VA, on the text segment size of the executable, or continue to use the default. 
*/ tb_size = (unsigned long)(ram_size / 4); +#if defined(CONFIG_LLVM) + tb_size = (unsigned long)(ram_size / 2); +#endif #endif } if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) { @@ -730,15 +750,18 @@ static inline void code_gen_alloc(size_t tb_size) fprintf(stderr, "Could not allocate dynamic translator buffer\n"); exit(1); } +#if defined(CONFIG_LLVM) + llvm_alloc_cache(); +#endif /* Estimate a good size for the number of TBs we can support. We still haven't deducted the prologue from the buffer size here, but that's minimal and won't affect the estimate much. */ tcg_ctx.code_gen_max_blocks = tcg_ctx.code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE; - tcg_ctx.tb_ctx.tbs = g_new(TranslationBlock, tcg_ctx.code_gen_max_blocks); + tcg_ctx.tb_ctx->tbs = g_new(TranslationBlock, tcg_ctx.code_gen_max_blocks); - qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock); + qemu_mutex_init(&tcg_ctx.tb_ctx->tb_lock); } /* Must be called before using the QEMU cpus. 'tb_size' is the size @@ -765,26 +788,35 @@ bool tcg_enabled(void) too many translation blocks or too much generated code. */ static TranslationBlock *tb_alloc(target_ulong pc) { + TCGContext *s = &tcg_ctx_global; TranslationBlock *tb; - if (tcg_ctx.tb_ctx.nb_tbs >= tcg_ctx.code_gen_max_blocks) { + if (s->tb_ctx->nb_tbs >= s->code_gen_max_blocks) { return NULL; } - tb = &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs++]; +#if defined(CONFIG_LLVM) + if (llvm_check_cache() == 1) + return NULL; +#endif + + tb = &s->tb_ctx->tbs[s->tb_ctx->nb_tbs++]; tb->pc = pc; tb->cflags = 0; + + optimization_init_tb(tb, s->tb_ctx->nb_tbs - 1); return tb; } void tb_free(TranslationBlock *tb) { + TCGContext *s = &tcg_ctx_global; /* In practice this is mostly used for single use temporary TB Ignore the hard cases and just back up if this TB happens to be the last one generated. 
*/ - if (tcg_ctx.tb_ctx.nb_tbs > 0 && - tb == &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs - 1]) { - tcg_ctx.code_gen_ptr = tb->tc_ptr; - tcg_ctx.tb_ctx.nb_tbs--; + if (s->tb_ctx->nb_tbs > 0 && + tb == &s->tb_ctx->tbs[s->tb_ctx->nb_tbs - 1]) { + s->code_gen_ptr = tb->tc_ptr; + s->tb_ctx->nb_tbs--; } } @@ -832,42 +864,49 @@ static void page_flush_tb(void) /* XXX: tb_flush is currently not thread safe */ void tb_flush(CPUState *cpu) { + TCGContext *s = &tcg_ctx_global; #if defined(DEBUG_FLUSH) printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n", - (unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer), - tcg_ctx.tb_ctx.nb_tbs, tcg_ctx.tb_ctx.nb_tbs > 0 ? - ((unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer)) / - tcg_ctx.tb_ctx.nb_tbs : 0); + (unsigned long)(s->code_gen_ptr - s->code_gen_buffer), + s->tb_ctx->nb_tbs, s->tb_ctx->nb_tbs > 0 ? + ((unsigned long)(s->code_gen_ptr - s->code_gen_buffer)) / + s->tb_ctx->nb_tbs : 0); #endif - if ((unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer) - > tcg_ctx.code_gen_buffer_size) { + if ((unsigned long)(s->code_gen_ptr - s->code_gen_buffer) + > s->code_gen_buffer_size) { cpu_abort(cpu, "Internal error: code buffer overflow\n"); } - tcg_ctx.tb_ctx.nb_tbs = 0; +#if defined(CONFIG_LLVM) + llvm_tb_flush(); +#endif + + s->tb_ctx->nb_tbs = 0; CPU_FOREACH(cpu) { memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache)); + optimization_reset(cpu->env_ptr, 1); } - memset(tcg_ctx.tb_ctx.tb_phys_hash, 0, sizeof(tcg_ctx.tb_ctx.tb_phys_hash)); + memset(s->tb_ctx->tb_phys_hash, 0, sizeof(s->tb_ctx->tb_phys_hash)); page_flush_tb(); - tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer; + s->code_gen_ptr = s->code_gen_buffer; /* XXX: flush processor icache at this point if cache flush is expensive */ - tcg_ctx.tb_ctx.tb_flush_count++; + s->tb_ctx->tb_flush_count++; } #ifdef DEBUG_TB_CHECK static void tb_invalidate_check(target_ulong address) { + TCGContext *s = &tcg_ctx_global; TranslationBlock *tb; int i; 
address &= TARGET_PAGE_MASK; for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) { - for (tb = tb_ctx.tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) { + for (tb = s->tb_ctx->tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) { if (!(address + TARGET_PAGE_SIZE <= tb->pc || address >= tb->pc + tb->size)) { printf("ERROR invalidate: address=" TARGET_FMT_lx @@ -881,11 +920,12 @@ static void tb_invalidate_check(target_ulong address) /* verify that all the pages have correct rights for code */ static void tb_page_check(void) { + TCGContext *s = &tcg_ctx_global; TranslationBlock *tb; int i, flags1, flags2; for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) { - for (tb = tcg_ctx.tb_ctx.tb_phys_hash[i]; tb != NULL; + for (tb = s->tb_ctx->tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) { flags1 = page_get_flags(tb->pc); flags2 = page_get_flags(tb->pc + tb->size - 1); @@ -911,6 +951,10 @@ static inline void tb_hash_remove(TranslationBlock **ptb, TranslationBlock *tb) } ptb = &tb1->phys_hash_next; } +#if defined(CONFIG_LLVM) + tb->mode = BLOCK_INVALID; + llvm_tb_remove(tb); +#endif } static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb) @@ -969,16 +1013,15 @@ static inline void tb_reset_jump(TranslationBlock *tb, int n) /* invalidate one TB */ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) { + TCGContext *s = &tcg_ctx_global; CPUState *cpu; PageDesc *p; unsigned int h, n1; - tb_page_addr_t phys_pc; TranslationBlock *tb1, *tb2; /* remove the TB from the hash list */ - phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK); - h = tb_phys_hash_func(phys_pc); - tb_hash_remove(&tcg_ctx.tb_ctx.tb_phys_hash[h], tb); + h = tb_phys_hash_func(tb->pc); + tb_hash_remove(&s->tb_ctx->tb_phys_hash[h], tb); /* remove the TB from the page list */ if (tb->page_addr[0] != page_addr) { @@ -992,7 +1035,7 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) invalidate_page_bitmap(p); } - 
tcg_ctx.tb_ctx.tb_invalidated_flag = 1; + s->tb_ctx->tb_invalidated_flag = 1; /* remove the TB from the hash list */ h = tb_jmp_cache_hash_func(tb->pc); @@ -1000,6 +1043,7 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) if (cpu->tb_jmp_cache[h] == tb) { cpu->tb_jmp_cache[h] = NULL; } + optimization_remove_entry(cpu->env_ptr, tb); } /* suppress this TB from the two jump lists */ @@ -1021,7 +1065,7 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) } tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */ - tcg_ctx.tb_ctx.tb_phys_invalidate_count++; + s->tb_ctx->tb_phys_invalidate_count++; } static void build_page_bitmap(PageDesc *p) @@ -1058,6 +1102,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, target_ulong pc, target_ulong cs_base, int flags, int cflags) { + TCGContext *s = &tcg_ctx_global; CPUArchState *env = cpu->env_ptr; TranslationBlock *tb; tb_page_addr_t phys_pc, phys_page2; @@ -1082,22 +1127,22 @@ TranslationBlock *tb_gen_code(CPUState *cpu, tb = tb_alloc(pc); assert(tb != NULL); /* Don't forget to invalidate previous TB info. 
*/ - tcg_ctx.tb_ctx.tb_invalidated_flag = 1; + s->tb_ctx->tb_invalidated_flag = 1; } - gen_code_buf = tcg_ctx.code_gen_ptr; - tb->tc_ptr = gen_code_buf; + gen_code_buf = s->code_gen_ptr; + tb->tc_ptr = tb->opt_ptr = gen_code_buf; tb->cs_base = cs_base; tb->flags = flags; tb->cflags = cflags; #ifdef CONFIG_PROFILER - tcg_ctx.tb_count1++; /* includes aborted translations because of + s->tb_count1++; /* includes aborted translations because of exceptions */ ti = profile_getclock(); #endif - tcg_func_start(&tcg_ctx); + tcg_func_start(&tcg_ctx, tb); gen_intermediate_code(env, tb); @@ -1116,9 +1161,9 @@ TranslationBlock *tb_gen_code(CPUState *cpu, #endif #ifdef CONFIG_PROFILER - tcg_ctx.tb_count++; - tcg_ctx.interm_time += profile_getclock() - ti; - tcg_ctx.code_time -= profile_getclock(); + s->tb_count++; + s->interm_time += profile_getclock() - ti; + s->code_time -= profile_getclock(); #endif /* ??? Overflow could be handled better here. In particular, we @@ -1136,10 +1181,10 @@ TranslationBlock *tb_gen_code(CPUState *cpu, } #ifdef CONFIG_PROFILER - tcg_ctx.code_time += profile_getclock(); - tcg_ctx.code_in_len += tb->size; - tcg_ctx.code_out_len += gen_code_size; - tcg_ctx.search_out_len += search_size; + s->code_time += profile_getclock(); + s->code_in_len += tb->size; + s->code_out_len += gen_code_size; + s->search_out_len += search_size; #endif #ifdef DEBUG_DISAS @@ -1151,7 +1196,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, } #endif - tcg_ctx.code_gen_ptr = (void *) + s->code_gen_ptr = (void *) ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size, CODE_GEN_ALIGN); @@ -1247,7 +1292,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, current_tb = NULL; if (cpu->mem_io_pc) { /* now we have a real cpu fault */ - current_tb = tb_find_pc(cpu->mem_io_pc); + current_tb = tb_find_pc(cpu, cpu->mem_io_pc); } } if (current_tb == tb && @@ -1365,7 +1410,7 @@ static void tb_invalidate_phys_page(tb_page_addr_t addr, tb = p->first_tb; 
#ifdef TARGET_HAS_PRECISE_SMC if (tb && pc != 0) { - current_tb = tb_find_pc(pc); + current_tb = tb_find_pc(cpu, pc); } if (cpu != NULL) { env = cpu->env_ptr; @@ -1475,12 +1520,13 @@ static inline void tb_alloc_page(TranslationBlock *tb, static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, tb_page_addr_t phys_page2) { + TCGContext *s = &tcg_ctx_global; unsigned int h; TranslationBlock **ptb; /* add in the physical hash table */ - h = tb_phys_hash_func(phys_pc); - ptb = &tcg_ctx.tb_ctx.tb_phys_hash[h]; + h = tb_phys_hash_func(tb->pc); + ptb = &s->tb_ctx->tb_phys_hash[h]; tb->phys_hash_next = *ptb; *ptb = tb; @@ -1511,25 +1557,31 @@ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr < tb[1].tc_ptr. Return NULL if not found */ -static TranslationBlock *tb_find_pc(uintptr_t tc_ptr) +static TranslationBlock *tb_find_pc(CPUState *cpu, uintptr_t tc_ptr) { + TCGContext *s = &tcg_ctx_global; int m_min, m_max, m; uintptr_t v; TranslationBlock *tb; - if (tcg_ctx.tb_ctx.nb_tbs <= 0) { + if (s->tb_ctx->nb_tbs <= 0) { return NULL; } - if (tc_ptr < (uintptr_t)tcg_ctx.code_gen_buffer || - tc_ptr >= (uintptr_t)tcg_ctx.code_gen_ptr) { +#if defined(CONFIG_LLVM) + tb = llvm_find_pc(cpu, tc_ptr); + if (tb) + return tb; +#endif + if (tc_ptr < (uintptr_t)s->code_gen_buffer || + tc_ptr >= (uintptr_t)s->code_gen_ptr) { return NULL; } /* binary search (cf Knuth) */ m_min = 0; - m_max = tcg_ctx.tb_ctx.nb_tbs - 1; + m_max = s->tb_ctx->nb_tbs - 1; while (m_min <= m_max) { m = (m_min + m_max) >> 1; - tb = &tcg_ctx.tb_ctx.tbs[m]; + tb = &s->tb_ctx->tbs[m]; v = (uintptr_t)tb->tc_ptr; if (v == tc_ptr) { return tb; @@ -1539,7 +1591,7 @@ static TranslationBlock *tb_find_pc(uintptr_t tc_ptr) m_min = m + 1; } } - return &tcg_ctx.tb_ctx.tbs[m_max]; + return &s->tb_ctx->tbs[m_max]; } #if !defined(CONFIG_USER_ONLY) @@ -1567,7 +1619,7 @@ void tb_check_watchpoint(CPUState *cpu) { TranslationBlock *tb; - tb = 
tb_find_pc(cpu->mem_io_pc); + tb = tb_find_pc(cpu, cpu->mem_io_pc); if (tb) { /* We can use retranslation to find the PC. */ cpu_restore_state_from_tb(cpu, tb, cpu->mem_io_pc); @@ -1599,7 +1651,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) target_ulong pc, cs_base; uint64_t flags; - tb = tb_find_pc(retaddr); + tb = tb_find_pc(cpu, retaddr); if (!tb) { cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p", (void *)retaddr); @@ -1675,6 +1727,7 @@ void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr) void dump_exec_info(FILE *f, fprintf_function cpu_fprintf) { + TCGContext *s = &tcg_ctx_global; int i, target_code_size, max_target_code_size; int direct_jmp_count, direct_jmp2_count, cross_page; TranslationBlock *tb; @@ -1684,8 +1737,8 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf) cross_page = 0; direct_jmp_count = 0; direct_jmp2_count = 0; - for (i = 0; i < tcg_ctx.tb_ctx.nb_tbs; i++) { - tb = &tcg_ctx.tb_ctx.tbs[i]; + for (i = 0; i < s->tb_ctx->nb_tbs; i++) { + tb = &s->tb_ctx->tbs[i]; target_code_size += tb->size; if (tb->size > max_target_code_size) { max_target_code_size = tb->size; @@ -1703,35 +1756,35 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf) /* XXX: avoid using doubles ? */ cpu_fprintf(f, "Translation buffer state:\n"); cpu_fprintf(f, "gen code size %td/%zd\n", - tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer, - tcg_ctx.code_gen_highwater - tcg_ctx.code_gen_buffer); + s->code_gen_ptr - s->code_gen_buffer, + s->code_gen_highwater - s->code_gen_buffer); cpu_fprintf(f, "TB count %d/%d\n", - tcg_ctx.tb_ctx.nb_tbs, tcg_ctx.code_gen_max_blocks); + s->tb_ctx->nb_tbs, s->code_gen_max_blocks); cpu_fprintf(f, "TB avg target size %d max=%d bytes\n", - tcg_ctx.tb_ctx.nb_tbs ? target_code_size / - tcg_ctx.tb_ctx.nb_tbs : 0, + s->tb_ctx->nb_tbs ? target_code_size / + s->tb_ctx->nb_tbs : 0, max_target_code_size); cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n", - tcg_ctx.tb_ctx.nb_tbs ? 
(tcg_ctx.code_gen_ptr - - tcg_ctx.code_gen_buffer) / - tcg_ctx.tb_ctx.nb_tbs : 0, - target_code_size ? (double) (tcg_ctx.code_gen_ptr - - tcg_ctx.code_gen_buffer) / + s->tb_ctx->nb_tbs ? (s->code_gen_ptr - + s->code_gen_buffer) / + s->tb_ctx->nb_tbs : 0, + target_code_size ? (double) (s->code_gen_ptr - + s->code_gen_buffer) / target_code_size : 0); cpu_fprintf(f, "cross page TB count %d (%d%%)\n", cross_page, - tcg_ctx.tb_ctx.nb_tbs ? (cross_page * 100) / - tcg_ctx.tb_ctx.nb_tbs : 0); + s->tb_ctx->nb_tbs ? (cross_page * 100) / + s->tb_ctx->nb_tbs : 0); cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n", direct_jmp_count, - tcg_ctx.tb_ctx.nb_tbs ? (direct_jmp_count * 100) / - tcg_ctx.tb_ctx.nb_tbs : 0, + s->tb_ctx->nb_tbs ? (direct_jmp_count * 100) / + s->tb_ctx->nb_tbs : 0, direct_jmp2_count, - tcg_ctx.tb_ctx.nb_tbs ? (direct_jmp2_count * 100) / - tcg_ctx.tb_ctx.nb_tbs : 0); + s->tb_ctx->nb_tbs ? (direct_jmp2_count * 100) / + s->tb_ctx->nb_tbs : 0); cpu_fprintf(f, "\nStatistics:\n"); - cpu_fprintf(f, "TB flush count %d\n", tcg_ctx.tb_ctx.tb_flush_count); + cpu_fprintf(f, "TB flush count %d\n", s->tb_ctx->tb_flush_count); cpu_fprintf(f, "TB invalidate count %d\n", - tcg_ctx.tb_ctx.tb_phys_invalidate_count); + s->tb_ctx->tb_phys_invalidate_count); cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count); tcg_dump_info(f, cpu_fprintf); } diff --git a/user-exec.c b/user-exec.c index 8ad89a4..dbf04be 100644 --- a/user-exec.c +++ b/user-exec.c @@ -58,7 +58,7 @@ static void exception_action(CPUState *cpu) void cpu_resume_from_signal(CPUState *cpu, void *puc) { #ifdef __linux__ - struct ucontext *uc = puc; + ucontext_t *uc = puc; #elif defined(__OpenBSD__) struct sigcontext *uc = puc; #endif @@ -172,7 +172,7 @@ int cpu_signal_handler(int host_signum, void *pinfo, #elif defined(__OpenBSD__) struct sigcontext *uc = puc; #else - struct ucontext *uc = puc; + ucontext_t *uc = puc; #endif unsigned long pc; int trapno; @@ -227,7 +227,7 @@ int cpu_signal_handler(int 
host_signum, void *pinfo, #elif defined(__OpenBSD__) struct sigcontext *uc = puc; #else - struct ucontext *uc = puc; + ucontext_t *uc = puc; #endif pc = PC_sig(uc); @@ -332,7 +332,7 @@ int cpu_signal_handler(int host_signum, void *pinfo, #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) ucontext_t *uc = puc; #else - struct ucontext *uc = puc; + ucontext_t *uc = puc; #endif unsigned long pc; int is_write; @@ -359,7 +359,7 @@ int cpu_signal_handler(int host_signum, void *pinfo, void *puc) { siginfo_t *info = pinfo; - struct ucontext *uc = puc; + ucontext_t *uc = puc; uint32_t *pc = uc->uc_mcontext.sc_pc; uint32_t insn = *pc; int is_write = 0; @@ -457,7 +457,7 @@ int cpu_signal_handler(int host_signum, void *pinfo, #if defined(__NetBSD__) ucontext_t *uc = puc; #else - struct ucontext *uc = puc; + ucontext_t *uc = puc; #endif unsigned long pc; int is_write; @@ -484,7 +484,7 @@ int cpu_signal_handler(int host_signum, void *pinfo, int cpu_signal_handler(int host_signum, void *pinfo, void *puc) { siginfo_t *info = pinfo; - struct ucontext *uc = puc; + ucontext_t *uc = puc; uintptr_t pc = uc->uc_mcontext.pc; uint32_t insn = *(uint32_t *)pc; bool is_write; @@ -513,7 +513,7 @@ int cpu_signal_handler(int host_signum, void *pinfo, void *puc) { siginfo_t *info = pinfo; - struct ucontext *uc = puc; + ucontext_t *uc = puc; unsigned long pc; int is_write; @@ -535,7 +535,7 @@ int cpu_signal_handler(int host_signum, void *pinfo, int cpu_signal_handler(int host_signum, void *pinfo, void *puc) { siginfo_t *info = pinfo; - struct ucontext *uc = puc; + ucontext_t *uc = puc; unsigned long ip; int is_write = 0; @@ -566,7 +566,7 @@ int cpu_signal_handler(int host_signum, void *pinfo, void *puc) { siginfo_t *info = pinfo; - struct ucontext *uc = puc; + ucontext_t *uc = puc; unsigned long pc; uint16_t *pinsn; int is_write = 0; @@ -619,7 +619,7 @@ int cpu_signal_handler(int host_signum, void *pinfo, void *puc) { siginfo_t *info = pinfo; - struct ucontext *uc = puc; + ucontext_t *uc = 
puc; greg_t pc = uc->uc_mcontext.pc; int is_write; @@ -635,7 +635,7 @@ int cpu_signal_handler(int host_signum, void *pinfo, void *puc) { siginfo_t *info = pinfo; - struct ucontext *uc = puc; + ucontext_t *uc = puc; unsigned long pc = uc->uc_mcontext.sc_iaoq[0]; uint32_t insn = *(uint32_t *)pc; int is_write = 0; diff --git a/util/memfd.c b/util/memfd.c index 7c40691..587ef5a 100644 --- a/util/memfd.c +++ b/util/memfd.c @@ -40,7 +40,7 @@ #include <sys/syscall.h> #include <asm/unistd.h> -static int memfd_create(const char *name, unsigned int flags) +static int qemu_memfd_create(const char *name, unsigned int flags) { #ifdef __NR_memfd_create return syscall(__NR_memfd_create, name, flags); @@ -74,12 +74,12 @@ void *qemu_memfd_alloc(const char *name, size_t size, unsigned int seals, #ifdef CONFIG_LINUX if (seals) { - mfd = memfd_create(name, MFD_ALLOW_SEALING | MFD_CLOEXEC); + mfd = qemu_memfd_create(name, MFD_ALLOW_SEALING | MFD_CLOEXEC); } if (mfd == -1) { /* some systems have memfd without sealing */ - mfd = memfd_create(name, MFD_CLOEXEC); + mfd = qemu_memfd_create(name, MFD_CLOEXEC); seals = 0; } #endif @@ -4690,6 +4690,7 @@ int main(int argc, char **argv, char **envp) #ifdef CONFIG_TPM tpm_cleanup(); #endif + qemu_end_cpu_loop(); return 0; } |