146 files changed, 32918 insertions, 333 deletions
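The configure changes in this patch add three new switches: --enable-llvm, --clang-flags=<flags> and --with-libopencsd=<path>, and they require llvm-config (version 3.5 or later) to be on the PATH. A minimal sketch of how a build might be configured with them follows; the target list, clang flags and libopencsd path are illustrative assumptions, not values taken from the patch:

    # configure an HQEMU-style build with the new LLVM-related switches
    ./configure --target-list=x86_64-linux-user \
                --enable-llvm \
                --clang-flags="-O2" \
                --with-libopencsd=/opt/opencsd   # optional; only if libopencsd is installed
    make

With --enable-llvm set, the build also emits an llvm_helper_<target>[_softmmu].bc bitcode file that `make install` copies into the configured bindir, as shown in the Makefile.target hunk below.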
diff --git a/COPYRIGHT b/COPYRIGHT new file mode 100644 index 0000000..75a9c94 --- /dev/null +++ b/COPYRIGHT @@ -0,0 +1,28 @@ + + + + COPYRIGHT + +The following is a notice of limited availability of the code, and disclaimer +which must be included in the prologue of the code and in all source listings +of the code. + +Copyright Notice + + 2010 Computer System Laboratory, Institute of Information Science, + Academia Sinica, Taiwan + + 2016 COVART Laboratory, Department of Computer Science and Information + Engineering, National Taiwan University, Taiwan. + + +Permission is hereby granted to use, reproduce, prepare derivative works, and +to redistribute to others. This software was authored by the following authors, +sorted by surname: + +Name: Sheng-Yu Fu +Email: d03922013@csie.ntu.edu.tw + +Name: Ding-Yong Hong +Email: dyhong@iis.sinica.edu.tw + +Name: Yu-Ping Liu +Email: r04922005@csie.ntu.edu.tw diff --git a/Makefile.target b/Makefile.target index 962d004..4e4b1fe 100644 --- a/Makefile.target +++ b/Makefile.target @@ -158,6 +158,8 @@ GENERATED_HEADERS += hmp-commands.h hmp-commands-info.h qmp-commands-old.h endif # CONFIG_SOFTMMU +include $(SRC_PATH)/llvm/hqemu.mk + # Workaround for http://gcc.gnu.org/PR55489, see configure. %/translate.o: QEMU_CFLAGS += $(TRANSLATE_OPT_CFLAGS) @@ -189,8 +191,8 @@ all-obj-$(CONFIG_SOFTMMU) += $(crypto-obj-y) $(QEMU_PROG_BUILD): config-devices.mak # build either PROG or PROGW -$(QEMU_PROG_BUILD): $(all-obj-y) ../libqemuutil.a ../libqemustub.a - $(call LINK, $(filter-out %.mak, $^)) +$(QEMU_PROG_BUILD): $(all-obj-y) ../libqemuutil.a ../libqemustub.a $(LLVM_BITCODE) + $(call LINK, $(filter-out %.mak %.bc, $^)) ifdef CONFIG_DARWIN $(call quiet-command,Rez -append $(SRC_PATH)/pc-bios/qemu.rsrc -o $@," REZ $(TARGET_DIR)$@") $(call quiet-command,SetFile -a C $@," SETFILE $(TARGET_DIR)$@") @@ -225,6 +227,9 @@ ifdef CONFIG_TRACE_SYSTEMTAP $(INSTALL_DATA) $(QEMU_PROG).stp-installed "$(DESTDIR)$(qemu_datadir)/../systemtap/tapset/$(QEMU_PROG).stp" $(INSTALL_DATA) $(QEMU_PROG)-simpletrace.stp "$(DESTDIR)$(qemu_datadir)/../systemtap/tapset/$(QEMU_PROG)-simpletrace.stp" endif +ifneq ($(LLVM_BITCODE),) + $(INSTALL) -m 644 $(LLVM_BITCODE) "$(DESTDIR)$(bindir)" +endif GENERATED_HEADERS += config-target.h Makefile: $(GENERATED_HEADERS) @@ -345,6 +345,9 @@ vhdx="" numa="" tcmalloc="no" jemalloc="no" +llvm="no" +bcflags="" +libopencsd="" # parse CC options first for opt do @@ -1169,6 +1172,12 @@ for opt do ;; --enable-jemalloc) jemalloc="yes" ;; + --enable-llvm) llvm="yes" + ;; + --clang-flags=*) bcflags="$optarg" + ;; + --with-libopencsd=*) libopencsd="$optarg" + ;; *) echo "ERROR: unknown option $opt" echo "Try '$0 --help' for more information" @@ -1391,12 +1400,26 @@ disabled with --disable-FEATURE, default is enabled if available: numa libnuma support tcmalloc tcmalloc support jemalloc jemalloc support + llvm enable LLVM optimization + --clang-flags flags for clang compiler + --with-libopencsd path to libopencsd library NOTE: The object files are built at the place where configure is launched EOF exit 0 fi +if test "$llvm" != "no" ; then + llvm-config --version > /dev/null 2>&1 || { echo >&2 "llvm-config is not in the PATH"; exit 1; } + llvm_major=`llvm-config --version | cut -d'.' -f1` + llvm_minor=`llvm-config --version | cut -d'.' -f2` + if test "$llvm_major" -lt "3" ; then + error_exit "LLVM version too old. Version 3.5 or later is required." + elif test "$llvm_major" -eq "3" && test "$llvm_minor" -lt "5" ; then + error_exit "LLVM version too old. 
Version 3.5 or later is required." + fi +fi + # Now we have handled --enable-tcg-interpreter and know we're not just # printing the help message, bail out if the host CPU isn't supported. if test "$ARCH" = "unknown"; then @@ -1469,6 +1492,7 @@ gcc_flags="-Wmissing-include-dirs -Wempty-body -Wnested-externs $gcc_flags" gcc_flags="-Wendif-labels $gcc_flags" gcc_flags="-Wno-initializer-overrides $gcc_flags" gcc_flags="-Wno-string-plus-int $gcc_flags" +gcc_flags="-Wno-format-truncation $gcc_flags" # Note that we do not add -Werror to gcc_flags here, because that would # enable it for all configure tests. If a configure test failed due # to -Werror this would just silently disable some features, @@ -4847,6 +4871,11 @@ echo "bzip2 support $bzip2" echo "NUMA host support $numa" echo "tcmalloc support $tcmalloc" echo "jemalloc support $jemalloc" +echo "LLVM enabled $llvm (version `llvm-config --version`)" + +if test "$libopencsd" != ""; then + echo "libopencsd $libopencsd" +fi if test "$sdl_too_old" = "yes"; then echo "-> Your SDL version is too old - please upgrade to have SDL support" @@ -5252,6 +5281,21 @@ if test "$seccomp" = "yes"; then echo "CONFIG_SECCOMP=y" >> $config_host_mak fi +if test "$llvm" != "no" ; then + echo "CONFIG_LLVM=y" >> $config_host_mak + echo "BCFLAGS=$bcflags" >> $config_host_mak + echo "LLVM_VERSION=LLVM_V`llvm-config --version | sed -e "s/\.//g" | cut -c 1-2`" >> $config_host_mak + echo "LLVM_CFLAGS=`llvm-config --cflags`" >> $config_host_mak + echo "LLVM_CXXFLAGS=`llvm-config --cxxflags`" >> $config_host_mak + echo "LLVM_LDFLAGS=`llvm-config --ldflags`" >> $config_host_mak + echo "LLVM_LIBS=`llvm-config --libs`" >> $config_host_mak +fi + +if test "$libopencsd" != "" ; then + echo "CONFIG_LIBOPENCSD=y" >> $config_host_mak + echo "LIBOPENCSD=$libopencsd" >> $config_host_mak +fi + # XXX: suppress that if [ "$bsd" = "yes" ] ; then echo "CONFIG_BSD=y" >> $config_host_mak @@ -5852,6 +5896,23 @@ fi echo "LDFLAGS+=$ldflags" >> $config_target_mak echo "QEMU_CFLAGS+=$cflags" >> $config_target_mak +if test "$cpu" = "i386" -o "$cpu" = "x86_64" -o "$cpu" = "arm" ; then + case "$target_name" in + i386|x86_64) + echo "CONFIG_COREMU=y" >> $config_target_mak + ;; + esac +fi + +if test "$llvm" != "no" ; then + bitcode="llvm_helper_$target_name" + if test "$target_softmmu" = "yes" ; then + bitcode=$bitcode"_softmmu" + fi + echo "LLVM_EXTRA_FLAGS+=-I. -I\$(SRC_PATH) $cflags $LLVM_EXTRA_FLAGS" >> $config_target_mak + echo "CONFIG_LLVM_BITCODE=\"$prefix/bin/$bitcode.bc\"" >> $config_target_mak +fi + done # for target in $targets if [ "$pixman" = "internal" ]; then @@ -31,6 +31,7 @@ #include "hw/i386/apic.h" #endif #include "sysemu/replay.h" +#include "hqemu.h" /* -icount align implementation. 
*/ @@ -104,6 +105,7 @@ static void print_delay(const SyncClocks *sc) static void init_delay_params(SyncClocks *sc, const CPUState *cpu) { + memset(sc, 0, sizeof(SyncClocks)); if (!icount_align_option) { return; } @@ -159,6 +161,10 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, uint8_t *tb_ptr) trace_exec_tb_exit((void *) (next_tb & ~TB_EXIT_MASK), next_tb & TB_EXIT_MASK); +#if defined(CONFIG_LLVM) + if ((next_tb & TB_EXIT_MASK) == TB_EXIT_LLVM) + return next_tb; +#endif if ((next_tb & TB_EXIT_MASK) > TB_EXIT_IDX1) { /* We didn't start executing this TB (eg because the instruction * counter hit zero); we must restore the guest PC to the address @@ -197,7 +203,7 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles, tb = tb_gen_code(cpu, orig_tb->pc, orig_tb->cs_base, orig_tb->flags, max_cycles | CF_NOCACHE | (ignore_icount ? CF_IGNORE_ICOUNT : 0)); - tb->orig_tb = tcg_ctx.tb_ctx.tb_invalidated_flag ? NULL : orig_tb; + tb->orig_tb = tcg_ctx.tb_ctx->tb_invalidated_flag ? NULL : orig_tb; cpu->current_tb = tb; /* execute the generated code */ trace_exec_tb_nocache(tb, tb->pc); @@ -218,13 +224,13 @@ static TranslationBlock *tb_find_physical(CPUState *cpu, tb_page_addr_t phys_pc, phys_page1; target_ulong virt_page2; - tcg_ctx.tb_ctx.tb_invalidated_flag = 0; + tcg_ctx.tb_ctx->tb_invalidated_flag = 0; /* find translated block using physical mappings */ phys_pc = get_page_addr_code(env, pc); phys_page1 = phys_pc & TARGET_PAGE_MASK; - h = tb_phys_hash_func(phys_pc); - ptb1 = &tcg_ctx.tb_ctx.tb_phys_hash[h]; + h = tb_phys_hash_func(pc); + ptb1 = &tcg_ctx.tb_ctx->tb_phys_hash[h]; for(;;) { tb = *ptb1; if (!tb) { @@ -253,8 +259,8 @@ static TranslationBlock *tb_find_physical(CPUState *cpu, /* Move the TB to the head of the list */ *ptb1 = tb->phys_hash_next; - tb->phys_hash_next = tcg_ctx.tb_ctx.tb_phys_hash[h]; - tcg_ctx.tb_ctx.tb_phys_hash[h] = tb; + tb->phys_hash_next = tcg_ctx.tb_ctx->tb_phys_hash[h]; + tcg_ctx.tb_ctx->tb_phys_hash[h] = tb; return tb; } @@ -315,6 +321,10 @@ static inline TranslationBlock *tb_find_fast(CPUState *cpu) tb->flags != flags)) { tb = tb_find_slow(cpu, pc, cs_base, flags); } + + itlb_update_entry(env, tb); + ibtc_update_entry(env, tb); + return tb; } @@ -492,29 +502,23 @@ int cpu_exec(CPUState *cpu) tb = tb_find_fast(cpu); /* Note: we do it here to avoid a gcc bug on Mac OS X when doing it in tb_find_slow */ - if (tcg_ctx.tb_ctx.tb_invalidated_flag) { + if (tcg_ctx.tb_ctx->tb_invalidated_flag) { /* as some TB could have been invalidated because of memory exceptions while generating the code, we must recompute the hash index here */ next_tb = 0; - tcg_ctx.tb_ctx.tb_invalidated_flag = 0; + tcg_ctx.tb_ctx->tb_invalidated_flag = 0; } if (qemu_loglevel_mask(CPU_LOG_EXEC)) { qemu_log("Trace %p [" TARGET_FMT_lx "] %s\n", tb->tc_ptr, tb->pc, lookup_symbol(tb->pc)); } - /* see if we can patch the calling TB. When the TB - spans two pages, we cannot safely do a direct - jump. 
*/ - if (next_tb != 0 && tb->page_addr[1] == -1 - && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) { - tb_add_jump((TranslationBlock *)(next_tb & ~TB_EXIT_MASK), - next_tb & TB_EXIT_MASK, tb); - } + + tracer_exec_tb(cpu->env_ptr, next_tb, tb); tb_unlock(); if (likely(!cpu->exit_request)) { trace_exec_tb(tb, tb->pc); - tc_ptr = tb->tc_ptr; + tc_ptr = tb->opt_ptr; /* execute the generated code */ cpu->current_tb = tb; next_tb = cpu_tb_exec(cpu, tc_ptr); @@ -533,9 +537,14 @@ int cpu_exec(CPUState *cpu) */ smp_rmb(); next_tb = 0; + + tracer_reset(cpu->env_ptr); break; case TB_EXIT_ICOUNT_EXPIRED: { +#if defined(CONFIG_LLVM) + break; +#endif /* Instruction counter expired. */ int insns_left = cpu->icount_decr.u32; if (cpu->icount_extra && insns_left >= 0) { @@ -590,6 +599,8 @@ int cpu_exec(CPUState *cpu) #endif /* buggy compiler */ cpu->can_do_io = 1; tb_lock_reset(); + + tracer_reset(cpu->env_ptr); } } /* for(;;) */ @@ -66,6 +66,9 @@ #endif /* CONFIG_LINUX */ +#include "tcg.h" +#include "hqemu.h" + static CPUState *next_cpu; int64_t max_delay; int64_t max_advance; @@ -892,6 +895,18 @@ void qemu_init_cpu_loop(void) qemu_thread_get_self(&io_thread); } +void qemu_end_cpu_loop(void) +{ + CPUState *cpu; + + CPU_FOREACH(cpu) + optimization_finalize(cpu->env_ptr); + +#if defined(CONFIG_LLVM) + llvm_finalize(); +#endif +} + void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data) { struct qemu_work_item wi; @@ -1134,6 +1149,16 @@ static void *qemu_tcg_cpu_thread_fn(void *arg) /* process any pending work */ atomic_mb_set(&exit_request, 1); +#if defined(CONFIG_LLVM) + llvm_init(); +#endif + /* we can safely initialize optimization resources after + * the setup of CPUArchState is completed. */ + CPU_FOREACH(cpu) { + copy_tcg_context(); + optimization_init(cpu->env_ptr); + } + while (1) { tcg_exec_all(); @@ -19,6 +19,7 @@ #include "config.h" #include "cpu.h" +#include "exec/tb-hash.h" #include "exec/exec-all.h" #include "exec/memory.h" #include "exec/address-spaces.h" @@ -30,12 +31,38 @@ #include "exec/ram_addr.h" #include "tcg/tcg.h" +#include "hqemu.h" + +#if defined(ENABLE_TLBVERSION) +#define TLB_NONIO_MASK (TARGET_PAGE_MASK | TLB_INVALID_MASK | TLB_VERSION_MASK) +#define page_val(addr, env) (((tlbaddr_t)addr & TARGET_PAGE_MASK) | tlb_version(env)) +#else +#define TLB_NONIO_MASK (TARGET_PAGE_MASK | TLB_INVALID_MASK) +#define page_val(addr, env) (addr & TARGET_PAGE_MASK) +#endif + //#define DEBUG_TLB //#define DEBUG_TLB_CHECK /* statistics */ int tlb_flush_count; +static inline void tlb_reset(CPUArchState *env) +{ +#if defined(ENABLE_TLBVERSION) + tlbaddr_t version = env->tlb_version >> TLB_VERSION_SHIFT; + if (++version == TLB_VERSION_SIZE) { + version = 0; + memset(env->tlb_table, -1, sizeof(env->tlb_table)); + memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table)); + } + env->tlb_version = version << TLB_VERSION_SHIFT; +#else + memset(env->tlb_table, -1, sizeof(env->tlb_table)); + memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table)); +#endif +} + /* NOTE: * If flush_global is true (the usual case), flush all tlb entries. 
* If flush_global is false, flush (at least) all tlb entries not @@ -59,10 +86,12 @@ void tlb_flush(CPUState *cpu, int flush_global) links while we are modifying them */ cpu->current_tb = NULL; - memset(env->tlb_table, -1, sizeof(env->tlb_table)); - memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table)); + tlb_reset(env); memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache)); + optimization_reset(env, 0); + lpt_reset(env); + env->vtlb_index = 0; env->tlb_flush_addr = -1; env->tlb_flush_mask = 0; @@ -110,18 +139,67 @@ void tlb_flush_by_mmuidx(CPUState *cpu, ...) va_end(argp); } -static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr) +static inline void tlb_flush_entry(CPUArchState *env, CPUTLBEntry *tlb_entry, + target_ulong addr) { - if (addr == (tlb_entry->addr_read & - (TARGET_PAGE_MASK | TLB_INVALID_MASK)) || - addr == (tlb_entry->addr_write & - (TARGET_PAGE_MASK | TLB_INVALID_MASK)) || - addr == (tlb_entry->addr_code & - (TARGET_PAGE_MASK | TLB_INVALID_MASK))) { + if (page_val(addr, env) == (tlb_entry->addr_read & TLB_NONIO_MASK) || + page_val(addr, env) == (tlb_entry->addr_write & TLB_NONIO_MASK) || + page_val(addr, env) == (tlb_entry->addr_code & TLB_NONIO_MASK)) { memset(tlb_entry, -1, sizeof(*tlb_entry)); } } +#ifdef ENABLE_LPAGE +static int tlb_flush_large_page(CPUState *cpu, target_ulong addr) +{ + int i, j, k, ret, mmu_idx, num_base_pages, max_flush_pages; + target_ulong page_addr, page_size, flush_addr; + CPUArchState *env = cpu->env_ptr; + +#if defined(DEBUG_TLB) + printf("tlb_flush:\n"); +#endif + /* must reset current TB so that interrupts cannot modify the + links while we are modifying them */ + cpu->current_tb = NULL; + + ret = lpt_flush_page(env, addr, &page_addr, &page_size); + if (ret == 0) + return 0; + + /* If the large page occupies a small set of the tlb, do a partial flush + * optimzation, otherwise, do a full flush. */ + num_base_pages = page_size / TARGET_PAGE_SIZE; + max_flush_pages = (CPU_TLB_SIZE / 4 < 1024) ? 
CPU_TLB_SIZE / 4 : 1024; + if (num_base_pages > max_flush_pages) { + tlb_flush(cpu, 1); + return 1; + } + + for (i = 0; i < num_base_pages; i++) { + flush_addr = addr + i * TARGET_PAGE_SIZE; + j = (flush_addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); + for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) + tlb_flush_entry(env, &env->tlb_table[mmu_idx][j], flush_addr); + + /* check whether there are entries that need to be flushed in the vtlb */ + for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { + for (k = 0; k < CPU_VTLB_SIZE; k++) + tlb_flush_entry(env, &env->tlb_v_table[mmu_idx][k], flush_addr); + } + } + + for (i = -1; i < num_base_pages; i++) { + j = tb_jmp_cache_hash_page(addr + i * TARGET_PAGE_SIZE); + memset(&cpu->tb_jmp_cache[j], 0, + TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *)); + } + optimization_reset(env, 0); + + return 1; +} +#endif + void tlb_flush_page(CPUState *cpu, target_ulong addr) { CPUArchState *env = cpu->env_ptr; @@ -138,8 +216,14 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr) TARGET_FMT_lx "/" TARGET_FMT_lx ")\n", env->tlb_flush_addr, env->tlb_flush_mask); #endif + +#ifdef ENABLE_LPAGE + if (tlb_flush_large_page(cpu, addr)) + return; +#else tlb_flush(cpu, 1); return; +#endif } /* must reset current TB so that interrupts cannot modify the links while we are modifying them */ @@ -148,18 +232,19 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr) addr &= TARGET_PAGE_MASK; i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { - tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr); + tlb_flush_entry(env, &env->tlb_table[mmu_idx][i], addr); } /* check whether there are entries that need to be flushed in the vtlb */ for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { int k; for (k = 0; k < CPU_VTLB_SIZE; k++) { - tlb_flush_entry(&env->tlb_v_table[mmu_idx][k], addr); + tlb_flush_entry(env, &env->tlb_v_table[mmu_idx][k], addr); } } tb_flush_jmp_cache(cpu, addr); + optimization_flush_page(env, addr); } void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, ...) @@ -202,11 +287,11 @@ void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, ...) 
printf(" %d", mmu_idx); #endif - tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr); + tlb_flush_entry(env, &env->tlb_table[mmu_idx][i], addr); /* check whether there are vltb entries that need to be flushed */ for (k = 0; k < CPU_VTLB_SIZE; k++) { - tlb_flush_entry(&env->tlb_v_table[mmu_idx][k], addr); + tlb_flush_entry(env, &env->tlb_v_table[mmu_idx][k], addr); } } va_end(argp); @@ -284,10 +369,11 @@ void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length) } } -static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr) +static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr, + tlbaddr_t version) { - if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) { - tlb_entry->addr_write = vaddr; + if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY | version)) { + tlb_entry->addr_write = vaddr | version; } } @@ -302,13 +388,13 @@ void tlb_set_dirty(CPUState *cpu, target_ulong vaddr) vaddr &= TARGET_PAGE_MASK; i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { - tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr); + tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr, tlb_version(env)); } for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) { int k; for (k = 0; k < CPU_VTLB_SIZE; k++) { - tlb_set_dirty1(&env->tlb_v_table[mmu_idx][k], vaddr); + tlb_set_dirty1(&env->tlb_v_table[mmu_idx][k], vaddr, tlb_version(env)); } } } @@ -360,6 +446,7 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, assert(size >= TARGET_PAGE_SIZE); if (size != TARGET_PAGE_SIZE) { tlb_add_large_page(env, vaddr, size); + lpt_add_page(env, vaddr, size); } sz = size; @@ -424,6 +511,13 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr, } else { te->addr_write = -1; } + +#ifdef ENABLE_TLBVERSION + tlbaddr_t version = tlb_version(env); + te->addr_read |= version; + te->addr_write |= version; + te->addr_code |= version; +#endif } /* Add a new TLB entry, but without specifying the memory @@ -452,7 +546,7 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr) page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); mmu_idx = cpu_mmu_index(env1, true); if (unlikely(env1->tlb_table[mmu_idx][page_index].addr_code != - (addr & TARGET_PAGE_MASK))) { + page_val(addr, env1))) { cpu_ldub_code(env1, addr); } pd = env1->iotlb[mmu_idx][page_index].addr & ~TARGET_PAGE_MASK; @@ -471,6 +565,9 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr) return qemu_ram_addr_from_host_nofail(p); } +#undef TLB_NONIO_MASK +#undef page_val + #define MMUSUFFIX _mmu #define SHIFT 0 @@ -706,7 +706,7 @@ int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len, } wp = g_malloc(sizeof(*wp)); - wp->vaddr = addr; + wp->addr = addr; wp->len = len; wp->flags = flags; @@ -731,7 +731,7 @@ int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len, CPUWatchpoint *wp; QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) { - if (addr == wp->vaddr && len == wp->len + if (addr == wp->addr && len == wp->len && flags == (wp->flags & ~BP_WATCHPOINT_HIT)) { cpu_watchpoint_remove_by_ref(cpu, wp); return 0; @@ -745,7 +745,7 @@ void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint) { QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry); - tlb_flush_page(cpu, watchpoint->vaddr); + tlb_flush_page(cpu, watchpoint->addr); g_free(watchpoint); } @@ -776,10 +776,10 @@ static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp, * exactly at the top of the address space and 
so addr + len * wraps round to zero. */ - vaddr wpend = wp->vaddr + wp->len - 1; + vaddr wpend = wp->addr + wp->len - 1; vaddr addrend = addr + len - 1; - return !(addr > wpend || wp->vaddr > addrend); + return !(addr > wpend || wp->addr > addrend); } #endif @@ -1267,7 +1267,7 @@ static void gdb_vm_state_change(void *opaque, int running, RunState state) snprintf(buf, sizeof(buf), "T%02xthread:%02x;%swatch:" TARGET_FMT_lx ";", GDB_SIGNAL_TRAP, cpu_index(cpu), type, - (target_ulong)cpu->watchpoint_hit->vaddr); + (target_ulong)cpu->watchpoint_hit->addr); cpu->watchpoint_hit = NULL; goto send_packet; } diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h index 83b1781..9471dc6 100644 --- a/include/exec/cpu-all.h +++ b/include/exec/cpu-all.h @@ -271,12 +271,12 @@ CPUArchState *cpu_copy(CPUArchState *env); /* Flags stored in the low bits of the TLB virtual address. These are defined so that fast path ram access is all zeros. */ /* Zero if TLB entry is valid. */ -#define TLB_INVALID_MASK (1 << 3) +#define TLB_INVALID_MASK (1 << TLB_INVALID_SHIFT) /* Set if TLB entry references a clean RAM page. The iotlb entry will contain the page physical address. */ -#define TLB_NOTDIRTY (1 << 4) +#define TLB_NOTDIRTY (1 << TLB_NOTDIRTY_SHIFT) /* Set if TLB entry is an IO callback. */ -#define TLB_MMIO (1 << 5) +#define TLB_MMIO (1 << TLB_MMIO_SHIFT) void dump_exec_info(FILE *f, fprintf_function cpu_fprintf); void dump_opcount_info(FILE *f, fprintf_function cpu_fprintf); diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h index 85aa403..ce7deb9 100644 --- a/include/exec/cpu-common.h +++ b/include/exec/cpu-common.h @@ -76,12 +76,12 @@ void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf, static inline void cpu_physical_memory_read(hwaddr addr, void *buf, int len) { - cpu_physical_memory_rw(addr, buf, len, 0); + cpu_physical_memory_rw(addr, (uint8_t *)buf, len, 0); } static inline void cpu_physical_memory_write(hwaddr addr, const void *buf, int len) { - cpu_physical_memory_rw(addr, (void *)buf, len, 1); + cpu_physical_memory_rw(addr, (uint8_t *)buf, len, 1); } void *cpu_physical_memory_map(hwaddr addr, hwaddr *plen, diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h index 5093be2..b44e3f2 100644 --- a/include/exec/cpu-defs.h +++ b/include/exec/cpu-defs.h @@ -56,6 +56,8 @@ typedef uint64_t target_ulong; #error TARGET_LONG_SIZE undefined #endif +#include "hqemu-config.h" + #if !defined(CONFIG_USER_ONLY) /* use a fully associative victim tlb of 8 entries */ #define CPU_VTLB_SIZE 8 @@ -89,7 +91,7 @@ typedef uint64_t target_ulong; * of tlb_table inside env (which is non-trivial but not huge). */ #define CPU_TLB_BITS \ - MIN(8, \ + MIN(12, \ TCG_TARGET_TLB_DISPLACEMENT_BITS - CPU_TLB_ENTRY_BITS - \ (NB_MMU_MODES <= 1 ? 0 : \ NB_MMU_MODES <= 2 ? 1 : \ @@ -107,9 +109,9 @@ typedef struct CPUTLBEntry { */ union { struct { - target_ulong addr_read; - target_ulong addr_write; - target_ulong addr_code; + tlbaddr_t addr_read; + tlbaddr_t addr_write; + tlbaddr_t addr_code; /* Addend to virtual address to get host address. IO accesses use the corresponding iotlb value. 
*/ uintptr_t addend; @@ -140,6 +142,7 @@ typedef struct CPUIOTLBEntry { target_ulong tlb_flush_addr; \ target_ulong tlb_flush_mask; \ target_ulong vtlb_index; \ + tlbaddr_t tlb_version; \ #else diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h index b573df5..72acce7 100644 --- a/include/exec/cpu_ldst.h +++ b/include/exec/cpu_ldst.h @@ -405,7 +405,7 @@ static inline void *tlb_vaddr_to_host(CPUArchState *env, target_ulong addr, #else int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); CPUTLBEntry *tlbentry = &env->tlb_table[mmu_idx][index]; - target_ulong tlb_addr; + tlbaddr_t tlb_addr; uintptr_t haddr; switch (access_type) { @@ -422,13 +422,22 @@ static inline void *tlb_vaddr_to_host(CPUArchState *env, target_ulong addr, g_assert_not_reached(); } +#if defined(ENABLE_TLBVERSION) + if (tlb_version(env) != (tlb_addr & TLB_VERSION_MASK)) + return NULL; +#endif + if ((addr & TARGET_PAGE_MASK) != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) { /* TLB entry is for a different page */ return NULL; } +#if defined(ENABLE_TLBVERSION) + if (tlb_addr & (TLB_NOTDIRTY | TLB_MMIO)) { +#else if (tlb_addr & ~TARGET_PAGE_MASK) { +#endif /* IO access */ return NULL; } diff --git a/include/exec/cpu_ldst_template.h b/include/exec/cpu_ldst_template.h index 3091c00..2a01c6f 100644 --- a/include/exec/cpu_ldst_template.h +++ b/include/exec/cpu_ldst_template.h @@ -67,6 +67,14 @@ #define SRETSUFFIX glue(s, SUFFIX) #endif +#include "hqemu.h" + +#if defined(ENABLE_TLBVERSION) +#define page_val(addr, env) ((((tlbaddr_t)addr + DATA_SIZE - 1) & TARGET_PAGE_MASK) | tlb_version(env)) +#else +#define page_val(addr, env) (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))) +#endif + /* generic load/store macros */ static inline RES_TYPE @@ -80,12 +88,17 @@ glue(glue(glue(cpu_ld, USUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, int mmu_idx; TCGMemOpIdx oi; +#ifdef SOFTMMU_CODE_ACCESS + if (build_llvm_only(env)) + return glue(glue(ld, USUFFIX), _p)((uint8_t *)env->image_base + ptr); +#endif + addr = ptr; page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); mmu_idx = CPU_MMU_INDEX; if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ != - (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) { - oi = make_memop_idx(SHIFT, mmu_idx); + page_val(addr, env))) { + oi = make_memop_idx((TCGMemOp)SHIFT, mmu_idx); res = glue(glue(helper_ret_ld, URETSUFFIX), MMUSUFFIX)(env, addr, oi, retaddr); } else { @@ -112,12 +125,17 @@ glue(glue(glue(cpu_lds, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, int mmu_idx; TCGMemOpIdx oi; +#ifdef SOFTMMU_CODE_ACCESS + if (build_llvm_only(env)) + return glue(glue(lds, SUFFIX), _p)((uint8_t *)env->image_base + ptr); +#endif + addr = ptr; page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); mmu_idx = CPU_MMU_INDEX; if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ != - (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) { - oi = make_memop_idx(SHIFT, mmu_idx); + page_val(addr, env))) { + oi = make_memop_idx((TCGMemOp)SHIFT, mmu_idx); res = (DATA_STYPE)glue(glue(helper_ret_ld, SRETSUFFIX), MMUSUFFIX)(env, addr, oi, retaddr); } else { @@ -152,8 +170,8 @@ glue(glue(glue(cpu_st, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env, page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); mmu_idx = CPU_MMU_INDEX; if (unlikely(env->tlb_table[mmu_idx][page_index].addr_write != - (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) { - oi = make_memop_idx(SHIFT, mmu_idx); + page_val(addr, env))) { + oi = make_memop_idx((TCGMemOp)SHIFT, mmu_idx); glue(glue(helper_ret_st, SUFFIX), 
MMUSUFFIX)(env, addr, v, oi, retaddr); } else { @@ -171,6 +189,7 @@ glue(glue(cpu_st, SUFFIX), MEMSUFFIX)(CPUArchState *env, target_ulong ptr, #endif /* !SOFTMMU_CODE_ACCESS */ +#undef page_val #undef RES_TYPE #undef DATA_TYPE #undef DATA_STYPE diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index d900b0d..a225bea 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -21,6 +21,7 @@ #define _EXEC_ALL_H_ #include "qemu-common.h" +#include "hqemu-config.h" /* allow to see translation results - the slowdown should be negligible, so we leave it */ #define DEBUG_DISAS @@ -59,7 +60,7 @@ typedef struct TranslationBlock TranslationBlock; * and up to 4 + N parameters on 64-bit archs * (N = number of input arguments + output arguments). */ #define MAX_OPC_PARAM (4 + (MAX_OPC_PARAM_PER_ARG * MAX_OPC_PARAM_ARGS)) -#define OPC_BUF_SIZE 640 +#define OPC_BUF_SIZE 2048 #define OPC_MAX_SIZE (OPC_BUF_SIZE - MAX_OP_PER_INSTR) #define OPPARAM_BUF_SIZE (OPC_BUF_SIZE * MAX_OPC_PARAM) @@ -216,6 +217,8 @@ struct TranslationBlock { jmp_first */ struct TranslationBlock *jmp_next[2]; struct TranslationBlock *jmp_first; + + TB_OPTIMIZATION_COMMON }; #include "qemu/thread.h" @@ -305,7 +308,7 @@ static inline void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr) { uint16_t offset = tb->tb_jmp_offset[n]; - tb_set_jmp_target1((uintptr_t)(tb->tc_ptr + offset), addr); + tb_set_jmp_target1((uintptr_t)((uint8_t *)tb->tc_ptr + offset), addr); } #else @@ -405,4 +408,6 @@ extern int singlestep; extern CPUState *tcg_current_cpu; extern bool exit_request; +size_t get_cpu_size(void); + #endif diff --git a/include/exec/memory.h b/include/exec/memory.h index 0f07159..c2a1cd3 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -208,9 +208,9 @@ struct MemoryListener { void (*region_del)(MemoryListener *listener, MemoryRegionSection *section); void (*region_nop)(MemoryListener *listener, MemoryRegionSection *section); void (*log_start)(MemoryListener *listener, MemoryRegionSection *section, - int old, int new); + int _old, int _new); void (*log_stop)(MemoryListener *listener, MemoryRegionSection *section, - int old, int new); + int _old, int _new); void (*log_sync)(MemoryListener *listener, MemoryRegionSection *section); void (*log_global_start)(MemoryListener *listener); void (*log_global_stop)(MemoryListener *listener); diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h index c537969..4453e5b 100644 --- a/include/hw/qdev-core.h +++ b/include/hw/qdev-core.h @@ -10,6 +10,8 @@ #include "qapi/error.h" #include "hw/hotplug.h" +#define typename QEMUtypename + enum { DEV_NVECTORS_UNSPECIFIED = -1, }; @@ -401,4 +403,6 @@ static inline bool qbus_is_hotpluggable(BusState *bus) void device_listener_register(DeviceListener *listener); void device_listener_unregister(DeviceListener *listener); +#undef typename + #endif diff --git a/include/qemu-common.h b/include/qemu-common.h index 405364f..d0c2e20 100644 --- a/include/qemu-common.h +++ b/include/qemu-common.h @@ -454,7 +454,7 @@ int mod_utf8_codepoint(const char *s, size_t n, char **end); void qemu_hexdump(const char *buf, FILE *fp, const char *prefix, size_t size); /* vector definitions */ -#ifdef __ALTIVEC__ +#if defined(__ALTIVEC__) && !defined(__clang__) #include <altivec.h> /* The altivec.h header says we're allowed to undef these for * C++ compatibility. 
Here we don't care about C++, but we diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h index bd2c075..e2125bd 100644 --- a/include/qemu/atomic.h +++ b/include/qemu/atomic.h @@ -158,13 +158,13 @@ #ifndef atomic_rcu_read #ifdef __ATOMIC_CONSUME #define atomic_rcu_read(ptr) ({ \ - typeof(*ptr) _val; \ + __typeof__(*ptr) _val; \ __atomic_load(ptr, &_val, __ATOMIC_CONSUME); \ _val; \ }) #else #define atomic_rcu_read(ptr) ({ \ - typeof(*ptr) _val = atomic_read(ptr); \ + __typeof__(*ptr) _val = atomic_read(ptr); \ smp_read_barrier_depends(); \ _val; \ }) @@ -185,7 +185,7 @@ #ifndef atomic_rcu_set #ifdef __ATOMIC_RELEASE #define atomic_rcu_set(ptr, i) do { \ - typeof(*ptr) _val = (i); \ + __typeof__(*ptr) _val = (i); \ __atomic_store(ptr, &_val, __ATOMIC_RELEASE); \ } while(0) #else @@ -220,7 +220,7 @@ */ #ifndef atomic_mb_read #define atomic_mb_read(ptr) ({ \ - typeof(*ptr) _val = atomic_read(ptr); \ + __typeof__(*ptr) _val = atomic_read(ptr); \ smp_rmb(); \ _val; \ }) @@ -239,7 +239,7 @@ #define atomic_xchg(ptr, i) __sync_swap(ptr, i) #elif defined(__ATOMIC_SEQ_CST) #define atomic_xchg(ptr, i) ({ \ - typeof(*ptr) _new = (i), _old; \ + __typeof__(*ptr) _new = (i), _old; \ __atomic_exchange(ptr, &_new, &_old, __ATOMIC_SEQ_CST); \ _old; \ }) diff --git a/include/qemu/bitmap.h b/include/qemu/bitmap.h index 86dd9cd..b53f462 100644 --- a/include/qemu/bitmap.h +++ b/include/qemu/bitmap.h @@ -71,7 +71,7 @@ unsigned long name[BITS_TO_LONGS(bits)] #define small_nbits(nbits) \ - ((nbits) <= BITS_PER_LONG) + ((nbits) <= (long)BITS_PER_LONG) int slow_bitmap_empty(const unsigned long *bitmap, long bits); int slow_bitmap_full(const unsigned long *bitmap, long bits); @@ -97,7 +97,7 @@ int slow_bitmap_intersects(const unsigned long *bitmap1, static inline unsigned long *bitmap_try_new(long nbits) { long len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); - return g_try_malloc0(len); + return (unsigned long *)g_try_malloc0(len); } static inline unsigned long *bitmap_new(long nbits) @@ -241,9 +241,9 @@ static inline unsigned long *bitmap_zero_extend(unsigned long *old, long old_nbits, long new_nbits) { long new_len = BITS_TO_LONGS(new_nbits) * sizeof(unsigned long); - unsigned long *new = g_realloc(old, new_len); - bitmap_clear(new, old_nbits, new_nbits - old_nbits); - return new; + unsigned long *new_bitmap = (unsigned long *)g_realloc(old, new_len); + bitmap_clear(new_bitmap, old_nbits, new_nbits - old_nbits); + return new_bitmap; } #endif /* BITMAP_H */ diff --git a/include/qemu/compiler.h b/include/qemu/compiler.h index d22eb01..0abf0f8 100644 --- a/include/qemu/compiler.h +++ b/include/qemu/compiler.h @@ -60,7 +60,7 @@ #ifndef container_of #define container_of(ptr, type, member) ({ \ - const typeof(((type *) 0)->member) *__mptr = (ptr); \ + const __typeof__(((type *) 0)->member) *__mptr = (ptr); \ (type *) ((char *) __mptr - offsetof(type, member));}) #endif @@ -74,7 +74,7 @@ #define DO_UPCAST(type, field, dev) container_of(dev, type, field) #endif -#define typeof_field(type, field) typeof(((type *)0)->field) +#define typeof_field(type, field) __typeof__(((type *)0)->field) #define type_check(t1,t2) ((t1*)0 - (t2*)0) #ifndef always_inline diff --git a/include/qemu/queue.h b/include/qemu/queue.h index f781aa2..b56bce5 100644 --- a/include/qemu/queue.h +++ b/include/qemu/queue.h @@ -198,7 +198,7 @@ struct { \ } while (/*CONSTCOND*/0) #define QSLIST_INSERT_HEAD_ATOMIC(head, elm, field) do { \ - typeof(elm) save_sle_next; \ + __typeof__(elm) save_sle_next; \ do { \ save_sle_next = (elm)->field.sle_next = 
(head)->slh_first; \ } while (atomic_cmpxchg(&(head)->slh_first, save_sle_next, (elm)) != \ diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h index f6d1d56..0d9f677 100644 --- a/include/qemu/rcu.h +++ b/include/qemu/rcu.h @@ -135,8 +135,8 @@ extern void call_rcu1(struct rcu_head *head, RCUCBFunc *func); #define call_rcu(head, func, field) \ call_rcu1(({ \ char __attribute__((unused)) \ - offset_must_be_zero[-offsetof(typeof(*(head)), field)], \ - func_type_invalid = (func) - (void (*)(typeof(head)))(func); \ + offset_must_be_zero[-offsetof(__typeof__(*(head)), field)], \ + func_type_invalid = (func) - (void (*)(__typeof__(head)))(func); \ &(head)->field; \ }), \ (RCUCBFunc *)(func)) @@ -144,7 +144,7 @@ extern void call_rcu1(struct rcu_head *head, RCUCBFunc *func); #define g_free_rcu(obj, field) \ call_rcu1(({ \ char __attribute__((unused)) \ - offset_must_be_zero[-offsetof(typeof(*(obj)), field)]; \ + offset_must_be_zero[-offsetof(__typeof__(*(obj)), field)]; \ &(obj)->field; \ }), \ (RCUCBFunc *)g_free); diff --git a/include/qemu/timer.h b/include/qemu/timer.h index d0946cb..a16effa 100644 --- a/include/qemu/timer.h +++ b/include/qemu/timer.h @@ -523,7 +523,7 @@ static inline QEMUTimer *timer_new_tl(QEMUTimerList *timer_list, QEMUTimerCB *cb, void *opaque) { - QEMUTimer *ts = g_malloc0(sizeof(QEMUTimer)); + QEMUTimer *ts = (QEMUTimer *)g_malloc0(sizeof(QEMUTimer)); timer_init_tl(ts, timer_list, scale, cb, opaque); return ts; } @@ -965,7 +965,7 @@ static inline int64_t cpu_get_host_ticks (void) #define MIPS_RDHWR(rd, value) { \ __asm__ __volatile__ (".set push\n\t" \ ".set mips32r2\n\t" \ - "rdhwr %0, "rd"\n\t" \ + "rdhwr %0, " rd "\n\t" \ ".set pop" \ : "=r" (value)); \ } diff --git a/include/qom/cpu.h b/include/qom/cpu.h index 51a1323..4b005ff 100644 --- a/include/qom/cpu.h +++ b/include/qom/cpu.h @@ -30,6 +30,8 @@ #include "qemu/thread.h" #include "qemu/typedefs.h" +#define typename QEMUtypename + typedef int (*WriteCoreDumpFunction)(const void *buf, size_t size, void *opaque); @@ -196,7 +198,7 @@ typedef struct CPUBreakpoint { } CPUBreakpoint; typedef struct CPUWatchpoint { - vaddr vaddr; + vaddr addr; vaddr len; vaddr hitaddr; MemTxAttrs hitattrs; @@ -775,4 +777,7 @@ extern const struct VMStateDescription vmstate_cpu_common; .offset = 0, \ } +CPUState *cpu_create(void); +#undef typename + #endif diff --git a/include/qom/object.h b/include/qom/object.h index 4509166..118c227 100644 --- a/include/qom/object.h +++ b/include/qom/object.h @@ -20,6 +20,10 @@ #include "qemu/queue.h" #include "qapi/error.h" +#define Type QEMUType +#define class QEMUclass +#define typename QEMUtypename + struct Visitor; struct TypeImpl; @@ -1570,5 +1574,8 @@ int object_child_foreach_recursive(Object *obj, */ Object *container_get(Object *root, const char *path); +#undef Type +#undef class +#undef typename #endif diff --git a/include/sysemu/cpus.h b/include/sysemu/cpus.h index 3d1e5ba..d594ebf 100644 --- a/include/sysemu/cpus.h +++ b/include/sysemu/cpus.h @@ -4,6 +4,7 @@ /* cpus.c */ bool qemu_in_vcpu_thread(void); void qemu_init_cpu_loop(void); +void qemu_end_cpu_loop(void); void resume_all_vcpus(void); void pause_all_vcpus(void); void cpu_stop_current(void); diff --git a/linux-user/elfload.c b/linux-user/elfload.c index 8b17c0e..7be6e71 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -2001,9 +2001,13 @@ static void load_elf_image(const char *image_name, int image_fd, info->brk = info->end_code; } +#if defined(CONFIG_LLVM) + load_symbols(ehdr, image_fd, load_bias); +#else if 
(qemu_log_enabled()) { load_symbols(ehdr, image_fd, load_bias); } +#endif close(image_fd); return; diff --git a/linux-user/main.c b/linux-user/main.c index 8acfe0f..0f67ad4 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -33,11 +33,12 @@ #include "qemu/timer.h" #include "qemu/envlist.h" #include "elf.h" +#include "hqemu.h" char *exec_path; int singlestep; -static const char *filename; +const char *filename; static const char *argv0; static int gdbstub_port; static envlist_t *envlist; @@ -105,7 +106,10 @@ static int pending_cpus; /* Make sure everything is in a consistent state for calling fork(). */ void fork_start(void) { - qemu_mutex_lock(&tcg_ctx.tb_ctx.tb_lock); +#if defined(CONFIG_LLVM) + llvm_fork_start(); +#endif + qemu_mutex_lock(&tcg_ctx.tb_ctx->tb_lock); pthread_mutex_lock(&exclusive_lock); mmap_fork_start(); } @@ -127,12 +131,15 @@ void fork_end(int child) pthread_mutex_init(&cpu_list_mutex, NULL); pthread_cond_init(&exclusive_cond, NULL); pthread_cond_init(&exclusive_resume, NULL); - qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock); + qemu_mutex_init(&tcg_ctx.tb_ctx->tb_lock); gdbserver_fork(thread_cpu); } else { pthread_mutex_unlock(&exclusive_lock); - qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock); + qemu_mutex_unlock(&tcg_ctx.tb_ctx->tb_lock); } +#if defined(CONFIG_LLVM) + llvm_fork_end(child); +#endif } /* Wait for pending exclusive operations to complete. The exclusive lock @@ -276,6 +283,9 @@ void cpu_loop(CPUX86State *env) abi_ulong pc; target_siginfo_t info; + copy_tcg_context(); + optimization_init(env); + for(;;) { cpu_exec_start(cs); trapnr = cpu_x86_exec(cs); @@ -670,6 +680,9 @@ void cpu_loop(CPUARMState *env) target_siginfo_t info; uint32_t addr; + copy_tcg_context(); + optimization_init(env); + for(;;) { cpu_exec_start(cs); trapnr = cpu_arm_exec(cs); @@ -1001,6 +1014,9 @@ void cpu_loop(CPUARMState *env) int trapnr, sig; target_siginfo_t info; + copy_tcg_context(); + optimization_init(env); + for (;;) { cpu_exec_start(cs); trapnr = cpu_arm_exec(cs); @@ -1083,6 +1099,9 @@ void cpu_loop(CPUUniCore32State *env) unsigned int n, insn; target_siginfo_t info; + copy_tcg_context(); + optimization_init(env); + for (;;) { cpu_exec_start(cs); trapnr = uc32_cpu_exec(cs); @@ -1284,6 +1303,9 @@ void cpu_loop (CPUSPARCState *env) abi_long ret; target_siginfo_t info; + copy_tcg_context(); + optimization_init(env); + while (1) { cpu_exec_start(cs); trapnr = cpu_sparc_exec(cs); @@ -1564,6 +1586,9 @@ void cpu_loop(CPUPPCState *env) int trapnr; target_ulong ret; + copy_tcg_context(); + optimization_init(env); + for(;;) { cpu_exec_start(cs); trapnr = cpu_ppc_exec(cs); @@ -2416,6 +2441,9 @@ void cpu_loop(CPUMIPSState *env) unsigned int syscall_num; # endif + copy_tcg_context(); + optimization_init(env); + for(;;) { cpu_exec_start(cs); trapnr = cpu_mips_exec(cs); @@ -2653,6 +2681,9 @@ void cpu_loop(CPUOpenRISCState *env) CPUState *cs = CPU(openrisc_env_get_cpu(env)); int trapnr, gdbsig; + copy_tcg_context(); + optimization_init(env); + for (;;) { cpu_exec_start(cs); trapnr = cpu_openrisc_exec(cs); @@ -2743,6 +2774,9 @@ void cpu_loop(CPUSH4State *env) int trapnr, ret; target_siginfo_t info; + copy_tcg_context(); + optimization_init(env); + while (1) { cpu_exec_start(cs); trapnr = cpu_sh4_exec(cs); @@ -2805,6 +2839,9 @@ void cpu_loop(CPUCRISState *env) int trapnr, ret; target_siginfo_t info; + copy_tcg_context(); + optimization_init(env); + while (1) { cpu_exec_start(cs); trapnr = cpu_cris_exec(cs); @@ -2866,6 +2903,9 @@ void cpu_loop(CPUMBState *env) int trapnr, ret; target_siginfo_t info; 
+ copy_tcg_context(); + optimization_init(env); + while (1) { cpu_exec_start(cs); trapnr = cpu_mb_exec(cs); @@ -2971,6 +3011,9 @@ void cpu_loop(CPUM68KState *env) target_siginfo_t info; TaskState *ts = cs->opaque; + copy_tcg_context(); + optimization_init(env); + for(;;) { cpu_exec_start(cs); trapnr = cpu_m68k_exec(cs); @@ -3110,6 +3153,9 @@ void cpu_loop(CPUAlphaState *env) target_siginfo_t info; abi_long sysret; + copy_tcg_context(); + optimization_init(env); + while (1) { cpu_exec_start(cs); trapnr = cpu_alpha_exec(cs); @@ -3298,6 +3344,9 @@ void cpu_loop(CPUS390XState *env) target_siginfo_t info; target_ulong addr; + copy_tcg_context(); + optimization_init(env); + while (1) { cpu_exec_start(cs); trapnr = cpu_s390x_exec(cs); @@ -3602,6 +3651,9 @@ void cpu_loop(CPUTLGState *env) CPUState *cs = CPU(tilegx_env_get_cpu(env)); int trapnr; + copy_tcg_context(); + optimization_init(env); + while (1) { cpu_exec_start(cs); trapnr = cpu_tilegx_exec(cs); @@ -3711,7 +3763,7 @@ CPUArchState *cpu_copy(CPUArchState *env) cpu_breakpoint_insert(new_cpu, bp->pc, bp->flags, NULL); } QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) { - cpu_watchpoint_insert(new_cpu, wp->vaddr, wp->len, wp->flags, NULL); + cpu_watchpoint_insert(new_cpu, wp->addr, wp->len, wp->flags, NULL); } return new_env; @@ -4009,6 +4061,12 @@ static void usage(int exitcode) "Note that if you provide several changes to a single variable\n" "the last change will stay in effect.\n"); +#if defined(CONFIG_LLVM) + printf("\n\nHQEMU "); + fflush(stdout); + hqemu_help(); +#endif + exit(exitcode); } @@ -4324,7 +4382,11 @@ int main(int argc, char **argv, char **envp) /* Now that we've loaded the binary, GUEST_BASE is fixed. Delay generating the prologue until now so that the prologue can take the real value of GUEST_BASE into account. 
*/ - tcg_prologue_init(&tcg_ctx); + tcg_prologue_init(&tcg_ctx_global); + +#if defined(CONFIG_LLVM) + llvm_init(); +#endif #if defined(TARGET_I386) env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK; @@ -4663,6 +4725,7 @@ int main(int argc, char **argv, char **envp) } gdb_handlesig(cpu, 0); } + cpu_loop(env); /* never exits */ return 0; diff --git a/linux-user/strace.c b/linux-user/strace.c index ea6c1d2..69d5408 100644 --- a/linux-user/strace.c +++ b/linux-user/strace.c @@ -7,6 +7,7 @@ #include <sys/types.h> #include <sys/mount.h> #include <sys/mman.h> +#include <sys/sysmacros.h> #include <unistd.h> #include <sched.h> #include "qemu.h" diff --git a/linux-user/syscall.c b/linux-user/syscall.c index 6c64ba6..030eb2a 100644 --- a/linux-user/syscall.c +++ b/linux-user/syscall.c @@ -114,6 +114,7 @@ int __clone2(int (*fn)(void *), void *child_stack_base, #include "uname.h" #include "qemu.h" +#include "hqemu.h" #define CLONE_NPTL_FLAGS2 (CLONE_SETTLS | \ CLONE_PARENT_SETTID | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID) @@ -4495,7 +4496,7 @@ abi_long do_arch_prctl(CPUX86State *env, int code, abi_ulong addr) #endif /* defined(TARGET_I386) */ -#define NEW_STACK_SIZE 0x40000 +#define NEW_STACK_SIZE 0x80000 static pthread_mutex_t clone_lock = PTHREAD_MUTEX_INITIALIZER; @@ -5710,6 +5711,12 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, rcu_unregister_thread(); pthread_exit(NULL); } + + optimization_finalize((CPUArchState *)cpu_env); +#if defined(CONFIG_LLVM) + llvm_finalize(); +#endif + #ifdef TARGET_GPROF _mcleanup(); #endif @@ -7615,6 +7622,10 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1, #ifdef __NR_exit_group /* new thread calls */ case TARGET_NR_exit_group: + optimization_finalize((CPUArchState *)cpu_env); +#if defined(CONFIG_LLVM) + llvm_finalize(); +#endif #ifdef TARGET_GPROF _mcleanup(); #endif diff --git a/llvm/analysis/InnerLoopAnalysis.cpp b/llvm/analysis/InnerLoopAnalysis.cpp new file mode 100644 index 0000000..f67d380 --- /dev/null +++ b/llvm/analysis/InnerLoopAnalysis.cpp @@ -0,0 +1,631 @@ +/* + * (C) 2016 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm-debug.h" +#include "llvm-opc.h" +#include "llvm-pass.h" +#include "InnerLoopAnalysis.h" + + +/* + * The InnertLoop class represents a single innermost loop. The shape of + * InnerLoop is specific to the DBT decoded guest loop, and its loop definition + * is different to a nature loop, e.g., latch and exiting block. + * For example, the binary of a nature loop (a) will be translated to the loop + * CFG (b), which includes an additional block L(loopback) to check flag + * `tcg_exit_req' and exits the loop to block E if the flag is raised, otherwise, + * goes back to the loop header A. + * + * In loop (b), a latch is split into two blocks, B and L. The loop bottom test + * is in block B and the backward branch is included in block L (which also + * has an exit block E attached to it). We include block L in the loop body and + * have the following definitions: (1) block B and L are latch head and tail, + * respectively; (2) a latch tail is the source of a backedge; (3) block B is a + * loop exiting block, but block L is not, and block E is not included in the + * exit blocks. 
+ * + * (a) A (b) A + * || | + * B B + * / / \ + * C C L -> A + * \ + * E + */ +InnerLoop::InnerLoop(Loop *loop) + : TheLoop(*loop), Blocks(TheLoop.getBlocks()), UnknownPhi(false) +{ + for (auto BB : Blocks) + DenseBlockSet.insert(BB); + + /* Find all latches and split latches. */ + SmallVector<BasicBlock *, 8> LoopLatches; + TheLoop.getLoopLatches(LoopLatches); + for (BasicBlock *BB : LoopLatches) { + Latches.push_back(BB); + + if (MDFactory::isLoop(BB->getTerminator()) && + BB->getSinglePredecessor()) { + /* Map latch tail to latch head. */ + SplitLatches[BB] = BB->getSinglePredecessor(); + } + } +} + + +/* True if terminator in the block can branch to another block that is + * outside of the current loop. */ +bool InnerLoop::isLoopExiting(BasicBlock *BB) const +{ + if (SplitLatches.find(BB) != SplitLatches.end()) + return false; + + typedef GraphTraits<const BasicBlock*> BlockTraits; + for (typename BlockTraits::ChildIteratorType SI = + BlockTraits::child_begin(BB), + SE = BlockTraits::child_end(BB); SI != SE; ++SI) { + if (!contains(*SI)) + return true; + } + return false; +} + +/* Calculate the number of back edges to the loop header. */ +unsigned InnerLoop::getNumBackEdges() const +{ + unsigned NumBackEdges = 0; + BasicBlock *H = getHeader(); + + typedef GraphTraits<Inverse<BasicBlock*> > InvBlockTraits; + for (typename InvBlockTraits::ChildIteratorType I = + InvBlockTraits::child_begin(H), + E = InvBlockTraits::child_end(H); I != E; ++I) + if (contains(*I)) + ++NumBackEdges; + + return NumBackEdges; +} + +/* Return all blocks inside the loop that have successors outside of the loop. */ +void InnerLoop::getExitingBlocks(SmallVectorImpl<BasicBlock *> &ExitingBlocks) const +{ + typedef GraphTraits<BasicBlock *> BlockTraits; + for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI) { + /* Skip the latch tail block. */ + if (SplitLatches.find(*BI) != SplitLatches.end()) + continue; + + for (typename BlockTraits::ChildIteratorType I = + BlockTraits::child_begin(*BI), E = BlockTraits::child_end(*BI); + I != E; ++I) + if (!contains(*I)) { + /* Not in current loop? It must be an exit block. */ + ExitingBlocks.push_back(*BI); + break; + } + } +} + +/* If getExitingBlocks would return exactly one block, return that block. + * Otherwise return null. */ +BasicBlock *InnerLoop::getExitingBlock() const +{ + SmallVector<BasicBlock *, 8> ExitingBlocks; + getExitingBlocks(ExitingBlocks); + if (ExitingBlocks.size() == 1) + return ExitingBlocks[0]; + return nullptr; +} + +/* Return all of the successor blocks of this loop. */ +void InnerLoop::getExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const +{ + typedef GraphTraits<BasicBlock *> BlockTraits; + for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI) { + /* Skip the latch tail block. */ + if (SplitLatches.find(*BI) != SplitLatches.end()) + continue; + + for (typename BlockTraits::ChildIteratorType I = + BlockTraits::child_begin(*BI), E = BlockTraits::child_end(*BI); + I != E; ++I) + if (!contains(*I)) + /* Not in current loop? It must be an exit block. */ + ExitBlocks.push_back(*I); + } +} + +/* If getExitBlocks would return exactly one block, return that block. + * Otherwise return null. */ +BasicBlock *InnerLoop::getExitBlock() const +{ + SmallVector<BasicBlock *, 8> ExitBlocks; + getExitBlocks(ExitBlocks); + if (ExitBlocks.size() == 1) + return ExitBlocks[0]; + return nullptr; +} + +/* If there is a preheader for this loop, return it. 
A loop has a preheader + * if there is only one edge to the header of the loop from outside of the + * loop. If this is the case, the block branching to the header of the loop + * is the preheader node. + * + * This method returns null if there is no preheader for the loop. */ +BasicBlock *InnerLoop::getLoopPreheader() const +{ + /* Keep track of nodes outside the loop branching to the header. */ + BasicBlock *Out = getLoopPredecessor(); + if (!Out) return nullptr; + + /* Make sure there is only one exit out of the preheader. */ + typedef GraphTraits<BasicBlock *> BlockTraits; + typename BlockTraits::ChildIteratorType SI = BlockTraits::child_begin(Out); + ++SI; + if (SI != BlockTraits::child_end(Out)) + return nullptr; /* Multiple exits from the block, must not be a preheader. */ + + /* The predecessor has exactly one successor, so it is a preheader. */ + return Out; +} + +/* If the given loop's header has exactly one unique predecessor outside the + * loop, return it. Otherwise return null. + * This is less strict that the loop "preheader" concept, which requires + * the predecessor to have exactly one successor. */ +BasicBlock *InnerLoop::getLoopPredecessor() const +{ + /* Keep track of nodes outside the loop branching to the header. */ + BasicBlock *Out = nullptr; + + /* Loop over the predecessors of the header node. */ + BasicBlock *Header = getHeader(); +#if defined(LLVM_V35) || defined(LLVM_V38) || defined(LLVM_V39) + typedef GraphTraits<Inverse<BasicBlock *> > InvBlockTraits; + for (typename InvBlockTraits::ChildIteratorType PI = + InvBlockTraits::child_begin(Header), + PE = InvBlockTraits::child_end(Header); PI != PE; ++PI) { + typename InvBlockTraits::NodeType *N = *PI; + if (!contains(N)) { /* If the block is not in the loop. */ + if (Out && Out != N) + return nullptr; /* Multiple predecessors outside the loop */ + Out = N; + } + } +#else + for (const auto Pred : children<Inverse<BasicBlock *> >(Header)) { + if (!contains(Pred)) { /* If the block is not in the loop. */ + if (Out && Out != Pred) + return nullptr; /* Multiple predecessors outside the loop */ + Out = Pred; + } + } +#endif + + return Out; +} + +bool InnerLoop::isReachable(Instruction *From, Instruction *To) +{ + if (!contains(From->getParent()) || !contains(To->getParent())) + return false; + if (From == To) + return true; + + SmallPtrSet<Instruction*, 8> Visited; + SmallVector<Instruction*, 8> VisitStack; + + VisitStack.push_back(From); + while (!VisitStack.empty()) { + Instruction *I = VisitStack.back(); + VisitStack.pop_back(); + + if (Visited.count(I)) + continue; + + Visited.insert(I); + for (User *U : I->users()) { + Instruction *UI = cast<Instruction>(U); + if (UI == To) + return true; + + if (contains(UI->getParent())) + VisitStack.push_back(UI); + } + } + + return false; +} + + +/* + * InnerLoopAnalysis + */ +static void addInnerLoop(Loop &L, std::vector<Loop *> &Loops) +{ + if (L.empty()) { + /* Innermost loop. + * If any basic block of current loop has been included in another + * loop, skip this loop. 
*/ + for (Loop *InnerL : Loops) { + for (auto I = L.begin(), E = L.end(); I != E; ++I) { + if (InnerL->contains(*I)) + return; + } + } + Loops.push_back(&L); + return; + } + for (Loop *InnerL : L) + addInnerLoop(*InnerL, Loops); +} + + +void InnerLoopAnalysis::analyze(LoopInfo *LI, ScalarEvolution *SE) +{ + std::vector<Loop *> Loops; + for (Loop *L : *LI) + addInnerLoop(*L, Loops); + + for (auto L : Loops) + InnerLoops.push_back(new InnerLoop(L)); + + for (auto L : InnerLoops) + analyzePhi(*L, SE); +} + +bool InnerLoopAnalysis::analyzeInduction(InnerLoop &TheLoop, + ScalarEvolution *SE, + PHINode *Phi) +{ + Type *PhiTy = Phi->getType(); + /* We only handle integer and pointer inductions variables. */ + if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy()) + return false; + + /* We only handle induction that has no outside users (except that the + * outside users are all stores.) */ + for (User *U : Phi->users()) { + Instruction *UI = cast<Instruction>(U); + if (!TheLoop.contains(UI) && !isa<StoreInst>(UI)) + return false; + } + + const SCEV *PhiScev = SE->getSCEV(Phi); + const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev); + if (!AR) + return false; + + const SCEV *Step = AR->getStepRecurrence(*SE); + const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step); + if (!ConstStep && !SE->isLoopInvariant(Step, AR->getLoop())) + return false; + + /* We found an induction variable. */ + Value *StartValue = + Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader()); + TheLoop.addInduction(Phi, StartValue, Step); + + return true; +} + +/* + * isReductionInstr() + * Check if the reduction operation is supported. + * We don't allow a reduction to bind more than one operation, so drop a + * reduction if it already has one operation. + */ +static bool isReductionInstr(Instruction *I, ReductionDesc::ReductionKind &Kind, + Type *&Ty) +{ + ReductionDesc::ReductionKind K = ReductionDesc::NoReduction; + switch (I->getOpcode()) { + default: + return false; + case Instruction::PHI: + case Instruction::BitCast: + return true; + case Instruction::Add: + case Instruction::Sub: + K = ReductionDesc::IntegerAdd; + break; + case Instruction::Mul: + K = ReductionDesc::IntegerMult; + break; + case Instruction::And: + K = ReductionDesc::IntegerAnd; + break; + case Instruction::Or: + K = ReductionDesc::IntegerOr; + break; + case Instruction::Xor: + K = ReductionDesc::IntegerXor; + break; + case Instruction::FAdd: + case Instruction::FSub: + K = ReductionDesc::FloatAdd; + break; + case Instruction::FMul: + K = ReductionDesc::FloatMult; + break; + } + + if (VectorType *VecTy = dyn_cast<VectorType>(I->getType())) + Ty = VecTy->getScalarType(); + else + Ty = I->getType(); + + if (Kind == ReductionDesc::NoReduction) { + Kind = K; + return true; + } + + if (Kind != K) { + /* Different reduction operation to the previous one. */ + return false; + } + return true; +} + +static bool hasMultipleUsesOf(Instruction *I, + SmallPtrSet<Instruction *, 8> &Insts) +{ + unsigned NumUses = 0; + for(User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use) { + if (Insts.count(dyn_cast<Instruction>(*Use))) + ++NumUses; + if (NumUses > 1) + return true; + } + return false; +} + +static bool isLegalUser(Instruction *I) +{ + if (isa<StoreInst>(I) && !MDFactory::isGuestMemory(I)) + return true; + return false; +} + +bool InnerLoopAnalysis::analyzeReduction(InnerLoop &TheLoop, PHINode *Phi) +{ + if (Phi->getNumIncomingValues() != 2) + return false; + + /* Reduction variables are only found in the loop header block. 
*/ + if (Phi->getParent() != TheLoop.getHeader()) + return false; + + /* Obtain the reduction start value from from the loop preheader. */ + Value *StartValue = Phi->getIncomingValueForBlock(TheLoop.getLoopPreheader()); + + /* ExitInstruction is the single value which is used outside the loop. + * We only allow for a single reduction value to be used outside the loop. + * This includes users of the reduction, variables (which form a cycle + * which ends in the phi node). */ + Instruction *ExitInstruction = nullptr; + /* Indicates that we found a reduction operation in our scan. */ + bool FoundReduxOp = false; + + /* We start with the PHI node and scan for all of the users of this + * instruction. All users must be instructions that can be used as reduction + * variables (such as ADD). We must have a single out-of-block user. The cycle + * must include the original PHI. */ + bool FoundStartPHI = false; + + ReductionDesc::ReductionKind Kind = ReductionDesc::NoReduction; + Type *Ty = nullptr; + + SmallPtrSet<Instruction *, 8> VisitedInsts; + SmallVector<Instruction *, 8> Worklist; + Worklist.push_back(Phi); + VisitedInsts.insert(Phi); + + /* A value in the reduction can be used: + * - By the reduction: + * - Reduction operation: + * - One use of reduction value (safe). + * - Multiple use of reduction value (not safe). + * - PHI: + * - All uses of the PHI must be the reduction (safe). + * - Otherwise, not safe. + * - By one or no instruction outside of the loop (safe). + * - By further instructions outside of the loop (not safe). + * - By an instruction that is not part of the reduction (not safe). + * This is either: + * An instruction type other than PHI or the reduction operation. + * A PHI in the header other than the initial PHI. */ + while (!Worklist.empty()) { + Instruction *Cur = Worklist.back(); + Worklist.pop_back(); + + /* No Users. + * If the instruction has no users then this is a broken chain and + * cannot be a reduction variable. */ + if (Cur->use_empty()) + return false; + + bool IsAPhi = isa<PHINode>(Cur); + bool IsBitCast = isa<BitCastInst>(Cur); + + /* Currenly, we don't handle a reduction used by another PHI other than + * the original PHI. */ + if (IsAPhi && Cur != Phi) + return false; + + /* Any reduction instruction must be of one of the allowed kinds. */ + if (!isReductionInstr(Cur, Kind, Ty)) + return false; + + /* Reductions of instructions such as Div, and Sub is only possible if the + * LHS is the reduction variable. */ + if (!IsAPhi && !Cur->isCommutative() && + !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0)))) + return false; + + /* A reduction operation must only have one use of the reduction value. */ + if (!IsAPhi && hasMultipleUsesOf(Cur, VisitedInsts)) + return false; + + /* Check whether we found a reduction operator. */ + FoundReduxOp |= (!IsAPhi && !IsBitCast); + + /* Process users of current instruction. Push non-PHI nodes after PHI + * nodes onto the stack. This way we are going to have seen all inputs + * to PHI nodes once we get to them. */ + SmallVector<Instruction *, 8> NonPHIs; + SmallVector<Instruction *, 8> PHIs; + for (User *U : Cur->users()) { + Instruction *UI = cast<Instruction>(U); + + if (isLegalUser(UI)) + continue; + + /* Check if we found the exit user. */ + BasicBlock *Parent = UI->getParent(); + if (!TheLoop.contains(Parent)) { + /* Exit if you find multiple outside users or if the header phi node is + * being used. 
In this case the user uses the value of the previous + * iteration, in which case we would loose "VF-1" iterations of the + * reduction operation if we vectorize. */ + if (ExitInstruction != nullptr || Cur == Phi) + return false; + + /* The instruction used by an outside user must be the last instruction + * before we feed back to the reduction phi. Otherwise, we loose VF-1 + * operations on the value. */ + if (std::find(Phi->op_begin(), Phi->op_end(), Cur) == Phi->op_end()) + return false; + + ExitInstruction = Cur; + continue; + } + + /* Process instructions only once (termination). Each reduction cycle + * value must only be used once, except by phi nodes and min/max + * reductions which are represented as a cmp followed by a select. */ + if (!VisitedInsts.count(UI)) { + VisitedInsts.insert(UI); + if (isa<PHINode>(UI)) + PHIs.push_back(UI); + else + NonPHIs.push_back(UI); + } else if (!isa<PHINode>(UI)) + return false; + + /* Remember that we completed the cycle. */ + if (UI == Phi) + FoundStartPHI = true; + } + Worklist.append(PHIs.begin(), PHIs.end()); + Worklist.append(NonPHIs.begin(), NonPHIs.end()); + } + + /* Set the exit instruction to the last instruction feed back to the + * reduction phi if we cannot find an exit instruction. */ + if (!ExitInstruction) { + Value *NextValue = Phi->getIncomingValueForBlock(TheLoop.getSingleLatchTail()); + if (!isa<Instruction>(NextValue)) + return false; + ExitInstruction = cast<Instruction>(NextValue); + } + + if (!FoundStartPHI || !FoundReduxOp) + return false; + + /* We found an induction variable. */ + TheLoop.addReduction(Phi, StartValue, ExitInstruction, Kind, Ty); + + return true; +} + +void InnerLoopAnalysis::analyzePhi(InnerLoop &TheLoop, ScalarEvolution *SE) +{ + BasicBlock *Header = TheLoop.getHeader(); + for (BasicBlock *BB : TheLoop.blocks()) { + auto I = BB->begin(); + auto E = BasicBlock::iterator(BB->getFirstNonPHI()); + + for (; I != E; ++I) { + /* Currently, we cannot handle PHIs in a non-header block, so set + * the loop with unknown PHI if we find any of it. */ + if (BB != Header) { + TheLoop.UnknownPhi = true; + return; + } + + /* The loop must have a preheader and one split latch for us to + * analyze inductions and reductions. 
*/ + if (!TheLoop.getLoopPreheader() || !TheLoop.getSingleLatchTail()) { + TheLoop.UnknownPhi = true; + return; + } + + PHINode *Phi = cast<PHINode>(I); + if (!analyzeInduction(TheLoop, SE, Phi) && + !analyzeReduction(TheLoop, Phi)) + TheLoop.UnknownPhi = true; + } + } +} + + +/* + * InnerLoopAnalysisWrapperPass Pass + */ +char InnerLoopAnalysisWrapperPass::ID = 0; +INITIALIZE_PASS_BEGIN(InnerLoopAnalysisWrapperPass, "InnerLoopAnalysis", + "Inner Loop Analysis", true, true) +#if defined(LLVM_V35) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +#else +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) +#endif +INITIALIZE_PASS_END(InnerLoopAnalysisWrapperPass, "InnerLoopAnalysis", + "Inner Loop Analysis", true, true) + +void InnerLoopAnalysisWrapperPass::releaseMemory() { + LA.releaseMemory(); +} + +void InnerLoopAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); +#if defined(LLVM_V35) + AU.addRequired<LoopInfo>(); + AU.addRequired<ScalarEvolution>(); +#else + AU.addRequired<LoopInfoWrapperPass>(); + AU.addRequired<ScalarEvolutionWrapperPass>(); +#endif +} +void InnerLoopAnalysisWrapperPass::print(raw_ostream &OS, const Module *) const { + LA.print(OS); +} + +void InnerLoopAnalysisWrapperPass::verifyAnalysis() const { + LA.verify(); +} + +bool InnerLoopAnalysisWrapperPass::runOnFunction(Function &F) { +#if defined(LLVM_V35) + ScalarEvolution *SE = &getAnalysis<ScalarEvolution>(); + LoopInfo *LI = &getAnalysis<LoopInfo>(); +#else + ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); + LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); +#endif + + LA.analyze(LI, SE); + return false; +} + + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/llvm/atomic/atomic-arm.c b/llvm/atomic/atomic-arm.c new file mode 100644 index 0000000..4176caa --- /dev/null +++ b/llvm/atomic/atomic-arm.c @@ -0,0 +1,158 @@ +/* + * Copyright (C) 2010 Parallel Processing Institute (PPI), Fudan Univ. + * <http://ppi.fudan.edu.cn/system_research_group> + * + * Authors: + * Zhaoguo Wang <zgwang@fudan.edu.cn> + * Yufei Chen <chenyufei@fudan.edu.cn> + * Ran Liu <naruilone@gmail.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +/* We include this file in op_helper.c */ + +#include <stdlib.h> +#include <pthread.h> +#include "coremu-atomic.h" + +__thread uint64_t cm_exclusive_val; +__thread uint32_t cm_exclusive_addr = -1; + +#define GEN_LOAD_EXCLUSIVE(type, TYPE) \ +void HELPER(load_exclusive##type)(CPUArchState *env, uint32_t reg, \ + uint32_t addr) \ +{ \ + unsigned long q_addr = 0; \ + DATA_##type val = 0; \ + \ + cm_exclusive_addr = addr; \ + CM_GET_QEMU_ADDR(env, q_addr,addr); \ + val = *(DATA_##type *)q_addr; \ + cm_exclusive_val = val; \ + env->regs[reg] = val; \ +} + +GEN_LOAD_EXCLUSIVE(b, B); +GEN_LOAD_EXCLUSIVE(w, W); +GEN_LOAD_EXCLUSIVE(l, L); +//GEN_LOAD_EXCLUSIVE(q, Q); + +#define GEN_STORE_EXCLUSIVE(type, TYPE) \ +void HELPER(store_exclusive##type)(CPUArchState *env, uint32_t res, \ + uint32_t reg, uint32_t addr) \ +{ \ + unsigned long q_addr = 0; \ + DATA_##type val = 0; \ + DATA_##type r = 0; \ + \ + if(addr != cm_exclusive_addr) \ + goto fail; \ + \ + CM_GET_QEMU_ADDR(env, q_addr,addr); \ + val = (DATA_##type)env->regs[reg]; \ + \ + r = atomic_compare_exchange##type((DATA_##type *)q_addr, \ + (DATA_##type)cm_exclusive_val, val); \ + \ + if(r == (DATA_##type)cm_exclusive_val) { \ + env->regs[res] = 0; \ + goto done; \ + } else { \ + goto fail; \ + } \ + \ +fail: \ + env->regs[res] = 1; \ + \ +done: \ + cm_exclusive_addr = -1; \ + return; \ +} + +GEN_STORE_EXCLUSIVE(b, B); +GEN_STORE_EXCLUSIVE(w, W); +GEN_STORE_EXCLUSIVE(l, L); +//GEN_STORE_EXCLUSIVE(q, Q); + +void HELPER(load_exclusiveq)(CPUArchState *env, uint32_t reg, uint32_t addr) +{ + unsigned long q_addr = 0; + uint64_t val = 0; + + cm_exclusive_addr = addr; + CM_GET_QEMU_ADDR(env, q_addr,addr); + val = *(uint64_t *)q_addr; + cm_exclusive_val = val; + env->regs[reg] = (uint32_t)val; + env->regs[reg + 1] = (uint32_t)(val>>32); +} + +void HELPER(store_exclusiveq)(CPUArchState *env, uint32_t res, uint32_t reg, uint32_t addr) +{ + unsigned long q_addr = 0; + uint64_t val = 0; + uint64_t r = 0; + + if(addr != cm_exclusive_addr) + goto fail; + + CM_GET_QEMU_ADDR(env, q_addr,addr); + val = (uint32_t)env->regs[reg]; + val |= ((uint64_t)env->regs[reg + 1]) << 32; + + r = atomic_compare_exchangeq((uint64_t *)q_addr, + (uint64_t)cm_exclusive_val, val); + + if(r == (uint64_t)cm_exclusive_val) { + env->regs[res] = 0; + goto done; + } else { + goto fail; + } + +fail: + env->regs[res] = 1; + +done: + cm_exclusive_addr = -1; + return; +} + +void HELPER(clear_exclusive)(CPUArchState *env) +{ + cm_exclusive_addr = -1; +} + +void HELPER(swpb)(CPUArchState *env, uint32_t dst, uint32_t src, uint32_t addr) +{ + uint8_t old, val; + unsigned long q_addr; + CM_GET_QEMU_ADDR(env, q_addr,env->regs[addr]); + val = (uint8_t)env->regs[src]; + old = atomic_exchangeb((uint8_t *)q_addr, (uint8_t)val); + env->regs[dst] = old; + //printf("SWPB\n"); +} + +void HELPER(swp)(CPUArchState *env, uint32_t dst, uint32_t src, uint32_t addr) +{ + uint32_t old, val; + unsigned long q_addr; + CM_GET_QEMU_ADDR(env, q_addr,env->regs[addr]); + val = env->regs[src]; + old = atomic_exchangel((uint32_t *)q_addr, val); + env->regs[dst] = old; + //printf("SWP\n"); +} diff --git a/llvm/atomic/atomic-helper.h b/llvm/atomic/atomic-helper.h new file mode 100644 index 0000000..9e3cedf --- /dev/null +++ b/llvm/atomic/atomic-helper.h @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2010 Parallel Processing Institute (PPI), Fudan Univ. 
+ * <http://ppi.fudan.edu.cn/system_research_group> + * + * Authors: + * Zhaoguo Wang <zgwang@fudan.edu.cn> + * Yufei Chen <chenyufei@fudan.edu.cn> + * Ran Liu <naruilone@gmail.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "config-target.h" + +#ifdef CONFIG_COREMU + +#if defined(TARGET_I386) +#define __GEN_HEADER(type) \ +DEF_HELPER_3(atomic_inc##type, void, env, tl, int) \ +DEF_HELPER_4(xchg##type, void, env, tl, int, int) \ +DEF_HELPER_4(atomic_op##type, void, env, tl, tl, int) \ +DEF_HELPER_4(atomic_xadd##type, void, env, tl, int, int) \ +DEF_HELPER_4(atomic_cmpxchg##type, void, env, tl, int, int) \ +DEF_HELPER_2(atomic_not##type, void, env, tl) \ +DEF_HELPER_2(atomic_neg##type, void, env, tl) + +__GEN_HEADER(b) +__GEN_HEADER(w) +__GEN_HEADER(l) +#ifdef TARGET_X86_64 +__GEN_HEADER(q) +#endif + +DEF_HELPER_2(atomic_cmpxchg8b, void, env, tl) +DEF_HELPER_2(atomic_cmpxchg16b, void, env, tl) + +DEF_HELPER_4(atomic_bts, void, env, tl, tl, int) +DEF_HELPER_4(atomic_btr, void, env, tl, tl, int) +DEF_HELPER_4(atomic_btc, void, env, tl, tl, int) + +/* fence */ +DEF_HELPER_1(fence, void, env) + +#elif defined(TARGET_ARM) +#define __GEN_HEADER(type) \ +DEF_HELPER_3(load_exclusive##type, void, env, i32, i32) \ +DEF_HELPER_4(store_exclusive##type, void, env, i32, i32, i32) + +__GEN_HEADER(b) +__GEN_HEADER(w) +__GEN_HEADER(l) +__GEN_HEADER(q) + +DEF_HELPER_1(clear_exclusive, void, env) + +DEF_HELPER_4(swpb, void, env, i32, i32, i32) +DEF_HELPER_4(swp, void, env, i32, i32, i32) +#else +#error "unsupported processor type" +#endif + +#endif + diff --git a/llvm/atomic/atomic-x86.c b/llvm/atomic/atomic-x86.c new file mode 100644 index 0000000..dc0baf0 --- /dev/null +++ b/llvm/atomic/atomic-x86.c @@ -0,0 +1,504 @@ +/* + * Copyright (C) 2010 Parallel Processing Institute (PPI), Fudan Univ. + * <http://ppi.fudan.edu.cn/system_research_group> + * + * Authors: + * Zhaoguo Wang <zgwang@fudan.edu.cn> + * Yufei Chen <chenyufei@fudan.edu.cn> + * Ran Liu <naruilone@gmail.com> + * Xi Wu <wuxi@fudan.edu.cn> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +/* We include this file in op_helper.c */ + +#include <stdlib.h> +#include <pthread.h> +#include <assert.h> +#include "coremu-atomic.h" + +#define EAX (env->regs[R_EAX]) +#define ECX (env->regs[R_ECX]) +#define EDX (env->regs[R_EDX]) +#define EBX (env->regs[R_EBX]) + +/* These definitions are copied from translate.c */ +#if defined(WORDS_BIGENDIAN) +#define REG_B_OFFSET (sizeof(target_ulong) - 1) +#define REG_H_OFFSET (sizeof(target_ulong) - 2) +#define REG_W_OFFSET (sizeof(target_ulong) - 2) +#define REG_L_OFFSET (sizeof(target_ulong) - 4) +#define REG_LH_OFFSET (sizeof(target_ulong) - 8) +#else +#define REG_B_OFFSET 0 +#define REG_H_OFFSET 1 +#define REG_W_OFFSET 0 +#define REG_L_OFFSET 0 +#define REG_LH_OFFSET 4 +#endif + +#ifdef TARGET_X86_64 +#define X86_64_DEF(...) __VA_ARGS__ +#else +#define X86_64_DEF(...) +#endif + +#define REG_LOW_MASK (~(uint64_t)0x0>>32) + +/* gen_op instructions */ +/* i386 arith/logic operations */ +enum { + OP_ADDL, + OP_ORL, + OP_ADCL, + OP_SBBL, + OP_ANDL, + OP_SUBL, + OP_XORL, + OP_CMPL, +}; + +/* */ +static target_ulong cm_get_reg_val(CPUX86State *env, int ot, int hregs, int reg) +{ + target_ulong val, offset; + CPUX86State *env1 = env; + + switch(ot) { + case 0: /* OT_BYTE */ + if (reg < 4 X86_64_DEF( || reg >= 8 || hregs)) { + goto std_case; + } else { + offset = offsetof(CPUX86State, regs[reg - 4]) + REG_H_OFFSET; + val = *(((uint8_t *)env1) + offset); + } + break; + default: + std_case: + val = env1->regs[reg]; + break; + } + + return val; +} + +static void cm_set_reg_val(CPUX86State *env, int ot, int hregs, int reg, target_ulong val) +{ + target_ulong offset; + + CPUX86State *env1 = env; + + switch(ot) { + case 0: /* OT_BYTE */ + if (reg < 4 X86_64_DEF (|| reg >= 8 || hregs)) { + offset = offsetof(CPUX86State, regs[reg]) + REG_B_OFFSET; + *(((uint8_t *) env1) + offset) = (uint8_t)val; + } else { + offset = offsetof(CPUX86State, regs[reg - 4]) + REG_H_OFFSET; + *(((uint8_t *) env1) + offset) = (uint8_t)val; + } + break; + case 1: /* OT_WORD */ + offset = offsetof(CPUX86State, regs[reg]) + REG_W_OFFSET; + *((uint16_t *)((uint8_t *)env1 + offset)) = (uint16_t)val; + break; + case 2: /* OT_LONG */ + env1->regs[reg] = REG_LOW_MASK & val; + break; + default: + case 3: /* OT_QUAD */ + env1->regs[reg] = val; + break; + } +} + +#define LD_b ldub_p +#define LD_w lduw_p +#define LD_l ldl_p +#define LD_q ldq_p + +/* Lightweight transactional memory. */ +#define TX(vaddr, type, value, command) \ + unsigned long __q_addr; \ + DATA_##type __oldv; \ + DATA_##type value; \ + \ + CM_GET_QEMU_ADDR(env, __q_addr, vaddr); \ + do { \ + __oldv = value = LD_##type((DATA_##type *)__q_addr); \ + {command;}; \ + mb(); \ + } while (__oldv != (atomic_compare_exchange##type( \ + (DATA_##type *)__q_addr, __oldv, value))) + +/* Atomically emulate INC instruction using CAS1 and memory transaction. 
*/ + +#define GEN_ATOMIC_INC(type, TYPE) \ +void helper_atomic_inc##type(CPUX86State *env, target_ulong a0, int c) \ +{ \ + int eflags_c, eflags; \ + int cc_op; \ + \ + /* compute the previous instruction c flags */ \ + eflags_c = helper_cc_compute_c(CC_DST, CC_SRC, CC_SRC2, CC_OP); \ + \ + TX(a0, type, value, { \ + if (c > 0) { \ + value++; \ + cc_op = CC_OP_INC##TYPE; \ + } else { \ + value--; \ + cc_op = CC_OP_DEC##TYPE; \ + } \ + }); \ + \ + CC_SRC = eflags_c; \ + CC_DST = value; \ + \ + eflags = helper_cc_compute_all(CC_DST, CC_SRC, CC_SRC2, cc_op); \ + CC_SRC = eflags; \ +} \ + +GEN_ATOMIC_INC(b, B); +GEN_ATOMIC_INC(w, W); +GEN_ATOMIC_INC(l, L); +#ifdef TARGET_X86_64 +GEN_ATOMIC_INC(q, Q); +#endif + +#define OT_b 0 +#define OT_w 1 +#define OT_l 2 +#define OT_q 3 + +#define GEN_ATOMIC_XCHG(type) \ +void helper_xchg##type(CPUX86State *env, target_ulong a0, int reg, \ + int hreg) \ +{ \ + DATA_##type val, out; \ + unsigned long q_addr; \ + \ + CM_GET_QEMU_ADDR(env, q_addr, a0); \ + val = (DATA_##type)cm_get_reg_val(env, OT_##type, hreg, reg); \ + out = atomic_exchange##type((DATA_##type *)q_addr, val); \ + mb(); \ + \ + cm_set_reg_val(env, OT_##type, hreg, reg, out); \ +} + +GEN_ATOMIC_XCHG(b); +GEN_ATOMIC_XCHG(w); +GEN_ATOMIC_XCHG(l); +#ifdef TARGET_X86_64 +GEN_ATOMIC_XCHG(q); +#endif + +#define GEN_ATOMIC_OP(type, TYPE) \ +void helper_atomic_op##type(CPUX86State *env, target_ulong a0, \ + target_ulong t1, int op) \ +{ \ + DATA_##type operand; \ + int eflags_c, eflags; \ + int cc_op; \ + \ + /* compute the previous instruction c flags */ \ + eflags_c = helper_cc_compute_c(CC_DST, CC_SRC, CC_SRC2, CC_OP); \ + operand = (DATA_##type)t1; \ + \ + TX(a0, type, value, { \ + switch(op) { \ + case OP_ADCL: \ + value += operand + eflags_c; \ + cc_op = CC_OP_ADD##TYPE + (eflags_c << 2); \ + CC_SRC = operand; \ + break; \ + case OP_SBBL: \ + value = value - operand - eflags_c; \ + cc_op = CC_OP_SUB##TYPE + (eflags_c << 2); \ + CC_SRC = operand; \ + break; \ + case OP_ADDL: \ + value += operand; \ + cc_op = CC_OP_ADD##TYPE; \ + CC_SRC = operand; \ + break; \ + case OP_SUBL: \ + value -= operand; \ + cc_op = CC_OP_SUB##TYPE; \ + CC_SRC = operand; \ + break; \ + default: \ + case OP_ANDL: \ + value &= operand; \ + cc_op = CC_OP_LOGIC##TYPE; \ + break; \ + case OP_ORL: \ + value |= operand; \ + cc_op = CC_OP_LOGIC##TYPE; \ + break; \ + case OP_XORL: \ + value ^= operand; \ + cc_op = CC_OP_LOGIC##TYPE; \ + break; \ + case OP_CMPL: \ + abort(); \ + break; \ + } \ + }); \ + CC_DST = value; \ + /* successful transaction, compute the eflags */ \ + eflags = helper_cc_compute_all(CC_DST, CC_SRC, CC_SRC2, cc_op); \ + CC_SRC = eflags; \ +} + +GEN_ATOMIC_OP(b, B); +GEN_ATOMIC_OP(w, W); +GEN_ATOMIC_OP(l, L); +#ifdef TARGET_X86_64 +GEN_ATOMIC_OP(q, Q); +#endif + +/* xadd */ +#define GEN_ATOMIC_XADD(type, TYPE) \ +void helper_atomic_xadd##type(CPUX86State *env, target_ulong a0, \ + int reg, int hreg) \ +{ \ + DATA_##type operand, oldv; \ + int eflags; \ + \ + operand = (DATA_##type)cm_get_reg_val( \ + env, OT_##type, hreg, reg); \ + \ + TX(a0, type, newv, { \ + oldv = newv; \ + newv += operand; \ + }); \ + \ + /* transaction successes */ \ + /* xchg the register and compute the eflags */ \ + cm_set_reg_val(env, OT_##type, hreg, reg, oldv); \ + CC_SRC = oldv; \ + CC_DST = newv; \ + \ + eflags = helper_cc_compute_all(CC_DST, CC_SRC, CC_SRC2, \ + CC_OP_ADD##TYPE); \ + CC_SRC = eflags; \ +} + +GEN_ATOMIC_XADD(b, B); +GEN_ATOMIC_XADD(w, W); +GEN_ATOMIC_XADD(l, L); +#ifdef TARGET_X86_64 +GEN_ATOMIC_XADD(q, Q); +#endif 
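+
+/* Illustrative sketch, not part of the original helpers: every generator in
+ * this file goes through the TX() macro defined above, i.e. an optimistic
+ * read / modify / compare-and-swap retry loop. Expanded by hand for a
+ * byte-sized NOT (names below are for illustration only, and the block is
+ * kept under #if 0 so it is never compiled): */
+#if 0
+static void example_atomic_notb(CPUX86State *env, target_ulong a0)
+{
+    unsigned long q_addr;
+    uint8_t oldv, value;
+
+    /* Translate the guest virtual address into a host pointer. */
+    CM_GET_QEMU_ADDR(env, q_addr, a0);
+    do {
+        /* Snapshot the current memory value ... */
+        oldv = value = LD_b((uint8_t *)q_addr);
+        /* ... apply the "transaction" body ... */
+        value = ~value;
+        mb();
+        /* ... and retry if another vCPU modified the location meanwhile. */
+    } while (oldv != atomic_compare_exchangeb((uint8_t *)q_addr, oldv, value));
+}
+#endif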
+ +/* cmpxchg */ +#define GEN_ATOMIC_CMPXCHG(type, TYPE) \ +void helper_atomic_cmpxchg##type(CPUX86State *env, target_ulong a0, \ + int reg, int hreg) \ +{ \ + DATA_##type reg_v, eax_v, res; \ + int eflags; \ + unsigned long q_addr; \ + \ + CM_GET_QEMU_ADDR(env, q_addr, a0); \ + reg_v = (DATA_##type)cm_get_reg_val(env, OT_##type, hreg, reg); \ + eax_v = (DATA_##type)cm_get_reg_val(env, OT_##type, 0, R_EAX); \ + \ + res = atomic_compare_exchange##type( \ + (DATA_##type *)q_addr, eax_v, reg_v); \ + mb(); \ + \ + if (res != eax_v) \ + cm_set_reg_val(env, OT_##type, 0, R_EAX, res); \ + \ + CC_SRC = res; \ + CC_DST = eax_v - res; \ + \ + eflags = helper_cc_compute_all(CC_DST, CC_SRC, CC_SRC2, \ + CC_OP_SUB##TYPE); \ + CC_SRC = eflags; \ +} + +GEN_ATOMIC_CMPXCHG(b, B); +GEN_ATOMIC_CMPXCHG(w, W); +GEN_ATOMIC_CMPXCHG(l, L); +#ifdef TARGET_X86_64 +GEN_ATOMIC_CMPXCHG(q, Q); +#endif + +#if defined(_LP64) +/* cmpxchgb (8, 16) */ +void helper_atomic_cmpxchg8b(CPUX86State *env, target_ulong a0) +{ + uint64_t edx_eax, ecx_ebx, res; + int eflags; + unsigned long q_addr; + + eflags = helper_cc_compute_all(CC_DST, CC_SRC, CC_SRC2, CC_OP); + CM_GET_QEMU_ADDR(env, q_addr, a0); + + edx_eax = (((uint64_t)EDX << 32) | (uint32_t)EAX); + ecx_ebx = (((uint64_t)ECX << 32) | (uint32_t)EBX); + + res = atomic_compare_exchangeq((uint64_t *)q_addr, edx_eax, ecx_ebx); + mb(); + + if (res == edx_eax) { + eflags |= CC_Z; + } else { + EDX = (uint32_t)(res >> 32); + EAX = (uint32_t)res; + eflags &= ~CC_Z; + } + + CC_SRC = eflags; +} +#else +void helper_atomic_cmpxchg8b(CPUX86State *env, target_ulong a0) +{ + assert("helper_atomic_cmpxchg8b: not supported.\n"); + exit(0); +} +#endif + +void helper_atomic_cmpxchg16b(CPUX86State *env, target_ulong a0) +{ + uint8_t res; + int eflags; + unsigned long q_addr; + + eflags = helper_cc_compute_all(CC_DST, CC_SRC, CC_SRC2, CC_OP); + CM_GET_QEMU_ADDR(env, q_addr, a0); + + uint64_t old_rax = *(uint64_t *)q_addr; + uint64_t old_rdx = *(uint64_t *)(q_addr + 8); + res = atomic_compare_exchange16b((uint64_t *)q_addr, EAX, EDX, EBX, ECX); + mb(); + + if (res) { + eflags |= CC_Z; /* swap success */ + } else { + EDX = old_rdx; + EAX = old_rax; + eflags &= ~CC_Z; /* read the old value ! */ + } + + CC_SRC = eflags; +} + +/* not */ +#define GEN_ATOMIC_NOT(type) \ +void helper_atomic_not##type(CPUX86State *env, \ + target_ulong a0) \ +{ \ + TX(a0, type, value, { \ + value = ~value; \ + }); \ +} + +GEN_ATOMIC_NOT(b); +GEN_ATOMIC_NOT(w); +GEN_ATOMIC_NOT(l); +#ifdef TARGET_X86_64 +GEN_ATOMIC_NOT(q); +#endif + +/* neg */ +#define GEN_ATOMIC_NEG(type, TYPE) \ +void helper_atomic_neg##type(CPUX86State *env, \ + target_ulong a0) \ +{ \ + int eflags; \ + \ + TX(a0, type, value, { \ + value = -value; \ + }); \ + \ + /* We should use the old value to compute CC */ \ + CC_SRC = CC_DST = -value; \ + \ + eflags = helper_cc_compute_all(CC_DST, CC_SRC, CC_SRC2, \ + CC_OP_SUB##TYPE); \ + CC_SRC = eflags; \ +} \ + +GEN_ATOMIC_NEG(b, B); +GEN_ATOMIC_NEG(w, W); +GEN_ATOMIC_NEG(l, L); +#ifdef TARGET_X86_64 +GEN_ATOMIC_NEG(q, Q); +#endif + +/* This is only used in BTX instruction, with an additional offset. + * Note that, when using register bitoffset, the value can be larger than + * operand size - 1 (operand size can be 16/32/64), refer to intel manual 2A + * page 3-11. 
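+ * (Clarifying note, added for illustration: this is why the TX2() macro below
+ * first advances the host address by (offset >> 3) bytes, and the generated
+ * bts/btr/btc helpers then operate on bit (offset & 0x7) within that byte.)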
*/ +#define TX2(vaddr, type, value, offset, command) \ + unsigned long __q_addr; \ + DATA_##type __oldv; \ + DATA_##type value; \ + \ + CM_GET_QEMU_ADDR(env, __q_addr, vaddr); \ + __q_addr += offset >> 3; \ + do { \ + __oldv = value = LD_##type((DATA_##type *)__q_addr); \ + {command;}; \ + mb(); \ + } while (__oldv != (atomic_compare_exchange##type( \ + (DATA_##type *)__q_addr, __oldv, value))) + +#define GEN_ATOMIC_BTX(ins, command) \ +void helper_atomic_##ins(CPUX86State *env, target_ulong a0, \ + target_ulong offset, int ot) \ +{ \ + uint8_t old_byte; \ + int eflags; \ + \ + TX2(a0, b, value, offset, { \ + old_byte = value; \ + {command;}; \ + }); \ + \ + CC_SRC = (old_byte >> (offset & 0x7)); \ + CC_DST = 0; \ + eflags = helper_cc_compute_all(CC_DST, CC_SRC, CC_SRC2, \ + CC_OP_SARB + ot); \ + CC_SRC = eflags; \ +} + +/* bts */ +GEN_ATOMIC_BTX(bts, { + value |= (1 << (offset & 0x7)); +}); +/* btr */ +GEN_ATOMIC_BTX(btr, { + value &= ~(1 << (offset & 0x7)); +}); +/* btc */ +GEN_ATOMIC_BTX(btc, { + value ^= (1 << (offset & 0x7)); +}); + +/* fence **/ +void helper_fence(CPUX86State *env) +{ + mb(); +} + +#undef EAX +#undef ECX +#undef EDX +#undef EBX diff --git a/llvm/atomic/coremu-atomic.h b/llvm/atomic/coremu-atomic.h new file mode 100644 index 0000000..998232b --- /dev/null +++ b/llvm/atomic/coremu-atomic.h @@ -0,0 +1,412 @@ +/* + * COREMU Parallel Emulator Framework + * + * Atomic support for COREMU system. + * XXX: Now only support x86-64 architecture. + * + * Copyright (C) 2010 Parallel Processing Institute (PPI), Fudan Univ. + * <http://ppi.fudan.edu.cn/system_research_group> + * + * Authors: + * Zhaoguo Wang <zgwang@fudan.edu.cn> + * Yufei Chen <chenyufei@fudan.edu.cn> + * Ran Liu <naruilone@gmail.com> + * Xi Wu <wuxi@fudan.edu.cn> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _COREMU_ATOMIC_H +#define _COREMU_ATOMIC_H + +#include <stdint.h> +#include <stdlib.h> +#include <assert.h> +#include "config-target.h" +#include "hqemu.h" + +/* Given the guest virtual address, get the corresponding host address. + * This macro resembles ldxxx in softmmu_template.h + * NOTE: This must be inlined since the use of GETPC needs to get the + * return address. Using always inline also works, we use macro here to be more + * explicit. */ +#if defined(CONFIG_USER_ONLY) +#define CM_GET_QEMU_ADDR(__env1, q_addr, v_addr) \ +do { \ + q_addr = v_addr + GUEST_BASE; \ +} while (0) + +#else +#define CM_GET_QEMU_ADDR(__env1, q_addr, v_addr) \ +do { \ + CPUState *cpu = ENV_GET_CPU(__env1); \ + int __mmu_idx, __index; \ + uintptr_t __retaddr; \ + __index = (v_addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); \ + /* get the CPL, hence determine the MMU mode */ \ + __mmu_idx = cpu_mmu_index(__env1, false); \ + /* We use this function in the implementation of atomic instructions */ \ + /* and we are going to modify these memory. So we use addr_write. 
*/ \ + if (unlikely(__env1->tlb_table[__mmu_idx][__index].addr_write \ + != ((v_addr & TARGET_PAGE_MASK) | tlb_version(__env1)))) { \ + __retaddr = GETPC(); \ + tlb_fill(cpu, v_addr, 1, __mmu_idx, __retaddr); \ + } \ + q_addr = v_addr + __env1->tlb_table[__mmu_idx][__index].addend; \ +} while(0) +#endif + +/* XXX These are also used by atomic instruction handling. + * Put these defines in some other files? */ +#define DATA_b uint8_t +#define DATA_w uint16_t +#define DATA_l uint32_t +#define DATA_q uint64_t + +#define __inline__ inline __attribute__((always_inline)) + +#if defined(__i386__) || defined(__x86_64__) +// Is this the correct way to detect 64 system? +#if defined(_LP64) +static __inline__ uint8_t +atomic_compare_exchange16b(uint64_t *memp, + uint64_t rax, uint64_t rdx, + uint64_t rbx, uint64_t rcx) +{ + uint8_t z; + __asm __volatile__ ( "lock; cmpxchg16b %3\n\t" + "setz %2\n\t" + : "=a" (rax), "=d" (rdx), "=r" (z), "+m" (*memp) + : "a" (rax), "d" (rdx), "b" (rbx), "c" (rcx) + : "memory", "cc" ); + return z; +} +#else +static __inline__ uint8_t +atomic_compare_exchange16b(uint64_t *memp, + uint64_t rax, uint64_t rdx, + uint64_t rbx, uint64_t rcx) +{ + assert("atomic_compare_exchange16b: not supported.\n"); + exit(0); +} + +static __inline__ uint8_t +atomic_compare_exchangeq(uint64_t *addr, + uint64_t oldval, uint64_t newval) +{ + assert("atomic_compare_exchangeq: not supported.\n"); + exit(0); +} + +#endif + +/* Memory Barriers: x86-64 ONLY now */ +#define mb() asm volatile("mfence":::"memory") +#define rmb() asm volatile("lfence":::"memory") +#define wmb() asm volatile("sfence" ::: "memory") + +#define LOCK_PREFIX "lock; " + +#define coremu_xglue(a, b) a ## b +// If a/b is macro, it will expand first, then pass to coremu_xglue +#define coremu_glue(a, b) coremu_xglue(a, b) + +#define coremu_xstr(s) # s +#define coremu_str(s) coremu_xstr(s) + +#define DATA_BITS 8 +#include "coremu-template.h" + +#define DATA_BITS 16 +#include "coremu-template.h" + +#define DATA_BITS 32 +#include "coremu-template.h" + +#if defined(_LP64) +#define DATA_BITS 64 +#include "coremu-template.h" +#else +static inline uint64_t atomic_exchangeq(uint64_t *p, uint64_t val) +{ + assert("atomic_exchangeq: not supported.\n"); + exit(0); +} + +#endif + +#elif defined(__arm__) + +#if defined(__ARM_ARCH_7__) || \ + defined(__ARM_ARCH_7A__) || \ + defined(__ARM_ARCH_7EM__) || \ + defined(__ARM_ARCH_7M__) || \ + defined(__ARM_ARCH_7R__) || \ + defined(__ARM_ARCH_6J__) || \ + defined(__ARM_ARCH_6K__) || \ + defined(__ARM_ARCH_6T2__) || \ + defined(__ARM_ARCH_6Z__) || \ + defined(__ARM_ARCH_6ZK__) +#define USE_ARMV6_INSTRUCTIONS +#endif + +#ifdef USE_ARMV6_INSTRUCTIONS +#define mb() __asm__ __volatile__("dmb" : : : "memory") +#define raw_local_irq_save(x) \ + ({ \ + __asm__ __volatile__( \ + "mrs %0, cpsr @ local_irq_save\n" \ + "cpsid i" \ + : "=r" (x) : : "memory", "cc"); \ + }) +#else +#define mb() __asm__ __volatile__("":::"memory") +#define raw_local_irq_save(x) \ + ({ \ + unsigned long temp; \ + (void) (&temp == &x); \ + __asm__ __volatile__( \ + "mrs %0, cpsr @ local_irq_save\n" \ +" orr %1, %0, #128\n" \ +" msr cpsr_c, %1" \ + : "=r" (x), "=r" (temp) \ + : \ + : "memory", "cc"); \ + }) +#endif + +#define raw_local_irq_restore(x) \ + __asm__ __volatile( \ + "msr cpsr_c, %0 @ local_irq_restore\n" \ + : \ + : "r" (x) \ + : "memory", "cc") + +static __inline__ uint8_t atomic_compare_exchangeb(uint8_t *addr, + uint8_t oldval, uint8_t newval) +{ + uint8_t ret; +#ifdef USE_ARMV6_INSTRUCTIONS + unsigned long tmp; + 
__asm__ __volatile__("@ atomic_cmpxchgl\n" + "1: ldrexb %1, [%3]\n" + " mov %0, #0\n" + " teq %1, %4\n" + " strexbeq %0, %5, [%3]\n" + " teq %0, #0\n" + " bne 1b\n" + : "=&r" (tmp), "=&r" (ret), "+Qo" (*addr) + : "r" (addr), "Ir" (oldval), "r" (newval) + : "cc"); +#else + unsigned long flags; + raw_local_irq_save(flags); + ret = *addr; + if (likely(ret == oldval)) + *addr = newval; + raw_local_irq_restore(flags); +#endif + return ret; +} + +static __inline__ uint16_t atomic_compare_exchangew(uint16_t *addr, + uint16_t oldval, uint16_t newval) +{ + uint16_t ret; +#ifdef USE_ARMV6_INSTRUCTIONS + unsigned long tmp; + __asm__ __volatile__("@ atomic_cmpxchgl\n" + "1: ldrexh %1, [%3]\n" + " mov %0, #0\n" + " teq %1, %4\n" + " strexheq %0, %5, [%3]\n" + " teq %0, #0\n" + " bne 1b\n" + : "=&r" (tmp), "=&r" (ret), "+Qo" (*addr) + : "r" (addr), "Ir" (oldval), "r" (newval) + : "cc"); +#else + unsigned long flags; + raw_local_irq_save(flags); + ret = *addr; + if (likely(ret == oldval)) + *addr = newval; + raw_local_irq_restore(flags); +#endif + return ret; +} + +static __inline__ uint32_t atomic_compare_exchangel(uint32_t *addr, + uint32_t oldval, uint32_t newval) +{ + uint32_t ret; +#ifdef USE_ARMV6_INSTRUCTIONS + unsigned long tmp; + __asm__ __volatile__("@ atomic_cmpxchgl\n" + "1: ldrex %1, [%3]\n" + " mov %0, #0\n" + " teq %1, %4\n" + " strexeq %0, %5, [%3]\n" + " teq %0, #0\n" + " bne 1b\n" + : "=&r" (tmp), "=&r" (ret), "+Qo" (*addr) + : "r" (addr), "Ir" (oldval), "r" (newval) + : "cc"); +#else + unsigned long flags; + raw_local_irq_save(flags); + ret = *addr; + if (likely(ret == oldval)) + *addr = newval; + raw_local_irq_restore(flags); +#endif + return ret; +} + +static __inline__ uint64_t atomic_compare_exchangeq(uint64_t *addr, + uint64_t oldval, uint64_t newval) +{ + uint64_t ret; +#ifdef USE_ARMV6_INSTRUCTIONS + unsigned long tmp; + __asm__ __volatile__("@ atomic_cmpxchgl\n" + "1: ldrexd %1, %H1, [%3]\n" + " mov %0, #0\n" + " teq %1, %4\n" + " teqeq %H1, %H4\n" + " strexdeq %0, %5, %H5, [%3]\n" + " teq %0, #0\n" + " bne 1b\n" + : "=&r" (tmp), "=&r" (ret), "+Qo" (*addr) + : "r" (addr), "Ir" (oldval), "r" (newval) + : "cc"); +#else + unsigned long flags; + raw_local_irq_save(flags); + ret = *addr; + if (likely(ret == oldval)) + *addr = newval; + raw_local_irq_restore(flags); +#endif + return ret; +} + +static __inline__ uint8_t +atomic_compare_exchange16b(uint64_t *memp, + uint64_t old_less, uint64_t old_most, + uint64_t new_less, uint64_t new_most) +{ + uint8_t ret = 0; + unsigned long flags; + raw_local_irq_save(flags); + ret = *memp; + if (likely(*memp == old_less && *(memp+1) == old_most)) + { + *memp = new_less; + *(memp+1) = new_most; + ret = 1; + } + raw_local_irq_restore(flags); + return ret; +} + +static __inline__ unsigned long __xchg(unsigned long x, volatile void *ptr, int size) +{ + unsigned long ret; +#ifdef USE_ARMV6_INSTRUCTIONS + unsigned int tmp; +#endif + + mb(); + + switch (size) { +#ifdef USE_ARMV6_INSTRUCTIONS + case 1: + __asm __volatile("@ __xchg1\n" + "1: ldrexb %0, [%3]\n" + " strexb %1, %2, [%3]\n" + " teq %1, #0\n" + " bne 1b" + : "=&r" (ret), "=&r" (tmp) + : "r" (x), "r" (ptr) + : "memory", "cc"); + break; + case 2: + __asm __volatile("@ __xchg1\n" + "1: ldrexh %0, [%3]\n" + " strexh %1, %2, [%3]\n" + " teq %1, #0\n" + " bne 1b" + : "=&r" (ret), "=&r" (tmp) + : "r" (x), "r" (ptr) + : "memory", "cc"); + break; + case 4: + __asm __volatile("@ __xchg4\n" + "1: ldrex %0, [%3]\n" + " strex %1, %2, [%3]\n" + " teq %1, #0\n" + " bne 1b" + : "=&r" (ret), "=&r" (tmp) + : 
"r" (x), "r" (ptr) + : "memory", "cc"); + break; +#else + case 1: + __asm __volatile("@ __xchg1\n" + " swpb %0, %1, [%2]" + : "=&r" (ret) + : "r" (x), "r" (ptr) + : "memory", "cc"); + break; + + case 4: + __asm __volatile("@ __xchg4\n" + " swp %0, %1, [%2]" + : "=&r" (ret) + : "r" (x), "r" (ptr) + : "memory", "cc"); + break; + case 2: + { + unsigned long flags = 0; + raw_local_irq_save(flags); + ret = *(volatile uint16_t *)ptr; + *(volatile uint16_t *)ptr = x; + raw_local_irq_restore(flags); + break; + } + +#endif + default: + exit(0); + } + mb(); + + return ret; +} + +#define xchg(ptr,x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr)))) +#define GEN_ATOMIC_XCHG_HELPER(TYPE) \ +static __inline__ DATA_##TYPE atomic_exchange##TYPE(DATA_##TYPE *p, DATA_##TYPE val) { return xchg(p, val); } + +GEN_ATOMIC_XCHG_HELPER(b); +GEN_ATOMIC_XCHG_HELPER(w); +GEN_ATOMIC_XCHG_HELPER(l); + +#endif + +#endif /* _COREMU_ATOMIC_H */ + diff --git a/llvm/atomic/coremu-template.h b/llvm/atomic/coremu-template.h new file mode 100644 index 0000000..66b185c --- /dev/null +++ b/llvm/atomic/coremu-template.h @@ -0,0 +1,101 @@ +/* The following code may be included multiple times in a single file. */ + +#if DATA_BITS == 64 +# define DATA_TYPE uint64_t +# define SUFFIX q +#elif DATA_BITS == 32 +# define DATA_TYPE uint32_t +# define SUFFIX l +#elif DATA_BITS == 16 +# define DATA_TYPE uint16_t +# define SUFFIX w +#elif DATA_BITS == 8 +# define DATA_TYPE uint8_t +# define SUFFIX b +#else +#error unsupported data size +#endif + +static __inline__ void coremu_glue(atomic_inc, SUFFIX)(DATA_TYPE *p) { + asm volatile( + LOCK_PREFIX "inc"coremu_str(SUFFIX)" %0" + : "+m"(*p) + : + : "cc"); +} + +static __inline__ void coremu_glue(atomic_dec, SUFFIX)(DATA_TYPE *p) { + asm volatile( + LOCK_PREFIX "dec"coremu_str(SUFFIX)" %0" + : "+m"(*p) + : + : "cc"); +} + +static __inline__ void coremu_glue(atomic_add, SUFFIX)(DATA_TYPE* addr, + DATA_TYPE val) { + asm volatile( + LOCK_PREFIX "add"coremu_str(SUFFIX)" %1, %0" + : "+m"(*addr) + : "a"(val) + : "cc"); +} + +/* swap the value VAL and *p. + * Return the value swapped out from memory. */ +static inline DATA_TYPE coremu_glue(atomic_exchange, SUFFIX)( + DATA_TYPE *p, DATA_TYPE val) +{ + DATA_TYPE out; + __asm __volatile( + "lock; xchg"coremu_str(SUFFIX)" %1,%2 \n\t" + : "=a" (out), "+m" (*p) + : "a" (val) + ); + return out; +} +/* Return previous value in addr. So if the return value is the same as oldval, + * swap occured. 
*/ +static __inline__ DATA_TYPE coremu_glue(atomic_compare_exchange, SUFFIX)(DATA_TYPE *addr, + DATA_TYPE oldval, DATA_TYPE newval) { + asm volatile( + LOCK_PREFIX "cmpxchg"coremu_str(SUFFIX)" %2, %1" + : "+a"(oldval), "+m"(*addr) + : "q"(newval) + : "cc"); + + return oldval; +} + +static __inline__ void coremu_glue(atomic_and, SUFFIX)(DATA_TYPE *addr, + DATA_TYPE mask) { + asm volatile( + LOCK_PREFIX "and"coremu_str(SUFFIX)" %1, %0" + : "+m"(*addr) + : "r"(mask) + : "cc"); +} + +static __inline__ void coremu_glue(atomic_or, SUFFIX)(DATA_TYPE *addr, + DATA_TYPE mask) { + asm volatile( + LOCK_PREFIX "or"coremu_str(SUFFIX)" %1, %0" + : "+m"(*addr) + : "r"(mask) + : "cc"); +} + +static __inline__ DATA_TYPE coremu_glue(atomic_xadd, SUFFIX)( + DATA_TYPE* addr, DATA_TYPE val) { + asm volatile( + LOCK_PREFIX "xadd"coremu_str(SUFFIX)" %0, %1" + : "+a"(val), "+m"(*addr) + : + : "cc"); + + return val; +} + +#undef DATA_BITS +#undef DATA_TYPE +#undef SUFFIX diff --git a/llvm/fpu/softfloat-native-def.h b/llvm/fpu/softfloat-native-def.h new file mode 100644 index 0000000..4b0fd22 --- /dev/null +++ b/llvm/fpu/softfloat-native-def.h @@ -0,0 +1,127 @@ +/* + * QEMU float support + * + * Derived from SoftFloat. + */ + +/*============================================================================ + +This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic +Package, Release 2b. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://www.cs.berkeley.edu/~jhauser/ +arithmetic/SoftFloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has +been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES +RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS +AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, +COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE +EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE +INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR +OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) the source code for the derivative work includes prominent notice that +the work is derivative, and (2) the source code includes prominent notice with +these four paragraphs for those parts of this code that are retained. + +=============================================================================*/ + +#ifndef SOFTFLOAT_NATIVE_DEF_H +#define SOFTFLOAT_NATIVE_DEF_H + +#ifdef __cplusplus +extern "C" +{ +#endif + +#include "fpu/softfloat.h" + +int num_native_fpu_helpers(void); +void *get_native_fpu_helpers(void); + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE integer-to-floating-point conversion routines. 
+*----------------------------------------------------------------------------*/ +float32 llvm_int32_to_float32(int32_t v); +float64 llvm_int32_to_float64(int32_t v); +float32 llvm_uint32_to_float32(uint32_t v); +float64 llvm_uint32_to_float64(uint32_t v); +float32 llvm_int64_to_float32(int64_t v); +float32 llvm_uint64_to_float32(uint64_t v); +float64 llvm_int64_to_float64(int64_t v); +float64 llvm_uint64_to_float64(uint64_t v); + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE single-precision conversion routines. +*----------------------------------------------------------------------------*/ +int32 llvm_float32_to_int32( float32 a ); +int32 llvm_float32_to_int32_round_to_zero( float32 a ); +int64 llvm_float32_to_int64( float32 a ); +int64 llvm_float32_to_int64_round_to_zero( float32 a ); +float64 llvm_float32_to_float64( float32 a ); + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE single-precision operations. +*----------------------------------------------------------------------------*/ +float32 llvm_float32_round_to_int( float32 a ); +float32 llvm_float32_add( float32 a, float32 b ); +float32 llvm_float32_sub( float32 a, float32 b ); +float32 llvm_float32_mul( float32 a, float32 b ); +float32 llvm_float32_div( float32 a, float32 b ); +float32 llvm_float32_rem( float32 a, float32 b ); +float32 llvm_float32_sqrt( float32 a ); +int llvm_float32_eq( float32 a, float32 b ); +int llvm_float32_le( float32 a, float32 b ); +int llvm_float32_lt( float32 a, float32 b ); +int llvm_float32_unordered( float32 a, float32 b ); +float32 llvm_float32_abs(float32 a); +float32 llvm_float32_chs(float32 a); + +float32 llvm_float32_muladd( float32 a, float32 b, float32 c ); + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE double-precision conversion routines. +*----------------------------------------------------------------------------*/ +int32 llvm_float64_to_int32( float64 a ); +int32 llvm_float64_to_int32_round_to_zero( float64 a ); +int64 llvm_float64_to_int64( float64 a ); +int64 llvm_float64_to_int64_round_to_zero( float64 a ); +float32 llvm_float64_to_float32( float64 a ); + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE double-precision operations. 
+*----------------------------------------------------------------------------*/ +float64 llvm_float64_round_to_int( float64 a ); +float64 llvm_float64_trunc_to_int( float64 a ); +float64 llvm_float64_add( float64 a, float64 b ); +float64 llvm_float64_sub( float64 a, float64 b ); +float64 llvm_float64_mul( float64 a, float64 b ); +float64 llvm_float64_div( float64 a, float64 b ); +float64 llvm_float64_rem( float64 a, float64 b ); +float64 llvm_float64_sqrt( float64 a ); +int llvm_float64_eq( float64 a, float64 b ); +int llvm_float64_le( float64 a, float64 b ); +int llvm_float64_lt( float64 a, float64 b ); +int llvm_float64_unordered( float64 a, float64 b ); +float64 llvm_float64_abs(float64 a); +float64 llvm_float64_chs(float64 a); + +float64 llvm_float64_muladd( float64 a, float64 b, float64 c ); + +float32 llvm_float32_maybe_silence_nan( float32 a ); +float64 llvm_float64_maybe_silence_nan( float64 a ); + +#ifdef __cplusplus +} +#endif + +#endif /* !SOFTFLOAT_NATIVE_DEF_H */ diff --git a/llvm/fpu/softfloat-native.h b/llvm/fpu/softfloat-native.h new file mode 100644 index 0000000..c12f62b --- /dev/null +++ b/llvm/fpu/softfloat-native.h @@ -0,0 +1,248 @@ +/* + * QEMU float support + * + * Derived from SoftFloat. + */ + +/*============================================================================ + +This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic +Package, Release 2b. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://www.cs.berkeley.edu/~jhauser/ +arithmetic/SoftFloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has +been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES +RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS +AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, +COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE +EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE +INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR +OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) the source code for the derivative work includes prominent notice that +the work is derivative, and (2) the source code includes prominent notice with +these four paragraphs for those parts of this code that are retained. 
+ +=============================================================================*/ + +#ifndef SOFTFLOAT_NATIVE_H +#define SOFTFLOAT_NATIVE_H + +#include <math.h> +#include "fpu/softfloat-native-def.h" + +typedef union { + float32 f; + int32_t i; + uint32_t u; + float s; +} llvm_float32; + +typedef union { + float64 f; + int64_t i; + uint64_t u; + double d; +} llvm_float64; + +#ifdef float32_val +#undef float32_val +#endif +#ifdef float64_val +#undef float64_val +#endif + +#define float32_val(x) ((llvm_float32)(x)).f +#define float64_val(x) ((llvm_float64)(x)).f +#define lfloat(x) ((llvm_float32)(x)).s +#define ldouble(x) ((llvm_float64)(x)).d + +#define DEF_HELPER(name) { (void *)llvm_##name, "llvm_"#name } +static TCGHelperInfo native_fpu_helpers[] = { + DEF_HELPER(int32_to_float32), + DEF_HELPER(int32_to_float64), + DEF_HELPER(uint32_to_float32), + DEF_HELPER(uint32_to_float64), + DEF_HELPER(int64_to_float32), + DEF_HELPER(uint64_to_float32), + DEF_HELPER(int64_to_float64), + DEF_HELPER(uint64_to_float64), + DEF_HELPER(float32_to_int32), + DEF_HELPER(float32_to_int64), + DEF_HELPER(float32_to_float64), + DEF_HELPER(float32_add), + DEF_HELPER(float32_sub), + DEF_HELPER(float32_mul), + DEF_HELPER(float32_div), + DEF_HELPER(float32_rem), + DEF_HELPER(float32_sqrt), + DEF_HELPER(float32_abs), + DEF_HELPER(float32_chs), + DEF_HELPER(float64_to_int32), + DEF_HELPER(float64_to_int64), + DEF_HELPER(float64_to_float32), + DEF_HELPER(float64_add), + DEF_HELPER(float64_sub), + DEF_HELPER(float64_mul), + DEF_HELPER(float64_div), + DEF_HELPER(float64_rem), + DEF_HELPER(float64_sqrt), + DEF_HELPER(float64_abs), + DEF_HELPER(float64_chs), + + DEF_HELPER(float32_muladd), + DEF_HELPER(float64_muladd), + + DEF_HELPER(float32_maybe_silence_nan), + DEF_HELPER(float64_maybe_silence_nan), +#if 0 + DEF_HELPER(float32_to_int32_round_to_zero), + DEF_HELPER(float32_to_int64_round_to_zero), + DEF_HELPER(float32_round_to_int), + DEF_HELPER(float32_eq), + DEF_HELPER(float32_le), + DEF_HELPER(float32_lt), + DEF_HELPER(float32_unordered), + DEF_HELPER(float64_to_int32_round_to_zero), + DEF_HELPER(float64_to_int64_round_to_zero), + DEF_HELPER(float64_round_to_int), + DEF_HELPER(float64_trunc_to_int), + DEF_HELPER(float64_eq), + DEF_HELPER(float64_le), + DEF_HELPER(float64_lt), + DEF_HELPER(float64_unordered), +#endif +}; +#undef DEF_HELPER + +int num_native_fpu_helpers(void) +{ + return ARRAY_SIZE(native_fpu_helpers); +} + +void *get_native_fpu_helpers(void) +{ + return native_fpu_helpers; +} + +/* XXX: this code implements the x86 behaviour, not the IEEE one. */ +#if TCG_TARGET_REG_BITS == 32 +static inline int32 long_to_int32(long a) +{ + return a; +} +#else +static inline int32 long_to_int32(long a) +{ + if (a != (int32_t)a) + a = 0x80000000; + return a; +} +#endif + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE integer-to-floating-point conversion routines. 
+*----------------------------------------------------------------------------*/ +float32 llvm_int32_to_float32(int32_t v) { return float32_val((float)v); } +float64 llvm_int32_to_float64(int32_t v) { return float64_val((double)v); } +float32 llvm_uint32_to_float32(uint32_t v) { return float32_val((float)v); } +float64 llvm_uint32_to_float64(uint32_t v) { return float64_val((double)v); } +float32 llvm_int64_to_float32(int64_t v) { return float32_val((float)v); } +float32 llvm_uint64_to_float32(uint64_t v) { return float32_val((float)v); } +float64 llvm_int64_to_float64(int64_t v) { return float64_val((double)v); } +float64 llvm_uint64_to_float64(uint64_t v) { return float64_val((double)v); } + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE single-precision conversion routines. +*----------------------------------------------------------------------------*/ +int32 llvm_float32_to_int32( float32 a ) { return long_to_int32(lrintf(lfloat(a))); } +int32 llvm_float32_to_int32_round_to_zero( float32 a ) { return (int32)lfloat(a); } +int64 llvm_float32_to_int64( float32 a ) { return llrintf(lfloat(a)); } +int64 llvm_float32_to_int64_round_to_zero( float32 a ) { return (int64)lfloat(a); } +float64 llvm_float32_to_float64( float32 a ) { return float64_val((double)lfloat(a)); } + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE single-precision operations. +*----------------------------------------------------------------------------*/ +float32 llvm_float32_round_to_int( float32 a ) { return float32_val(rintf(lfloat(a))); } +float32 llvm_float32_add( float32 a, float32 b ) { return float32_val(lfloat(a) + lfloat(b)); } +float32 llvm_float32_sub( float32 a, float32 b ) { return float32_val(lfloat(a) - lfloat(b)); } +float32 llvm_float32_mul( float32 a, float32 b ) { return float32_val(lfloat(a) * lfloat(b)); } +float32 llvm_float32_div( float32 a, float32 b ) { return float32_val(lfloat(a) / lfloat(b)); } +float32 llvm_float32_rem( float32 a, float32 b ) { return float32_val(remainderf(lfloat(a), lfloat(b))); } +float32 llvm_float32_sqrt( float32 a ) { return float32_val(sqrtf(lfloat(a))); } +int llvm_float32_eq( float32 a, float32 b ) { return lfloat(a) == lfloat(b); } +int llvm_float32_le( float32 a, float32 b ) { return lfloat(a) <= lfloat(b); } +int llvm_float32_lt( float32 a, float32 b ) { return lfloat(a) < lfloat(b); } +int llvm_float32_unordered( float32 a, float32 b ) { return isunordered(lfloat(a), lfloat(b)); } +float32 llvm_float32_abs(float32 a) { return float32_val(fabsf(lfloat(a))); } +float32 llvm_float32_chs(float32 a) { return float32_val(-lfloat(a)); } + +float32 llvm_float32_muladd( float32 a, float32 b, float32 c ) { return float32_val(lfloat(a) * lfloat(b) + lfloat(c)); } + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE double-precision conversion routines. 
+*----------------------------------------------------------------------------*/ +int32 llvm_float64_to_int32( float64 a ) { return long_to_int32(lrint(ldouble(a))); } +int32 llvm_float64_to_int32_round_to_zero( float64 a ) { return (int32)ldouble(a); } +int64 llvm_float64_to_int64( float64 a ) { return llrint(ldouble(a)); } +int64 llvm_float64_to_int64_round_to_zero( float64 a ) { return (int64)ldouble(a); } +float32 llvm_float64_to_float32( float64 a ) { return float32_val((float)ldouble(a)); } + +/*---------------------------------------------------------------------------- +| Software IEC/IEEE double-precision operations. +*----------------------------------------------------------------------------*/ +float64 llvm_float64_round_to_int( float64 a ) { return float64_val(rint(ldouble(a))); } +float64 llvm_float64_trunc_to_int( float64 a ) { return float64_val(trunc(ldouble(a))); } +float64 llvm_float64_add( float64 a, float64 b ) { return float64_val(ldouble(a) + ldouble(b)); } +float64 llvm_float64_sub( float64 a, float64 b ) { return float64_val(ldouble(a) - ldouble(b)); } +float64 llvm_float64_mul( float64 a, float64 b ) { return float64_val(ldouble(a) * ldouble(b)); } +float64 llvm_float64_div( float64 a, float64 b ) { return float64_val(ldouble(a) / ldouble(b)); } +float64 llvm_float64_rem( float64 a, float64 b ) { return float64_val(remainder(ldouble(a), ldouble(b))); } +float64 llvm_float64_sqrt( float64 a ) { return float64_val(sqrt(ldouble(a))); } +int llvm_float64_eq( float64 a, float64 b ) { return ldouble(a) == ldouble(b); } +int llvm_float64_le( float64 a, float64 b ) { return ldouble(a) <= ldouble(b); } +int llvm_float64_lt( float64 a, float64 b ) { return ldouble(a) < ldouble(b); } +int llvm_float64_unordered( float64 a, float64 b ) { return isunordered(ldouble(a), ldouble(b)); } +float64 llvm_float64_abs(float64 a) { return float64_val(fabs(ldouble(a))); } +float64 llvm_float64_chs(float64 a) { return float64_val(-ldouble(a)); } + +float64 llvm_float64_muladd( float64 a, float64 b, float64 c ) { return float64_val(ldouble(a) * ldouble(b) + ldouble(c)); } + +float32 llvm_float32_maybe_silence_nan( float32 a ) { + uint32_t _a = ((llvm_float32)(a)).u; + if ( ((_a >> 22) & 0x1FF) == 0x1FE && (_a & 0x003FFFFF)) { + _a |= (1 << 22); + return float32_val(_a); + } + return a; +} +float64 llvm_float64_maybe_silence_nan( float64 a ) { + uint64_t _a = ((llvm_float64)(a)).u; + if (((_a >> 51) & 0xFFF) == 0xFFE && (_a & 0x0007FFFFFFFFFFFFLL)) { + _a |= 0x0008000000000000LL; + return float64_val(_a); + } + return a; +} + +#undef float32_val +#undef float64_val +#undef lfloat +#undef ldouble + +#endif /* !SOFTFLOAT_NATIVE_H */ + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/llvm/hqemu-helper.c b/llvm/hqemu-helper.c new file mode 100644 index 0000000..6325716 --- /dev/null +++ b/llvm/hqemu-helper.c @@ -0,0 +1,77 @@ +#include "cpu.h" +#include "tcg.h" +#include "exec/helper-proto.h" +#include "hqemu.h" +#include "fpu/softfloat-native.h" + +CPUArchState basereg; +target_ulong pcid; + +#if defined(TARGET_I386) +XMMReg xmm_reg; +#endif + +extern TranslationBlock *tbs; + +void *ibtc_lookup(CPUArchState *env); +void *cpbl_lookup(CPUArchState *env); +int cpbl_validate(CPUArchState *env, target_ulong pc, int id); + +/* This helper is a hack to export symbols of helper functions in the LLVM + * bitcode file. If a target is alerted with lacks of symbols of function/variable, + * add such symbols in this helper by accessing it. 
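+ * (Added note restating the hack: the calls below exist only to keep these
+ * functions referenced, so that their symbols remain visible in the emitted
+ * bitcode file.)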
*/ +void helper_export_hqemu(CPUArchState *env) +{ + helper_lookup_ibtc(env); + helper_lookup_cpbl(env); + helper_validate_cpbl(env, 0, 0); + +#if defined(CONFIG_SOFTMMU) && defined(CONFIG_LLVM) + target_ulong ptr = 0; + llvm_ret_ldub_mmu(env, ptr, 0); + llvm_le_lduw_mmu(env, ptr, 0); + llvm_le_ldul_mmu(env, ptr, 0); + llvm_le_ldq_mmu(env, ptr, 0); + llvm_be_lduw_mmu(env, ptr, 0); + llvm_be_ldul_mmu(env, ptr, 0); + llvm_be_ldq_mmu(env, ptr, 0); + llvm_ret_ldsb_mmu(env, ptr, 0); + llvm_le_ldsw_mmu(env, ptr, 0); + llvm_le_ldsl_mmu(env, ptr, 0); + llvm_be_ldsw_mmu(env, ptr, 0); + llvm_be_ldsl_mmu(env, ptr, 0); + llvm_ret_stb_mmu(env, ptr, 0, 0); + llvm_le_stw_mmu(env, ptr, 0, 0); + llvm_le_stl_mmu(env, ptr, 0, 0); + llvm_le_stq_mmu(env, ptr, 0, 0); + llvm_be_stw_mmu(env, ptr, 0, 0); + llvm_be_stl_mmu(env, ptr, 0, 0); + llvm_be_stq_mmu(env, ptr, 0, 0); +#endif +} + +void helper_verify_tb(CPUArchState *env, int id) +{ + static TranslationBlock *last_tb; + TranslationBlock *tb = &tbs[id]; + if (tb->mode == BLOCK_INVALID) { + fprintf(stderr, "%s: tb=%p pc=" TARGET_FMT_lx " last_pc=" + TARGET_FMT_lx "\n", __func__, tb, tb->pc, + (last_tb) ? last_tb->pc : -1U); + } + last_tb = tb; +} + +/* + * helper_profile_exec is used to profile LLVM translated code. + */ +void helper_profile_exec(CPUArchState *env, void *counter_p, int idx) +{ + CPUState *cpu = ENV_GET_CPU(env); + uint64_t **counter = (uint64_t **)counter_p; + counter[cpu->cpu_index][idx]++; +} + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/llvm/hqemu.mk b/llvm/hqemu.mk new file mode 100644 index 0000000..01de6d6 --- /dev/null +++ b/llvm/hqemu.mk @@ -0,0 +1,191 @@ +# Makefile for HQEMU. + +QEMU_CFLAGS += -I$(SRC_PATH)/llvm -I$(SRC_PATH)/llvm/include -I$(SRC_PATH)/llvm/atomic +QEMU_CXXFLAGS += -std=c++11 -Wno-narrowing +obj-y += llvm/optimization.o llvm/tracer.o llvm/utils.o llvm/hqemu-helper.o + +# LLVM +ifdef CONFIG_LLVM + +LLVM_EXTRA_FLAGS += -Wall -DNEED_CPU_H -D$(LLVM_VERSION) -I.. 
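+
+# Added descriptive note (not in the original makefile): the patsubst calls
+# below only filter existing flags -- they drop options such as -Wcast-qual,
+# -fno-exceptions, -pedantic, -g and -O2 from the inherited QEMU/LLVM flag
+# sets before those flags are reused for the C++ translator objects and for
+# the clang-emitted bitcode.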
+ +LLVM_CXXFLAGS := $(patsubst -Wcast-qual, ,$(LLVM_CXXFLAGS)) +LLVM_CXXFLAGS := $(patsubst -fno-exceptions, ,$(LLVM_CXXFLAGS)) +LLVM_CXXFLAGS := $(patsubst -pedantic, ,$(LLVM_CXXFLAGS)) +LLVM_CXXFLAGS += -Wno-unused-local-typedefs -Wno-cast-qual -fno-rtti +LLVM_CFLAGS += $(patsubst -O2, ,$(patsubst -g, ,$(CFLAGS))) +LLVM_CFLAGS += $(LLVM_EXTRA_FLAGS) $(QEMU_INCLUDES) \ + -I$(SRC_PATH)/linux-user -I$(SRC_PATH)/linux-user/$(TARGET_ABI_DIR) \ + -I$(SRC_PATH)/target-$(TARGET_BASE_ARCH) -I$(SRC_PATH)/llvm \ + -I$(SRC_PATH)/llvm/atomic -I$(SRC_PATH)/llvm/include/pmu +LLVM_CFLAGS := $(patsubst -pedantic, ,$(LLVM_CFLAGS)) +LLVM_CFLAGS := $(patsubst -g, ,$(LLVM_CFLAGS)) + +PASS := llvm/pass +ANALYSIS := llvm/analysis +HPM := llvm/pmu +QEMU_CXXFLAGS += $(LLVM_CXXFLAGS) $(LLVM_EXTRA_FLAGS) -Wno-undef +LDFLAGS += $(LLVM_LDFLAGS) +LIBS += $(LLVM_LIBS) -ldl -lz -lncurses + + +ifeq ($(CONFIG_WIN32), y) +LIBS += -lpthread -limagehlp -lpsapi +endif + +obj-y += llvm/xml/tinyxml2.o +obj-y += llvm/llvm.o \ + llvm/llvm-translator.o \ + llvm/llvm-opc.o \ + llvm/llvm-opc-vector.o \ + llvm/llvm-opc-mmu.o \ + llvm/llvm-debug.o \ + llvm/llvm-target.o \ + llvm/llvm-soft-perfmon.o \ + llvm/llvm-hard-perfmon.o \ + llvm/llvm-annotate.o +obj-y += $(PASS)/ProfileExec.o \ + $(PASS)/ReplaceIntrinsic.o \ + $(PASS)/CombineGuestMemory.o \ + $(PASS)/CombineCasts.o \ + $(PASS)/CombineZExtTrunc.o \ + $(PASS)/FastMathPass.o \ + $(PASS)/StateMappingPass.o \ + $(PASS)/RedundantStateElimination.o \ + $(PASS)/SimplifyPointer.o +obj-y += $(ANALYSIS)/InnerLoopAnalysis.o + +# HPM +obj-y += $(HPM)/pmu.o \ + $(HPM)/pmu-events.o + +ifeq ($(ARCH),$(filter $(ARCH),i386 x86_64)) +obj-y += $(HPM)/x86/x86-events.o +endif +ifeq ($(ARCH),$(filter $(ARCH),arm aarch64)) +obj-y += $(HPM)/arm/arm-events.o +endif +ifeq ($(ARCH),$(filter $(ARCH),ppc64)) +obj-y += $(HPM)/ppc/ppc-events.o +endif + +# +# LLVM Bitcode file +# + +ifdef CONFIG_SOFTMMU +BCSUF = _softmmu +MMU_HELPER = $(TARGET_PATH)/mmu_helper.bc +endif + +LLVM_BITCODE = llvm_helper_${TARGET_NAME}${BCSUF}.bc +TARGET_PATH = target-$(TARGET_BASE_ARCH) +LLVM_HELPER += tcg-runtime.bc llvm/hqemu-helper.bc $(TARGET_PATH)/helper.bc + +ifeq ($(TARGET_I386), y) +LLVM_HELPER += $(TARGET_PATH)/cc_helper.bc \ + $(TARGET_PATH)/int_helper.bc \ + $(TARGET_PATH)/smm_helper.bc \ + $(TARGET_PATH)/excp_helper.bc \ + $(TARGET_PATH)/mem_helper.bc \ + $(TARGET_PATH)/svm_helper.bc \ + $(TARGET_PATH)/fpu_helper.bc \ + $(TARGET_PATH)/misc_helper.bc \ + $(TARGET_PATH)/seg_helper.bc \ + $(TARGET_PATH)/bpt_helper.bc +endif +ifeq ($(TARGET_X86_64), y) +LLVM_HELPER += $(TARGET_PATH)/cc_helper.bc \ + $(TARGET_PATH)/int_helper.bc \ + $(TARGET_PATH)/smm_helper.bc \ + $(TARGET_PATH)/excp_helper.bc \ + $(TARGET_PATH)/mem_helper.bc \ + $(TARGET_PATH)/svm_helper.bc \ + $(TARGET_PATH)/fpu_helper.bc \ + $(TARGET_PATH)/misc_helper.bc \ + $(TARGET_PATH)/seg_helper.bc +endif +ifeq ($(TARGET_ALPHA), y) +LLVM_HELPER += $(TARGET_PATH)/fpu_helper.bc \ + $(TARGET_PATH)/int_helper.bc \ + $(TARGET_PATH)/mem_helper.bc \ + $(TARGET_PATH)/sys_helper.bc +endif +ifeq ($(TARGET_ARM), y) +LLVM_HELPER += $(TARGET_PATH)/op_helper.bc \ + $(TARGET_PATH)/neon_helper.bc +endif +ifeq ($(TARGET_AARCH64), y) +LLVM_HELPER += $(TARGET_PATH)/op_helper.bc \ + $(TARGET_PATH)/helper-a64.bc \ + $(TARGET_PATH)/neon_helper.bc +endif +ifeq ($(TARGET_MICROBLAZE), y) +LLVM_HELPER += $(TARGET_PATH)/op_helper.bc +endif +ifeq ($(TARGET_MIPS), y) +LLVM_HELPER += $(TARGET_PATH)/op_helper.bc \ + $(TARGET_PATH)/dsp_helper.bc \ + $(TARGET_PATH)/lmi_helper.bc +endif +ifeq 
($(TARGET_OPENRISC), y) +LLVM_HELPER += $(TARGET_PATH)/exception_helper.bc \ + $(TARGET_PATH)/fpu_helper.bc \ + $(TARGET_PATH)/interrupt_helper.bc \ + $(TARGET_PATH)/int_helper.bc \ + $(TARGET_PATH)/sys_helper.bc \ + $(MMU_HELPER) +endif +ifeq ($(TARGET_PPC), y) +LLVM_HELPER += $(TARGET_PATH)/excp_helper.bc \ + $(TARGET_PATH)/int_helper.bc \ + $(TARGET_PATH)/misc_helper.bc \ + $(TARGET_PATH)/fpu_helper.bc \ + $(TARGET_PATH)/mem_helper.bc \ + $(TARGET_PATH)/timebase_helper.bc \ + $(MMU_HELPER) +endif +ifeq ($(TARGET_PPC64), y) +LLVM_HELPER += $(TARGET_PATH)/excp_helper.bc \ + $(TARGET_PATH)/int_helper.bc \ + $(TARGET_PATH)/misc_helper.bc \ + $(TARGET_PATH)/fpu_helper.bc \ + $(TARGET_PATH)/mem_helper.bc \ + $(TARGET_PATH)/timebase_helper.bc \ + $(MMU_HELPER) +endif +ifeq ($(TARGET_SH4), y) +LLVM_HELPER += $(TARGET_PATH)/op_helper.bc +endif +ifeq ($(TARGET_SPARC), y) +LLVM_HELPER += $(TARGET_PATH)/cc_helper.bc \ + $(TARGET_PATH)/fop_helper.bc \ + $(TARGET_PATH)/int32_helper.bc \ + $(TARGET_PATH)/ldst_helper.bc \ + $(TARGET_PATH)/vis_helper.bc \ + $(TARGET_PATH)/win_helper.bc \ + $(MMU_HELPER) +endif +ifeq ($(TARGET_SPARC64), y) +LLVM_HELPER += $(TARGET_PATH)/cc_helper.bc \ + $(TARGET_PATH)/fop_helper.bc \ + $(TARGET_PATH)/int64_helper.bc \ + $(TARGET_PATH)/ldst_helper.bc \ + $(TARGET_PATH)/vis_helper.bc \ + $(TARGET_PATH)/win_helper.bc \ + $(MMU_HELPER) +endif + +LOCAL_BC := clang +LOCAL_BC_CFLAGS := -S -emit-llvm $(BCFLAGS) -I$(SRC_PATH)/llvm/include $(LLVM_CFLAGS) \ + -Wno-missing-prototypes -Wno-sign-compare -Wno-unused-function \ + -Wno-constant-conversion + +%.bc: %.c + $(call quiet-command,$(LOCAL_BC) $(LOCAL_BC_CFLAGS) -c -o $@ $<, " LCC $(TARGET_DIR)$@") + + +$(LLVM_BITCODE): $(LLVM_HELPER) + $(call quiet-command,llvm-link -o $@ $^, " LCC $(TARGET_DIR)$@") + +endif diff --git a/llvm/include/InnerLoopAnalysis.h b/llvm/include/InnerLoopAnalysis.h new file mode 100644 index 0000000..f11225d --- /dev/null +++ b/llvm/include/InnerLoopAnalysis.h @@ -0,0 +1,291 @@ +/* + * (C) 2016 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#ifndef __INNERLOOPANALYSIS_H +#define __INNERLOOPANALYSIS_H + +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm-types.h" + + +class InductionDesc { + /* Start value. */ + Value *StartValue; + /* Step value. */ + const SCEV *Step; + +public: + InductionDesc() : StartValue(nullptr), Step(nullptr) {} + InductionDesc(Value *Start, const SCEV *Step) + : StartValue(Start), Step(Step) {} + + Value *getStartValue() const { return StartValue; } + const SCEV *getStep() const { return Step; } +}; + +class ReductionDesc { +public: + + enum ReductionKind { + NoReduction, /* Not a reduction. */ + IntegerAdd, /* Sum of numbers. */ + IntegerMult, /* Product of numbers. */ + IntegerOr, /* Bitwise or logical OR of numbers. */ + IntegerAnd, /* Bitwise or logical AND of numbers. */ + IntegerXor, /* Bitwise or logical XOR of numbers. */ + FloatAdd, /* Sum of float numbers. */ + FloatMult, /* Product of float numbers. 
*/ + }; + + ReductionDesc() + : StartValue(nullptr), LoopExitInstr(nullptr), + Kind(ReductionKind::NoReduction), Ty(nullptr) {} + ReductionDesc(Value *Start, Instruction *Exit, ReductionKind K, Type *Ty) + : StartValue(Start), LoopExitInstr(Exit), Kind(K), Ty(Ty) {} + + Value *getStartValue() const { return StartValue; } + Value *getNextValue() const { return LoopExitInstr; } + Instruction *getLoopExitInstr() { return LoopExitInstr; } + ReductionKind getReductionKind() { return Kind; } + Type *getScalarType() { return Ty; } + +private: + /* The starting value of the recurrence. */ + Value *StartValue; + /* The instruction who's value is used outside the loop. */ + Instruction *LoopExitInstr; + /* The kind of the recurrence.*/ + ReductionKind Kind; + /* The scalar type. */ + Type *Ty; +}; + +/* + * The InnertLoop class represents a single innertmost loop. The InnerLoop has + * a special shape that is specific to the DBT decoded guest loop, and its loop + * definition is different to a nature loop, e.g., latch and exiting block. + */ +class InnerLoop { +public: + typedef std::map<PHINode *, InductionDesc> InductionList; + typedef std::map<PHINode *, ReductionDesc> ReductionList; + +private: + Loop &TheLoop; + + /* The list of blocks in this loop. First entry is the header node. */ + std::vector<BasicBlock *> Blocks; + SmallPtrSet<const BasicBlock *, 8> DenseBlockSet; + + std::vector<BasicBlock *> Latches; + std::map<BasicBlock *, BasicBlock *> SplitLatches; + + bool UnknownPhi; + InductionList Inductions; + ReductionList Reductions; + + void addInduction(PHINode *Phi, Value *Start, const SCEV *Step) { + Inductions[Phi] = InductionDesc(Start, Step); + } + + void addReduction(PHINode *Phi, Value *Start, Instruction *Exit, + ReductionDesc::ReductionKind K, Type *Ty) { + Reductions[Phi] = ReductionDesc(Start, Exit, K, Ty); + } + + InnerLoop(const InnerLoop &) = delete; + const InnerLoop& operator=(const InnerLoop &) = delete; + + friend class InnerLoopAnalysis; + +public: + InnerLoop(Loop *loop); + ~InnerLoop() {} + + Loop &getLoop() const { return TheLoop; } + + BasicBlock *getHeader() const { return Blocks.front(); } + + /* Return true if the specified basic block is in this loop. */ + bool contains(const BasicBlock *BB) const { + return DenseBlockSet.count(BB); + } + + /* Return true if the specified instruction is in this loop. */ + bool contains(const Instruction *Inst) const { + return contains(Inst->getParent()); + } + + /* Get a list of the basic blocks which make up this loop. */ + typedef typename std::vector<BasicBlock*>::const_iterator block_iterator; + const std::vector<BasicBlock*> &getBlocks() const { return Blocks; } + block_iterator block_begin() const { return Blocks.begin(); } + block_iterator block_end() const { return Blocks.end(); } + inline iterator_range<block_iterator> blocks() const { + return make_range(block_begin(), block_end()); + } + + /* Get the number of blocks in this loop in constant time. */ + unsigned getNumBlocks() const { return Blocks.size(); } + + /* True if terminator in the block can branch to another block that is + * outside of the current loop. */ + bool isLoopExiting(BasicBlock *BB) const; + + /* Calculate the number of back edges to the loop header. */ + unsigned getNumBackEdges() const; + + /* Return all blocks inside the loop that have successors outside of the + * loop. */ + void getExitingBlocks(SmallVectorImpl<BasicBlock *> &ExitingBlocks) const; + + /* If getExitingBlocks would return exactly one block, return that block. 
+ * Otherwise return null. */ + BasicBlock *getExitingBlock() const; + + /* Return all of the successor blocks of this loop. */ + void getExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const; + + /* If getExitBlocks would return exactly one block, return that block. + * Otherwise return null. */ + BasicBlock *getExitBlock() const; + + /* If there is a preheader for this loop, return it. A loop has a preheader + * if there is only one edge to the header of the loop from outside of the + * loop. If this is the case, the block branching to the header of the loop + * is the preheader node. + * + * This method returns null if there is no preheader for the loop. */ + BasicBlock *getLoopPreheader() const; + + /* If the given loop's header has exactly one unique predecessor outside the + * loop, return it. Otherwise return null. + * This is less strict that the loop "preheader" concept, which requires + * the predecessor to have exactly one successor. */ + BasicBlock *getLoopPredecessor() const; + + unsigned getNumLoopLatches() const { return Latches.size(); } + unsigned getNumSplitLatches() const { return SplitLatches.size(); } + + /* Return all loop latch blocks of this loop. A latch block is a block that + * contains a branch back to the header. */ + void getLoopLatches(SmallVectorImpl<BasicBlock *> &LoopLatches) const { + for (auto I : Latches) + LoopLatches.push_back(I); + } + + /* If there is a latch tail, return it. */ + BasicBlock *getSingleLatchTail() const { + return (SplitLatches.size() == 1) ? SplitLatches.begin()->first : + nullptr; + } + + /* If there is a latch head, return it. */ + BasicBlock *getSingleLatchHead() const { + return (SplitLatches.size() == 1) ? SplitLatches.begin()->second : + nullptr; + } + + /* Return all of the latch tails of this loop. */ + void getLatchTails(SmallVectorImpl<BasicBlock *> &LatchTails) const { + for (auto &I : SplitLatches) + LatchTails.push_back(I.first); + } + + /* Given a latch tail, return its latch head. */ + BasicBlock *getLatchHead(BasicBlock *BB) { + if (SplitLatches.find(BB) == SplitLatches.end()) + return nullptr; + return SplitLatches[BB]; + } + + /* If the given phi is an induction of the loop, return the induciton. */ + InductionDesc *getInduction(PHINode *Phi) { + if (Inductions.find(Phi) == Inductions.end()) + return nullptr; + return &Inductions[Phi]; + } + + /* If the given phi is a reduction of the loop, return the induciton. */ + ReductionDesc *getReduction(PHINode *Phi) { + if (Reductions.find(Phi) == Reductions.end()) + return nullptr; + return &Reductions[Phi]; + } + + /* Return true if the loop has unknown phi(s). A loop has unknown phi(s) if + * a phi node is not identified, or the loop has no preheader or latch tail. + * + * If the loop has unknown phi(s), the data structure of Inductions and + * Reductions can be undefined. */ + bool hasUnknownPhi() { return UnknownPhi; } + + /* Return true if the instruction `From' can flow to instruction `To' in + * the loop. 
*/ + bool isReachable(Instruction *From, Instruction *To); +}; + +class InnerLoopAnalysis { + std::vector<InnerLoop *> InnerLoops; + + void analyzePhi(InnerLoop &TheLoop, ScalarEvolution *SE); + bool analyzeInduction(InnerLoop &TheLoop, ScalarEvolution *SE, PHINode *Phi); + bool analyzeReduction(InnerLoop &TheLoop, PHINode *Phi); + +public: + InnerLoopAnalysis() {} + ~InnerLoopAnalysis() { releaseMemory(); } + + void releaseMemory() { + while (!InnerLoops.empty()) { + InnerLoop *L = InnerLoops.back(); + InnerLoops.pop_back(); + delete L; + } + } + void print(raw_ostream &OS, const Module * = nullptr) const {} + void verify() const {} + void analyze(LoopInfo *LI, ScalarEvolution *SE); + + /* iterator/begin/end - The interface to the innermost loops. */ + typedef typename std::vector<InnerLoop *>::const_iterator iterator; + typedef typename std::vector<InnerLoop *>::const_reverse_iterator + reverse_iterator; + iterator begin() const { return InnerLoops.begin(); } + iterator end() const { return InnerLoops.end(); } + reverse_iterator rbegin() const { return InnerLoops.rbegin(); } + reverse_iterator rend() const { return InnerLoops.rend(); } + bool empty() const { return InnerLoops.empty(); } + unsigned size() { return InnerLoops.size(); } +}; + +/* + * InnerLoopAnalysisWrapperPass Pass + */ +class InnerLoopAnalysisWrapperPass : public FunctionPass { + InnerLoopAnalysis LA; + +public: + static char ID; + InnerLoopAnalysisWrapperPass() : FunctionPass(ID) { + initializeInnerLoopAnalysisWrapperPassPass(*PassRegistry::getPassRegistry()); + } + + InnerLoopAnalysis &getLoopAnalysis() { return LA; } + const InnerLoopAnalysis &getLoopAnalysis() const { return LA; } + + void releaseMemory() override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + void print(raw_ostream &OS, const Module * = nullptr) const override; + void verifyAnalysis() const override; + bool runOnFunction(Function &F) override; +}; + +#endif + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/llvm/include/JIT.h b/llvm/include/JIT.h new file mode 100644 index 0000000..a1b3c8d --- /dev/null +++ b/llvm/include/JIT.h @@ -0,0 +1,228 @@ +//===-- JIT.h - Class definition for the JIT --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the top-level JIT data structure. +// +//===----------------------------------------------------------------------===// + +#ifndef __JIT_H +#define __JIT_H + +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/PassManager.h" + +namespace llvm { + +class Function; +struct JITEvent_EmittedFunctionDetails; +class MachineCodeEmitter; +class MachineCodeInfo; +class TargetJITInfo; +class TargetMachine; + +class JITState { +private: + FunctionPassManager PM; // Passes to compile a function + Module *M; // Module used to create the PM + + /// PendingFunctions - Functions which have not been code generated yet, but + /// were called from a function being code generated. 
+ std::vector<AssertingVH<Function> > PendingFunctions; + +public: + explicit JITState(Module *M) : PM(M), M(M) {} + + FunctionPassManager &getPM() { + return PM; + } + + Module *getModule() const { return M; } + std::vector<AssertingVH<Function> > &getPendingFunctions() { + return PendingFunctions; + } +}; + + +class JIT : public ExecutionEngine { + /// types + typedef ValueMap<const BasicBlock *, void *> + BasicBlockAddressMapTy; + /// data + TargetMachine &TM; // The current target we are compiling to + TargetJITInfo &TJI; // The JITInfo for the target we are compiling to + JITCodeEmitter *JCE; // JCE object + JITMemoryManager *JMM; + std::vector<JITEventListener*> EventListeners; + + /// AllocateGVsWithCode - Some applications require that global variables and + /// code be allocated into the same region of memory, in which case this flag + /// should be set to true. Doing so breaks freeMachineCodeForFunction. + bool AllocateGVsWithCode; + + /// True while the JIT is generating code. Used to assert against recursive + /// entry. + bool isAlreadyCodeGenerating; + + JITState *jitstate; + + /// BasicBlockAddressMap - A mapping between LLVM basic blocks and their + /// actualized version, only filled for basic blocks that have their address + /// taken. + BasicBlockAddressMapTy BasicBlockAddressMap; + + + JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, + JITMemoryManager *JMM, bool AllocateGVsWithCode); +public: + ~JIT(); + + static void Register() { + JITCtor = createJIT; + } + + /// getJITInfo - Return the target JIT information structure. + /// + TargetJITInfo &getJITInfo() const { return TJI; } + + /// create - Create an return a new JIT compiler if there is one available + /// for the current target. Otherwise, return null. + /// + static ExecutionEngine *create(Module *M, + std::string *Err, + JITMemoryManager *JMM, + CodeGenOpt::Level OptLevel = + CodeGenOpt::Default, + bool GVsWithCode = true, + Reloc::Model RM = Reloc::Default, + CodeModel::Model CMM = CodeModel::JITDefault) { + return ExecutionEngine::createJIT(M, Err, JMM, OptLevel, GVsWithCode, + RM, CMM); + } + + void addModule(Module *M) override; + + /// removeModule - Remove a Module from the list of modules. Returns true if + /// M is found. + bool removeModule(Module *M) override; + + /// runFunction - Start execution with the specified function and arguments. + /// + GenericValue runFunction(Function *F, + const std::vector<GenericValue> &ArgValues) override; + + /// getPointerToNamedFunction - This method returns the address of the + /// specified function by using the MemoryManager. As such it is only + /// useful for resolving library symbols, not code generated symbols. + /// + /// If AbortOnFailure is false and no function with the given name is + /// found, this function silently returns a null pointer. Otherwise, + /// it prints a message to stderr and aborts. + /// + void *getPointerToNamedFunction(const std::string &Name, + bool AbortOnFailure = true) override; + + // CompilationCallback - Invoked the first time that a call site is found, + // which causes lazy compilation of the target function. + // + static void CompilationCallback(); + + /// getPointerToFunction - This returns the address of the specified function, + /// compiling it if necessary. + /// + void *getPointerToFunction(Function *F) override; + + /// addPointerToBasicBlock - Adds address of the specific basic block. 
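  //
  // Editorial sketch (illustrative only, not part of the original header):
  // one plausible way to bring this engine up is to hand it a memory manager
  // that carves code out of the translator's trace cache, e.g.
  //
  //   JITMemoryManager *JMM = DefaultJITMemoryManager::Create(Cache, Size);
  //   std::string Error;
  //   ExecutionEngine *EE = JIT::create(Mod, &Error, JMM, CodeGenOpt::Default);
  //
  // where `Cache', `Size' and `Mod' are placeholders for the trace-cache
  // buffer and the module holding the trace being compiled.
  //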
+ void addPointerToBasicBlock(const BasicBlock *BB, void *Addr); + + /// clearPointerToBasicBlock - Removes address of specific basic block. + void clearPointerToBasicBlock(const BasicBlock *BB); + + /// getPointerToBasicBlock - This returns the address of the specified basic + /// block, assuming function is compiled. + void *getPointerToBasicBlock(BasicBlock *BB) override; + + /// getOrEmitGlobalVariable - Return the address of the specified global + /// variable, possibly emitting it to memory if needed. This is used by the + /// Emitter. + void *getOrEmitGlobalVariable(const GlobalVariable *GV) override; + + /// getPointerToFunctionOrStub - If the specified function has been + /// code-gen'd, return a pointer to the function. If not, compile it, or use + /// a stub to implement lazy compilation if available. + /// + void *getPointerToFunctionOrStub(Function *F) override; + + /// recompileAndRelinkFunction - This method is used to force a function + /// which has already been compiled, to be compiled again, possibly + /// after it has been modified. Then the entry to the old copy is overwritten + /// with a branch to the new copy. If there was no old copy, this acts + /// just like JIT::getPointerToFunction(). + /// + void *recompileAndRelinkFunction(Function *F) override; + + /// freeMachineCodeForFunction - deallocate memory used to code-generate this + /// Function. + /// + void freeMachineCodeForFunction(Function *F) override; + + /// addPendingFunction - while jitting non-lazily, a called but non-codegen'd + /// function was encountered. Add it to a pending list to be processed after + /// the current function. + /// + void addPendingFunction(Function *F); + + /// getCodeEmitter - Return the code emitter this JIT is emitting into. + /// + JITCodeEmitter *getCodeEmitter() const { return JCE; } + + static ExecutionEngine *createJIT(Module *M, + std::string *ErrorStr, + JITMemoryManager *JMM, + bool GVsWithCode, + TargetMachine *TM); + + // Run the JIT on F and return information about the generated code + void runJITOnFunction(Function *F, MachineCodeInfo *MCI = nullptr) override; + + void RegisterJITEventListener(JITEventListener *L) override; + void UnregisterJITEventListener(JITEventListener *L) override; + + TargetMachine *getTargetMachine() override { return &TM; } + + /// These functions correspond to the methods on JITEventListener. They + /// iterate over the registered listeners and call the corresponding method on + /// each. + void NotifyFunctionEmitted( + const Function &F, void *Code, size_t Size, + const JITEvent_EmittedFunctionDetails &Details); + void NotifyFreeingMachineCode(void *OldPtr); + + BasicBlockAddressMapTy & + getBasicBlockAddressMap() { + return BasicBlockAddressMap; + } + + +private: + static JITCodeEmitter *createEmitter(JIT &J, JITMemoryManager *JMM, + TargetMachine &tm); + void runJITOnFunctionUnlocked(Function *F); + void updateFunctionStubUnlocked(Function *F); + void jitTheFunctionUnlocked(Function *F); + +protected: + + /// getMemoryforGV - Allocate memory for a global variable. 
+ char* getMemoryForGV(const GlobalVariable* GV) override; + +}; + +} // End llvm namespace + +#endif diff --git a/llvm/include/JITMemoryManager.h b/llvm/include/JITMemoryManager.h new file mode 100644 index 0000000..301d227 --- /dev/null +++ b/llvm/include/JITMemoryManager.h @@ -0,0 +1,318 @@ +//===-- JITMemoryManager.cpp - Memory Allocator for JIT'd code ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the DefaultJITMemoryManager class. +// +//===----------------------------------------------------------------------===// + +#ifndef __JITMEMORYMANAGER_H +#define __JITMEMORYMANAGER_H + +#include <sys/mman.h> +#include "llvm/Support/DynamicLibrary.h" +#include "llvm/ExecutionEngine/JITMemoryManager.h" +#include "llvm-debug.h" +#include "utils.h" + +using namespace llvm; + +#define MIN_CODE_CACHE_SIZE (1 * 1024 * 1024) +#define DEFAULT_GLOBAL_SIZE (64 * 1024) +#define DEFAULT_THRESHOLD (32 * 1024) + + +// AtExitHandlers - List of functions to call when the program exits, +// registered with the atexit() library function. +static std::vector<void (*)()> AtExitHandlers; + +/// runAtExitHandlers - Run any functions registered by the program's +/// calls to atexit(3), which we intercept and store in +/// AtExitHandlers. +/// +static void runAtExitHandlers() { + while (!AtExitHandlers.empty()) { + void (*Fn)() = AtExitHandlers.back(); + AtExitHandlers.pop_back(); + Fn(); + } +} + +//===----------------------------------------------------------------------===// +// Function stubs that are invoked instead of certain library calls +// +// Force the following functions to be linked in to anything that uses the +// JIT. This is a hack designed to work around the all-too-clever Glibc +// strategy of making these functions work differently when inlined vs. when +// not inlined, and hiding their real definitions in a separate archive file +// that the dynamic linker can't see. For more info, search for +// 'libc_nonshared.a' on Google, or read http://llvm.org/PR274. +#if defined(__linux__) && defined(__GLIBC__) +/* stat functions are redirecting to __xstat with a version number. On x86-64 + * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat' + * available as an exported symbol, so we have to add it explicitly. + */ +namespace { +class StatSymbols { +public: + StatSymbols() { + sys::DynamicLibrary::AddSymbol("stat", (void*)(intptr_t)stat); + sys::DynamicLibrary::AddSymbol("fstat", (void*)(intptr_t)fstat); + sys::DynamicLibrary::AddSymbol("lstat", (void*)(intptr_t)lstat); + sys::DynamicLibrary::AddSymbol("stat64", (void*)(intptr_t)stat64); + sys::DynamicLibrary::AddSymbol("\x1stat64", (void*)(intptr_t)stat64); + sys::DynamicLibrary::AddSymbol("\x1open64", (void*)(intptr_t)open64); + sys::DynamicLibrary::AddSymbol("\x1lseek64", (void*)(intptr_t)lseek64); + sys::DynamicLibrary::AddSymbol("fstat64", (void*)(intptr_t)fstat64); + sys::DynamicLibrary::AddSymbol("lstat64", (void*)(intptr_t)lstat64); + sys::DynamicLibrary::AddSymbol("atexit", (void*)(intptr_t)atexit); + sys::DynamicLibrary::AddSymbol("mknod", (void*)(intptr_t)mknod); + } +}; +} +static StatSymbols initStatSymbols; +#endif // __linux__ + +// jit_exit - Used to intercept the "exit" library call. +static void jit_exit(int Status) { + runAtExitHandlers(); // Run atexit handlers... 
+ exit(Status); +} + +// jit_atexit - Used to intercept the "atexit" library call. +static int jit_atexit(void (*Fn)()) { + AtExitHandlers.push_back(Fn); // Take note of atexit handler... + return 0; // Always successful +} + +static int jit_noop() { + return 0; +} + + +/// DefaultJITMemoryManager - Manage trace cache memory for the JIT code generation. +class DefaultJITMemoryManager : public JITMemoryManager { + uint8_t *TraceCache; + size_t TraceCacheSize; + + uint8_t *GlobalBase; /* section for global data used by QEMU helpers */ + uint8_t *CodeBase; /* section for emitting trace code */ + uint8_t *CodeGenPtr; + + size_t GlobalRemain; + size_t CodeRemain; + size_t Threshold; + + hqemu::Mutex lock; + +public: + DefaultJITMemoryManager(uint8_t *Cache, size_t Size) + : TraceCache(Cache), TraceCacheSize(Size), Threshold(DEFAULT_THRESHOLD) + { + GlobalBase = TraceCache; + GlobalRemain = DEFAULT_GLOBAL_SIZE; + + CodeBase = GlobalBase + DEFAULT_GLOBAL_SIZE; + CodeBase = (uint8_t *)(((uintptr_t)CodeBase + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1)); + CodeRemain = (uintptr_t)TraceCache + TraceCacheSize - (uintptr_t)CodeBase; + CodeGenPtr = CodeBase; + } + + ~DefaultJITMemoryManager() {} + + //===----------------------------------------------------------------------===// + // + /// getPointerToNamedFunction - This method returns the address of the specified + /// function by using the dynamic loader interface. As such it is only useful + /// for resolving library symbols, not code generated symbols. + /// + void *getPointerToNamedFunction(const std::string &Name, + bool AbortOnFailure = true) override { + // Check to see if this is one of the functions we want to intercept. Note, + // we cast to intptr_t here to silence a -pedantic warning that complains + // about casting a function pointer to a normal pointer. + if (Name == "exit") return (void*)(intptr_t)&jit_exit; + if (Name == "atexit") return (void*)(intptr_t)&jit_atexit; + + // We should not invoke parent's ctors/dtors from generated main()! + // On Mingw and Cygwin, the symbol __main is resolved to + // callee's(eg. tools/lli) one, to invoke wrong duplicated ctors + // (and register wrong callee's dtors with atexit(3)). + // We expect ExecutionEngine::runStaticConstructorsDestructors() + // is called before ExecutionEngine::runFunctionAsMain() is called. + if (Name == "__main") return (void*)(intptr_t)&jit_noop; + + const char *NameStr = Name.c_str(); + // If this is an asm specifier, skip the sentinal. + if (NameStr[0] == 1) ++NameStr; + + // If it's an external function, look it up in the process image... + void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr); + if (Ptr) return Ptr; + + // If it wasn't found and if it starts with an underscore ('_') character, + // try again without the underscore. + if (NameStr[0] == '_') { + Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1); + if (Ptr) return Ptr; + } + + // Darwin/PPC adds $LDBLStub suffixes to various symbols like printf. These + // are references to hidden visibility symbols that dlsym cannot resolve. + // If we have one of these, strip off $LDBLStub and try again. +#if defined(__APPLE__) && defined(__ppc__) + if (Name.size() > 9 && Name[Name.size()-9] == '$' && + memcmp(&Name[Name.size()-8], "LDBLStub", 8) == 0) { + // First try turning $LDBLStub into $LDBL128. If that fails, strip it off. + // This mirrors logic in libSystemStubs.a. 
+ std::string Prefix = std::string(Name.begin(), Name.end()-9); + if (void *Ptr = getPointerToNamedFunction(Prefix+"$LDBL128", false)) + return Ptr; + if (void *Ptr = getPointerToNamedFunction(Prefix, false)) + return Ptr; + } +#endif + + if (AbortOnFailure) { + report_fatal_error("Program used external function '"+Name+ + "' which could not be resolved!"); + } + return nullptr; + } + + void AllocateGOT() override { hqemu_error("fixme.\n"); } + + // Testing methods. + bool CheckInvariants(std::string &ErrorStr) override { hqemu_error("fixme.\n"); return false; } + size_t GetDefaultCodeSlabSize() override { hqemu_error("fixme.\n"); return 0; } + size_t GetDefaultDataSlabSize() override { hqemu_error("fixme.\n"); return 0; } + size_t GetDefaultStubSlabSize() override { hqemu_error("fixme.\n"); return 0; } + unsigned GetNumCodeSlabs() override { hqemu_error("fixme.\n"); return 0; } + unsigned GetNumDataSlabs() override { hqemu_error("fixme.\n"); return 0; } + unsigned GetNumStubSlabs() override { hqemu_error("fixme.\n"); return 0; } + + /// startFunctionBody - When a function starts, allocate a block of free + /// executable memory, returning a pointer to it and its actual size. + uint8_t *startFunctionBody(const Function *F, + uintptr_t &ActualSize) override { + lock.acquire(); + if (unlikely(CodeRemain < Threshold)) + hqemu_error("internal error (fixme).\n"); + + ActualSize = CodeRemain; + return CodeGenPtr; + } + + /// endFunctionBody - The function F is now allocated, and takes the memory + /// in the range [FunctionStart,FunctionEnd). + void endFunctionBody(const Function *F, uint8_t *FunctionStart, + uint8_t *FunctionEnd) override { + assert(FunctionEnd > FunctionStart); + + size_t GenSize = FunctionEnd - FunctionStart; + if (unlikely(GenSize > CodeRemain)) + hqemu_error("exceeds available cache size.\n"); + + CodeGenPtr = (uint8_t *)(((uintptr_t)CodeGenPtr + GenSize + CODE_GEN_ALIGN - 1) + & ~(CODE_GEN_ALIGN - 1)); + CodeRemain = (uintptr_t)TraceCache + TraceCacheSize - (uintptr_t)CodeGenPtr; + lock.release(); + } + + /// allocateSpace - Allocate a memory block of the given size. This method + /// cannot be called between calls to startFunctionBody and endFunctionBody. + uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) override { + hqemu_error("fixme.\n"); + return nullptr; + } + + /// allocateStub - Allocate memory for a function stub. + uint8_t *allocateStub(const GlobalValue* F, unsigned StubSize, + unsigned Alignment) override { + return allocateGlobal(StubSize, Alignment); + } + + /// allocateGlobal - Allocate memory for a global. + uint8_t *allocateGlobal(uintptr_t Size, unsigned Alignment) override { + hqemu::MutexGuard locked(lock); + + if (!Alignment) + Alignment = 16; + if (Alignment & (Alignment - 1)) + hqemu_error("alignment must be a power of two.\n"); + + unsigned MisAligned = ((intptr_t)GlobalBase & (Alignment - 1)); + if (MisAligned) + MisAligned = Alignment - MisAligned; + + if (GlobalRemain < Size + MisAligned) + hqemu_error("exceeds available global size.\n"); + + uint8_t *GlobalPtr = GlobalBase + MisAligned; + GlobalBase = GlobalPtr + Size; + GlobalRemain -= (Size + MisAligned); + return GlobalPtr; + } + + /// allocateCodeSection - Allocate memory for a code section. + uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, + unsigned SectionID, + StringRef SectionName) override { + hqemu_error("fixme.\n"); return nullptr; + } + + /// allocateDataSection - Allocate memory for a data section. 
+ uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, + unsigned SectionID, StringRef SectionName, + bool IsReadOnly) override { + hqemu_error("fixme.\n"); return nullptr; + } + + bool finalizeMemory(std::string *ErrMsg) override { return false; } + + uint8_t *getGOTBase() const override { return nullptr; } + + void deallocateBlock(void *Block) {} + + /// deallocateFunctionBody - Deallocate all memory for the specified + /// function body. + void deallocateFunctionBody(void *Body) override {} + + /// setMemoryWritable - When code generation is in progress, + /// the code pages may need permissions changed. + void setMemoryWritable() override {} + /// setMemoryExecutable - When code generation is done and we're ready to + /// start execution, the code pages may need permissions changed. + void setMemoryExecutable() override {} + + /// setPoisonMemory - Controls whether we write garbage over freed memory. + /// + void setPoisonMemory(bool poison) override {} + + size_t getCodeSize() { return CodeGenPtr - CodeBase; } + bool isSizeAvailable() { + hqemu::MutexGuard locked(lock); + return CodeRemain >= Threshold ? 1 : 0; + } + void Flush() { + CodeGenPtr = CodeBase; + CodeRemain = (uintptr_t)TraceCache + TraceCacheSize - (uintptr_t)CodeBase; + } + + static DefaultJITMemoryManager *Create(uint8_t *Cache, size_t Size) { + if (Size < MIN_CODE_CACHE_SIZE) + hqemu_error("Trace cache size is too small.\n"); + return new DefaultJITMemoryManager(Cache, Size); + } +}; + +#endif + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/llvm/include/MCJITMemoryManager.h b/llvm/include/MCJITMemoryManager.h new file mode 100644 index 0000000..33059a5 --- /dev/null +++ b/llvm/include/MCJITMemoryManager.h @@ -0,0 +1,213 @@ +//===-- MCJITMemoryManager.cpp - Memory manager for MC-JIT -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Interface of the MCJIT memory manager base class. +// +//===----------------------------------------------------------------------===// + +#ifndef __MCJITMEMORYMANAGER_H +#define __MCJITMEMORYMANAGER_H + +#include "llvm/ExecutionEngine/RTDyldMemoryManager.h" +#include "llvm-debug.h" +#include "utils.h" + +using namespace llvm; + +#define MIN_CODE_CACHE_SIZE (1 * 1024 * 1024) +#define DEFAULT_GLOBAL_SIZE (64 * 1024) +#define DEFAULT_THRESHOLD (32 * 1024) + +// RuntimeDyld clients often want to handle the memory management of +// what gets placed where. For JIT clients, this is the subset of +// JITMemoryManager required for dynamic loading of binaries. +// +// FIXME: As the RuntimeDyld fills out, additional routines will be needed +// for the varying types of objects to be allocated. 
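//
// Editorial note (layout summary, stated as an assumption drawn from the code
// below rather than from HQEMU documentation): like DefaultJITMemoryManager,
// the manager below splits the translator's trace cache into a small
// global/stub area followed by a bump-allocated code area:
//
//   [ globals: DEFAULT_GLOBAL_SIZE | CodeBase ... CodeGenPtr ->        end ]
//
// A typical lifecycle, with Cache/Size naming whatever buffer the translator
// sets aside as the trace cache, might look like:
//
//   DefaultMCJITMemoryManager *MM = DefaultMCJITMemoryManager::Create(Cache, Size);
//   ...
//   if (!MM->isSizeAvailable())   // fewer than DEFAULT_THRESHOLD bytes left
//       MM->Flush();              // reset CodeGenPtr; the caller must also
//                                 // invalidate the traces that lived there
//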
+class DefaultMCJITMemoryManager : public RTDyldMemoryManager { + uint8_t *TraceCache; + size_t TraceCacheSize; + + uint8_t *GlobalBase; /* section for global data used by QEMU helpers */ + uint8_t *CodeBase; /* section for emitting trace code */ + uint8_t *CodeGenPtr; + + size_t GlobalRemain; + size_t CodeRemain; + size_t Threshold; + + hqemu::Mutex lock; + + SymbolMap Symbols; + +public: + DefaultMCJITMemoryManager(uint8_t *Cache, size_t Size) + : TraceCache(Cache), TraceCacheSize(Size), Threshold(DEFAULT_THRESHOLD) + { + GlobalBase = TraceCache; + GlobalRemain = DEFAULT_GLOBAL_SIZE; + + CodeBase = GlobalBase + DEFAULT_GLOBAL_SIZE; + CodeBase = (uint8_t *)(((uintptr_t)CodeBase + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1)); + CodeRemain = (uintptr_t)TraceCache + TraceCacheSize - (uintptr_t)CodeBase; + CodeGenPtr = CodeBase; + } + ~DefaultMCJITMemoryManager() {} + + /// Allocate a memory block of (at least) the given size suitable for + /// executable code. The SectionID is a unique identifier assigned by the JIT + /// engine, and optionally recorded by the memory manager to access a loaded + /// section. + uint8_t *allocateCodeSection( + uintptr_t Size, unsigned Alignment, unsigned SectionID, + StringRef SectionName) override { + hqemu::MutexGuard locked(lock); + + if (!Alignment) + Alignment = 16; + + if (Alignment & (Alignment - 1)) + hqemu_error("Alignment must be a power of two.\n"); + + uintptr_t CurGenPtr = (uintptr_t)CodeGenPtr; + CurGenPtr = (CurGenPtr + Alignment - 1) & ~(uintptr_t)(Alignment - 1); + CodeGenPtr = (uint8_t *)((CurGenPtr + Size + CODE_GEN_ALIGN - 1) & + ~(uintptr_t)(CODE_GEN_ALIGN - 1)); + CodeRemain = (uintptr_t)TraceCache + TraceCacheSize - (uintptr_t)CodeGenPtr; + return (uint8_t *)CurGenPtr; + } + + /// Allocate a memory block of (at least) the given size suitable for data. + /// The SectionID is a unique identifier assigned by the JIT engine, and + /// optionally recorded by the memory manager to access a loaded section. + uint8_t *allocateDataSection( + uintptr_t Size, unsigned Alignment, unsigned SectionID, + StringRef SectionName, bool IsReadOnly) override { + return allocateCodeSection(Size, Alignment, SectionID, SectionName); + } + + /// Inform the memory manager about the total amount of memory required to + /// allocate all sections to be loaded: + /// \p CodeSize - the total size of all code sections + /// \p DataSizeRO - the total size of all read-only data sections + /// \p DataSizeRW - the total size of all read-write data sections + /// + /// Note that by default the callback is disabled. To enable it + /// redefine the method needsToReserveAllocationSpace to return true. + void reserveAllocationSpace( + uintptr_t CodeSize, uintptr_t DataSizeRO, uintptr_t DataSizeRW) { + hqemu_error("fixme.\n"); + } + + /// Override to return true to enable the reserveAllocationSpace callback. + bool needsToReserveAllocationSpace() { return false; } + + /// Register the EH frames with the runtime so that c++ exceptions work. + /// + /// \p Addr parameter provides the local address of the EH frame section + /// data, while \p LoadAddr provides the address of the data in the target + /// address space. If the section has not been remapped (which will usually + /// be the case for local execution) these two values will be the same. 
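  // Editorial sketch (illustrative, not part of the original header): before
  // compiling a trace, the translator is expected to seed this manager with
  // the addresses of the QEMU helpers already linked into the emulator, so
  // that getSymbolAddress() below can resolve relocations without dlsym():
  //
  //   SymbolMap Symbols;
  //   Symbols["helper_lookup_ibtc"] =
  //       (uint64_t)(uintptr_t)helper_lookup_ibtc;   // one entry per helper
  //   MM->AddSymbols(Symbols);
  //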
+ void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) { + hqemu_error("fixme.\n"); + } + + void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) { + hqemu_error("fixme.\n"); + } + + /// This method returns the address of the specified function or variable. + /// It is used to resolve symbols during module linking. + uint64_t getSymbolAddress(const std::string &Name) { + hqemu::MutexGuard locked(lock); + if (Symbols.find(Name) == Symbols.end()) { + std::string ErrMsg = "Program used external symbol '" + Name + + "'which could not be resolved!\n"; + hqemu_error(ErrMsg.c_str()); + } + return Symbols[Name]; + } + + /// This method returns the address of the specified function. As such it is + /// only useful for resolving library symbols, not code generated symbols. + /// + /// If \p AbortOnFailure is false and no function with the given name is + /// found, this function returns a null pointer. Otherwise, it prints a + /// message to stderr and aborts. + /// + /// This function is deprecated for memory managers to be used with + /// MCJIT or RuntimeDyld. Use getSymbolAddress instead. + void *getPointerToNamedFunction(const std::string &Name, + bool AbortOnFailure = true) { + if (AbortOnFailure) { + std::string ErrMsg = "Program used external symbol '" + Name + + "'which could not be resolved!\n"; + hqemu_error(ErrMsg.c_str()); + } + return nullptr; + } + + /// This method is called after an object has been loaded into memory but + /// before relocations are applied to the loaded sections. The object load + /// may have been initiated by MCJIT to resolve an external symbol for another + /// object that is being finalized. In that case, the object about which + /// the memory manager is being notified will be finalized immediately after + /// the memory manager returns from this call. + /// + /// Memory managers which are preparing code for execution in an external + /// address space can use this call to remap the section addresses for the + /// newly loaded object. +#if defined(LLVM_V35) + void notifyObjectLoaded(ExecutionEngine *EE, + const ObjectImage *Obj) { + } +#else + void notifyObjectLoaded(RuntimeDyld &RTDyld, + const object::ObjectFile &Obj) { + } +#endif + + /// This method is called when object loading is complete and section page + /// permissions can be applied. It is up to the memory manager implementation + /// to decide whether or not to act on this method. The memory manager will + /// typically allocate all sections as read-write and then apply specific + /// permissions when this method is called. Code sections cannot be executed + /// until this function has been called. In addition, any cache coherency + /// operations needed to reliably use the memory are also performed. + /// + /// Returns true if an error occurred, false otherwise. + bool finalizeMemory(std::string *ErrMsg = nullptr) override { + return false; + } + + void AddSymbols(SymbolMap &symbols) { + Symbols = symbols; + } + + size_t getCodeSize() { return CodeGenPtr - CodeBase; } + bool isSizeAvailable() { + hqemu::MutexGuard locked(lock); + return CodeRemain >= Threshold ? 
1 : 0; + } + void Flush() { + CodeGenPtr = CodeBase; + CodeRemain = (uintptr_t)TraceCache + TraceCacheSize - (uintptr_t)CodeBase; + } + + static DefaultMCJITMemoryManager *Create(uint8_t *Cache, size_t Size) { + if (Size < MIN_CODE_CACHE_SIZE) { + std::string ErrMsg = "Trace cache size is too small (" + + std::to_string(Size) + ")\n."; + hqemu_error(ErrMsg.c_str()); + } + return new DefaultMCJITMemoryManager(Cache, Size); + } +}; + +#endif diff --git a/llvm/include/hqemu-config.h b/llvm/include/hqemu-config.h new file mode 100644 index 0000000..2e2f42f --- /dev/null +++ b/llvm/include/hqemu-config.h @@ -0,0 +1,142 @@ +/* + * (C) 2016 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#ifndef __HQEMU_CONFIG_H +#define __HQEMU_CONFIG_H + + +#define PACKAGE_NAME "HQEMU" +#define PACKAGE_VERSION_MAJOR "2.5" +#define PACKAGE_VERSION_MINOR "2" + +#define ENABLE_IBTC +#define ENABLE_CPBL +#define ENABLE_LPAGE +#define ENABLE_PASSES +#define ENABLE_MCJIT +//#define ENABLE_HPM_THREAD +//#define ENABLE_TLBVERSION +//#define ENALBE_CPU_PROFILE +//#define USE_TRACETREE_ONLY + + +#if defined(CONFIG_USER_ONLY) +# define ENABLE_TCG_VECTOR +# define GUEST_BASE guest_base +#else +# define GUEST_BASE (0UL) +#endif + +#if defined(ENABLE_TLBVERSION) +# if defined(ALIGNED_ONLY) +# undef ENABLE_TLBVERSION +# elif HOST_LONG_BITS == 64 && TARGET_LONG_BITS == 32 && defined(HOST_X86_64) +# define ENABLE_TLBVERSION_EXT +# endif +#endif + +#ifndef ENABLE_TLBVERSION +# define TLB_INVALID_SHIFT 3 +# define TLB_NOTDIRTY_SHIFT 4 +# define TLB_MMIO_SHIFT 5 +# define TLB_VERSION_BITS 0 +# define TLB_VERSION_MASK 0 +# define TLB_VERSION_SHIFT (0) +# define tlb_version(__env) 0 +typedef target_ulong tlbaddr_t; +#elif defined(ENABLE_TLBVERSION_EXT) +# define TLB_INVALID_SHIFT 3 +# define TLB_NOTDIRTY_SHIFT 4 +# define TLB_MMIO_SHIFT 5 +# define TLB_VERSION_BITS 32 +# define TLB_VERSION_SIZE (1UL << TLB_VERSION_BITS) +# define TLB_VERSION_MASK (0xFFFFFFFF00000000UL) +# define TLB_VERSION_SHIFT (32) +# define tlb_version(__env) (__env->tlb_version) +typedef unsigned long tlbaddr_t; +#else +# define TLB_INVALID_SHIFT (TARGET_PAGE_BITS - 3) +# define TLB_NOTDIRTY_SHIFT (TARGET_PAGE_BITS - 2) +# define TLB_MMIO_SHIFT (TARGET_PAGE_BITS - 1) +# define TLB_VERSION_BITS (TARGET_PAGE_BITS - 3) +# define TLB_VERSION_SIZE (1 << TLB_VERSION_BITS) +# define TLB_VERSION_MASK (TLB_VERSION_SIZE - 1) +# define TLB_VERSION_SHIFT (0) +# define tlb_version(__env) (__env->tlb_version) +typedef target_ulong tlbaddr_t; +#endif + + +typedef int BlockID; +typedef int TraceID; +#define BUILD_NONE ((uint16_t)0) +#define BUILD_TCG ((uint16_t)1 << 0) +#define BUILD_LLVM ((uint16_t)1 << 1) + +#define CPU_OPTIMIZATION_COMMON \ + unsigned long sp; \ + void *opt_link; \ + uint16_t build_mode; \ + int start_trace_prediction; \ + int fallthrough; \ + uintptr_t image_base; \ + uint32_t restore_val; \ + uint64_t num_trace_exits; \ + + +#define TB_OPTIMIZATION_COMMON \ + BlockID id; \ + TraceID tid; /* trace id */ \ + int mode; /* current state */ \ + void *opt_ptr; /* pointer to the optimized code */ \ + uint32_t exec_count; /* trace profile execution count */ \ + uint16_t patch_jmp; /* offset of trace trampoline */ \ + uint16_t patch_next; /* offset of trace prediction stub */ \ + target_ulong jmp_pc[2]; /* pc of the succeeding blocks */ \ + void *image; \ + void *state; \ + void *chain; + + +enum { + BLOCK_NONE = 0, + BLOCK_ACTIVE, + BLOCK_TRACEHEAD, + BLOCK_OPTIMIZED, + BLOCK_INVALID, +}; + +enum { + 
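    /* Editorial note: the modes below are chosen at startup from the
     * LLVM_MODE environment variable (see getTransMode() further down).
     * For example, an invocation along the lines of
     *
     *     LLVM_MODE=block qemu-x86_64 ./a.out     (binary name assumed)
     *
     * limits translation to block mode, while hybridm is the default whenever
     * LLVM_MODE is unset and unrecognized values map to TRANS_MODE_INVALID. */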
TRANS_MODE_NONE = 0, + TRANS_MODE_BLOCK, + TRANS_MODE_HYBRIDS, + TRANS_MODE_HYBRIDM, + TRANS_MODE_INVALID, +}; + +/* Parse translation mode from env-variable LLVM_MODE. */ +static inline int getTransMode(void) { + char *p = getenv("LLVM_MODE"); + if (p == NULL) return TRANS_MODE_HYBRIDM; + if (!strcmp(p, "hybridm")) return TRANS_MODE_HYBRIDM; + if (!strcmp(p, "hybrids")) return TRANS_MODE_HYBRIDS; + if (!strcmp(p, "block")) return TRANS_MODE_BLOCK; + if (!strcmp(p, "none")) return TRANS_MODE_NONE; + return TRANS_MODE_INVALID; +} + +/* Annotation/attribute for traces. */ +enum { + A_None = ((uint32_t)0), + A_SetCC = ((uint32_t)1 << 0), + A_NoSIMDization = ((uint32_t)1 << 1), +}; + +#endif + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/llvm/include/hqemu-helper.h b/llvm/include/hqemu-helper.h new file mode 100644 index 0000000..dfcb396 --- /dev/null +++ b/llvm/include/hqemu-helper.h @@ -0,0 +1,8 @@ +DEF_HELPER_1(export_hqemu, void, env) +DEF_HELPER_1(lookup_ibtc, ptr, env) +DEF_HELPER_1(lookup_cpbl, ptr, env) +DEF_HELPER_3(validate_cpbl, int, env, tl, int) +DEF_HELPER_2(NET_profile, void, env, int) +DEF_HELPER_2(NET_predict, void, env, int) +DEF_HELPER_2(verify_tb, void, env, int) +DEF_HELPER_3(profile_exec, void, env, ptr, int) diff --git a/llvm/include/hqemu.h b/llvm/include/hqemu.h new file mode 100644 index 0000000..f5e7180 --- /dev/null +++ b/llvm/include/hqemu.h @@ -0,0 +1,84 @@ +/* + * (C) 2015 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#ifndef __HQEMU_H +#define __HQEMU_H + +#ifdef __cplusplus +extern "C" +{ +#endif + +#include "config-host.h" +#include "config-target.h" +#include "hqemu-config.h" + +#define build_tcg(_env) ((_env)->build_mode & BUILD_TCG) +#define build_llvm(_env) ((_env)->build_mode & BUILD_LLVM) +#define build_llvm_only(_env) ((_env)->build_mode == BUILD_LLVM) + +void hqemu_help(void); + +/* Optimizations */ +int optimization_init(CPUArchState *env); +int optimization_finalize(CPUArchState *env); +int optimization_reset(CPUArchState *env, int force_flush); +int optimization_remove_entry(CPUArchState *env, TranslationBlock *tb); +int optimization_flush_page(CPUArchState *env, target_ulong pc); +int optimization_init_tb(TranslationBlock *tb, int id); + +void itlb_update_entry(CPUArchState *env, TranslationBlock *tb); +void ibtc_update_entry(CPUArchState *env, TranslationBlock *tb); + +int lpt_reset(CPUArchState *env); +int lpt_add_page(CPUArchState *env, target_ulong addr, target_ulong size); +int lpt_search_page(CPUArchState *env, target_ulong addr, target_ulong *addrp, target_ulong *sizep); +int lpt_flush_page(CPUArchState *env, target_ulong addr, target_ulong *addrp, target_ulong *sizep); + + +/* Tracer */ +void tracer_exec_tb(CPUArchState *env, uintptr_t next_tb, TranslationBlock *tb); +void tracer_reset(CPUArchState *env); + + +/* LLVM */ +int llvm_init(void); +int llvm_finalize(void); +int llvm_alloc_cache(void); +int llvm_check_cache(void); +int llvm_tb_flush(void); +int llvm_tb_remove(TranslationBlock *tb); +void llvm_handle_chaining(uintptr_t next_tb, TranslationBlock *tb); +int llvm_locate_trace(uintptr_t searched_pc); +TranslationBlock *llvm_find_pc(CPUState *cpu, uintptr_t searched_pc); +int llvm_restore_state(CPUState *cpu, TranslationBlock *tb, uintptr_t searched_pc); +void llvm_fork_start(void); +void llvm_fork_end(int child); + + +/* Annotation */ +enum { + ANNOTATION_NONE = 0, + ANNOTATION_LOOP, +}; +int llvm_has_annotation(target_ulong addr, int annotation); + + +/* 
External variables */ +extern int tracer_mode; +extern target_ulong pcid; +extern unsigned long alignment_count[]; /* 0: misaligned, 1: aligned. */ +extern unsigned long aligned_boundary; + +#ifdef __cplusplus +} +#endif + +#endif + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/llvm/include/llvm-annotate.h b/llvm/include/llvm-annotate.h new file mode 100644 index 0000000..25454ed --- /dev/null +++ b/llvm/include/llvm-annotate.h @@ -0,0 +1,51 @@ +/* + * (C) 2015 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#ifndef __LLVM_ANNOTATE_H +#define __LLVM_ANNOTATE_H + +#include <map> +#include <cstdint> +#include "qemu-types.h" +#include "llvm-types.h" +#include "utils.h" + +/* Loop metadata */ +struct LoopMetadata { + LoopMetadata() + : Address(-1), Length(-1), VS(-1), VF(-1), Distance(INT_MIN), Start(-1), + End(-1), Stride(-1) {} + target_ulong Address; + uint32_t Length; + uint32_t VS, VF; + int Distance; + int Start, End; + int Stride; +}; + +/* + * The AnnotationFactory class manages the metadata information. + */ +class AnnotationFactory { + typedef std::map<uintptr_t, LoopMetadata*> LoopList; + + std::string MetaFile; + + int ParseXML(const char *name); + +public: + AnnotationFactory(); + ~AnnotationFactory(); + + LoopList Loops; + LoopMetadata *getLoopAnnotation(target_ulong addr); + bool hasLoopAnnotation(target_ulong addr); +}; + +#endif + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/llvm/include/llvm-debug.h b/llvm/include/llvm-debug.h new file mode 100644 index 0000000..405b466 --- /dev/null +++ b/llvm/include/llvm-debug.h @@ -0,0 +1,247 @@ +/* + * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#ifndef __LLVM_DEBUG_H +#define __LLVM_DEBUG_H + +#include <cstdint> +#include <cstring> +#include <iostream> +#include <sstream> +#include <cstdarg> +#include <unistd.h> +#include <sys/time.h> +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrAnalysis.h" +#include "llvm/Support/FileSystem.h" +#include "utils.h" + + +struct DebugMode { + uint64_t Mode; + + DebugMode(uint64_t M) : Mode(M) {} + + bool operator==(const DebugMode &RHS) const { + return Mode == RHS.Mode; + } + bool operator&(const DebugMode &RHS) const { + return Mode & RHS.Mode; + } + DebugMode operator|(const DebugMode &RHS) { + return DebugMode(Mode | RHS.Mode); + } + DebugMode &operator|=(const DebugMode &RHS) { + Mode |= RHS.Mode; + return *this; + } +}; + +/* + * LLVMDebug provides facilities to debug the LLVM translator, based on the + * debug levels. 
+ */ +class LLVMDebug { +public: + enum LLVMDebugMode { + D_NONE = ((uint64_t)0), + D_LLVM = ((uint64_t)1 << 0), + D_INASM = ((uint64_t)1 << 1), + D_OP = ((uint64_t)1 << 2), + D_OUTASM = ((uint64_t)1 << 3), + D_IR = ((uint64_t)1 << 4), + D_IR_OPT = ((uint64_t)1 << 5), + D_ENTRY = ((uint64_t)1 << 6), + D_VERIFY = ((uint64_t)1 << 7), + D_PASS = ((uint64_t)1 << 8), + D_ANNOTATE = ((uint64_t)1 << 9), + D_HPM = ((uint64_t)1 << 10), + D_ASM = (D_INASM | D_OP | D_OUTASM), + D_DEBUG = (D_LLVM | D_IR_OPT | D_OUTASM | D_PASS), + D_ALL = (D_LLVM | D_INASM | D_OP | D_OUTASM | D_IR | D_IR_OPT | + D_ENTRY | D_VERIFY | D_PASS | D_ANNOTATE | D_HPM), + }; + + LLVMDebug() : Mode(D_NONE) + { + hqemu_out.reset(new llvm::raw_fd_ostream(STDOUT_FILENO, false, true)); + hqemu_dbg.reset(new llvm::raw_fd_ostream(STDERR_FILENO, false, true)); + + std::string Str(""); + gettimeofday(&uptime, nullptr); + ParseDebugMode(Str, false); + hqemu_null.SetUnbuffered(); + } + + DebugMode &getDebugMode() { + return Mode; + } + + DebugMode &getDebugMode(LLVMDebugMode M) { + if (Modes.find(M) == Modes.end()) + M = D_NONE; + return *Modes[M]; + } + + void setDebugMode(std::string &DebugLevel, std::string &DebugFile) { + ParseDebugMode(DebugLevel); + if (DebugFile != "") { + std::error_code EC; + auto OS = new llvm::raw_fd_ostream(DebugFile, EC, + llvm::sys::fs::F_Text); + if (EC) { + *hqemu_dbg << "Error: failed to open debug file " << DebugFile + << ". (" << EC.message().c_str() << ")\n"; + } + OS->SetUnbuffered(); + hqemu_dbg.reset(OS); + } + } + + void Flush() { + hqemu_dbg->flush(); + } + + void error(const char *fname, const char *fmt, ...) { + static char str[256] = {'\0'}; + va_list ap; + va_start(ap, fmt); + vsprintf(str, fmt, ap); + va_end(ap); + *hqemu_dbg << timestamp() << " Error: " << fname << " - " << str; + exit(0); + } + + llvm::raw_ostream &output() { + return *hqemu_out; + } + + llvm::raw_ostream &debug() { + return *hqemu_dbg; + } + + llvm::raw_ostream &operator<<(DebugMode &M) { + if (M & Mode) { + *hqemu_dbg << timestamp() << " "; + return *hqemu_dbg; + } + return hqemu_null; + }; + +private: + llvm::raw_null_ostream hqemu_null; + std::unique_ptr<llvm::raw_fd_ostream> hqemu_out; + std::unique_ptr<llvm::raw_fd_ostream> hqemu_dbg; + struct timeval uptime; /* The startup time of the DBT */ + DebugMode Mode; /* The debug level */ + std::map<LLVMDebugMode, DebugMode*> Modes; + + std::string timestamp() { + struct timeval tv; + char timestamp[32]; + gettimeofday(&tv, 0); + timersub(&tv, &uptime, &tv); + strftime(timestamp, 32, "[%H:%M:%S", gmtime(&tv.tv_sec)); + sprintf(timestamp + 9, ".%06ld]", tv.tv_usec); + return timestamp; + } + + void ParseDebugMode(std::string &DebugLevel, bool Update=true) { + static std::string debug_str[] = { + "none", "llvm", "in_asm", "op", "out_asm", "ir", "ir_opt", + "entry", "verify", "pass", "annotate", "hpm", "asm", "debug", + "all" + }; + static LLVMDebugMode debug_enum[] = { + D_NONE, D_LLVM, D_INASM, D_OP, D_OUTASM, D_IR, D_IR_OPT, + D_ENTRY, D_VERIFY, D_PASS, D_ANNOTATE, D_HPM, D_ASM, D_DEBUG, + D_ALL + }; + + if (!Update) { + for (auto M : debug_enum) + Modes[M] = new DebugMode(M); + return; + } + + if (DebugLevel.empty()) + return; + + std::istringstream ss(DebugLevel); + std::string token; + while(std::getline(ss, token, ',')) { + for (unsigned i = 0, e = ARRAY_SIZE(debug_enum); i != e; ++i) { + if (token == debug_str[i]) { + Mode |= getDebugMode(debug_enum[i]); + break; + } + } + } + } +}; + +extern LLVMDebug DM; + +/* Print messages to stdout. 
Should not use this function in release mode. */ +static inline llvm::raw_ostream &out() { + return DM.output(); +} +/* Print messages to stderr, controlled by DebugMode. */ +static inline LLVMDebug &dbg() { + return DM; +} +/* Print error messages to stderr and terminate the process. */ +#define hqemu_error(msg,args...) do { DM.error(__func__,msg,##args); } while(0) + +/* Macros to get defined DebugMode. */ +#define DEBUG_NONE DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_NONE) +#define DEBUG_LLVM DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_LLVM) +#define DEBUG_INASM DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_INASM) +#define DEBUG_OP DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_OP) +#define DEBUG_OUTASM DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_OUTASM) +#define DEBUG_IR DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_IR) +#define DEBUG_IR_OPT DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_IR_OPT) +#define DEBUG_ENTRY DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_ENTRY) +#define DEBUG_VERIFY DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_VERIFY) +#define DEBUG_PASS DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_PASS) +#define DEBUG_ANNOTATE DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_ANNOTATE) +#define DEBUG_HPM DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_HPM) +#define DEBUG_ASM DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_ASM) +#define DEBUG_DEBUG DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_DEBUG) +#define DEBUG_ALL DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_ALL) + + + +/* + * Binary disassembler using MCDisassembler. + */ +class MCDisasm { + const llvm::MCDisassembler *DisAsm; + const llvm::MCSubtargetInfo *STI; + llvm::MCInstPrinter *IP; + const llvm::MCInstrAnalysis *MIA; + bool HostDisAsm; + bool NoShowRawInsn; + + MCDisasm(const llvm::Target *TheTarget, std::string TripleName, + bool isHost); + + void DumpBytes(llvm::ArrayRef<uint8_t> bytes, llvm::raw_ostream &OS); + +public: + ~MCDisasm(); + void PrintInAsm(uint64_t Addr, uint64_t Size, uint64_t GuestAddr); + void PrintOutAsm(uint64_t Addr, uint64_t Size); + + static MCDisasm *CreateMCDisasm(std::string TripleName, bool isHost); +}; + +#endif + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/llvm/include/llvm-hard-perfmon.h b/llvm/include/llvm-hard-perfmon.h new file mode 100644 index 0000000..ac03b23 --- /dev/null +++ b/llvm/include/llvm-hard-perfmon.h @@ -0,0 +1,87 @@ +/* + * (C) 2018 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#ifndef __LLVM_HARD_PERFMON_H +#define __LLVM_HARD_PERFMON_H + +#include <map> +#include <thread> +#include "pmu/pmu.h" +#include "utils.h" + +class PerfmonData; +class BaseTracer; + +enum HPMControl { + HPM_INIT = 0, + HPM_FINALIZE, + HPM_START, + HPM_STOP, +}; + +/* + * Hardware Performance Monitor (HPM) + */ +class HardwarePerfmon { + std::thread MonThread; /* Monitor thread */ + int MonThreadID; /* Monitor thread id */ + bool MonThreadStop; /* Monitor thread is stopped or not */ + hqemu::Mutex Lock; + + /* Start monitor thread. */ + void StartMonThread(); + + /* Monitor thread routine. */ + void MonitorFunc(); + +public: + HardwarePerfmon(); + ~HardwarePerfmon(); + + /* Set up HPM with the monitor thread id */ + void Init(int monitor_thread_tid); + + /* Register a thread to be monitored. */ + void RegisterThread(BaseTracer *Tracer); + + /* Unreigster a thread from being monitored. */ + void UnregisterThread(BaseTracer *Tracer); + + /* Notify that the execution enters/leaves the code cache. 
*/ + void NotifyCacheEnter(BaseTracer *Tracer); + void NotifyCacheLeave(BaseTracer *Tracer); + + /* Stop the monitor. */ + void Pause(); + + /* Restart the monitor. */ + void Resume(); +}; + + +class PerfmonData { +public: + PerfmonData(int tid); + ~PerfmonData(); + + int TID; + pmu::Handle ICountHndl; + pmu::Handle BranchHndl; + pmu::Handle MemLoadHndl; + pmu::Handle MemStoreHndl; + pmu::Handle CoverSetHndl; + uint64_t LastNumBranches, LastNumLoads, LastNumStores; + + void MonitorBasic(HPMControl Ctl); + void MonitorCoverSet(HPMControl Ctl); +}; + +extern HardwarePerfmon *HP; + +#endif /* __LLVM_HARD_PERFMON_H */ + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/llvm/include/llvm-helper.h b/llvm/include/llvm-helper.h new file mode 100644 index 0000000..2d24f81 --- /dev/null +++ b/llvm/include/llvm-helper.h @@ -0,0 +1,755 @@ +/* + * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + * + * This file defines the QEMU helper functions that could be inlined by + * the LLVM translators. + */ + +#ifndef __LLVM_HELPER_H +#define __LLVM_HELPER_H + +/* Speical TCG runtime helper */ + "tcg_helper_div_i32", + "tcg_helper_rem_i32", + "tcg_helper_divu_i32", + "tcg_helper_remu_i32", + "tcg_helper_shl_i64", + "tcg_helper_shr_i64", + "tcg_helper_sar_i64", + "tcg_helper_div_i64", + "tcg_helper_rem_i64", + "tcg_helper_divu_i64", + "tcg_helper_remu_i64", + +#if defined(TARGET_I386) + /* General */ + "helper_cc_compute_c", + "helper_cc_compute_all", + "helper_load_seg", + "helper_write_eflags", + "helper_read_eflags", + "helper_cli", + "helper_sti", + "helper_set_inhibit_irq", + "helper_reset_inhibit_irq", + /* FPU */ + "helper_divb_AL", + "helper_idivb_AL", + "helper_divw_AX", + "helper_idivw_AX", + "helper_divl_EAX", + "helper_idivl_EAX", + "helper_flds_FT0", + "helper_fldl_FT0", + "helper_fildl_FT0", + "helper_flds_ST0", + "helper_fldl_ST0", + "helper_fildl_ST0", + "helper_fildll_ST0", + "helper_fsts_ST0", + "helper_fstl_ST0", + "helper_fist_ST0", + "helper_fistl_ST0", + "helper_fistll_ST0", + "helper_fistt_ST0", + "helper_fisttl_ST0", + "helper_fisttll_ST0", + "helper_fldt_ST0", + "helper_fstt_ST0", + "helper_fpush", + "helper_fpop", + "helper_fdecstp", + "helper_fincstp", + "helper_ffree_STN", + "helper_fmov_ST0_FT0", + "helper_fmov_FT0_STN", + "helper_fmov_ST0_STN", + "helper_fmov_STN_ST0", + "helper_fxchg_ST0_STN", + "helper_fcom_ST0_FT0", + "helper_fucom_ST0_FT0", + "helper_fcomi_ST0_FT0", + "helper_fucomi_ST0_FT0", + "helper_fadd_ST0_FT0", + "helper_fmul_ST0_FT0", + "helper_fsub_ST0_FT0", + "helper_fsubr_ST0_FT0", + "helper_fdiv_ST0_FT0", + "helper_fdivr_ST0_FT0", + "helper_fadd_STN_ST0", + "helper_fmul_STN_ST0", + "helper_fsub_STN_ST0", + "helper_fsubr_STN_ST0", + "helper_fdiv_STN_ST0", + "helper_fdivr_STN_ST0", + "helper_fchs_ST0", + "helper_fabs_ST0", +#if defined(TCG_TARGET_I386) && TCG_TARGET_REG_BITS == 64 + "helper_fxam_ST0", +#endif + "helper_fld1_ST0", + "helper_fldl2t_ST0", + "helper_fldl2e_ST0", + "helper_fldpi_ST0", + "helper_fldlg2_ST0", + "helper_fldln2_ST0", + "helper_fldz_ST0", + "helper_fldz_FT0", + "helper_fnstsw", + "helper_fnstcw", + "helper_fldcw", + "helper_fclex", + "helper_fwait", + "helper_fninit", + "helper_fbld_ST0", + "helper_fbst_ST0", + "helper_f2xm1", + "helper_fyl2x", + "helper_fptan", + "helper_fpatan", + "helper_fxtract", + "helper_fprem1", + "helper_fprem", + "helper_fyl2xp1", + "helper_fsqrt", + "helper_fsincos", + "helper_frndint", + "helper_fscale", + "helper_fsin", + "helper_fcos", + 
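    /* Editorial note (an assumption about how this header is consumed, based
     * on its shape rather than on code shown in this patch): the bare,
     * comma-separated string literals in this file only compile if the header
     * is textually included inside an array initializer, for instance
     *
     *   static const char *const InlineHelperNames[] = {
     *   #include "llvm-helper.h"
     *   };
     *
     * which would give the LLVM translator a lookup table of helper names
     * that are candidates for inlining (the array name is illustrative). */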
"helper_fstenv", + "helper_fldenv", + "helper_fsave", + "helper_frstor", + "helper_fxsave", + "helper_fxrstor", + "helper_bsf", + "helper_bsr", + "helper_lzcnt", + + /* MMX/SSE */ + "helper_psrlw_xmm", + "helper_psraw_xmm", + "helper_psllw_xmm", + "helper_psrld_xmm", + "helper_psrad_xmm", + "helper_pslld_xmm", + "helper_psrlq_xmm", + "helper_psllq_xmm", + "helper_psrldq_xmm", + "helper_pslldq_xmm", + "helper_paddb_xmm", + "helper_paddw_xmm", + "helper_paddl_xmm", + "helper_paddq_xmm", + "helper_psubb_xmm", + "helper_psubw_xmm", + "helper_psubl_xmm", + "helper_psubq_xmm", + "helper_paddusb_xmm", + "helper_paddsb_xmm", + "helper_psubusb_xmm", + "helper_psubsb_xmm", + "helper_paddusw_xmm", + "helper_paddsw_xmm", + "helper_psubusw_xmm", + "helper_psubsw_xmm", + "helper_pminub_xmm", + "helper_pmaxub_xmm", + "helper_pminsw_xmm", + "helper_pmaxsw_xmm", + "helper_pand_xmm", + "helper_pandn_xmm", + "helper_por_xmm", + "helper_pxor_xmm", + "helper_pcmpgtb_xmm", + "helper_pcmpgtw_xmm", + "helper_pcmpgtl_xmm", + "helper_pcmpeqb_xmm", + "helper_pcmpeqw_xmm", + "helper_pcmpeql_xmm", + "helper_pmullw_xmm", + "helper_pmulhuw_xmm", + "helper_pmulhw_xmm", + "helper_pavgb_xmm", + "helper_pavgw_xmm", + "helper_pmuludq_xmm", + "helper_pmaddwd_xmm", + "helper_psadbw_xmm", + "helper_maskmov_xmm", + "helper_movl_mm_T0_xmm", + "helper_shufps_xmm", + "helper_shufpd_xmm", +#if !defined(TCG_TARGET_ARM) + "helper_pshufd_xmm", + "helper_pshuflw_xmm", + "helper_pshufhw_xmm", + "helper_punpcklbw_xmm", + "helper_punpcklwd_xmm", + "helper_punpckldq_xmm", + "helper_punpckhbw_xmm", + "helper_punpckhwd_xmm", + "helper_punpckhdq_xmm", +#endif + "helper_punpcklqdq_xmm", + "helper_punpckhqdq_xmm", + + "helper_enter_mmx", + "helper_psrlw_mmx", + "helper_psraw_mmx", + "helper_psllw_mmx", + "helper_psrld_mmx", + "helper_psrad_mmx", + "helper_pslld_mmx", + "helper_psrlq_mmx", + "helper_psllq_mmx", + "helper_psrldq_mmx", + "helper_pslldq_mmx", + "helper_paddb_mmx", + "helper_paddw_mmx", + "helper_paddl_mmx", + "helper_paddq_mmx", + "helper_psubb_mmx", + "helper_psubw_mmx", + "helper_psubl_mmx", + "helper_psubq_mmx", + "helper_paddusb_mmx", + "helper_paddsb_mmx", + "helper_psubusb_mmx", + "helper_psubsb_mmx", + "helper_paddusw_mmx", + "helper_paddsw_mmx", + "helper_psubusw_mmx", + "helper_psubsw_mmx", + "helper_pminub_mmx", + "helper_pmaxub_mmx", + "helper_pminsw_mmx", + "helper_pmaxsw_mmx", + "helper_pand_mmx", + "helper_pandn_mmx", + "helper_por_mmx", + "helper_pxor_mmx", + "helper_pcmpgtb_mmx", + "helper_pcmpgtw_mmx", + "helper_pcmpgtl_mmx", + "helper_pcmpeqb_mmx", + "helper_pcmpeqw_mmx", + "helper_pcmpeql_mmx", + "helper_pmullw_mmx", + "helper_pmulhuw_mmx", + "helper_pmulhw_mmx", + "helper_pavgb_mmx", + "helper_pavgw_mmx", + "helper_pmuludq_mmx", + "helper_pmaddwd_mmx", + "helper_psadbw_mmx", + "helper_maskmov_mmx", + "helper_movl_mm_T0_mmx", + "helper_shufps_mmx", + "helper_shufpd_mmx", +#if !defined(TCG_TARGET_ARM) + "helper_pshufd_mmx", + "helper_pshuflw_mmx", + "helper_pshufhw_mmx", + "helper_punpcklbw_mmx", + "helper_punpcklwd_mmx", + "helper_punpckldq_mmx", + "helper_punpckhbw_mmx", + "helper_punpckhwd_mmx", + "helper_punpckhdq_mmx", +#endif + "helper_punpcklqdq_mmx", + "helper_punpckhqdq_mmx", + + "helper_addps", + "helper_addss", + "helper_addpd", + "helper_addsd", + "helper_subps", + "helper_subss", + "helper_subpd", + "helper_subsd", + "helper_mulps", + "helper_mulss", + "helper_mulpd", + "helper_mulsd", + "helper_divps", + "helper_divss", + "helper_divpd", + "helper_divsd", + "helper_minps", + "helper_minss", + 
"helper_minpd", + "helper_minsd", + "helper_maxps", + "helper_maxss", + "helper_maxpd", + "helper_maxsd", + "helper_sqrtps", + "helper_sqrtss", + "helper_sqrtpd", + "helper_sqrtsd", + "helper_shufps", + "helper_shufpd", + + "helper_cmpeqps", + "helper_cmpeqss", + "helper_cmpeqpd", + "helper_cmpeqsd", + "helper_cmpltps", + "helper_cmpltss", + "helper_cmpltpd", + "helper_cmpltsd", + "helper_cmpleps", + "helper_cmpless", + "helper_cmplepd", + "helper_cmplesd", + "helper_cmpunordps", + "helper_cmpunordss", + "helper_cmpunordpd", + "helper_cmpunordsd", + "helper_cmpneqps", + "helper_cmpneqss", + "helper_cmpneqpd", + "helper_cmpneqsd", + "helper_cmpnltps", + "helper_cmpnltss", + "helper_cmpnltpd", + "helper_cmpnltsd", + "helper_cmpnleps", + "helper_cmpnless", + "helper_cmpnlepd", + "helper_cmpnlesd", + "helper_cmpordps", + "helper_cmpordss", + "helper_cmpordpd", + "helper_cmpordsd", + + "helper_cvtps2pd", + "helper_cvtpd2ps", + "helper_cvtss2sd", + "helper_cvtsd2ss", + "helper_cvtdq2ps", + "helper_cvtdq2pd", + "helper_cvtpi2ps", + "helper_cvtpi2pd", + "helper_cvtsi2ss", + "helper_cvtsi2sd", + "helper_cvtps2dq", + "helper_cvtpd2dq", + "helper_cvtps2pi", + "helper_cvtpd2pi", + "helper_cvtss2si", + "helper_cvtsd2si", + "helper_cvttps2dq", + "helper_cvttpd2dq", + "helper_cvttps2pi", + "helper_cvttpd2pi", + "helper_cvttss2si", + "helper_cvttsd2si", + + "helper_cmpeqps", + "helper_cmpeqss", + "helper_cmpeqpd", + "helper_cmpeqsd", + "helper_cmpltps", + "helper_cmpltss", + "helper_cmpltpd", + "helper_cmpltsd", + "helper_cmpleps", + "helper_cmpless", + "helper_cmplepd", + "helper_cmplesd", + "helper_cmpunordps", + "helper_cmpunordss", + "helper_cmpunordpd", + "helper_cmpunordsd", + "helper_cmpneqps", + "helper_cmpneqss", + "helper_cmpneqpd", + "helper_cmpneqsd", + "helper_cmpnltps", + "helper_cmpnltss", + "helper_cmpnltpd", + "helper_cmpnltsd", + "helper_cmpnleps", + "helper_cmpnless", + "helper_cmpnlepd", + "helper_cmpnlesd", + "helper_cmpordps", + "helper_cmpordss", + "helper_cmpordpd", + "helper_cmpordsd", + + "helper_ucomisd", + "helper_comisd", + "helper_ucomiss", + "helper_comiss", + + "helper_packuswb_xmm", + "helper_packsswb_xmm", + "helper_pmovmskb_xmm", + "helper_pshufw_mmx", + +#elif defined(TARGET_ARM) + "helper_add_cc", + "helper_sub_cc", + "helper_shl_cc", + "helper_shr_cc", + "helper_sar_cc", + "helper_adc_cc", + "helper_sbc_cc", + "helper_shl", + "helper_shr", + "helper_sar", + "helper_clz", + + "helper_sadd8", + "helper_sadd16", + "helper_ssub8", + "helper_ssub16", + "helper_ssubaddx", + "helper_saddsubx", + "helper_uadd8", + "helper_uadd16", + "helper_usub8", + "helper_usub16", + "helper_usubaddx", + "helper_uaddsubx", + + "helper_qadd8", + "helper_qadd16", + "helper_qsub8", + "helper_qsub16", + "helper_qsubaddx", + "helper_qaddsubx", + "helper_uqadd8", + "helper_uqadd16", + "helper_uqsub8", + "helper_uqsub16", + "helper_uqsubaddx", + "helper_uqaddsubx", + + "helper_set_rmode", + "helper_cpsr_write_nzcv", + "helper_cpsr_write", + "helper_cpsr_read", + "helper_vfp_get_fpscr", + "helper_vfp_set_fpscr", + "helper_vfp_adds", + "helper_vfp_addd", + "helper_vfp_subs", + "helper_vfp_subd", + "helper_vfp_muls", + "helper_vfp_muld", + "helper_vfp_divs", + "helper_vfp_divd", + "helper_vfp_negs", + "helper_vfp_negd", + "helper_vfp_abss", + "helper_vfp_absd", + "helper_vfp_sqrts", + "helper_vfp_sqrtd", + "helper_vfp_cmps", + "helper_vfp_cmpd", + "helper_vfp_cmpes", + "helper_vfp_cmped", + + "helper_vfp_muladds", + "helper_vfp_muladdd", + +#if defined(TARGET_AARCH64) + "helper_vfp_cmps_a64", + 
"helper_vfp_cmpd_a64", + "helper_vfp_cmpes_a64", + "helper_vfp_cmped_a64", + "helper_vfp_minnums", + "helper_vfp_maxnums", + "helper_vfp_minnumd", + "helper_vfp_maxnumd", +#endif +#if !defined(TCG_TARGET_PPC64) + "helper_vfp_fcvtds", + "helper_vfp_fcvtsd", + "helper_vfp_uitos", + "helper_vfp_uitod", + "helper_vfp_sitos", + "helper_vfp_sitod", + "helper_vfp_touis", + "helper_vfp_touid", + "helper_vfp_touizs", + "helper_vfp_touizd", + "helper_vfp_tosis", + "helper_vfp_tosid", + "helper_vfp_tosizs", + "helper_vfp_tosizd", + "helper_vfp_toshs", + "helper_vfp_tosls", + "helper_vfp_touhs", + "helper_vfp_touls", + "helper_vfp_toshd", + "helper_vfp_tosld", + "helper_vfp_touhd", + "helper_vfp_tould", + "helper_vfp_shtos", + "helper_vfp_sltos", + "helper_vfp_uhtos", + "helper_vfp_ultos", + "helper_vfp_shtod", + "helper_vfp_sltod", + "helper_vfp_uhtod", + "helper_vfp_ultod", +#endif + + /* neon helper */ + "helper_neon_qadd_u8", + "helper_neon_qadd_s8", + "helper_neon_qadd_u16", + "helper_neon_qadd_s16", + "helper_neon_qsub_u8", + "helper_neon_qsub_s8", + "helper_neon_qsub_u16", + "helper_neon_qsub_s16", + + "helper_neon_hadd_s8", + "helper_neon_hadd_u8", + "helper_neon_hadd_s16", + "helper_neon_hadd_u16", + "helper_neon_hadd_s32", + "helper_neon_hadd_u32", + "helper_neon_rhadd_s8", + "helper_neon_rhadd_u8", + "helper_neon_rhadd_s16", + "helper_neon_rhadd_u16", + "helper_neon_rhadd_s32", + "helper_neon_rhadd_u32", + "helper_neon_hsub_s8", + "helper_neon_hsub_u8", + "helper_neon_hsub_s16", + "helper_neon_hsub_u16", + "helper_neon_hsub_s32", + "helper_neon_hsub_u32", + + "helper_neon_cgt_u8", + "helper_neon_cgt_s8", + "helper_neon_cgt_u16", + "helper_neon_cgt_s16", + "helper_neon_cgt_u32", + "helper_neon_cgt_s32", + "helper_neon_cge_u8", + "helper_neon_cge_s8", + "helper_neon_cge_u16", + "helper_neon_cge_s16", + "helper_neon_cge_u32", + "helper_neon_cge_s32", + + "helper_neon_min_u8", + "helper_neon_min_s8", + "helper_neon_min_u16", + "helper_neon_min_s16", + "helper_neon_min_u32", + "helper_neon_min_s32", + "helper_neon_max_u8", + "helper_neon_max_s8", + "helper_neon_max_u16", + "helper_neon_max_s16", + "helper_neon_max_u32", + "helper_neon_max_s32", + "helper_neon_pmin_u8", + "helper_neon_pmin_s8", + "helper_neon_pmin_u16", + "helper_neon_pmin_s16", + "helper_neon_pmax_u8", + "helper_neon_pmax_s8", + "helper_neon_pmax_u16", + "helper_neon_pmax_s16", + + "helper_neon_abd_u8", + "helper_neon_abd_s8", + "helper_neon_abd_u16", + "helper_neon_abd_s16", + "helper_neon_abd_u32", + "helper_neon_abd_s32", + + "helper_neon_shl_u8", + "helper_neon_shl_s8", + "helper_neon_shl_u16", + "helper_neon_shl_s16", + "helper_neon_shl_u32", + "helper_neon_shl_s32", + "helper_neon_shl_u64", + "helper_neon_shl_s64", + "helper_neon_rshl_u8", + "helper_neon_rshl_s8", + "helper_neon_rshl_u16", + "helper_neon_rshl_s16", + "helper_neon_rshl_u32", + "helper_neon_rshl_s32", + "helper_neon_rshl_u64", + "helper_neon_rshl_s64", + "helper_neon_qshl_u8", + "helper_neon_qshl_s8", + "helper_neon_qshl_u16", + "helper_neon_qshl_s16", + "helper_neon_qshl_u32", + "helper_neon_qshl_s32", + "helper_neon_qshl_u64", + "helper_neon_qshl_s64", + "helper_neon_qrshl_u8", + "helper_neon_qrshl_s8", + "helper_neon_qrshl_u16", + "helper_neon_qrshl_s16", + "helper_neon_qrshl_u32", + "helper_neon_qrshl_s32", + "helper_neon_qrshl_u64", + "helper_neon_qrshl_s64", + + "helper_neon_add_u8", + "helper_neon_add_u16", + "helper_neon_padd_u8", + "helper_neon_padd_u16", + "helper_neon_sub_u8", + "helper_neon_sub_u16", + "helper_neon_mul_u8", + 
"helper_neon_mul_u16", + "helper_neon_mul_p8", + + "helper_neon_tst_u8", + "helper_neon_tst_u16", + "helper_neon_tst_u32", + "helper_neon_ceq_u8", + "helper_neon_ceq_u16", + "helper_neon_ceq_u32", + + "helper_neon_abs_s8", + "helper_neon_abs_s16", + "helper_neon_clz_u8", + "helper_neon_clz_u16", + "helper_neon_cls_s8", + "helper_neon_cls_s16", + "helper_neon_cls_s32", + "helper_neon_cnt_u8", + + "helper_neon_qdmulh_s16", + "helper_neon_qrdmulh_s16", + "helper_neon_qdmulh_s32", + "helper_neon_qrdmulh_s32", + + "helper_neon_narrow_u8", + "helper_neon_narrow_u16", + "helper_neon_narrow_sat_u8", + "helper_neon_narrow_sat_s8", + "helper_neon_narrow_sat_u16", + "helper_neon_narrow_sat_s16", + "helper_neon_narrow_sat_u32", + "helper_neon_narrow_sat_s32", + "helper_neon_narrow_high_u8", + "helper_neon_narrow_high_u16", + "helper_neon_narrow_round_high_u8", + "helper_neon_narrow_round_high_u16", + "helper_neon_widen_u8", + "helper_neon_widen_s8", + "helper_neon_widen_u16", + "helper_neon_widen_s16", + + "helper_neon_addl_u16", + "helper_neon_addl_u32", + "helper_neon_paddl_u16", + "helper_neon_paddl_u32", + "helper_neon_subl_u16", + "helper_neon_subl_u32", + "helper_neon_addl_saturate_s32", + "helper_neon_addl_saturate_s64", + "helper_neon_abdl_u16", + "helper_neon_abdl_s16", + "helper_neon_abdl_u32", + "helper_neon_abdl_s32", + "helper_neon_abdl_u64", + "helper_neon_abdl_s64", + "helper_neon_mull_u8", + "helper_neon_mull_s8", + "helper_neon_mull_u16", + "helper_neon_mull_s16", + + "helper_neon_negl_u16", + "helper_neon_negl_u32", + "helper_neon_negl_u64", + + "helper_neon_qabs_s8", + "helper_neon_qabs_s16", + "helper_neon_qabs_s32", + "helper_neon_qneg_s8", + "helper_neon_qneg_s16", + "helper_neon_qneg_s32", + + "helper_neon_min_f32", + "helper_neon_max_f32", + "helper_neon_abd_f32", + "helper_neon_add_f32", + "helper_neon_sub_f32", + "helper_neon_mul_f32", + "helper_neon_ceq_f32", + "helper_neon_cge_f32", + "helper_neon_cgt_f32", + "helper_neon_acge_f32", + "helper_neon_acgt_f32", + +#elif defined(TARGET_PPC) + "helper_popcntb", + "helper_cntlzw", + "helper_cntlsw32", + "helper_cntlzw32", + + "helper_compute_fprf", + "helper_store_fpscr", + "helper_fpscr_clrbit", + "helper_fpscr_setbit", + "helper_fcmpo", + "helper_fcmpu", + + "helper_fctiw", + "helper_fctiwz", + "helper_frsp", + "helper_frin", + "helper_friz", + "helper_frip", + "helper_frim", + + "helper_fadd", + "helper_fsub", + "helper_fmul", + "helper_fdiv", + "helper_fmadd", + "helper_fmsub", + "helper_fnmadd", + "helper_fnmsub", + "helper_fabs", + "helper_fnabs", + "helper_fneg", + "helper_fsqrt", + "helper_fre", + "helper_fres", + "helper_frsqrte", + "helper_fsel", + +#elif defined(TARGET_MICROBLAZE) + "helper_addkc", + "helper_subkc", + "helper_cmp", + "helper_cmpu", + "helper_divs", + "helper_divu", +#elif defined(TARGET_MIPS) + "helper_lwl", + "helper_lwr", + "helper_swl", + "helper_swr", +#endif + +#endif + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/llvm/include/llvm-macro.h b/llvm/include/llvm-macro.h new file mode 100644 index 0000000..7b0e613 --- /dev/null +++ b/llvm/include/llvm-macro.h @@ -0,0 +1,88 @@ +/* + * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. 
+ */ + +#ifndef __LLVM_MACRO_H +#define __LLVM_MACRO_H + +#if defined(CONFIG_SOFTMMU) +#define SaveStates() SaveGlobals(COHERENCE_GLOBAL, LastInst) +#else +#define SaveStates() +#endif + +#define CONST8(a) ConstantInt::get(Int8Ty, a) +#define CONST16(a) ConstantInt::get(Int16Ty, a) +#define CONST32(a) ConstantInt::get(Int32Ty, a) +#define CONST64(a) ConstantInt::get(Int64Ty, a) +#define CONST128(a) ConstantInt::get(Int128Ty, a) +#define CONSTPtr(a) ConstantInt::get(IntPtrTy, a) + +#define FPCONST32(a) ConstantFP::get(FloatTy, a) +#define FPCONST64(a) ConstantFP::get(DoubleTy, a) +#define FPCONST80(a) ConstantFP::get(FP80Ty, a) +#define FPCONST128(a) ConstantFP::get(FP128Ty, a) + +#define ICMP(a,b,pred) new ICmpInst(LastInst, pred, a, b, "") + +#define AND(a,b) BinaryOperator::Create(Instruction::And, a, b, "", LastInst) +#define OR(a,b) BinaryOperator::Create(Instruction::Or, a, b, "", LastInst) +#define XOR(a,b) BinaryOperator::Create(Instruction::Xor, a, b, "", LastInst) +#define SHL(a,b) BinaryOperator::Create(Instruction::Shl, a, b, "", LastInst) +#define LSHR(a,b) BinaryOperator::Create(Instruction::LShr, a, b, "", LastInst) +#define ASHR(a,b) BinaryOperator::Create(Instruction::AShr, a, b, "", LastInst) +#define ADD(a,b) BinaryOperator::Create(Instruction::Add, a, b, "", LastInst) +#define SUB(a,b) BinaryOperator::Create(Instruction::Sub, a, b, "", LastInst) +#define MUL(a,b) BinaryOperator::Create(Instruction::Mul, a, b, "", LastInst) +#define SDIV(a,b) BinaryOperator::Create(Instruction::SDiv, a, b, "", LastInst) +#define UDIV(a,b) BinaryOperator::Create(Instruction::UDiv, a, b, "", LastInst) +#define SREM(a,b) BinaryOperator::Create(Instruction::SRem, a, b, "", LastInst) +#define UREM(a,b) BinaryOperator::Create(Instruction::URem, a, b, "", LastInst) + +#define FADD(a,b) BinaryOperator::Create(Instruction::FAdd, a, b, "", LastInst) +#define FSUB(a,b) BinaryOperator::Create(Instruction::FSub, a, b, "", LastInst) +#define FMUL(a,b) BinaryOperator::Create(Instruction::FMul, a, b, "", LastInst) +#define FDIV(a,b) BinaryOperator::Create(Instruction::FDiv, a, b, "", LastInst) + +#define CAST(a,t) new BitCastInst(a, t, "", LastInst) +#define CASTPTR8(a) CAST(a,Int8PtrTy) +#define CASTPTR16(a) CAST(a,Int16PtrTy) +#define CASTPTR32(a) CAST(a,Int32PtrTy) +#define CASTPTR64(a) CAST(a,Int64PtrTy) + +#define ITP(a,t) new IntToPtrInst(a, t, "", LastInst) +#define ITP8(a) ITP(a,Int8PtrTy) +#define ITP16(a) ITP(a,Int16PtrTy) +#define ITP32(a) ITP(a,Int32PtrTy) +#define ITP64(a) ITP(a,Int64PtrTy) + +#define TRUNC(a,t) new TruncInst(a, t, "", LastInst) +#define TRUNC8(a) TRUNC(a, Int8Ty) +#define TRUNC16(a) TRUNC(a, Int16Ty) +#define TRUNC32(a) TRUNC(a, Int32Ty) +#define TRUNC64(a) TRUNC(a, Int64Ty) + +#define ZEXT(a,t) new ZExtInst(a, t, "", LastInst) +#define ZEXT8(a) ZEXT(a, Int8Ty) +#define ZEXT16(a) ZEXT(a, Int16Ty) +#define ZEXT32(a) ZEXT(a, Int32Ty) +#define ZEXT64(a) ZEXT(a, Int64Ty) +#define ZEXT128(a) ZEXT(a, Int128Ty) +#define SEXT(a,t) new SExtInst(a, t, "", LastInst) +#define SEXT8(a) SEXT(a, Int8Ty) +#define SEXT16(a) SEXT(a, Int16Ty) +#define SEXT32(a) SEXT(a, Int32Ty) +#define SEXT64(a) SEXT(a, Int64Ty) +#define SEXT128(a) SEXT(a, Int128Ty) + +#define BSWAP16(a) CreateBSwap(Int16Ty, a, LastInst) +#define BSWAP32(a) CreateBSwap(Int32Ty, a, LastInst) +#define BSWAP64(a) CreateBSwap(Int64Ty, a, LastInst) + + +#endif + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/llvm/include/llvm-opc.h b/llvm/include/llvm-opc.h new file mode 100644 index 0000000..9454dac --- /dev/null +++ 
b/llvm/include/llvm-opc.h @@ -0,0 +1,494 @@ +/* + * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#ifndef __LLVM_OPC_H +#define __LLVM_OPC_H + +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "qemu-types.h" +#include "llvm-types.h" +#include "llvm-translator.h" +#include "llvm.h" + +//#define ASSERT +//#define VERIFY_TB + + +#define IRDebug(idx) \ + do { \ + dbg() << DEBUG_ENTRY << "op_" << llvm_op_defs[idx].name << ": " \ + << llvm_op_defs[idx].nb_oargs << " " \ + << llvm_op_defs[idx].nb_iargs << " " \ + << llvm_op_defs[idx].nb_cargs << "\n"; \ + } while (0) +#define IRError(fmt,args...) hqemu_error(fmt,##args) + +#ifdef ASSERT +#define AssertType(t) \ + do { \ + if (!(t)) \ + hqemu_error("invalid type.\n"); \ + } while(0) +#else +#define AssertType(t) +#endif + +#define IRAbort() \ + do { \ + if (!LLEnv->isTraceMode()) { \ + Func->dump(); \ + hqemu_error("fixme.\n"); \ + } \ + Builder->Abort(); \ + } while (0) + + +class LLVMTranslator; +class NotifyInfo; +class OptimizationInfo; + + +/* Patch flags. + * NOTE: patch flags must be synchronized with those in the LLVM backend. */ +enum { + PATCH_HQEMU = 0x4182U, + PATCH_DUMMY, + PATCH_EXIT_TB, + PATCH_DIRECT_JUMP, + PATCH_TRACE_BLOCK_CHAINING, + PATCH_QMMU, +}; + +/* + * Register is used to describe the pseudo registers used by QEMU TCG op. + */ +struct Register { + /* Status of the register. */ + enum { + STATE_NONE = 0x0, + STATE_REV = 0x1, /* Register is reserved */ + STATE_REG = 0x2, /* Register is promoted */ + STATE_MEM = 0x4, /* Register is in CPUArchState memory */ + STATE_LOC = 0x8, /* Register is a local register */ + STATE_TMP = 0x10, /* Register is a tmp register */ + }; + + int State; /* State of the register */ + int Base; + intptr_t Off; /* Register offset of CPUArchState */ + int Size; /* Register size */ + std::string Name; /* Name string of this register */ + bool Dirty; /* This register is updated or not */ + Type *Ty; /* Register type in LLVM */ + Value *Data; /* Data value if this regisrer is promoted */ + Value *AI; /* Register as Alloca */ + Register *Alias; + + Register() : State(STATE_NONE), Off(-1), Dirty(false), Ty(nullptr), + Data(nullptr), AI(nullptr), Alias(nullptr) {} + + void set(int base, intptr_t off, std::string name) { + Base = base; + Off = off; + Name = name; + } + void reset(int state, int size, Type *ty) { + State = state; + Size = size; + Ty = ty; + Dirty = false; + Data = AI = nullptr; + } + + void Promote() { State |= STATE_REG; } + void Demote() { State &= ~STATE_REG; } + + Value *getData() { return Data; } + Register &getAlias() { return *Alias; } + + void setState(int state) { State = state; } + void setData(Value *data, bool dirty = false) { + if (Alias) { + Alias->setData(data, dirty); + return; + } + Data = data; + Dirty = dirty; + Promote(); + } + bool isRev() { return State & STATE_REV; } + bool isReg() { return State & STATE_REG; } + bool isMem() { return State & STATE_MEM; } + bool isLocal() { return State & STATE_LOC; } + bool isDirty() { return Dirty; } + bool isAlias() { return Alias != nullptr; } +}; + +/* + * TraceBuilder provides the facilities to build a trace in IRFactory. 
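+ * It walks the CFG nodes of an optimization request: getNextNode() pops the
+ * next unvisited node, ConvertToTCGIR() regenerates the TCG ops of that
+ * block, and ConvertToLLVMIR() lowers them into the LLVM function being
+ * built. A rough driver loop (purely illustrative; IF, Opt and env come
+ * from the caller) would be:
+ *
+ *   TraceBuilder Builder(IF, Opt);
+ *   while (Builder.getNextNode()) {
+ *       Builder.ConvertToTCGIR(env);   // guest block -> TCG ops
+ *       Builder.ConvertToLLVMIR();     // TCG ops -> LLVM IR
+ *   }
+ *   Builder.Finalize();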
+ */ +class TraceBuilder { + typedef std::map<target_ulong, + std::pair<GraphNode*, BasicBlock*> > NodeBuildMap; + typedef std::vector<std::pair<BranchInst*, GraphNode*> > BranchList; + + IRFactory *IF; + OptimizationInfo *Opt; + GraphNode *CurrNode; /* The current CFG node to process */ + NodeBuildMap Nodes; + BranchList Branches; + NodeVec NodeQueue; /* CFG nodes to be translated */ + NodeSet NodeVisisted; + NodeVec NodeUsed; + bool Aborted; + uint32_t Attribute; + + TraceInfo *Trace; + +public: + TraceBuilder(IRFactory *IRF, OptimizationInfo *Opt); + ~TraceBuilder() {} + + void ConvertToTCGIR(CPUArchState *env); + void ConvertToLLVMIR(); + void Abort(); + void Finalize(); + bool isAborted() { return Aborted; } + + OptimizationInfo *getOpt() { return Opt; } + TraceInfo *getTrace() { return Trace; } + GraphNode *getEntryNode() { return Opt->getCFG(); } + GraphNode *getCurrNode() { return CurrNode; } + unsigned getNumNodes() { return Nodes.size(); } + std::string getPCString(GraphNode *Node) { + std::stringstream ss; + ss << std::hex << Node->getGuestPC(); + return ss.str(); + } + + GraphNode *getNextNode() { + if (NodeQueue.empty()) + return nullptr; + CurrNode = NodeQueue.back(); + NodeQueue.pop_back(); + + if (NodeVisisted.find(CurrNode) != NodeVisisted.end()) + return getNextNode(); + + NodeVisisted.insert(CurrNode); + NodeUsed.push_back(CurrNode); + return CurrNode; + } + + target_ulong getGuestPC(GraphNode *Node) { +#if defined(TARGET_I386) + return Node->getTB()->pc - Node->getTB()->cs_base; +#else + return Node->getTB()->pc; +#endif + } + void setUniqueNode(GraphNode *Node) { + target_ulong gpc = getGuestPC(Node); + if (Nodes.find(gpc) == Nodes.end()) + Nodes[gpc] = std::make_pair(Node, nullptr); + } + void setBasicBlock(GraphNode *Node, BasicBlock *BB) { + target_ulong gpc = getGuestPC(Node); + if (Nodes.find(gpc) == Nodes.end()) + hqemu_error("internal error.\n"); + Nodes[gpc].second = BB; + } + void setBranch(BranchInst *BI, GraphNode *Node) { + Branches.push_back(std::make_pair(BI, Node)); + target_ulong gpc = getGuestPC(Node); + if (!Nodes[gpc].second) + NodeQueue.push_back(Node); + } + GraphNode *getNode(target_ulong gpc) { + return Nodes.find(gpc) == Nodes.end() ? 
nullptr : Nodes[gpc].first; + } + BasicBlock *getBasicBlock(GraphNode *Node) { + target_ulong gpc = getGuestPC(Node); + if (Nodes.find(gpc) == Nodes.end()) + hqemu_error("internal error.\n"); + return Nodes[gpc].second; + } + void addAttribute(uint32_t Attr) { + Attribute |= Attr; + } +}; + + +#define META_CONST "const" +#define META_GVA "gva" +#define META_LOOP "loop" +#define META_EXIT "exit" +#define META_CC "cc" + +class MDFactory { + uint32_t UID; + LLVMContext &Context; + MDNode *Dummy; + + ConstantInt *getUID() { + return ConstantInt::get(IntegerType::get(Context, 32), UID++); + } + +public: + MDFactory(Module *M); + ~MDFactory(); + + MDNode *getMDNode(ArrayRef<ConstantInt*> V); + DebugLoc getDebugLoc(unsigned Line, unsigned Col, Function *F, + ArrayRef<ConstantInt*> Meta); + + void setConst(Instruction *I) { I->setMetadata(META_CONST, Dummy); } + void setGuestMemory(Instruction *I) { I->setMetadata(META_GVA, Dummy); } + void setLoop(Instruction *I) { I->setMetadata(META_LOOP, Dummy); } + void setExit(Instruction *I) { I->setMetadata(META_EXIT, Dummy); } + void setCondition(Instruction *I) { I->setMetadata(META_CC, Dummy); } + + static bool isConst(Instruction *I) { + return I->getMetadata(META_CONST); + } + static bool isGuestMemory(Instruction *I) { + return I->getMetadata(META_GVA); + } + static bool isLoop(Instruction *I) { + return I->getMetadata(META_LOOP); + } + static bool isExit(Instruction *I) { + return I->getMetadata(META_EXIT); + } + static bool isCondition(Instruction *I) { + return I->getMetadata(META_CC); + } + + static void setConstStatic(LLVMContext &Context, Instruction *I, + ArrayRef<ConstantInt*> V); +}; + +/* + * IRFactory conducts QEMU TCG opcodes to LLVM IR conversion. + */ +class IRFactory { + typedef std::map<std::pair<intptr_t, Type *>, Value *> StatePtrMap; + typedef std::map<TCGArg, BasicBlock *> LabelMap; + + enum { + COHERENCE_NONE = 0, + COHERENCE_GLOBAL, + COHERENCE_ALL, + }; + + bool InitOnce; + + /* Basic types */ + Type *VoidTy; + IntegerType *Int8Ty; + IntegerType *Int16Ty; + IntegerType *Int32Ty; + IntegerType *Int64Ty; + IntegerType *Int128Ty; + IntegerType *IntPtrTy; + PointerType *Int8PtrTy; + PointerType *Int16PtrTy; + PointerType *Int32PtrTy; + PointerType *Int64PtrTy; + Type *FloatTy; + Type *DoubleTy; + Type *FP80Ty; + Type *FP128Ty; + + ConstantInt *ExitAddr; + + LLVMTranslator &Translator; /* Uplink to the LLVMTranslator instance */ + LLVMContext *Context; /* Translator local context */ + Module *Mod; /* The LLVM module */ + ExecutionEngine *EE; /* The JIT compiler */ + EventListener *Listener; /* The JIT listener */ + JITEventListener *IntelJIT; /* The Intel JIT listener */ + const DataLayout *DL; /* Data layout */ + TraceBuilder *Builder; + MDFactory *MF; + MCDisasm *HostDisAsm; + + HelperMap &Helpers; + std::vector<BaseRegister> &BaseReg; /* TCG base register */ + std::vector<Register> Reg; /* TCG virtual registers */ + LabelMap Labels; /* TCG labels */ + int Segment; + GuestBaseRegister &GuestBaseReg; /* Reserved guest base register */ + + Function *Func; /* The container of LLVM IR to be translated */ + BasicBlock *InitBB; /* BasicBlock for variable decalaration */ + BasicBlock *CurrBB; /* Current BasicBlock to insert LLVM IR */ + BasicBlock *ExitBB; /* Temp BasicBlock as the exit-function stub */ + BranchInst *LastInst; /* Position to insert LLVM IR */ + + Instruction *CPU; /* Base register with (char*) type */ + Instruction *CPUStruct; /* Base register with (struct CPUArchState*) type */ + Instruction *GEPInsertPos; /* Position 
to insert GEP instruction */ + + StatePtrMap StatePtr; + IVec InlineCalls; /* Helpers to be inlined */ + std::map<std::string, BasicBlock*> CommonBB; + IVec IndirectBrs; + IVec toErase; + BBVec toSink; + std::set<Function *> ClonedFuncs; + bool runPasses; + + void CreateJIT(); + void DeleteJIT(); + + /* Initialize basic types used during IR conversion. */ + void InitializeTypes(); + + /* Store dirty states back to CPU state in the memory. */ + void SaveGlobals(int level, Instruction *InsertPos); + + /* Sync PC to CPU state in the memory. */ + void CreateStorePC(Instruction *InsertPos); + + /* Get or insert the pointer to the CPU state. */ + Value *StatePointer(Register ®); + Value *StatePointer(Register ®, intptr_t Off, Type *PTy); + + /* Load value from the CPU state in the memory. */ + Value *LoadState(Register ®); + void StoreState(Register ®, Instruction *InsertPos); + + /* Load/Store data from/to the guest memory. */ + Value *QEMULoad(Value *AddrL, Value *AddrH, TCGMemOpIdx oi); + void QEMUStore(Value *Data, Value *AddrL, Value *AddrH, TCGMemOpIdx oi); + + Value *ConvertCPUType(Function *F, int Idx, Instruction *InsertPos); + Value *ConvertCPUType(Function *F, int Idx, BasicBlock *InsertPos); + + Value *ConvertEndian(Value *V, int opc); + Value *getExtendValue(Value *V, Type *Ty, int opc); + Value *getTruncValue(Value *V, int opc); + int getSizeInBits(int opc) { + return 8 * (1 << (opc & MO_SIZE)); + } + + Value *ConcatTLBVersion(Value *GVA); + + /* Return the LLVM instruction that stores PC. For the guest's register + * size larger than the host, replace the multiple store-PC instructions + * to one single store-PC instruction. */ + StoreInst *getStorePC(); + + /* Create both chaining and exiting stubs. */ + void InsertLinkAndExit(Instruction *InsertPos); + + /* Create exit stub */ + void InsertExit(uintptr_t RetVal, bool setExit = false); + + /* Find the next node of a trace according to the brach pc. + * Return null if we cannot find one. */ + GraphNode *findNextNode(target_ulong pc); + + /* Perform internal linking of basic blocks to form a region. */ + void TraceLink(StoreInst *SI); + + /* Link basic blocks of direct branch. */ + void TraceLinkDirectJump(GraphNode *NextNode, StoreInst *SI); + void TraceLinkDirectJump(StoreInst *SI); + + /* Link basic blocks of indirect branch. */ + void TraceLinkIndirectJump(GraphNode *NextNode, StoreInst *SI); + + /* Insert code for IBTC hash table lookup. */ + void InsertLookupIBTC(GraphNode *CurrNode); + + /* Insert code for CPBL hash table lookup. */ + void InsertLookupCPBL(GraphNode *CurrNode); + + void TraceValidateCPBL(GraphNode *NextNode, StoreInst *StorePC); + + /* Insert bswap intrinsic instruction. */ + Value *CreateBSwap(Type *Ty, Value *V, Instruction *InsertPos); + + /* Given the size, return its PointerType. */ + PointerType *getPointerTy(int Size, unsigned AS = 0); + + /* Analyze a helper function to determine if it will be inlined or not. */ + int AnalyzeInlineCost(CallSite CS); + + /* Perform helper function inlining. */ + void ProcessInline(); + + void VerifyFunction(Function &F); + + /* Legalize LLVM IR before running the pre-defined passes. */ + void PreProcess(); + + void Optimize(); + + /* Legalize LLVM IR after running the pre-defined passes. 
*/ + void PostProcess(); + + void FinalizeObject(); + + void InitializeLLVMPasses(legacy::FunctionPassManager *FPM); + + uint32_t setRestorePoint(TCGMemOpIdx oi) { + if (oi != (uint16_t)oi) + hqemu_error("key value too large.\n"); + return (NI.setRestorePoint() << 16) | oi; + } + +public: + typedef void (IRFactory::*FuncPtr)(const TCGArg *); + + NotifyInfo &NI; /* Info to pass among translator and JIT */ + + /* QEMU TCG IR to LLVM IR converion routines. */ +#define DEF(name, oargs, iargs, cargs, flags) void op_ ## name(const TCGArg *); +#include "tcg-opc.h" +#undef DEF + + IRFactory(LLVMTranslator *Trans); + ~IRFactory(); + + void CreateSession(TraceBuilder *builder); + void DeleteSession(); + + /* Prepare the initial LLVM Function, BasicBlocks and variables. */ + void CreateFunction(); + void CreateBlock(); + + /* Start LLVM JIT compilation. */ + void Compile(); + + /* Set instruction BI to jump to the basic block BB. */ + void setSuccessor(BranchInst *BI, BasicBlock *BB); + + /* Get function pointer of the IR converion routines. */ + void *getOpcFunc(); + + Function *ResolveFunction(std::string Name); + + LLVMTranslator &getTranslator() { return Translator; } + LLVMContext &getContext() { return *Context; } + const DataLayout *getDL() { return DL; } + MDFactory *getMDFactory() { return MF; } + HelperMap &getHelpers() { return Helpers; } + TraceInfo *getTrace() { return Builder->getTrace(); } + Value *getGuestBase() { return GuestBaseReg.Base; } + Instruction *getDefaultCPU(Function &F); + +public: + static bool isStateOfPC(intptr_t Off); +}; + +#endif + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/llvm/include/llvm-pass.h b/llvm/include/llvm-pass.h new file mode 100644 index 0000000..75bcf4a --- /dev/null +++ b/llvm/include/llvm-pass.h @@ -0,0 +1,205 @@ +/* + * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#ifndef __LLVM_PASS_H +#define __LLVM_PASS_H + +#include <map> +#include <vector> +#include "llvm-types.h" + +class IRFactory; + + +static inline Value *getPointerOperand(Value *I) { + if (LoadInst *LI = dyn_cast<LoadInst>(I)) + return LI->getPointerOperand(); + if (StoreInst *SI = dyn_cast<StoreInst>(I)) + return SI->getPointerOperand(); + return nullptr; +} + +static inline Value *getValueOperand(Value *I) { + if (LoadInst *LI = dyn_cast<LoadInst>(I)) + return LI; + if (StoreInst *SI = dyn_cast<StoreInst>(I)) + return SI->getValueOperand(); + return nullptr; +} + +static inline unsigned getAddressSpaceOperand(Value *I) { + if (LoadInst *LI = dyn_cast<LoadInst>(I)) + return LI->getPointerAddressSpace(); + if (StoreInst *SI = dyn_cast<StoreInst>(I)) + return SI->getPointerAddressSpace(); + return -1; +} + +/* A CPU state reference. */ +struct StateRef { + StateRef(intptr_t Start, intptr_t End, Instruction *I) + : Start(Start), End(End), I(I) {} + intptr_t Start; + intptr_t End; + Instruction *I; + + intptr_t getSize() { + return End - Start; + } + Type *getType() { + return getValueOperand(I)->getType(); + } +}; + +/* A group of references to a CPU state. 
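+ * References whose byte ranges overlap are merged into one group by
+ * StateAnalyzer::computeState() below; for example, accesses covering the
+ * state ranges [0,4), [2,8) and [16,20) yield two groups, [0,8) and [16,20).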
*/ +struct StateData { + intptr_t Start; + intptr_t End; + std::vector<StateRef*> Refs; + + void reset(StateRef &Ref) { + Start = Ref.Start; + End = Ref.End; + Refs.clear(); + Refs.push_back(&Ref); + } + void insert(StateRef &Ref) { + End = std::max(End, Ref.End); + Refs.push_back(&Ref); + } +}; + +typedef std::map<intptr_t, intptr_t> StateRange; +typedef std::vector<StateData> StateList; +typedef std::vector<CallInst*> CallList; + +/* + * The purpose of StateAnalyzer is to analyze loads/stores of CPU states and + * group loads/stores of the same CPU state into the same bucket (StateData). + */ +class StateAnalyzer { + const DataLayout *DL; + std::vector<StateRef> StateRefs; + CallList Calls; + StateList States; + + /* Sort state references by the state offset. */ + void sortStateRefs() { + if (StateRefs.empty()) + return; + std::sort(StateRefs.begin(), StateRefs.end(), + [](const StateRef &lhs, const StateRef &rhs) -> bool { + return lhs.Start < rhs.Start; + }); + } + +public: + StateAnalyzer(const DataLayout *DL) : DL(DL) {} + + void clear() { + StateRefs.clear(); + Calls.clear(); + States.clear(); + } + + /* Add a CPU state reference. */ + void addStateRef(Instruction *I, intptr_t Off) { + Type *Ty = getValueOperand(I)->getType(); + intptr_t Start = Off; + intptr_t End = Off + DL->getTypeSizeInBits(Ty) / 8; + StateRefs.push_back(StateRef(Start, End, I)); + } + + /* Add a helper function call. */ + void addCall(CallInst *CI) { + Calls.push_back(CI); + } + + /* Return non-overlapped ranges of states. */ + void computeStateRange(StateRange &Reads, StateRange &Writes) { + computeState(); + if (StateRefs.empty()) + return; + + const uint8_t READ = 0x1; + const uint8_t WRITE = 0x2; + for (auto &State : States) { + uint8_t RW = 0; + for (auto &Ref : State.Refs) + RW |= isa<LoadInst>(Ref->I) ? READ : WRITE; + if (RW & READ) + Reads[State.Start] = State.End; + if (RW & WRITE) + Writes[State.Start] = State.End; + } + } + + /* Compute referenced states and group instructions. */ + void computeState() { + /* Sort state refs by the offset. */ + sortStateRefs(); + if (StateRefs.empty()) + return; + + StateData State; + State.reset(StateRefs.front()); + for (unsigned i = 1, e = StateRefs.size(); i != e; ++i) { + StateRef &Next = StateRefs[i]; + if (State.End <= Next.Start) { + /* The next reference is not overlapped with the previous + * reference. A new state is found. */ + States.push_back(State); + /* Reset Curr to the next state. */ + State.reset(Next); + } else { + /* Overlap and merge. */ + State.insert(Next); + } + } + /* The last state. 
*/ + States.push_back(State); + } + + StateList &getStateList() { + return States; + } + + CallList &getCalls() { + return Calls; + } +}; + + +namespace llvm { +/* Passes */ +FunctionPass *createReplaceIntrinsic(); +FunctionPass *createFastMathPass(); +FunctionPass *createProfileExec(IRFactory *IF); +FunctionPass *createStateMappingPass(IRFactory *IF); +FunctionPass *createRedundantStateElimination(IRFactory *IF); +FunctionPass *createCombineGuestMemory(IRFactory *IF); +FunctionPass *createCombineCasts(IRFactory *IF); +FunctionPass *createCombineZExtTrunc(); +FunctionPass *createSimplifyPointer(IRFactory *IF); + +void initializeReplaceIntrinsicPass(llvm::PassRegistry&); +void initializeFastMathPassPass(llvm::PassRegistry&); +void initializeProfileExecPass(llvm::PassRegistry&); +void initializeStateMappingPassPass(llvm::PassRegistry&); +void initializeRedundantStateEliminationPass(llvm::PassRegistry&); +void initializeCombineGuestMemoryPass(llvm::PassRegistry&); +void initializeCombineCastsPass(llvm::PassRegistry&); +void initializeCombineZExtTruncPass(llvm::PassRegistry&); +void initializeSimplifyPointerPass(llvm::PassRegistry&); + +/* Analysis */ +void initializeInnerLoopAnalysisWrapperPassPass(llvm::PassRegistry&); +} + +#endif + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/llvm/include/llvm-soft-perfmon.h b/llvm/include/llvm-soft-perfmon.h new file mode 100644 index 0000000..c55201e --- /dev/null +++ b/llvm/include/llvm-soft-perfmon.h @@ -0,0 +1,74 @@ +/* + * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#ifndef __LLVM_SOFT_PERFMON_H +#define __LLVM_SOFT_PERFMON_H + +#include "utils.h" + +#define MAX_SPM_THREADS 256 + +#define SPM_NONE (uint64_t)0 +#define SPM_BASIC ((uint64_t)1 << 0) +#define SPM_TRACE ((uint64_t)1 << 1) +#define SPM_CACHE ((uint64_t)1 << 2) +#define SPM_PASS ((uint64_t)1 << 3) +#define SPM_HPM ((uint64_t)1 << 4) +#define SPM_EXIT ((uint64_t)1 << 5) +#define SPM_HOTSPOT ((uint64_t)1 << 6) +#define SPM_ALL SPM_BASIC | SPM_TRACE | SPM_CACHE | SPM_PASS | SPM_HPM | \ + SPM_EXIT | SPM_HOTSPOT +#define SPM_NUM 9 + + +/* + * Software Performance Monitor (SPM) + */ +class SoftwarePerfmon { +public: + typedef void (*ExitFuncPtr)(void); + + uint64_t Mode; /* Profile level */ + uint64_t NumInsns; /* Number of instructions */ + uint64_t NumBranches; /* Number of branches */ + uint64_t NumLoads; /* Number of memory loads */ + uint64_t NumStores; /* Number of memory stores */ + uint64_t NumTraceExits; /* Count of trace exits */ + uint64_t SampleTime; /* Process time of the sampling handler. 
*/ + unsigned CoverSet; + std::vector<std::vector<uint64_t> *> SampleListVec; + + SoftwarePerfmon() + : Mode(SPM_NONE), NumInsns(0), NumBranches(0), NumLoads(0), NumStores(0), + NumTraceExits(0), SampleTime(0), CoverSet(90) {} + SoftwarePerfmon(std::string &ProfileLevel) : SoftwarePerfmon() { + ParseProfileMode(ProfileLevel); + } + + bool isEnabled() { + return Mode != SPM_NONE; + } + + void registerExitFn(ExitFuncPtr F) { + ExitFunc.push_back(F); + } + + void printProfile(); + +private: + std::vector<ExitFuncPtr> ExitFunc; + + void ParseProfileMode(std::string &ProfileLevel); + void printBlockProfile(); + void printTraceProfile(); +}; + +extern SoftwarePerfmon *SP; + +#endif /* __LLVM_SOFT_PERFMON_H */ + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/llvm/include/llvm-state.h b/llvm/include/llvm-state.h new file mode 100644 index 0000000..e573073 --- /dev/null +++ b/llvm/include/llvm-state.h @@ -0,0 +1,194 @@ +/* + * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + * + * This file implements the basic optimization schemes including indirect + * branch target cache (IBTC), indirect branch chain (IB chain), and trace + * profiling and prediction routines. + */ + +#ifndef __LLVM_STATE_H +#define __LLVM_STATE_H + +#define COPY_STATE(_dst, _src, _e) do { _dst->_e = _src->_e; } while(0) + +/* + * The following data structure and routine are used to save/restore the states + * of CPUArchState. Only the states that could affect decoding the guest binary by + * the TCG front-end are saved/restored. Such states are saved when translating + * the block at the first time because the states could change later and are + * restored to the saved values when the block is decoded again during the + * trace formation. 
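+ *
+ * The intended lifecycle is roughly (illustrative):
+ *
+ *   tcg_save_state(env, tb);   // at the first translation of tb
+ *   ...
+ *   tcg_copy_state(env, tb);   // before re-decoding tb during trace formation
+ *   ...
+ *   delete_state(tb);          // when tb and its saved state are released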
+ */ +#if defined(TARGET_I386) || defined(TARGET_X86_64) +typedef struct i386_env { + int singlestep_enabled; + uint32_t hflags; + target_ulong eflags; +} cpustate; +#elif defined(TARGET_ARM) +typedef struct arm_env { + int singlestep_enabled; + uint32_t pstate; + uint32_t aarch64; + struct { + uint32_t c15_cpar; + uint64_t scr_el3; + } cp15; + uint32_t uncached_cpsr; + uint64_t features; +} cpustate; +#elif defined(TARGET_PPC) || defined(TARGET_PPC64) +typedef struct ppc_env { + int singlestep_enabled; + target_ulong msr; + int mmu_idx; + uint32_t flags; + uint64_t insns_flags; + uint64_t insns_flags2; + target_ulong hflags; +} cpustate; +#elif defined(TARGET_SH4) +typedef struct sh4_env { + int singlestep_enabled; + uint32_t sr; /* status register */ + uint32_t fpscr; /* floating point status/control register */ + uint32_t features; +} cpustate; +#elif defined(TARGET_M68K) +typedef struct m68k_env { + int singlestep_enabled; + uint32_t sr; /* status register */ + uint32_t fpcr; /* floating point status/control register */ +} cpustate; +#elif defined(TARGET_MIPS) +typedef struct mips_env { + int singlestep_enabled; + target_ulong btarget; +} cpustate; +#else +typedef struct dummy_env { + int dummy; +} cpustate; +#endif + +static inline void tcg_save_state(CPUArchState *env, TranslationBlock *tb) +{ +#if defined(TARGET_I386) || defined(TARGET_X86_64) + CPUState *cpu = ENV_GET_CPU(env); + struct i386_env *s = new struct i386_env; + COPY_STATE(s, cpu, singlestep_enabled); + COPY_STATE(s, env, hflags); + COPY_STATE(s, env, eflags); +#elif defined(TARGET_ARM) + CPUState *cpu = ENV_GET_CPU(env); + struct arm_env *s = new struct arm_env; + COPY_STATE(s, cpu, singlestep_enabled); + COPY_STATE(s, env, cp15.c15_cpar); + COPY_STATE(s, env, cp15.scr_el3); + COPY_STATE(s, env, uncached_cpsr); + COPY_STATE(s, env, features); + COPY_STATE(s, env, pstate); + COPY_STATE(s, env, aarch64); +#elif defined(TARGET_PPC) || defined(TARGET_PPC64) + CPUState *cpu = ENV_GET_CPU(env); + struct ppc_env *s = new struct ppc_env; + COPY_STATE(s, cpu, singlestep_enabled); + COPY_STATE(s, env, msr); + COPY_STATE(s, env, mmu_idx); + COPY_STATE(s, env, flags); + COPY_STATE(s, env, insns_flags); + COPY_STATE(s, env, insns_flags2); + COPY_STATE(s, env, hflags); +#elif defined(TARGET_SH4) + CPUState *cpu = ENV_GET_CPU(env); + struct sh4_env *s = new struct sh4_env; + COPY_STATE(s, cpu, singlestep_enabled); + COPY_STATE(s, env, sr); + COPY_STATE(s, env, fpscr); + COPY_STATE(s, env, features); +#elif defined(TARGET_M68K) + CPUState *cpu = ENV_GET_CPU(env); + struct m68k_env *s = new struct m68k_env; + COPY_STATE(s, cpu, singlestep_enabled); + COPY_STATE(s, env, sr); + COPY_STATE(s, env, fpcr); +#elif defined(TARGET_MIPS) + CPUState *cpu = ENV_GET_CPU(env); + struct mips_env *s = new struct mips_env; + COPY_STATE(s, cpu, singlestep_enabled); + COPY_STATE(s, env, btarget); +#else + void *s = nullptr; +#endif + + tb->state = (void *)s; +} + +/* + * tcg_restore_state() + * Reset states to those when the block is first translated. 
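+ * (Implemented below under the name tcg_copy_state().)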
+ */ +static inline void tcg_copy_state(CPUArchState *env, TranslationBlock *tb) +{ +#if defined(TARGET_I386) || defined(TARGET_X86_64) + CPUState *cpu = ENV_GET_CPU(env); + struct i386_env *i386e = (struct i386_env *)tb->state; + COPY_STATE(cpu, i386e, singlestep_enabled); + COPY_STATE(env, i386e, hflags); + COPY_STATE(env, i386e, eflags); +#elif defined(TARGET_ARM) + CPUState *cpu = ENV_GET_CPU(env); + struct arm_env *arme = (struct arm_env *)tb->state; + COPY_STATE(cpu, arme, singlestep_enabled); + COPY_STATE(env, arme, cp15.c15_cpar); + COPY_STATE(env, arme, cp15.scr_el3); + COPY_STATE(env, arme, uncached_cpsr); + COPY_STATE(env, arme, features); + COPY_STATE(env, arme, pstate); + COPY_STATE(env, arme, aarch64); +#elif defined(TARGET_PPC) || defined(TARGET_PPC64) + CPUState *cpu = ENV_GET_CPU(env); + struct ppc_env *ppce = (struct ppc_env *)tb->state; + COPY_STATE(cpu, ppce, singlestep_enabled); + COPY_STATE(env, ppce, msr); + COPY_STATE(env, ppce, mmu_idx); + COPY_STATE(env, ppce, flags); + COPY_STATE(env, ppce, insns_flags); + COPY_STATE(env, ppce, insns_flags2); + COPY_STATE(env, ppce, hflags); +#elif defined(TARGET_SH4) + CPUState *cpu = ENV_GET_CPU(env); + struct sh4_env *sh4e = (struct sh4_env *)tb->state; + COPY_STATE(cpu, sh4e, singlestep_enabled); + COPY_STATE(env, sh4e, sr); + COPY_STATE(env, sh4e, fpscr); + COPY_STATE(env, sh4e, features); +#elif defined(TARGET_M68K) + CPUState *cpu = ENV_GET_CPU(env); + struct m68k_env *m68ke = (struct m68k_env *)tb->state; + COPY_STATE(cpu, m68ke, singlestep_enabled); + COPY_STATE(env, m68ke, sr); + COPY_STATE(env, m68ke, fpcr); +#elif defined(TARGET_MIPS) + CPUState *cpu = ENV_GET_CPU(env); + struct mips_env *mipse = (struct mips_env *)tb->state; + COPY_STATE(cpu, mipse, singlestep_enabled); + COPY_STATE(env, mipse, btarget); +#endif +} + +static inline void delete_state(TranslationBlock *tb) +{ + delete (cpustate *)tb->state; + tb->state = nullptr; +} + +#undef COPY_STATE +#endif /* __LLVM_STATE_H */ + + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/llvm/include/llvm-target.h b/llvm/include/llvm-target.h new file mode 100644 index 0000000..1784942 --- /dev/null +++ b/llvm/include/llvm-target.h @@ -0,0 +1,116 @@ +/* + * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. 
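+ *
+ * This header declares the JIT event listener and small utilities for
+ * emitting and patching host code. For example, code_ostream/EmitConstant
+ * write raw bytes at a given address (illustrative; PatchAddr and Target
+ * are hypothetical values):
+ *
+ *   code_ostream OS(PatchAddr);    // start writing at PatchAddr
+ *   EmitConstant(OS, Target, 4);   // emit Target as 4 little-endian bytes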
+ */ + +#ifndef __LLVM_TARGET_H +#define __LLVM_TARGET_H + +#include "llvm/ExecutionEngine/JITEventListener.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm-types.h" +#include "llvm-translator.h" + +#ifndef __PRI64_PREFIX +# if __WORDSIZE == 64 +# define __PRI64_PREFIX "l" +# else +# define __PRI64_PREFIX "ll" +# endif +#endif + +#if TARGET_LONG_BITS == 32 +# define PRId "d" +# define PRIx "x" +#else +# define PRId __PRI64_PREFIX "d" +# define PRIx __PRI64_PREFIX "x" +#endif + +#define PRId64 __PRI64_PREFIX "d" +#define PRIu64 __PRI64_PREFIX "u" + +class code_ostream { + char *OutBufStart; + char *OutBufCur; +public: + void Skip(unsigned Size) { + OutBufCur += Size; + } + + code_ostream(uintptr_t Ptr) + : OutBufStart((char *)Ptr), OutBufCur((char *)Ptr) {} + code_ostream &operator<<(char C) { + *OutBufCur = C; + OutBufCur++; + return *this; + } + code_ostream &operator<<(unsigned char C) { + *(unsigned char *)OutBufCur = C; + OutBufCur++; + return *this; + } + code_ostream &operator<<(unsigned int C) { + *(unsigned int *)OutBufCur = C; + OutBufCur += sizeof(unsigned int); + return *this; + } + code_ostream &operator<<(unsigned long C) { + *(unsigned long *)OutBufCur = C; + OutBufCur += sizeof(unsigned long); + return *this; + } +}; + +static inline void EmitByte(code_ostream &OS, unsigned char C) +{ + OS << (char)C; +} +static inline void EmitConstant(code_ostream &OS, uint64_t Val, unsigned Size) +{ + for (unsigned i = 0; i != Size; ++i) { + EmitByte(OS, Val & 255); + Val >>= 8; + } +} + +/* + * EventListener is used by the JIT to notify clients about significant events + * during compilation. + */ +class EventListener : public JITEventListener { + NotifyInfo &NI; + +public: + EventListener(NotifyInfo &NI) : NI(NI) {} + ~EventListener() {} + virtual void NotifyFunctionEmitted(const Function &F, void *Code, size_t Size, + const EmittedFunctionDetails &Details); +#if defined(LLVM_V35) + virtual void NotifyObjectEmitted(const ObjectImage &Obj); +#else + virtual void NotifyObjectEmitted(const object::ObjectFile &Obj, + const RuntimeDyld::LoadedObjectInfo &L); +#endif +}; + + +const char *getMMUFName(const void *func); +bool isMMUFunction(std::string &Name); +bool isLMTFunction(std::string &Name); +bool isIllegalHelper(const void *func); +bool isLibcall(std::string &Name); +bool isSoftFPcall(std::string &Name); +void AddDependentSymbols(LLVMTranslator *Translator); +Value *StripPointer(Value *Ptr); +Value *StripPointerWithConstantOffset(const DataLayout *DL, Value *Ptr, + APInt &Offset, Value *GuestBase); +Value *getBaseWithConstantOffset(const DataLayout *DL, Value *Ptr, intptr_t &Offset); +void ProcessErase(IVec &toErase); + +#endif + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/llvm/include/llvm-translator.h b/llvm/include/llvm-translator.h new file mode 100644 index 0000000..d1d92c5 --- /dev/null +++ b/llvm/include/llvm-translator.h @@ -0,0 +1,270 @@ +/* + * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#ifndef __LLVM_TRANSLATOR_H +#define __LLVM_TRANSLATOR_H + +#include <map> +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/Analysis/CodeMetrics.h" +#include "llvm-types.h" +#include "llvm-pass.h" +#include "llvm.h" + + +class OptimizationInfo; +class EventListener; +class NotifyInfo; +class IRFactory; +class TraceBuilder; + + +/* + * BaseRegister is used to describe the `reserved' registers by QEMU TCG. + * Ex: R14 for the x86 host or R7 for the ARM host. 
+ */ +struct BaseRegister { + BaseRegister() : Base(nullptr) {} + int RegNo; /* Register number */ + std::string Name; /* Register name string */ + Type *Ty; /* Type (struct CPUArchState) */ + Instruction *Base; /* CallInst to retrieve basereg */ +}; + +struct GuestBaseRegister { + GuestBaseRegister() : Name(""), Base(nullptr) {} + std::string Name; /* Register name string */ + Value *Base; /* CallInst to retrieve basereg */ +}; + +/* + * Information of helper functions defined in llvm-helper.h. + */ +struct HelperInfo { + HelperInfo() + : ConflictSize(0), mayConflictArg(false), hasNestedCall(false) {} + + struct ArgInfo { + unsigned ConstantWeight; /* Weight if the argument is a constant */ + unsigned AllocaWeight; /* Weight if the argument is a alloca */ + ArgInfo(unsigned CWeight, unsigned AWeight) + : ConstantWeight(CWeight), AllocaWeight(AWeight) {} + }; + + Function *Func; /* Function symbol to be inlined */ + Function *FuncNoInline; /* Function symbol not to be inlined */ + std::vector<std::pair<Instruction*, intptr_t> > States; + std::vector<CallInst*> NestedCalls; + StateRange StateUse; + StateRange StateDef; + CodeMetrics Metrics; /* Inlining metrics */ + std::vector<ArgInfo> ArgumentWeights; /* Weight of the function arguments */ + intptr_t ConflictSize; + + bool mayConflictArg; /* Arguments conflict with state mapping or not */ + bool hasNestedCall; /* This function has nested function or not */ + + void CalculateMetrics(Function *F); + + void insertState(StateRange &Range, bool isWrite) { + if (isWrite) + StateDef.insert(Range.begin(), Range.end()); + else + StateUse.insert(Range.begin(), Range.end()); + } +}; + +/* + * NotifyInfo is used to pass information between LLVMTranslator, IRFactory and + * the JIT listener. + */ +class NotifyInfo { +#define MAX_CHAINSLOT 256 +public: + struct SlotInfo { + size_t Key; + uintptr_t Addr; + }; + + struct PatchInfo { + PatchInfo(unsigned ty, unsigned idx, uintptr_t addr) + : Type(ty), Idx(idx), Addr(addr) {} + unsigned Type; + unsigned Idx; + uintptr_t Addr; + }; + + NotifyInfo() : Func(nullptr) { + ChainSlot = new SlotInfo[MAX_CHAINSLOT]; + } + ~NotifyInfo() { + delete ChainSlot; + } + + Function *Func; /* LLVM Function of this translation unit */ + TCGOp *Op; + TranslationBlock *TB; + uint16_t NumInsts; + RestoreVec Restore; + unsigned NumChainSlot; + SlotInfo *ChainSlot; + + uint32_t Size; /* Size of the translated host code */ + uint8_t *Code; /* Start PC of the translated host code */ + std::vector<PatchInfo> Patches; + + void reset() { + Restore.clear(); + Patches.clear(); + NumInsts = 0; + NumChainSlot = 0; + } + unsigned setChainSlot(size_t Key) { + if (NumChainSlot >= MAX_CHAINSLOT) + hqemu_error("run out of chain slot.\n"); + unsigned Curr = NumChainSlot; + ChainSlot[NumChainSlot++].Key = Key; + return Curr; + } + uintptr_t getChainSlotAddr(unsigned Idx) { + if (NumChainSlot >= MAX_CHAINSLOT) + hqemu_error("invalid chain slot index.\n"); + return (uintptr_t)&ChainSlot[Idx].Addr; + } + void addPatch(unsigned Type, unsigned Idx, uintptr_t Addr) { + Patches.push_back(PatchInfo(Type, Idx, Addr)); + } + void setOp(TCGOp *op) { Op = op; } + void setTB(TranslationBlock *tb) { + TB = tb; + NumInsts = 0; + } + uint32_t setRestorePoint() { + uint32_t Idx = Restore.size(); + if (Idx != (uint16_t)Idx) + hqemu_error("key value too large.\n"); + Restore.push_back(std::make_pair(TB->id, NumInsts)); + return Idx; + } +}; + +/* + * LLVM Translator + */ +class LLVMTranslator { + unsigned MyID; /* Translator ID */ + CPUArchState *Env; + + /* Basic 
types */ + Type *VoidTy; + IntegerType *Int8Ty; + IntegerType *Int16Ty; + IntegerType *Int32Ty; + IntegerType *Int64Ty; + IntegerType *Int128Ty; + IntegerType *IntPtrTy; + PointerType *Int8PtrTy; + PointerType *Int16PtrTy; + PointerType *Int32PtrTy; + PointerType *Int64PtrTy; + Type *FloatTy; + Type *DoubleTy; + PointerType *FloatPtrTy; + PointerType *DoublePtrTy; + + LLVMContext Context; /* Translator local context */ + Module *Mod; /* The LLVM module */ + const DataLayout *DL; /* Data layout */ + NotifyInfo NI; /* Info to set/use by the JIT listener */ + + std::vector<BaseRegister> BaseReg; /* Reserved base registers */ + GuestBaseRegister GuestBaseReg; /* Reserved guest base register */ + FlatType StateType; /* Offset and type of guest registers */ + TCGHelperMap TCGHelpers; + HelperMap Helpers; + std::set<std::string> ConstHelpers; + SymbolMap Symbols; + + MCDisasm *GuestDisAsm; + MCDisasm *HostDisAsm; + + IRFactory *IF; /* TCG-to-LLVM IR converter */ + + /* Initialize the LLVM module. */ + void InitializeModule(); + + /* Create the JIT compiler. */ + void InitializeJIT(); + + /* Initialize required LLVM types. */ + void InitializeType(); + + /* Setup guest and host dependent structures. */ + void InitializeTarget(); + + /* Setup special registers. */ + void DefineSpecialReg(std::map<Type*, Type*> &SpecialReg); + + /* Convert the CPUArchState structure type to a list of primitive types. */ + void FlattenCPUState(Type *Ty, intptr_t &Off, std::map<Type*, Type*> &SpecialReg); + + /* Initialize helper functions. */ + void InitializeHelpers(); + + /* Analyze and optimize a helper function. */ + bool OptimizeHelper(HelperInfo &Helper); + + void InitializeDisasm(); + + void InitializeConstHelpers(); + + void Commit(TraceBuilder &Builder); + + void Abort(TraceBuilder &Builder); + + void dump(CPUArchState *env, TranslationBlock *tb); + + LLVMTranslator(unsigned id, CPUArchState *env); + +public: + ~LLVMTranslator(); + + void GenBlock(CPUArchState *env, OptimizationInfo *Opt); + void GenTrace(CPUArchState *env, OptimizationInfo *Opt); + + unsigned getID() { return MyID; } + LLVMContext *getContext() { return &Context; } + Module *getModule() { return Mod; } + NotifyInfo &getNotifyInfo() { return NI; } + std::vector<BaseRegister> &getBaseReg() { return BaseReg; } + GuestBaseRegister &getGuestBaseReg() { return GuestBaseReg; } + TCGHelperMap &getTCGHelpers() { return TCGHelpers; } + HelperMap &getHelpers() { return Helpers; } + std::set<std::string> &getConstHelpers() { return ConstHelpers; } + FlatType &getStateType() { return StateType; } + SymbolMap &getSymbols() { return Symbols; } + MCDisasm *getHostDisAsm() { return HostDisAsm;} + + void AddSymbol(std::string Name, void *FP) { + Symbols[Name] = (uintptr_t)FP; + } + + /* Create the LLVMTranslator instrance. */ + static LLVMTranslator *CreateLLVMTranslator(int id, CPUArchState *env) { + return new LLVMTranslator(id, env); + } + + /* Show guest assembly code for each compiled TB. */ + void printAsm(CPUArchState *env, TranslationBlock *tb); + + /* Show TCG micro ops for each compiled TB. */ + void printOp(CPUArchState *env, TranslationBlock *tb); +}; + +#endif + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/llvm/include/llvm-types.h b/llvm/include/llvm-types.h new file mode 100644 index 0000000..1b8d09c --- /dev/null +++ b/llvm/include/llvm-types.h @@ -0,0 +1,127 @@ +/* + * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. 
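+ *
+ * This header pulls in the LLVM headers used by the translator and provides
+ * common typedefs plus small wrappers (getDataLayout(), CreateAlloca(),
+ * InlineFunc()) that hide differences between the supported LLVM versions.
+ * For example (illustrative; Int32Ty and InsertPos are provided by the
+ * caller), an alloca that builds on every supported version:
+ *
+ *   AllocaInst *AI = CreateAlloca(Int32Ty, 0, "tmp", InsertPos);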
+ */ + +#ifndef __LLVM_TYPES_H +#define __LLVM_TYPES_H + +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/GetElementPtrTypeIterator.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/Verifier.h" +#include "llvm/IR/ValueHandle.h" +#include "llvm/IR/ValueSymbolTable.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/IRReader/IRReader.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/DynamicLibrary.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Transforms/Utils/Cloning.h" + +#if defined(LLVM_V35) +#include "llvm/MC/MCDisassembler.h" +#include "llvm/ExecutionEngine/ObjectImage.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/MemoryObject.h" +#elif defined(LLVM_V38) +#include "llvm/MC/MCDisassembler.h" +#include "llvm/Object/SymbolSize.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Support/MemoryObject.h" +#elif defined(LLVM_V39) +#include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/Object/SymbolSize.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#include "llvm/Support/MemoryObject.h" +#else +#include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/Object/SymbolSize.h" +#include "llvm/DebugInfo/DWARF/DWARFContext.h" +#endif + +#include <vector> +#include <set> +#include <map> +#include "llvm-macro.h" +#include "qemu-types.h" + +using namespace llvm; + +class HelperInfo; + +typedef std::vector<TranslationBlock *> TBVec; +typedef std::vector<std::pair<BlockID, uint16_t> > RestoreVec; +typedef std::map<uintptr_t, std::string> TCGHelperMap; /* <func_ptr, func_name> */ +typedef std::map<std::string, HelperInfo*> HelperMap; +typedef std::map<std::string, uintptr_t> SymbolMap; +typedef std::map<intptr_t, Type *> FlatType; /* <state_off, state_ty> */ +typedef std::vector<Instruction *> IVec; +typedef std::vector<BasicBlock *> BBVec; + + +static inline const DataLayout *getDataLayout(Module *Mod) { +#if defined(LLVM_V35) + return Mod->getDataLayout(); +#else + return &Mod->getDataLayout(); +#endif +} + +static inline AllocaInst *CreateAlloca(Type *Ty, unsigned AddrSpace, + const Twine &Name, + Instruction *InsertBefore = nullptr) { +#if defined(LLVM_V35) || defined(LLVM_V38) || defined(LLVM_V39) + return new AllocaInst(Ty, Name, InsertBefore); +#else + return new AllocaInst(Ty, AddrSpace, Name, InsertBefore); +#endif +} + +static inline AllocaInst *CreateAlloca(Type *Ty, unsigned AddrSpace, + Value *ArraySize = nullptr, + const Twine &Name = "", + Instruction *InsertBefore = nullptr) { +#if defined(LLVM_V35) || defined(LLVM_V38) || defined(LLVM_V39) + return new AllocaInst(Ty, ArraySize, Name, InsertBefore); +#else + return new AllocaInst(Ty, AddrSpace, ArraySize, Name, InsertBefore); +#endif +} + +static inline void InlineFunc(CallInst *CI) { +#if defined(LLVM_V38) || 
defined(LLVM_V39) + AssumptionCacheTracker ACT; + InlineFunctionInfo IFI(nullptr, &ACT); +#else + InlineFunctionInfo IFI; +#endif + InlineFunction(CI, IFI); +} + +#endif + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/llvm/include/llvm.h b/llvm/include/llvm.h new file mode 100644 index 0000000..67bff2f --- /dev/null +++ b/llvm/include/llvm.h @@ -0,0 +1,278 @@ +/* + * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#ifndef __LLVM_H +#define __LLVM_H + +#include <memory> +#include <vector> +#include "llvm/ADT/STLExtras.h" +#include "llvm-types.h" +#include "llvm-debug.h" +#include "utils.h" + +#if defined(ENABLE_MCJIT) +#include "llvm/ExecutionEngine/MCJIT.h" +#include "MCJITMemoryManager.h" +typedef class DefaultMCJITMemoryManager MemoryManager; +#else +#if defined(LLVM_V35) +#include "JIT.h" +#include "JITMemoryManager.h" +#else +# error "LLVM version >3.5 supports MCJIT only. ENABLE_MCJIT must be enabled." +#endif +typedef class DefaultJITMemoryManager MemoryManager; +#endif + + +extern cl::OptionCategory CategoryHQEMU; + +class LLVMTranslator; +class OptimizationInfo; +class TranslatedCode; + +typedef std::unique_ptr<OptimizationInfo> OptRequest; + + +/* + * LLVMEnv is the top-level container of the whole LLVM translation + * environment; it manages the LLVM translator(s) and globally shared + * resources. The LLVMEnv instance must be initialized before using the + * underlying translation service and can only be initialized ONCE. + */ +class LLVMEnv { +public: + typedef std::vector<TranslatedCode *> TransCodeList; + typedef std::map<uintptr_t, TranslatedCode *> TransCodeMap; + typedef std::vector<uintptr_t> ChainSlot; + typedef std::pair<size_t, uintptr_t> SlotInfo; + +private: + std::shared_ptr<MemoryManager> MM; /* Trace cache manager */ + unsigned NumTranslator; /* Number of LLVM translators */ + std::vector<LLVMTranslator *> Translator; /* LLVM translators */ + std::vector<pthread_t> HelperThread; /* LLVM translation threads */ + std::vector<CPUState *> ThreadEnv; + + TransCodeList TransCode; /* Translated traces. */ + TransCodeMap SortedCode; /* Sorted traces in code cache address order. */ + ChainSlot ChainPoint; /* Address of stubs for trace-to-block linking */ + + bool UseThreading; /* Whether multithreaded translators are used or not. */ + unsigned NumFlush; + + LLVMEnv(); + + /* Parse the command line options. */ + void ParseCommandLineOptions(); + + /* Test whether HQEMU is running in Intel VTune. */ + void ProbeIntelVTune(); + +public: + QemuMutex mutex; + + ~LLVMEnv(); + + /* Start/stop/restart LLVM translators and worker threads. */ + void CreateTranslator(); + void DeleteTranslator(); + void RestartTranslator(); + void StartThread(); + void StopThread(); + + /* Get the LLVM translator with the given index. */ + LLVMTranslator *getTranslator(unsigned ID) { + if (ID >= Translator.size()) + hqemu_error("invalid translator ID.\n"); + return Translator[ID]; + } + + /* Acquire and lock the first LLVM translator. */ + LLVMTranslator *AcquireSingleTranslator(); + + /* Release the first LLVM translator. */ + void ReleaseSingleTranslator(); + + /* Get the CPUState of the LLVM translator with the given index. 
*/ + CPUState *getThreadEnv(int ID) { return ThreadEnv[ID]; } + + std::vector<pthread_t> &getHelperThread() { return HelperThread; } + std::shared_ptr<MemoryManager> getMemoryManager() { return MM; } + TransCodeList &getTransCode() { return TransCode; } + TransCodeMap &getSortedCode() { return SortedCode; } + ChainSlot &getChainPoint() { return ChainPoint; } + TraceID insertTransCode(TranslatedCode *TC); + SlotInfo getChainSlot(); + + bool isThreading() { return UseThreading; } + void incNumFlush() { NumFlush++; } + unsigned getNumFlush() { return NumFlush; } + + /* + * static public members + */ + static bool InitOnce; /* LLVMEnv is initialized or not? */ + static int TransMode; + static uint8_t *TraceCache; + static size_t TraceCacheSize; + static bool RunWithVTune; + + static void CreateLLVMEnv(); + static void DeleteLLVMEnv(); + static int OptimizeBlock(CPUArchState *env, OptRequest Request); + static int OptimizeTrace(CPUArchState *env, OptRequest Request); + static void setTransMode(int Mode) { TransMode = Mode; } + static int isTraceMode() { + return (TransMode == TRANS_MODE_HYBRIDS || + TransMode == TRANS_MODE_HYBRIDM); + } +}; + +class QueueManager { + std::vector<Queue *> ActiveQueue; + Queue *CurrentQueue; + +public: + QueueManager(); + ~QueueManager(); + void Enqueue(OptimizationInfo *Opt); + void *Dequeue(); + void Flush(); +}; + +/* + * OptimizationInfo is the description to an optimization request. It consists + * of the optimization mode and the control-flow-graph of the trace. + */ +class OptimizationInfo { +public: + typedef std::set<TranslationBlock *> TraceNode; + typedef std::map<TranslationBlock *, TraceNode> TraceEdge; + + ~OptimizationInfo() { + if (CFG) + GraphNode::DeleteCFG(CFG); + } + + void ComposeCFG(); + GraphNode *getCFG() { return CFG; } + bool isTrace() { return !isBlock; } + + static OptRequest CreateRequest(TranslationBlock *tb) { + return OptRequest(new OptimizationInfo(tb)); + } + static OptRequest CreateRequest(TBVec &trace, int idx) { + return OptRequest(new OptimizationInfo(trace, idx)); + } + static OptRequest CreateRequest(TranslationBlock *head, TraceEdge &edges) { + return OptRequest(new OptimizationInfo(head, edges)); + } + +private: + TBVec Trace; /* Trace of a list of TBs */ + int LoopHeadIdx; /* Index to the loopback block */ + bool isUserTrace; /* Trace of all user-mode blocks */ + bool isBlock; /* Trace of a single block */ + GraphNode *CFG; /* CFG of the trace */ + + OptimizationInfo(TranslationBlock *tb) + : isUserTrace(true), isBlock(true) { + Trace.push_back(tb); + LoopHeadIdx = -1; + CFG = new GraphNode(tb); + } + OptimizationInfo(TBVec &trace, int idx) + : isUserTrace(true), isBlock(false), CFG(nullptr) { + if (trace.empty()) + hqemu_error("trace length cannot be zero.\n"); + Trace = trace; + LoopHeadIdx = idx; + } + OptimizationInfo(TranslationBlock *HeadTB, TraceEdge &Edges); + + void SearchCycle(TraceNode &SearchNodes, TraceNode &Nodes, + TraceEdge &Edges, TBVec &Visited, int Depth); + void ExpandTrace(TranslationBlock *HeadTB, TraceEdge &Edges); +}; + +class TraceInfo { +public: + TBVec TBs; + unsigned NumLoop; + unsigned NumExit; + unsigned NumIndirectBr; + uint64_t **ExecCount; + uint64_t TransTime; + uint32_t Attribute; + + TraceInfo(NodeVec &Nodes, uint32_t Attr = A_None) + : NumLoop(0), NumExit(0), NumIndirectBr(0), ExecCount(nullptr), + TransTime(0), Attribute(Attr) + { + if (Nodes.empty()) + hqemu_error("number of nodes cannot be zero.\n"); + for (unsigned i = 0, e = Nodes.size(); i != e; ++i) + 
TBs.push_back(Nodes[i]->getTB()); + } + + TranslationBlock *getEntryTB() { return TBs[0]; } + target_ulong getEntryPC() { return TBs[0]->pc; } + unsigned getNumBlock() { return TBs.size(); } + void setTransTime(struct timeval *start, struct timeval *end) { + struct timeval t; + timersub(end, start, &t); + TransTime = t.tv_sec * 1e6 + t.tv_usec; + } + bool hasAttribute(uint32_t Attr) { + return Attribute & Attr; + } +}; + +struct ChainInfo { + std::vector<uintptr_t> Chains; + std::vector<BlockID> DepTraces; + + void insertChain(uintptr_t addr) { + Chains.push_back(addr); + } + void insertDepTrace(BlockID id) { + DepTraces.push_back(id); + } + static ChainInfo *get(TranslationBlock *tb) { + if (!tb->chain) + tb->chain = (ChainInfo *)new ChainInfo; + return (ChainInfo *)tb->chain; + } + static void free(TranslationBlock *tb) { + delete (ChainInfo *)tb->chain; + tb->chain = nullptr; + } +}; + +class TranslatedCode { +public: + TranslatedCode() : Trace(nullptr), SampleCount(0) {} + ~TranslatedCode() { + if (Trace) + delete Trace; + } + + bool Active; + uint32_t Size; /* Size of the translated host code */ + uint8_t *Code; /* Start PC of the translated host code */ + TranslationBlock *EntryTB; /* The entry block of the region */ + RestoreVec Restore; + TraceInfo *Trace; + uint64_t SampleCount; +}; + + +#endif + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/llvm/include/optimization.h b/llvm/include/optimization.h new file mode 100644 index 0000000..bdafb3a --- /dev/null +++ b/llvm/include/optimization.h @@ -0,0 +1,261 @@ +/* + * (C) 2015 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#ifndef __OPTIMIZATION_H +#define __OPTIMIZATION_H + +#include <iostream> +#include <list> +#include "qemu-types.h" + + +extern "C" TranslationBlock *tbs; + +/* + * Instruction TLB (iTLB) + */ +#define ITLB_CACHE_BITS (10) +#define ITLB_CACHE_SIZE (1U << ITLB_CACHE_BITS) +#define ITLB_CACHE_MASK (ITLB_CACHE_SIZE - 1) + +class ITLB { + struct itlb_t { tb_page_addr_t paddr; }; + itlb_t Cache[ITLB_CACHE_SIZE]; + +public: + ITLB() { reset(); } + ~ITLB() {} + + inline itlb_t &cache(target_ulong vaddr) { + return Cache[(vaddr >> TARGET_PAGE_BITS) & ITLB_CACHE_MASK]; + } + void reset() { + for (unsigned i = 0; i < ITLB_CACHE_SIZE; ++i) + Cache[i].paddr = (tb_page_addr_t)-1; + } + void flush(target_ulong vaddr) { + cache(vaddr).paddr = (tb_page_addr_t)-1; + } + void insert(target_ulong vaddr, tb_page_addr_t paddr) { + cache(vaddr).paddr = paddr; + } + tb_page_addr_t get(target_ulong vaddr) { + return cache(vaddr).paddr; + } +}; + + +/* + * Indirect Branch Target Cache (IBTC) + */ +#define IBTC_CACHE_BITS (16) +#define IBTC_CACHE_SIZE (1U << IBTC_CACHE_BITS) +#define IBTC_CACHE_MASK (IBTC_CACHE_SIZE - 1) + +class IBTC { + typedef std::pair<target_ulong, TranslationBlock *> ibtc_t; + ibtc_t Cache[IBTC_CACHE_SIZE]; + bool NeedUpdate; + uint64_t Total; /* Total access count */ + uint64_t Miss; /* Miss count */ + +public: + IBTC() : NeedUpdate(false), Total(0), Miss(0) { reset(); } + ~IBTC() {} + + inline ibtc_t &cache(target_ulong pc) { + return Cache[(pc >> 2) & IBTC_CACHE_MASK]; + } + void reset() { + for (unsigned i = 0; i < IBTC_CACHE_SIZE; ++i) + Cache[i].first = (target_ulong)-1; + } + void remove(TranslationBlock *tb) { + ibtc_t &c = cache(tb->pc); + if (c.first == tb->pc) + c.first = (target_ulong)-1; + } + void insert(target_ulong pc, TranslationBlock *tb) { + cache(pc) = std::make_pair(pc, tb); + } + TranslationBlock *get(target_ulong pc) { + 
ibtc_t &c = cache(pc); + return (c.first == pc) ? c.second : nullptr; + } + void setUpdate() { NeedUpdate = true; } + void resetUpdate() { NeedUpdate = false; } + bool needUpdate() { return NeedUpdate; } + inline void incTotal() { Total++; } + inline void incMiss() { Miss++; } + void dump() { + double HitRate = (double)(Total - Miss) * 100 / Total; + std::cerr << "\nibtc.miss = " << Miss << "/" << Total << + " (hit rate=" << HitRate << "%)\n"; + } +}; + +/* + * Cross-Page Block Linking (CPBL) + */ +class CPBL { + uint64_t Total; /* Total access count */ + uint64_t Miss; /* Miss count */ + uint64_t ValidateTotal; /* Total validation count */ + uint64_t ValidateMiss; /* Miss validation count */ +public: + CPBL() : Total(0), Miss(0), ValidateTotal(0), ValidateMiss(0) {} + + inline void incTotal() { Total++; } + inline void incMiss() { Miss++; } + inline void incValidateTotal() { ValidateTotal++; } + inline void incValidateMiss() { ValidateMiss++; } + void dump() { + double HitRate = (double)(Total - Miss) * 100 / Total; + double HitRate2 = (double)(ValidateTotal - ValidateMiss) * 100 / ValidateTotal; + std::cerr << "cpbl.miss = " << Miss << "/" << Total << + " (hit rate=" << HitRate << "%)\n" << + "validate.miss = " << ValidateMiss << "/" << ValidateTotal << + " (hit rate=" << HitRate2 << "%)\n"; + } +}; + +/* + * Large Page Table + * + * This table tracks every large page created by the guest system. + * Once a `possibly' large page is invalidated, the tracked pages are searched + * to determine whether it really is a large-page invalidation. If no match is + * found, this is a false alert and we can fall back to the default-size page + * flushing. Otherwise, the SoftTLB, the IBTC/CPBL optimizations, etc. must be + * partially or fully cleaned up due to the true large-page flushing. + */ +#define MAX_NUM_LARGEPAGE (1024) + +class LargePageTable { + typedef std::pair<target_ulong, target_ulong> PTE; + typedef std::list<PTE> PTEList; + PTEList Used; + PTEList Free; + CPUState *CS; + uint64_t Total; + uint64_t Miss; + +public: + LargePageTable(CPUState *cpu) : Total(0), Miss(0) { + CS = cpu; + Used.clear(); + Free.resize(MAX_NUM_LARGEPAGE); + } + ~LargePageTable() {} + + enum { + SEARCH = 0, + FLUSH, + }; + + void reset() { + Free.splice(Free.end(), Used); + } + void remove(PTEList::iterator I) { + Free.splice(Free.begin(), Used, I); + } + void allocate(PTE pte) { + /* If the free list is empty, we need to clear the SoftTLB by calling + * tlb_flush(), which will then invoke LargePageTable::reset() to clear the LPT. 
*/ + if (Free.empty()) + tlb_flush(CS, 0); + Free.front() = pte; + Used.splice(Used.begin(), Free, Free.begin()); + } + void insert(target_ulong addr, target_ulong size) { + for (PTEList::iterator I = Used.begin(), E = Used.end(); I != E; ++I) { + if (I->first == (addr & I->second)) { + Used.splice(Used.begin(), Used, I); + return; + } + } + target_ulong mask = ~(size - 1); + allocate(PTE(addr & mask, mask)); + } + bool search(target_ulong addr, bool mode, target_ulong *addrp, + target_ulong *sizep) { + for (PTEList::iterator I = Used.begin(), E = Used.end(); I != E; ++I) { + if (I->first != (addr & I->second)) + continue; + *addrp = I->first; + *sizep = ~I->second + 1; + if (mode == FLUSH) + remove(I); + return true; + } + return false; + } + void incTotal() { Total++; } + void incMiss() { Miss++; } + void dump() { + double Rate = (double)(Total - Miss) * 100 / Total; + std::cerr << "lpt.miss = " << Miss << "/" << Total << + " (false flushing=" << Rate << "% #pages=" << + Used.size() << ")\n"; + } +}; + + +class BaseTracer; + +struct CPUOptimization { + CPUOptimization(CPUState *cpu, BaseTracer *tracer) + : lpt(LargePageTable(cpu)), pt(tracer) {} + + ITLB itlb; /* instruction TLB */ + IBTC ibtc; /* indirect branch target cache */ + CPBL cpbl; /* cross-page block linking */ + LargePageTable lpt; /* large page handling */ + BaseTracer *pt; /* processor tracer */ +}; + + +static inline int isUserTB(TranslationBlock *tb) { + int is_user = 1; +#if defined(CONFIG_SOFTMMU) +#if defined(TARGET_ALPHA) + is_user = (tb->flags & TB_FLAGS_USER_MODE); +#elif defined(TARGET_ARM) + is_user = ((ARM_TBFLAG_MMUIDX(tb->flags) & 3) == 0); +#elif defined(TARGET_I386) + is_user = ((tb->flags >> HF_CPL_SHIFT) & 3) == 3; +#elif defined(TARGET_MIPS) + is_user = (tb->flags & MIPS_HFLAG_UM); +#elif defined(TARGET_PPC) + is_user = ((tb->flags >> MSR_PR) & 1); +#else +#error "unsupported processor type" +#endif +#endif + return is_user; +} + +static inline ITLB &cpu_get_itlb(CPUArchState *env) { + return ((CPUOptimization *)env->opt_link)->itlb; +} +static inline IBTC &cpu_get_ibtc(CPUArchState *env) { + return ((CPUOptimization *)env->opt_link)->ibtc; +} +static inline CPBL &cpu_get_cpbl(CPUArchState *env) { + return ((CPUOptimization *)env->opt_link)->cpbl; +} +static inline LargePageTable &cpu_get_lpt(CPUArchState *env) { + return ((CPUOptimization *)env->opt_link)->lpt; +} +static inline BaseTracer *cpu_get_tracer(CPUArchState *env) { + return ((CPUOptimization *)env->opt_link)->pt; +} + +#endif + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/llvm/include/pmu/arm/arm-events.h b/llvm/include/pmu/arm/arm-events.h new file mode 100644 index 0000000..b3bb1d7 --- /dev/null +++ b/llvm/include/pmu/arm/arm-events.h @@ -0,0 +1,35 @@ +/* + * (C) 2018 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. 
+ */ + +#ifndef __ARM_EVENTS_H +#define __ARM_EVENTS_H + +#include <vector> +#include "pmu/pmu.h" + +namespace pmu { + +class PMUEvent; + +#if defined(__arm__) +#define pmu_mb() ((void(*)(void))0xffff0fa0)() +#define pmu_rmb() ((void(*)(void))0xffff0fa0)() +#define pmu_wmb() ((void(*)(void))0xffff0fa0)() +#elif defined(__aarch64__) +#define pmu_mb() asm volatile("dmb ish" ::: "memory") +#define pmu_rmb() asm volatile("dmb ishld" ::: "memory") +#define pmu_wmb() asm volatile("dmb ishst" ::: "memory") +#endif + + +int ARMInit(void); + +} /* namespace pmu */ + +#endif /* __ARM_EVENTS_H */ + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/llvm/include/pmu/perf_event.h b/llvm/include/pmu/perf_event.h new file mode 100644 index 0000000..81fed4a --- /dev/null +++ b/llvm/include/pmu/perf_event.h @@ -0,0 +1,992 @@ +/* + * This file is copied from linux-4.11/include/uapi/linux/perf_event.h. + * + * Performance events: + * + * Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de> + * Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar + * Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra + * + * Data type definitions, declarations, prototypes. + * + * Started by: Thomas Gleixner and Ingo Molnar + * + * For licencing details see kernel-base/COPYING + */ +#ifndef _UAPI_LINUX_PERF_EVENT_H +#define _UAPI_LINUX_PERF_EVENT_H + +#include <stdint.h> + +#ifdef __cplusplus +extern "C" +{ +#endif + +/* + * User-space ABI bits: + */ + +/* + * attr.type + */ +enum perf_type_id { + PERF_TYPE_HARDWARE = 0, + PERF_TYPE_SOFTWARE = 1, + PERF_TYPE_TRACEPOINT = 2, + PERF_TYPE_HW_CACHE = 3, + PERF_TYPE_RAW = 4, + PERF_TYPE_BREAKPOINT = 5, + + PERF_TYPE_MAX, /* non-ABI */ +}; + +/* + * Generalized performance event event_id types, used by the + * attr.event_id parameter of the sys_perf_event_open() + * syscall: + */ +enum perf_hw_id { + /* + * Common hardware events, generalized by the kernel: + */ + PERF_COUNT_HW_CPU_CYCLES = 0, + PERF_COUNT_HW_INSTRUCTIONS = 1, + PERF_COUNT_HW_CACHE_REFERENCES = 2, + PERF_COUNT_HW_CACHE_MISSES = 3, + PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4, + PERF_COUNT_HW_BRANCH_MISSES = 5, + PERF_COUNT_HW_BUS_CYCLES = 6, + PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 7, + PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 8, + PERF_COUNT_HW_REF_CPU_CYCLES = 9, + + PERF_COUNT_HW_MAX, /* non-ABI */ +}; + +/* + * Generalized hardware cache events: + * + * { L1-D, L1-I, LLC, ITLB, DTLB, BPU, NODE } x + * { read, write, prefetch } x + * { accesses, misses } + */ +enum perf_hw_cache_id { + PERF_COUNT_HW_CACHE_L1D = 0, + PERF_COUNT_HW_CACHE_L1I = 1, + PERF_COUNT_HW_CACHE_LL = 2, + PERF_COUNT_HW_CACHE_DTLB = 3, + PERF_COUNT_HW_CACHE_ITLB = 4, + PERF_COUNT_HW_CACHE_BPU = 5, + PERF_COUNT_HW_CACHE_NODE = 6, + + PERF_COUNT_HW_CACHE_MAX, /* non-ABI */ +}; + +enum perf_hw_cache_op_id { + PERF_COUNT_HW_CACHE_OP_READ = 0, + PERF_COUNT_HW_CACHE_OP_WRITE = 1, + PERF_COUNT_HW_CACHE_OP_PREFETCH = 2, + + PERF_COUNT_HW_CACHE_OP_MAX, /* non-ABI */ +}; + +enum perf_hw_cache_op_result_id { + PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0, + PERF_COUNT_HW_CACHE_RESULT_MISS = 1, + + PERF_COUNT_HW_CACHE_RESULT_MAX, /* non-ABI */ +}; + +/* + * Special "software" events provided by the kernel, even if the hardware + * does not support performance events. 
These events measure various + * physical and sw events of the kernel (and allow the profiling of them as + * well): + */ +enum perf_sw_ids { + PERF_COUNT_SW_CPU_CLOCK = 0, + PERF_COUNT_SW_TASK_CLOCK = 1, + PERF_COUNT_SW_PAGE_FAULTS = 2, + PERF_COUNT_SW_CONTEXT_SWITCHES = 3, + PERF_COUNT_SW_CPU_MIGRATIONS = 4, + PERF_COUNT_SW_PAGE_FAULTS_MIN = 5, + PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6, + PERF_COUNT_SW_ALIGNMENT_FAULTS = 7, + PERF_COUNT_SW_EMULATION_FAULTS = 8, + PERF_COUNT_SW_DUMMY = 9, + PERF_COUNT_SW_BPF_OUTPUT = 10, + + PERF_COUNT_SW_MAX, /* non-ABI */ +}; + +/* + * Bits that can be set in attr.sample_type to request information + * in the overflow packets. + */ +enum perf_event_sample_format { + PERF_SAMPLE_IP = 1U << 0, + PERF_SAMPLE_TID = 1U << 1, + PERF_SAMPLE_TIME = 1U << 2, + PERF_SAMPLE_ADDR = 1U << 3, + PERF_SAMPLE_READ = 1U << 4, + PERF_SAMPLE_CALLCHAIN = 1U << 5, + PERF_SAMPLE_ID = 1U << 6, + PERF_SAMPLE_CPU = 1U << 7, + PERF_SAMPLE_PERIOD = 1U << 8, + PERF_SAMPLE_STREAM_ID = 1U << 9, + PERF_SAMPLE_RAW = 1U << 10, + PERF_SAMPLE_BRANCH_STACK = 1U << 11, + PERF_SAMPLE_REGS_USER = 1U << 12, + PERF_SAMPLE_STACK_USER = 1U << 13, + PERF_SAMPLE_WEIGHT = 1U << 14, + PERF_SAMPLE_DATA_SRC = 1U << 15, + PERF_SAMPLE_IDENTIFIER = 1U << 16, + PERF_SAMPLE_TRANSACTION = 1U << 17, + PERF_SAMPLE_REGS_INTR = 1U << 18, + + PERF_SAMPLE_MAX = 1U << 19, /* non-ABI */ +}; + +/* + * values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set + * + * If the user does not pass priv level information via branch_sample_type, + * the kernel uses the event's priv level. Branch and event priv levels do + * not have to match. Branch priv level is checked for permissions. + * + * The branch types can be combined, however BRANCH_ANY covers all types + * of branches and therefore it supersedes all the other types. 
+ */ +enum perf_branch_sample_type_shift { + PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */ + PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */ + PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */ + + PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */ + PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */ + PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */ + PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */ + PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */ + PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */ + PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */ + PERF_SAMPLE_BRANCH_COND_SHIFT = 10, /* conditional branches */ + + PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* call/ret stack */ + PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT = 12, /* indirect jumps */ + PERF_SAMPLE_BRANCH_CALL_SHIFT = 13, /* direct call */ + + PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT = 14, /* no flags */ + PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT = 15, /* no cycles */ + + PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */ +}; + +enum perf_branch_sample_type { + PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT, + PERF_SAMPLE_BRANCH_KERNEL = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT, + PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT, + + PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT, + PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT, + PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT, + PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT, + PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT, + PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT, + PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT, + PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT, + + PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT, + PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT, + PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT, + + PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT, + PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT, + + PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, +}; + +#define PERF_SAMPLE_BRANCH_PLM_ALL \ + (PERF_SAMPLE_BRANCH_USER|\ + PERF_SAMPLE_BRANCH_KERNEL|\ + PERF_SAMPLE_BRANCH_HV) + +/* + * Values to determine ABI of the registers dump. + */ +enum perf_sample_regs_abi { + PERF_SAMPLE_REGS_ABI_NONE = 0, + PERF_SAMPLE_REGS_ABI_32 = 1, + PERF_SAMPLE_REGS_ABI_64 = 2, +}; + +/* + * Values for the memory transaction event qualifier, mostly for + * abort events. Multiple bits can be set. 
+ */ +enum { + PERF_TXN_ELISION = (1 << 0), /* From elision */ + PERF_TXN_TRANSACTION = (1 << 1), /* From transaction */ + PERF_TXN_SYNC = (1 << 2), /* Instruction is related */ + PERF_TXN_ASYNC = (1 << 3), /* Instruction not related */ + PERF_TXN_RETRY = (1 << 4), /* Retry possible */ + PERF_TXN_CONFLICT = (1 << 5), /* Conflict abort */ + PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */ + PERF_TXN_CAPACITY_READ = (1 << 7), /* Capacity read abort */ + + PERF_TXN_MAX = (1 << 8), /* non-ABI */ + + /* bits 32..63 are reserved for the abort code */ + + PERF_TXN_ABORT_MASK = (0xffffffffULL << 32), + PERF_TXN_ABORT_SHIFT = 32, +}; + +/* + * The format of the data returned by read() on a perf event fd, + * as specified by attr.read_format: + * + * struct read_format { + * { u64 value; + * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED + * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING + * { u64 id; } && PERF_FORMAT_ID + * } && !PERF_FORMAT_GROUP + * + * { u64 nr; + * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED + * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING + * { u64 value; + * { u64 id; } && PERF_FORMAT_ID + * } cntr[nr]; + * } && PERF_FORMAT_GROUP + * }; + */ +enum perf_event_read_format { + PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0, + PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1, + PERF_FORMAT_ID = 1U << 2, + PERF_FORMAT_GROUP = 1U << 3, + + PERF_FORMAT_MAX = 1U << 4, /* non-ABI */ +}; + +#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ +#define PERF_ATTR_SIZE_VER1 72 /* add: config2 */ +#define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */ +#define PERF_ATTR_SIZE_VER3 96 /* add: sample_regs_user */ + /* add: sample_stack_user */ +#define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */ +#define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */ + +/* + * Hardware event_id to monitor via a performance monitoring event: + * + * @sample_max_stack: Max number of frame pointers in a callchain, + * should be < /proc/sys/kernel/perf_event_max_stack + */ +struct perf_event_attr { + + /* + * Major type: hardware/software/tracepoint/etc. + */ + uint32_t type; + + /* + * Size of the attr structure, for fwd/bwd compat. + */ + uint32_t size; + + /* + * Type specific configuration information. 
+ */ + uint64_t config; + + union { + uint64_t sample_period; + uint64_t sample_freq; + }; + + uint64_t sample_type; + uint64_t read_format; + + uint64_t disabled : 1, /* off by default */ + inherit : 1, /* children inherit it */ + pinned : 1, /* must always be on PMU */ + exclusive : 1, /* only group on PMU */ + exclude_user : 1, /* don't count user */ + exclude_kernel : 1, /* ditto kernel */ + exclude_hv : 1, /* ditto hypervisor */ + exclude_idle : 1, /* don't count when idle */ + mmap : 1, /* include mmap data */ + comm : 1, /* include comm data */ + freq : 1, /* use freq, not period */ + inherit_stat : 1, /* per task counts */ + enable_on_exec : 1, /* next exec enables */ + task : 1, /* trace fork/exit */ + watermark : 1, /* wakeup_watermark */ + /* + * precise_ip: + * + * 0 - SAMPLE_IP can have arbitrary skid + * 1 - SAMPLE_IP must have constant skid + * 2 - SAMPLE_IP requested to have 0 skid + * 3 - SAMPLE_IP must have 0 skid + * + * See also PERF_RECORD_MISC_EXACT_IP + */ + precise_ip : 2, /* skid constraint */ + mmap_data : 1, /* non-exec mmap data */ + sample_id_all : 1, /* sample_type all events */ + + exclude_host : 1, /* don't count in host */ + exclude_guest : 1, /* don't count in guest */ + + exclude_callchain_kernel : 1, /* exclude kernel callchains */ + exclude_callchain_user : 1, /* exclude user callchains */ + mmap2 : 1, /* include mmap with inode data */ + comm_exec : 1, /* flag comm events that are due to an exec */ + use_clockid : 1, /* use @clockid for time fields */ + context_switch : 1, /* context switch data */ + write_backward : 1, /* Write ring buffer from end to beginning */ + __reserved_1 : 36; + + union { + uint32_t wakeup_events; /* wakeup every n events */ + uint32_t wakeup_watermark; /* bytes before wakeup */ + }; + + uint32_t bp_type; + union { + uint64_t bp_addr; + uint64_t config1; /* extension of config */ + }; + union { + uint64_t bp_len; + uint64_t config2; /* extension of config1 */ + }; + uint64_t branch_sample_type; /* enum perf_branch_sample_type */ + + /* + * Defines set of user regs to dump on samples. + * See asm/perf_regs.h for details. + */ + uint64_t sample_regs_user; + + /* + * Defines size of the user stack to dump on samples. + */ + uint32_t sample_stack_user; + + int32_t clockid; + /* + * Defines set of regs to dump for each sample + * state captured on: + * - precise = 0: PMU interrupt + * - precise > 0: sampled instruction + * + * See asm/perf_regs.h for details. 
+ */ + uint64_t sample_regs_intr; + + /* + * Wakeup watermark for AUX area + */ + uint32_t aux_watermark; + uint16_t sample_max_stack; + uint16_t __reserved_2; /* align to uint64_t */ +}; + +#define perf_flags(attr) (*(&(attr)->read_format + 1)) + +/* + * Ioctls that can be done on a perf event fd: + */ +#define PERF_EVENT_IOC_ENABLE _IO ('$', 0) +#define PERF_EVENT_IOC_DISABLE _IO ('$', 1) +#define PERF_EVENT_IOC_REFRESH _IO ('$', 2) +#define PERF_EVENT_IOC_RESET _IO ('$', 3) +#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, uint64_t) +#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) +#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *) +#define PERF_EVENT_IOC_ID _IOR('$', 7, uint64_t *) +#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, uint32_t) +#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, uint32_t) + +enum perf_event_ioc_flags { + PERF_IOC_FLAG_GROUP = 1U << 0, +}; + +/* + * Structure of the page that can be mapped via mmap + */ +struct perf_event_mmap_page { + uint32_t version; /* version number of this structure */ + uint32_t compat_version; /* lowest version this is compat with */ + + /* + * Bits needed to read the hw events in user-space. + * + * u32 seq, time_mult, time_shift, index, width; + * u64 count, enabled, running; + * u64 cyc, time_offset; + * s64 pmc = 0; + * + * do { + * seq = pc->lock; + * barrier() + * + * enabled = pc->time_enabled; + * running = pc->time_running; + * + * if (pc->cap_usr_time && enabled != running) { + * cyc = rdtsc(); + * time_offset = pc->time_offset; + * time_mult = pc->time_mult; + * time_shift = pc->time_shift; + * } + * + * index = pc->index; + * count = pc->offset; + * if (pc->cap_user_rdpmc && index) { + * width = pc->pmc_width; + * pmc = rdpmc(index - 1); + * } + * + * barrier(); + * } while (pc->lock != seq); + * + * NOTE: for obvious reason this only works on self-monitoring + * processes. + */ + uint32_t lock; /* seqlock for synchronization */ + uint32_t index; /* hardware event identifier */ + int64_t offset; /* add to hardware event value */ + uint64_t time_enabled; /* time event active */ + uint64_t time_running; /* time event on cpu */ + union { + uint64_t capabilities; + struct { + uint64_t cap_bit0 : 1, /* Always 0, deprecated, see commit 860f085b74e9 */ + cap_bit0_is_deprecated : 1, /* Always 1, signals that bit 0 is zero */ + + cap_user_rdpmc : 1, /* The RDPMC instruction can be used to read counts */ + cap_user_time : 1, /* The time_* fields are used */ + cap_user_time_zero : 1, /* The time_zero field is used */ + cap_____res : 59; + }; + }; + + /* + * If cap_user_rdpmc this field provides the bit-width of the value + * read using the rdpmc() or equivalent instruction. This can be used + * to sign extend the result like: + * + * pmc <<= 64 - width; + * pmc >>= 64 - width; // signed shift right + * count += pmc; + */ + uint16_t pmc_width; + + /* + * If cap_usr_time the below fields can be used to compute the time + * delta since time_enabled (in ns) using rdtsc or similar. + * + * u64 quot, rem; + * u64 delta; + * + * quot = (cyc >> time_shift); + * rem = cyc & (((u64)1 << time_shift) - 1); + * delta = time_offset + quot * time_mult + + * ((rem * time_mult) >> time_shift); + * + * Where time_offset,time_mult,time_shift and cyc are read in the + * seqcount loop described above. 
This delta can then be added to + * enabled and possible running (if index), improving the scaling: + * + * enabled += delta; + * if (index) + * running += delta; + * + * quot = count / running; + * rem = count % running; + * count = quot * enabled + (rem * enabled) / running; + */ + uint16_t time_shift; + uint32_t time_mult; + uint64_t time_offset; + /* + * If cap_usr_time_zero, the hardware clock (e.g. TSC) can be calculated + * from sample timestamps. + * + * time = timestamp - time_zero; + * quot = time / time_mult; + * rem = time % time_mult; + * cyc = (quot << time_shift) + (rem << time_shift) / time_mult; + * + * And vice versa: + * + * quot = cyc >> time_shift; + * rem = cyc & (((u64)1 << time_shift) - 1); + * timestamp = time_zero + quot * time_mult + + * ((rem * time_mult) >> time_shift); + */ + uint64_t time_zero; + uint32_t size; /* Header size up to __reserved[] fields. */ + + /* + * Hole for extension of the self monitor capabilities + */ + + uint8_t __reserved[118*8+4]; /* align to 1k. */ + + /* + * Control data for the mmap() data buffer. + * + * User-space reading the @data_head value should issue an smp_rmb(), + * after reading this value. + * + * When the mapping is PROT_WRITE the @data_tail value should be + * written by userspace to reflect the last read data, after issueing + * an smp_mb() to separate the data read from the ->data_tail store. + * In this case the kernel will not over-write unread data. + * + * See perf_output_put_handle() for the data ordering. + * + * data_{offset,size} indicate the location and size of the perf record + * buffer within the mmapped area. + */ + uint64_t data_head; /* head in the data section */ + uint64_t data_tail; /* user-space written tail */ + uint64_t data_offset; /* where the buffer starts */ + uint64_t data_size; /* data buffer size */ + + /* + * AUX area is defined by aux_{offset,size} fields that should be set + * by the userspace, so that + * + * aux_offset >= data_offset + data_size + * + * prior to mmap()ing it. Size of the mmap()ed area should be aux_size. + * + * Ring buffer pointers aux_{head,tail} have the same semantics as + * data_{head,tail} and same ordering rules apply. + */ + uint64_t aux_head; + uint64_t aux_tail; + uint64_t aux_offset; + uint64_t aux_size; +}; + +#define PERF_RECORD_MISC_CPUMODE_MASK (7 << 0) +#define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0) +#define PERF_RECORD_MISC_KERNEL (1 << 0) +#define PERF_RECORD_MISC_USER (2 << 0) +#define PERF_RECORD_MISC_HYPERVISOR (3 << 0) +#define PERF_RECORD_MISC_GUEST_KERNEL (4 << 0) +#define PERF_RECORD_MISC_GUEST_USER (5 << 0) + +/* + * Indicates that /proc/PID/maps parsing are truncated by time out. + */ +#define PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT (1 << 12) +/* + * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on + * different events so can reuse the same bit position. + * Ditto PERF_RECORD_MISC_SWITCH_OUT. + */ +#define PERF_RECORD_MISC_MMAP_DATA (1 << 13) +#define PERF_RECORD_MISC_COMM_EXEC (1 << 13) +#define PERF_RECORD_MISC_SWITCH_OUT (1 << 13) +/* + * Indicates that the content of PERF_SAMPLE_IP points to + * the actual instruction that triggered the event. See also + * perf_event_attr::precise_ip. 
+ */ +#define PERF_RECORD_MISC_EXACT_IP (1 << 14) +/* + * Reserve the last bit to indicate some extended misc field + */ +#define PERF_RECORD_MISC_EXT_RESERVED (1 << 15) + +struct perf_event_header { + uint32_t type; + uint16_t misc; + uint16_t size; +}; + +enum perf_event_type { + + /* + * If perf_event_attr.sample_id_all is set then all event types will + * have the sample_type selected fields related to where/when + * (identity) an event took place (TID, TIME, ID, STREAM_ID, CPU, + * IDENTIFIER) described in PERF_RECORD_SAMPLE below, it will be stashed + * just after the perf_event_header and the fields already present for + * the existing fields, i.e. at the end of the payload. That way a newer + * perf.data file will be supported by older perf tools, with these new + * optional fields being ignored. + * + * struct sample_id { + * { u32 pid, tid; } && PERF_SAMPLE_TID + * { u64 time; } && PERF_SAMPLE_TIME + * { u64 id; } && PERF_SAMPLE_ID + * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID + * { u32 cpu, res; } && PERF_SAMPLE_CPU + * { u64 id; } && PERF_SAMPLE_IDENTIFIER + * } && perf_event_attr::sample_id_all + * + * Note that PERF_SAMPLE_IDENTIFIER duplicates PERF_SAMPLE_ID. The + * advantage of PERF_SAMPLE_IDENTIFIER is that its position is fixed + * relative to header.size. + */ + + /* + * The MMAP events record the PROT_EXEC mappings so that we can + * correlate userspace IPs to code. They have the following structure: + * + * struct { + * struct perf_event_header header; + * + * u32 pid, tid; + * u64 addr; + * u64 len; + * u64 pgoff; + * char filename[]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_MMAP = 1, + + /* + * struct { + * struct perf_event_header header; + * u64 id; + * u64 lost; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_LOST = 2, + + /* + * struct { + * struct perf_event_header header; + * + * u32 pid, tid; + * char comm[]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_COMM = 3, + + /* + * struct { + * struct perf_event_header header; + * u32 pid, ppid; + * u32 tid, ptid; + * u64 time; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_EXIT = 4, + + /* + * struct { + * struct perf_event_header header; + * u64 time; + * u64 id; + * u64 stream_id; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_THROTTLE = 5, + PERF_RECORD_UNTHROTTLE = 6, + + /* + * struct { + * struct perf_event_header header; + * u32 pid, ppid; + * u32 tid, ptid; + * u64 time; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_FORK = 7, + + /* + * struct { + * struct perf_event_header header; + * u32 pid, tid; + * + * struct read_format values; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_READ = 8, + + /* + * struct { + * struct perf_event_header header; + * + * # + * # Note that PERF_SAMPLE_IDENTIFIER duplicates PERF_SAMPLE_ID. + * # The advantage of PERF_SAMPLE_IDENTIFIER is that its position + * # is fixed relative to header. 
+ * # + * + * { u64 id; } && PERF_SAMPLE_IDENTIFIER + * { u64 ip; } && PERF_SAMPLE_IP + * { u32 pid, tid; } && PERF_SAMPLE_TID + * { u64 time; } && PERF_SAMPLE_TIME + * { u64 addr; } && PERF_SAMPLE_ADDR + * { u64 id; } && PERF_SAMPLE_ID + * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID + * { u32 cpu, res; } && PERF_SAMPLE_CPU + * { u64 period; } && PERF_SAMPLE_PERIOD + * + * { struct read_format values; } && PERF_SAMPLE_READ + * + * { u64 nr, + * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN + * + * # + * # The RAW record below is opaque data wrt the ABI + * # + * # That is, the ABI doesn't make any promises wrt to + * # the stability of its content, it may vary depending + * # on event, hardware, kernel version and phase of + * # the moon. + * # + * # In other words, PERF_SAMPLE_RAW contents are not an ABI. + * # + * + * { u32 size; + * char data[size];}&& PERF_SAMPLE_RAW + * + * { u64 nr; + * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK + * + * { u64 abi; # enum perf_sample_regs_abi + * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER + * + * { u64 size; + * char data[size]; + * u64 dyn_size; } && PERF_SAMPLE_STACK_USER + * + * { u64 weight; } && PERF_SAMPLE_WEIGHT + * { u64 data_src; } && PERF_SAMPLE_DATA_SRC + * { u64 transaction; } && PERF_SAMPLE_TRANSACTION + * { u64 abi; # enum perf_sample_regs_abi + * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR + * }; + */ + PERF_RECORD_SAMPLE = 9, + + /* + * The MMAP2 records are an augmented version of MMAP, they add + * maj, min, ino numbers to be used to uniquely identify each mapping + * + * struct { + * struct perf_event_header header; + * + * u32 pid, tid; + * u64 addr; + * u64 len; + * u64 pgoff; + * u32 maj; + * u32 min; + * u64 ino; + * u64 ino_generation; + * u32 prot, flags; + * char filename[]; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_MMAP2 = 10, + + /* + * Records that new data landed in the AUX buffer part. + * + * struct { + * struct perf_event_header header; + * + * u64 aux_offset; + * u64 aux_size; + * u64 flags; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_AUX = 11, + + /* + * Indicates that instruction trace has started + * + * struct { + * struct perf_event_header header; + * u32 pid; + * u32 tid; + * }; + */ + PERF_RECORD_ITRACE_START = 12, + + /* + * Records the dropped/lost sample number. + * + * struct { + * struct perf_event_header header; + * + * u64 lost; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_LOST_SAMPLES = 13, + + /* + * Records a context switch in or out (flagged by + * PERF_RECORD_MISC_SWITCH_OUT). See also + * PERF_RECORD_SWITCH_CPU_WIDE. + * + * struct { + * struct perf_event_header header; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_SWITCH = 14, + + /* + * CPU-wide version of PERF_RECORD_SWITCH with next_prev_pid and + * next_prev_tid that are the next (switching out) or previous + * (switching in) pid/tid. 
+ * + * struct { + * struct perf_event_header header; + * u32 next_prev_pid; + * u32 next_prev_tid; + * struct sample_id sample_id; + * }; + */ + PERF_RECORD_SWITCH_CPU_WIDE = 15, + + PERF_RECORD_MAX, /* non-ABI */ +}; + +#define PERF_MAX_STACK_DEPTH 127 +#define PERF_MAX_CONTEXTS_PER_STACK 8 + +enum perf_callchain_context { + PERF_CONTEXT_HV = (uint64_t)-32, + PERF_CONTEXT_KERNEL = (uint64_t)-128, + PERF_CONTEXT_USER = (uint64_t)-512, + + PERF_CONTEXT_GUEST = (uint64_t)-2048, + PERF_CONTEXT_GUEST_KERNEL = (uint64_t)-2176, + PERF_CONTEXT_GUEST_USER = (uint64_t)-2560, + + PERF_CONTEXT_MAX = (uint64_t)-4095, +}; + +/** + * PERF_RECORD_AUX::flags bits + */ +#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */ +#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */ + +#define PERF_FLAG_FD_NO_GROUP (1UL << 0) +#define PERF_FLAG_FD_OUTPUT (1UL << 1) +#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */ +#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */ + +union perf_mem_data_src { + uint64_t val; + struct { + uint64_t mem_op:5, /* type of opcode */ + mem_lvl:14, /* memory hierarchy level */ + mem_snoop:5, /* snoop mode */ + mem_lock:2, /* lock instr */ + mem_dtlb:7, /* tlb access */ + mem_rsvd:31; + }; +}; + +/* type of opcode (load/store/prefetch,code) */ +#define PERF_MEM_OP_NA 0x01 /* not available */ +#define PERF_MEM_OP_LOAD 0x02 /* load instruction */ +#define PERF_MEM_OP_STORE 0x04 /* store instruction */ +#define PERF_MEM_OP_PFETCH 0x08 /* prefetch */ +#define PERF_MEM_OP_EXEC 0x10 /* code (execution) */ +#define PERF_MEM_OP_SHIFT 0 + +/* memory hierarchy (memory level, hit or miss) */ +#define PERF_MEM_LVL_NA 0x01 /* not available */ +#define PERF_MEM_LVL_HIT 0x02 /* hit level */ +#define PERF_MEM_LVL_MISS 0x04 /* miss level */ +#define PERF_MEM_LVL_L1 0x08 /* L1 */ +#define PERF_MEM_LVL_LFB 0x10 /* Line Fill Buffer */ +#define PERF_MEM_LVL_L2 0x20 /* L2 */ +#define PERF_MEM_LVL_L3 0x40 /* L3 */ +#define PERF_MEM_LVL_LOC_RAM 0x80 /* Local DRAM */ +#define PERF_MEM_LVL_REM_RAM1 0x100 /* Remote DRAM (1 hop) */ +#define PERF_MEM_LVL_REM_RAM2 0x200 /* Remote DRAM (2 hops) */ +#define PERF_MEM_LVL_REM_CCE1 0x400 /* Remote Cache (1 hop) */ +#define PERF_MEM_LVL_REM_CCE2 0x800 /* Remote Cache (2 hops) */ +#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */ +#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */ +#define PERF_MEM_LVL_SHIFT 5 + +/* snoop mode */ +#define PERF_MEM_SNOOP_NA 0x01 /* not available */ +#define PERF_MEM_SNOOP_NONE 0x02 /* no snoop */ +#define PERF_MEM_SNOOP_HIT 0x04 /* snoop hit */ +#define PERF_MEM_SNOOP_MISS 0x08 /* snoop miss */ +#define PERF_MEM_SNOOP_HITM 0x10 /* snoop hit modified */ +#define PERF_MEM_SNOOP_SHIFT 19 + +/* locked instruction */ +#define PERF_MEM_LOCK_NA 0x01 /* not available */ +#define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */ +#define PERF_MEM_LOCK_SHIFT 24 + +/* TLB access */ +#define PERF_MEM_TLB_NA 0x01 /* not available */ +#define PERF_MEM_TLB_HIT 0x02 /* hit level */ +#define PERF_MEM_TLB_MISS 0x04 /* miss level */ +#define PERF_MEM_TLB_L1 0x08 /* L1 */ +#define PERF_MEM_TLB_L2 0x10 /* L2 */ +#define PERF_MEM_TLB_WK 0x20 /* Hardware Walker*/ +#define PERF_MEM_TLB_OS 0x40 /* OS fault handler */ +#define PERF_MEM_TLB_SHIFT 26 + +#define PERF_MEM_S(a, s) \ + (((uint64_t)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT) + +/* + * single taken branch record layout: + * + * from: source instruction (may not always be a branch insn) + * to: branch target + * mispred: branch target 
was mispredicted + * predicted: branch target was predicted + * + * support for mispred, predicted is optional. In case it + * is not supported mispred = predicted = 0. + * + * in_tx: running in a hardware transaction + * abort: aborting a hardware transaction + * cycles: cycles from last branch (or 0 if not supported) + */ +struct perf_branch_entry { + uint64_t from; + uint64_t to; + uint64_t mispred:1, /* target mispredicted */ + predicted:1,/* target predicted */ + in_tx:1, /* in transaction */ + abort:1, /* transaction abort */ + cycles:16, /* cycle count to last branch */ + reserved:44; +}; + +#ifdef __cplusplus +} +#endif + +#endif /* _UAPI_LINUX_PERF_EVENT_H */ diff --git a/llvm/include/pmu/pmu-events.h b/llvm/include/pmu/pmu-events.h new file mode 100644 index 0000000..2c31ae9 --- /dev/null +++ b/llvm/include/pmu/pmu-events.h @@ -0,0 +1,131 @@ +/* + * (C) 2018 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#ifndef __PMU_EVENTS_H +#define __PMU_EVENTS_H + +#include <list> +#include <vector> +#include <signal.h> +#include "pmu-global.h" +#include "pmu.h" + +namespace pmu { + +#define PMU_MAX_EVENTS (1024) + +class Timer; + +/* Mode of the event. */ +enum { + MODE_NONE = 0, + MODE_COUNTER = ((uint32_t)1U << 1), + MODE_SAMPLE = ((uint32_t)1U << 2), + MODE_SAMPLE_IP = ((uint32_t)1U << 3), + MODE_SAMPLE_READ = ((uint32_t)1U << 4), +}; + +/* State of the event. */ +enum { + STATE_STOP = 0, + STATE_START = ((uint32_t)1U << 1), + STATE_GOTO_STOP = ((uint32_t)1U << 2), + STATE_GOTO_START = ((uint32_t)1U << 3), +}; + +/* Sampling mmap buffer information. */ +struct MMap { + void *Base; + uint64_t Size; + uint64_t Prev; +}; + +/* Event. */ +struct PMUEvent { + PMUEvent() : Hndl(0), Mode(MODE_NONE), State(STATE_STOP) {} + + Handle Hndl; /* Unique handle value */ + int Mode; /* Event mode */ + int State; /* Current event state */ + std::vector<int> FD; /* Opened fd(s) of this event */ + MMap Data; /* mmap data info */ + MMap Aux; /* mmap aux info */ + uint64_t Watermark; /* The bytes before wakeup */ + /* Overflow handling function pointer */ + union { + void *OverflowHandler; + SampleHandlerTy SampleHandler; + }; + void *Opaque; /* Opaque pointer passed to the overflow handler. */ + + int getFD() { return FD[0]; } /* Group leader fd */ +}; + +/* + * Event Manager. + */ +class EventManager { + typedef std::list<PMUEvent *> EventList; + + PMUEvent Events[PMU_MAX_EVENTS]; /* Pre-allocated events */ + EventList FreeEvents; /* Free events */ + EventList SampleEvents; /* Sampling events */ + Timer *EventTimer; /* Timer for sampling events. */ + std::vector<PMUEvent *> ChangedEvents; + +public: + EventManager(); + ~EventManager(); + + /* Return the event of the input handle. */ + PMUEvent *GetEvent(Handle Hndl); + + /* Add a counting event and return its handle. */ + Handle AddEvent(int fd); + + /* Add a sampling event and return its handle. */ + Handle AddSampleEvent(unsigned NumFDs, int *FD, uint64_t DataSize, void *Data, + uint32_t Mode, SampleConfig &Config); + + /* Notify that an event is started. */ + void StartEvent(PMUEvent *Event, bool ShouldLock = true); + + /* Notify that an event is stopped. */ + void StopEvent(PMUEvent *Event, bool ShouldLock = true); + + /* Notify that an event is deleted. */ + void DeleteEvent(PMUEvent *Event); + + /* Stop the event manager. */ + void Pause(); + + /* Restart the event manager. 
*/ + void Resume(); + + friend void DefaultHandler(int signum, siginfo_t *info, void *data); +}; + +/* Interval timer. */ +class Timer { + timer_t T; + +public: + Timer(int Signum, int TID); + ~Timer(); + + /* Start a timer that expires just once. */ + void Start(); + + /* Stop a timer.*/ + void Stop(); +}; + +} /* namespace pmu */ + +#endif /* __PMU_EVENTS_H */ + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/llvm/include/pmu/pmu-global.h b/llvm/include/pmu/pmu-global.h new file mode 100644 index 0000000..ed059a4 --- /dev/null +++ b/llvm/include/pmu/pmu-global.h @@ -0,0 +1,52 @@ +/* + * (C) 2018 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#ifndef __PMU_GLOBAL_H +#define __PMU_GLOBAL_H + +#if defined(__i386__) || defined(__x86_64__) +#include "pmu/x86/x86-events.h" +#elif defined(__arm__) || defined(__aarch64__) +#include "pmu/arm/arm-events.h" +#elif defined(_ARCH_PPC) || defined(_ARCH_PPC64) +#include "pmu/ppc/ppc-events.h" +#endif + +#include "pmu/pmu-utils.h" +#include "pmu/pmu.h" + +namespace pmu { + +#define PMU_SIGNAL_NUM SIGIO +#define PMU_SAMPLE_PERIOD 1e6 +#define PMU_SAMPLE_PAGES 4 + +class EventManager; + +/* Pre-defined event identity. */ +struct EventID { + int Type; /* Perf major type: hardware/software/etc */ + int Config; /* Perf type specific configuration information */ +}; + +/* System-wide configuration. */ +struct GlobalConfig { + int PageSize; /* Host page size */ + int SignalReceiver; /* TID of the signal receiver */ + uint32_t Timeout; /* Timer period in nanosecond */ + int PerfVersion; /* Perf version used in this PMU tool */ + int OSPerfVersion; /* Perf version used in the OS kernel */ +}; + +extern EventManager *EventMgr; +extern GlobalConfig SysConfig; + +} /* namespace pmu */ + +#endif /* __PMU_GLOBAL_H */ + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/llvm/include/pmu/pmu-utils.h b/llvm/include/pmu/pmu-utils.h new file mode 100644 index 0000000..5e3e014 --- /dev/null +++ b/llvm/include/pmu/pmu-utils.h @@ -0,0 +1,106 @@ +/* + * (C) 2018 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. 
+ */ + +#ifndef __PMU_UTILS_H +#define __PMU_UTILS_H + +#include <unistd.h> +#include <string.h> +#include <pthread.h> +#include <sys/types.h> +#include <sys/ioctl.h> +#include <sys/syscall.h> +#include "perf_event.h" + +#ifndef ACCESS_ONCE +#define ACCESS_ONCE(x) (*(volatile decltype(x) *)&(x)) +#endif + +namespace pmu { + +static inline int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid, + int cpu, int group_fd, + unsigned long flags) { + return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); +} + +static inline void perf_attr_init(struct perf_event_attr *attr, int type, + int config) { + memset(attr, 0, sizeof(struct perf_event_attr)); + attr->type = type; + attr->config = config; + attr->size = sizeof(struct perf_event_attr); + attr->disabled = 1; + attr->exclude_kernel = 1; + attr->exclude_guest = 1; + attr->exclude_hv = 1; +} + +static inline int perf_event_start(int fd) { + return ioctl(fd, PERF_EVENT_IOC_ENABLE, 0); +} + +static inline int perf_event_stop(int fd) { + return ioctl(fd, PERF_EVENT_IOC_DISABLE, 0); +} + +static inline int perf_event_reset(int fd) { + return ioctl(fd, PERF_EVENT_IOC_RESET, 0); +} + +static inline int perf_event_set_filter(int fd, const char *arg) { + return ioctl(fd, PERF_EVENT_IOC_SET_FILTER, (void *)arg); +} + +static inline uint64_t perf_read_data_head(void *header) { + struct perf_event_mmap_page *pc = (struct perf_event_mmap_page *)header; + uint64_t head = ACCESS_ONCE(pc->data_head); + pmu_rmb(); + return head; +} + +static inline void perf_write_data_tail(void *header, uint64_t val) { + struct perf_event_mmap_page *pc = (struct perf_event_mmap_page *)header; + pmu_mb(); + pc->data_tail = val; +} + +static inline uint64_t perf_read_aux_head(void *header) { + struct perf_event_mmap_page *pc = (struct perf_event_mmap_page *)header; + uint64_t head = ACCESS_ONCE(pc->aux_head); + pmu_rmb(); + return head; +} + +static inline void perf_write_aux_tail(void *header, uint64_t val) { + struct perf_event_mmap_page *pc = (struct perf_event_mmap_page *)header; + pmu_mb(); + pc->aux_tail = val; +} + +static inline int isPowerOf2(uint64_t value) { + if (!value) + return 0; + return !(value & (value - 1)); +} + +/* Convert system errno to PMU error code. */ +static inline int ErrorCode(int err) +{ + switch (err) { + case EPERM: + case EACCES: return PMU_EPERM; + case ENOMEM: return PMU_ENOMEM; + default: return PMU_EEVENT; + } +} + +} /* namespace pmu */ + +#endif /* __PMU_UTILS_H */ + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/llvm/include/pmu/pmu.h b/llvm/include/pmu/pmu.h new file mode 100644 index 0000000..89a7c98 --- /dev/null +++ b/llvm/include/pmu/pmu.h @@ -0,0 +1,170 @@ +/* + * (C) 2018 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + * + * Hardware Performance Monitoring Unit (PMU), C++ interfaces. + */ + +#ifndef __PMU_H +#define __PMU_H + +#include <vector> +#include <memory> +#include <stdint.h> + +namespace pmu { + +#define PMU_GROUP_EVENTS (8) +#define PMU_TIMER_PERIOD (400) /* micro-second */ +#define PMU_INVALID_HNDL ((Handle)-1) + +typedef unsigned Handle; +/* Sampling event overflow handling. */ +typedef std::vector<uint64_t> SampleList; +typedef std::unique_ptr<SampleList> SampleDataPtr; +typedef void (*SampleHandlerTy)(Handle Hndl, SampleDataPtr Data, void *Opaque); + +/* Error code. 
*/ +enum { + PMU_OK = 0, /* No error */ + PMU_EINVAL = -1, /* Invalid argument */ + PMU_ENOMEM = -2, /* Insufficient memory */ + PMU_ENOEVENT = -3, /* Pre-defined event not available */ + PMU_EEVENT = -4, /* Hardware event error */ + PMU_EPERM = -5, /* Permission denied */ + PMU_EINTER = -6, /* Internal error */ + PMU_EDECODER = -7, /* Instruction trace decoder error */ +}; + +/* Pre-defined event code. */ +enum { + /* Basic events */ + PMU_CPU_CYCLES = 0, + PMU_REF_CPU_CYCLES, + PMU_INSTRUCTIONS, + PMU_LLC_REFERENCES, + PMU_LLC_MISSES, + PMU_BRANCH_INSTRUCTIONS, + PMU_BRANCH_MISSES, + /* Instruction cache events */ + PMU_ICACHE_HITS, + PMU_ICACHE_MISSES, + /* Memory instruction events */ + PMU_MEM_LOADS, + PMU_MEM_STORES, + + PMU_EVENT_MAX, +}; + +/* PMU initial configuration. */ +struct PMUConfig { + /* Input */ + int SignalReceiver; /* TID of the signal receiver. 0 for auto-select. */ + uint32_t Timeout; /* Timer period in microseconds. 0 for auto-select. */ + + /* Output */ + int PerfVersion; /* Perf version used in this PMU tool */ + int OSPerfVersion; /* Perf version used in the OS kernel */ +}; + +/* Config for sampling with one or more events. */ +struct SampleConfig { + unsigned NumEvents; /* Number of events in the event group */ + unsigned EventCode[PMU_GROUP_EVENTS]; /* Event group. The 1st event is the leader. */ + unsigned NumPages; /* Number of pages as the sample buffer size. (must be 2^n) */ + uint64_t Period; /* Sampling period of the group leader. */ + uint64_t Watermark; /* Bytes before wakeup. 0 for every timer period. */ + SampleHandlerTy SampleHandler; /* User handler routine */ + void *Opaque; /* An opaque pointer passed to the overflow handler. */ +}; + +/* Config for sampling with only one event. */ +struct Sample1Config { + unsigned EventCode; /* Pre-defined event to trigger counter overflow */ + unsigned NumPages; /* Number of pages as the sample buffer size. (must be 2^n) */ + uint64_t Period; /* Sampling period */ + uint64_t Watermark; /* Bytes before wakeup. 0 for every timer period. */ + SampleHandlerTy SampleHandler; /* User handler routine */ + void *Opaque; /* An opaque pointer passed to the overflow handler. */ +}; + +/* + * PMU main tools. + */ +class PMU { + PMU() = delete; + ~PMU() = delete; + +public: + /* Initialize the PMU module. */ + static int Init(PMUConfig &Config); + + /* Finalize the PMU module. */ + static int Finalize(void); + + /* Pause the PMU module. While the PMU module is paused, the user can continue + * to use counting events, but the overflow handler will not be invoked. */ + static int Pause(void); + + /* Resume the PMU module. After the PMU module is resumed, the overflow + * handler will be invoked again. */ + static int Resume(void); + + /* Start a counting/sampling/tracing event. */ + static int Start(Handle Hndl); + + /* Stop a counting/sampling/tracing event. */ + static int Stop(Handle Hndl); + + /* Reset the hardware counter. */ + static int Reset(Handle Hndl); + + /* Remove an event. */ + static int Cleanup(Handle Hndl); + + /* Start/stop a sampling/tracing event without acquiring a lock. + * Note that these two functions should only be used within the overflow + * handler. Since the overflow handling is already in a locked section, + * acquiring a lock is not required. */ + static int StartUnlocked(Handle Hndl); + static int StopUnlocked(Handle Hndl); + + /* Open an event using the pre-defined event code. 
*/ + static int CreateEvent(unsigned EventCode, Handle &Hndl); + + /* Open an event using the raw event number and umask value. + * The raw event code is computed as (RawEvent | (Umask << 8)). */ + static int CreateRawEvent(unsigned RawEvent, unsigned Umask, Handle &Hndl); + + /* Open a sampling event, with the 1st EventCode as the interrupt event. + * The sample data will be recorded in a vector of type 'uint64_t'. + * The following vector shows the data format of sampling with N events: + * { pc, val1, val2, ..., valN, # 1st sample + * ... + * pc, val1, val2, ..., valN }; # nth sample + * + * Note that ownership of the output vector is transferred to the user. + * It is the user's responsibility to free the vector. */ + static int CreateSampleEvent(SampleConfig &Config, Handle &Hndl); + + /* Generate an IP histogram, using EventCode as the interrupt event. + * The IP histogram will be recorded in a vector of type 'uint64_t' with + * the format: { pc1, pc2, pc3, ..., pcN }. + * Note that ownership of the output vector is transferred to the user. + * It is the user's responsibility to free the vector. */ + static int CreateSampleIP(Sample1Config &Config, Handle &Hndl); + + /* Read the value from the hardware counter. */ + static int ReadEvent(Handle Hndl, uint64_t &Value); + + /* Convert error code to string. */ + static const char *strerror(int ErrCode); +}; + +} /* namespace pmu */ + +#endif /* __PMU_H */ + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/llvm/include/pmu/ppc/ppc-events.h b/llvm/include/pmu/ppc/ppc-events.h new file mode 100644 index 0000000..f48e10d --- /dev/null +++ b/llvm/include/pmu/ppc/ppc-events.h @@ -0,0 +1,30 @@ +/* + * (C) 2018 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#ifndef __PPC_EVENTS_H +#define __PPC_EVENTS_H + +#include <vector> +#include "pmu/pmu.h" + +namespace pmu { + +class PMUEvent; + +#if defined(_ARCH_PPC) || defined(_ARCH_PPC64) +#define pmu_mb() __asm__ __volatile__ ("sync" : : : "memory") +#define pmu_rmb() __asm__ __volatile__ ("sync" : : : "memory") +#define pmu_wmb() __asm__ __volatile__ ("sync" : : : "memory") +#endif + +int PPCInit(void); + +} /* namespace pmu */ + +#endif /* __PPC_EVENTS_H */ + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/llvm/include/pmu/x86/x86-events.h b/llvm/include/pmu/x86/x86-events.h new file mode 100644 index 0000000..c6fdb95 --- /dev/null +++ b/llvm/include/pmu/x86/x86-events.h @@ -0,0 +1,38 @@ +/* + * (C) 2018 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#ifndef __X86_EVENTS_H +#define __X86_EVENTS_H + +#include <vector> +#include "pmu/pmu.h" + +namespace pmu { + +class PMUEvent; + +#if defined(__i386__) +/* + * Some non-Intel clones support out of order store. wmb() ceases to be a + * nop for these. 
+ */ +#define pmu_mb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") +#define pmu_rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") +#define pmu_wmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") +#elif defined(__x86_64__) +#define pmu_mb() asm volatile("mfence" ::: "memory") +#define pmu_rmb() asm volatile("lfence" ::: "memory") +#define pmu_wmb() asm volatile("sfence" ::: "memory") +#endif + +int X86Init(void); + +} /* namespace pmu */ + +#endif /* __X86_EVENTS_H */ + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/llvm/include/qemu-types.h b/llvm/include/qemu-types.h new file mode 100644 index 0000000..f2430e0 --- /dev/null +++ b/llvm/include/qemu-types.h @@ -0,0 +1,33 @@ +/* + * (C) 2016 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#ifndef __QEMU_TYPES_H +#define __QEMU_TYPES_H + +extern "C" { +#include "cpu.h" +#include "exec/tb-hash.h" +#include "exec/exec-all.h" +#include "exec/helper-proto.h" +#include "exec/cpu_ldst.h" +#include "tcg/tcg.h" +#include "qemu/atomic.h" +#include "hqemu.h" + +extern uint8_t *tb_ret_addr; +extern uint8_t *ibtc_ret_addr; + +} + +#ifdef inline +#undef inline +#endif + +#endif + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/llvm/include/tcg-opc-vector.h b/llvm/include/tcg-opc-vector.h new file mode 100644 index 0000000..bc03ea1 --- /dev/null +++ b/llvm/include/tcg-opc-vector.h @@ -0,0 +1,80 @@ +DEF(vector_start, 0, 0, 0, 0) + +DEF(vmov_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vload_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vstore_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) + +DEF(vsitofp_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vuitofp_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vfptosi_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vfptoui_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) + +DEF(vadd_i8_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vadd_i16_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vadd_i32_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vadd_i64_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vadd_i8_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vadd_i16_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vadd_i32_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) + +DEF(vsub_i8_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vsub_i16_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vsub_i32_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vsub_i64_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vsub_i8_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vsub_i16_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vsub_i32_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) + +DEF(vadd_f32_128, 0, 0, 0, 0) +DEF(vadd_f64_128, 0, 0, 0, 0) +DEF(vadd_f32_64, 0, 0, 0, 0) +DEF(vpadd_f32_128, 0, 0, 0, 0) +DEF(vpadd_f64_128, 0, 0, 0, 0) +DEF(vpadd_f32_64, 0, 0, 0, 0) +DEF(vsub_f32_128, 0, 0, 0, 0) +DEF(vsub_f64_128, 0, 0, 0,0) +DEF(vsub_f32_64, 0, 0, 0, 0) +DEF(vabd_f32_128, 0, 0, 0 ,0) +DEF(vabd_f64_128, 0, 0, 0 ,0) +DEF(vabd_f32_64, 0, 0, 0, 0) + +DEF(vfma_f32_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vfma_f64_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vfma_f32_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vfms_f32_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vfms_f64_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vfms_f32_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) + +DEF(vmul_f32_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vmul_f64_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vmul_f32_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vmla_f32_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vmla_f64_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vmla_f32_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vmls_f32_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) 
+DEF(vmls_f64_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vmls_f32_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) + +DEF(vdiv_f32_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vdiv_f64_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vdiv_f32_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) + +DEF(vand_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vand_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vbic_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vbic_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vorr_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vorr_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vorn_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vorn_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(veor_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(veor_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) + +DEF(vbif_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vbif_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vbit_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vbit_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vbsl_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) +DEF(vbsl_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS) + +DEF(vector_end, 0, 0, 0, 0) diff --git a/llvm/include/tracer.h b/llvm/include/tracer.h new file mode 100644 index 0000000..2813e0e --- /dev/null +++ b/llvm/include/tracer.h @@ -0,0 +1,109 @@ +/* + * (C) 2016 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#ifndef __TRACE_H +#define __TRACE_H + +#include <vector> +#include <iostream> +#include "qemu-types.h" +#include "optimization.h" +#include "utils.h" + + +/* + * Base processor tracer + */ +class BaseTracer { +public: + CPUArchState *Env; + void *Perf; + + BaseTracer(CPUArchState *env) : Env(env), Perf(nullptr) {} + virtual ~BaseTracer() {} + virtual void Reset() {} + virtual void Record(uintptr_t next_tb, TranslationBlock *tb) {} + + /* Create and return the tracer object based on LLVM_MODE. */ + static BaseTracer *CreateTracer(CPUArchState *env); + + /* Release the trace resources. */ + static void DeleteTracer(CPUArchState *env); +}; + + +/* + * Trace of a single basic block + */ +class SingleBlockTracer : public BaseTracer { + TranslationBlock *TB; + +public: + SingleBlockTracer(CPUArchState *env); + + void Record(uintptr_t next_tb, TranslationBlock *tb) override; +}; + + +/* + * Trace with NET trace formation algorithm + */ +#define NET_PROFILE_THRESHOLD 50 +#if defined(CONFIG_SOFTMMU) +# define NET_PREDICT_THRESHOLD 16 +#else +# define NET_PREDICT_THRESHOLD 64 +#endif +class NETTracer : public BaseTracer { + bool isTraceHead(uintptr_t next_tb, TranslationBlock *tb, bool NewTB); + +public: + typedef std::vector<TranslationBlock *> TBVec; + TBVec TBs; + + NETTracer(CPUArchState *env, int Mode); + ~NETTracer(); + + void Reset() override; + void Record(uintptr_t next_tb, TranslationBlock *tb) override; + inline void Profile(TranslationBlock *tb); + inline void Predict(TranslationBlock *tb); +}; + +/* Return the address of the patch point to the trace code. */ +static inline uintptr_t tb_get_jmp_entry(TranslationBlock *tb) { + return (uintptr_t)tb->tc_ptr + tb->patch_jmp; +} +/* Return the initial jump target address of the patch point. 
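+ * A usage note inferred from the surrounding helpers: the site returned by
+ * tb_get_jmp_entry() above is the one redirected with patch_jmp() (declared
+ * in utils.h), while the value below is the target that site initially
+ * points to.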
*/ +static inline uintptr_t tb_get_jmp_next(TranslationBlock *tb) { + return (uintptr_t)tb->tc_ptr + tb->patch_next; +} +static inline SingleBlockTracer &getSingleBlockTracer(CPUArchState *env) { + return *static_cast<SingleBlockTracer *>(cpu_get_tracer(env)); +} +static inline NETTracer &getNETTracer(CPUArchState *env) { + return *static_cast<NETTracer *>(cpu_get_tracer(env)); +} + +static inline void delete_image(TranslationBlock *tb) +{ +#if defined(CONFIG_LLVM) && defined(CONFIG_SOFTMMU) + delete (char *)tb->image; + tb->image = nullptr; +#endif +} + +static inline bool update_tb_mode(TranslationBlock *tb, int from, int to) { + if (tb->mode != from) + return false; + return Atomic<int>::testandset(&tb->mode, from, to); +} + +#endif + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/llvm/include/utils.h b/llvm/include/utils.h new file mode 100644 index 0000000..90b36d9 --- /dev/null +++ b/llvm/include/utils.h @@ -0,0 +1,260 @@ +/* + * (C) 2016 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#ifndef __UTILS_H +#define __UTILS_H + +#include <cstdint> +#include <cstdlib> +#include <sstream> +#include <iomanip> +#include <set> +#include <map> +#include <vector> +#include "qemu-types.h" + + +#ifndef timersub +# define timersub(a, b, result) \ + do { \ + (result)->tv_sec = (a)->tv_sec - (b)->tv_sec; \ + (result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \ + if ((result)->tv_usec < 0) { \ + --(result)->tv_sec; \ + (result)->tv_usec += 1000000; \ + } \ + } while (0) +#endif + +#if !defined(__i386__) && !defined(__x86_64__) +#define USE_PTHREAD_MUTEX +#endif + +#if defined(USE_PTHREAD_MUTEX) +# define hqemu_lock_t pthread_mutex_t +# define hqemu_lock_init(lock) pthread_mutex_init(lock, nullptr) +# define hqemu_lock(lock) pthread_mutex_lock(lock) +# define hqemu_unlock(lock) pthread_mutex_unlock(lock) +#else +# define hqemu_lock_t volatile int +# define hqemu_lock_init(lock) do { *lock = 0; } while(0) +# define hqemu_lock(lock) \ + do { \ + while (!Atomic<int>::testandset(lock,0,1)) { \ + while(*(lock)) _mm_pause(); \ + } \ + } while(0) +# define hqemu_unlock(lock) \ + do { \ + barrier(); \ + *(lock) = 0; \ + } while(0) +#endif /* USE_PTHREAD_MUTEX */ + + +/* + * Atomic Utilities + */ +template<class T> +class Atomic { +public: + static T inc_return(volatile T *p) { + return __sync_fetch_and_add(p, 1) + 1; + } + static bool testandset(volatile T *p, T _old, T _new) { + return __sync_bool_compare_and_swap(p, _old, _new); + } +}; + + +/* + * Mutex + */ +namespace hqemu { +class Mutex { + hqemu_lock_t M; +public: + Mutex() { hqemu_lock_init(&M); } + inline void acquire() { hqemu_lock(&M); } + inline void release() { hqemu_unlock(&M); } +}; + +class MutexGuard { + Mutex &M; +public: + MutexGuard(Mutex &M) : M(M) { M.acquire(); } + ~MutexGuard() { M.release(); } +}; +}; + + +/* + * GraphNode is used to describe the information of one node in a CFG. + */ +class GraphNode; +typedef std::vector<GraphNode *> NodeVec; +typedef std::set<GraphNode *> NodeSet; + +class GraphNode { + TranslationBlock *TB; + NodeVec Children; + +public: + GraphNode(TranslationBlock *tb) : TB(tb) {} + + TranslationBlock *getTB() { return TB; } + target_ulong getGuestPC() { return TB->pc; } + NodeVec &getChildren() { return Children; } + void insertChild(GraphNode *Node) { + Children.push_back(Node); + } + + static void DeleteCFG(GraphNode *Root); +}; + +/* + * ControlFlowGraph is used to build the whole program control flow graph (CFG). 
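+ * Edges are inserted with insertLink(src, dst) and looked up per source
+ * block with getSuccessor(); insertLink() and reset() take the internal
+ * mutex themselves, while getSuccessor() does not, so readers can serialize
+ * through getLock() if needed.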
+ * GlobalCFG uses this structure to maintain a whole program CFG connected by + * direct branches. + */ +class ControlFlowGraph { + hqemu::Mutex lock; + +public: + typedef std::vector<TranslationBlock *> TBVec; + typedef std::map<TranslationBlock*, TBVec> SuccMap; + SuccMap SuccCFG; + + ControlFlowGraph() {} + + hqemu::Mutex &getLock() { return lock; } + TBVec &getSuccessor(TranslationBlock *tb) { + return SuccCFG[tb]; + } + + void reset() { + hqemu::MutexGuard locked(lock); + SuccCFG.clear(); + } + void insertLink(TranslationBlock *src, TranslationBlock *dst) { + hqemu::MutexGuard locked(lock); + SuccCFG[src].push_back(dst); + } +}; + + +/* + * Queue + */ +#if defined(__x86_64__) +#define LOCK_FREE +#endif + +#ifdef LOCK_FREE +struct pointer_t { + struct node_t *ptr; + unsigned long int count; +}; + +struct node_t { + struct pointer_t next; + void *value; +}; + +/* Lock-free MS-queue */ +class Queue { + struct queue_t { + struct pointer_t head; + struct pointer_t tail; + }; + + node_t *new_node(void *value) { + node_t *node = new node_t; + node->next.ptr = nullptr; + node->value = value; + return node; + } + void delete_node(node_t *node) { + delete node; + } + + queue_t Q; + +public: + Queue(); + void enqueue(void *data); + void *dequeue(); +}; +#else +class Queue { + struct node_t { + struct node_t *next; + void *value; + node_t(void *v) : next(nullptr), value(v) {} + }; + struct queue_t { + struct node_t *head; + struct node_t *tail; + }; + + pthread_mutex_t lock; + queue_t Q; + +public: + Queue(); + void enqueue(void *data); + void *dequeue(); +}; +#endif + + +class UUID { + static uint64_t uuid; + +public: +#if defined(__x86_64__) + static uint64_t gen() { + uint64_t i = 1; + asm volatile("lock; xaddq %0, %1" + : "+r" (i), "+m" (uuid) :: "memory"); + return i + 1; + } +#else + static uint64_t gen() { + static pthread_mutex_t uuid_lock = PTHREAD_MUTEX_INITIALIZER; + pthread_mutex_lock(&uuid_lock); + uint64_t id = uuid++; + pthread_mutex_unlock(&uuid_lock); + return id; + } +#endif +}; + +/* Return the string of a hexadecimal number. */ +template <class T> +static inline std::string toHexString(T Num) { + std::stringstream ss; + ss << "0x" << std::hex << Num; + return ss.str(); +} + +/* Return the string of a zero extended number. */ +template <class T> +static inline std::string toZextStr(T Num) { + std::stringstream ss; + ss << std::setfill('0') << std::setw(16) << Num; + return ss.str(); +} + +/* Misc utilities */ +pid_t gettid(); +void patch_jmp(volatile uintptr_t patch_addr, volatile uintptr_t addr); +void patch_jmp(volatile uintptr_t patch_addr, volatile void *addr); + +#endif +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/llvm/llvm-annotate.cpp b/llvm/llvm-annotate.cpp new file mode 100644 index 0000000..040c771 --- /dev/null +++ b/llvm/llvm-annotate.cpp @@ -0,0 +1,136 @@ +/* + * (C) 2015 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. 
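+ *
+ * This file implements AnnotationFactory, which loads loop annotations from
+ * an XML side file (<guest program>.xml in user mode) and exposes them to
+ * the optimizer by guest loop address.
+ *
+ * A sketch of the expected file layout, based on the tags parsed below (the
+ * values here are made-up examples):
+ *
+ *   <hqemu>
+ *     <loop>
+ *       <address>0x400500</address>
+ *       <length>64</length>
+ *       <vs>4</vs>
+ *       <vf>2</vf>
+ *       <distance>0</distance>
+ *       <start>0</start>
+ *       <end>1024</end>
+ *       <stride>1</stride>
+ *     </loop>
+ *   </hqemu>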
+ */ + +#include "xml/tinyxml2.h" +#include "optimization.h" +#include "llvm-debug.h" +#include "llvm-annotate.h" + + +using namespace tinyxml2; +static hqemu::Mutex Lock; + +#if defined(CONFIG_USER_ONLY) +extern "C" const char *filename; +#endif + +AnnotationFactory::AnnotationFactory() +{ +#if defined(CONFIG_USER_ONLY) + int ret; + MetaFile = std::string(filename).append(".xml"); + ret = ParseXML(MetaFile.c_str()); + if (ret != 0) + return; +#endif +} + +AnnotationFactory::~AnnotationFactory() +{ + for (auto L : Loops) + delete L.second; +} + +static inline const char *getAttrName(XMLElement *Attr) +{ + return Attr->Name(); +} + +static inline const char *getAttrValue(XMLElement *Attr) +{ + return Attr->FirstChild() ? Attr->FirstChild()->ToText()->Value() : ""; +} + +static LoopMetadata *ParseXMLLoop(XMLElement *LoopNode) +{ + if (LoopNode == nullptr) + return nullptr; + + LoopMetadata *LoopMD = new LoopMetadata(); + XMLElement *Attr = LoopNode->FirstChildElement(); + while (Attr) { + std::string Name = getAttrName(Attr); + const char *Val = getAttrValue(Attr); + if (strlen(Val) == 0) + goto next; + + if (Name == "address") + LoopMD->Address = (target_ulong)strtoull(Val, nullptr, 16); + else if (Name == "length") + LoopMD->Length = (uint32_t)strtoul(Val, nullptr, 10); + else if (Name == "vs") + LoopMD->VS = (uint32_t)strtoul(Val, nullptr, 10); + else if (Name == "vf") + LoopMD->VF = (uint32_t)strtoul(Val, nullptr, 10); + else if (Name == "distance") { + LoopMD->Distance = atoi(Val); + if (LoopMD->Distance == 0) + LoopMD->Distance = INT_MAX; + } + else if (Name == "start") LoopMD->Start = atoi(Val); + else if (Name == "end") LoopMD->End = atoi(Val); + else if (Name == "stride") LoopMD->Stride = atoi(Val); +next: + Attr = Attr->NextSiblingElement(); + } + + if (LoopMD->Address == (target_ulong)-1) { + delete LoopMD; + return nullptr; + } + + return LoopMD; +} + +int AnnotationFactory::ParseXML(const char *name) +{ + XMLDocument Doc; + XMLElement *RootNode, *LoopNode; + + if (Doc.LoadFile(name) != 0) { + dbg() << DEBUG_ANNOTATE << "Disable annotation support." + << " (cannot find " << name << ")\n"; + return 1; + } + + dbg() << DEBUG_ANNOTATE << "Found an annotation file " << name << "\n"; + + /* A legal annoation should be embedded within the <hqemu> tag. For example: + * <hqemu><loop><addr>...</addr></loop></hqemu> */ + RootNode = Doc.FirstChildElement("hqemu"); + if (RootNode == nullptr) + return 1; + + LoopNode = RootNode->FirstChildElement("loop"); + while (LoopNode) { + LoopMetadata *LoopMD = ParseXMLLoop(LoopNode); + if (LoopMD) + Loops[LoopMD->Address] = LoopMD; + LoopNode = LoopNode->NextSiblingElement(); + } + + dbg() << DEBUG_ANNOTATE + << "Found " << Loops.size() << " loop annotation(s).\n"; + return 0; +} + +LoopMetadata *AnnotationFactory::getLoopAnnotation(target_ulong addr) +{ + hqemu::MutexGuard locked(Lock); + + if (Loops.find(addr) == Loops.end()) + return nullptr; + return Loops[addr]; +} + +bool AnnotationFactory::hasLoopAnnotation(target_ulong addr) +{ + hqemu::MutexGuard locked(Lock); + return Loops.count(addr) ? true : false; +} + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/llvm/llvm-debug.cpp b/llvm/llvm-debug.cpp new file mode 100644 index 0000000..e5d715a --- /dev/null +++ b/llvm/llvm-debug.cpp @@ -0,0 +1,229 @@ +/* + * (C) 2016 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. 
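+ *
+ * This file implements the MCDisasm helper, a thin wrapper over the LLVM MC
+ * layer used to disassemble and print guest and host code for debug output.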
+ */ + +#include "llvm/ADT/Triple.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrAnalysis.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm-debug.h" +#include "llvm.h" + + +static const Target *getTarget(std::string TripleName) +{ + /* Get the target specific parser. */ + std::string Error; + const Target *TheTarget = TargetRegistry::lookupTarget( + TripleName.c_str(), Error); + if (!TheTarget) + return nullptr; + + return TheTarget; +} + +MCDisasm *MCDisasm::CreateMCDisasm(std::string TripleName, bool isHost) +{ + if (TripleName.empty() || TripleName == "UnknownArch") + return nullptr; + + const Target *TheTarget = getTarget(TripleName); + if (!TheTarget) + return nullptr; + + return new MCDisasm(TheTarget, TripleName, isHost); +} + +MCDisasm::MCDisasm(const llvm::Target *TheTarget, std::string TripleName, + bool isHost) + : HostDisAsm(isHost), NoShowRawInsn(false) +{ + const char *triple = TripleName.c_str(); + Triple TheTriple(Triple::normalize(TripleName)); + + switch (TheTriple.getArch()) { + case Triple::x86: + case Triple::x86_64: + NoShowRawInsn = true; + break; + default: + NoShowRawInsn = false; + break; + } + + const MCRegisterInfo *MRI = TheTarget->createMCRegInfo(TripleName); + if (!MRI) + hqemu_error("no register info for target %s.\n", triple); + const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(*MRI, TripleName); + if (!MAI) + hqemu_error("no assembly info for target %s\n", triple); + const MCSubtargetInfo *STI = TheTarget->createMCSubtargetInfo(TripleName, "", ""); + if (!STI) + hqemu_error("no subtarget info for target %s\n", triple); + const MCInstrInfo *MII = TheTarget->createMCInstrInfo(); + if (!MII) + hqemu_error("no instruction info for target %s\n", triple); + + MCContext Ctx(MAI, MRI, nullptr); + const MCDisassembler *DisAsm = TheTarget->createMCDisassembler(*STI, Ctx); + + if (!DisAsm) + hqemu_error("no disassembler for target %s\n", TripleName.c_str()); + + const MCInstrAnalysis *MIA = TheTarget->createMCInstrAnalysis(MII); + + int AsmPrinterVariant = MAI->getAssemblerDialect(); +#if defined(LLVM_V35) + MCInstPrinter *IP = TheTarget->createMCInstPrinter( + AsmPrinterVariant, *MAI, *MII, *MRI, *STI); +#else + MCInstPrinter *IP = TheTarget->createMCInstPrinter(Triple(TripleName), + AsmPrinterVariant, *MAI, *MII, *MRI); +#endif + if (!IP) + hqemu_error("no instruction printer for target %s\n", TripleName.c_str()); + + IP->setPrintImmHex(true); + + this->DisAsm = DisAsm; + this->STI = STI; + this->IP = IP; + this->MIA = MIA; +} + +MCDisasm::~MCDisasm() +{ +} + + +void MCDisasm::DumpBytes(ArrayRef<uint8_t> bytes, raw_ostream &OS) +{ + if (NoShowRawInsn) + return; + + static const char hex_rep[] = "0123456789abcdef"; + OS << " "; + for (auto I = bytes.rbegin(), E = bytes.rend(); I != E; ++I) { + char c = *I; + OS << hex_rep[(c & 0xF0) >> 4]; + OS << hex_rep[c & 0xF]; + OS << ' '; + } +} + +#if defined(LLVM_V35) +class DisasmMemoryObject : public MemoryObject { + uint8_t *Bytes; + uint64_t Size; + uint64_t BasePC; +public: + DisasmMemoryObject(uint8_t *bytes, uint64_t size, uint64_t basePC) : + Bytes(bytes), Size(size), BasePC(basePC) {} + + uint64_t getBase() const override { return BasePC; } + uint64_t getExtent() const override { return Size; } + + int readByte(uint64_t Addr, uint8_t *Byte) const override { + if (Addr - BasePC >= Size) + return -1; + *Byte = Bytes[Addr - BasePC]; + return 0; + } + 
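+    /* Return a view of M bytes starting at byte offset N into the wrapped
+     * buffer; the offset is relative to Bytes, not to BasePC. */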
ArrayRef<uint8_t> slice(size_t N, size_t M) const { + return makeArrayRef<uint8_t>(Bytes+N, M); + } +}; + +void MCDisasm::PrintInAsm(uint64_t Addr, uint64_t Size, uint64_t GuestAddr) +{ + uint64_t Len; + DisasmMemoryObject MemoryObject((uint8_t *)Addr, Size, Addr); + + for (uint64_t Start = 0; Start < Size; Start += Len) { + MCInst Inst; + std::string Str; + raw_string_ostream OS(Str); + if (DisAsm->getInstruction(Inst, Len, MemoryObject, + Addr + Start, nulls(), nulls())) { + OS << format("0x%08" PRIx64 ":", GuestAddr); + + DumpBytes(MemoryObject.slice(Start, Len), OS); + IP->printInst(&Inst, OS, ""); + + if (MIA && (MIA->isCall(Inst) || MIA->isUnconditionalBranch(Inst) || + MIA->isConditionalBranch(Inst))) { + uint64_t Target; + if (MIA->evaluateBranch(Inst, GuestAddr, Len, Target)) { + OS << " <" << format("0x%08" PRIx64, Target) << ">"; + if (HostDisAsm) { + if (Target == (uint64_t)tb_ret_addr) + OS << " !tb_ret_addr"; + } + } + } + } else { + OS << "\t<internal disassembler error>"; + if (Len == 0) + Len = 1; + } + + DM.debug() << OS.str() << "\n"; + GuestAddr += Len; + } +} +#else +void MCDisasm::PrintInAsm(uint64_t Addr, uint64_t Size, uint64_t GuestAddr) +{ + uint64_t Len; + ArrayRef<uint8_t> Bytes(reinterpret_cast<const uint8_t *>(Addr), Size); + + for (uint64_t Start = 0; Start < Size; Start += Len) { + MCInst Inst; + std::string Str; + raw_string_ostream OS(Str); + if (DisAsm->getInstruction(Inst, Len, Bytes.slice(Start), + Addr + Start, nulls(), nulls())) { + OS << format("0x%08" PRIx64 ":", GuestAddr); + + DumpBytes(Bytes.slice(Start, Len), OS); + IP->printInst(&Inst, OS, "", *STI); + + if (MIA && (MIA->isCall(Inst) || MIA->isUnconditionalBranch(Inst) || + MIA->isConditionalBranch(Inst))) { + uint64_t Target; + if (MIA->evaluateBranch(Inst, GuestAddr, Len, Target)) { + OS << " <" << format("0x%08" PRIx64, Target) << ">"; + if (HostDisAsm) { + if (Target == (uint64_t)tb_ret_addr) + OS << " !tb_ret_addr"; + } + } + } + } else { + OS << "\t<internal disassembler error>"; + if (Len == 0) + Len = 1; + } + + DM.debug() << OS.str() << "\n"; + GuestAddr += Len; + } +} +#endif + +void MCDisasm::PrintOutAsm(uint64_t Addr, uint64_t Size) +{ + auto &OS = DM.debug(); + OS << "\nOUT: [size=" << Size << "]\n"; + PrintInAsm(Addr, Size, Addr); + OS << "\n"; +} + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/llvm/llvm-hard-perfmon.cpp b/llvm/llvm-hard-perfmon.cpp new file mode 100644 index 0000000..051ee02 --- /dev/null +++ b/llvm/llvm-hard-perfmon.cpp @@ -0,0 +1,289 @@ +/* + * (C) 2018 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#include <string.h> +#include "config-target.h" +#include "tracer.h" +#include "llvm.h" +#include "llvm-soft-perfmon.h" +#include "llvm-hard-perfmon.h" + +using namespace pmu; + + +HardwarePerfmon::HardwarePerfmon() : MonThreadID(-1), MonThreadStop(true) +{ +} + +HardwarePerfmon::~HardwarePerfmon() +{ + if (LLVMEnv::RunWithVTune) + return; + + PMU::Finalize(); + + if (!MonThreadStop) + MonThreadStop = true; +} + +/* Set up HPM with the monitor thread id */ +void HardwarePerfmon::Init(int monitor_thread_tid) +{ + if (LLVMEnv::RunWithVTune) + return; + + MonThreadID = monitor_thread_tid; + +#if defined(ENABLE_HPM_THREAD) + /* Start HPM thread. */ + StartMonThread(); +#else + /* If we attempt to profile hotspot but do not run the HPM translation mode, + * we enable the HPM monitor thread for the hotspot profiling in order to + * avoid deadlock. 
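+     * The monitor thread only copies the TCG context and then sleeps; its
+     * role is to serve as the SignalReceiver handed to PMU::Init() below, so
+     * that sampling signals are delivered to it rather than to the
+     * translation threads (rationale inferred from MonitorFunc() and
+     * PMUConfig).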
*/ + if (SP->Mode & SPM_HOTSPOT) + StartMonThread(); +#endif + + /* Initialize the PMU tools. */ + PMUConfig Config; + memset(&Config, 0, sizeof(PMUConfig)); + Config.SignalReceiver = MonThreadID; + Config.Timeout = 400; + int EC = PMU::Init(Config); + if (EC != PMU_OK) { + dbg() << DEBUG_HPM << "Failed to initialize PMU (" << PMU::strerror(EC) + << ").\n"; + return; + } +} + +/* Stop the monitor. */ +void HardwarePerfmon::Pause() +{ + if (LLVMEnv::RunWithVTune) + return; + + PMU::Pause(); +} + +/* Restart the monitor. */ +void HardwarePerfmon::Resume() +{ + if (LLVMEnv::RunWithVTune) + return; + + PMU::Resume(); +} + +/* Start monitor thread. */ +void HardwarePerfmon::StartMonThread() +{ + /* Start HPM thread. */ + MonThreadID = -1; + MonThreadStop = false; + MonThread = std::thread( + [=]() { MonitorFunc(); } + ); + + MonThread.detach(); + while (MonThreadID == -1) + usleep(200); +} + +/* Monitor thread routine. */ +void HardwarePerfmon::MonitorFunc() +{ + MonThreadID = gettid(); + copy_tcg_context(); + + while (!MonThreadStop) + usleep(10000); +} + +static void CoverSetHandler(Handle Hndl, std::unique_ptr<SampleList> DataPtr, + void *Opaque) +{ + /* Just attach the sampled IPs to the profile list. The soft-perfmon will + * release the resource later. */ + SP->SampleListVec.push_back(DataPtr.release()); +} + +void HardwarePerfmon::RegisterThread(BaseTracer *Tracer) +{ + hqemu::MutexGuard Locked(Lock); + + dbg() << DEBUG_HPM << "Register thread " << gettid() << ".\n"; + + if (LLVMEnv::RunWithVTune) + return; + + PerfmonData *Perf = new PerfmonData(gettid()); + Perf->MonitorBasic(HPM_INIT); + Perf->MonitorCoverSet(HPM_INIT); + + Tracer->Perf = static_cast<void *>(Perf); +} + +void HardwarePerfmon::UnregisterThread(BaseTracer *Tracer) +{ + hqemu::MutexGuard Locked(Lock); + + dbg() << DEBUG_HPM << "Unregister thread " << gettid() << ".\n"; + + if (LLVMEnv::RunWithVTune) + return; + if (!Tracer->Perf) + return; + + auto Perf = static_cast<PerfmonData *>(Tracer->Perf); + Perf->MonitorBasic(HPM_FINALIZE); + Perf->MonitorCoverSet(HPM_FINALIZE); + + delete Perf; + Tracer->Perf = nullptr; +} + +void HardwarePerfmon::NotifyCacheEnter(BaseTracer *Tracer) +{ + hqemu::MutexGuard Locked(Lock); + + if (!Tracer->Perf) + return; + auto Perf = static_cast<PerfmonData *>(Tracer->Perf); + Perf->MonitorBasic(HPM_START); +} + +void HardwarePerfmon::NotifyCacheLeave(BaseTracer *Tracer) +{ + hqemu::MutexGuard Locked(Lock); + + if (!Tracer->Perf) + return; + auto Perf = static_cast<PerfmonData *>(Tracer->Perf); + Perf->MonitorBasic(HPM_STOP); +} + +/* + * PerfmonData + */ +PerfmonData::PerfmonData(int tid) : TID(tid) +{ +} + +PerfmonData::~PerfmonData() +{ +} + +void PerfmonData::MonitorBasic(HPMControl Ctl) +{ + if (!(SP->Mode & SPM_HPM)) + return; + + switch (Ctl) { + case HPM_INIT: + if (PMU::CreateEvent(PMU_INSTRUCTIONS, ICountHndl) == PMU_OK) { + dbg() << DEBUG_HPM << "Register event: # instructions.\n"; + PMU::Start(ICountHndl); + } + if (PMU::CreateEvent(PMU_BRANCH_INSTRUCTIONS, BranchHndl) == PMU_OK) { + dbg() << DEBUG_HPM << "Register event: # branch instructions.\n"; + PMU::Start(BranchHndl); + } + if (PMU::CreateEvent(PMU_MEM_LOADS, MemLoadHndl) == PMU_OK) { + dbg() << DEBUG_HPM << "Register event: # load instructions.\n"; + PMU::Start(MemLoadHndl); + } + if (PMU::CreateEvent(PMU_MEM_STORES, MemStoreHndl) == PMU_OK) { + dbg() << DEBUG_HPM << "Register event: # store instructions.\n"; + PMU::Start(MemStoreHndl); + } + break; + case HPM_FINALIZE: + { + uint64_t NumInsns = 0, NumBranches = 0, NumLoads = 
0, NumStores = 0; + if (ICountHndl != PMU_INVALID_HNDL) { + PMU::ReadEvent(ICountHndl, NumInsns); + PMU::Cleanup(ICountHndl); + } + if (BranchHndl != PMU_INVALID_HNDL) { + PMU::ReadEvent(BranchHndl, NumBranches); + PMU::Cleanup(BranchHndl); + } + if (MemLoadHndl != PMU_INVALID_HNDL) { + PMU::ReadEvent(MemLoadHndl, NumLoads); + PMU::Cleanup(MemLoadHndl); + } + if (MemStoreHndl != PMU_INVALID_HNDL) { + PMU::ReadEvent(MemStoreHndl, NumStores); + PMU::Cleanup(MemStoreHndl); + } + + SP->NumInsns += NumInsns; + SP->NumBranches += NumBranches; + SP->NumLoads += NumLoads; + SP->NumStores += NumStores; + break; + } + case HPM_START: + if (BranchHndl != PMU_INVALID_HNDL) + PMU::ReadEvent(BranchHndl, LastNumBranches); + if (MemLoadHndl != PMU_INVALID_HNDL) + PMU::ReadEvent(MemLoadHndl, LastNumLoads); + if (MemStoreHndl != PMU_INVALID_HNDL) + PMU::ReadEvent(MemStoreHndl, LastNumStores); + break; + case HPM_STOP: + { + uint64_t NumBranches = 0, NumLoads = 0, NumStores = 0; + if (BranchHndl != PMU_INVALID_HNDL) + PMU::ReadEvent(BranchHndl, NumBranches); + if (MemLoadHndl != PMU_INVALID_HNDL) + PMU::ReadEvent(MemLoadHndl, NumLoads); + if (MemStoreHndl != PMU_INVALID_HNDL) + PMU::ReadEvent(MemStoreHndl, NumStores); + break; + } + default: + break; + } +} + +void PerfmonData::MonitorCoverSet(HPMControl Ctl) +{ + if (!(SP->Mode & SPM_HOTSPOT)) + return; + + switch (Ctl) { + case HPM_INIT: { + Sample1Config IPConfig; + memset(&IPConfig, 0, sizeof(Sample1Config)); + IPConfig.EventCode = PMU_INSTRUCTIONS; + IPConfig.NumPages = 4; + IPConfig.Period = 1e5; + IPConfig.Watermark = IPConfig.NumPages * getpagesize() / 2; + IPConfig.SampleHandler = CoverSetHandler; + IPConfig.Opaque = static_cast<void *>(this); + + if (PMU::CreateSampleIP(IPConfig, CoverSetHndl) == PMU_OK) { + dbg() << DEBUG_HPM << "Register event: cover set sampling.\n"; + PMU::Start(CoverSetHndl); + } + break; + } + case HPM_FINALIZE: + if (CoverSetHndl != PMU_INVALID_HNDL) + PMU::Cleanup(CoverSetHndl); + break; + case HPM_START: + case HPM_STOP: + default: + break; + } +} + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/llvm/llvm-opc-mmu.cpp b/llvm/llvm-opc-mmu.cpp new file mode 100644 index 0000000..9d2e60f --- /dev/null +++ b/llvm/llvm-opc-mmu.cpp @@ -0,0 +1,344 @@ +/* + * (C) 2015 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + * + * This file provides LLVM IR generator in terms of basic block and trace. 
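+ * More specifically, it emits the guest memory access code: a direct host
+ * load/store for user-mode emulation, and an inlined software-TLB lookup
+ * with a helper-call slow path (QEMULoad/QEMUStore) for system-mode
+ * emulation.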
+ */ + +#include "llvm-debug.h" +#include "llvm.h" +#include "llvm-opc.h" +#include "llvm-target.h" +#include "utils.h" + +#if defined(CONFIG_SOFTMMU) +extern "C" { +extern const void * const llvm_ld_helpers[16]; +extern const void * const llvm_st_helpers[16]; +}; +#endif + + +#if defined(CONFIG_USER_ONLY) +Value *IRFactory::QEMULoad(Value *AddrL, Value *AddrH, TCGMemOpIdx oi) +{ + TCGMemOp opc = get_memop(oi); + Value *Base = AddrL; + PointerType *PtrTy = getPointerTy(getSizeInBits(opc), Segment); + LoadInst *LI; + + if (GUEST_BASE == 0 || Segment != 0) { + Base = ITP(Base, PtrTy); + LI = new LoadInst(Base, "", true, LastInst); + } else { + Base = ITP(Base, Int8PtrTy); + Base = GetElementPtrInst::CreateInBounds(Base, GuestBaseReg.Base, "", LastInst); + if (Base->getType() != PtrTy) + Base = CAST(Base, PtrTy); + LI = new LoadInst(Base, "", true, LastInst); + } + MF->setGuestMemory(LI); + + return ConvertEndian(LI, opc); +} + +void IRFactory::QEMUStore(Value *Data, Value *AddrL, Value *AddrH, TCGMemOpIdx oi) +{ + TCGMemOp opc = get_memop(oi); + Value *Base = AddrL; + PointerType *PtrTy = getPointerTy(getSizeInBits(opc), Segment); + StoreInst *SI; + + Data = ConvertEndian(Data, opc); + + if (GUEST_BASE == 0 || Segment != 0) { + Base = ITP(Base, PtrTy); + SI = new StoreInst(Data, Base, true, LastInst); + } else { + Base = ITP(Base, Int8PtrTy); + Base = GetElementPtrInst::CreateInBounds(Base, GuestBaseReg.Base, "", LastInst); + if (Base->getType() != PtrTy) + Base = CAST(Base, PtrTy); + SI = new StoreInst(Data, Base, true, LastInst); + } + MF->setGuestMemory(SI); +} + +#else /* !CONFIG_USER_ONLY */ + +inline long getTLBOffset(int mem_index) +{ + long Offset = 0; + + switch (mem_index) { +#if NB_MMU_MODES > 0 + case 0: Offset = offsetof(CPUArchState, tlb_table[0][0]); break; +#endif +#if NB_MMU_MODES > 1 + case 1: Offset = offsetof(CPUArchState, tlb_table[1][0]); break; +#endif +#if NB_MMU_MODES > 2 + case 2: Offset = offsetof(CPUArchState, tlb_table[2][0]); break; +#endif +#if NB_MMU_MODES > 3 + case 3: Offset = offsetof(CPUArchState, tlb_table[3][0]); break; +#endif +#if NB_MMU_MODES > 4 + case 4: Offset = offsetof(CPUArchState, tlb_table[4][0]); break; +#endif +#if NB_MMU_MODES > 5 + case 5: Offset = offsetof(CPUArchState, tlb_table[5][0]); +#endif + default: + IRError("%s: internal error. mem_index=%d\n", __func__, mem_index); + } + + return Offset; +} + +Value *IRFactory::ConcatTLBVersion(Value *GVA) +{ +#if defined(ENABLE_TLBVERSION_EXT) + GVA = ZEXT64(GVA); +#endif + Type *PtrTy = getPointerTy(DL->getTypeSizeInBits(GVA->getType())); + Value *TLBVersion = GetElementPtrInst::CreateInBounds(CPU, + CONSTPtr(offsetof(CPUArchState, tlb_version)), "", LastInst); + TLBVersion = new BitCastInst(TLBVersion, PtrTy, "", LastInst); + TLBVersion = new LoadInst(TLBVersion, "version", true, LastInst); + return OR(GVA, TLBVersion); +} + +Value *IRFactory::QEMULoad(Value *AddrL, Value *AddrH, TCGMemOpIdx oi) +{ + TCGMemOp opc = get_memop(oi); + int mem_index = get_mmuidx(oi); + IntegerType *AccessTy; + PointerType *GuestPtrTy, *HostPtrTy; + int Size, s_bits = opc & MO_SIZE; + + Size = 8 * 1 << s_bits; /* data size (bits) for this load */ + + const void *helper = llvm_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]; + Function *MissFunc = ResolveFunction(getMMUFName(helper)); + if (!MissFunc) + IRError("%s: internal error.\n", __func__); + + GuestPtrTy = (TARGET_LONG_BITS == 32) ? Int32PtrTy : Int64PtrTy; + HostPtrTy = (TCG_TARGET_REG_BITS == 32) ? 
Int32PtrTy : Int64PtrTy; + +#if defined(ENABLE_TLBVERSION_EXT) + GuestPtrTy = Int64PtrTy; +#endif + + /* Create TLB basic blocks. */ + BasicBlock *tlb_hit = BasicBlock::Create(*Context, "tlb_hit", Func); + BasicBlock *tlb_miss = BasicBlock::Create(*Context, "tlb_miss", Func); + BasicBlock *tlb_exit = BasicBlock::Create(*Context, "tlb_exit", Func); + toSink.push_back(tlb_miss); + + /* Load compared value in TLB. QEMU uses only addrlo to index the TLB entry. */ + Value *TLBEntry, *TLBValue, *CPUAddr; + AccessTy = (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 64) ? Int64Ty : Int32Ty; + size_t Offset = getTLBOffset(mem_index) + offsetof(CPUTLBEntry, addr_read); + TLBEntry = LSHR(AddrL, ConstantInt::get(AccessTy, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)); + TLBEntry = AND(TLBEntry, ConstantInt::get(AccessTy, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS)); + TLBEntry = ADD(TLBEntry, ConstantInt::get(AccessTy, Offset)); + + if (TLBEntry->getType() != IntPtrTy) + TLBEntry = new ZExtInst(TLBEntry, IntPtrTy, "", LastInst); + + CPUAddr = new PtrToIntInst(CPU, IntPtrTy, "", LastInst); + TLBEntry = ADD(CPUAddr, TLBEntry); + TLBValue = new IntToPtrInst(TLBEntry, GuestPtrTy, "", LastInst); + TLBValue = new LoadInst(TLBValue, "tlb.read", false, LastInst); + + /* Compare GVA and TLB value. */ + Value *GVA, *Cond, *GuestPC = AddrL; + AccessTy = (TARGET_LONG_BITS == 32) ? Int32Ty : Int64Ty; + if (AddrH) { /* guest is 64-bit and host is 32-bit. */ + GuestPC = SHL(ZEXT64(AddrH), CONST64(32)); + GuestPC = OR(GuestPC, ZEXT64(AddrL)); + } +#if defined(ALIGNED_ONLY) + GVA = AND(GuestPC, ConstantInt::get(AccessTy, + TARGET_PAGE_MASK | ((1 << s_bits) - 1))); +#elif defined(ENABLE_TLBVERSION) + GVA = ADD(GuestPC, ConstantInt::get(AccessTy, (1 << s_bits) - 1)); + GVA = AND(GVA, ConstantInt::get(AccessTy, TARGET_PAGE_MASK)); + GVA = ConcatTLBVersion(GVA); +#else + GVA = ADD(GuestPC, ConstantInt::get(AccessTy, (1 << s_bits) - 1)); + GVA = AND(GVA, ConstantInt::get(AccessTy, TARGET_PAGE_MASK)); +#endif + Cond = ICMP(GVA, TLBValue, ICmpInst::ICMP_EQ); + BranchInst::Create(tlb_hit, tlb_miss, Cond, LastInst); + LastInst->eraseFromParent(); + + /* TLB hit. */ + Value *PhyAddr, *Addend, *HitData, *Addr=AddrL; + + LastInst = BranchInst::Create(tlb_exit, tlb_hit); + if (Addr->getType() != IntPtrTy) + Addr = new ZExtInst(Addr, IntPtrTy, "", LastInst); + + Offset = offsetof(CPUTLBEntry, addend) - offsetof(CPUTLBEntry, addr_read); + Addend = ADD(TLBEntry, ConstantInt::get(IntPtrTy, Offset)); + Addend = new IntToPtrInst(Addend, HostPtrTy, "", LastInst); + Addend = new LoadInst(Addend, "tlb.addend", false, LastInst); + PhyAddr = ADD(Addr, Addend); + PhyAddr = ITP(PhyAddr, getPointerTy(Size)); + HitData = new LoadInst(PhyAddr, "hit", true, LastInst); + + HitData = ConvertEndian(HitData, opc); + + /* TLB miss. 
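+     * The slow path calls the llvm_ld_helpers routine selected above with
+     * (cpu state, guest address, restore point); its return value is then
+     * truncated or extended to the access size so that it can join the hit
+     * value in the tlb_exit PHI node.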
*/ + LastInst = BranchInst::Create(tlb_exit, tlb_miss); + SmallVector<Value *, 4> Params; + uint32_t restore_val = setRestorePoint(oi); + Params.push_back(CPUStruct); + Params.push_back(GuestPC); + Params.push_back(CONST32(restore_val)); + + CallInst *MissCall = CallInst::Create(MissFunc, Params, "", LastInst); + Value *MissData = MissCall; + switch (opc & MO_SSIZE) { + case MO_UB: + case MO_SB: + if (DL->getTypeSizeInBits(MissData->getType()) != 8) + MissData = TRUNC8(MissCall); + break; + case MO_UW: + case MO_SW: + if (DL->getTypeSizeInBits(MissData->getType()) != 16) + MissData = TRUNC16(MissCall); + break; + case MO_UL: + case MO_SL: + if (DL->getTypeSizeInBits(MissData->getType()) != 32) + MissData = TRUNC32(MissCall); + break; + case MO_Q: + if (DL->getTypeSizeInBits(MissData->getType()) != 64) + MissData = ZEXT64(MissCall); + break; + default: + IRError("%s: invalid size (opc=%d)\n", __func__, opc); + break; + } + + /* TLB exit. */ + CurrBB = tlb_exit; + LastInst = BranchInst::Create(ExitBB, CurrBB); + PHINode *PH = PHINode::Create(HitData->getType(), 2, "", LastInst); + PH->addIncoming(HitData, tlb_hit); + PH->addIncoming(MissData, tlb_miss); + + return PH; +} + +void IRFactory::QEMUStore(Value *Data, Value *AddrL, Value *AddrH, TCGMemOpIdx oi) +{ + TCGMemOp opc = get_memop(oi); + int mem_index = get_mmuidx(oi); + IntegerType *AccessTy; + PointerType *GuestPtrTy, *HostPtrTy; + int Size, s_bits = opc & MO_SIZE; + + Size = 8 * 1 << s_bits; /* data size (bits) for this load */ + + const void *helper = llvm_st_helpers[opc & (MO_BSWAP | MO_SIZE)]; + Function *MissFunc = ResolveFunction(getMMUFName(helper)); + if (!MissFunc) + IRError("%s: internal error.\n", __func__); + + GuestPtrTy = (TARGET_LONG_BITS == 32) ? Int32PtrTy : Int64PtrTy; + HostPtrTy = (TCG_TARGET_REG_BITS == 32) ? Int32PtrTy : Int64PtrTy; + +#if defined(ENABLE_TLBVERSION_EXT) + GuestPtrTy = Int64PtrTy; +#endif + + /* Create TLB basic blocks. */ + BasicBlock *tlb_hit = BasicBlock::Create(*Context, "tlb_hit", Func); + BasicBlock *tlb_miss = BasicBlock::Create(*Context, "tlb_miss", Func); + BasicBlock *tlb_exit = BasicBlock::Create(*Context, "tlb_exit", Func); + toSink.push_back(tlb_miss); + + /* Load compared value in TLB. QEMU uses only addrlo to index the TLB entry. */ + Value *TLBEntry, *TLBValue, *CPUAddr; + AccessTy = (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 64) ? Int64Ty : Int32Ty; + size_t Offset = getTLBOffset(mem_index) + offsetof(CPUTLBEntry, addr_write); + TLBEntry = LSHR(AddrL, ConstantInt::get(AccessTy, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS)); + TLBEntry = AND(TLBEntry, ConstantInt::get(AccessTy, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS)); + TLBEntry = ADD(TLBEntry, ConstantInt::get(AccessTy, Offset)); + + if (TLBEntry->getType() != IntPtrTy) + TLBEntry = new ZExtInst(TLBEntry, IntPtrTy, "", LastInst); + + CPUAddr = new PtrToIntInst(CPU, IntPtrTy, "", LastInst); + TLBEntry = ADD(CPUAddr, TLBEntry); + TLBValue = new IntToPtrInst(TLBEntry, GuestPtrTy, "", LastInst); + TLBValue = new LoadInst(TLBValue, "tlb.write", false, LastInst); + + /* Compare GVA and TLB value. */ + Value *GVA, *Cond, *GuestPC = AddrL; + AccessTy = (TARGET_LONG_BITS == 32) ? Int32Ty : Int64Ty; + if (AddrH != nullptr) { /* guest is 64-bit and host is 32-bit. 
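+     * Reassemble the full 64-bit guest virtual address from the low and
+     * high 32-bit halves before the page-tag comparison.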
*/ + GuestPC = SHL(ZEXT64(AddrH), CONST64(32)); + GuestPC = OR(GuestPC, ZEXT64(AddrL)); + } +#if defined(ALIGNED_ONLY) + GVA = AND(GuestPC, ConstantInt::get(AccessTy, + TARGET_PAGE_MASK | ((1 << s_bits) - 1))); +#elif defined(ENABLE_TLBVERSION) + GVA = ADD(GuestPC, ConstantInt::get(AccessTy, (1 << s_bits) - 1)); + GVA = AND(GVA, ConstantInt::get(AccessTy, TARGET_PAGE_MASK)); + GVA = ConcatTLBVersion(GVA); +#else + GVA = ADD(GuestPC, ConstantInt::get(AccessTy, (1 << s_bits) - 1)); + GVA = AND(GVA, ConstantInt::get(AccessTy, TARGET_PAGE_MASK)); +#endif + Cond = ICMP(GVA, TLBValue, ICmpInst::ICMP_EQ); + BranchInst::Create(tlb_hit, tlb_miss, Cond, LastInst); + LastInst->eraseFromParent(); + + /* TLB hit. */ + Value *PhyAddr, *Addend, *Addr=AddrL; + + LastInst = BranchInst::Create(tlb_exit, tlb_hit); + if (Addr->getType() != IntPtrTy) + Addr = new ZExtInst(Addr, IntPtrTy, "", LastInst); + + Offset = offsetof(CPUTLBEntry, addend) - offsetof(CPUTLBEntry, addr_write); + Addend = ADD(TLBEntry, ConstantInt::get(IntPtrTy, Offset)); + Addend = new IntToPtrInst(Addend, HostPtrTy, "", LastInst); + Addend = new LoadInst(Addend, "tlb.addend", false, LastInst); + PhyAddr = ADD(Addr, Addend); + PhyAddr = ITP(PhyAddr, getPointerTy(Size)); + + Value *HitData = ConvertEndian(Data, opc); + + new StoreInst(HitData, PhyAddr, true, LastInst); + + /* TLB miss. */ + LastInst = BranchInst::Create(tlb_exit, tlb_miss); + SmallVector<Value *, 4> Params; + uint32_t restore_val = setRestorePoint(oi); + Params.push_back(CPUStruct); + Params.push_back(GuestPC); + Params.push_back(Data); + Params.push_back(CONST32(restore_val)); + + CallInst::Create(MissFunc, Params, "", LastInst); + + /* TLB exit. */ + CurrBB = tlb_exit; + LastInst = BranchInst::Create(ExitBB, CurrBB); +} + +#endif /* CONFIG_USER_ONLY */ + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/llvm/llvm-opc-vector.cpp b/llvm/llvm-opc-vector.cpp new file mode 100644 index 0000000..3ce5f68 --- /dev/null +++ b/llvm/llvm-opc-vector.cpp @@ -0,0 +1,943 @@ +/* + * (C) 2015 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + * + * This file provides TCG vector IR to LLVM IR conversions. + */ + +#include "llvm.h" +#include "llvm-debug.h" +#include "llvm-opc.h" +#include "utils.h" + + +extern TCGOpDef llvm_op_defs[]; + + +void IRFactory::op_vector_start(const TCGArg *args) +{ + IRError("%s: this function should never be called.\n", __func__); +} + +void IRFactory::op_vector_end(const TCGArg *args) +{ + IRError("%s: this function should never be called.\n", __func__); +} + +void IRFactory::op_vmov_128(const TCGArg *args) +{ + IRDebug(INDEX_op_vmov_128); + + TCGArg DstOff = args[0]; + TCGArg SrcOff = args[1]; + Value *Dst, *Src; + + VectorType *VectorTy = VectorType::get(Int8Ty, 16); + PointerType *PtrTy = PointerType::getUnqual(VectorTy); + Src = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(SrcOff), "", LastInst); + Src = new BitCastInst(Src, PtrTy, "", LastInst); + Dst = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(DstOff), "", LastInst); + Dst = new BitCastInst(Dst, PtrTy, "", LastInst); + + Src = new LoadInst(Src, "", false, LastInst); + new StoreInst(Src, Dst, false, LastInst); +} + +void IRFactory::op_vload_128(const TCGArg *args) +{ + IRDebug(INDEX_op_vload_128); + + TCGArg Off = args[0]; + Register &In = Reg[args[1]]; + TCGArg Alignment = (args[2] == (TCGArg)-1) ? 
4 : args[2] / 8; + Value *Base = LoadState(In); + LoadInst *LI; + + AssertType(In.Size == 32 || In.Size == 64); + + VectorType *VectorTy = VectorType::get(Int8Ty, 16); + PointerType *PtrTy = PointerType::get(VectorTy, Segment); + + if (GUEST_BASE == 0 || Segment != 0) { + Base = ITP(Base, PtrTy); + LI = new LoadInst(Base, "", true, LastInst); + } else { + Base = ITP(Base, Int8PtrTy); + Base = GetElementPtrInst::CreateInBounds(Base, GuestBaseReg.Base, "", LastInst); + if (Base->getType() != PtrTy) + Base = CAST(Base, PtrTy); + LI = new LoadInst(Base, "", true, LastInst); + } + LI->setAlignment(Alignment); + + MF->setGuestMemory(LI); + + PtrTy = PointerType::getUnqual(VectorTy); + Value *V = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(Off), "", LastInst); + V = new BitCastInst(V, PtrTy, "", LastInst); + new StoreInst(LI, V, false, LastInst); +} + +void IRFactory::op_vstore_128(const TCGArg *args) +{ + IRDebug(INDEX_op_vstore_128); + + TCGArg Off = args[0]; + Register &In = Reg[args[1]]; + TCGArg Alignment = (args[2] == (TCGArg)-1) ? 4 : args[2] / 8; + Value *Base = LoadState(In); + StoreInst *SI; + + AssertType(In.Size == 32 || In.Size == 64); + + VectorType *VectorTy = VectorType::get(Int8Ty, 16); + PointerType *PtrTy = nullptr; + + PtrTy = PointerType::getUnqual(VectorTy); + Value *V = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(Off), "", LastInst); + V = new BitCastInst(V, PtrTy, "", LastInst); + V = new LoadInst(V, "", false, LastInst); + + PtrTy = PointerType::get(VectorTy, Segment); + if (GUEST_BASE == 0 || Segment != 0) { + Base = ITP(Base, PtrTy); + SI = new StoreInst(V, Base, true, LastInst); + } else { + Base = ITP(Base, Int8PtrTy); + Base = GetElementPtrInst::CreateInBounds(Base, GuestBaseReg.Base, "", LastInst); + if (Base->getType() != PtrTy) + Base = CAST(Base, PtrTy); + SI = new StoreInst(V, Base, true, LastInst); + } + + SI->setAlignment(Alignment); + + MF->setGuestMemory(SI); +} + +#define llvm_gen_vop(_Fn,_Num,_Ty) \ +do { \ + TCGArg Out = args[0]; \ + TCGArg In1 = args[1]; \ + TCGArg In2 = args[2]; \ + Value *OutPtr, *InPtr1, *InPtr2; \ + VectorType *VectorTy = VectorType::get(_Ty, _Num); \ + PointerType *PtrTy = PointerType::getUnqual(VectorTy); \ + \ + InPtr1 = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(In1), "", LastInst); \ + InPtr1 = new BitCastInst(InPtr1, PtrTy, "", LastInst); \ + InPtr2 = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(In2), "", LastInst); \ + InPtr2 = new BitCastInst(InPtr2, PtrTy, "", LastInst); \ + OutPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(Out), "", LastInst); \ + OutPtr = new BitCastInst(OutPtr, PtrTy, "", LastInst); \ + \ + Value *InData1 = new LoadInst(InPtr1, "", false, LastInst); \ + Value *InData2 = new LoadInst(InPtr2, "", false, LastInst); \ + InData1 = _Fn(InData1, InData2); \ + new StoreInst(InData1, OutPtr, false, LastInst); \ +} while (0) + +#define llvm_gen_vop2(_Fn1,_Fn2,_Num,_Ty) \ +do { \ + TCGArg Out = args[0]; \ + TCGArg In1 = args[1]; \ + TCGArg In2 = args[2]; \ + Value *OutPtr, *InPtr1, *InPtr2; \ + VectorType *VectorTy = VectorType::get(_Ty, _Num); \ + PointerType *PtrTy = PointerType::getUnqual(VectorTy); \ + \ + InPtr1 = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(In1), "", LastInst); \ + InPtr1 = new BitCastInst(InPtr1, PtrTy, "", LastInst); \ + InPtr2 = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(In2), "", LastInst); \ + InPtr2 = new BitCastInst(InPtr2, PtrTy, "", LastInst); \ + OutPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(Out), "", LastInst); \ + OutPtr = new 
BitCastInst(OutPtr, PtrTy, "", LastInst); \ + \ + Value *InData1 = new LoadInst(InPtr1, "", false, LastInst); \ + Value *InData2 = new LoadInst(InPtr2, "", false, LastInst); \ + Value *InData3 = new LoadInst(OutPtr, "", false, LastInst); \ + InData1 = _Fn2(InData3, _Fn1(InData1, InData2)); \ + new StoreInst(InData1, OutPtr, false, LastInst); \ +} while (0) + + +void IRFactory::op_vadd_i8_128(const TCGArg *args) +{ + IRDebug(INDEX_op_vadd_i8_128); + llvm_gen_vop(ADD, 16, Int8Ty); +} + +void IRFactory::op_vadd_i16_128(const TCGArg *args) +{ + IRDebug(INDEX_op_vadd_i16_128); + llvm_gen_vop(ADD, 8, Int16Ty); +} + +void IRFactory::op_vadd_i32_128(const TCGArg *args) +{ + IRDebug(INDEX_op_vadd_i32_128); + llvm_gen_vop(ADD, 4, Int32Ty); +} + +void IRFactory::op_vadd_i64_128(const TCGArg *args) +{ + IRDebug(INDEX_op_vadd_i64_128); + llvm_gen_vop(ADD, 2, Int64Ty); +} + +void IRFactory::op_vadd_i8_64(const TCGArg *args) +{ + IRDebug(INDEX_op_vadd_i8_64); + llvm_gen_vop(ADD, 8, Int8Ty); +} + +void IRFactory::op_vadd_i16_64(const TCGArg *args) +{ + IRDebug(INDEX_op_vadd_i16_64); + llvm_gen_vop(ADD, 4, Int16Ty); +} + +void IRFactory::op_vadd_i32_64(const TCGArg *args) +{ + IRDebug(INDEX_op_vadd_i32_64); + llvm_gen_vop(ADD, 2, Int32Ty); +} + +void IRFactory::op_vsub_i8_128(const TCGArg *args) +{ + IRDebug(INDEX_op_vsub_i8_128); + llvm_gen_vop(SUB, 16, Int8Ty); +} + +void IRFactory::op_vsub_i16_128(const TCGArg *args) +{ + IRDebug(INDEX_op_vsub_i16_128); + llvm_gen_vop(SUB, 8, Int16Ty); +} + +void IRFactory::op_vsub_i32_128(const TCGArg *args) +{ + IRDebug(INDEX_op_vsub_i32_128); + llvm_gen_vop(SUB, 4, Int32Ty); +} + +void IRFactory::op_vsub_i64_128(const TCGArg *args) +{ + IRDebug(INDEX_op_vsub_i64_128); + llvm_gen_vop(SUB, 2, Int64Ty); +} + +void IRFactory::op_vsub_i8_64(const TCGArg *args) +{ + IRDebug(INDEX_op_vsub_i8_64); + llvm_gen_vop(SUB, 8, Int8Ty); +} + +void IRFactory::op_vsub_i16_64(const TCGArg *args) +{ + IRDebug(INDEX_op_vsub_i16_64); + llvm_gen_vop(SUB, 4, Int16Ty); +} + +void IRFactory::op_vsub_i32_64(const TCGArg *args) +{ + IRDebug(INDEX_op_vsub_i32_64); + llvm_gen_vop(SUB, 2, Int32Ty); +} + +void IRFactory::op_vadd_f32_128(const TCGArg *args) +{ + IRDebug(INDEX_op_vadd_f32_128); + llvm_gen_vop(FADD, 4, FloatTy); +} + +void IRFactory::op_vadd_f64_128(const TCGArg *args) +{ + IRDebug(INDEX_op_vadd_f64_128); + llvm_gen_vop(FADD, 2, DoubleTy); +} + +void IRFactory::op_vadd_f32_64(const TCGArg *args) +{ + IRDebug(INDEX_op_vadd_f32_64); + llvm_gen_vop(FADD, 2, FloatTy); +} + +void IRFactory::op_vpadd_f32_128(const TCGArg *args) +{ + IRError("%s not implemented.\n", __func__); +} + +void IRFactory::op_vpadd_f64_128(const TCGArg *args) +{ + IRError("%s not implemented.\n", __func__); +} + +void IRFactory::op_vpadd_f32_64(const TCGArg *args) +{ + IRError("%s not implemented.\n", __func__); +} + +void IRFactory::op_vsub_f32_128(const TCGArg *args) +{ + IRDebug(INDEX_op_vsub_f32_128); + llvm_gen_vop(FSUB, 4, FloatTy); +} + +void IRFactory::op_vsub_f64_128(const TCGArg *args) +{ + IRDebug(INDEX_op_vsub_f64_128); + llvm_gen_vop(FSUB, 2, DoubleTy); +} + +void IRFactory::op_vsub_f32_64(const TCGArg *args) +{ + IRDebug(INDEX_op_vsub_f32_64); + llvm_gen_vop(FSUB, 2, FloatTy); +} + +void IRFactory::op_vabd_f32_128(const TCGArg *args) +{ + IRError("%s not implemented.\n", __func__); +} + +void IRFactory::op_vabd_f64_128(const TCGArg *args) +{ + IRError("%s not implemented.\n", __func__); +} + +void IRFactory::op_vabd_f32_64(const TCGArg *args) +{ + IRError("%s not implemented.\n", __func__); +} + +void 
IRFactory::op_vfma_f32_128(const TCGArg *args) +{ + IRDebug(INDEX_op_vfma_f32_128); + llvm_gen_vop2(FMUL, FADD, 4, FloatTy); +} + +void IRFactory::op_vfma_f64_128(const TCGArg *args) +{ + IRDebug(INDEX_op_vfma_f64_128); + llvm_gen_vop2(FMUL, FADD, 2, DoubleTy); +} + +void IRFactory::op_vfma_f32_64(const TCGArg *args) +{ + IRDebug(INDEX_op_vfma_f32_64); + llvm_gen_vop2(FMUL, FADD, 2, FloatTy); +} + +void IRFactory::op_vfms_f32_128(const TCGArg *args) +{ + IRError("%s not implemented.\n", __func__); +} + +void IRFactory::op_vfms_f64_128(const TCGArg *args) +{ + IRError("%s not implemented.\n", __func__); +} + +void IRFactory::op_vfms_f32_64(const TCGArg *args) +{ + IRError("%s not implemented.\n", __func__); +} + +void IRFactory::op_vmul_f32_128(const TCGArg *args) +{ + IRDebug(INDEX_op_vmul_f32_128); + llvm_gen_vop(FMUL, 4, FloatTy); +} + +void IRFactory::op_vmul_f64_128(const TCGArg *args) +{ + IRDebug(INDEX_op_vmul_f64_128); + llvm_gen_vop(FMUL, 2, DoubleTy); +} + +void IRFactory::op_vmul_f32_64(const TCGArg *args) +{ + IRDebug(INDEX_op_vmul_f32_64); + llvm_gen_vop(FMUL, 2, FloatTy); +} + +void IRFactory::op_vmla_f32_128(const TCGArg *args) +{ + IRDebug(INDEX_op_vmla_f32_128); + llvm_gen_vop2(FMUL, FADD, 4, FloatTy); +} + +void IRFactory::op_vmla_f64_128(const TCGArg *args) +{ + IRDebug(INDEX_op_vmla_f64_128); + llvm_gen_vop2(FMUL, FADD, 2, DoubleTy); +} + +void IRFactory::op_vmla_f32_64(const TCGArg *args) +{ + IRDebug(INDEX_op_vmla_f32_64); + llvm_gen_vop2(FMUL, FADD, 2, FloatTy); +} + +void IRFactory::op_vmls_f32_128(const TCGArg *args) +{ + IRDebug(INDEX_op_vmls_f32_128); + llvm_gen_vop2(FMUL, FSUB, 4, FloatTy); +} + +void IRFactory::op_vmls_f64_128(const TCGArg *args) +{ + IRDebug(INDEX_op_vmls_f64_128); + llvm_gen_vop2(FMUL, FSUB, 2, DoubleTy); +} + +void IRFactory::op_vmls_f32_64(const TCGArg *args) +{ + IRDebug(INDEX_op_vmls_f32_64); + llvm_gen_vop2(FMUL, FSUB, 2, FloatTy); +} + +void IRFactory::op_vdiv_f32_128(const TCGArg *args) +{ + IRDebug(INDEX_op_vdiv_f32_128); + llvm_gen_vop(FDIV, 4, FloatTy); +} + +void IRFactory::op_vdiv_f64_128(const TCGArg *args) +{ + IRDebug(INDEX_op_vdiv_f64_128); + llvm_gen_vop(FDIV, 2, DoubleTy); +} + +void IRFactory::op_vdiv_f32_64(const TCGArg *args) +{ + IRDebug(INDEX_op_vdiv_f32_64); + llvm_gen_vop(FDIV, 2, FloatTy); +} + +void IRFactory::op_vand_128(const TCGArg *args) +{ + IRDebug(INDEX_op_vand_128); + if (args[1] == args[2]) { + op_vmov_128(args); + return; + } + llvm_gen_vop(AND, 4, Int32Ty); +} + +void IRFactory::op_vand_64(const TCGArg *args) +{ + IRDebug(INDEX_op_vand_64); + llvm_gen_vop(AND, 2, Int32Ty); +} + +void IRFactory::op_vbic_128(const TCGArg *args) +{ + IRDebug(INDEX_op_vbic_128); + + TCGArg Out = args[0]; + TCGArg In1 = args[1]; + TCGArg In2 = args[2]; + Value *OutPtr, *InPtr1, *InPtr2; + VectorType *VectorTy = VectorType::get(Int32Ty, 4); + PointerType *PtrTy = PointerType::getUnqual(VectorTy); + + InPtr1 = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(In1), "", LastInst); + InPtr1 = new BitCastInst(InPtr1, PtrTy, "", LastInst); + InPtr2 = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(In2), "", LastInst); + InPtr2 = new BitCastInst(InPtr2, PtrTy, "", LastInst); + OutPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(Out), "", LastInst); + OutPtr = new BitCastInst(OutPtr, PtrTy, "", LastInst); + + std::vector<Constant *> V; + for (int i = 0; i < 4; i++) + V.push_back(CONST32(-1U)); + Value *VecMinusOne = ConstantVector::get(V); + + Value *InData1 = new LoadInst(InPtr1, "", false, LastInst); + Value *InData2 = new 
LoadInst(InPtr2, "", false, LastInst); + InData2 = XOR(InData2, VecMinusOne); + InData1 = AND(InData1, InData2); + new StoreInst(InData1, OutPtr, false, LastInst); +} + +void IRFactory::op_vbic_64(const TCGArg *args) +{ + IRDebug(INDEX_op_vbic_64); + + TCGArg Out = args[0]; + TCGArg In1 = args[1]; + TCGArg In2 = args[2]; + Value *OutPtr, *InPtr1, *InPtr2; + VectorType *VectorTy = VectorType::get(Int32Ty, 2); + PointerType *PtrTy = PointerType::getUnqual(VectorTy); + + InPtr1 = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(In1), "", LastInst); + InPtr1 = new BitCastInst(InPtr1, PtrTy, "", LastInst); + InPtr2 = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(In2), "", LastInst); + InPtr2 = new BitCastInst(InPtr2, PtrTy, "", LastInst); + OutPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(Out), "", LastInst); + OutPtr = new BitCastInst(OutPtr, PtrTy, "", LastInst); + + std::vector<Constant *> V; + for (int i = 0; i < 2; i++) + V.push_back(CONST32(-1U)); + Value *VecMinusOne = ConstantVector::get(V); + + Value *InData1 = new LoadInst(InPtr1, "", false, LastInst); + Value *InData2 = new LoadInst(InPtr2, "", false, LastInst); + InData2 = XOR(InData2, VecMinusOne); + InData1 = AND(InData1, InData2); + new StoreInst(InData1, OutPtr, false, LastInst); +} + +void IRFactory::op_vorr_128(const TCGArg *args) +{ + IRDebug(INDEX_op_vorr_128); + if (args[1] == args[2]) { + op_vmov_128(args); + return; + } + llvm_gen_vop(OR, 4, Int32Ty); +} + +void IRFactory::op_vorr_64(const TCGArg *args) +{ + IRDebug(INDEX_op_vorr_64); + llvm_gen_vop(OR, 2, Int32Ty); +} + +void IRFactory::op_vorn_128(const TCGArg *args) +{ + IRDebug(INDEX_op_vorn_128); + + TCGArg Out = args[0]; + TCGArg In1 = args[1]; + TCGArg In2 = args[2]; + Value *OutPtr, *InPtr1, *InPtr2; + VectorType *VectorTy = VectorType::get(Int32Ty, 4); + PointerType *PtrTy = PointerType::getUnqual(VectorTy); + + InPtr1 = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(In1), "", LastInst); + InPtr1 = new BitCastInst(InPtr1, PtrTy, "", LastInst); + InPtr2 = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(In2), "", LastInst); + InPtr2 = new BitCastInst(InPtr2, PtrTy, "", LastInst); + OutPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(Out), "", LastInst); + OutPtr = new BitCastInst(OutPtr, PtrTy, "", LastInst); + + std::vector<Constant *> V; + for (int i = 0; i < 4; i++) + V.push_back(CONST32(-1U)); + Value *VecMinusOne = ConstantVector::get(V); + + Value *InData1 = new LoadInst(InPtr1, "", false, LastInst); + Value *InData2 = new LoadInst(InPtr2, "", false, LastInst); + InData2 = XOR(InData2, VecMinusOne); + InData1 = OR(InData1, InData2); + new StoreInst(InData1, OutPtr, false, LastInst); +} + +void IRFactory::op_vorn_64(const TCGArg *args) +{ + IRDebug(INDEX_op_vorn_64); + + TCGArg Out = args[0]; + TCGArg In1 = args[1]; + TCGArg In2 = args[2]; + Value *OutPtr, *InPtr1, *InPtr2; + VectorType *VectorTy = VectorType::get(Int32Ty, 2); + PointerType *PtrTy = PointerType::getUnqual(VectorTy); + + InPtr1 = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(In1), "", LastInst); + InPtr1 = new BitCastInst(InPtr1, PtrTy, "", LastInst); + InPtr2 = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(In2), "", LastInst); + InPtr2 = new BitCastInst(InPtr2, PtrTy, "", LastInst); + OutPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(Out), "", LastInst); + OutPtr = new BitCastInst(OutPtr, PtrTy, "", LastInst); + + std::vector<Constant *> V; + for (int i = 0; i < 2; i++) + V.push_back(CONST32(-1U)); + Value *VecMinusOne = ConstantVector::get(V); + + Value *InData1 = 
new LoadInst(InPtr1, "", false, LastInst); + Value *InData2 = new LoadInst(InPtr2, "", false, LastInst); + InData2 = XOR(InData2, VecMinusOne); + InData1 = OR(InData1, InData2); + new StoreInst(InData1, OutPtr, false, LastInst); +} + +void IRFactory::op_veor_128(const TCGArg *args) +{ + IRDebug(INDEX_op_veor_128); + llvm_gen_vop(XOR, 4, Int32Ty); +} + +void IRFactory::op_veor_64(const TCGArg *args) +{ + IRDebug(INDEX_op_veor_64); + llvm_gen_vop(XOR, 2, Int32Ty); +} + +void IRFactory::op_vbif_128(const TCGArg *args) +{ + IRDebug(INDEX_op_vbif_128); + + /* vbif rd, rn, rm + * operation: rd <- (rd & rm) | (rn & ~rm) */ + TCGArg Out = args[0]; + TCGArg In1 = args[1]; + TCGArg In2 = args[2]; + Value *OutPtr, *InPtr1, *InPtr2; + VectorType *VectorTy = VectorType::get(Int32Ty, 4); + PointerType *PtrTy = PointerType::getUnqual(VectorTy); + + InPtr1 = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(In1), "", LastInst); + InPtr1 = new BitCastInst(InPtr1, PtrTy, "", LastInst); + InPtr2 = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(In2), "", LastInst); + InPtr2 = new BitCastInst(InPtr2, PtrTy, "", LastInst); + OutPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(Out), "", LastInst); + OutPtr = new BitCastInst(OutPtr, PtrTy, "", LastInst); + + std::vector<Constant *> V; + for (int i = 0; i < 4; i++) + V.push_back(CONST32(-1U)); + Value *VecMinusOne = ConstantVector::get(V); + + Value *InData1 = new LoadInst(InPtr1, "", false, LastInst); + Value *InData2 = new LoadInst(InPtr2, "", false, LastInst); + Value *InData3 = new LoadInst(OutPtr, "", false, LastInst); + + InData3 = AND(InData3, InData2); + InData1 = AND(InData1, XOR(InData2, VecMinusOne)); + InData3 = OR(InData1, InData3); + new StoreInst(InData3, OutPtr, false, LastInst); +} + +void IRFactory::op_vbif_64(const TCGArg *args) +{ + IRDebug(INDEX_op_vbif_64); + + /* vbif rd, rn, rm + * operation: rd <- (rd & rm) | (rn & ~rm) */ + TCGArg Out = args[0]; + TCGArg In1 = args[1]; + TCGArg In2 = args[2]; + Value *OutPtr, *InPtr1, *InPtr2; + VectorType *VectorTy = VectorType::get(Int32Ty, 2); + PointerType *PtrTy = PointerType::getUnqual(VectorTy); + + InPtr1 = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(In1), "", LastInst); + InPtr1 = new BitCastInst(InPtr1, PtrTy, "", LastInst); + InPtr2 = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(In2), "", LastInst); + InPtr2 = new BitCastInst(InPtr2, PtrTy, "", LastInst); + OutPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(Out), "", LastInst); + OutPtr = new BitCastInst(OutPtr, PtrTy, "", LastInst); + + std::vector<Constant *> V; + for (int i = 0; i < 2; i++) + V.push_back(CONST32(-1U)); + Value *VecMinusOne = ConstantVector::get(V); + + Value *InData1 = new LoadInst(InPtr1, "", false, LastInst); + Value *InData2 = new LoadInst(InPtr2, "", false, LastInst); + Value *InData3 = new LoadInst(OutPtr, "", false, LastInst); + + InData3 = AND(InData3, InData2); + InData1 = AND(InData1, XOR(InData2, VecMinusOne)); + InData3 = OR(InData1, InData3); + new StoreInst(InData3, OutPtr, false, LastInst); +} + +void IRFactory::op_vbit_128(const TCGArg *args) +{ + IRDebug(INDEX_op_vbit_128); + + /* vbit rd, rn, rm + * operation: rd <- (rn & rm) | (rd & ~rm) */ + TCGArg Out = args[0]; + TCGArg In1 = args[1]; + TCGArg In2 = args[2]; + Value *OutPtr, *InPtr1, *InPtr2; + VectorType *VectorTy = VectorType::get(Int32Ty, 4); + PointerType *PtrTy = PointerType::getUnqual(VectorTy); + + InPtr1 = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(In1), "", LastInst); + InPtr1 = new BitCastInst(InPtr1, PtrTy, "", LastInst); 
+ InPtr2 = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(In2), "", LastInst); + InPtr2 = new BitCastInst(InPtr2, PtrTy, "", LastInst); + OutPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(Out), "", LastInst); + OutPtr = new BitCastInst(OutPtr, PtrTy, "", LastInst); + + std::vector<Constant *> V; + for (int i = 0; i < 4; i++) + V.push_back(CONST32(-1U)); + Value *VecMinusOne = ConstantVector::get(V); + + Value *InData1 = new LoadInst(InPtr1, "", false, LastInst); + Value *InData2 = new LoadInst(InPtr2, "", false, LastInst); + Value *InData3 = new LoadInst(OutPtr, "", false, LastInst); + + InData1 = AND(InData1, InData2); + InData3 = AND(InData3, XOR(InData2, VecMinusOne)); + InData3 = OR(InData1, InData3); + new StoreInst(InData3, OutPtr, false, LastInst); +} + +void IRFactory::op_vbit_64(const TCGArg *args) +{ + IRDebug(INDEX_op_vbit_64); + + /* vbit rd, rn, rm + * operation: rd <- (rn & rm) | (rd & ~rm) */ + TCGArg Out = args[0]; + TCGArg In1 = args[1]; + TCGArg In2 = args[2]; + Value *OutPtr, *InPtr1, *InPtr2; + VectorType *VectorTy = VectorType::get(Int32Ty, 2); + PointerType *PtrTy = PointerType::getUnqual(VectorTy); + + InPtr1 = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(In1), "", LastInst); + InPtr1 = new BitCastInst(InPtr1, PtrTy, "", LastInst); + InPtr2 = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(In2), "", LastInst); + InPtr2 = new BitCastInst(InPtr2, PtrTy, "", LastInst); + OutPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(Out), "", LastInst); + OutPtr = new BitCastInst(OutPtr, PtrTy, "", LastInst); + + std::vector<Constant *> V; + for (int i = 0; i < 2; i++) + V.push_back(CONST32(-1U)); + Value *VecMinusOne = ConstantVector::get(V); + + Value *InData1 = new LoadInst(InPtr1, "", false, LastInst); + Value *InData2 = new LoadInst(InPtr2, "", false, LastInst); + Value *InData3 = new LoadInst(OutPtr, "", false, LastInst); + + InData1 = AND(InData1, InData2); + InData3 = AND(InData3, XOR(InData2, VecMinusOne)); + InData3 = OR(InData1, InData3); + new StoreInst(InData3, OutPtr, false, LastInst); +} + +void IRFactory::op_vbsl_128(const TCGArg *args) +{ + IRDebug(INDEX_op_vbsl_128); + + /* vbsl rd, rn, rm + * operation: rd <- (rn & rd) | (rm & ~rd) */ + TCGArg Out = args[0]; + TCGArg In1 = args[1]; + TCGArg In2 = args[2]; + Value *OutPtr, *InPtr1, *InPtr2; + VectorType *VectorTy = VectorType::get(Int32Ty, 4); + PointerType *PtrTy = PointerType::getUnqual(VectorTy); + + InPtr1 = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(In1), "", LastInst); + InPtr1 = new BitCastInst(InPtr1, PtrTy, "", LastInst); + InPtr2 = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(In2), "", LastInst); + InPtr2 = new BitCastInst(InPtr2, PtrTy, "", LastInst); + OutPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(Out), "", LastInst); + OutPtr = new BitCastInst(OutPtr, PtrTy, "", LastInst); + + std::vector<Constant *> V; + for (int i = 0; i < 4; i++) + V.push_back(CONST32(-1U)); + Value *VecMinusOne = ConstantVector::get(V); + + Value *InData1 = new LoadInst(InPtr1, "", false, LastInst); + Value *InData2 = new LoadInst(InPtr2, "", false, LastInst); + Value *InData3 = new LoadInst(OutPtr, "", false, LastInst); + + InData1 = AND(InData1, InData3); + InData2 = AND(InData2, XOR(InData3, VecMinusOne)); + InData3 = OR(InData1, InData2); + new StoreInst(InData3, OutPtr, false, LastInst); +} + +void IRFactory::op_vbsl_64(const TCGArg *args) +{ + IRDebug(INDEX_op_vbsl_64); + + /* vbsl rd, rn, rm + * operation: rd <- (rn & rd) | (rm & ~rd) */ + TCGArg Out = args[0]; + TCGArg In1 = args[1]; + 
TCGArg In2 = args[2]; + Value *OutPtr, *InPtr1, *InPtr2; + VectorType *VectorTy = VectorType::get(Int32Ty, 2); + PointerType *PtrTy = PointerType::getUnqual(VectorTy); + + InPtr1 = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(In1), "", LastInst); + InPtr1 = new BitCastInst(InPtr1, PtrTy, "", LastInst); + InPtr2 = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(In2), "", LastInst); + InPtr2 = new BitCastInst(InPtr2, PtrTy, "", LastInst); + OutPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(Out), "", LastInst); + OutPtr = new BitCastInst(OutPtr, PtrTy, "", LastInst); + + std::vector<Constant *> V; + for (int i = 0; i < 2; i++) + V.push_back(CONST32(-1U)); + Value *VecMinusOne = ConstantVector::get(V); + + Value *InData1 = new LoadInst(InPtr1, "", false, LastInst); + Value *InData2 = new LoadInst(InPtr2, "", false, LastInst); + Value *InData3 = new LoadInst(OutPtr, "", false, LastInst); + + InData1 = AND(InData1, InData3); + InData2 = AND(InData2, XOR(InData3, VecMinusOne)); + InData3 = OR(InData1, InData2); + new StoreInst(InData3, OutPtr, false, LastInst); +} + +void IRFactory::op_vsitofp_128(const TCGArg *args) +{ + IRDebug(INDEX_op_vsitofp_128); + + TCGArg Out = args[0]; + TCGArg In1 = args[1]; + TCGArg Size = args[2]; + + unsigned NumElements = 0; + Type *SrcTy = nullptr, *DstTy = nullptr; + if (Size == 32) { + NumElements = 4; + SrcTy = Int32Ty; + DstTy = FloatTy; + } else if (Size == 64) { + NumElements = 2; + SrcTy = Int64Ty; + DstTy = DoubleTy; + } else + IRError("%s: invalid element size.\n", __func__); + + Value *OutPtr, *InPtr; + VectorType *VectorInt = VectorType::get(SrcTy, NumElements); + PointerType *VIntPtrTy = PointerType::getUnqual(VectorInt); + VectorType *VectorFP = VectorType::get(DstTy, NumElements); + PointerType *VFPPtrTy = PointerType::getUnqual(VectorFP); + + InPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(In1), "", LastInst); + InPtr = new BitCastInst(InPtr, VIntPtrTy, "", LastInst); + OutPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(Out), "", LastInst); + OutPtr = new BitCastInst(OutPtr, VFPPtrTy, "", LastInst); + + Value *InData = new LoadInst(InPtr, "", false, LastInst); + InData = new SIToFPInst(InData, VectorFP, "", LastInst); + new StoreInst(InData, OutPtr, false, LastInst); +} + +void IRFactory::op_vuitofp_128(const TCGArg *args) +{ + IRDebug(INDEX_op_vuitofp_128); + + TCGArg Out = args[0]; + TCGArg In1 = args[1]; + TCGArg Size = args[2]; + + unsigned NumElements = 0; + Type *SrcTy = nullptr, *DstTy = nullptr; + if (Size == 32) { + NumElements = 4; + SrcTy = Int32Ty; + DstTy = FloatTy; + } else if (Size == 64) { + NumElements = 2; + SrcTy = Int64Ty; + DstTy = DoubleTy; + } else + IRError("%s: invalid element size.\n", __func__); + + Value *OutPtr, *InPtr; + VectorType *VectorInt = VectorType::get(SrcTy, NumElements); + PointerType *VIntPtrTy = PointerType::getUnqual(VectorInt); + VectorType *VectorFP = VectorType::get(DstTy, NumElements); + PointerType *VFPPtrTy = PointerType::getUnqual(VectorFP); + + InPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(In1), "", LastInst); + InPtr = new BitCastInst(InPtr, VIntPtrTy, "", LastInst); + OutPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(Out), "", LastInst); + OutPtr = new BitCastInst(OutPtr, VFPPtrTy, "", LastInst); + + Value *InData = new LoadInst(InPtr, "", false, LastInst); + InData = new UIToFPInst(InData, VectorFP, "", LastInst); + new StoreInst(InData, OutPtr, false, LastInst); +} + +void IRFactory::op_vfptosi_128(const TCGArg *args) +{ + IRDebug(INDEX_op_vfptosi_128); + 
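
The 128-bit conversion opcodes in this group all follow one pattern: reinterpret the source state slot as a vector of four 32-bit or two 64-bit lanes, convert each lane, and store the result through a pointer of the destination vector type. Below is a behavioural sketch of the signed int-to-float case (op_vsitofp_128 above) with illustrative names; env stands in for CPUArchState and the byte offsets for the Out/In1 operands:

    #include <cstddef>
    #include <cstdint>
    #include <cstring>

    // Behavioural sketch of op_vsitofp_128 with 32-bit elements: read a
    // 128-bit state slot as four int32 lanes, convert each lane, and
    // write four float lanes to the destination slot.
    static void model_vsitofp_i32x4(void *env, size_t out_off, size_t in_off)
    {
        int32_t in[4];
        float out[4];
        std::memcpy(in, static_cast<char *>(env) + in_off, sizeof in);
        for (int i = 0; i < 4; ++i)
            out[i] = static_cast<float>(in[i]);   // sitofp, lane by lane
        std::memcpy(static_cast<char *>(env) + out_off, out, sizeof out);
    }
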
+ TCGArg Out = args[0]; + TCGArg In1 = args[1]; + TCGArg Size = args[2]; + + unsigned NumElements = 0; + Type *SrcTy = nullptr, *DstTy = nullptr; + if (Size == 32) { + NumElements = 4; + SrcTy = FloatTy; + DstTy = Int32Ty; + } else if (Size == 64) { + NumElements = 2; + SrcTy = DoubleTy; + DstTy = Int64Ty; + } else + IRError("%s: invalid element size.\n", __func__); + + Value *OutPtr, *InPtr; + VectorType *VectorFP = VectorType::get(SrcTy, NumElements); + PointerType *VFPPtrTy = PointerType::getUnqual(VectorFP); + VectorType *VectorInt = VectorType::get(DstTy, NumElements); + PointerType *VIntPtrTy = PointerType::getUnqual(VectorInt); + + InPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(In1), "", LastInst); + InPtr = new BitCastInst(InPtr, VFPPtrTy, "", LastInst); + OutPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(Out), "", LastInst); + OutPtr = new BitCastInst(OutPtr, VIntPtrTy, "", LastInst); + + Value *InData = new LoadInst(InPtr, "", false, LastInst); + InData = new FPToSIInst(InData, VectorInt, "", LastInst); + new StoreInst(InData, OutPtr, false, LastInst); +} + +void IRFactory::op_vfptoui_128(const TCGArg *args) +{ + IRDebug(INDEX_op_vfptoui_128); + + TCGArg Out = args[0]; + TCGArg In1 = args[1]; + TCGArg Size = args[2]; + + unsigned NumElements = 0; + Type *SrcTy = nullptr, *DstTy = nullptr; + if (Size == 32) { + NumElements = 4; + SrcTy = FloatTy; + DstTy = Int32Ty; + } else if (Size == 64) { + NumElements = 2; + SrcTy = DoubleTy; + DstTy = Int64Ty; + } else + IRError("%s: invalid element size.\n", __func__); + + Value *OutPtr, *InPtr; + VectorType *VectorFP = VectorType::get(SrcTy, NumElements); + PointerType *VFPPtrTy = PointerType::getUnqual(VectorFP); + VectorType *VectorInt = VectorType::get(DstTy, NumElements); + PointerType *VIntPtrTy = PointerType::getUnqual(VectorInt); + + InPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(In1), "", LastInst); + InPtr = new BitCastInst(InPtr, VFPPtrTy, "", LastInst); + OutPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(Out), "", LastInst); + OutPtr = new BitCastInst(OutPtr, VIntPtrTy, "", LastInst); + + Value *InData = new LoadInst(InPtr, "", false, LastInst); + InData = new FPToUIInst(InData, VectorInt, "", LastInst); + new StoreInst(InData, OutPtr, false, LastInst); +} + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/llvm/llvm-opc.cpp b/llvm/llvm-opc.cpp new file mode 100644 index 0000000..cc8436c --- /dev/null +++ b/llvm/llvm-opc.cpp @@ -0,0 +1,4431 @@ +/* + * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + * + * This file provides LLVM IR generator in terms of basic block and trace. + */ + +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/DIBuilder.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/InlineCost.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm-debug.h" +#include "llvm-pass.h" +#include "llvm-translator.h" +#include "llvm-target.h" +#include "llvm-state.h" +#include "llvm-opc.h" + + +#define INLINE_THRESHOLD 100 /* max # inlined instructions */ +#define INLINE_INSTCOUNT 20 /* max instruction count for inlining a small function */ + +/* Options enabled by default. */ +static cl::opt<bool> DisableStateMapping("disable-sm", cl::init(false), + cl::cat(CategoryHQEMU), cl::desc("Disable state mapping")); + +/* Options Disabled by default. 
*/ +static cl::opt<bool> EnableSimplifyPointer("enable-simptr", cl::init(false), + cl::cat(CategoryHQEMU), cl::desc("Enable SimplifyPointer")); + + +TCGOpDef llvm_op_defs[] = { +#define DEF(s, oargs, iargs, cargs, flags) \ + { #s , oargs, iargs, cargs, iargs + oargs + cargs, flags }, +#include "tcg-opc.h" +#undef DEF +}; + +static IRFactory::FuncPtr OpcFunc[] = { +#define DEF(name, oargs, iargs, cargs, flags) &IRFactory::op_ ## name, +#include "tcg-opc.h" +#undef DEF +}; + +extern LLVMEnv *LLEnv; +extern hqemu::Mutex llvm_global_lock; +extern hqemu::Mutex llvm_debug_lock; + +/* + * IRFactory() + */ +IRFactory::IRFactory(LLVMTranslator *Trans) + : InitOnce(false), Translator(*Trans), EE(nullptr), + HostDisAsm(Translator.getHostDisAsm()), Helpers(Translator.getHelpers()), + BaseReg(Translator.getBaseReg()), GuestBaseReg(Translator.getGuestBaseReg()), + NI(Translator.getNotifyInfo()) +{ + /* Track TCG virtual registers. */ + Reg.resize(TCG_MAX_TEMPS); + + TCGContext *s = &tcg_ctx_global; + int NumGlobals = s->nb_globals; + for (int i = 0; i < NumGlobals; ++i) { + TCGTemp *T = &s->temps[i]; + if (T->type != TCG_TYPE_I32 && T->type != TCG_TYPE_I64) + hqemu_error("unsupported register type.\n"); + + int Base = (T->fixed_reg) ? T->reg : T->mem_reg; + intptr_t Off = (T->fixed_reg) ? -1 : T->mem_offset; + Reg[i].set(Base, Off, T->name); + } + + for (int i = 0; i < NumGlobals; ++i) { + TCGTemp *T1 = &s->temps[i]; + for (int j = i + 1; j < NumGlobals; ++j) { + TCGTemp *T2 = &s->temps[j]; + if (T1->fixed_reg || T2->fixed_reg) + continue; + if (Reg[j].Alias) + continue; + if (T1->mem_offset == T2->mem_offset && T1->type == T2->type) + Reg[j].Alias = &Reg[i]; + } + } + + Segment = 0; +#if defined(__x86_64__) && defined(__linux__) + if (GUEST_BASE) + Segment = 256; /* GS: 256 */ +#endif + + dbg() << DEBUG_LLVM << "LLVM IR Factory initialized.\n"; +} + +IRFactory::~IRFactory() +{ + if (EE) { + EE->UnregisterJITEventListener(Listener); + EE->removeModule(Mod); + delete Listener; + delete EE; + } +} + +void IRFactory::CreateSession(TraceBuilder *builder) +{ + Builder = builder; + + CreateJIT(); + InitializeTypes(); + + MF = new MDFactory(Mod); + ExitAddr = CONSTPtr((uintptr_t)tb_ret_addr); + + runPasses = true; + + /* Reset data structures. */ + StatePtr.clear(); + InlineCalls.clear(); + IndirectBrs.clear(); + CommonBB.clear(); + toErase.clear(); + toSink.clear(); + ClonedFuncs.clear(); + NI.reset(); +} + +void IRFactory::DeleteSession() +{ + if (Func) { + Func->removeFromParent(); + delete Func; + Func = nullptr; + } + delete MF; + DeleteJIT(); +} + +static void setHostAttrs(std::string &MCPU, std::vector<std::string> &MAttrs, + TargetOptions &Options) +{ + MCPU = sys::getHostCPUName(); + + StringMap<bool> HostFeatures; + sys::getHostCPUFeatures(HostFeatures); + for (auto &F : HostFeatures) + MAttrs.push_back((F.second ? "+" : "-") + F.first().str()); + + if (MCPU == "core-avx2" || MCPU == "haswell" || MCPU == "knl") + Options.AllowFPOpFusion = FPOpFusion::Fast; +} + +#if defined(ENABLE_MCJIT) +#if defined(LLVM_V35) +void IRFactory::CreateJIT() +{ + Module *InitMod = Translator.getModule(); + Context = &InitMod->getContext(); + Mod = new Module(InitMod->getModuleIdentifier(), *Context); + Mod->setDataLayout(InitMod->getDataLayout()); + Mod->setTargetTriple(InitMod->getTargetTriple()); + + DL = getDataLayout(Mod); + + /* Create JIT execution engine. 
*/ + std::string ErrorMsg, MCPU; + std::vector<std::string> MAttrs; + TargetOptions Options; + + setHostAttrs(MCPU, MAttrs, Options); + + EngineBuilder builder(Mod); + builder.setMCPU(MCPU); + builder.setMAttrs(MAttrs); + builder.setErrorStr(&ErrorMsg); + builder.setEngineKind(EngineKind::JIT); + builder.setOptLevel(CodeGenOpt::Default); + builder.setUseMCJIT(true); + builder.setMCJITMemoryManager(LLEnv->getMemoryManager().get()); + builder.setTargetOptions(Options); + + EE = builder.create(); + + if (!EE) + hqemu_error("%s\n", ErrorMsg.c_str()); + + /* Create JIT event listener and link target machine. */ + Listener = new EventListener(NI); + + EE->RegisterJITEventListener(Listener); + + /* Ask LLVM to reserve basereg. */ + auto TM = EE->getTargetMachine(); + auto TRI = const_cast<TargetRegisterInfo*>(TM->getRegisterInfo()); + TRI->setHQEMUReservedRegs(BaseReg[TCG_AREG0].Name); + + dbg() << DEBUG_LLVM << "LLVM MCJIT initialized.\n"; +} +#else +void IRFactory::CreateJIT() +{ + Module *InitMod = Translator.getModule(); + Context = &InitMod->getContext(); + std::unique_ptr<Module> Owner( + new Module(InitMod->getModuleIdentifier(), *Context)); + Mod = Owner.get(); + Mod->setDataLayout(InitMod->getDataLayout()); + Mod->setTargetTriple(InitMod->getTargetTriple()); + + DL = getDataLayout(Mod); + + /* Create JIT execution engine. */ + std::string ErrorMsg, MCPU; + std::vector<std::string> MAttrs; + TargetOptions Options; + + setHostAttrs(MCPU, MAttrs, Options); + + EngineBuilder builder(std::move(Owner)); + builder.setMCPU(MCPU); + builder.setMAttrs(MAttrs); + builder.setErrorStr(&ErrorMsg); + builder.setEngineKind(EngineKind::JIT); + builder.setOptLevel(CodeGenOpt::Default); + builder.setMCJITMemoryManager(LLEnv->getMemoryManager()); + builder.setTargetOptions(Options); + + EE = builder.create(); + + if (!EE) + hqemu_error("%s\n", ErrorMsg.c_str()); + + /* Create JIT event listener and link target machine. */ + Listener = new EventListener(NI); + EE->RegisterJITEventListener(Listener); + +#if LLVM_USE_INTEL_JITEVENTS + IntelJIT = JITEventListener::createIntelJITEventListener(); + EE->RegisterJITEventListener(IntelJIT); +#endif + + /* Ask LLVM to reserve basereg. 
*/ + auto TM = EE->getTargetMachine(); + auto MII = const_cast<MCInstrInfo *>(TM->getMCInstrInfo()); + MII->setHQEMUExitAddr((unsigned long)tb_ret_addr); + + dbg() << DEBUG_LLVM << "LLVM MCJIT initialized.\n"; +} +#endif + +void IRFactory::DeleteJIT() +{ + EE->UnregisterJITEventListener(Listener); +#if LLVM_USE_INTEL_JITEVENTS + EE->UnregisterJITEventListener(IntelJIT); + delete IntelJIT; +#endif + EE->removeModule(Mod); + delete Listener; + delete EE; + delete Mod; + EE = nullptr; +} + +Function *IRFactory::ResolveFunction(std::string Name) +{ + Function *NF = Mod->getFunction(Name); + if(NF) + return NF; + + ValueToValueMapTy VMap; + Module *InitMod = Translator.getModule(); + Function *F = InitMod->getFunction(Name); + if (!F) + IRError("%s: unknown function %s.\n", __func__, Name.c_str()); + + NF = Function::Create(cast<FunctionType>(F->getType()->getElementType()), + F->getLinkage(), F->getName(), Mod); + NF->copyAttributesFrom(F); + VMap[F] = NF; + + if (Helpers.find(Name) != Helpers.end() && !F->isDeclaration()) { + Function::arg_iterator DestI = NF->arg_begin(); + for (auto J = F->arg_begin(); J != F->arg_end(); ++J) { + DestI->setName(J->getName()); + VMap[&*J] = &*DestI++; + } + SmallVector<ReturnInst*, 8> Returns; + CloneFunctionInto(NF, F, VMap, /*ModuleLevelChanges=*/true, Returns); + } + + ClonedFuncs.insert(NF); + return NF; +} + +#else +void IRFactory::CreateJIT() +{ + if (InitOnce) + return; + + Context = Translator.getContext(); + Mod = Translator.getModule(); + DL = getDataLayout(Mod); + + /* Create JIT execution engine. */ + std::string ErrorMsg, MCPU; + std::vector<std::string> MAttrs; + TargetOptions Options; + + setHostAttrs(MCPU, MAttrs, Options); + + EngineBuilder builder(Mod); + builder.setMCPU(MCPU); + builder.setMAttrs(MAttrs); + builder.setAllocateGVsWithCode(false); + builder.setJITMemoryManager(LLEnv->getMemoryManager().get()); + builder.setErrorStr(&ErrorMsg); + builder.setEngineKind(EngineKind::JIT); + builder.setOptLevel(CodeGenOpt::Default); + builder.setTargetOptions(Options); + + EE = builder.create(); + + if (!EE) + hqemu_error("%s\n", ErrorMsg.c_str()); + + /* Create JIT event listener and link target machine. */ + Listener = new EventListener(NI); + + EE->RegisterJITEventListener(Listener); + EE->DisableLazyCompilation(false); + + /* Ask LLVM to reserve basereg. */ + auto TM = EE->getTargetMachine(); + auto TRI = const_cast<TargetRegisterInfo*>(TM->getRegisterInfo()); + TRI->setHQEMUReservedRegs(BaseReg[TCG_AREG0].Name); + + /* Bind addresses to external symbols. */ + SymbolMap &Symbols = Translator.getSymbols(); + for (auto I = Symbols.begin(), E = Symbols.end(); I != E; ++I) { + std::string Name = I->first; + if (!Mod->getNamedValue(Name)) + continue; + EE->updateGlobalMapping(Mod->getNamedValue(Name), (void*)I->second); + } + + dbg() << DEBUG_LLVM << "LLVM JIT initialized.\n"; + + InitOnce = true; +} + +void IRFactory::DeleteJIT() +{ + /* Do nothing with the old JIT. */ +} + +Function *IRFactory::ResolveFunction(std::string Name) +{ + Function *F = Mod->getFunction(Name); + if (!F) + IRError("%s: unknown function %s.\n", __func__, Name.c_str()); + return F; +} +#endif + +/* Initialize basic types that will be used during IR conversion. 
*/ +void IRFactory::InitializeTypes() +{ + VoidTy = Type::getVoidTy(*Context); + Int8Ty = IntegerType::get(*Context, 8); + Int16Ty = IntegerType::get(*Context, 16); + Int32Ty = IntegerType::get(*Context, 32); + Int64Ty = IntegerType::get(*Context, 64); + Int128Ty = IntegerType::get(*Context, 128); + + IntPtrTy = DL->getIntPtrType(*Context); + Int8PtrTy = Type::getInt8PtrTy(*Context, 0); + Int16PtrTy = Type::getInt16PtrTy(*Context, 0); + Int32PtrTy = Type::getInt32PtrTy(*Context, 0); + Int64PtrTy = Type::getInt64PtrTy(*Context, 0); + + FloatTy = Type::getFloatTy(*Context); + DoubleTy = Type::getDoubleTy(*Context); +} + +/* Get the function pointer of the IR converion routines. */ +void *IRFactory::getOpcFunc() +{ + return OpcFunc; +} + + +/* Get the CPU pointer. + * If the CPU pointer is not in the first block of function F, return null. */ +Instruction *IRFactory::getDefaultCPU(Function &F) +{ + if (!CPU) + return nullptr; + if (!CPU->getParent() || CPU->getParent() != &F.getEntryBlock()) + return nullptr; + return CPU; +} + +static inline std::string getGuestSymbol(target_ulong pc) +{ +#if defined(CONFIG_USER_ONLY) + hqemu::MutexGuard locked(llvm_global_lock); + + std::string Symbol = lookup_symbol(pc); + if (Symbol != "") + Symbol = "<" + Symbol + ">:"; + return Symbol; +#else + return ""; +#endif +} + +/* Prepare LLVM Function, initial BasicBlocks and variable declaration. */ +void IRFactory::CreateFunction() +{ + target_ulong pc = Builder->getEntryNode()->getGuestPC(); + std::string Name = getGuestSymbol(pc) + + Builder->getPCString(Builder->getEntryNode()); + + dbg() << DEBUG_LLVM << "Requested trace info: pc " + << format("0x%" PRIx, pc) << " length " << Builder->getNumNodes() + << "\n"; + + FunctionType *FuncTy = FunctionType::get(IntPtrTy, false); + Func = Function::Create(FuncTy, GlobalVariable::ExternalLinkage, Name, Mod); + Func->setCallingConv(CallingConv::C); + Func->addFnAttr(Attribute::NoUnwind); + Func->addFnAttr(Attribute::Naked); + Func->addFnAttr("hqemu"); + + /* Prepare all basic blocks. */ + InitBB = BasicBlock::Create(*Context, "init", Func); + ExitBB = BasicBlock::Create(*Context, "exit", Func); + CurrBB = BasicBlock::Create(*Context, "entry", Func); + LastInst = BranchInst::Create(CurrBB, InitBB); + new UnreachableInst(*Context, ExitBB); + + /* Setup base register for CPUArchState pointer, and register for + * guest_base. */ + for (int i = 0; i < TCG_TARGET_NB_REGS; i++) + BaseReg[i].Base = nullptr; + + BaseRegister &CPUReg = BaseReg[TCG_AREG0]; + char Constraint[16] = {'\0'}; + sprintf(Constraint, "={%s}", CPUReg.Name.c_str()); + auto IA = InlineAsm::get(FunctionType::get(Int8PtrTy, false), "", + Constraint, true); + CPUReg.Base = CallInst::Create(IA, "cpu", LastInst); + + /* Set special register for guest base if necessary. */ + GuestBaseReg.Base = CONSTPtr(GUEST_BASE); + if (GuestBaseReg.Name != "") { + sprintf(Constraint, "={%s}", GuestBaseReg.Name.c_str()); + IA = InlineAsm::get(FunctionType::get(Int8PtrTy, false), "", + Constraint, true); + GuestBaseReg.Base = new PtrToIntInst( + CallInst::Create(IA, "", LastInst), + IntPtrTy, "guest_base", LastInst); + } + + CPU = CPUReg.Base; + CPUStruct = new BitCastInst(CPU, CPUReg.Ty, "cpu.struct", LastInst); + GEPInsertPos = CPUStruct; +} + +/* Prepare an LLVM BasicBlock for a new guest block. 
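
CreateFunction() above leaves the trace function with an "init" block (holding the pinned CPUArchState pointer and the hoisted state-pointer GEPs), the "entry" block, and a placeholder "exit" block. Guest-state accesses emitted for the blocks prepared below then reduce to that pinned base plus a constant byte offset, as implemented by StatePointer() and LoadState() further down. A minimal sketch of that addressing, with illustrative names:

    #include <cstdint>
    #include <cstring>

    // Minimal sketch of guest-state addressing: one pinned base pointer
    // (the CPUArchState register) plus a constant byte offset, which is
    // what StatePointer() builds and LoadState()/StoreState() consume.
    static uint32_t model_load_state_i32(const void *env, intptr_t off)
    {
        uint32_t v;
        std::memcpy(&v, static_cast<const char *>(env) + off, sizeof v);
        return v;
    }
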
*/ +void IRFactory::CreateBlock() +{ + GraphNode *CurrNode = Builder->getCurrNode(); + bool isEntryNode = CurrNode == Builder->getEntryNode(); + std::string pc = Builder->getPCString(CurrNode); + + dbg() << DEBUG_LLVM << " - Process block pc " + << format("0x%" PRIx, CurrNode->getGuestPC()) << "\n"; + + if (!isEntryNode) + CurrBB = BasicBlock::Create(*Context, pc, Func); + + LastInst = BranchInst::Create(ExitBB, CurrBB); + Builder->setBasicBlock(CurrNode, CurrBB); + + /* Check if the register has legal type. */ + int NumGlobals = tcg_ctx.nb_globals; + int NumTemps = tcg_ctx.nb_temps; + for (int i = 0; i < NumTemps; ++i) { + TCGTemp *T = &tcg_ctx.temps[i]; + if (T->type != TCG_TYPE_I32 && T->type != TCG_TYPE_I64) + hqemu_error("unsupported register type.\n"); + } + + /* Initialize global registers. */ + for (int i = 0; i < NumGlobals; ++i) { + TCGTemp *T = &tcg_ctx.temps[i]; + int State = (T->fixed_reg) ? Register::STATE_REV | Register::STATE_MEM : + Register::STATE_MEM; + int Size = (T->type == TCG_TYPE_I32) ? 32 : 64; + Type *Ty = (T->type == TCG_TYPE_I32) ? Int32Ty : Int64Ty; + Reg[i].reset(State, Size, Ty); + } + + /* Initialize temporary registers. */ + for (int i = NumGlobals; i < NumTemps; ++i) { + TCGTemp *T = &tcg_ctx.temps[i]; + int State = (T->temp_local) ? Register::STATE_LOC : + Register::STATE_TMP; + int Size = (T->type == TCG_TYPE_I32) ? 32 : 64; + Type *Ty = (T->type == TCG_TYPE_I32) ? Int32Ty : Int64Ty; + Reg[i].reset(State, Size, Ty); + } + + Labels.clear(); + +#ifdef VERIFY_TB + Function *F = ResolveFunction("helper_verify_tb"); + SmallVector<Value *, 4> Params; + Params.push_back(CPUStruct); + Params.push_back(CONST32(CurrNode->getTB()->id)); + CallInst *CI = CallInst::Create(F, Params, "", LastInst); + MF->setConst(CI); +#endif +} + + +/* Wrapper function to set an unconditional branch. */ +void IRFactory::setSuccessor(BranchInst *BI, BasicBlock *BB) +{ + BI->setSuccessor(0, BB); +} + +/* Determine whether we should inline a helper function or not. */ +int IRFactory::AnalyzeInlineCost(CallSite CS) +{ + Function *Callee = CS.getCalledFunction(); + HelperInfo *Helper = Helpers[Callee->getName()]; + int InlineCost = INLINE_THRESHOLD - Helper->Metrics.NumInsts; + unsigned ArgNo = 0; + + if (Helper->Metrics.NumInsts <= INLINE_INSTCOUNT) + return 1; + + InlineCost *= InlineConstants::InstrCost; + for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); + I != E; ++I, ++ArgNo) { + InlineCost -= InlineConstants::InstrCost; + if (isa<AllocaInst>(I)) + InlineCost += Helper->ArgumentWeights[ArgNo].AllocaWeight; + else if (isa<Constant>(I)) + InlineCost += Helper->ArgumentWeights[ArgNo].ConstantWeight; + } + + return InlineCost; +} + + +/* Perform helper function inlining. */ +void IRFactory::ProcessInline() +{ + while (!InlineCalls.empty()) { + CallInst *CI = static_cast<CallInst *>(InlineCalls.back()); + InlineCalls.pop_back(); + InlineFunc(CI); + } +} + +void IRFactory::VerifyFunction(Function &F) +{ + if (DM.getDebugMode() & DEBUG_VERIFY) + verifyFunction(F, &DM.debug()); +} + +/* Format function to a legal format and inline calls. Be sure to make the + * function in a well form before doing any furthur optimization (i.e. inlining + * calls). Otherwise, the optimization may fail or the result may be wrong. */ +void IRFactory::PreProcess() +{ + dbg() << DEBUG_LLVM << __func__ << " entered.\n"; + + ProcessErase(toErase); + + /* Insert terminator instruction to basic blocks that branch to ExitBB. + * This could happen when the last TCG opc is a call instruction. 
*/ + for (auto PI = pred_begin(ExitBB), PE = pred_end(ExitBB); PI != PE; PI++) { + Instruction *InsertPos = (*PI)->getTerminator(); + new UnreachableInst(*Context, InsertPos); + toErase.push_back(InsertPos); + } + ProcessErase(toErase); + ExitBB->eraseFromParent(); + + /* Remove instructions after indirect branches. */ + std::set<Instruction *> AfterIB; + for (unsigned i = 0, e = IndirectBrs.size(); i != e; ++i) { + BasicBlock *BB = IndirectBrs[i]->getParent(); + for (auto I = ++BasicBlock::iterator(IndirectBrs[i]), E = BB->end(); + I != E; ++I) + AfterIB.insert(&*I); + } + for (auto I = AfterIB.begin(), E = AfterIB.end(); I != E; ++I) + toErase.push_back(*I); + ProcessErase(toErase); + + /* Sink blocks to the end. */ + Function::iterator InsertPos = Func->end(); + Function::BasicBlockListType &Blocks = Func->getBasicBlockList(); + for (unsigned i = 0, e = toSink.size(); i != e; ++i) { + if (&*InsertPos == toSink[i]) + continue; + Blocks.splice(InsertPos, Blocks, toSink[i]); + } + + VerifyFunction(*Func); + + /* Inline helper functions. */ + ProcessInline(); + + SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> BackEdges; + FindFunctionBackedges(*Func, BackEdges); + + TraceInfo *Trace = Builder->getTrace(); + Trace->NumLoop = BackEdges.size(); + dbg() << DEBUG_LLVM << __func__ << ": trace formation with pc " + << format("0x%" PRIx, Trace->getEntryPC()) + << " length " << Trace->getNumBlock() + << " is_loop " << (Trace->NumLoop ? true : false) << "\n"; + +#if 1 || defined(CONFIG_SOFTMMU) + if (Trace->NumLoop) { + intptr_t Offset = offsetof(CPUState, tcg_exit_req) - ENV_OFFSET; + Value *ExitRequestPtr = GetElementPtrInst::CreateInBounds(CPU, + CONSTPtr(Offset), + "", InitBB->getTerminator()); + ExitRequestPtr = new BitCastInst(ExitRequestPtr, Int32PtrTy, + "tcg_exit_req", + InitBB->getTerminator()); + + /* Create the exit stub. 
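
The back-edge rewrite below lets a looping trace be interrupted: each back edge becomes a poll of cpu->tcg_exit_req, and the exit stub clears the flag before leaving the trace. A behavioural sketch of the rewritten loop follows; the real IR uses volatile loads and stores of the flag, and std::atomic appears here only to keep the sketch well defined:

    #include <atomic>
    #include <cstdint>

    // What each loop back edge becomes after PreProcess(): keep looping
    // while tcg_exit_req is clear; once it is raised, clear it in the
    // exit stub and leave the trace so control returns to the dispatcher.
    static void model_loop_back_edge(std::atomic<uint32_t> &tcg_exit_req)
    {
        for (;;) {
            // ... translated guest blocks of the loop body ...
            if (tcg_exit_req.load() == 0)
                continue;              // branch back to the loop header
            tcg_exit_req.store(0);     // exit stub acknowledges the request
            break;                     // exit toward tb_ret_addr
        }
    }
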
*/ + for (unsigned i = 0, e = BackEdges.size(); i != e; ++i) { + BasicBlock *TCGExitBB = BasicBlock::Create(*Context, "exit", Func); + LastInst = BranchInst::Create(TCGExitBB, TCGExitBB); + StoreInst *SI = new StoreInst(CONST32(0), ExitRequestPtr, true, LastInst); + InsertExit(0); + LastInst->eraseFromParent(); + + MF->setExit(SI); + + auto BackEdgeBB = const_cast<BasicBlock*>(BackEdges[i].first); + auto LoopHeader = const_cast<BasicBlock*>(BackEdges[i].second); + auto BI = const_cast<TerminatorInst *>(BackEdgeBB->getTerminator()); + + toErase.push_back(BI); + + Value *ExitRequest = new LoadInst(ExitRequestPtr, "", true, BI); + Value *Cond = new ICmpInst(BI, ICmpInst::ICMP_EQ, ExitRequest, + CONST32(0), ""); + BI = BranchInst::Create(LoopHeader, TCGExitBB, Cond, BI); + BI->getParent()->setName("loopback"); + MF->setLoop(BI); + } + } +#else + if (Trace->NumLoop) { + for (unsigned i = 0, e = BackEdges.size(); i != e; ++i) { + auto BackEdgeBB = const_cast<BasicBlock*>(BackEdges[i].first); + auto BI = const_cast<TerminatorInst *>(BackEdgeBB->getTerminator()); + BI->getParent()->setName("loopback"); + MF->setLoop(BI); + + for (auto BI = BackEdgeBB->begin(), BE = BackEdgeBB->end(); BI != BE; ++BI) { + if (auto SI = dyn_cast<StoreInst>(BI)) { + intptr_t Off = 0; + Value *Base = getBaseWithConstantOffset(DL, getPointerOperand(SI), Off); + if (Base == CPU && isStateOfPC(Off)) + toErase.push_back(SI); + } + } + } + } +#endif + + ProcessErase(toErase); + + if (DM.getDebugMode() & DEBUG_IR) { + hqemu::MutexGuard locked(llvm_debug_lock); + Func->print(DM.debug()); + } +} + +void IRFactory::InitializeLLVMPasses(legacy::FunctionPassManager *FPM) +{ + auto TM = EE->getTargetMachine(); +#if defined(LLVM_V35) + TM->addAnalysisPasses(*FPM); + FPM->add(new DataLayoutPass(Mod)); + FPM->add(createBasicTargetTransformInfoPass(TM)); +#else + PassRegistry &PassReg = *PassRegistry::getPassRegistry(); + initializeTargetTransformInfoWrapperPassPass(PassReg); + + FPM->add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); +#endif +} + +void IRFactory::Optimize() +{ +#define addPass(PM, P) do { PM->add(P); } while(0) +#define addPassOptional(PM, P, Disable) \ + do { \ + if (!Disable) PM->add(P); \ + } while(0) + +#if defined(ENABLE_PASSES) + if (runPasses) { + legacy::FunctionPassManager *FPM = new legacy::FunctionPassManager(Mod); + + InitializeLLVMPasses(FPM); + + addPass(FPM, createProfileExec(this)); + addPass(FPM, createCombineGuestMemory(this)); + addPass(FPM, createCombineZExtTrunc()); + addPassOptional(FPM, createStateMappingPass(this), DisableStateMapping); + addPass(FPM, createPromoteMemoryToRegisterPass()); + addPass(FPM, createCombineCasts(this)); + addPassOptional(FPM, createSimplifyPointer(this), !EnableSimplifyPointer); + addPass(FPM, createAggressiveDCEPass()); + addPass(FPM, createCFGSimplificationPass()); + addPass(FPM, createInstructionCombiningPass()); + addPass(FPM, createRedundantStateElimination(this)); + addPass(FPM, createCombineCasts(this)); + + FPM->run(*Func); + + delete FPM; + } +#endif + +#undef addPass +#undef addPassOptional +} + + +/* Legalize LLVM IR after running the pre-defined passes. */ +void IRFactory::PostProcess() +{ + dbg() << DEBUG_LLVM << __func__ << " entered.\n"; + +#if defined(ENABLE_MCJIT) + for (auto I = ClonedFuncs.begin(), E = ClonedFuncs.end(); I != E; ++I) { + Function *F = *I; + if (!F->isDeclaration()) + F->removeFromParent(); + } + /* Bind addresses to external symbols. 
*/ + SymbolMap &Symbols = Translator.getSymbols(); + for (auto I = Symbols.begin(), E = Symbols.end(); I != E; ++I) { + std::string Name = I->first; + if (!Mod->getNamedValue(Name)) + continue; + EE->updateGlobalMapping(Mod->getNamedValue(Name), (void*)I->second); + } +#endif + + if (DM.getDebugMode() & DEBUG_IR_OPT) { + hqemu::MutexGuard locked(llvm_debug_lock); + Func->print(DM.debug()); + } +} + +/* Legalize LLVM IR after running the pre-defined passes. */ +void IRFactory::FinalizeObject() +{ + dbg() << DEBUG_LLVM << __func__ << " entered.\n"; + + uintptr_t Code = (uintptr_t)NI.Code; + uint32_t Size = NI.Size; + +#if defined(ENABLE_MCJIT) + for (unsigned i = 0, e = NI.Patches.size(); i != e; ++i) { + NotifyInfo::PatchInfo &Patch = NI.Patches[i]; + uintptr_t Addr = Patch.Addr; + code_ostream OS(Addr); + + /* If the address to patch is outside this code region, skip this + * invalid patch point. Actually this should not happen, but LLVM v35 + * seems to report such invalid address. */ + if (Addr >= Code + Size) + continue; + if (Patch.Type == PATCH_EXIT_TB) { +#if defined(LLVM_V35) && defined(TCG_TARGET_I386) + EmitByte(OS, 0xE9); + EmitConstant(OS, (uintptr_t)tb_ret_addr - Addr - 5, 4); +#endif + } else if (Patch.Type == PATCH_TRACE_BLOCK_CHAINING) { +#if defined(TCG_TARGET_I386) + unsigned NumSkip = 3 - Addr % 4; + OS.Skip(NumSkip); + EmitByte(OS, 0xE9); + EmitConstant(OS, 0, 4); + NI.ChainSlot[Patch.Idx].Addr = Addr + NumSkip; +#elif defined(TCG_TARGET_PPC64) + unsigned NumSkip = 0; + if (Addr & 7) + NumSkip = 4; + OS.Skip(NumSkip); + EmitConstant(OS, 0x48000000 | (16 & 0x3fffffc), 4); /* b .+16 */ + EmitConstant(OS, 0x60000000, 4); /* nop */ + EmitConstant(OS, 0x7C0903A6 | (12 << 21), 4); /* mtctr r12 */ + EmitConstant(OS, 0x4E800420, 4); /* bctr */ + NI.ChainSlot[Patch.Idx].Addr = Addr + NumSkip; +#else + NI.ChainSlot[Patch.Idx].Addr = Addr; +#endif + } + } +#endif + + /* Flush instruction cache */ + flush_icache_range(Code, Code + Size); + + if (DM.getDebugMode() & DEBUG_OUTASM) { + hqemu::MutexGuard locked(llvm_debug_lock); + if (HostDisAsm) + HostDisAsm->PrintOutAsm((uint64_t)Code, (uint64_t)Size); + else { + auto &OS = DM.debug(); + OS << "\nOUT: [size=" << Size << "]\n"; + disas(stderr, (void *)Code, Size); + OS << "\n"; + } + } +} + +/* Start the LLVM JIT compilation. */ +void IRFactory::Compile() +{ + dbg() << DEBUG_LLVM + << "Translator " << Translator.getID() << " starts compiling...\n"; + + /* Run optimization passes. */ + PreProcess(); + Optimize(); + PostProcess(); + + VerifyFunction(*Func); + + /* JIT. 
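
At this point Compile() has run PreProcess, Optimize and PostProcess; next it lets MCJIT emit host code and then calls FinalizeObject() above to patch the result. On x86, FinalizeObject() lays out each chain slot so that the 4-byte jump displacement after the 0xE9 opcode starts on a 4-byte boundary; the sketch below checks that arithmetic, on the assumption that the alignment is what lets later block chaining rewrite the displacement with a single naturally aligned store:

    #include <cassert>
    #include <cstdint>

    // Alignment rule used for the x86 chain slots: skipping (3 - Addr % 4)
    // bytes puts the 0xE9 opcode at an address congruent to 3 mod 4, so the
    // 32-bit displacement that follows starts on a 4-byte boundary.
    static void check_chain_slot_alignment(uintptr_t addr)
    {
        unsigned num_skip = 3 - addr % 4;
        uintptr_t opcode_at = addr + num_skip;   // 0xE9 is emitted here
        assert(opcode_at % 4 == 3);
        assert((opcode_at + 1) % 4 == 0);        // rel32 operand is aligned
    }
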
*/ + NI.Func = Func; + EE->getPointerToFunction(Func); + EE->finalizeObject(); + + FinalizeObject(); + + dbg() << DEBUG_LLVM << __func__ << ": done.\n"; +} + +PointerType *IRFactory::getPointerTy(int Size, unsigned AS) +{ + switch (Size) { + case 32: return Type::getInt32PtrTy(*Context, AS); + case 64: return Type::getInt64PtrTy(*Context, AS); + case 16: return Type::getInt16PtrTy(*Context, AS); + case 8: return Type::getInt8PtrTy(*Context, AS); + default: + IRError("%s: invalid bit type %d.\n", __func__, Size); + } + return nullptr; +} + +Value *IRFactory::getExtendValue(Value *V, Type *Ty, int opc) +{ + int OldSize = DL->getTypeSizeInBits(V->getType()); + int NewSize = DL->getTypeSizeInBits(Ty); + + if (OldSize > NewSize) + IRError("%s: invalid size old=%d new=%d\n", __func__, OldSize, NewSize); + if (OldSize == NewSize) + return V; + + if (opc & MO_SIGN) + return SEXT(V, Ty); + return ZEXT(V, Ty); +} + +Value *IRFactory::getTruncValue(Value *V, int opc) +{ + int OldSize = DL->getTypeSizeInBits(V->getType()); + int NewSize = getSizeInBits(opc); + + if (OldSize < NewSize) + IRError("%s: invalid size old=%d new=%d\n", __func__, OldSize, NewSize); + if (OldSize == NewSize) + return V; + + Type *Ty = Type::getIntNTy(*Context, NewSize); + return TRUNC(V, Ty); +} + +Value *IRFactory::ConvertEndian(Value *V, int opc) +{ +#ifdef NEED_BSWAP + switch (opc & MO_SIZE) { + case MO_8: return V; + case MO_16: return BSWAP16(V); + case MO_32: return BSWAP32(V); + case MO_64: return BSWAP64(V); + default: + IRError("%s: invalid size (opc=%d)\n", __func__, opc); + break; + } + return V; +#else + return V; +#endif +} + +Value *IRFactory::CreateBSwap(Type *Ty, Value *V, Instruction *InsertPos) +{ + SmallVector<Value *, 4> Params; + Type *Tys[] = { Ty }; + + Function *Fn = Intrinsic::getDeclaration(Mod, Intrinsic::bswap, Tys); + Params.push_back(V); + return CallInst::Create(Fn, Params, "", InsertPos); +} + +Value *IRFactory::ConvertCPUType(Function *F, int Idx, Instruction *InsertPos) +{ + Type *ParamTy = F->getFunctionType()->getParamType(Idx); + if (CPUStruct->getType() != ParamTy) + return new BitCastInst(CPU, ParamTy, "", InsertPos); + return CPUStruct; +} + +Value *IRFactory::ConvertCPUType(Function *F, int Idx, BasicBlock *InsertPos) +{ + Type *ParamTy = F->getFunctionType()->getParamType(Idx); + if (CPUStruct->getType() != ParamTy) + return new BitCastInst(CPU, ParamTy, "", InsertPos); + return CPUStruct; +} + +/* Return true if the offset is for the state of PC. 
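
isStateOfPC() below is what makes loads and stores of the guest program counter volatile (see StoreState(), op_ld_i32 and op_st_i32), so they survive optimization when the PC value is needed to link blocks or recover guest state. A simplified sketch of the check for an i386 guest, using a hypothetical state layout rather than the real CPUX86State:

    #include <cstddef>
    #include <cstdint>

    // Hypothetical state layout; the real code uses offsetof(CPUArchState, eip)
    // and TARGET_LONG_SIZE for the guest being emulated.
    struct ModelX86State { uint32_t regs[8]; uint32_t eip; };

    // An access at byte offset 'off' touches the guest PC iff it overlaps
    // the instruction-pointer field.
    static bool model_is_state_of_pc(intptr_t off)
    {
        const intptr_t ip_off = offsetof(ModelX86State, eip);
        return off >= ip_off && off < ip_off + (intptr_t)sizeof(uint32_t);
    }
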
*/ +bool IRFactory::isStateOfPC(intptr_t Off) +{ + intptr_t IPOffset; +#if defined(TARGET_ALPHA) + IPOffset = offsetof(CPUArchState, pc); +#elif defined(TARGET_AARCH64) + IPOffset = offsetof(CPUArchState, pc); +#elif defined(TARGET_ARM) + IPOffset = offsetof(CPUArchState, regs[15]); +#elif defined(TARGET_CRIS) + IPOffset = offsetof(CPUArchState, pc); +#elif defined(TARGET_I386) + IPOffset = offsetof(CPUArchState, eip); +#elif defined(TARGET_M68K) + IPOffset = offsetof(CPUArchState, pc); +#elif defined(TARGET_MICROBLAZE) + IPOffset = offsetof(CPUArchState, sregs[0]); +#elif defined(TARGET_MIPS) + IPOffset = offsetof(CPUArchState, active_tc.PC); +#elif defined(TARGET_PPC) + IPOffset = offsetof(CPUArchState, nip); +#elif defined(TARGET_SH4) + IPOffset = offsetof(CPUArchState, pc); +#elif defined(TARGET_SPARC) + intptr_t IPOffset2; + IPOffset = offsetof(CPUArchState, pc); + IPOffset2 = offsetof(CPUArchState, npc); +#else +#error "unsupported processor type" +#endif + +#if defined(TARGET_ALPHA) || defined(TARGET_ARM) || defined(TARGET_AARCH64) || \ + defined(TARGET_CRIS) || defined(TARGET_I386) || defined(TARGET_M68K) || \ + defined(TARGET_MICROBLAZE) || defined(TARGET_MIPS) || defined(TARGET_PPC) || \ + defined(TARGET_SH4) + return (Off >= IPOffset && Off < IPOffset + TARGET_LONG_SIZE); +#elif defined(TARGET_SPARC) + return ((Off >= IPOffset && Off < IPOffset + TARGET_LONG_SIZE) || + (Off >= IPOffset2 && Off < IPOffset2 + TARGET_LONG_SIZE)); +#endif +} + +/* Trace building requires store IP instruction to link basic blocks. + * But in some archirecture, IP is promoted to register and we need to + * regenerate the store IP instruction. */ +void IRFactory::CreateStorePC(Instruction *InsertPos) +{ + for (int i = 0, e = tcg_ctx.nb_globals; i != e; ++i) { + Register ® = Reg[i]; + if (reg.isReg() && reg.isDirty()) { + if (isStateOfPC(reg.Off)) { + StoreState(reg, InsertPos); + reg.Demote(); + } + } + } +} + +/* Store dirty states back to CPUArchState in memory. */ +void IRFactory::SaveGlobals(int level, Instruction *InsertPos) +{ + if (level == COHERENCE_NONE) + return; + + int NumGlobals = tcg_ctx.nb_globals; + int NumTemps = tcg_ctx.nb_temps; + for (int i = 0; i < NumGlobals; ++i) { + Register ® = Reg[i]; + if (reg.isReg() && reg.isDirty()) + StoreState(reg, InsertPos); + reg.Demote(); + } + + if (level == COHERENCE_GLOBAL) + return; + + /* Store local registers to stack. */ + for (int i = NumGlobals; i < NumTemps; ++i) { + Register ® = Reg[i]; + if (reg.isReg() && reg.isLocal() && reg.isDirty()) + StoreState(reg, InsertPos); + reg.Demote(); + } +} + +/* Get or insert the pointer to the CPU register in the AddrCache. */ +Value *IRFactory::StatePointer(Register ®) +{ + intptr_t Off = reg.Off; + PointerType *PTy = (reg.Size == 32) ? Int32PtrTy : Int64PtrTy; + std::pair<intptr_t, Type *> Key(Off, PTy); + if (StatePtr.find(Key) == StatePtr.end()) { + std::string Name = isStateOfPC(Off) ? "pc" : reg.Name; + auto GEP = GetElementPtrInst::CreateInBounds(BaseReg[reg.Base].Base, + CONSTPtr(Off), "", GEPInsertPos); + StatePtr[Key] = new BitCastInst(GEP, PTy, Name, InitBB->getTerminator()); + } + return StatePtr[Key]; +} + +Value *IRFactory::StatePointer(Register ®, intptr_t Off, Type *PTy) +{ + if (!reg.isRev()) + IRError("%s: internal error.\n", __func__); + + std::pair<intptr_t, Type *> Key(Off, PTy); + if (StatePtr.find(Key) == StatePtr.end()) { + std::string Name = isStateOfPC(Off) ? 
"pc" : ""; + auto GEP = GetElementPtrInst::CreateInBounds(BaseReg[reg.Base].Base, + CONSTPtr(Off), "", GEPInsertPos); + StatePtr[Key] = new BitCastInst(GEP, PTy, Name, InitBB->getTerminator()); + } + return StatePtr[Key]; +} + +/* Retrieve value from CPUArchState. */ +Value *IRFactory::LoadState(Register ®) +{ + if (reg.isRev()) + return BaseReg[reg.Base].Base; + if (reg.isAlias()) + return LoadState(reg.getAlias()); + if (reg.isReg()) + return reg.getData(); + if (reg.isLocal()) { + if (!reg.AI) + reg.AI = CreateAlloca(reg.Ty, 0, "loc", InitBB->getTerminator()); + return new LoadInst(reg.AI, "", false, LastInst); + } + + /* If we go here, the state is not loaded into a LLVM virtual register. + * Load it from CPUArchState. */ + Value *V = new LoadInst(StatePointer(reg), "", false, LastInst); + reg.setData(V); + + return V; +} + +void IRFactory::StoreState(Register ®, Instruction *InsertPos) +{ +#ifdef ASSERT + int Size = DL->getTypeSizeInBits(reg.getData()->getType()); + if (Size != reg.Size) + IRError("%s: internal error\n", __func__); +#endif + if (reg.isRev()) + IRError("%s: fatal error\n", __func__); + if (reg.isLocal()) { + if (!reg.AI) + reg.AI = CreateAlloca(reg.Ty, 0, "loc", InitBB->getTerminator()); + new StoreInst(reg.getData(), reg.AI, false, InsertPos); + } else { + bool Volatile = isStateOfPC(reg.Off); + new StoreInst(reg.getData(), StatePointer(reg), Volatile, InsertPos); + } +} + + +/* + * TCG opcode to LLVM IR translation functions. + */ +void IRFactory::op_hotpatch(const TCGArg *args) +{ + IRDebug(INDEX_op_hotpatch); +} + +void IRFactory::op_annotate(const TCGArg *args) +{ + IRDebug(INDEX_op_annotate); + + uint32_t Annotation = *args; + if (Annotation == A_SetCC) { + if (LastInst && LastInst != &*LastInst->getParent()->begin()) + MF->setCondition(&*--BasicBlock::iterator(LastInst)); + } else if (Annotation == A_NoSIMDization) { + Builder->addAttribute(A_NoSIMDization); + } +} + +void IRFactory::op_jmp(const TCGArg *args) +{ + IRDebug(INDEX_op_jmp); + + Register &In = Reg[args[0]]; + Value *InData = LoadState(In); + + SaveGlobals(COHERENCE_ALL, LastInst); + if (!InData->getType()->isPointerTy()) + InData = ITP8(InData); + + IndirectBrInst *IB = IndirectBrInst::Create(InData, 1, LastInst); + MF->setExit(IB); +} + +/* + * op_discard() + * args[0]: In + */ +void IRFactory::op_discard(const TCGArg *args) +{ + IRDebug(INDEX_op_discard); + Register &In = Reg[args[0]]; + if (In.isReg()) + In.Demote(); +} + +/* + * op_set_label() + * args[0]: Label number + */ +void IRFactory::op_set_label(const TCGArg *args) +{ + IRDebug(INDEX_op_set_label); + + SaveGlobals(COHERENCE_ALL, LastInst); + + TCGArg label = args[0]; + if (Labels.find(label) == Labels.end()) + Labels[label] = BasicBlock::Create(*Context, "true_dest", Func); + + CurrBB = Labels[label]; + if (LastInst) { + if (LastInst != &*LastInst->getParent()->begin() && + isa<IndirectBrInst>(--BasicBlock::iterator(LastInst))) + LastInst->eraseFromParent(); + else + setSuccessor(LastInst, CurrBB); + } + + LastInst = BranchInst::Create(ExitBB, CurrBB); +} + +/* + * op_call() + * args[0] : [nb_oargs:16][nb_iargs:16] + * args[1~#nb_oargs] : out args + * args[1+#nb_oargs~#nb_iargs-2] : function parameters + * args[1+#nb_oargs+#nb_iargs-1] : function address + * args[1+#nb_oargs+#nb_iargs] : flags + */ +void IRFactory::op_call(const TCGArg *args) +{ + IRDebug(INDEX_op_call); + + TCGOp * const op = NI.Op; + int nb_oargs = op->callo; + int nb_iargs = op->calli; + int nb_params = nb_iargs; + tcg_insn_unit *func_addr = (tcg_insn_unit 
*)(intptr_t)args[nb_oargs + nb_iargs]; + int flags = args[nb_oargs + nb_iargs + 1]; + SmallVector<Value *, 4> Params; + + /* If the called function is an illegal helper, skip this trace. */ + if (isIllegalHelper((void *)func_addr)) { + Builder->Abort(); + return; + } + + /* Get function declaration from LLVM module. */ + TCGHelperMap &TCGHelpers = Translator.getTCGHelpers(); + if (TCGHelpers.find((uintptr_t)func_addr) == TCGHelpers.end()) + IRError("%s: cannot resolve funtion.\n", __func__); + + std::string FName = TCGHelpers[(uintptr_t)func_addr]; + Function *F = ResolveFunction(FName); + + std::set<std::string> &ConstHelpers = Translator.getConstHelpers(); + if (ConstHelpers.find(FName) != ConstHelpers.end()) + flags |= TCG_CALL_NO_READ_GLOBALS; + + /* Package the function parameters. + NOTE: There are situations where the numbers of given arguments + are greater than the *real* function parameters. Ex: + declare void foo(int64, int64); + and + call foo(int32, int32, int32, int32); + */ + int real_nb_params = F->getFunctionType()->getNumParams(); + if (nb_params == real_nb_params) { + for (int i = 0; i < real_nb_params; ++i) { + Type *ParamTy = F->getFunctionType()->getParamType(i); + Register &In = Reg[args[nb_oargs + i]]; + Value *InData = LoadState(In); + + size_t real_size = DL->getTypeSizeInBits(ParamTy); + size_t size = DL->getTypeSizeInBits(InData->getType()); + + if (ParamTy->isPointerTy() && !InData->getType()->isPointerTy()) + InData = ITP8(InData); + else if (real_size < size) + InData = TRUNC(InData, IntegerType::get(*Context, real_size)); + + if (InData->getType() != ParamTy) + InData = new BitCastInst(InData, ParamTy, "", LastInst); + Params.push_back(InData); + } + } else { + int idx = 0; + for (int i = 0; i < real_nb_params; ++i) { + Value *V = nullptr; + Type *ParamTy = F->getFunctionType()->getParamType(i); + size_t real_size = DL->getTypeSizeInBits(ParamTy); + size_t size, remain = real_size; + +next: + Register &In = Reg[args[nb_oargs + idx]]; + Value *InData = LoadState(In); + + size = DL->getTypeSizeInBits(InData->getType()); + if (size == real_size) { + if (InData->getType() != ParamTy) + InData = new BitCastInst(InData, ParamTy, "", LastInst); + Params.push_back(InData); + idx++; + } else { + if (remain == real_size) + V = ZEXT(InData, IntegerType::get(*Context, real_size)); + else { + InData = ZEXT(InData, ParamTy); + InData = SHL(InData, ConstantInt::get(ParamTy, real_size-remain)); + V = OR(V, InData); + } + + if (remain < size) + IRError("%s: fatal error.\n", __func__); + + remain -= size; + idx++; + + if (remain) + goto next; + + Params.push_back(V); + } + } + + if (idx != nb_params) + IRError("%s: num params not matched.\n", __func__); + } + + + /* Save global registers if this function is not TCG constant function. + Otherwise, mark this call instruction for state mapping use. 
+ The rules can be found in tcg_reg_alloc_call() in tcg/tcg.c */ + if (!(flags & TCG_CALL_NO_READ_GLOBALS)) + SaveGlobals(COHERENCE_GLOBAL, LastInst); + + /* handle COREMU's lightweight memory transaction helper */ + if (isLMTFunction(FName)) { + uint32_t Idx = NI.setRestorePoint(); + Value *ResVal = GetElementPtrInst::CreateInBounds(CPU, + CONSTPtr(offsetof(CPUArchState, restore_val)), "", LastInst); + ResVal = new BitCastInst(ResVal, Int32PtrTy, "", LastInst); + new StoreInst(CONST32(Idx), ResVal, true, LastInst); + } + + CallInst *CI = CallInst::Create(F, Params, "", LastInst); + + if (flags & TCG_CALL_NO_READ_GLOBALS) + MF->setConst(CI); + + /* Determine if this function can be inlined. */ + if (Helpers.find(FName) != Helpers.end()) { + bool MustInline = false; + HelperInfo *Helper = Helpers[FName]; + if (AnalyzeInlineCost(CallSite(CI)) > 0) { + MustInline = true; + InlineCalls.push_back(CI); + } + + if (!MustInline) { + Function *NoInlineF = ResolveFunction(Helper->FuncNoInline->getName()); + CI->setCalledFunction(NoInlineF); + } + } + + /* Format the return value. + NOTE: There are situations where the return value is split and + is used by different instructions. Ex: + int64 ret = call foo(); + ... = opcode ret[0..31]; + ... = opcode ret[32..64]; + */ + if (nb_oargs == 1) { + Register &Out = Reg[args[0]]; + Out.setData(CI, true); + } else if (nb_oargs > 1) { + Value *V = CI; + size_t size = DL->getTypeSizeInBits(F->getReturnType()); + size_t subsize = size / nb_oargs; + for (int i = 0; i < nb_oargs; ++i) { + Register &Out = Reg[args[i]]; + Value *OutData = TRUNC(V, IntegerType::get(*Context, subsize)); + Out.setData(OutData, true); + if (i != nb_oargs - 1) + V = LSHR(V, ConstantInt::get(IntegerType::get(*Context, size), subsize)); + } + } +} + +/* + * op_br() + * args[0]: Label number + */ +void IRFactory::op_br(const TCGArg *args) +{ + IRDebug(INDEX_op_br); + + SaveGlobals(COHERENCE_ALL, LastInst); + + TCGArg label = args[0]; + if (Labels.find(label) == Labels.end()) + Labels[label] = BasicBlock::Create(*Context, "direct_jump_tb", Func); + + setSuccessor(LastInst, Labels[label]); + LastInst = nullptr; +} + +/* + * op_mov_i32() + * args[0]: Out + * args[1]: In + */ +void IRFactory::op_mov_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_mov_i32); + + Register &Out = Reg[args[0]]; + Register &In = Reg[args[1]]; + + AssertType(Out.Size == 32); + + Value *InData = LoadState(In); + + size_t Size = DL->getTypeSizeInBits(InData->getType()); + if (Size != 32) + InData = TRUNC32(InData); + + Out.setData(InData, true); +} + +/* + * op_movi_i32() + * args[0]: Out + * args[1]: In (const value) + */ +void IRFactory::op_movi_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_movi_i32); + + Register &Out = Reg[args[0]]; + + AssertType(Out.Size == 32); + + Out.setData(CONST32(args[1]), true); +} + +static inline CmpInst::Predicate getPred(const TCGArg cond) +{ + CmpInst::Predicate pred = ICmpInst::BAD_ICMP_PREDICATE; + switch (cond) { + case TCG_COND_EQ: pred = ICmpInst::ICMP_EQ; break; + case TCG_COND_NE: pred = ICmpInst::ICMP_NE; break; + case TCG_COND_LT: pred = ICmpInst::ICMP_SLT; break; + case TCG_COND_GE: pred = ICmpInst::ICMP_SGE; break; + case TCG_COND_LE: pred = ICmpInst::ICMP_SLE; break; + case TCG_COND_GT: pred = ICmpInst::ICMP_SGT; break; + /* unsigned */ + case TCG_COND_LTU: pred = ICmpInst::ICMP_ULT; break; + case TCG_COND_GEU: pred = ICmpInst::ICMP_UGE; break; + case TCG_COND_LEU: pred = ICmpInst::ICMP_ULE; break; + case TCG_COND_GTU: pred = ICmpInst::ICMP_UGT; break; + default: + 
IRError("%s - unsupported predicate\n", __func__); + } + return pred; +} + +/* + * op_setcond_i32() + * args[0]: Out + * args[1]: In1 + * args[2]: In2 + * args[3]: In3 (condition code) + */ +void IRFactory::op_setcond_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_setcond_i32); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + CmpInst::Predicate Pred = getPred(args[3]); + + AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + Value *OutData = ICMP(InData1, InData2, Pred); + OutData = ZEXT32(OutData); + Out.setData(OutData, true); +} + +/* + * op_movcond_i32() + * args[0]: Out + * args[1]: In1 + * args[2]: In2 + * args[3]: In3 + * args[4]: In4 + * args[5]: In5 (condition code) + */ +void IRFactory::op_movcond_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_movcond_i32); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + Register &In3 = Reg[args[3]]; + Register &In4 = Reg[args[4]]; + CmpInst::Predicate Pred = getPred(args[5]); + + AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32 && + In3.Size == 32 && In4.Size == 32); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + Value *InData3 = LoadState(In3); + Value *InData4 = LoadState(In4); + Value *Cond = ICMP(InData1, InData2, Pred); + Value *OutData = SelectInst::Create(Cond, InData3, InData4, "", LastInst); + Out.setData(OutData, true); +} + +/* load/store */ +/* + * op_ld8u_i32() + * args[0]: Out (ret) + * args[1]: In1 (addr) + * args[2]: In2 (offset) + */ +void IRFactory::op_ld8u_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_ld8u_i32); + + Register &Out = Reg[args[0]]; + Register &In = Reg[args[1]]; + TCGArg Off = args[2]; + + AssertType(Out.Size == 32); + + Value *InData = LoadState(In); + if (InData->getType() != Int8PtrTy) + InData = CASTPTR8(InData); + + InData = GetElementPtrInst::CreateInBounds(InData, CONSTPtr(Off), "", LastInst); + InData = new LoadInst(InData, "", false, LastInst); + InData = ZEXT32(InData); + Out.setData(InData, true); +} + +void IRFactory::op_ld8s_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_ld8s_i32); + + Register &Out = Reg[args[0]]; + Register &In = Reg[args[1]]; + TCGArg Off = args[2]; + + AssertType(Out.Size == 32); + + Value *InData = LoadState(In); + if (InData->getType() != Int8PtrTy) + InData = CASTPTR8(InData); + + InData = GetElementPtrInst::CreateInBounds(InData, CONSTPtr(Off), "", LastInst); + InData = new LoadInst(InData, "", false, LastInst); + InData = SEXT32(InData); + Out.setData(InData, true); +} + +void IRFactory::op_ld16u_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_ld16u_i32); + + Register &Out = Reg[args[0]]; + Register &In = Reg[args[1]]; + TCGArg Off = args[2]; + + AssertType(Out.Size == 32); + + Value *InData = LoadState(In); + if (InData->getType() != Int8PtrTy) + InData = CASTPTR8(InData); + + InData = GetElementPtrInst::CreateInBounds(InData, CONSTPtr(Off), "", LastInst); + InData = CASTPTR16(InData); + InData = new LoadInst(InData, "", false, LastInst); + InData = ZEXT32(InData); + Out.setData(InData, true); +} + +void IRFactory::op_ld16s_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_ld16s_i32); + + Register &Out = Reg[args[0]]; + Register &In = Reg[args[1]]; + TCGArg Off = args[2]; + + AssertType(Out.Size == 32); + + Value *InData = LoadState(In); + if (InData->getType() != Int8PtrTy) + InData = CASTPTR8(InData); + + InData = GetElementPtrInst::CreateInBounds(InData, 
CONSTPtr(Off), "", LastInst); + InData = CASTPTR16(InData); + InData = new LoadInst(InData, "", false, LastInst); + InData = SEXT32(InData); + Out.setData(InData, true); +} + +/* + * op_ld_i32() + * args[0]: Out (ret) + * args[1]: In1 (addr) + * args[2]: In2 (offset) + */ +void IRFactory::op_ld_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_ld_i32); + + Register &Out = Reg[args[0]]; + Register &In = Reg[args[1]]; + TCGArg Off = args[2]; + + AssertType(Out.Size == 32); + + Value *InData; + if (In.isRev()) { + InData = StatePointer(In, Off, Int32PtrTy); + InData = new LoadInst(InData, "", false, LastInst); + if (isStateOfPC(Off)) + static_cast<LoadInst*>(InData)->setVolatile(true); + } else { + InData = LoadState(In); + if (InData->getType() != Int8PtrTy) + InData = CASTPTR8(InData); + InData = GetElementPtrInst::CreateInBounds(InData, CONSTPtr(Off), "", LastInst); + InData = CASTPTR32(InData); + InData = new LoadInst(InData, "", false, LastInst); + } + Out.setData(InData, true); +} + +void IRFactory::op_st8_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_st8_i32); + + Register &In1 = Reg[args[0]]; + Register &In2 = Reg[args[1]]; + TCGArg Off = args[2]; + + AssertType(In1.Size == 32); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + + InData1 = TRUNC8(InData1); + if (InData2->getType() != Int8PtrTy) + InData2 = CASTPTR8(InData2); + InData2 = GetElementPtrInst::CreateInBounds(InData2, CONSTPtr(Off), "", LastInst); + new StoreInst(InData1, InData2, false, LastInst); +} + +void IRFactory::op_st16_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_st16_i32); + + Register &In1 = Reg[args[0]]; + Register &In2 = Reg[args[1]]; + TCGArg Off = args[2]; + + AssertType(In1.Size == 32); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + + InData1 = TRUNC16(InData1); + if (InData2->getType() != Int8PtrTy) + InData2 = CASTPTR8(InData2); + InData2 = GetElementPtrInst::CreateInBounds(InData2, CONSTPtr(Off), "", LastInst); + InData2 = CASTPTR16(InData2); + new StoreInst(InData1, InData2, false, LastInst); +} + +/* + * op_st_i32() + * args[0]: In1 + * args[1]: In2 (base) + * args[2]: In3 (offset) + */ +void IRFactory::op_st_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_st_i32); + + Register &In1 = Reg[args[0]]; + Register &In2 = Reg[args[1]]; + TCGArg Off = args[2]; + + AssertType(In1.Size == 32); + + Value *InData1 = LoadState(In1); + + if (In2.isRev()) { + Value *InData2 = StatePointer(In2, Off, Int32PtrTy); + StoreInst *SI = new StoreInst(InData1, InData2, false, LastInst); + if (isStateOfPC(Off)) + SI->setVolatile(true); + } else { + Value *InData2 = LoadState(In2); + if (InData2->getType() != Int8PtrTy) + InData2 = CASTPTR8(InData2); + InData2 = GetElementPtrInst::CreateInBounds(InData2, CONSTPtr(Off), "", LastInst); + InData2 = CASTPTR32(InData2); + new StoreInst(InData1, InData2, false, LastInst); + } +} + +/* arith */ +/* + * op_add_i32() + * args[0]: Out + * args[1]: In1 + * args[2]: In2 + */ +void IRFactory::op_add_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_add_i32); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + Value *OutData; + if (In1.isRev()) { + intptr_t Off = static_cast<ConstantInt*>(InData2)->getSExtValue(); + OutData = StatePointer(In1, Off, Int32PtrTy); + } else + OutData = ADD(InData1, InData2); + + Out.setData(OutData, true); +} + +/* + * op_sub_i32() + * 
args[0]: Out + * args[1]: In1 + * args[2]: In2 + */ +void IRFactory::op_sub_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_sub_i32); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + Value *OutData = SUB(InData1, InData2); + Out.setData(OutData, true); +} + +void IRFactory::op_mul_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_mul_i32); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + Value *OutData = MUL(InData1, InData2); + Out.setData(OutData, true); +} + +void IRFactory::op_div_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_div_i32); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + Value *OutData = SDIV(InData1, InData2); + Out.setData(OutData, true); +} + +void IRFactory::op_divu_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_divu_i32); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + Value *OutData = UDIV(InData1, InData2); + Out.setData(OutData, true); +} + +void IRFactory::op_rem_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_rem_i32); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + Value *OutData = SREM(InData1, InData2); + Out.setData(OutData, true); +} + +void IRFactory::op_remu_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_remu_i32); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + Value *OutData = UREM(InData1, InData2); + Out.setData(OutData, true); +} + +void IRFactory::op_div2_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_div2_i32); + + Register &Out1 = Reg[args[0]]; + Register &Out2 = Reg[args[1]]; + Register &In1 = Reg[args[2]]; +#if 0 + Register &In2 = Reg[args[3]]; +#endif + Register &In3 = Reg[args[4]]; + + AssertType(Out1.Size == 32 && Out2.Size == 32 && + In1.Size == 32 && In3.Size == 32); + + Value *InData1 = LoadState(In1); +#if 0 + Value *InData2 = LoadState(In2); +#endif + Value *InData3 = LoadState(In3); + Value *OutData1 = SDIV(InData1, InData3); + Value *OutData2 = SREM(InData1, InData3); + Out1.setData(OutData1, true); + Out2.setData(OutData2, true); +} + +/* + * op_divu2_i32() + * args[0]: Out1 + * args[1]: Out2 + * args[2]: In1 + * args[3]: In2 + * args[4]: In3 + */ +void IRFactory::op_divu2_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_divu2_i32); + + Register &Out1 = Reg[args[0]]; + Register &Out2 = Reg[args[1]]; + Register &In1 = Reg[args[2]]; +#if 0 + Register &In2 = Reg[args[3]]; +#endif + Register &In3 = Reg[args[4]]; + + AssertType(Out1.Size == 32 && Out2.Size == 32 && + In1.Size == 32 && In3.Size == 32); + + Value *InData1 = 
LoadState(In1); +#if 0 + Value *InData2 = LoadState(In2); +#endif + Value *InData3 = LoadState(In3); + Value *OutData1 = UDIV(InData1, InData3); + Value *OutData2 = UREM(InData1, InData3); + Out1.setData(OutData1, true); + Out2.setData(OutData2, true); +} + +/* + * op_and_i32() + * args[0]: Out + * args[1]: In1 + * args[2]: In2 + */ +void IRFactory::op_and_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_and_i32); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + Value *OutData = AND(InData1, InData2); + Out.setData(OutData, true); +} + +/* + * op_or_i32() + * args[0]: Out + * args[1]: In1 + * args[2]: In2 + */ +void IRFactory::op_or_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_or_i32); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + Value *OutData = OR(InData1, InData2); + Out.setData(OutData, true); +} + +/* + * op_xor_i32() + * args[0]: Out + * args[1]: In1 + * args[2]: In2 + */ +void IRFactory::op_xor_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_xor_i32); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + Value *OutData = XOR(InData1, InData2); + Out.setData(OutData, true); +} + +/* shifts/rotates */ +/* + * op_shl_i32() + * args[0]: Out + * args[1]: In1 + * args[2]: In2 + */ +void IRFactory::op_shl_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_shl_i32); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + Value *OutData = SHL(InData1, InData2); + Out.setData(OutData, true); +} + +/* + * op_shr_i32() + * args[0]: Out + * args[1]: In1 + * args[2]: In2 + */ +void IRFactory::op_shr_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_shr_i32); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + Value *OutData = LSHR(InData1, InData2); + Out.setData(OutData, true); +} + +/* + * op_sar_i32() + * args[0]: Out + * args[1]: In1 + * args[2]: In2 + */ +void IRFactory::op_sar_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_sar_i32); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + Value *OutData = ASHR(InData1, InData2); + Out.setData(OutData, true); +} + +/* + * op_rotl_i32() + * args[0]: Out + * args[1]: In1 + * args[2]: In2 + */ +void IRFactory::op_rotl_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_rotl_i32); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + + Value *C = LSHR(InData1, SUB(CONST32(32), 
InData2)); + Value *OutData = SHL(InData1, InData2); + OutData = OR(OutData, C); + Out.setData(OutData, true); +} + +void IRFactory::op_rotr_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_rotr_i32); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + + Value *c = SHL(InData1, SUB(CONST32(32), InData2)); + Value *OutData = LSHR(InData1, InData2); + OutData = OR(OutData, c); + Out.setData(OutData, true); +} + +/* + * op_deposit_i32() + * args[0]: Out + * args[1]: In1 + * args[2]: In2 + * args[3]: In3 (offset from LSB) + * args[4]: In4 (length) + */ +void IRFactory::op_deposit_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_deposit_i32); + + /* Deposit the lowest args[4] bits of register args[2] into register + * args[1] starting from bits args[3]. */ + APInt mask = APInt::getBitsSet(32, args[3], args[3] + args[4]); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + if (args[3]) + InData2 = SHL(InData2, CONST32(args[3])); + InData2 = AND(InData2, ConstantInt::get(*Context, mask)); + InData1 = AND(InData1, ConstantInt::get(*Context, ~mask)); + InData1 = OR(InData1, InData2); + Out.setData(InData1, true); +} + +/* + * op_brcond_i32() + * args[0]: In1 + * args[1]: In2 + * args[2]: In3 (condition code) + * args[3]: In4 (label) + */ +void IRFactory::op_brcond_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_brcond_i32); + + /* brcond_i32 format: + * brcond_i32 op1,op2,cond,<ifTrue> + * <ifFalse>: + * A + * <ifTrue>: + * B + */ + Register &In1 = Reg[args[0]]; + Register &In2 = Reg[args[1]]; + CmpInst::Predicate Pred = getPred(args[2]); + + AssertType(In1.Size == 32 && In2.Size == 32); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + + SaveGlobals(COHERENCE_ALL, LastInst); + + TCGArg label = args[3]; + if (Labels.find(label) == Labels.end()) + Labels[label] = BasicBlock::Create(*Context, "succ", Func); + + BasicBlock *ifTrue = Labels[label]; + BasicBlock *ifFalse = BasicBlock::Create(*Context, "succ", Func); + + Value *Cond = ICMP(InData1, InData2, Pred); + BranchInst::Create(ifTrue, ifFalse, Cond, LastInst); + LastInst->eraseFromParent(); + + CurrBB = ifFalse; + LastInst = BranchInst::Create(ExitBB, CurrBB); +} + +void IRFactory::op_add2_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_add2_i32); + + Register &Out1 = Reg[args[0]]; + Register &Out2 = Reg[args[1]]; + Register &In1 = Reg[args[2]]; + Register &In2 = Reg[args[3]]; + Register &In3 = Reg[args[4]]; + Register &In4 = Reg[args[5]]; + + AssertType(Out1.Size == 32 && Out2.Size == 32 && + In1.Size == 32 && In2.Size == 32 && + In3.Size == 32 && In4.Size == 32); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + Value *InData3 = LoadState(In3); + Value *InData4 = LoadState(In4); + + InData1 = ZEXT64(InData1); + InData2 = SHL(ZEXT64(InData2), CONST64(32)); + InData2 = OR(InData2, InData1); + + InData3 = ZEXT64(InData3); + InData4 = SHL(ZEXT64(InData4), CONST64(32)); + InData4 = OR(InData4, InData3); + + InData2 = ADD(InData2, InData4); + + Value *OutData1 = TRUNC32(InData2); + Value *OutData2 = TRUNC32(LSHR(InData2, CONST64(32))); + Out1.setData(OutData1, true); + Out2.setData(OutData2, true); +} + +void IRFactory::op_sub2_i32(const TCGArg 
*args) +{ + IRDebug(INDEX_op_sub2_i32); + + Register &Out1 = Reg[args[0]]; + Register &Out2 = Reg[args[1]]; + Register &In1 = Reg[args[2]]; + Register &In2 = Reg[args[3]]; + Register &In3 = Reg[args[4]]; + Register &In4 = Reg[args[5]]; + + AssertType(Out1.Size == 32 && Out2.Size == 32 && + In1.Size == 32 && In2.Size == 32 && + In3.Size == 32 && In4.Size == 32); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + Value *InData3 = LoadState(In3); + Value *InData4 = LoadState(In4); + + InData1 = ZEXT64(InData1); + InData2 = SHL(ZEXT64(InData2), CONST64(32)); + InData2 = OR(InData2, InData1); + + InData3 = ZEXT64(InData3); + InData4 = SHL(ZEXT64(InData4), CONST64(32)); + InData4 = OR(InData4, InData3); + + InData2 = SUB(InData2, InData4); + + Value *OutData1 = TRUNC32(InData2); + Value *OutData2 = TRUNC32(LSHR(InData2, CONST64(32))); + Out1.setData(OutData1, true); + Out2.setData(OutData2, true); +} + +void IRFactory::op_mulu2_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_mulu2_i32); + + Register &Out1 = Reg[args[0]]; + Register &Out2 = Reg[args[1]]; + Register &In1 = Reg[args[2]]; + Register &In2 = Reg[args[3]]; + + AssertType(Out1.Size == 32 && Out2.Size == 32 && + In1.Size == 32 && In2.Size == 32); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + + InData1 = ZEXT64(InData1); + InData2 = ZEXT64(InData2); + + Value *OutData = MUL(InData1, InData2); + Value *Low = TRUNC32(OutData); + Value *High = TRUNC32(LSHR(OutData, CONST64(32))); + Out1.setData(Low, true); + Out2.setData(High, true); +} + +void IRFactory::op_muls2_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_muls2_i32); + + Register &Out1 = Reg[args[0]]; + Register &Out2 = Reg[args[1]]; + Register &In1 = Reg[args[2]]; + Register &In2 = Reg[args[3]]; + + AssertType(Out1.Size == 32 && Out2.Size == 32 && + In1.Size == 32 && In2.Size == 32); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + + InData1 = SEXT64(InData1); + InData2 = SEXT64(InData2); + + Value *OutData = MUL(InData1, InData2); + Value *Low = TRUNC32(OutData); + Value *High = TRUNC32(LSHR(OutData, CONST64(32))); + Out1.setData(Low, true); + Out2.setData(High, true); +} + +void IRFactory::op_muluh_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_muluh_i32); + + Register &Out1 = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out1.Size == 32 && In1.Size == 32 && In2.Size == 32); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + + InData1 = ZEXT64(InData1); + InData2 = ZEXT64(InData2); + + Value *OutData = MUL(InData1, InData2); + Value *High = TRUNC32(LSHR(OutData, CONST64(32))); + Out1.setData(High, true); +} + +void IRFactory::op_mulsh_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_mulsh_i32); + + Register &Out1 = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out1.Size == 32 && In1.Size == 32 && In2.Size == 32); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + + InData1 = SEXT64(InData1); + InData2 = SEXT64(InData2); + + Value *OutData = MUL(InData1, InData2); + Value *High = TRUNC32(LSHR(OutData, CONST64(32))); + Out1.setData(High, true); +} + +void IRFactory::op_brcond2_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_brcond2_i32); + + Register &In1 = Reg[args[0]]; + Register &In2 = Reg[args[1]]; + Register &In3 = Reg[args[2]]; + Register &In4 = Reg[args[3]]; + CmpInst::Predicate Pred = getPred(args[4]); + + AssertType(In1.Size == 32 && In2.Size == 32 && + In3.Size == 32 && 
In4.Size == 32); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + Value *InData3 = LoadState(In3); + Value *InData4 = LoadState(In4); + + SaveGlobals(COHERENCE_ALL, LastInst); + + InData1 = ZEXT64(InData1); + InData2 = SHL(ZEXT64(InData2), CONST64(32)); + InData3 = ZEXT64(InData3); + InData4 = SHL(ZEXT64(InData4), CONST64(32)); + + InData2 = OR(InData2, InData1); + InData4 = OR(InData4, InData3); + + TCGArg label = args[5]; + if (Labels.find(label) == Labels.end()) + Labels[label] = BasicBlock::Create(*Context, "succ", Func); + + BasicBlock *ifTrue = Labels[label]; + BasicBlock *ifFalse = BasicBlock::Create(*Context, "succ", Func); + + Value *Cond = ICMP(InData2, InData4, Pred); + BranchInst::Create(ifTrue, ifFalse, Cond, LastInst); + LastInst->eraseFromParent(); + + CurrBB = ifFalse; + LastInst = BranchInst::Create(ExitBB, CurrBB); +} + +void IRFactory::op_setcond2_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_setcond2_i32); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + Register &In3 = Reg[args[3]]; + Register &In4 = Reg[args[4]]; + CmpInst::Predicate Pred = getPred(args[5]); + + AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32 && + In3.Size == 32 && In4.Size == 32); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + Value *InData3 = LoadState(In3); + Value *InData4 = LoadState(In4); + + InData1 = ZEXT64(InData1); + InData2 = SHL(ZEXT64(InData2), CONST64(32)); + InData3 = ZEXT64(InData3); + InData4 = SHL(ZEXT64(InData4), CONST64(32)); + + InData2 = OR(InData2, InData1); + InData4 = OR(InData4, InData3); + + Value *OutData = ICMP(InData2, InData4, Pred); + OutData = ZEXT32(OutData); + Out.setData(OutData, true); +} + +void IRFactory::op_ext8s_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_ext8s_i32); + + Register &Out = Reg[args[0]]; + Register &In = Reg[args[1]]; + + AssertType(Out.Size == 32 && In.Size == 32); + + Value *InData = LoadState(In); + InData = TRUNC8(InData); + InData = SEXT32(InData); + Out.setData(InData, true); +} + +void IRFactory::op_ext16s_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_ext16s_i32); + + Register &Out = Reg[args[0]]; + Register &In = Reg[args[1]]; + + AssertType(Out.Size == 32 && In.Size == 32); + + Value *InData = LoadState(In); + InData = TRUNC16(InData); + InData = SEXT32(InData); + Out.setData(InData, true); +} + +void IRFactory::op_ext8u_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_ext8u_i32); + + Register &Out = Reg[args[0]]; + Register &In = Reg[args[1]]; + + AssertType(Out.Size == 32 && In.Size == 32); + + Value *InData = LoadState(In); + InData = AND(InData, CONST32(0xFF)); + Out.setData(InData, true); +} + +void IRFactory::op_ext16u_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_ext16u_i32); + + Register &Out = Reg[args[0]]; + Register &In = Reg[args[1]]; + + AssertType(Out.Size == 32 && In.Size == 32); + + Value *InData = LoadState(In); + InData = AND(InData, CONST32(0xFFFF)); + Out.setData(InData, true); +} + +void IRFactory::op_bswap16_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_bswap16_i32); + + Register &Out = Reg[args[0]]; + Register &In = Reg[args[1]]; + + AssertType(Out.Size == 32 && In.Size == 32); + + Value *InData = LoadState(In); + InData = TRUNC16(InData); + InData = BSWAP16(InData); + InData = ZEXT32(InData); + Out.setData(InData, true); +} + +void IRFactory::op_bswap32_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_bswap32_i32); + + Register &Out = Reg[args[0]]; + Register &In = Reg[args[1]]; + + AssertType(Out.Size == 
32 && In.Size == 32); + + Value *InData = LoadState(In); + InData = BSWAP32(InData); + Out.setData(InData, true); +} + +/* + * op_not_i32() + * args[0]: Out + * args[1]: In + */ +void IRFactory::op_not_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_not_i32); + + Register &Out = Reg[args[0]]; + Register &In = Reg[args[1]]; + + AssertType(Out.Size == 32 && In.Size == 32); + + Value *InData = LoadState(In); + Value *OutData = XOR(InData, CONST32((uint32_t)-1)); + Out.setData(OutData, true); +} + +/* + * op_neg_i32() + * args[0]: Out + * args[1]: In + */ +void IRFactory::op_neg_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_neg_i32); + + Register &Out = Reg[args[0]]; + Register &In = Reg[args[1]]; + + AssertType(Out.Size == 32 && In.Size == 32); + + Value *InData = LoadState(In); + Value *OutData = SUB(CONST32(0), InData); + Out.setData(OutData, true); +} + +void IRFactory::op_andc_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_andc_i32); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + + InData2 = XOR(InData2, CONST32((uint32_t)-1)); + Value *OutData = AND(InData1, InData2); + Out.setData(OutData, true); +} + +void IRFactory::op_orc_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_orc_i32); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + + InData2 = XOR(InData2, CONST32((uint32_t)-1)); + Value *OutData = OR(InData1, InData2); + Out.setData(OutData, true); +} + +void IRFactory::op_eqv_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_eqv_i32); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + + InData2 = XOR(InData2, CONST32((uint32_t)-1)); + Value *OutData = XOR(InData1, InData2); + Out.setData(OutData, true); +} + +void IRFactory::op_nand_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_nand_i32); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + + Value *OutData = AND(InData1, InData2); + OutData = XOR(OutData, CONST32((uint32_t)-1)); + Out.setData(OutData, true); +} + +void IRFactory::op_nor_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_nor_i32); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + + Value *OutData = OR(InData1, InData2); + OutData = XOR(OutData, CONST32((uint32_t)-1)); + Out.setData(OutData, true); +} + +void IRFactory::op_mov_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_mov_i64); + + Register &Out = Reg[args[0]]; + Register &In = Reg[args[1]]; + + AssertType(Out.Size == 64); + + Value *InData = LoadState(In); + + size_t Size = DL->getTypeSizeInBits(InData->getType()); + if (Size != 64) + InData = ZEXT64(InData); + + Out.setData(InData, true); +} + +/* + * op_movi_i64() + * args[0]: Out + * args[1]: In (const value) + */ +void 
IRFactory::op_movi_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_movi_i64); + + Register &Out = Reg[args[0]]; + + AssertType(Out.Size == 64); + + Out.setData(CONST64(args[1]), true); +} + +void IRFactory::op_setcond_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_setcond_i64); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + CmpInst::Predicate Pred = getPred(args[3]); + + AssertType(Out.Size == 64 && In1.Size == 64 && In2.Size == 64); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + Value *OutData = ICMP(InData1, InData2, Pred); + OutData = ZEXT64(OutData); + Out.setData(OutData, true); +} + +void IRFactory::op_movcond_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_movcond_i64); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + Register &In3 = Reg[args[3]]; + Register &In4 = Reg[args[4]]; + CmpInst::Predicate Pred = getPred(args[5]); + + AssertType(Out.Size == 64 && In1.Size == 64 && In2.Size == 64 && + In3.Size == 64 && In4.Size == 64); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + Value *InData3 = LoadState(In3); + Value *InData4 = LoadState(In4); + Value *Cond = ICMP(InData1, InData2, Pred); + Value *OutData = SelectInst::Create(Cond, InData3, InData4, "", LastInst); + Out.setData(OutData, true); +} + + +/* load/store */ +void IRFactory::op_ld8u_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_ld8u_i64); + + Register &Out = Reg[args[0]]; + Register &In = Reg[args[1]]; + TCGArg Off = args[2]; + + AssertType(Out.Size == 64); + + Value *InData = LoadState(In); + if (InData->getType() != Int8PtrTy) + InData = CASTPTR8(InData); + + InData = GetElementPtrInst::CreateInBounds(InData, CONSTPtr(Off), "", LastInst); + InData = new LoadInst(InData, "", false, LastInst); + InData = ZEXT64(InData); + Out.setData(InData, true); +} + +void IRFactory::op_ld8s_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_ld8s_i64); + + Register &Out = Reg[args[0]]; + Register &In = Reg[args[1]]; + TCGArg Off = args[2]; + + AssertType(Out.Size == 64); + + Value *InData = LoadState(In); + if (InData->getType() != Int8PtrTy) + InData = CASTPTR8(InData); + + InData = GetElementPtrInst::CreateInBounds(InData, CONSTPtr(Off), "", LastInst); + InData = new LoadInst(InData, "", false, LastInst); + InData = SEXT64(InData); + Out.setData(InData, true); +} + +void IRFactory::op_ld16u_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_ld16u_i64); + + Register &Out = Reg[args[0]]; + Register &In = Reg[args[1]]; + TCGArg Off = args[2]; + + AssertType(Out.Size == 64); + + Value *InData = LoadState(In); + if (InData->getType() != Int8PtrTy) + InData = CASTPTR8(InData); + + InData = GetElementPtrInst::CreateInBounds(InData, CONSTPtr(Off), "", LastInst); + InData = CASTPTR16(InData); + InData = new LoadInst(InData, "", false, LastInst); + InData = ZEXT64(InData); + Out.setData(InData, true); +} + +void IRFactory::op_ld16s_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_ld16s_i64); + + Register &Out = Reg[args[0]]; + Register &In = Reg[args[1]]; + TCGArg Off = args[2]; + + AssertType(Out.Size == 64); + + Value *InData = LoadState(In); + if (InData->getType() != Int8PtrTy) + InData = CASTPTR8(InData); + + InData = GetElementPtrInst::CreateInBounds(InData, CONSTPtr(Off), "", LastInst); + InData = CASTPTR16(InData); + InData = new LoadInst(InData, "", false, LastInst); + InData = SEXT64(InData); + Out.setData(InData, true); +} + +void IRFactory::op_ld32u_i64(const TCGArg *args) +{ + 
IRDebug(INDEX_op_ld32u_i64); + + Register &Out = Reg[args[0]]; + Register &In = Reg[args[1]]; + TCGArg Off = args[2]; + + AssertType(Out.Size == 64); + + Value *InData = LoadState(In); + if (InData->getType() != Int8PtrTy) + InData = CASTPTR8(InData); + + InData = GetElementPtrInst::CreateInBounds(InData, CONSTPtr(Off), "", LastInst); + InData = CASTPTR32(InData); + InData = new LoadInst(InData, "", false, LastInst); + InData = ZEXT64(InData); + Out.setData(InData, true); +} + +void IRFactory::op_ld32s_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_ld32s_i64); + + Register &Out = Reg[args[0]]; + Register &In = Reg[args[1]]; + TCGArg Off = args[2]; + + AssertType(Out.Size == 64); + + Value *InData = LoadState(In); + if (InData->getType() != Int8PtrTy) + InData = CASTPTR8(InData); + + InData = GetElementPtrInst::CreateInBounds(InData, CONSTPtr(Off), "", LastInst); + InData = CASTPTR32(InData); + InData = new LoadInst(InData, "", false, LastInst); + InData = SEXT64(InData); + Out.setData(InData, true); +} + +/* + * op_ld_i64() + * args[0]: Out + * args[1]: In (base) + * args[2]: In (offset) + */ +void IRFactory::op_ld_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_ld_i64); + + Register &Out = Reg[args[0]]; + Register &In = Reg[args[1]]; + TCGArg Off = args[2]; + + AssertType(Out.Size == 64); + + Value *InData; + if (In.isRev()) { + InData = StatePointer(In, Off, Int64PtrTy); + InData = new LoadInst(InData, "", false, LastInst); + if (isStateOfPC(Off)) + static_cast<LoadInst*>(InData)->setVolatile(true); + } else { + InData = LoadState(In); + if (InData->getType() != Int8PtrTy) + InData = CASTPTR8(InData); + InData = GetElementPtrInst::CreateInBounds(InData, CONSTPtr(Off), "", LastInst); + InData = CASTPTR64(InData); + InData = new LoadInst(InData, "", false, LastInst); + } + Out.setData(InData, true); +} + +void IRFactory::op_st8_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_st8_i64); + + Register &In1 = Reg[args[0]]; + Register &In2 = Reg[args[1]]; + TCGArg Off = args[2]; + + AssertType(In1.Size == 64); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + + InData1 = TRUNC8(InData1); + if (InData2->getType() != Int8PtrTy) + InData2 = CASTPTR8(InData2); + InData2 = GetElementPtrInst::CreateInBounds(InData2, CONSTPtr(Off), "", LastInst); + new StoreInst(InData1, InData2, false, LastInst); +} + +void IRFactory::op_st16_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_st16_i64); + + Register &In1 = Reg[args[0]]; + Register &In2 = Reg[args[1]]; + TCGArg Off = args[2]; + + AssertType(In1.Size == 64); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + + InData1 = TRUNC16(InData1); + if (InData2->getType() != Int8PtrTy) + InData2 = CASTPTR8(InData2); + InData2 = GetElementPtrInst::CreateInBounds(InData2, CONSTPtr(Off), "", LastInst); + InData2 = CASTPTR16(InData2); + new StoreInst(InData1, InData2, false, LastInst); +} + +void IRFactory::op_st32_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_st32_i64); + + Register &In1 = Reg[args[0]]; + Register &In2 = Reg[args[1]]; + TCGArg Off = args[2]; + + AssertType(In1.Size == 64); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + + InData1 = TRUNC32(InData1); + if (InData2->getType() != Int8PtrTy) + InData2 = CASTPTR8(InData2); + InData2 = GetElementPtrInst::CreateInBounds(InData2, CONSTPtr(Off), "", LastInst); + InData2 = CASTPTR32(InData2); + new StoreInst(InData1, InData2, false, LastInst); +} + +/* + * op_st_i64() + * args[0]: In1 + * args[1]: In2 (base) + * args[2]: In3 (offset) + */ +void 
IRFactory::op_st_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_st_i64); + + Register &In1 = Reg[args[0]]; + Register &In2 = Reg[args[1]]; + TCGArg Off = args[2]; + + AssertType(In1.Size == 64); + + Value *InData1 = LoadState(In1); + + if (In2.isRev()) { + Value *InData2 = StatePointer(In2, Off, Int64PtrTy); + StoreInst *SI = new StoreInst(InData1, InData2, false, LastInst); + if (isStateOfPC(Off)) + SI->setVolatile(true); + } else { + Value *InData2 = LoadState(In2); + if (InData2->getType() != Int8PtrTy) + InData2 = CASTPTR8(InData2); + InData2 = GetElementPtrInst::CreateInBounds(InData2, CONSTPtr(Off), "", LastInst); + InData2 = CASTPTR64(InData2); + new StoreInst(InData1, InData2, false, LastInst); + } +} + +/* arith */ +/* + * op_add_i64() + * args[0]: Out + * args[1]: In1 + * args[2]: In2 + */ +void IRFactory::op_add_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_add_i64); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 64 && In1.Size == 64 && In2.Size == 64); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + Value *OutData; + if (In1.isRev()) { + intptr_t Off = static_cast<ConstantInt*>(InData2)->getSExtValue(); + OutData = StatePointer(In1, Off, Int64PtrTy); + } else + OutData = ADD(InData1, InData2); + + Out.setData(OutData, true); +} + +void IRFactory::op_sub_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_sub_i64); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 64 && In1.Size == 64 && In2.Size == 64); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + Value *OutData = SUB(InData1, InData2); + Out.setData(OutData, true); +} + +/* + * op_mul_i64() + * args[0]: Out + * args[1]: In1 + * args[2]: In2 + */ +void IRFactory::op_mul_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_mul_i64); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 64 && In1.Size == 64 && In2.Size == 64); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + Value *OutData = MUL(InData1, InData2); + Out.setData(OutData, true); +} + +void IRFactory::op_div_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_div_i64); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 64 && In1.Size == 64 && In2.Size == 64); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + Value *OutData = SDIV(InData1, InData2); + Out.setData(OutData, true); +} + +void IRFactory::op_divu_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_divu_i64); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 64 && In1.Size == 64 && In2.Size == 64); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + Value *OutData = UDIV(InData1, InData2); + Out.setData(OutData, true); +} + +void IRFactory::op_rem_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_rem_i64); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 64 && In1.Size == 64 && In2.Size == 64); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + Value *OutData = SREM(InData1, InData2); + Out.setData(OutData, true); +} + +void IRFactory::op_remu_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_remu_i64); + + Register &Out = Reg[args[0]]; + Register &In1 
= Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 64 && In1.Size == 64 && In2.Size == 64); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + Value *OutData = UREM(InData1, InData2); + Out.setData(OutData, true); +} + +void IRFactory::op_div2_i64(const TCGArg *args) +{ + IRError("%s not implemented.\n", __func__); +} + +void IRFactory::op_divu2_i64(const TCGArg *args) +{ + IRError("%s not implemented.\n", __func__); +} + +void IRFactory::op_and_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_and_i64); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + + size_t Size = DL->getTypeSizeInBits(InData1->getType()); + if (Size == 32) + InData1 = ZEXT64(InData1); + Size = DL->getTypeSizeInBits(InData2->getType()); + if (Size == 32) + InData2 = ZEXT64(InData2); + + Value *OutData = AND(InData1, InData2); + Out.setData(OutData, true); +} + +void IRFactory::op_or_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_or_i64); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 64 && In1.Size == 64 && In2.Size == 64); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + Value *OutData = OR(InData1, InData2); + Out.setData(OutData, true); +} + +void IRFactory::op_xor_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_xor_i64); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 64 && In1.Size == 64 && In2.Size == 64); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + Value *OutData = XOR(InData1, InData2); + Out.setData(OutData, true); +} + +/* shifts/rotates */ +void IRFactory::op_shl_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_shl_i64); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 64 && In1.Size == 64 && In2.Size == 64); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + Value *OutData = SHL(InData1, InData2); + Out.setData(OutData, true); +} + +/* + * op_shr_i64() + * args[0]: Out + * args[1]: In1 + * args[2]: In2 + */ +void IRFactory::op_shr_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_shr_i64); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 64 && In1.Size == 64 && In2.Size == 64); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + Value *OutData = LSHR(InData1, InData2); + Out.setData(OutData, true); +} + +void IRFactory::op_sar_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_sar_i64); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 64 && In1.Size == 64 && In2.Size == 64); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + Value *OutData = ASHR(InData1, InData2); + Out.setData(OutData, true); +} + +void IRFactory::op_rotl_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_rotl_i64); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 64 && In1.Size == 64 && In2.Size == 64); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + + Value *C = LSHR(InData1, SUB(CONST64(64), InData2)); + Value *OutData = SHL(InData1, InData2); + OutData = OR(OutData, C); + Out.setData(OutData, 
true); +} + +void IRFactory::op_rotr_i64(const TCGArg *args) +{ + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 64 && In1.Size == 64 && In2.Size == 64); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + + Value *c = SHL(InData1, SUB(CONST64(64), InData2)); + Value *OutData = LSHR(InData1, InData2); + OutData = OR(OutData, c); + Out.setData(OutData, true); +} + +void IRFactory::op_deposit_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_deposit_i64); + + /* Deposit the lowest args[4] bits of register args[2] into register + * args[1] starting from bits args[3]. */ + APInt mask = APInt::getBitsSet(64, args[3], args[3] + args[4]); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 64 && In1.Size == 64 && In2.Size == 64); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + if (args[3]) + InData2 = SHL(InData2, CONST64(args[3])); + InData2 = AND(InData2, ConstantInt::get(*Context, mask)); + InData1 = AND(InData1, ConstantInt::get(*Context, ~mask)); + InData1 = OR(InData1, InData2); + Out.setData(InData1, true); +} + +void IRFactory::op_ext_i32_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_ext_i32_i64); + + Register &Out = Reg[args[0]]; + Register &In = Reg[args[1]]; + + AssertType(Out.Size == 64 && In.Size == 32); + + Value *InData = LoadState(In); + InData = SEXT64(InData); + Out.setData(InData, true); +} + +void IRFactory::op_extu_i32_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_extu_i32_i64); + + Register &Out = Reg[args[0]]; + Register &In = Reg[args[1]]; + + AssertType(Out.Size == 64 && In.Size == 32); + + Value *InData = LoadState(In); + InData = ZEXT64(InData); + Out.setData(InData, true); +} + +void IRFactory::op_extrl_i64_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_extrl_i64_i32); + + Register &Out = Reg[args[0]]; + Register &In = Reg[args[1]]; + + AssertType(Out.Size == 32 && In.Size == 64); + + Value *InData = LoadState(In); + InData = TRUNC32(InData); + Out.setData(InData, true); +} + +void IRFactory::op_extrh_i64_i32(const TCGArg *args) +{ + IRDebug(INDEX_op_extrh_i64_i32); + + Register &Out = Reg[args[0]]; + Register &In = Reg[args[1]]; + + AssertType(Out.Size == 32 && In.Size == 64); + + Value *InData = LoadState(In); + InData = TRUNC32(LSHR(InData, CONST64(32))); + Out.setData(InData, true); +} + +void IRFactory::op_brcond_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_brcond_i64); + + /* brcond_i32 format: + * brcond_i32 op1,op2,cond,<ifTrue> + * <ifFalse>: + * A + * <ifTrue>: + * B + */ + Register &In1 = Reg[args[0]]; + Register &In2 = Reg[args[1]]; + CmpInst::Predicate Pred = getPred(args[2]); + + AssertType(In1.Size == 64 && In2.Size == 64); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + + SaveGlobals(COHERENCE_ALL, LastInst); + + TCGArg label = args[3]; + if (Labels.find(label) == Labels.end()) + Labels[label] = BasicBlock::Create(*Context, "succ", Func); + + BasicBlock *ifTrue = Labels[label]; + BasicBlock *ifFalse = BasicBlock::Create(*Context, "succ", Func); + + Value *Cond = ICMP(InData1, InData2, Pred); + BranchInst::Create(ifTrue, ifFalse, Cond, LastInst); + LastInst->eraseFromParent(); + + CurrBB = ifFalse; + LastInst = BranchInst::Create(ExitBB, CurrBB); +} + +void IRFactory::op_ext8s_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_ext8s_i64); + + Register &Out = Reg[args[0]]; + Register &In = Reg[args[1]]; + + AssertType(Out.Size == 64 && In.Size == 
64); + + Value *InData = LoadState(In); + InData = TRUNC8(InData); + InData = SEXT64(InData); + Out.setData(InData, true); +} + +void IRFactory::op_ext16s_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_ext16s_i64); + + Register &Out = Reg[args[0]]; + Register &In = Reg[args[1]]; + + AssertType(Out.Size == 64 && In.Size == 64); + + Value *InData = LoadState(In); + InData = TRUNC16(InData); + InData = SEXT64(InData); + Out.setData(InData, true); +} + +/* + * op_ext32s_i64() + * args[0]: Out + * args[1]: In + */ +void IRFactory::op_ext32s_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_ext32s_i64); + + Register &Out = Reg[args[0]]; + Register &In = Reg[args[1]]; + + AssertType(Out.Size == 64 && In.Size == 64); + + Value *InData = LoadState(In); + if (DL->getTypeSizeInBits(InData->getType()) != 32) + InData = TRUNC32(InData); + InData = SEXT64(InData); + Out.setData(InData, true); +} + +void IRFactory::op_ext8u_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_ext8u_i64); + + Register &Out = Reg[args[0]]; + Register &In = Reg[args[1]]; + + AssertType(Out.Size == 64 && In.Size == 64); + + Value *InData = LoadState(In); + InData = AND(InData, CONST64(0xFF)); + Out.setData(InData, true); +} + +void IRFactory::op_ext16u_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_ext16u_i64); + + Register &Out = Reg[args[0]]; + Register &In = Reg[args[1]]; + + AssertType(Out.Size == 64 && In.Size == 64); + + Value *InData = LoadState(In); + InData = AND(InData, CONST64(0xFFFF)); + Out.setData(InData, true); +} + +/* + * op_ext32u_i64() + * args[0]: Out + * args[1]: In + */ +void IRFactory::op_ext32u_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_ext32u_i64); + + Register &Out = Reg[args[0]]; + Register &In = Reg[args[1]]; + + AssertType(Out.Size == 64 && In.Size == 64); + + Value *InData = LoadState(In); + if (DL->getTypeSizeInBits(InData->getType()) == 32) + InData = ZEXT64(InData); + else + InData = AND(InData, CONST64(0xFFFFFFFF)); + Out.setData(InData, true); +} + +void IRFactory::op_bswap16_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_bswap16_i64); + + Register &Out = Reg[args[0]]; + Register &In = Reg[args[1]]; + + AssertType(Out.Size == 64 && In.Size == 64); + + Value *InData = LoadState(In); + InData = TRUNC16(InData); + InData = BSWAP16(InData); + InData = ZEXT64(InData); + Out.setData(InData, true); +} + +void IRFactory::op_bswap32_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_bswap32_i64); + + Register &Out = Reg[args[0]]; + Register &In = Reg[args[1]]; + + AssertType(Out.Size == 64 && In.Size == 64); + + Value *InData = LoadState(In); + InData = TRUNC32(InData); + InData = BSWAP32(InData); + InData = ZEXT64(InData); + Out.setData(InData, true); +} + +void IRFactory::op_bswap64_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_bswap64_i64); + + Register &Out = Reg[args[0]]; + Register &In = Reg[args[1]]; + + AssertType(Out.Size == 64 && In.Size == 64); + + Value *InData = LoadState(In); + InData = BSWAP64(InData); + Out.setData(InData, true); + +} + +void IRFactory::op_not_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_not_i64); + + Register &Out = Reg[args[0]]; + Register &In = Reg[args[1]]; + + AssertType(Out.Size == 64 && In.Size == 64); + + Value *InData = LoadState(In); + Value *OutData = XOR(InData, CONST64((uint64_t)-1)); + Out.setData(OutData, true); +} + +void IRFactory::op_neg_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_neg_i64); + + Register &Out = Reg[args[0]]; + Register &In = Reg[args[1]]; + + AssertType(Out.Size == 64 && In.Size == 64); + + Value *InData = LoadState(In); + Value *OutData = 
SUB(CONST64(0), InData); + Out.setData(OutData, true); +} + +void IRFactory::op_andc_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_andc_i64); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 64 && In1.Size == 64 && In2.Size == 64); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + + InData2 = XOR(InData2, CONST64((uint64_t)-1)); + Value *OutData = AND(InData1, InData2); + Out.setData(OutData, true); +} + +void IRFactory::op_orc_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_orc_i64); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 64 && In1.Size == 64 && In2.Size == 64); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + + InData2 = XOR(InData2, CONST64((uint64_t)-1)); + Value *OutData = OR(InData1, InData2); + Out.setData(OutData, true); +} + +void IRFactory::op_eqv_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_eqv_i64); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 64 && In1.Size == 64 && In2.Size == 64); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + + InData2 = XOR(InData2, CONST64((uint64_t)-1)); + Value *OutData = XOR(InData1, InData2); + Out.setData(OutData, true); +} + +void IRFactory::op_nand_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_nand_i64); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 64 && In1.Size == 64 && In2.Size == 64); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + + Value *OutData = AND(InData1, InData2); + OutData = XOR(OutData, CONST64((uint64_t)-1)); + Out.setData(OutData, true); +} + +void IRFactory::op_nor_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_nor_i64); + + Register &Out = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out.Size == 64 && In1.Size == 64 && In2.Size == 64); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + + Value *OutData = OR(InData1, InData2); + OutData = XOR(OutData, CONST64((uint64_t)-1)); + Out.setData(OutData, true); +} + +void IRFactory::op_add2_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_add2_i64); + + Register &Out1 = Reg[args[0]]; + Register &Out2 = Reg[args[1]]; + Register &In1 = Reg[args[2]]; + Register &In2 = Reg[args[3]]; + Register &In3 = Reg[args[4]]; + Register &In4 = Reg[args[5]]; + + AssertType(Out1.Size == 64 && Out2.Size == 64 && + In1.Size == 64 && In2.Size == 64 && + In3.Size == 64 && In4.Size == 64); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + Value *InData3 = LoadState(In3); + Value *InData4 = LoadState(In4); + + InData1 = ZEXT128(InData1); + InData2 = SHL(ZEXT128(InData2), CONST128(64)); + InData2 = OR(InData2, InData1); + + InData3 = ZEXT128(InData3); + InData4 = SHL(ZEXT128(InData4), CONST128(64)); + InData4 = OR(InData4, InData3); + + InData2 = ADD(InData2, InData4); + + Value *OutData1 = TRUNC64(InData2); + Value *OutData2 = TRUNC64(LSHR(InData2, CONST128(64))); + Out1.setData(OutData1, true); + Out2.setData(OutData2, true); +} + +void IRFactory::op_sub2_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_sub2_i64); + + Register &Out1 = Reg[args[0]]; + Register &Out2 = Reg[args[1]]; + Register &In1 = Reg[args[2]]; + Register &In2 = Reg[args[3]]; + Register &In3 = Reg[args[4]]; + Register &In4 = Reg[args[5]]; + 
+ AssertType(Out1.Size == 64 && Out2.Size == 64 && + In1.Size == 64 && In2.Size == 64 && + In3.Size == 64 && In4.Size == 64); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + Value *InData3 = LoadState(In3); + Value *InData4 = LoadState(In4); + + InData1 = ZEXT128(InData1); + InData2 = SHL(ZEXT128(InData2), CONST128(64)); + InData2 = OR(InData2, InData1); + + InData3 = ZEXT128(InData3); + InData4 = SHL(ZEXT128(InData4), CONST128(64)); + InData4 = OR(InData4, InData3); + + InData2 = SUB(InData2, InData4); + + Value *OutData1 = TRUNC64(InData2); + Value *OutData2 = TRUNC64(LSHR(InData2, CONST128(64))); + Out1.setData(OutData1, true); + Out2.setData(OutData2, true); +} + +void IRFactory::op_mulu2_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_mulu2_i64); + + Register &Out1 = Reg[args[0]]; + Register &Out2 = Reg[args[1]]; + Register &In1 = Reg[args[2]]; + Register &In2 = Reg[args[3]]; + + AssertType(Out1.Size == 64 && Out2.Size == 64 && + In1.Size == 64 && In2.Size == 64); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + + InData1 = ZEXT128(InData1); + InData2 = ZEXT128(InData2); + + Value *OutData = MUL(InData1, InData2); + Value *Low = TRUNC64(OutData); + Value *High = TRUNC64(LSHR(OutData, CONST128(64))); + Out1.setData(Low, true); + Out2.setData(High, true); +} + +void IRFactory::op_muls2_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_muls2_i64); + + Register &Out1 = Reg[args[0]]; + Register &Out2 = Reg[args[1]]; + Register &In1 = Reg[args[2]]; + Register &In2 = Reg[args[3]]; + + AssertType(Out1.Size == 64 && Out2.Size == 64 && + In1.Size == 64 && In2.Size == 64); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + + InData1 = SEXT128(InData1); + InData2 = SEXT128(InData2); + + Value *OutData = MUL(InData1, InData2); + Value *Low = TRUNC64(OutData); + Value *High = TRUNC64(LSHR(OutData, CONST128(64))); + Out1.setData(Low, true); + Out2.setData(High, true); +} + +void IRFactory::op_muluh_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_muluh_i64); + + Register &Out1 = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out1.Size == 64 && In1.Size == 64 && In2.Size == 64); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + + InData1 = ZEXT128(InData1); + InData2 = ZEXT128(InData2); + + Value *OutData = MUL(InData1, InData2); + Value *High = TRUNC64(LSHR(OutData, CONST128(64))); + Out1.setData(High, true); +} + +void IRFactory::op_mulsh_i64(const TCGArg *args) +{ + IRDebug(INDEX_op_mulsh_i64); + + Register &Out1 = Reg[args[0]]; + Register &In1 = Reg[args[1]]; + Register &In2 = Reg[args[2]]; + + AssertType(Out1.Size == 64 && In1.Size == 64 && In2.Size == 64); + + Value *InData1 = LoadState(In1); + Value *InData2 = LoadState(In2); + + InData1 = SEXT128(InData1); + InData2 = SEXT128(InData2); + + Value *OutData = MUL(InData1, InData2); + Value *High = TRUNC64(LSHR(OutData, CONST128(64))); + Out1.setData(High, true); +} + +/* QEMU specific */ +void IRFactory::op_insn_start(const TCGArg *args) +{ + IRDebug(INDEX_op_insn_start); + NI.NumInsts++; +} + +void IRFactory::InsertLinkAndExit(Instruction *InsertPos) +{ + auto ChainSlot = LLEnv->getChainSlot(); + size_t Key = ChainSlot.first; + uintptr_t RetVal = ChainSlot.second; + unsigned Idx = NI.setChainSlot(Key); + uintptr_t Addr = NI.getChainSlotAddr(Idx); + + /* Here we use the llvm.trap intrinsic to notify LLVM backend to insert + * jump instruction for chaining. 
*/ + ConstantInt *Meta[] = { CONST32(PATCH_TRACE_BLOCK_CHAINING), CONSTPtr(Addr) }; + Function *TrapFn = Intrinsic::getDeclaration(Mod, Intrinsic::trap); + CallInst *CI = CallInst::Create(TrapFn, "", InsertPos); + DebugLoc DL = MF->getDebugLoc(PATCH_TRACE_BLOCK_CHAINING, Idx, Func, Meta); + CI->setDebugLoc(DL); + + MF->setExit(CI); + + InsertExit(RetVal); +} + +void IRFactory::InsertExit(uintptr_t RetVal, bool setExit) +{ + ConstantInt *Meta[] = { CONST32(PATCH_EXIT_TB), ExitAddr }; + ReturnInst *RI = ReturnInst::Create(*Context, CONSTPtr(RetVal), LastInst); + DebugLoc DL = MF->getDebugLoc(PATCH_EXIT_TB, 0, Func, Meta); + RI->setDebugLoc(DL); + + if (setExit) + MF->setExit(RI); +} + +void IRFactory::InsertLookupIBTC(GraphNode *CurrNode) +{ + BasicBlock *BB = nullptr; + + if (CommonBB.find("ibtc") == CommonBB.end()) { + BB = CommonBB["ibtc"] = BasicBlock::Create(*Context, "ibtc", Func); + + SmallVector<Value *, 4> Params; + Function *F = ResolveFunction("helper_lookup_ibtc"); + Value *Env = ConvertCPUType(F, 0, BB); + + Params.push_back(Env); + CallInst *CI = CallInst::Create(F, Params, "", BB); + IndirectBrInst *IB = IndirectBrInst::Create(CI, 1, BB); + MF->setConst(CI); + MF->setExit(CI); + + IndirectBrs.push_back(IB); + toSink.push_back(BB); + } + + BB = CommonBB["ibtc"]; + BranchInst::Create(BB, LastInst); +} + +void IRFactory::InsertLookupCPBL(GraphNode *CurrNode) +{ + SmallVector<Value *, 4> Params; + Function *F = ResolveFunction("helper_lookup_cpbl"); + Value *Env = ConvertCPUType(F, 0, LastInst); + + Params.push_back(Env); + CallInst *CI = CallInst::Create(F, Params, "", LastInst); + IndirectBrInst *IB = IndirectBrInst::Create(CI, 1, LastInst); + MF->setConst(CI); + MF->setExit(CI); + + IndirectBrs.push_back(IB); + toSink.push_back(CurrBB); +} + +void IRFactory::TraceValidateCPBL(GraphNode *NextNode, StoreInst *StorePC) +{ + TranslationBlock *NextTB = NextNode->getTB(); + Value *Cond; + + SmallVector<Value *, 4> Params; + Function *F = ResolveFunction("helper_validate_cpbl"); + Value *Env = ConvertCPUType(F, 0, LastInst); + + Params.push_back(Env); + Params.push_back(ConstantInt::get(StorePC->getValueOperand()->getType(), + NextTB->pc)); + Params.push_back(CONST32(NextTB->id)); + CallInst *CI = CallInst::Create(F, Params, "", LastInst); + Cond = ICMP(CI, CONST32(1), ICmpInst::ICMP_EQ); + + MF->setConst(CI); + + BasicBlock *Valid = BasicBlock::Create(*Context, "cpbl.valid", Func); + BasicBlock *Invalid = BasicBlock::Create(*Context, "cpbl.invalid", Func); + toSink.push_back(Invalid); + + BranchInst::Create(Valid, Invalid, Cond, LastInst); + LastInst->eraseFromParent(); + + LastInst = BranchInst::Create(ExitBB, Invalid); + Instruction *SI = StorePC->clone(); + SI->insertBefore(LastInst); + InsertExit(0); + LastInst->eraseFromParent(); + + MF->setExit(SI); + + CurrBB = Valid; + LastInst = BranchInst::Create(ExitBB, CurrBB); +} + +/* + * TraceLinkIndirectJump() + * This routine implements IB inlining, i.e., linking two intra-trace blocks + * via indirect branch. + * Note that we don't need to validate CPBL because this routine is only + * used for user-mode emulation. + */ +void IRFactory::TraceLinkIndirectJump(GraphNode *NextNode, StoreInst *SI) +{ + dbg() << DEBUG_LLVM << " - Found an indirect branch. 
Guess pc " + << format("0x%" PRIx, NextNode->getGuestPC()) << "\n"; + + BasicBlock *ifTrue = BasicBlock::Create(*Context, "main_path", Func); + BasicBlock *ifFalse = BasicBlock::Create(*Context, "exit_stub", Func); + + Value *NextPC = SI->getValueOperand(); + Value *GuessPC = ConstantInt::get(NextPC->getType(), + Builder->getGuestPC(NextNode)); + + Value *Cond = ICMP(NextPC, GuessPC, ICmpInst::ICMP_EQ); + BranchInst::Create(ifTrue, ifFalse, Cond, LastInst); + LastInst->eraseFromParent(); + + CurrBB = ifTrue; + + /* First set the branch to exit BB, and the link will be resolved + at the trace finalization procedure. */ + BranchInst *BI = BranchInst::Create(ExitBB, CurrBB); + Builder->setBranch(BI, NextNode); + + CurrBB = ifFalse; + LastInst = BranchInst::Create(ExitBB, CurrBB); +} + +void IRFactory::TraceLinkDirectJump(GraphNode *NextNode, StoreInst *SI) +{ + ConstantInt *NextPC = static_cast<ConstantInt *>(SI->getValueOperand()); + target_ulong next_pc = NextPC->getZExtValue() + + Builder->getCurrNode()->getTB()->cs_base; + NextPC = ConstantInt::get(NextPC->getType(), next_pc); + + dbg() << DEBUG_LLVM << " - Found a direct branch to pc " + << format("0x%" PRIx, next_pc) << "\n"; + +#if defined(CONFIG_SOFTMMU) + TranslationBlock *tb = Builder->getCurrNode()->getTB(); + TranslationBlock *next_tb = NextNode->getTB(); + /* If two blocks are not in the same page or the next block is across + * the page boundary, we have to handle it with CPBL. */ + if ((tb->pc & TARGET_PAGE_MASK) != (next_tb->pc & TARGET_PAGE_MASK) || + next_tb->page_addr[1] != (tb_page_addr_t)-1) + TraceValidateCPBL(NextNode, SI); +#endif + /* First set the branch to exit BB, and the link will be resolved + at the trace finalization procedure. */ + BranchInst *BI = BranchInst::Create(ExitBB, LastInst); + Builder->setBranch(BI, NextNode); +} + +void IRFactory::TraceLinkDirectJump(StoreInst *SI) +{ + ConstantInt *NextPC = static_cast<ConstantInt *>(SI->getValueOperand()); + target_ulong next_pc = NextPC->getZExtValue() + + Builder->getCurrNode()->getTB()->cs_base; + NextPC = ConstantInt::get(NextPC->getType(), next_pc); + + dbg() << DEBUG_LLVM << " - Found a direct branch to pc " + << format("0x%" PRIx, next_pc) << " (exit)\n"; + +#if defined(CONFIG_SOFTMMU) + TranslationBlock *tb = Builder->getCurrNode()->getTB(); + if ((tb->pc & TARGET_PAGE_MASK) != (next_pc & TARGET_PAGE_MASK)) { + InsertLookupCPBL(Builder->getCurrNode()); + return; + } +#endif + InsertLinkAndExit(SI); +} + +GraphNode *IRFactory::findNextNode(target_ulong pc) +{ +#ifdef USE_TRACETREE_ONLY + for (auto Child : Builder->getCurrNode()->getChildren()) { + if (pc == Builder->getGuestPC(Child)) + return Child; + } + return nullptr; +#else + return Builder->getNode(pc); +#endif +} + +void IRFactory::TraceLink(StoreInst *SI) +{ + GraphNode *CurrNode = Builder->getCurrNode(); + ConstantInt *CI = dyn_cast<ConstantInt>(SI->getValueOperand()); + if (!CI) { + /* Indirect branch */ + SaveGlobals(COHERENCE_ALL, LastInst); + +#if defined(CONFIG_USER_ONLY) + for (auto NextNode : CurrNode->getChildren()) + TraceLinkIndirectJump(NextNode, SI); +#endif + InsertLookupIBTC(CurrNode); + } else { + /* Direct branch. 
*/ + target_ulong pc = CI->getZExtValue(); + GraphNode *NextNode = findNextNode(pc); + if (NextNode) { + TraceLinkDirectJump(NextNode, SI); + return; + } + + TraceLinkDirectJump(SI); + std::string Name = CurrBB->getName().str() + ".exit"; + CurrBB->setName(Name); + toSink.push_back(CurrBB); + } +} + +StoreInst *IRFactory::getStorePC() +{ +#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS + std::vector<std::pair<intptr_t, StoreInst *> > StorePC; + + /* Search for store instructions that write value to PC in this block. */ + bool hasAllConstantPC = true; + BasicBlock *BB = LastInst->getParent(); + for (auto BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) { + if (StoreInst *SI = dyn_cast<StoreInst>(BI)) { + intptr_t Off = 0; + Value *Base = getBaseWithConstantOffset(DL, + SI->getPointerOperand(), Off); + if (Base == BaseReg[TCG_AREG0].Base && isStateOfPC(Off)) { + StorePC.push_back(std::make_pair(Off, SI)); + if (!isa<ConstantInt>(SI->getValueOperand())) + hasAllConstantPC = false; + } + } + } + + if (StorePC.empty()) + return nullptr; + if (StorePC.size() == 1) + return StorePC[0].second; + + /* We only consider the last two stores. */ + unsigned I1 = StorePC.size() - 2, I2 = StorePC.size() - 1; + if (StorePC[I1].first > StorePC[I2].first) { + unsigned tmp = I1; + I1 = I2; + I2 = tmp; + } + + intptr_t OffsetA = StorePC[I1].first; + intptr_t OffsetB = StorePC[I2].first; + StoreInst *SA = StorePC[I1].second; + StoreInst *SB = StorePC[I2].second; + intptr_t SzA = DL->getTypeSizeInBits(SA->getValueOperand()->getType()); + intptr_t SzB = DL->getTypeSizeInBits(SB->getValueOperand()->getType()); + if (SzA != SzB || OffsetA + SzA != OffsetB || SzA + SzB != TARGET_LONG_BITS) + return nullptr; + + Value *NewPC; + Type *Ty = (TARGET_LONG_BITS == 32) ? Int32Ty : Int64Ty; + Type *PTy = (TARGET_LONG_BITS == 32) ? Int32PtrTy : Int64PtrTy; + if (hasAllConstantPC) { + target_ulong PCA = static_cast<ConstantInt*>(SA->getValueOperand())->getZExtValue(); + target_ulong PCB = static_cast<ConstantInt*>(SB->getValueOperand())->getZExtValue(); + NewPC = ConstantInt::get(Ty, PCA | (PCB << SzA)); + } else { + Value *PCA = ZEXT(SA->getValueOperand(), Ty); + Value *PCB = ZEXT(SB->getValueOperand(), Ty); + PCB = SHL(PCB, ConstantInt::get(Ty, SzA)); + NewPC = OR(PCA, PCB); + } + + toErase.push_back(SA); + toErase.push_back(SB); + + Value *Addr = CAST(SA->getPointerOperand(), PTy); + return new StoreInst(NewPC, Addr, true, LastInst); + +#else + return dyn_cast<StoreInst>(--BasicBlock::iterator(LastInst)); +#endif +} + +/* + * op_exit_tb() + * args[0]: return value + */ +void IRFactory::op_exit_tb(const TCGArg *args) +{ + IRDebug(INDEX_op_exit_tb); + + if (!LastInst) + return; + + /* Some guest architectures (e.g., ARM) do not explicitly generate a store + * instruction to sync the PC value to memory before exit_tb. We + * generate the store PC instruction here so that the following routine can + * analyze the PC value it will branch to. Note that other dirty states will + * be synced later. */ + CreateStorePC(LastInst); + + if (LastInst == &*LastInst->getParent()->begin()) { + SaveGlobals(COHERENCE_ALL, LastInst); + InsertExit(0, true); + } else if (isa<CallInst>(--BasicBlock::iterator(LastInst))) { + /* Tail call. 
*/
+        for (int i = 0, e = tcg_ctx.nb_globals; i != e; ++i) {
+            Register &reg = Reg[i];
+            if (reg.isReg() && reg.isDirty())
+                runPasses = false;
+        }
+
+        SaveGlobals(COHERENCE_ALL, LastInst);
+        InsertExit(0, true);
+    } else if (StoreInst *SI = getStorePC()) {
+        SaveGlobals(COHERENCE_ALL, SI);
+        TraceLink(SI);
+    } else {
+        runPasses = false;
+        SaveGlobals(COHERENCE_ALL, LastInst);
+        InsertExit(0, true);
+    }
+
+    LastInst->eraseFromParent();
+    LastInst = nullptr;
+}
+
+/*
+ * op_goto_tb()
+ * args[0]: jump index
+ */
+void IRFactory::op_goto_tb(const TCGArg *args)
+{
+    IRDebug(INDEX_op_goto_tb);
+}
+
+void IRFactory::op_qemu_ld_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_qemu_ld_i32);
+
+    TCGArg DataLo = *args++;
+    TCGArg AddrLo = *args++;
+    TCGArg AddrHi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) ? *args++ : 0;
+    TCGMemOpIdx oi = *args++;
+    TCGMemOp opc = get_memop(oi);
+
+    Register &Out = Reg[DataLo];
+    Register &In1 = Reg[AddrLo];
+
+    Value *InData1 = LoadState(In1);
+    Value *InData2 = (AddrHi) ? LoadState(Reg[AddrHi]) : nullptr;
+
+    AssertType(In1.Size == 32 || In1.Size == 64);
+
+    SaveStates();
+
+    Value *OutData = QEMULoad(InData1, InData2, oi);
+    OutData = getExtendValue(OutData, Out.Ty, opc);
+    Out.setData(OutData, true);
+}
+
+void IRFactory::op_qemu_st_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_qemu_st_i32);
+
+    TCGArg DataLo = *args++;
+    TCGArg AddrLo = *args++;
+    TCGArg AddrHi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) ? *args++ : 0;
+    TCGMemOpIdx oi = *args++;
+    TCGMemOp opc = get_memop(oi);
+
+    Register &In1 = Reg[DataLo];
+    Register &In2 = Reg[AddrLo];
+
+    Value *InData1 = LoadState(In1);
+    Value *InData2 = LoadState(In2);
+    Value *InData3 = (AddrHi) ? LoadState(Reg[AddrHi]) : nullptr;
+
+    AssertType(In1.Size == 32 || In1.Size == 64);
+
+    SaveStates();
+
+    InData1 = getTruncValue(InData1, opc);
+    QEMUStore(InData1, InData2, InData3, oi);
+}
+
+void IRFactory::op_qemu_ld_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_qemu_ld_i64);
+
+    TCGArg DataLo = *args++;
+    TCGArg DataHi = (TCG_TARGET_REG_BITS == 32) ? *args++ : 0;
+    TCGArg AddrLo = *args++;
+    TCGArg AddrHi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) ? *args++ : 0;
+    TCGMemOpIdx oi = *args++;
+    TCGMemOp opc = get_memop(oi);
+
+    Register &Out = Reg[DataLo];
+    Register &In1 = Reg[AddrLo];
+
+    Value *InData1 = LoadState(In1);
+    Value *InData2 = (AddrHi) ? LoadState(Reg[AddrHi]) : nullptr;
+
+    AssertType(In1.Size == 32 || In1.Size == 64);
+
+    SaveStates();
+
+    Value *OutData = QEMULoad(InData1, InData2, oi);
+    OutData = getExtendValue(OutData, Out.Ty, opc);
+
+    if (DataHi == 0)
+        Out.setData(OutData, true);
+    else {
+        Register &Out2 = Reg[DataHi];
+        Value *OutData1 = TRUNC32(OutData);
+        Value *OutData2 = TRUNC32(LSHR(OutData, CONST64(32)));
+        Out.setData(OutData1, true);
+        Out2.setData(OutData2, true);
+    }
+}
+
+void IRFactory::op_qemu_st_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_qemu_st_i64);
+
+    TCGArg DataLo = *args++;
+    TCGArg DataHi = (TCG_TARGET_REG_BITS == 32) ? *args++ : 0;
+    TCGArg AddrLo = *args++;
+    TCGArg AddrHi = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) ? *args++ : 0;
+    TCGMemOpIdx oi = *args++;
+    TCGMemOp opc = get_memop(oi);
+
+    Register &In1 = Reg[DataLo];
+    Register &In2 = Reg[AddrLo];
+
+    Value *InData1 = LoadState(In1);
+    Value *InData2 = LoadState(In2);
+    Value *InData3 = (AddrHi) ?
LoadState(Reg[AddrHi]) : nullptr; + + AssertType(In2.Size == 32 || In2.Size == 64); + + SaveStates(); + + Value *InData; + if (DataHi == 0) + InData = InData1; + else { + InData = LoadState(Reg[DataHi]); + InData = SHL(ZEXT64(InData), CONST64(32)); + InData = OR(InData, ZEXT64(InData1)); + } + + InData = getTruncValue(InData, opc); + QEMUStore(InData, InData2, InData3, oi); +} + + +/* + * Metadata Factory + */ +MDFactory::MDFactory(Module *M) : UID(0), Context(M->getContext()) +{ + Dummy = getMDNode(ArrayRef<ConstantInt*>(getUID())); +} + +MDFactory::~MDFactory() {} + +#if defined(LLVM_V35) +void MDFactory::setConstStatic(LLVMContext &Context, Instruction *I, + ArrayRef<ConstantInt*> V) +{ + SmallVector<Value *, 4> MDs; + for (unsigned i = 0, e = V.size(); i != e; ++i) + MDs.push_back(V[i]); + I->setMetadata(META_CONST, MDNode::get(Context, MDs)); +} + +MDNode *MDFactory::getMDNode(ArrayRef<ConstantInt*> V) +{ + SmallVector<Value *, 4> MDs; + MDs.push_back(getUID()); + for (unsigned i = 0, e = V.size(); i != e; ++i) + MDs.push_back(V[i]); + return MDNode::get(Context, MDs); +} +#else +void MDFactory::setConstStatic(LLVMContext &Context, Instruction *I, + ArrayRef<ConstantInt*> V) +{ + SmallVector<Metadata *, 4> MDs; + for (unsigned i = 0, e = V.size(); i != e; ++i) + MDs.push_back(ConstantAsMetadata::get(V[i])); + I->setMetadata(META_CONST, MDNode::get(Context, MDs)); +} + +MDNode *MDFactory::getMDNode(ArrayRef<ConstantInt*> V) +{ + SmallVector<Metadata *, 4> MDs; + MDs.push_back(ConstantAsMetadata::get(getUID())); + for (unsigned i = 0, e = V.size(); i != e; ++i) + MDs.push_back(ConstantAsMetadata::get(V[i])); + return MDNode::get(Context, MDs); +} +#endif + +#if defined(ENABLE_MCJIT) +DebugLoc MDFactory::getDebugLoc(unsigned Line, unsigned Col, Function *F, + ArrayRef<ConstantInt*> Meta) +{ + Module *M = F->getParent(); + DIBuilder DIB(*M); + auto File = DIB.createFile(F->getName(), "hqemu/"); +#if defined(LLVM_V35) + auto CU = DIB.createCompileUnit(dwarf::DW_LANG_Cobol74, F->getName(), + "hqemu/", "hqemu", true, "", 0); + auto Type = DIB.createSubroutineType(File, + DIB.getOrCreateArray(ArrayRef<Value *>())); + auto SP = DIB.createFunction(CU, F->getName(), "", File, 1, Type, false, + true, 1, 0, true); + auto Scope = DIB.createLexicalBlockFile(SP, File); + DebugLoc DL = DebugLoc::get(Line, Col, Scope); + DIB.finalize(); + SP.replaceFunction(F); +#elif defined(LLVM_V38) || defined(LLVM_V39) + auto CU = DIB.createCompileUnit(dwarf::DW_LANG_Cobol74, F->getName(), + "hqemu/", "hqemu", true, "", 0); + auto Type = DIB.createSubroutineType(DIB.getOrCreateTypeArray(None)); + auto SP = DIB.createFunction(CU, F->getName(), "", File, 1, Type, false, + true, 1, 0, true); + auto Scope = DIB.createLexicalBlockFile(SP, File, 0); + DebugLoc DL = DebugLoc::get(Line, Col, Scope); + DIB.finalize(); + F->setSubprogram(SP); +#else + auto CU = DIB.createCompileUnit(dwarf::DW_LANG_Cobol74, File, + "hqemu", true, "", 0); + auto Type = DIB.createSubroutineType(DIB.getOrCreateTypeArray(None)); + auto SP = DIB.createFunction(CU, F->getName(), "", File, 1, Type, false, + true, 1, DINode::FlagZero, true); + auto Scope = DIB.createLexicalBlockFile(SP, File, 0); + DebugLoc DL = DebugLoc::get(Line, Col, Scope); + DIB.finalize(); + F->setSubprogram(SP); +#endif + + return DL; +} +#else +DebugLoc MDFactory::getDebugLoc(unsigned Line, unsigned Col, Function *F, + ArrayRef<ConstantInt*> Meta) +{ + return DebugLoc::get(Line, Col, getMDNode(Meta)); +} +#endif + + +/* + * TraceBuilder() + */ 
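+/* The constructor below walks the CFG of the optimization request once,
+ * registers every unique node, queues the entry node for translation, and
+ * opens a new IRFactory session and host function for the trace. */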
+TraceBuilder::TraceBuilder(IRFactory *IRF, OptimizationInfo *Opt) + : IF(IRF), Opt(Opt), Aborted(false), Attribute(A_None), Trace(nullptr) +{ + GraphNode *EntryNode = Opt->getCFG(); + if (!EntryNode) + hqemu_error("invalid optimization request.\n"); + + /* Find unique nodes. */ + NodeVec VisitStack; + NodeSet Visited; + VisitStack.push_back(EntryNode); + do { + GraphNode *Node = VisitStack.back(); + VisitStack.pop_back(); + if (Visited.find(Node) == Visited.end()) { + Visited.insert(Node); + + setUniqueNode(Node); + + for (auto Child : Node->getChildren()) + VisitStack.push_back(Child); + } + } while (!VisitStack.empty()); + + /* Add entry node into the building queue. */ + NodeQueue.push_back(EntryNode); + + IF->CreateSession(this); + IF->CreateFunction(); +} + +void TraceBuilder::ConvertToTCGIR(CPUArchState *env) +{ + TranslationBlock *tb = CurrNode->getTB(); + + if (LLEnv->isTraceMode()) { + env->image_base = (uintptr_t)tb->image - tb->pc; + tcg_copy_state(env, tb); + } + + tcg_func_start(&tcg_ctx, tb); + gen_intermediate_code(env, tb); + tcg_liveness_analysis(&tcg_ctx); +} + +static inline bool isVecOp(TCGOpcode opc) +{ + switch (opc) { + case INDEX_op_vector_start ... INDEX_op_vector_end: + return true; + default: + return false; + } +} + +void TraceBuilder::ConvertToLLVMIR() +{ + IF->CreateBlock(); + + auto OpcFunc = (IRFactory::FuncPtr *)IF->getOpcFunc(); + TCGArg *VecArgs = tcg_ctx.vec_opparam_buf; + + IF->NI.setTB(CurrNode->getTB()); + for (int oi = tcg_ctx.gen_first_op_idx; oi >= 0; ) { + TCGOp * const op = &tcg_ctx.gen_op_buf[oi]; + TCGArg *args = &tcg_ctx.gen_opparam_buf[op->args]; + oi = op->next; + + if (isVecOp(op->opc)) { + args = VecArgs; + VecArgs += 3; + } + + IF->NI.setOp(op); + (IF->*OpcFunc[op->opc])(args); + + if (isAborted()) { + IF->DeleteSession(); + return; + } + } +} + +void TraceBuilder::Abort() +{ + Aborted = true; +} + +void TraceBuilder::Finalize() +{ + /* Reconnect links of basic blocks. The links are previously + set to ExitBB. */ + for (unsigned i = 0, e = Branches.size(); i != e; ++i) { + BranchInst *BI = Branches[i].first; + GraphNode *Node = Branches[i].second; + IF->setSuccessor(BI, getBasicBlock(Node)); + } + + Trace = new TraceInfo(NodeUsed, Attribute); + IF->Compile(); + IF->DeleteSession(); +} + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/llvm/llvm-soft-perfmon.cpp b/llvm/llvm-soft-perfmon.cpp new file mode 100644 index 0000000..a5f9a56 --- /dev/null +++ b/llvm/llvm-soft-perfmon.cpp @@ -0,0 +1,357 @@ +/* + * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. 
+ */ + +#include <iostream> +#include <sstream> +#include "tracer.h" +#include "utils.h" +#include "llvm.h" +#include "llvm-target.h" +#include "llvm-soft-perfmon.h" + + +extern LLVMEnv *LLEnv; +extern unsigned ProfileThreshold; +extern unsigned PredictThreshold; + +/* + * Software Performance Monitor (SPM) + */ +void SoftwarePerfmon::ParseProfileMode(std::string &ProfileLevel) +{ + static std::string profile_str[SPM_NUM] = { + "none", "basic", "trace", "cache", "pass", "hpm", "exit", "hotspot", "all" + }; + static uint64_t profile_enum[SPM_NUM] = { + SPM_NONE, SPM_BASIC, SPM_TRACE, SPM_CACHE, SPM_PASS, SPM_HPM, + SPM_EXIT, SPM_HOTSPOT, SPM_ALL, + }; + + if (ProfileLevel.empty()) + return; + + std::istringstream ss(ProfileLevel); + std::string token; + while(getline(ss, token, ',')) { + for (int i = 0; i != SPM_NUM; ++i) { + if (token == profile_str[i]) { + Mode |= profile_enum[i]; + break; + } + } + } +} + +void SoftwarePerfmon::printProfile() +{ + if (!isEnabled()) + return; + + if (LLVMEnv::TransMode == TRANS_MODE_NONE || + LLVMEnv::TransMode == TRANS_MODE_INVALID) + return; + + if (LLVMEnv::TransMode == TRANS_MODE_BLOCK) + printBlockProfile(); + else + printTraceProfile(); +} + +void SoftwarePerfmon::printBlockProfile() +{ + LLVMEnv::TransCodeList &TransCode = LLEnv->getTransCode(); + uint32_t GuestSize = 0, GuestICount = 0, HostSize = 0; + uint64_t TransTime = 0, MaxTime = 0; + + for (auto TC : TransCode) { + TraceInfo *Trace = TC->Trace; + TranslationBlock *TB = TC->EntryTB; + GuestSize += TB->size; + GuestICount += TB->icount; + HostSize += TC->Size; + TransTime += Trace->TransTime; + if (Trace->TransTime > MaxTime) + MaxTime = Trace->TransTime; + } + + auto &OS = DM.debug(); + OS << "\nBlock statistic:\n" + << "Num of Blocks : " << TransCode.size() << "\n" + << "G/H Code Size : " << GuestSize << "/" << HostSize << "bytes\n" + << "Guest ICount : " << GuestICount << "\n" + << "Translation Time : " << format("%.6f", (double)TransTime * 1e-6) + << " seconds (max=" << MaxTime /1000 << " ms)\n"; +} + +static void printBasic(LLVMEnv::TransCodeList &TransCode) +{ + uint32_t GuestSize = 0, GuestICount = 0, HostSize = 0; + uint32_t NumBlock = 0, NumLoop = 0, NumExit = 0, NumIndirectBr = 0; + uint32_t MaxBlock = 0, MaxLoop = 0, MaxExit = 0, MaxIndirectBr = 0; + uint64_t TransTime = 0, MaxTime = 0; + unsigned NumTraces = TransCode.size(); + std::map<unsigned, unsigned> LenDist; + + for (auto TC : TransCode) { + TraceInfo *Trace = TC->Trace; + TBVec &TBs = Trace->TBs; + for (unsigned i = 0, e = TBs.size(); i != e; ++i) { + GuestSize += TBs[i]->size; + GuestICount += TBs[i]->icount; + } + HostSize += TC->Size; + + NumBlock += TBs.size(); + NumLoop += Trace->NumLoop; + NumExit += Trace->NumExit; + NumIndirectBr += Trace->NumIndirectBr; + TransTime += Trace->TransTime; + + if (TBs.size() > MaxBlock) + MaxBlock = TBs.size(); + if (Trace->NumLoop > MaxLoop) + MaxLoop = Trace->NumLoop; + if (Trace->NumExit > MaxExit) + MaxExit = Trace->NumExit; + if (Trace->NumIndirectBr > MaxIndirectBr) + MaxIndirectBr = Trace->NumIndirectBr; + if (Trace->TransTime > MaxTime) + MaxTime = Trace->TransTime; + LenDist[TBs.size()]++; + } + + auto &OS = DM.debug(); + OS << "Trace statistic:\n" + << "Num of Traces : " << NumTraces << "\n" + << "Profile Thres. : " << ProfileThreshold << "\n" + << "Predict Thres. 
: " << PredictThreshold << "\n" + << "G/H Code Size : " << GuestSize << "/" << HostSize << " bytes\n" + << "Translation Time : " << format("%.6f", (double)TransTime * 1e-6) + << " seconds (max=" << MaxTime /1000 << " ms)\n" + << "Average # Blocks : " << format("%.1f", (double)NumBlock / NumTraces) + << " (max=" << MaxBlock << ")\n" + << "Average # Loops : " << format("%.1f", (double)NumLoop / NumTraces) + << " (max=" << MaxLoop << ")\n" + << "Average # Exits : " << format("%.1f", (double)NumExit / NumTraces) + << " (max=" << MaxExit << ")\n" + << "Average # IBs : " << format("%.1f", (double)NumIndirectBr / NumTraces) + << " (max=" << MaxIndirectBr << ")\n" + << "Flush Count : " << LLEnv->getNumFlush() << "\n"; + + OS << "Trace length distribution: (1-" << MaxBlock << ")\n "; + for (unsigned i = 1; i <= MaxBlock; i++) + OS << LenDist[i] << " "; + OS << "\n"; +} + +static void printTraceExec(LLVMEnv::TransCodeList &TransCode) +{ + unsigned NumThread = 0; + for (auto next_cpu = first_cpu; next_cpu != nullptr; + next_cpu = CPU_NEXT(next_cpu)) + NumThread++; + + /* Detailed trace information and runtime counters. */ + auto &OS = DM.debug(); + OS << "----------------------------\n" + << "Trace execution information:\n"; + + unsigned NumTraces = TransCode.size(); + for (unsigned i = 0; i != NumThread; ++i) { + unsigned TraceUsed = 0; + + OS << ">\n" + << "Thread " << i << ":\n" + << " dynamic exec count\n" + << " id pc #loop:#exit loop ibtc exit\n"; + for (unsigned j = 0; j != NumTraces; ++j) { + TraceInfo *Trace = TransCode[j]->Trace; + uint64_t *Counter = Trace->ExecCount[i]; + if (Counter[0] + Counter[1] + Counter[2] == 0) + continue; + TraceUsed++; + OS << format("%4d", j) << ") " + << format("0x%08" PRIx, Trace->getEntryPC()) << " " + << format("%2d", Trace->NumLoop) << " " + << format("%2d", Trace->NumExit) << " " + << format("%8" PRId64, Counter[0]) << " " + << format("%8" PRId64, Counter[1]) << " " + << format("%8" PRId64, Counter[2]) << "\n"; + } + OS << "Trace used: " << TraceUsed << "/" << NumTraces <<"\n"; + } +} + +static void printHPM() +{ + auto &OS = DM.debug(); + OS << "Num of Insns : " << SP->NumInsns << "\n" + << "Num of Loads : " << SP->NumLoads << "\n" + << "Num of Stores : " << SP->NumStores << "\n" + << "Num of Branches : " << SP->NumBranches << "\n" + << "Sample Time : " << format("%.6f seconds", (double)SP->SampleTime * 1e-6) + << "\n"; +} + +static void printHotspot(unsigned &CoverSet, + std::vector<std::vector<uint64_t> *> &SampleListVec) +{ + auto &OS = DM.debug(); + auto &TransCode = LLEnv->getTransCode(); + auto &SortedCode = LLEnv->getSortedCode(); + uint64_t BlockCacheStart = (uintptr_t)tcg_ctx_global.code_gen_buffer; + uint64_t BlockCacheEnd = BlockCacheStart + tcg_ctx_global.code_gen_buffer_size; + uint64_t TraceCacheStart = (uintptr_t)LLVMEnv::TraceCache; + uint64_t TraceCacheEnd = TraceCacheStart + LLVMEnv::TraceCacheSize; + uint64_t TotalSamples = 0; + uint64_t NumBlockCache = 0, NumTraceCache = 0, NumOther = 0; + + for (auto *L : SampleListVec) { + for (uint64_t IP : *L) { + if (IP >= BlockCacheStart && IP < BlockCacheEnd) + NumBlockCache++; + else if (IP >= TraceCacheStart && IP < TraceCacheEnd) + NumTraceCache++; + else + NumOther++; + + auto IT = SortedCode.upper_bound(IP); + if (IT == SortedCode.begin()) + continue; + auto TC = (--IT)->second; + if (IP < (uint64_t)TC->Code + TC->Size) + TC->SampleCount++;; + } + delete L; + } + + TotalSamples = NumBlockCache + NumTraceCache + NumOther; + if (TotalSamples == 0 || TransCode.empty()) { + OS << CoverSet << "% 
CoverSet : 0\n"; + return; + } + + /* Print the time breakdown of block cache, trace cache and other. */ + char buf[128] = {'\0'}; + double RatioBlockCache = (double)NumBlockCache * 100 / TotalSamples; + double RatioTraceCache = (double)NumTraceCache * 100 / TotalSamples; + sprintf(buf, "block (%.1f%%) trace (%.1f%%) other (%.1f%%)", RatioBlockCache, + RatioTraceCache, 100.0f - RatioBlockCache - RatioTraceCache); + OS << "Breakdown : " << buf << "\n"; + + /* Print the amount of traces in the cover set. */ + std::map<TranslatedCode *, unsigned> IndexMap; + for (unsigned i = 0, e = TransCode.size(); i != e; ++i) + IndexMap[TransCode[i]] = i; + + LLVMEnv::TransCodeList Covered(TransCode.begin(), TransCode.end()); + std::sort(Covered.begin(), Covered.end(), + [](const TranslatedCode *a, const TranslatedCode *b) { + return a->SampleCount > b->SampleCount; + }); + + uint64_t CoverSamples = TotalSamples * CoverSet / 100; + uint64_t AccuSamples = 0; + unsigned NumTracesInCoverSet = 0; + for (TranslatedCode *TC : Covered) { + if (AccuSamples >= CoverSamples || TC->SampleCount == 0) + break; + NumTracesInCoverSet++; + AccuSamples += TC->SampleCount; + } + + OS << CoverSet << "% CoverSet : " << NumTracesInCoverSet << "\n"; + + if (NumTracesInCoverSet == 0) + return; + + /* Print the percentage of time of the traces in the cover set. */ + if (DM.getDebugMode() & DEBUG_IR_OPT) { + OS << "Traces of CoverSet:\n"; + for (unsigned i = 0; i < NumTracesInCoverSet; ++i) { + TranslatedCode *TC = Covered[i]; + sprintf(buf, "%4d (%.1f%%): ", IndexMap[TC], + (double)TC->SampleCount * 100 / TotalSamples); + OS << buf; + int j = 0; + for (auto *TB: TC->Trace->TBs) { + std::stringstream ss; + ss << std::hex << TB->pc; + OS << (j++ == 0 ? "" : ",") << ss.str(); + } + OS << "\n"; + } + } else { + unsigned top = 10; + + OS << "Percentage of CoverSet (top 10): "; + if (NumTracesInCoverSet < top) + top = NumTracesInCoverSet; + for (unsigned i = 0; i < top; ++i) { + TranslatedCode *TC = Covered[i]; + sprintf(buf, "%.1f%%", (double)TC->SampleCount * 100 / TotalSamples); + OS << (i == 0 ? 
"" : " ") << buf; + } + OS << "\n"; + } +} + +void SoftwarePerfmon::printTraceProfile() +{ + auto &OS = DM.debug(); + unsigned NumTraces = LLEnv->getTransCode().size(); + + OS << "\n"; + if (NumTraces == 0) { + OS << "Trace statistic:\n" + << "Num of Traces : " << NumTraces << "\n\n"; + return; + } + + /* Static information */ + if (Mode & SPM_BASIC) + printBasic(LLEnv->getTransCode()); + if (Mode & SPM_EXIT) + OS << "Num of TraceExit : " << NumTraceExits << "\n"; + if (Mode & SPM_HPM) + printHPM(); + if (Mode & SPM_HOTSPOT) + printHotspot(CoverSet, SP->SampleListVec); + + /* Code cache infomation - start address and size */ + if (Mode & SPM_CACHE) { + size_t BlockSize = (uintptr_t)tcg_ctx_global.code_gen_ptr - + (uintptr_t)tcg_ctx_global.code_gen_buffer; + size_t TraceSize = LLEnv->getMemoryManager()->getCodeSize(); + + OS << "-------------------------\n" + << "Block/Trace Cache information:\n"; + OS << "Block: start=" << tcg_ctx_global.code_gen_buffer + << " size=" << tcg_ctx_global.code_gen_buffer_size + << " code=" << format("%8d", BlockSize) << " (ratio=" + << format("%.2f", (double)BlockSize * 100 / tcg_ctx_global.code_gen_buffer_size) + << "%)\n"; + OS << "Trace: start=" << LLVMEnv::TraceCache + << " size=" << LLVMEnv::TraceCacheSize + << " code=" << format("%8d", TraceSize) << " (ratio=" + << format("%.2f", (double)TraceSize * 100 / LLVMEnv::TraceCacheSize) + << "%)\n\n"; + } + + if (Mode & SPM_TRACE) + printTraceExec(LLEnv->getTransCode()); + + if ((Mode & SPM_PASS) && !ExitFunc.empty()) { + OS << "\n-------------------------\n" + << "Pass information:\n"; + for (unsigned i = 0, e = ExitFunc.size(); i != e; ++i) + (*ExitFunc[i])(); + } +} + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/llvm/llvm-target.cpp b/llvm/llvm-target.cpp new file mode 100644 index 0000000..609a4ad --- /dev/null +++ b/llvm/llvm-target.cpp @@ -0,0 +1,812 @@ +/* + * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/Object/Binary.h" +#include "llvm/DebugInfo/DIContext.h" +#include "llvm/Support/Debug.h" +#include "llvm-debug.h" +#include "llvm-opc.h" +#include "llvm-target.h" + +using namespace llvm::object; + +extern "C" { +#if defined(TARGET_I386) +extern const int comis_eflags[4]; +extern const int fcom_ccval[4]; +#endif +} + + +static std::vector<TCGHelperInfo> MMUHelper = { +#if defined(CONFIG_SOFTMMU) + { (void *)llvm_ret_ldub_mmu, "llvm_ret_ldub_mmu", }, + { (void *)llvm_le_lduw_mmu, "llvm_le_lduw_mmu", }, + { (void *)llvm_le_ldul_mmu, "llvm_le_ldul_mmu", }, + { (void *)llvm_le_ldq_mmu, "llvm_le_ldq_mmu", }, + { (void *)llvm_be_lduw_mmu, "llvm_be_lduw_mmu", }, + { (void *)llvm_be_ldul_mmu, "llvm_be_ldul_mmu", }, + { (void *)llvm_be_ldq_mmu, "llvm_be_ldq_mmu", }, + { (void *)llvm_ret_ldsb_mmu, "llvm_ret_ldsb_mmu", }, + { (void *)llvm_le_ldsw_mmu, "llvm_le_ldsw_mmu", }, + { (void *)llvm_le_ldsl_mmu, "llvm_le_ldsl_mmu", }, + { (void *)llvm_be_ldsw_mmu, "llvm_be_ldsw_mmu", }, + { (void *)llvm_be_ldsl_mmu, "llvm_be_ldsl_mmu", }, + + { (void *)llvm_ret_stb_mmu, "llvm_ret_stb_mmu", }, + { (void *)llvm_le_stw_mmu, "llvm_le_stw_mmu", }, + { (void *)llvm_le_stl_mmu, "llvm_le_stl_mmu", }, + { (void *)llvm_le_stq_mmu, "llvm_le_stq_mmu", }, + { (void *)llvm_be_stw_mmu, "llvm_be_stw_mmu", }, + { (void *)llvm_be_stl_mmu, "llvm_be_stl_mmu", }, + { (void *)llvm_be_stq_mmu, "llvm_be_stq_mmu", }, +#endif +}; + + +/* Helper functions that cause side effect. 
+ * For example, helpers modifying CPU states that cannot be identified, + * or helpers that call MMU helpers. + * During translating qemu_ld/st, we record MMU helper calls so that we + * know how to restore when page fault is handled. Unfortunately, we lose + * track of the MMU helper calls in a helper function and the restoration + * will fail. Currently, we mark such helper functions as illegal ones and + * we skip trace building when a call to one of them when translating + * op_call. */ +static std::vector<TCGHelperInfo> IllegalHelper = { +#if defined(CONFIG_SOFTMMU) +# if defined(TARGET_I386) + { (void *)helper_cmpxchg8b, "helper_cmpxchg8b", }, + { (void *)helper_boundw, "helper_boundw", }, + { (void *)helper_boundl, "helper_boundl", }, +# elif defined(TARGET_ARM) + { (void *)helper_dc_zva, "helper_dc_zva", }, +# endif +#else +# if defined(TARGET_AARCH64) + { (void *)helper_simd_tbl, "helper_simd_tbl", }, +# endif +#endif +}; + + +#define DEF_HELPER_FLAGS_0(name, flags, ret) { (void *)helper_##name, "helper_"#name }, +#define DEF_HELPER_FLAGS_1(name, flags, ret, t1) DEF_HELPER_FLAGS_0(name, flags, ret) +#define DEF_HELPER_FLAGS_2(name, flags, ret, t1, t2) DEF_HELPER_FLAGS_0(name, flags, ret) +#define DEF_HELPER_FLAGS_3(name, flags, ret, t1, t2, t3) DEF_HELPER_FLAGS_0(name, flags, ret) +#define DEF_HELPER_FLAGS_4(name, flags, ret, t1, t2, t3, t4) DEF_HELPER_FLAGS_0(name, flags, ret) + +static std::vector<TCGHelperInfo> LMTHelper = { +#if defined(CONFIG_SOFTMMU) +#include "atomic-helper.h" +#endif +}; + +#undef DEF_HELPER_FLAGS_0 +#undef DEF_HELPER_FLAGS_1 +#undef DEF_HELPER_FLAGS_2 +#undef DEF_HELPER_FLAGS_3 +#undef DEF_HELPER_FLAGS_4 + + +const char *getMMUFName(const void *func) +{ + for (unsigned i = 0, e = MMUHelper.size(); i != e; ++i) { + if (func == MMUHelper[i].func) + return MMUHelper[i].name; + } + return ""; +} + +bool isMMUFunction(std::string &Name) +{ + for (unsigned i = 0, e = MMUHelper.size(); i != e; ++i) { + if (Name == MMUHelper[i].name) + return true; + } + return false; +} + +bool isLMTFunction(std::string &Name) +{ + for (unsigned i = 0, e = LMTHelper.size(); i != e; ++i) { + if (Name == LMTHelper[i].name) + return true; + } + return false; +} + +bool isIllegalHelper(const void *func) +{ + for (unsigned i = 0, e = IllegalHelper.size(); i != e; ++i) { + if (func == IllegalHelper[i].func) + return true; + } + return false; +} + +/* Determine whether the function name is a system library or not. 
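Calls to these routines from a helper are tagged with const metadata by the inlining analysis instead of aborting it.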
*/ +bool isLibcall(std::string &Name) +{ + if (Name == "fmodf" || Name == "fmod" || Name == "fmodl" || + Name == "abs" || Name == "labs" || Name == "llabs" || + Name == "fabs" || Name == "fabsf" || Name == "fabsl" || + Name == "sqrtf" || Name == "sqrt" || Name == "sqrtl" || + Name == "logf" || Name == "log" || Name == "logl" || + Name == "log2f" || Name == "log2" || Name == "log2l" || + Name == "log10f" || Name == "log10" || Name == "log10l" || + Name == "expf" || Name == "exp" || Name == "expl" || + Name == "exp2f" || Name == "exp2" || Name == "exp2l" || + Name == "ldexpf" || Name == "ldexp" || Name == "ldexpl" || + Name == "sinf" || Name == "sin" || Name == "sinl" || + Name == "cosf" || Name == "cos" || Name == "cosl" || + Name == "tanf" || Name == "tan" || Name == "tanl" || + Name == "atanf" || Name == "atan" || Name == "atanl" || + Name == "atanf2" || Name == "atan2" || Name == "atanl2" || + Name == "powf" || Name == "pow" || Name == "powl" || + Name == "ceilf" || Name == "ceil" || Name == "ceill" || + Name == "truncf" || Name == "trunc" || Name == "truncl" || + Name == "rintf" || Name == "rint" || Name == "rintl" || + Name == "lrintf" || Name == "lrint" || Name == "lrintl" || + Name == "nearbyintf" || Name == "nearbyint" || Name == "nearbyintl" || + Name == "floorf" || Name == "floor" || Name == "floorl" || + Name == "copysignf" || Name == "copysign" || Name == "copysignl" || + Name == "memcpy" || Name == "memmove" || Name == "memset" || + Name == "fegetround" || Name == "fesetround" || + Name == "__isinfl" || Name == "__isnanl") + { + return true; + } + + return false; +} + +/* Determine whether the function name is a softfloat helper or not. */ +bool isSoftFPcall(std::string &Name) +{ + static char SoftFPName[][128] = { + "float16_to_float32", + "float32_add", + "float32_compare", + "float32_compare_quiet", + "float32_div", + "float32_mul", + "float32_scalbn", + "float32_sqrt", + "float32_sub", + "float32_to_float16", + "float32_to_float64", + "float32_to_int32", + "float32_to_int64", + "float32_to_uint32", + "float32_minnum", + "float32_maxnum", + "float64_add", + "float64_compare", + "float64_compare_quiet", + "float64_div", + "float64_mul", + "float64_scalbn", + "float64_sqrt", + "float64_sub", + "float64_to_float32", + "float64_to_int32", + "float64_to_int64", + "float64_to_uint32", + "float64_minnum", + "float64_maxnum", + "int32_to_float32", + "int32_to_float64", + "int64_to_float32", + "normalizeRoundAndPackFloat128", + "propagateFloat128NaN", + "propagateFloatx80NaN", + "roundAndPackFloat128", + "roundAndPackFloat32", + "roundAndPackFloat64", + "roundAndPackFloatx80", + "set_float_rounding_mode", + "subFloat128Sigs", + "subFloat32Sigs", + "subFloat64Sigs", + "subFloatx80Sigs", + "uint32_to_float32", + "uint32_to_float64", +#if 0 + /* FIXME: this function causes LLVM JIT error: + LLVM ERROR: Error reading function 'set_float_exception_flags' from bitcode file: Malformed block record */ + "set_float_exception_flags", +#endif + "addFloat32Sigs", + "addFloat64Sigs", + + "float32_to_int32_round_to_zero", + "float64_to_int32_round_to_zero", + + "int32_to_floatx80", + "int64_to_floatx80", + "float32_to_floatx80", + "float64_to_floatx80", + "floatx80_abs", + "floatx80_chs", + "floatx80_is_infinity", + "floatx80_is_neg", + "floatx80_is_zero", + "floatx80_is_zero_or_denormal", + "floatx80_is_any_nan", + + "floatx80_to_int32", + "floatx80_to_int32_round_to_zero", + "floatx80_to_int64", + "floatx80_to_int64_round_to_zero", + "floatx80_to_float32", + "floatx80_to_float64", + 
"floatx80_to_float128", + "floatx80_round_to_int", + "floatx80_add", + "floatx80_sub", + "floatx80_mul", + "floatx80_div", + "floatx80_rem", + "floatx80_sqrt", + "floatx80_eq", + "floatx80_le", + "floatx80_lt", + "floatx80_unordered", + "floatx80_eq_quiet", + "floatx80_le_quiet", + "floatx80_lt_quiet", + "floatx80_unordered_quiet", + "floatx80_compare", + "floatx80_compare_quiet", + "floatx80_is_quiet_nan", + "floatx80_is_signaling_nan", + "floatx80_maybe_silence_nan", + "floatx80_scalbn", + }; + + for (int i = 0, e = ARRAY_SIZE(SoftFPName); i < e; i++) { + if (Name == SoftFPName[i]) + return true; + } + return false; +} + +/* Bind function names/addresses that are used in the softfloat helpers. */ +void AddFPUSymbols(LLVMTranslator *Translator) +{ +#define AddSymbol(a) Translator->AddSymbol(#a, (void*)a) + AddSymbol(float32_add); + AddSymbol(float32_sub); + AddSymbol(float32_mul); + AddSymbol(float32_div); + AddSymbol(float32_sqrt); + AddSymbol(float32_scalbn); + AddSymbol(float32_compare); + AddSymbol(float32_compare_quiet); + AddSymbol(float32_minnum); + AddSymbol(float32_maxnum); + AddSymbol(float64_add); + AddSymbol(float64_sub); + AddSymbol(float64_mul); + AddSymbol(float64_div); + AddSymbol(float64_sqrt); + AddSymbol(float64_scalbn); + AddSymbol(float64_compare); + AddSymbol(float64_compare_quiet); + AddSymbol(float64_minnum); + AddSymbol(float64_maxnum); + AddSymbol(float16_to_float32); + AddSymbol(float32_to_float16); + AddSymbol(float32_to_float64); + AddSymbol(float32_to_int32); + AddSymbol(float32_to_int64); + AddSymbol(float32_to_uint32); + AddSymbol(float64_to_float32); + AddSymbol(float64_to_int32); + AddSymbol(float64_to_int64); + AddSymbol(float64_to_uint32); + AddSymbol(int32_to_float32); + AddSymbol(int32_to_float64); + AddSymbol(int64_to_float32); + AddSymbol(uint32_to_float32); + AddSymbol(uint32_to_float64); + AddSymbol(float32_to_int32_round_to_zero); + AddSymbol(float64_to_int32_round_to_zero); + + AddSymbol(int32_to_floatx80); + AddSymbol(int64_to_floatx80); + AddSymbol(float32_to_floatx80); + AddSymbol(float64_to_floatx80); + AddSymbol(floatx80_abs); + AddSymbol(floatx80_chs); + AddSymbol(floatx80_is_infinity); + AddSymbol(floatx80_is_neg); + AddSymbol(floatx80_is_zero); + AddSymbol(floatx80_is_zero_or_denormal); + AddSymbol(floatx80_is_any_nan); + + AddSymbol(floatx80_to_int32); + AddSymbol(floatx80_to_int32_round_to_zero); + AddSymbol(floatx80_to_int64); + AddSymbol(floatx80_to_int64_round_to_zero); + AddSymbol(floatx80_to_float32); + AddSymbol(floatx80_to_float64); + AddSymbol(floatx80_to_float128); + AddSymbol(floatx80_round_to_int); + AddSymbol(floatx80_add); + AddSymbol(floatx80_sub); + AddSymbol(floatx80_mul); + AddSymbol(floatx80_div); + AddSymbol(floatx80_rem); + AddSymbol(floatx80_sqrt); + AddSymbol(floatx80_eq); + AddSymbol(floatx80_le); + AddSymbol(floatx80_lt); + AddSymbol(floatx80_unordered); + AddSymbol(floatx80_eq_quiet); + AddSymbol(floatx80_le_quiet); + AddSymbol(floatx80_lt_quiet); + AddSymbol(floatx80_unordered_quiet); + AddSymbol(floatx80_compare); + AddSymbol(floatx80_compare_quiet); + AddSymbol(floatx80_is_quiet_nan); + AddSymbol(floatx80_is_signaling_nan); + AddSymbol(floatx80_maybe_silence_nan); + AddSymbol(floatx80_scalbn); + + AddSymbol(rint); + AddSymbol(rintf); + AddSymbol(lrint); + AddSymbol(lrintf); + AddSymbol(llrint); + AddSymbol(llrintf); + AddSymbol(remainder); + AddSymbol(remainderf); + AddSymbol(fabs); + AddSymbol(fabsf); + AddSymbol(sqrt); + AddSymbol(sqrtf); + AddSymbol(trunc); + AddSymbol(exp2); + AddSymbol(log); + 
AddSymbol(ldexp); + AddSymbol(floor); + AddSymbol(ceil); + AddSymbol(sin); + AddSymbol(cos); + AddSymbol(tan); + AddSymbol(atan2); + AddSymbol(__isinf); + AddSymbol(__isnan); +#undef AddSymbol +} + +void AddLMTSymbols(LLVMTranslator *Translator) +{ + for (unsigned i = 0, e = LMTHelper.size(); i != e; ++i) { + TCGHelperInfo &H = LMTHelper[i]; + Translator->AddSymbol(H.name, H.func); + } +} + +void AddMMUSymbols(LLVMTranslator *Translator) +{ + for (unsigned i = 0, e = MMUHelper.size(); i != e; ++i) { + TCGHelperInfo &H = MMUHelper[i]; + Translator->AddSymbol(H.name, H.func); + } +} + +/* Bind function names/addresses that are used by the helpers. */ +#if defined(CONFIG_USER_ONLY) +void AddDependentSymbols(LLVMTranslator *Translator) +{ + Translator->AddSymbol("helper_verify_tb", (void*)helper_verify_tb); + Translator->AddSymbol("helper_lookup_ibtc", (void*)helper_lookup_ibtc); + Translator->AddSymbol("guest_base", (void*)&guest_base); + Translator->AddSymbol("cpu_loop_exit", (void*)cpu_loop_exit); + Translator->AddSymbol("qemu_logfile", (void*)&qemu_logfile); + Translator->AddSymbol("qemu_loglevel", (void*)&qemu_loglevel); + + Translator->AddSymbol("alignment_count", (void*)alignment_count); + Translator->AddSymbol("aligned_boundary", (void*)&aligned_boundary); + +#if defined(TARGET_I386) + Translator->AddSymbol("parity_table", (void*)parity_table); + Translator->AddSymbol("comis_eflags", (void*)comis_eflags); + Translator->AddSymbol("fcom_ccval", (void*)fcom_ccval); + Translator->AddSymbol("raise_exception", (void*)raise_exception); + Translator->AddSymbol("raise_exception_err", (void*)raise_exception_err); +#endif + + AddFPUSymbols(Translator); +} +#else +void AddDependentSymbols(LLVMTranslator *Translator) +{ + Translator->AddSymbol("helper_verify_tb", (void*)helper_verify_tb); + Translator->AddSymbol("helper_lookup_ibtc", (void*)helper_lookup_ibtc); + Translator->AddSymbol("helper_lookup_cpbl", (void*)helper_lookup_cpbl); + Translator->AddSymbol("helper_validate_cpbl", (void*)helper_validate_cpbl); + Translator->AddSymbol("cpu_loop_exit", (void*)cpu_loop_exit); + Translator->AddSymbol("qemu_logfile", (void*)&qemu_logfile); + Translator->AddSymbol("qemu_loglevel", (void*)&qemu_loglevel); + Translator->AddSymbol("exp2", (void*)exp2); + +#if defined(TARGET_I386) + Translator->AddSymbol("parity_table", (void*)parity_table); + Translator->AddSymbol("comis_eflags", (void*)comis_eflags); + Translator->AddSymbol("fcom_ccval", (void*)fcom_ccval); +#endif + + AddFPUSymbols(Translator); + AddLMTSymbols(Translator); + AddMMUSymbols(Translator); +} +#endif + +/* Return base address and offset of a memory access pointer. */ +Value *getBaseWithConstantOffset(const DataLayout *DL, Value *Ptr, + intptr_t &Offset) +{ + Operator *PtrOp = dyn_cast<Operator>(Ptr); + if (!PtrOp) + return Ptr; + + if (PtrOp->getOpcode() == Instruction::BitCast || + PtrOp->getOpcode() == Instruction::IntToPtr) + return getBaseWithConstantOffset(DL, PtrOp->getOperand(0), Offset); + + /* If this is a GEP with constant indices, we can look through it. */ + GEPOperator *GEP = dyn_cast<GEPOperator>(PtrOp); + if (!GEP || !GEP->hasAllConstantIndices()) + return Ptr; + + gep_type_iterator GTI = gep_type_begin(GEP); + for (auto I = GEP->idx_begin(), E = GEP->idx_end(); I != E; ++I, ++GTI) { + ConstantInt *OpC = cast<ConstantInt>(*I); + if (OpC->isZero()) + continue; + + /* Handle a struct and array indices which add their offset to the + * pointer. 
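Struct fields contribute the element offset from the StructLayout, while array and vector indices are scaled by the allocation size of the indexed type.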
*/ +#if defined(LLVM_V35) || defined(LLVM_V38) || defined(LLVM_V39) + if (StructType *STy = dyn_cast<StructType>(*GTI)) +#else + if (StructType *STy = GTI.getStructTypeOrNull()) +#endif + Offset += DL->getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); + else { + intptr_t Size = DL->getTypeAllocSize(GTI.getIndexedType()); + Offset += OpC->getSExtValue() * Size; + } + } + + return getBaseWithConstantOffset(DL, GEP->getPointerOperand(), Offset); +} + +static bool accumulateConstantOffset(const DataLayout *DL, GEPOperator *GEP, + APInt &Offset, Value *GuestBase) +{ + for (auto GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); GTI != GTE; ++GTI) { + /* Skip the operand if it is from the guest base. */ + if (GTI.getOperand() == GuestBase) + continue; + ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand()); + if (!OpC) + return false; + if (OpC->isZero()) + continue; + + /* Handle a struct index, which adds its field offset to the pointer. */ +#if defined(LLVM_V35) || defined(LLVM_V38) || defined(LLVM_V39) + if (StructType *STy = dyn_cast<StructType>(*GTI)) { +#else + if (StructType *STy = GTI.getStructTypeOrNull()) { +#endif + unsigned ElementIdx = OpC->getZExtValue(); + const StructLayout *SL = DL->getStructLayout(STy); + Offset += APInt(Offset.getBitWidth(), + SL->getElementOffset(ElementIdx)); + continue; + } + + /* For array or vector indices, scale the index by the size of the type. */ + APInt Index = OpC->getValue().sextOrTrunc(Offset.getBitWidth()); + Offset += Index * APInt(Offset.getBitWidth(), + DL->getTypeAllocSize(GTI.getIndexedType())); + } + return true; +} + +Value *StripPointer(Value *Ptr) +{ + if (!Ptr->getType()->isPointerTy()) + return Ptr; + + SmallPtrSet<Value *, 8> Visited; + Visited.insert(Ptr); + do { + Operator *PtrOp = cast<Operator>(Ptr); + unsigned Opcode = PtrOp->getOpcode(); + if (Opcode == Instruction::BitCast || + Opcode == Instruction::IntToPtr || + Opcode == Instruction::GetElementPtr) + Ptr = cast<Operator>(Ptr)->getOperand(0); + else + return Ptr; + + if (Visited.count(Ptr)) + break; + Visited.insert(Ptr); + } while (true); + + return Ptr; +} + +Value *StripPointerWithConstantOffset(const DataLayout *DL, Value *Ptr, + APInt &Offset, Value *GuestBase) +{ + if (!Ptr->getType()->isPointerTy()) + return Ptr; + + std::set<Value *> Visited; + Visited.insert(Ptr); + Value *V = Ptr; + do { + if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { + APInt GEPOffset(Offset); + if (!accumulateConstantOffset(DL, GEP, GEPOffset, GuestBase)) + return V; + Offset = GEPOffset; + V = GEP->getPointerOperand(); + continue; + } + + Operator *PtrOp = cast<Operator>(V); + unsigned Opcode = PtrOp->getOpcode(); + if (Opcode == Instruction::BitCast || Opcode == Instruction::IntToPtr) { + V = cast<Operator>(V)->getOperand(0); + } else if (Opcode == Instruction::Add || + Opcode == Instruction::Sub) { + if (!isa<ConstantInt>(PtrOp->getOperand(1))) + return V; + + int64_t C = cast<ConstantInt>(PtrOp->getOperand(1))->getSExtValue(); + if (Opcode == Instruction::Add) + Offset += APInt(Offset.getBitWidth(), C, true); + else + Offset -= APInt(Offset.getBitWidth(), C, true); + V = PtrOp->getOperand(0); + } else + return V; + + if (Visited.find(V) != Visited.end()) + break; + Visited.insert(V); + } while (true); + + return V; +} + +/* Remove an instruction from a basic block. Also delete any instrution used by + * this instruction if it is no longer being used. 
*/ +static void DeleteDeadInstructions(Instruction *Inst) +{ + SmallVector<Instruction*, 16> DeadInsts; + DeadInsts.push_back(Inst); + do { + Instruction *I = DeadInsts.pop_back_val(); + for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { + Value *OpV = I->getOperand(i); + I->setOperand(i, nullptr); + + if (!OpV->use_empty()) continue; + + Instruction *OpI = dyn_cast<Instruction>(OpV); + if (OpI && OpI->getParent()) + DeadInsts.push_back(OpI); + } + I->eraseFromParent(); + } while (!DeadInsts.empty()); +} + +/* Perform instruction removal from the parent container. */ +void ProcessErase(IVec &toErase) +{ + for (auto I = toErase.begin(), E = toErase.end(); I != E; ++I) + DeleteDeadInstructions(*I); + toErase.clear(); +} + + +/* + * JIT Event Listener + */ +void EventListener::NotifyFunctionEmitted(const Function &F, + void *Code, size_t Size, + const EmittedFunctionDetails &Details) +{ + if (!NI.Func) + return; + + NI.Code = (uint8_t *)Code; + NI.Size = Size; +} + +#if defined(LLVM_V35) +void EventListener::NotifyObjectEmitted(const ObjectImage &Obj) +{ + StringRef Name; + uint64_t Code; + uint64_t Size; + unsigned NumFunc = 0; + DIContext* Context = DIContext::getDWARFContext(Obj.getObjectFile()); + + for (auto I = Obj.begin_symbols(), E = Obj.end_symbols(); I != E; ++I) { + object::SymbolRef::Type SymType; + if (I->getType(SymType)) continue; + if (SymType == object::SymbolRef::ST_Function) { + if (I->getName(Name)) continue; + if (I->getAddress(Code)) continue; + if (I->getSize(Size)) continue; + + NumFunc++; + if (!Context) + continue; + + DILineInfoTable Lines = Context->getLineInfoForAddressRange(Code, Size); + DILineInfoTable::iterator Begin = Lines.begin(); + DILineInfoTable::iterator End = Lines.end(); + for (DILineInfoTable::iterator It = Begin; It != End; ++It) + NI.addPatch(It->second.Line, It->second.Column, It->first); + } + } + if (NumFunc != 1) + hqemu_error("internal error.\n"); + + NI.Code = (uint8_t *)Code; + NI.Size = Size; +} +#elif defined(LLVM_V38) +void EventListener::NotifyObjectEmitted(const ObjectFile &Obj, + const RuntimeDyld::LoadedObjectInfo &L) +{ + OwningBinary<ObjectFile> DebugObjOwner = L.getObjectForDebug(Obj); + const ObjectFile &DebugObj = *DebugObjOwner.getBinary(); + DIContext* Context = new DWARFContextInMemory(DebugObj); + uint64_t Code; + uint64_t Size; + unsigned NumFunc = 0; + + for (const std::pair<SymbolRef, uint64_t> &P : computeSymbolSizes(DebugObj)) { + SymbolRef Sym = P.first; + if (Sym.getType() != SymbolRef::ST_Function) + continue; + + ErrorOr<StringRef> Name = Sym.getName(); + if (!Name) + continue; + + ErrorOr<uint64_t> AddrOrErr = Sym.getAddress(); + if (AddrOrErr.getError()) + continue; + + Code = *AddrOrErr; + Size = P.second; + NumFunc++; + + DILineInfoTable Lines = Context->getLineInfoForAddressRange(Code, Size); + DILineInfoTable::iterator Begin = Lines.begin(); + DILineInfoTable::iterator End = Lines.end(); + for (DILineInfoTable::iterator It = Begin; It != End; ++It) + NI.addPatch(It->second.Line, It->second.Column, It->first); + } + + if (NumFunc != 1) + hqemu_error("internal error.\n"); + + NI.Code = (uint8_t *)Code; + NI.Size = Size; +} +#elif defined(LLVM_V39) || defined(LLVM_V50) +void EventListener::NotifyObjectEmitted(const ObjectFile &Obj, + const RuntimeDyld::LoadedObjectInfo &L) +{ + OwningBinary<ObjectFile> DebugObjOwner = L.getObjectForDebug(Obj); + const ObjectFile &DebugObj = *DebugObjOwner.getBinary(); + DIContext* Context = new DWARFContextInMemory(DebugObj); + uint64_t Code; + uint64_t Size; + unsigned 
NumFunc = 0; + + for (const std::pair<SymbolRef, uint64_t> &P : computeSymbolSizes(DebugObj)) { + SymbolRef Sym = P.first; + Expected<SymbolRef::Type> SymTypeOrErr = Sym.getType(); + if (!SymTypeOrErr) + continue; + + SymbolRef::Type SymType = *SymTypeOrErr; + if (SymType != SymbolRef::ST_Function) + continue; + + Expected<StringRef> Name = Sym.getName(); + if (!Name) + continue; + + Expected<uint64_t> AddrOrErr = Sym.getAddress(); + if (!AddrOrErr) + continue; + + Code = *AddrOrErr; + Size = P.second; + NumFunc++; + + DILineInfoTable Lines = Context->getLineInfoForAddressRange(Code, Size); + DILineInfoTable::iterator Begin = Lines.begin(); + DILineInfoTable::iterator End = Lines.end(); + for (DILineInfoTable::iterator It = Begin; It != End; ++It) + NI.addPatch(It->second.Line, It->second.Column, It->first); + } + + if (NumFunc != 1) + hqemu_error("internal error.\n"); + + NI.Code = (uint8_t *)Code; + NI.Size = Size; +} +#else +void EventListener::NotifyObjectEmitted(const ObjectFile &Obj, + const RuntimeDyld::LoadedObjectInfo &L) +{ + OwningBinary<ObjectFile> DebugObjOwner = L.getObjectForDebug(Obj); + const ObjectFile &DebugObj = *DebugObjOwner.getBinary(); + std::unique_ptr<DIContext> Context = DWARFContext::create(DebugObj); + uint64_t Code; + uint64_t Size; + unsigned NumFunc = 0; + + for (const std::pair<SymbolRef, uint64_t> &P : computeSymbolSizes(DebugObj)) { + SymbolRef Sym = P.first; + Expected<SymbolRef::Type> SymTypeOrErr = Sym.getType(); + if (!SymTypeOrErr) + continue; + + SymbolRef::Type SymType = *SymTypeOrErr; + if (SymType != SymbolRef::ST_Function) + continue; + + Expected<StringRef> Name = Sym.getName(); + if (!Name) + continue; + + Expected<uint64_t> AddrOrErr = Sym.getAddress(); + if (!AddrOrErr) + continue; + + Code = *AddrOrErr; + Size = P.second; + NumFunc++; + + DILineInfoTable Lines = Context->getLineInfoForAddressRange(Code, Size); + DILineInfoTable::iterator Begin = Lines.begin(); + DILineInfoTable::iterator End = Lines.end(); + for (DILineInfoTable::iterator It = Begin; It != End; ++It) + NI.addPatch(It->second.Line, It->second.Column, It->first); + } + + if (NumFunc != 1) + hqemu_error("internal error.\n"); + + NI.Code = (uint8_t *)Code; + NI.Size = Size; +} +#endif + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/llvm/llvm-translator.cpp b/llvm/llvm-translator.cpp new file mode 100644 index 0000000..e435b1f --- /dev/null +++ b/llvm/llvm-translator.cpp @@ -0,0 +1,924 @@ +/* + * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. 
+ */ + +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Analysis/InlineCost.h" +#include "fpu/softfloat-native-def.h" +#include "utils.h" +#include "tracer.h" +#include "llvm.h" +#include "llvm-debug.h" +#include "llvm-soft-perfmon.h" +#include "llvm-hard-perfmon.h" +#include "llvm-target.h" +#include "llvm-pass.h" +#include "llvm-opc.h" +#include "llvm-state.h" +#include "llvm-translator.h" + + +static cl::opt<bool> DisableFastMath("disable-fast-math", cl::init(false), + cl::cat(CategoryHQEMU), cl::desc("Disable fast-math optimizations")); + + +static char include_helper[][64] = { +#include "llvm-helper.h" +}; + +extern LLVMEnv *LLEnv; +extern hqemu::Mutex llvm_global_lock; +extern hqemu::Mutex llvm_debug_lock; + +extern bool TraceCacheFull; + + +#if defined(TCG_TARGET_I386) +# if defined(__i386__) +# define AREG0 "ebp" +# elif defined(__x86_64__) +# define AREG0 "r14" +# endif +#elif defined(TCG_TARGET_PPC64) +# define AREG0 "r27" +#elif defined(TCG_TARGET_ARM) +# define AREG0 "r7" +#elif defined(TCG_TARGET_AARCH64) +# define AREG0 "x19" +# define AREG1 "x28" +#else +# error "unsupported processor type" +#endif +const char *BaseRegStr = AREG0; /* The base register name */ + + + +/* + * LLVM Translator + */ +LLVMTranslator::LLVMTranslator(unsigned id, CPUArchState *env) + : MyID(id), Env(env) +{ + dbg() << DEBUG_LLVM << "Starting LLVM Translator " << MyID << ".\n"; + + if (!Env) + hqemu_error("internal error. LLVMEnv is not initialized.\n"); + + /* Create LLVM module and basic types. */ + InitializeModule(); + InitializeType(); + InitializeTarget(); + InitializeHelpers(); + InitializeDisasm(); + + /* Create the TCG IR to LLVM IR conversion module. */ + IF = new IRFactory(this); + +#if defined(ENABLE_MCJIT) + if (MyID == 0) + LLEnv->getMemoryManager()->AddSymbols(Symbols); +#endif + + dbg() << DEBUG_LLVM << "LLVM Translator " << MyID << " initialized.\n"; +} + +LLVMTranslator::~LLVMTranslator() +{ + if (GuestDisAsm) delete GuestDisAsm; + if (HostDisAsm) delete HostDisAsm; + delete IF; + delete Mod; +} + +/* Perform the initialization of the LLVM module. 
*/ +void LLVMTranslator::InitializeModule() +{ + const char *p = strrchr(CONFIG_LLVM_BITCODE, '/'); + if (!p || ++p == 0) + hqemu_error("unknown bitcode file.\n"); + + std::string Bitcode(p); + std::vector<std::string> Path; + + Path.push_back(std::string("/etc/hqemu/").append(Bitcode)); + p = getenv("HOME"); + if (p) + Path.push_back(std::string(p).append("/.hqemu/").append(Bitcode)); + Path.push_back(CONFIG_LLVM_BITCODE); + + unsigned i = 0, e = Path.size(); + for (; i != e; ++i) { + struct stat buf; + if (stat(Path[i].c_str(), &buf) != 0) + continue; + + SMDiagnostic Err; +#if defined(LLVM_V35) + Mod = ParseIRFile(Path[i], Err, Context); +#else + std::unique_ptr<Module> Owner = parseIRFile(Path[i], Err, Context); + Mod = Owner.release(); +#endif + if (Mod) + break; + } + + if (i == e) + hqemu_error("cannot find bitcode file %s.\n", Bitcode.c_str()); + + DL = getDataLayout(Mod); + + dbg() << DEBUG_LLVM << "Use bitcode file " << Path[i] << ".\n"; + dbg() << DEBUG_LLVM << "LLVM module initialized (" << Mod->getTargetTriple() << ").\n"; +} + +void LLVMTranslator::InitializeType() +{ + VoidTy = Type::getVoidTy(Context); + Int8Ty = IntegerType::get(Context, 8); + Int16Ty = IntegerType::get(Context, 16); + Int32Ty = IntegerType::get(Context, 32); + Int64Ty = IntegerType::get(Context, 64); + Int128Ty = IntegerType::get(Context, 128); + + IntPtrTy = DL->getIntPtrType(Context); + Int8PtrTy = Type::getInt8PtrTy(Context, 0); + Int16PtrTy = Type::getInt16PtrTy(Context, 0); + Int32PtrTy = Type::getInt32PtrTy(Context, 0); + Int64PtrTy = Type::getInt64PtrTy(Context, 0); + + FloatTy = Type::getFloatTy(Context); + DoubleTy = Type::getDoubleTy(Context); + + FloatPtrTy = Type::getFloatPtrTy(Context, 0); + DoublePtrTy = Type::getDoublePtrTy(Context, 0); +} + +/* Setup guest-dependent data structures. */ +void LLVMTranslator::InitializeTarget() +{ + /* TODO: any smart way to hack into CPUArchState type? */ + Value *Base = Mod->getNamedValue("basereg"); + if (!Base) + hqemu_error("cannot resolve cpu_proto.\n"); + + BaseReg.resize(TCG_TARGET_NB_REGS); + BaseReg[TCG_AREG0].RegNo = TCG_AREG0; + BaseReg[TCG_AREG0].Name = BaseRegStr; + BaseReg[TCG_AREG0].Ty = Base->getType(); + BaseReg[TCG_AREG0].Base = nullptr; + +#if defined(CONFIG_USER_ONLY) && defined(AREG1) + if (guest_base != 0 || TARGET_LONG_BITS == 32) { + GuestBaseReg.Name = AREG1; + GuestBaseReg.Base = nullptr; + } +#endif + + /* Define the new types of special registers. */ + std::map<Type *, Type *> SpecialReg; + DefineSpecialReg(SpecialReg); + + /* Convert the CPUArchState of aggregate type to the list of single element + * of primitive type. */ + intptr_t Off = 0; + FlattenCPUState(Base->getType()->getContainedType(0), Off, SpecialReg); +} + +/* This function defines the special registers and the new types to be reset. */ +void LLVMTranslator::DefineSpecialReg(std::map<Type *, Type *> &SpecialReg) +{ +#if defined(TARGET_I386) + Value *SIMDReg = Mod->getNamedValue("xmm_reg"); + if (SIMDReg) { + /* remap XMMReg --> <64 x i8> */ + Type *Int8Ty = IntegerType::get(Context, 8); + Type *OldTy = SIMDReg->getType()->getContainedType(0); + Type *NewTy = VectorType::get(Int8Ty, 16); + SpecialReg[OldTy] = NewTy; + } +#endif +} + +/* Convert the CPUArchState of the aggregate type to a list of single element of + * primitive type. Each element contains a pair of offset to CPUArchState and its + * type. This list of flattened type will be used for the state mapping pass. 
*/ +void LLVMTranslator::FlattenCPUState(Type *Ty, intptr_t &Off, + std::map<Type *, Type *> &SpecialReg) +{ + switch (Ty->getTypeID()) { + default: + { + StateType[Off] = Ty; + Off += DL->getTypeSizeInBits(Ty) / 8; + break; + } + case Type::StructTyID: + { + /* Map a special register to another type with the same size as the + * original type. E.g., mapping a <16 * i8> type to <2 * i64>. */ + if (SpecialReg.find(Ty) != SpecialReg.end()) { + Type *NewTy = SpecialReg[Ty]; + StateType[Off] = NewTy; + Off += DL->getTypeSizeInBits(Ty) / 8; + break; + } + + StructType *STy = cast<StructType>(Ty); + intptr_t Size = DL->getTypeSizeInBits(STy) / 8; + intptr_t SubOff; + + const StructLayout *SL = DL->getStructLayout(STy); + for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { + SubOff = Off + SL->getElementOffset(i); + FlattenCPUState(STy->getElementType(i), SubOff, SpecialReg); + } + + Off += Size; + + /* Structure could have padding at the end of the struct. Expand + * the size of the last struct member by adding the padding size. */ + if (Off != SubOff) { + intptr_t LastOff = StateType.rbegin()->first; + intptr_t NewSize = (Off - LastOff) * 8; + Type *NewTy = IntegerType::get(Context, NewSize); + StateType[LastOff] = NewTy; + } + break; + } + case Type::ArrayTyID: + { +#if defined(CONFIG_SOFTMMU) + /* Do not flatten the SoftTLB because it could create a huge amount + * of flattened states. */ + if (Off == offsetof(CPUArchState, tlb_table[0][0])) { + StateType[Off] = Ty; + Off += DL->getTypeSizeInBits(Ty) / 8; + break; + } +#endif + ArrayType *ATy = cast<ArrayType>(Ty); + intptr_t ElemSize = DL->getTypeSizeInBits(ATy->getElementType()) / 8; + for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) { + intptr_t SubOff = Off + i * ElemSize; + FlattenCPUState(ATy->getElementType(), SubOff, SpecialReg); + } + Off += DL->getTypeSizeInBits(ATy) / 8; + break; + } + } +} + +static inline void Materialize(Function &F) +{ +#if defined(LLVM_V35) + std::string ErrInfo; + F.Materialize(&ErrInfo); +#else + F.materialize(); +#endif +} + +/* Materialize helper functions and compute inline costs. */ +void LLVMTranslator::InitializeHelpers() +{ + /* Set target-specific symbols. */ + AddDependentSymbols(this); + + /* Set const helpers. (i.e., helpers that have no side effect) */ + InitializeConstHelpers(); + + /* Materialize fpu helper functions. */ + TCGHelperInfo *FPUHelper = (TCGHelperInfo *)get_native_fpu_helpers(); + for (int i = 0, e = num_native_fpu_helpers(); i != e; ++i) { + std::string ErrInfo; + Function *Func = Mod->getFunction(FPUHelper[i].name); + if (Func && Func->isMaterializable()) + Materialize(*Func); + } + + /* Materialize defined helper functions that are allowed for inlining. */ + for (int i = 0, e = ARRAY_SIZE(include_helper); i < e; ++i) { + std::string ErrInfo; + Helpers[include_helper[i]] = new HelperInfo; + Function *Func = Mod->getFunction(include_helper[i]); + if (Func && Func->isMaterializable()) + Materialize(*Func); + } + + /* Initialize all TCG helper functions. 
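Each helper address is mapped to its helper_<name> symbol, and every inlinable helper also receives a helper_<name>_noinline declaration so that nested calls can be redirected to an out-of-line copy.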
*/ + const TCGHelperInfo *all_helpers = get_tcg_helpers(); + for (int i = 0, e = tcg_num_helpers(); i != e; ++i) { + uintptr_t func = (uintptr_t)all_helpers[i].func; + const char *name = all_helpers[i].name; + if (!name) + hqemu_error("invalid helper name.\n"); + + TCGHelpers[func] = std::string("helper_") + std::string(name); + } + + for (int i = 0, e = tcg_num_helpers(); i != e; ++i) { + std::string FName = std::string("helper_") + + std::string(all_helpers[i].name); + std::string FNameNoInline = FName + std::string("_noinline"); + if (Helpers.find(FName) != Helpers.end()) { + HelperInfo *Helper = Helpers[FName]; + Function *F = Mod->getFunction(FName); + if (!F) + hqemu_error("fatal error - %s\n", FName.c_str()); + Helper->Func = F; + Mod->getOrInsertFunction(FNameNoInline, F->getFunctionType()); + Helper->FuncNoInline = Mod->getFunction(FNameNoInline); + Helpers[FNameNoInline] = Helper; + + AddSymbol(FNameNoInline, all_helpers[i].func); + } + } + + /* Analyze the inline cost for each helper function and make a non-inlined + * counterpart object in LLVM Module. For the non-inlined function, just + * remap the function address in LLVM module which causes the JIT to emit a + * call instruction to the function address. */ + for (int i = 0, e = tcg_num_helpers(); i != e; ++i) { + const TCGHelperInfo *th = &all_helpers[i]; + std::string FName = std::string("helper_") + std::string(th->name); + if (Helpers.find(FName) != Helpers.end()) { + HelperInfo *Helper = Helpers[FName]; + bool ret = OptimizeHelper(*Helper); + if (!ret) { + /* If the helper function consists of loops, it is not suitable + * to be inlined because it conflicts to the state mapping + * pass. */ + Helpers.erase(FName); + goto skip; + } + + Helper->CalculateMetrics(Helper->Func); + continue; + } +skip: + AddSymbol(FName, th->func); + } + + /* Add all states of the nested helpers to the calling helper. + * Then, calculate state boundary and determine if we can know all states + * (included in the nested functions) by this helper function. + * + * Note that we only allow one-level helper inlining. */ + for (auto &I : Helpers) { + HelperInfo *Helper = I.second; + bool hasNestNestedCall = false; + for (CallInst *CI : Helper->NestedCalls) { + std::string FName = CI->getCalledFunction()->getName(); + HelperInfo *NestedHelper = Helpers[FName]; + Helper->States.insert(Helper->States.begin(), + NestedHelper->States.begin(), + NestedHelper->States.end()); + + CI->setCalledFunction(NestedHelper->FuncNoInline); + if (I.first != FName && NestedHelper->hasNestedCall) + hasNestNestedCall = true; + } + /* Clear hasNestedCall if onle one level nested functions. If the + * helper has only one level nested helpers, then all states are found. */ + Helper->hasNestedCall = hasNestNestedCall; + + /* Compute state boundaries. 
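The StateAnalyzer collapses the collected (instruction, offset) references into read and write ranges, which are then attached to the helper via insertState.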
*/ + StateAnalyzer Analyzer(DL); + for (auto J : Helper->States) + Analyzer.addStateRef(J.first, J.second); + + StateRange Reads, Writes; + Analyzer.computeStateRange(Reads, Writes); + + Helper->insertState(Reads, false); + Helper->insertState(Writes, true); + } + + for (auto &I : Helpers) { + HelperInfo *Helper = I.second; + Helper->States.clear(); + Helper->NestedCalls.clear(); + } +} + +void LLVMTranslator::InitializeDisasm() +{ + std::string TargetTriple = "UnknownArch"; + +#if defined(TARGET_I386) + #if defined(TARGET_X86_64) + TargetTriple = "x86_64"; + #else + TargetTriple = "i386"; + #endif +#elif defined(TARGET_ARM) + #if defined(TARGET_AARCH64) + TargetTriple = "aarch64"; + #else + TargetTriple = "arm"; + #endif +#elif defined(TARGET_PPC) + TargetTriple = "ppc"; +#endif + + GuestDisAsm = MCDisasm::CreateMCDisasm(TargetTriple, false); + HostDisAsm = MCDisasm::CreateMCDisasm(Mod->getTargetTriple(), true); + + if (GuestDisAsm) + dbg() << DEBUG_INASM << __func__ + << ": use LLVM disassembler for guest (" << TargetTriple << ").\n"; + else + dbg() << DEBUG_INASM << __func__ + << ": can't find LLVM disassembler for guest (" + << TargetTriple << "). Use QEMU disas.\n"; + + if (HostDisAsm) + dbg() << DEBUG_OUTASM << __func__ + << ": use LLVM disassembler for host (" + << Mod->getTargetTriple() << ").\n"; + else + dbg() << DEBUG_OUTASM << __func__ + << ": can't find LLVM disassembler for host (" + << Mod->getTargetTriple() << "). Use QEMU disas.\n"; +} + +static bool isLegalIntrinsic(IntrinsicInst *II) +{ + switch (II->getIntrinsicID()) { + case Intrinsic::memset: + case Intrinsic::memcpy: + case Intrinsic::memmove: + case Intrinsic::dbg_declare: + return false; + default: + break; + } + return true; +} + +/* Determine if the function argument and Ptr are alias. */ +static Value *isFromFuncArgument(Function &F, Value *Ptr) +{ + Ptr = StripPointer(Ptr); + for (auto I = F.arg_begin(), E = F.arg_end(); I != E; ++I) { + if (Ptr == &*I) + return Ptr; + } + return nullptr; +} + +/* Create function pass manager to optimize the helper function. */ +static void Optimize(Function &F) +{ + auto FPM = new legacy::FunctionPassManager(F.getParent()); + + FPM->add(createReplaceIntrinsic()); + if (!DisableFastMath) + FPM->add(createFastMathPass()); + FPM->run(F); + + delete FPM; +} + +/* Analyze and optimize a helper function. */ +bool LLVMTranslator::OptimizeHelper(HelperInfo &Helper) +{ + Function &F = *Helper.Func; + + /* We don't want to inline helper functions that contain loop. */ + SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> BackEdges; + FindFunctionBackedges(F, BackEdges); + if (BackEdges.size()) + return false; + + Optimize(F); + + /* Collect and analyze memory and call instructions. */ + SmallVector<CallInst *, 16> Calls; + for (auto II = inst_begin(F), EE = inst_end(F); II != EE; ++II) { + Instruction *I = &*II; + + if (isa<LoadInst>(I) || isa<StoreInst>(I)) { + intptr_t Off = 0; + Value *Base = getBaseWithConstantOffset(DL, getPointerOperand(I), Off); + + if (auto GV = dyn_cast<GlobalValue>(StripPointer(Base))) { + if (!GV->hasPrivateLinkage()) + continue; + } + + /* XXX: We assume the pointer is derived from the function argument. + * Skip it if not from the the function argument. */ + Value *Arg = isFromFuncArgument(F, Base); + if (!Arg) + return false; + + if (Base->getType() == BaseReg[TCG_AREG0].Ty) { + /* This is a load/store of CPU state plus a constant offset. + * Track the state. 
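The (instruction, offset) pair is appended to Helper.States and later feeds the state-boundary computation.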
*/ + Helper.States.push_back(std::make_pair(I, Off)); + } else { + /* This is a load/store of unknown pointer. + * Track the maximum access size. */ + Type *Ty = cast<PointerType>(Arg->getType())->getElementType(); + intptr_t Size = DL->getTypeSizeInBits(Ty) / 8; + Helper.mayConflictArg = true; + Helper.ConflictSize = std::max(Helper.ConflictSize, Size); + } + } else if (CallInst *CI = dyn_cast<CallInst>(I)) { + Calls.push_back(CI); + } + } + + /* Analyze calls. */ + for (CallInst *CI : Calls) { + if (CI->isInlineAsm()) + continue; + + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) { + if (!isLegalIntrinsic(II)) + return false; + continue; + } + + if (!CI->getCalledFunction()) + return false; + + std::string FName = CI->getCalledFunction()->getName(); + if (isLibcall(FName) || isSoftFPcall(FName)) { + /* Libcalls/SoftFPCalls are always const function. Mark it. */ + ConstantInt *Meta[] = { CONST32(0) }; + MDFactory::setConstStatic(Context, CI, Meta); + continue; + } + + if (Helpers.find(FName) == Helpers.end()) + return false; + + Helper.hasNestedCall = true; + Helper.NestedCalls.push_back(CI); + } + + return true; +} + +/* Figure out an approximation for how many instructions will be constant + * folded if the specified value is constant. */ +static unsigned CountCodeReductionForConstant(Value *V, CodeMetrics &Metrics) +{ + unsigned IndirectCallBonus; + IndirectCallBonus = -InlineConstants::IndirectCallThreshold; + + unsigned Reduction = 0; + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI) { + User *U = UI->getUser(); + if (isa<BranchInst>(U) || isa<SwitchInst>(U)) { + /* We will be able to eliminate all but one of the successors. */ + const TerminatorInst &TI = cast<TerminatorInst>(*U); + const unsigned NumSucc = TI.getNumSuccessors(); + unsigned Instrs = 0; + for (unsigned I = 0; I != NumSucc; ++I) + Instrs += Metrics.NumBBInsts[TI.getSuccessor(I)]; + /* We don't know which blocks will be eliminated, so use the average size. */ + Reduction += InlineConstants::InstrCost*Instrs*(NumSucc-1)/NumSucc*2; + } else if (CallInst *CI = dyn_cast<CallInst>(U)) { + /* Turning an indirect call into a direct call is a BIG win */ + if (CI->getCalledValue() == V) + Reduction += IndirectCallBonus; + } else if (InvokeInst *II = dyn_cast<InvokeInst>(U)) { + /* Turning an indirect call into a direct call is a BIG win */ + if (II->getCalledValue() == V) + Reduction += IndirectCallBonus; + } else { + Instruction &Inst = cast<Instruction>(*U); + + if (Inst.mayReadFromMemory() || Inst.mayHaveSideEffects() || + isa<AllocaInst>(Inst)) + continue; + + bool AllOperandsConstant = true; + for (unsigned i = 0, e = Inst.getNumOperands(); i != e; ++i) + if (!isa<Constant>(Inst.getOperand(i)) && Inst.getOperand(i) != V) { + AllOperandsConstant = false; + break; + } + + if (AllOperandsConstant) { + /* We will get to remove this instruction... */ + Reduction += InlineConstants::InstrCost; + Reduction += CountCodeReductionForConstant(&Inst, Metrics); + } + } + } + return Reduction; +} + +/* Figure out an approximation of how much smaller the function will be if + * it is inlined into a context where an argument becomes an alloca. 
*/ +static unsigned CountCodeReductionForAlloca(Value *V) +{ + if (!V->getType()->isPointerTy()) return 0; + + unsigned Reduction = 0; + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI) { + Instruction *I = cast<Instruction>(UI->getUser()); + + if (isa<LoadInst>(I) || isa<StoreInst>(I)) + Reduction += InlineConstants::InstrCost; + else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { + /* If the GEP has variable indices, we won't be able to do much with it. */ + if (GEP->hasAllConstantIndices()) + Reduction += CountCodeReductionForAlloca(GEP); + } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) { + /* Track pointer through bitcasts. */ + Reduction += CountCodeReductionForAlloca(BCI); + } else + return 0; + } + + return Reduction; +} + +void HelperInfo::CalculateMetrics(Function *F) +{ + Metrics.NumInsts = 0; + + for (auto FI = F->begin(); FI != F->end(); FI++) { + unsigned NumInsts = 0; + BasicBlock *BB = &*FI; + for (auto BI = FI->begin(); BI != FI->end(); BI++) { + if (isa<PHINode>(BI)) /* PHI nodes don't count. */ + continue; + NumInsts++; + } + Metrics.NumBlocks++; + Metrics.NumInsts += NumInsts; + Metrics.NumBBInsts[BB] = NumInsts; + } + + ArgumentWeights.reserve(F->arg_size()); + for (auto I = F->arg_begin(), E = F->arg_end(); I != E; ++I) { + Value *V = &*I; + ArgumentWeights.push_back(ArgInfo( + CountCodeReductionForConstant(V, Metrics), + CountCodeReductionForAlloca(V))); + } +} + +void LLVMTranslator::InitializeConstHelpers() +{ +#if defined(TARGET_I386) + ConstHelpers.insert("helper_outb"); + ConstHelpers.insert("helper_inb"); + ConstHelpers.insert("helper_outw"); + ConstHelpers.insert("helper_inw"); + ConstHelpers.insert("helper_outl"); + ConstHelpers.insert("helper_inl"); +#elif defined(TARGET_ARM) + ConstHelpers.insert("helper_vfp_tosis"); + ConstHelpers.insert("helper_vfp_tosid"); + ConstHelpers.insert("helper_vfp_tosizs"); + ConstHelpers.insert("helper_vfp_tosizd"); + ConstHelpers.insert("helper_vfp_touis"); + ConstHelpers.insert("helper_vfp_touid"); + ConstHelpers.insert("helper_vfp_touizs"); + ConstHelpers.insert("helper_vfp_touizd"); + + ConstHelpers.insert("helper_vfp_sitos"); + ConstHelpers.insert("helper_vfp_sitod"); + ConstHelpers.insert("helper_vfp_uitos"); + ConstHelpers.insert("helper_vfp_uitod"); + + ConstHelpers.insert("helper_vfp_fcvtds"); + ConstHelpers.insert("helper_vfp_fcvtsd"); + + ConstHelpers.insert("helper_vfp_cmps"); + ConstHelpers.insert("helper_vfp_cmpd"); + ConstHelpers.insert("helper_vfp_cmpes"); + ConstHelpers.insert("helper_vfp_cmped"); + +#if defined(TARGET_AARCH64) + ConstHelpers.insert("helper_vfp_tosls"); + ConstHelpers.insert("helper_vfp_tosld"); + ConstHelpers.insert("helper_vfp_sqtos"); + ConstHelpers.insert("helper_vfp_sqtod"); + ConstHelpers.insert("helper_vfp_uqtos"); + ConstHelpers.insert("helper_vfp_uqtod"); + + ConstHelpers.insert("helper_vfp_cmps_a64"); + ConstHelpers.insert("helper_vfp_cmpd_a64"); + ConstHelpers.insert("helper_vfp_cmpes_a64"); + ConstHelpers.insert("helper_vfp_cmped_a64"); + ConstHelpers.insert("helper_vfp_minnums"); + ConstHelpers.insert("helper_vfp_maxnums"); + ConstHelpers.insert("helper_vfp_minnumd"); + ConstHelpers.insert("helper_vfp_maxnumd"); + + ConstHelpers.insert("helper_get_cp_reg64"); + ConstHelpers.insert("helper_dc_zva"); +#endif +#endif +} + +void LLVMTranslator::Abort(TraceBuilder &Builder) +{ + target_ulong pc = Builder.getEntryNode()->getGuestPC(); + dbg() << DEBUG_LLVM << __func__ + << ": abort trace pc " << format("0x%" PRIx "", pc) << "\n"; +} + 
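+/* A quick worked illustration of the inline-cost heuristics above. The helper
+ * and the block sizes are hypothetical (illustrative only, not part of HQEMU):
+ *
+ *     // hypothetical helper; the entry block branches on its second argument
+ *     static uint32_t helper_select(const uint32_t *regs, uint32_t cond)
+ *     {
+ *         if (cond)            // BranchInst user of 'cond'
+ *             return regs[0];  // successor B1, assume NumBBInsts[B1] == 2
+ *         return regs[1];      // successor B2, assume NumBBInsts[B2] == 2
+ *     }
+ *
+ * If a call site passes a constant 'cond', CountCodeReductionForConstant()
+ * visits the branch user with NumSucc == 2 and
+ * Instrs == NumBBInsts[B1] + NumBBInsts[B2] == 4, so
+ *     Reduction += InstrCost * Instrs * (NumSucc - 1) / NumSucc * 2
+ *                = InstrCost * 4 * 1 / 2 * 2 = 4 * InstrCost,
+ * i.e. roughly four instructions are expected to fold away. If the argument
+ * instead became an alloca at the call site, CountCodeReductionForAlloca()
+ * would add InstrCost for each load/store through it (recursing through
+ * constant-index GEPs and bitcasts). These weights are what CalculateMetrics()
+ * records per argument in ArgumentWeights. */
+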
+/* Make a jump from the head block in the block code cache to the translated + * host code of this region in the optimized code cache. Also patch previous + * built regions that have direct branch to this region. */ +void LLVMTranslator::Commit(TraceBuilder &Builder) +{ + bool Invalid = false; + OptimizationInfo *Opt = Builder.getOpt(); + TraceInfo *Trace = Builder.getTrace(); + TBVec &TBs = Trace->TBs; + + for (unsigned i = 0, e = TBs.size(); i != e; ++i) { + if (TBs[i]->mode == BLOCK_INVALID) { + Invalid = true; + break; + } + } + + if (Invalid || llvm_check_cache() == 1) { + delete Trace; + delete Opt; + return; + } + + TranslatedCode *TC = new TranslatedCode; + TC->Active = true; + TC->Size = NI.Size; + TC->Code = NI.Code; + TC->EntryTB = Trace->getEntryTB(); + TC->Restore = NI.Restore; + TC->Trace = Trace; + + /* If we go here, this is a legal trace. */ + LLVMEnv::ChainSlot &ChainPoint = LLEnv->getChainPoint(); + TranslationBlock *EntryTB = TC->EntryTB; + + hqemu::MutexGuard locked(llvm_global_lock); + + for (unsigned i = 0; i != NI.NumChainSlot; ++i) + ChainPoint[NI.ChainSlot[i].Key] = NI.ChainSlot[i].Addr; + + TraceID tid = LLEnv->insertTransCode(TC); + EntryTB->tid = tid; + EntryTB->mode = BLOCK_OPTIMIZED; + EntryTB->opt_ptr = TC->Code; + + /* Set the jump from the block to the trace */ + patch_jmp(tb_get_jmp_entry(EntryTB), TC->Code); + + if (!SP->isEnabled()) { + delete Trace; + TC->Trace = nullptr; + } + + delete Opt; +} + +void LLVMTranslator::dump(CPUArchState *env, TranslationBlock *tb) +{ + auto &DebugMode = DM.getDebugMode(); + if (DebugMode & (DEBUG_INASM | DEBUG_OP)) { + hqemu::MutexGuard locked(llvm_debug_lock); + dbg() << DEBUG_LLVM << "Translator " << MyID << " dumps asm...\n"; + if (DebugMode & DEBUG_INASM) + printAsm(Env, tb); + if (DebugMode & DEBUG_OP) + printOp(Env, tb); + } +} + +void LLVMTranslator::GenBlock(CPUArchState *env, OptimizationInfo *Opt) +{ + struct timeval start, end; + if (SP->isEnabled()) + gettimeofday(&start, nullptr); + + TraceBuilder Builder(IF, Opt); + GraphNode *Node = Builder.getNextNode(); + if (!Node) + hqemu_error("fatal error.\n"); + + Builder.ConvertToTCGIR(env); + + if (DM.getDebugMode() & (DEBUG_INASM | DEBUG_OP)) + dump(env, Opt->getCFG()->getTB()); + + Builder.ConvertToLLVMIR(); + Builder.Finalize(); + + if (SP->isEnabled()) { + gettimeofday(&end, nullptr); + Builder.getTrace()->setTransTime(&start, &end); + } + + Commit(Builder); +} + +void LLVMTranslator::GenTrace(CPUArchState *env, OptimizationInfo *Opt) +{ + struct timeval start, end; + if (SP->isEnabled()) + gettimeofday(&start, nullptr); + + TraceBuilder Builder(IF, Opt); + for (;;) { + GraphNode *Node = Builder.getNextNode(); + if (!Node) + break; + + Builder.ConvertToTCGIR(Env); + + if (DM.getDebugMode() & (DEBUG_INASM | DEBUG_OP)) + dump(Env, Node->getTB()); + + Builder.ConvertToLLVMIR(); + + if (Node->getTB()->mode == BLOCK_INVALID || Builder.isAborted()) { + Abort(Builder); + return; + } + } + Builder.Finalize(); + + if (SP->isEnabled()) { + gettimeofday(&end, nullptr); + Builder.getTrace()->setTransTime(&start, &end); + } + + Commit(Builder); +} + +/* Display the guest assembly code of the given basic block. 
 */
+void LLVMTranslator::printAsm(CPUArchState *env, TranslationBlock *tb)
+{
+    auto &OS = DM.debug();
+    if (GuestDisAsm) {
+        OS << "----------------\n"
+           << "IN: [size=" << tb->size << "]\n";
+#if defined(CONFIG_USER_ONLY)
+        GuestDisAsm->PrintInAsm((uint64_t)g2h(tb->pc), tb->size, tb->pc);
+#else
+        GuestDisAsm->PrintInAsm((uint64_t)tb->image, tb->size, tb->pc);
+#endif
+        OS << "\n";
+        return;
+    }
+
+#if defined(CONFIG_USER_ONLY)
+    /* The guest is not supported by the LLVM MCDisassembler. Use QEMU disas. */
+    int disas_flags = 0;
+
+#if defined(TARGET_I386)
+  #if defined(TARGET_X86_64)
+    if ((tb->flags >> HF_CS64_SHIFT) & 1)
+        disas_flags = 2;
+    else
+  #endif
+        disas_flags = !((tb->flags >> HF_CS32_SHIFT) & 1);
+#elif defined(TARGET_ARM)
+  #if defined(TARGET_AARCH64)
+    disas_flags = 4 | (0 << 1);
+  #else
+    disas_flags = env->thumb;
+  #endif
+#elif defined(TARGET_PPC)
+    int le_mode = env->hflags & (1 << MSR_LE) ? 1 : 0;
+    disas_flags = env->bfd_mach;
+    disas_flags |= le_mode << 16;
+#endif
+
+    OS << "----------------\n";
+    OS << "IN: [size=" << tb->size << "]\n";
+    target_disas(stderr, ENV_GET_CPU(env), tb->pc, tb->size, disas_flags);
+    OS << "\n";
+#endif
+}
+
+extern "C" void printops(const char *outbuf) {
+    DM.debug() << outbuf;
+}
+
+/* Display TCG IR of the given basic block. */
+void LLVMTranslator::printOp(CPUArchState *env, TranslationBlock *tb)
+{
+    auto &OS = DM.debug();
+    OS << "OP:\n";
+    tcg_dump_ops_fn(&tcg_ctx, printops);
+    OS << "\n";
+}
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
diff --git a/llvm/llvm.cpp b/llvm/llvm.cpp
new file mode 100644
index 0000000..80c8473
--- /dev/null
+++ b/llvm/llvm.cpp
@@ -0,0 +1,1251 @@
+/*
+ * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ */ + +#include <fstream> +#include <dlfcn.h> +#include "llvm/Support/ManagedStatic.h" +#include "llvm-types.h" +#include "llvm-annotate.h" +#include "llvm-soft-perfmon.h" +#include "llvm-hard-perfmon.h" +#include "llvm-translator.h" +#include "llvm-state.h" +#include "llvm-opc.h" +#include "llvm.h" +#include "tracer.h" +#include "optimization.h" + + +#define MAX_TRANSLATORS 8 +#define MAX_SEARCH_DEPTH 8 +#define ACTIVE_QUEUE_SIZE (1 << 16) +#define ACTIVE_QUEUE_MASK (ACTIVE_QUEUE_SIZE - 1) + + +cl::OptionCategory CategoryHQEMU("HQEMU Options"); + +static cl::opt<std::string> DebugLevel("debuglv", cl::init(""), + cl::cat(CategoryHQEMU), cl::desc("Set debug level")); + +static cl::opt<std::string> DebugFile("debugfile", cl::init(""), + cl::cat(CategoryHQEMU), cl::desc("Set debug file (default=stderr)")); + +static cl::opt<std::string> ProfileLevel("profile", cl::init(""), + cl::cat(CategoryHQEMU), cl::desc("Set profile level")); + +static cl::opt<unsigned> NumThreads("threads", cl::init(1), + cl::cat(CategoryHQEMU), cl::desc("Number of threads used in the hybridm mode")); + +static cl::opt<unsigned> NumTranslations("count", cl::init(-1U), + cl::cat(CategoryHQEMU), + cl::desc("Maximum number of traces to translate (default=2^32)")); + +static cl::opt<unsigned> NETProfileThreshold("net-profile", + cl::init(NET_PROFILE_THRESHOLD), + cl::cat(CategoryHQEMU), + cl::desc("Hot threshold value for NET trace creation (default=50)")); + +static cl::opt<unsigned> NETPredictThreshold("net-predict", + cl::init(NET_PREDICT_THRESHOLD), + cl::cat(CategoryHQEMU), + cl::desc("Maximum number of basic blocks in a NET trace (default=64)")); + +static cl::opt<bool> DisableNETPlus("disable-netplus", cl::init(false), + cl::cat(CategoryHQEMU), + cl::desc("Disable NETPlus algorithm (use NET trace formation only)")); + + +/* static members */ +bool LLVMEnv::InitOnce = false; +int LLVMEnv::TransMode = TRANS_MODE_NONE; +uint8_t *LLVMEnv::TraceCache = nullptr; +size_t LLVMEnv::TraceCacheSize = 0; +bool LLVMEnv::RunWithVTune = false; + +LLVMDebug DM; +LLVMEnv *LLEnv; +QueueManager *QM; +AnnotationFactory *AF; +SoftwarePerfmon *SP; +HardwarePerfmon *HP; +ControlFlowGraph GlobalCFG; + +hqemu::Mutex llvm_global_lock; +hqemu::Mutex llvm_debug_lock; + +bool ThreadStop = false; +bool ThreadExit = false; +bool TraceCacheFull = false; +unsigned NumPendingThread = 0; +int MonThreadID; + +extern unsigned ProfileThreshold; +extern unsigned PredictThreshold; + +/* + * LLVMEnv() + * Intialize LLVM translator(s) and globally shared resources. The LLVMEnv + * instance must be initialized before using the underlying transaltion + * service and should be initialized only ONCE. + */ +LLVMEnv::LLVMEnv() : NumTranslator(1), UseThreading(false), NumFlush(0) +{ + /* Set LLVMEnv pointer first so other classes can access it. */ + LLEnv = this; + + ParseCommandLineOptions(); + + /* Check if HQEMU is running in Intel VTune. */ + ProbeIntelVTune(); + + /* Initialize debugger and software profiler. */ + DM.setDebugMode(DebugLevel, DebugFile); + + dbg() << DEBUG_LLVM << "Initializing LLVM Environment.\n"; + + /* Initialize LLVM targets. 
*/ + InitializeAllTargetInfos(); + InitializeAllTargets(); + InitializeAllAsmPrinters(); + InitializeAllAsmParsers(); + InitializeAllTargetMCs(); + InitializeAllDisassemblers(); + + MonThreadID = gettid(); + qemu_mutex_init(&mutex); + + Translator.resize(NumTranslator); + HelperThread.resize(NumTranslator); + ThreadEnv.resize(NumTranslator); + for (unsigned i = 0; i < NumTranslator; ++i) { + CPUState *cpu = ThreadEnv[i] = cpu_create(); + CPUArchState *env = (CPUArchState *)cpu->env_ptr; + cpu->cpu_index = -i -1; + env->build_mode = BUILD_LLVM; + Translator[i] = nullptr; + } + + QM = new QueueManager; + AF = new AnnotationFactory; + SP = new SoftwarePerfmon(ProfileLevel); + HP = new HardwarePerfmon; + + if (SP->Mode & (SPM_HPM | SPM_HOTSPOT)) { + if (RunWithVTune) + DM.debug() << "Warning: cannot profile hpm,hotspot inside VTune. Disable it.\n"; + } + + /* Create the memory manager and intialize the optimized code cache. There + * is only copy of the optimized code cache and is shared by all underlying + * translators. */ + MM = std::shared_ptr<MemoryManager>( + MemoryManager::Create(TraceCache, TraceCacheSize)); + + CreateTranslator(); + + /* Initialize HPM after the LLVM thread is initialized. */ + HP->Init(MonThreadID); + + dbg() << DEBUG_LLVM << "LLVM Environment initialized. " + << format("guest_base=0x%lx.\n", GUEST_BASE) + << format("\tBlock code cache: addr=%p size=%zd bytes.\n", + tcg_ctx_global.code_gen_buffer, + tcg_ctx_global.code_gen_buffer_size) + << format("\tTrace code cache: addr=%p size=%zd bytes.\n", + TraceCache, TraceCacheSize); +} + +LLVMEnv::~LLVMEnv() +{ + if (TransMode == TRANS_MODE_BLOCK) { + size_t BlockCodeSize = MM->getCodeSize(); + dbg() << DEBUG_LLVM << "Finalizing LLVM environment." + << "\n\tBlock code size: " << BlockCodeSize << " bytes.\n"; + } else { + size_t BlockCodeSize = (uintptr_t)tcg_ctx_global.code_gen_ptr - + (uintptr_t)tcg_ctx_global.code_gen_buffer; + size_t TraceCodeSize = MM->getCodeSize(); + dbg() << DEBUG_LLVM << "Finalizing LLVM environment." + << "\n\tBlock code size : " << format("%8d", BlockCodeSize) << " bytes" + << "\n\tTrace code size : " << format("%8d", TraceCodeSize) << " bytes" + << "\n\tTrace/Block ratio: " + << format("%.2f%%\n\n", (double)TraceCodeSize * 100 / BlockCodeSize); + } + + /* Stop the HPM early so that the handling thread will no longer receive + * the overflow signal. */ + delete HP; + + if (UseThreading && !ThreadExit) + StopThread(); + + DeleteTranslator(); + + for (int i = 0, e = tcg_ctx_global.tb_ctx->nb_tbs; i != e; ++i) { + if (tbs[i].image) delete_image(&tbs[i]); + if (tbs[i].state) delete_state(&tbs[i]); + if (tbs[i].chain) ChainInfo::free(&tbs[i]); + } + + SP->printProfile(); + + delete SP; + delete QM; + delete AF; + + /* Delete all translated code. 
*/ + for (unsigned i = 0, e = TransCode.size(); i != e; ++i) + delete TransCode[i]; + + dbg() << DEBUG_LLVM << "LLVM environment finalized.\n"; + + DM.Flush(); +} + +void LLVMEnv::ProbeIntelVTune() +{ +#if defined(__i386__) +#define NEW_DLL_ENVIRONMENT_VAR "INTEL_JIT_PROFILER32" +#elif defined(__x86_64__) +#define NEW_DLL_ENVIRONMENT_VAR "INTEL_JIT_PROFILER64" +#else +#define NEW_DLL_ENVIRONMENT_VAR "" +#endif +#define DLL_ENVIRONMENT_VAR "VS_PROFILER" +#define DEFAULT_DLLNAME "libJitPI.so" + + if (!strcmp(NEW_DLL_ENVIRONMENT_VAR, "")) + return; + + void *DLLHandle = nullptr; + char *DLLName = getenv(NEW_DLL_ENVIRONMENT_VAR); + if (!DLLName) + DLLName = getenv(DLL_ENVIRONMENT_VAR); + + if (DLLName) { + DLLHandle = dlopen(DLLName, RTLD_LAZY); + if (DLLHandle) + goto has_vtune; + } + if (!DLLHandle) { + DLLHandle = dlopen(DEFAULT_DLLNAME, RTLD_LAZY); + if (DLLHandle) + goto has_vtune; + } + return; + +has_vtune: + dlclose(DLLHandle); + RunWithVTune = true; +} + +#if defined(LLVM_V35) || defined(LLVM_V38) || defined(LLVM_V39) || defined(LLVM_V50) +static void PrintVersion() +{ + Triple HostTriple(sys::getDefaultTargetTriple()); + raw_ostream &OS = outs(); + + OS << "HQEMU (http://itanium.iis.sinica.edu.tw/hqemu/):\n" + << " HQEMU version: " << PACKAGE_VERSION_MAJOR << "." + << PACKAGE_VERSION_MINOR << "\n" + << " QEMU version: " << QEMU_VERSION << "\n" + << " Guest ISA: " << TARGET_NAME << "\n" + << " Host ISA: " << HostTriple.getArchName() << "\n"; + OS << "\n"; + cl::PrintVersionMessage(); +} +#else +static void PrintVersion(raw_ostream &OS) +{ + Triple HostTriple(sys::getDefaultTargetTriple()); + OS << "HQEMU (http://itanium.iis.sinica.edu.tw/hqemu/):\n" + << " HQEMU version: " << PACKAGE_VERSION_MAJOR << "." + << PACKAGE_VERSION_MINOR << "\n" + << " QEMU version: " << QEMU_VERSION << "\n" + << " Guest ISA: " << TARGET_NAME << "\n" + << " Host ISA: " << HostTriple.getArchName() << "\n"; + OS << "\n"; + cl::PrintVersionMessage(); +} +#endif + +void LLVMEnv::ParseCommandLineOptions() +{ + /* Disable passes that would change the DebugLoc metadata which + * may fail our block/trace chaining. */ + static const char *argv[] = { + "-disable-tail-duplicate", + "-disable-early-taildup", + "-disable-block-placement", +#if defined(TCG_TARGET_ARM) || defined(TCG_TARGET_AARCH64) + "-disable-branch-fold", +#elif defined(TCG_TARGET_PPC64) + "-disable-branch-fold", + "-ppc-asm-full-reg-names", +#endif + }; + + cl::SetVersionPrinter(PrintVersion); + + /* Hide LLVM builtin options. */ +#if defined(LLVM_V35) + StringMap<cl::Option*> opts; + cl::getRegisteredOptions(opts); +#else + StringMap<cl::Option*> &opts = cl::getRegisteredOptions(); +#endif + for (auto &I : opts) { + auto opt = I.second; + if (opt->Category == &cl::GeneralCategory) + opt->setHiddenFlag(cl::Hidden); + } + + dbg() << DEBUG_LLVM << "Parsing command line options.\n"; + + /* Get translation mode from LLVM_MODE. */ + TransMode = getTransMode(); + if (TransMode == TRANS_MODE_INVALID) + hqemu_error("invalid LLVM_MODE.\n"); + + /* Get command-line options from LLVM_CMD and update them in LLVM. 
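+ * For example, "export LLVM_CMD='-threads=4 -disable-netplus'" passes the two
+ * cl::opt flags declared above to the parser; the string is simply split on
+ * spaces, so an option value cannot contain embedded blanks.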
*/ + std::vector<const char *> PassArgs; + char *p = getenv("LLVM_CMD"); + if (p) { + const char *token = strtok(p, " "); + while (token) { + PassArgs.push_back(token); + token = strtok(nullptr, " "); + } + } + + SmallVector<const char *, 16> Args; + Args.push_back("qemu-" TARGET_NAME); + for (unsigned i = 0, e = ARRAY_SIZE(argv); i < e; ++i) + Args.push_back(argv[i]); + for (const char *s : PassArgs) + Args.push_back(s); + Args.push_back(nullptr); + cl::ParseCommandLineOptions(Args.size() - 1, + const_cast<char **>(&Args[0])); + + /* Overwrite NET trace formation parameters. */ + ProfileThreshold = NETProfileThreshold; + PredictThreshold = NETPredictThreshold; + + /* + * After this point, command-line options are all set. + * We need to update functions that are controlled by the options. + */ + + /* Update threading number if hybridm is enabled. */ + UseThreading = (TransMode == TRANS_MODE_HYBRIDM); + if (!UseThreading) + return; + + if (NumThreads != 1) + NumTranslator = (NumThreads < 1) ? 1 : MIN(MAX_TRANSLATORS, NumThreads); +} + +#if defined(CONFIG_USER_ONLY) +#define TIMEOUT_INTERVAL 1 +#else +#define TIMEOUT_INTERVAL 1000 +#endif + +/* + * WorkerFunc() + * The thread routine of the LLVM translation threads. + */ +void *WorkerFunc(void *argv) +{ + unsigned MyID = (unsigned long)argv; + LLVMTranslator *Translator = LLEnv->getTranslator(MyID); + MemoryManager *MM = LLEnv->getMemoryManager().get(); + CPUState *cpu = LLEnv->getThreadEnv(MyID); + CPUArchState *env = (CPUArchState *)cpu->env_ptr; + + /* Block all signals. */ + sigset_t set; + sigfillset(&set); + pthread_sigmask(SIG_SETMASK, &set, nullptr); + + copy_tcg_context(); + optimization_init(env); + + Atomic<unsigned>::inc_return(&NumPendingThread); + + for (;;) { + /* Exit the loop if a request is received. */ + if (unlikely(ThreadExit)) + break; + + if (unlikely(ThreadStop)) { + Atomic<unsigned>::inc_return(&NumPendingThread); + while (ThreadStop) + usleep(100); + + Translator = LLEnv->getTranslator(MyID); + } + + /* Exit the loop if the trace cache is full. */ + if (unlikely(!MM->isSizeAvailable())) { + TraceCacheFull = true; + ThreadStop = true; + continue; + } + + /* Everything is fine. Process an optimization request. */ + OptimizationInfo *Opt = (OptimizationInfo *)QM->Dequeue(); + if (Opt) + Translator->GenTrace(env, Opt); + + usleep(TIMEOUT_INTERVAL); + } + + pthread_exit(nullptr); + return nullptr; +} + +/* + * CreateTranslator() + * Create LLVM translators and worker threads. We create the instances of + * translators and helper threads during the initialization of LLVMEnv and + * each helper thread will pick its own translator instance later. + */ +void LLVMEnv::CreateTranslator() +{ + dbg() << DEBUG_LLVM << "Creating " << NumTranslator << " translator(s).\n"; + + for (unsigned i = 0; i < NumTranslator; ++i) { + CPUArchState *env = (CPUArchState *)ThreadEnv[i]->env_ptr; + Translator[i] = LLVMTranslator::CreateLLVMTranslator(i, env); + } + + ThreadStop = false; + ThreadExit = false; + TraceCacheFull = false; + + if (UseThreading) + StartThread(); +} + +/* + * DeleteTranslator() + * Destroy LLVMTranslator. + */ +void LLVMEnv::DeleteTranslator() +{ + dbg() << DEBUG_LLVM << "Destroying " << NumTranslator << " translator(s).\n"; + + /* Wait for worker threads finishing their jobs, clear all optimization + * requests and flush trace code cache. 
*/ + if (UseThreading && !ThreadExit) { + ThreadStop = true; + while (NumPendingThread != NumTranslator) + usleep(100); + + QM->Flush(); + MM->Flush(); + } + + for (unsigned i = 0; i < NumTranslator; ++i) { + delete Translator[i]; + Translator[i] = nullptr; + } +} + +void LLVMEnv::RestartTranslator() +{ + dbg() << DEBUG_LLVM << "Restarting " << NumTranslator << " translator(s).\n"; + + for (unsigned i = 0; i < NumTranslator; ++i) { + CPUArchState *env = (CPUArchState *)ThreadEnv[i]->env_ptr; + Translator[i] = LLVMTranslator::CreateLLVMTranslator(i, env); + } + + TraceCacheFull = false; + NumPendingThread = 0; + ThreadStop = false;; +} + +void LLVMEnv::StartThread() +{ + ThreadExit = false; + for (unsigned i = 0; i < NumTranslator; ++i) { + int ret = pthread_create(&HelperThread[i], nullptr, WorkerFunc, + (void*)(long)i); + if (ret != 0) + hqemu_error("failed to create worker thread.\n"); + } + + /* Wait until all threads are ready. */ + while (NumPendingThread != NumTranslator) + usleep(200); + NumPendingThread = 0; +} + +void LLVMEnv::StopThread() +{ + ThreadExit = true; + for (unsigned i = 0; i < NumTranslator; ++i) + pthread_join(HelperThread[i], nullptr); +} + +LLVMTranslator *LLVMEnv::AcquireSingleTranslator() +{ + if (Translator.empty()) + hqemu_error("internal error.\n"); + + qemu_mutex_lock(&mutex); + return Translator[0]; +} + +void LLVMEnv::ReleaseSingleTranslator() +{ + qemu_mutex_unlock(&mutex); +} + + +/* + * CreateLLVMEnv() + * The interface to create the LLVMEnv instance. + */ +void LLVMEnv::CreateLLVMEnv() +{ + if (InitOnce == true) + hqemu_error("LLVM environment already initialized.\n"); + + if (TraceCache == nullptr) + hqemu_error("llvm_alloc_cache() must be called before this function.\n"); + + new LLVMEnv; + InitOnce = true; +} + +void LLVMEnv::DeleteLLVMEnv() +{ + if (InitOnce == false) + hqemu_error("LLVM environment already destroyed.\n"); + + /* Stop the LLVM translation threads before the program is terminated. */ + delete LLEnv; + InitOnce = false; +} + +TraceID LLVMEnv::insertTransCode(TranslatedCode *TC) +{ + TraceID tid = TransCode.size(); + TransCode.push_back(TC); + SortedCode[(uintptr_t)TC->Code] = TC; + + for (auto TB : TC->Trace->TBs) { + ChainInfo &Chain = *ChainInfo::get(TB); + Chain.insertDepTrace(TC->EntryTB->id); + } + return tid; +} + +LLVMEnv::SlotInfo LLVMEnv::getChainSlot() +{ + hqemu::MutexGuard locked(llvm_global_lock); + + size_t Key = ChainPoint.size(); + uintptr_t RetVal = (Key << 2) | TB_EXIT_LLVM; + ChainPoint.push_back(0); + return SlotInfo(Key, RetVal); +} + +static bool OptimizeOrSkip() +{ + static unsigned curr = 0; + + dbg() << DEBUG_LLVM << "Received an optimization request ID=" << curr << "." + << (curr >= NumTranslations ? 
" (skip)\n" : "\n"); + + return curr++ >= NumTranslations; +} + +int LLVMEnv::OptimizeBlock(CPUArchState *env, OptRequest Request) +{ + if (InitOnce == false) + hqemu_error("internal error.\n"); + + if (OptimizeOrSkip() == true) + return 0; + + env->build_mode = BUILD_LLVM | BUILD_TCG; + LLVMTranslator *Translator = LLEnv->AcquireSingleTranslator(); + Translator->GenBlock(env, Request.release()); + LLEnv->ReleaseSingleTranslator(); + env->build_mode = BUILD_NONE; + return 1; +} + +int LLVMEnv::OptimizeTrace(CPUArchState *env, OptRequest Request) +{ + if (InitOnce == false) + return 0; + + if (TransMode == TRANS_MODE_NONE) + return 0; + if (OptimizeOrSkip() == true) + return 0; + + OptimizationInfo *Opt = Request.release(); + Opt->ComposeCFG(); + + if (TransMode == TRANS_MODE_HYBRIDS) { + if (!TraceCacheFull) { + if (!LLEnv->getMemoryManager()->isSizeAvailable()) + TraceCacheFull = true; + else { + LLVMTranslator *Translator = LLEnv->AcquireSingleTranslator(); + Translator->GenTrace(env, Opt); + LLEnv->ReleaseSingleTranslator(); + } + } + + if (TraceCacheFull) + return 0; + } else if (TransMode == TRANS_MODE_HYBRIDM) { + /* Put the optimization request into the request queue and continue. */ + QM->Enqueue(Opt); + } + + return 1; +} + +#if defined(CONFIG_USER_ONLY) +QueueManager::QueueManager() +{ + CurrentQueue = new Queue; +} + +QueueManager::~QueueManager() +{ + delete CurrentQueue; +} + +void QueueManager::Enqueue(OptimizationInfo *Opt) +{ + CurrentQueue->enqueue(Opt); +} + +void *QueueManager::Dequeue() +{ + return CurrentQueue->dequeue(); +} + +void QueueManager::Flush() +{ + while (1) { + OptimizationInfo *Opt = (OptimizationInfo *)CurrentQueue->dequeue(); + if (Opt == nullptr) + break; + delete Opt; + } +} + +#else +QueueManager::QueueManager() +{ + ActiveQueue.resize(ACTIVE_QUEUE_SIZE); + for (unsigned i = 0, e = ActiveQueue.size(); i != e; ++i) + ActiveQueue[i] = nullptr; +} + +QueueManager::~QueueManager() +{ + for (unsigned i = 0, e = ActiveQueue.size(); i != e; ++i) { + if (ActiveQueue[i]) + delete ActiveQueue[i]; + } +} + +void QueueManager::Enqueue(OptimizationInfo *Opt) +{ + Queue *CurrentQueue = ActiveQueue[pcid & ACTIVE_QUEUE_MASK]; + if (unlikely(!CurrentQueue)) + CurrentQueue = ActiveQueue[pcid & ACTIVE_QUEUE_MASK] = new Queue; + CurrentQueue->enqueue(Opt); +} + +void *QueueManager::Dequeue() +{ + Queue *CurrentQueue = ActiveQueue[pcid & ACTIVE_QUEUE_MASK]; + if (unlikely(!CurrentQueue)) + return nullptr; + return CurrentQueue->dequeue(); +} + +void QueueManager::Flush() +{ + for (unsigned i = 0, e = ActiveQueue.size(); i != e; ++i) { + if (!ActiveQueue[i]) + continue; + + while (1) { + OptimizationInfo *Opt = (OptimizationInfo *)ActiveQueue[i]->dequeue(); + if (!Opt) + break; + delete Opt; + } + } +} +#endif + + +/* + * OptimizationInfo + */ + +OptimizationInfo::OptimizationInfo(TranslationBlock *HeadTB, TraceEdge &Edges) + : isUserTrace(true), isBlock(false), CFG(nullptr) +{ + for (auto &E : Edges) + Trace.push_back(E.first); + +#if defined(CONFIG_USER_ONLY) + if (!llvm_has_annotation(HeadTB->pc, ANNOTATION_LOOP)) + ExpandTrace(HeadTB, Edges); +#endif + + /* Build CFG from the edges. 
*/ + std::map<TranslationBlock *, GraphNode *> NodeMap; + + NodeMap[HeadTB] = new GraphNode(HeadTB); + for (auto &E : Edges) { + TranslationBlock *Parent = E.first; + if (NodeMap.find(Parent) == NodeMap.end()) + NodeMap[Parent] = new GraphNode(Parent); + + GraphNode *ParentNode = NodeMap[Parent]; + for (auto Child : E.second) { + if (NodeMap.find(Child) == NodeMap.end()) + NodeMap[Child] = new GraphNode(Child); + + ParentNode->insertChild(NodeMap[Child]); + } + } + + CFG = NodeMap[HeadTB]; +} + +void OptimizationInfo::SearchCycle(TraceNode &SearchNodes, TraceNode &Nodes, + TraceEdge &Edges, TBVec &Visited, int Depth) +{ + TranslationBlock *Curr = Visited.back(); + + if (llvm_has_annotation(Curr->pc, ANNOTATION_LOOP)) + return; + if (Nodes.size() >= PredictThreshold) + return; + + /* If the current node is one of the main NET trace node, we found a cyclic path. + * The links of such cyclic path are added to the trace edges. */ + if (SearchNodes.find(Curr) != SearchNodes.end()) { + for (unsigned i = 1, e = Visited.size(); i != e; ++i) { + TranslationBlock *Pred = Visited[i - 1]; + TranslationBlock *Succ = Visited[i]; + Nodes.insert(Succ); + Edges[Pred].insert(Succ); + } + return; + } + /* Stop if we reach the maximum search depth. */ + if (Depth == MAX_SEARCH_DEPTH) + return; + + /* Still cannot find a cyclic path? Keep looking for the successors. */ + for (auto Succ : GlobalCFG.getSuccessor(Curr)) { + Visited.push_back(Succ); + SearchCycle(SearchNodes, Nodes, Edges, Visited, Depth + 1); + Visited.pop_back(); + } +} + +/* + * ExpandTrace() + * Expand a NET trace to a bigger region with the NETPlus algorithm. + * NETPlus: trace formation algorithm based on the paper published in + * RESoLVE'11. D. Davis and K. Hazelwood, "Improving Region Selection Through + * Loop Completion," in ASPLOS Workshop on Runtime Environments/Systems, + * Layering, and Virtualized Environments, 2011. + */ +void OptimizationInfo::ExpandTrace(TranslationBlock *HeadTB, TraceEdge &Edges) +{ + if (DisableNETPlus) + return; + + TraceNode Nodes; + TraceNode MainTraceNodes; + std::map<target_ulong, TranslationBlock*> NodeMap; +#ifdef USE_TRACETREE_ONLY + MainTraceNodes.insert(HeadTB); + NodeMap[HeadTB->pc] = HeadTB; +#else + for (auto &E : Edges) { + TranslationBlock *TB = E.first; + MainTraceNodes.insert(TB); + NodeMap[TB->pc] = TB; + } +#endif + + for (auto &E : Edges) + Nodes.insert(E.first); + + /* Put critical section when traversing GlobalCFG. */ + hqemu::MutexGuard locked(GlobalCFG.getLock()); + + for (auto TB : Trace) { + TBVec Visited; + Visited.push_back(TB); + if (NodeMap.find(TB->jmp_pc[0]) != NodeMap.end()) + Edges[TB].insert(NodeMap[TB->jmp_pc[0]]); + if (TB->jmp_pc[1] != (target_ulong)-1 && + NodeMap.find(TB->jmp_pc[1]) != NodeMap.end()) + Edges[TB].insert(NodeMap[TB->jmp_pc[1]]); + + for (auto Succ : GlobalCFG.getSuccessor(TB)) { + Visited.push_back(Succ); + SearchCycle(MainTraceNodes, Nodes, Edges, Visited, 0); + Visited.pop_back(); + } + } +} + +/* + * ComposeCFG() + * Compose a trace of CFG from a list of TBs. + */ +void OptimizationInfo::ComposeCFG() +{ + bool isUser = true; + TranslationBlock *HeadTB = Trace[0]; + +#if defined(CONFIG_SOFTMMU) + isUser = isUserTB(HeadTB) ? true : false; + for (auto TB : Trace) { + if (unlikely(TB->mode == BLOCK_INVALID)) { + /* A NET trace may contain invalidated block because the block + * is invalidated during trace formation. 
*/ + dbg() << DEBUG_LLVM << __func__ << ": skip due to invalidated block\n"; + return; + } + + if (isUser && isUserTB(TB) == false) { + dbg() << DEBUG_LLVM << __func__ << ": skip due to mixed mode\n"; + return; + } + + /* Our translator assumes that component blocks have the same cs_base. */ + if (TB->cs_base != HeadTB->cs_base) { + dbg() << DEBUG_LLVM << __func__ << ": skip due to inconsistent cs\n"; + return; + } + } +#endif + + /* Check if the consecutive blocks are really connected. */ + TraceEdge Edges; + + TranslationBlock *Curr = Trace[0]; + for (unsigned i = 1, e = Trace.size(); i != e; ++i) { + TranslationBlock *Pred = Trace[i - 1]; + Curr = Trace[i]; + if (Pred->jmp_pc[0] != (target_ulong)-1 && + Pred->jmp_pc[0] != Curr->pc && + Pred->jmp_pc[1] != Curr->pc) { + /* Disconnected. Discard the tailing blocks. */ + Trace.resize(i); + LoopHeadIdx = -1; + break; + } + + /* Connected. */ + Edges[Pred].insert(Curr); + } + if (LoopHeadIdx != -1) + Edges[Curr].insert(Trace[LoopHeadIdx]); + +#if defined(CONFIG_USER_ONLY) + if (!llvm_has_annotation(Trace[0]->pc, ANNOTATION_LOOP)) + ExpandTrace(HeadTB, Edges); +#endif + + /* Build CFG from the edges. */ + std::map<TranslationBlock *, GraphNode *> NodeMap; + + NodeMap[HeadTB] = new GraphNode(HeadTB); + for (auto &E : Edges) { + TranslationBlock *Parent = E.first; + if (NodeMap.find(Parent) == NodeMap.end()) + NodeMap[Parent] = new GraphNode(Parent); + + GraphNode *ParentNode = NodeMap[Parent]; + for (auto Child : E.second) { + if (NodeMap.find(Child) == NodeMap.end()) + NodeMap[Child] = new GraphNode(Child); + + ParentNode->insertChild(NodeMap[Child]); + } + } + + CFG = NodeMap[HeadTB]; + isUserTrace = isUser; +} + + +/* The following implements routines of the C interfaces for QEMU. */ +extern "C" { + +void hqemu_help(void) +{ + /* Hide LLVM builtin options. */ +#if defined(LLVM_V35) + StringMap<cl::Option*> opts; + cl::getRegisteredOptions(opts); +#else + StringMap<cl::Option*> &opts = cl::getRegisteredOptions(); +#endif + for (auto &I : opts) { + auto opt = I.second; + if (opt->Category == &cl::GeneralCategory) + opt->setHiddenFlag(cl::Hidden); + } + + SmallVector<const char *, 16> Args; + Args.push_back("\n export LLVM_CMD='[OPTION1] [OPTION2]'\n qemu-" TARGET_NAME); + Args.push_back(nullptr); + cl::ParseCommandLineOptions(Args.size() - 1, + const_cast<char **>(&Args[0])); + cl::PrintHelpMessage(false, false); +} + +int llvm_init() +{ + LLVMEnv::CreateLLVMEnv(); + return 0; +} + +int llvm_finalize() +{ + LLVMEnv::DeleteLLVMEnv(); +#if 0 + llvm_shutdown(); +#endif + return 0; +} + +int llvm_alloc_cache() +{ + size_t BlockCacheSize = (tcg_ctx.code_gen_buffer_size / 2) + & qemu_real_host_page_mask; + LLVMEnv::TraceCacheSize = tcg_ctx.code_gen_buffer_size - BlockCacheSize; + LLVMEnv::TraceCache = (uint8_t *)tcg_ctx.code_gen_buffer + BlockCacheSize; + + tcg_ctx.code_gen_buffer_size = BlockCacheSize; + return 0; +} + +int llvm_check_cache(void) +{ + if (LLVMEnv::InitOnce == false) + return 1; + return TraceCacheFull ? 1 : 0; +} + +/* + * llvm_tb_flush() + * Wrapper fucntion to flush the optmizated code cache. 
+ */ +int llvm_tb_flush(void) +{ + if (LLVMEnv::InitOnce == false) + return 1; + if (LLVMEnv::TransMode == TRANS_MODE_NONE) + return 1; + + dbg() << DEBUG_LLVM << __func__ << " entered.\n"; + + LLEnv->DeleteTranslator(); + + for (int i = 0, e = tcg_ctx_global.tb_ctx->nb_tbs; i != e; ++i) { + if (tbs[i].image) delete_image(&tbs[i]); + if (tbs[i].state) delete_state(&tbs[i]); + if (tbs[i].chain) ChainInfo::free(&tbs[i]); + + tbs[i].image = tbs[i].state = tbs[i].chain = nullptr; + } + + /* Remove all translated code. */ + LLVMEnv::TransCodeList &TransCode = LLEnv->getTransCode(); + for (unsigned i = 0, e = TransCode.size(); i != e; ++i) + delete TransCode[i]; + + TransCode.clear(); + LLEnv->getSortedCode().clear(); + LLEnv->getChainPoint().clear(); + + /* Clear global cfg. */ + GlobalCFG.reset(); + + LLEnv->RestartTranslator(); + LLEnv->incNumFlush(); + + dbg() << DEBUG_LLVM << __func__ << ": trace cache flushed.\n"; + + return 0; +} + +static void llvm_suppress_chaining(TranslationBlock *tb) +{ + /* TODO: add unlinking rule for non-x86 hosts. */ + std::vector<uintptr_t> &Chains = ChainInfo::get(tb)->Chains; + if (Chains.empty()) + return; + + for (unsigned i = 0, e = Chains.size(); i != e; ++i) { +#if defined(TCG_TARGET_I386) + patch_jmp(Chains[i], Chains[i] + 5); +#elif defined(TCG_TARGET_ARM) || defined(TCG_TARGET_AARCH64) + patch_jmp(Chains[i], Chains[i] + 4); +#elif defined(TCG_TARGET_PPC64) + patch_jmp(Chains[i], Chains[i] + 16); +#endif + } + Chains.clear(); +} + +/* + * llvm_tb_remove() + * Remove the traces containing the `tb' that is invalidated by QEMU. + */ +int llvm_tb_remove(TranslationBlock *tb) +{ + if (LLVMEnv::TransMode == TRANS_MODE_NONE) + return 1; + if (!tb->chain) + return 1; + + /* Unlink traces that jump to this tb. */ + llvm_suppress_chaining(tb); + + if (LLVMEnv::TransMode == TRANS_MODE_BLOCK) { + patch_jmp(tb_get_jmp_entry(tb), tb_get_jmp_next(tb)); + ChainInfo::free(tb); + return 1; + } + + LLVMEnv::TransCodeList &TransCode = LLEnv->getTransCode(); + LLVMEnv::TransCodeMap &SortedCode = LLEnv->getSortedCode(); + std::vector<BlockID> &DepTraces = ChainInfo::get(tb)->DepTraces; + + hqemu::MutexGuard locked(llvm_global_lock); + + /* Remove traces that contain this tb. */ + if (DepTraces.empty()) + return 0; + + for (unsigned i = 0, e = DepTraces.size(); i != e; ++i) { + TranslationBlock *EntryTB = &tbs[DepTraces[i]]; + if (EntryTB->tid == -1) { + /* This can happen when a trace block (not head) was removed + * before and at that time the tid of the trace head block is + * set to -1. Now, the trace head block is going to be removed + * and we just skip it. */ + continue; + } + + TranslatedCode *TC = TransCode[EntryTB->tid]; + if (!TC->Active) + hqemu_error("fatal error.\n"); + + TC->Active = false; + SortedCode.erase((uintptr_t)TC->Code); + patch_jmp(tb_get_jmp_entry(EntryTB), tb_get_jmp_next(EntryTB)); + + /* For system-mode emulation, since the source traces do not directly + * jump to the trace code, we do not need to suppress the traces + * chaining to the trace head block. Unlinking the jump from the + * trace head block to the trace code is sufficient to make execution + * from going to the trace code. 
*/ +#if defined(CONFIG_USER_ONLY) + llvm_suppress_chaining(EntryTB); +#endif + + EntryTB->mode = BLOCK_ACTIVE; + EntryTB->exec_count = 0; + EntryTB->opt_ptr = EntryTB->tc_ptr; + EntryTB->tid = -1; + } + + DepTraces.clear(); + ChainInfo::free(tb); + + return 1; +} + +/* + * llvm_resolve_address() + * Given the value returned when leaving the code cache, return the patch + * address for the region chaining. + */ +static uintptr_t llvm_resolve_address(uintptr_t addr) +{ + if (LLVMEnv::InitOnce == false) + return 0; + + hqemu::MutexGuard locked(llvm_global_lock); + + LLVMEnv::ChainSlot &ChainPoint = LLEnv->getChainPoint(); + size_t Key = addr >> 2; + return ChainPoint[Key]; +} + +#if defined(CONFIG_USER_ONLY) +#define cross_page(__tb) (0) +#define trace_add_jump(src, dst) patch_jmp(next_tb, tb->opt_ptr) +#else +#define cross_page(__tb) (__tb->page_addr[1] != (unsigned long)-1) +#define trace_add_jump(src, dst) patch_jmp(next_tb, tb->tc_ptr) +#endif + +void llvm_handle_chaining(uintptr_t next_tb, TranslationBlock *tb) +{ + if ((next_tb & TB_EXIT_MASK) == TB_EXIT_LLVM) { + next_tb = llvm_resolve_address(next_tb); + if (next_tb && !cross_page(tb)) { + /* Keep track of traces (i.e., next_tb) that jump to this tb. */ + ChainInfo &Chain = *ChainInfo::get(tb); + Chain.insertChain(next_tb); + + /* For system-mode emulation, we only let the source traces + * jump to the trace head 'block' in the block code cache. */ + trace_add_jump(next_tb, tb); + } + } else if (next_tb != 0 && !cross_page(tb)) { + TranslationBlock *pred = (TranslationBlock *)(next_tb & ~TB_EXIT_MASK); + int n = next_tb & TB_EXIT_MASK; + tb_add_jump(pred, n, tb); + + GlobalCFG.insertLink(pred, tb); + } +} + +int llvm_locate_trace(uintptr_t searched_pc) +{ + uintptr_t Start = (uintptr_t)LLVMEnv::TraceCache; + uintptr_t End = Start + LLVMEnv::TraceCacheSize; + return (searched_pc >= Start && searched_pc < End); +} + +TranslationBlock *llvm_find_pc(CPUState *cpu, uintptr_t searched_pc) +{ + LLVMEnv::TransCodeMap &SortedCode = LLEnv->getSortedCode(); + CPUArchState *env = (CPUArchState *)cpu->env_ptr; + + if (LLVMEnv::InitOnce == false) + return nullptr; + if (!llvm_locate_trace(searched_pc)) + return nullptr; + + hqemu::MutexGuard locked(llvm_global_lock); + + LLVMEnv::TransCodeMap::iterator I = SortedCode.upper_bound(searched_pc); + TranslatedCode *TC = (--I)->second; + + if (env->restore_val >= TC->Restore.size()) { + auto HostDisAsm = LLEnv->getTranslator(0)->getHostDisAsm(); + if (HostDisAsm) + HostDisAsm->PrintOutAsm((uint64_t)TC->Code, TC->Size); + hqemu_error("got exception at 0x%zx\n", searched_pc); + } + + /* Since restore_val is no longer used, we set it to the + * the opc index so the later restore can quickly get it. */ + std::pair<BlockID, uint16_t> RestoreInfo = TC->Restore[env->restore_val]; + env->restore_val = RestoreInfo.second - 1; + return &tbs[RestoreInfo.first]; +} + +/* + * llvm_restore_state() + * The cpu state corresponding to 'searched_pc' is restored. + */ +int llvm_restore_state(CPUState *cpu, TranslationBlock *tb, + uintptr_t searched_pc) +{ + target_ulong data[TARGET_INSN_START_WORDS] = { tb->pc }; + CPUArchState *env = (CPUArchState *)cpu->env_ptr; + uintptr_t host_pc = (uintptr_t)tb->tc_ptr; + uint8_t *p = tb->tc_search; + + /* Reconstruct the stored insn data while looking for the point at + which the end of the insn exceeds the searched_pc. 
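+       Each field is stored in tb->tc_search as a signed LEB128 delta against
+       the previous instruction, which is why decode_sleb128() is applied to
+       every data word and to the host-PC advance below. (For reference, the
+       byte sequence 0x9b 0x7f decodes to -101: 27 + 127*128 = 16283, then
+       sign-extended from 14 bits.)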
*/ + for (unsigned i = 0, e = tb->icount; i != e; ++i) { + for (unsigned j = 0; j < TARGET_INSN_START_WORDS; ++j) { + data[j] += decode_sleb128(&p); + } + host_pc += decode_sleb128(&p); + if (env->restore_val == i) + goto found; + } + return -1; + +found: + restore_state_to_opc(env, tb, data); + + return 0; +} + +/* + * llvm_fork_start() + * Wrapper function to stop the optimization service before performing fork. + */ +void llvm_fork_start(void) +{ + if (!LLEnv->isThreading()) + return; + + dbg() << DEBUG_LLVM << __func__ << " entered.\n"; + + LLEnv->StopThread(); +} + +/* + * llvm_fork_end() + * Wrapper function to restart the optimization service after performing fork. + */ +void llvm_fork_end(int child) +{ + if (!LLEnv->isThreading()) + return; + + dbg() << DEBUG_LLVM << __func__ << " entered.\n"; + + /* Now, restart the LLVM thread. */ + if (child == 0) { + LLEnv->StartThread(); + } else { + ThreadExit = true; + LLVMEnv::setTransMode(TRANS_MODE_NONE); + + qemu_mutex_init(&LLEnv->mutex); + } +} + +int llvm_has_annotation(target_ulong addr, int annotation) +{ + if (annotation == ANNOTATION_LOOP) + return AF->hasLoopAnnotation(addr) == true; + return 0; +} + +} + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/llvm/optimization.cpp b/llvm/optimization.cpp new file mode 100644 index 0000000..15597bf --- /dev/null +++ b/llvm/optimization.cpp @@ -0,0 +1,317 @@ +/* + * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + * + * This file implements the basic optimization schemes including + * (1) instruction TLB (iTLB), + * (2) indirect branch target cache (IBTC), + * (3) cross-page block linking (CPBL), and + * (4) large page table (LPT). + */ + +#include "tracer.h" +#include "optimization.h" + + +#if defined(ENALBE_CPU_PROFILE) +# define PROFILE(X) do { X; } while (0) +#else +# define PROFILE(X) do { } while (0) +#endif + +/* The following implements routines of the C interfaces for QEMU. */ +extern "C" { + +TranslationBlock *tbs; +unsigned long alignment_count[2]; /* 0: misaligned, 1: aligned. */ +unsigned long aligned_boundary = 16; + +extern uint8_t *ibtc_ret_addr; + +/* + * iTLB (Instruction TLB) + */ +void itlb_update_entry(CPUArchState *env, TranslationBlock *tb) +{ + ITLB &itlb = cpu_get_itlb(env); + itlb.insert(tb->pc, tb->page_addr[0] & TARGET_PAGE_MASK); + if (tb->page_addr[1] != (tb_page_addr_t)-1) + itlb.insert(tb->pc + tb->size, tb->page_addr[1] & TARGET_PAGE_MASK); +} + +int itlb_lookup(CPUArchState *env, target_ulong pc, uint64_t paddr) +{ + ITLB &itlb = cpu_get_itlb(env); + return itlb.get(pc) == (paddr & TARGET_PAGE_MASK); +} + +/* + * IBTC (Indirect Branch Translation Cache) + */ +#if defined(ENABLE_IBTC) + +/* Update IBTC hash table. + * Note: we do not cache TBs that cross page boundary. */ +void ibtc_update_entry(CPUArchState *env, TranslationBlock *tb) +{ + IBTC &ibtc = cpu_get_ibtc(env); + if (!ibtc.needUpdate()) + return; + + ibtc.resetUpdate(); + +#if defined(CONFIG_SOFTMMU) + if (tb->page_addr[1] != (tb_page_addr_t)-1) + return; +#endif + + ibtc.insert(tb->pc, tb); +} + +/* Helper function to lookup the IBTC hash table. */ +void *helper_lookup_ibtc(CPUArchState *env) +{ + CPUState *cpu = ENV_GET_CPU(env); + if (unlikely(cpu->tcg_exit_req != 0)) { + cpu->tcg_exit_req = 0; + return ibtc_ret_addr; + } + + /* A match of 'pc', 'cs_base' and 'flags' results in a IBTC hit. 
Since + * cs_base is only meaningful with x86 guest and system mode (cs_base is + * always 0 for user-mode emulation and non-x86 guest), we only compare + * cs_base with system mode emulation of x86 guest. */ + + target_ulong pc = cpu_get_pc(env); + IBTC &ibtc = cpu_get_ibtc(env); + TranslationBlock *next_tb = ibtc.get(pc); + + PROFILE( ibtc.incTotal() ); + + if (likely(next_tb)) { +#if defined(CONFIG_SOFTMMU) + if (likely(itlb_lookup(env, pc, next_tb->page_addr[0]))) +#endif + if (likely(cpu_check_state(env, next_tb->cs_base, next_tb->flags))) { + cpu->current_tb = next_tb; + return next_tb->opt_ptr; + } + } + + PROFILE( ibtc.incMiss() ); + + ibtc.setUpdate(); + return ibtc_ret_addr; +} +#else +void ibtc_update_entry(CPUArchState *env, TranslationBlock *tb) {} +void *helper_lookup_ibtc(CPUArchState *env) { return ibtc_ret_addr; } +#endif /* ENABLE_IBTC */ + + +/* + * CPBL (Cross-Page Block Linking) + */ +#if defined(ENABLE_CPBL) +void *helper_lookup_cpbl(CPUArchState *env) +{ + CPUState *cpu = ENV_GET_CPU(env); + if (unlikely(cpu->tcg_exit_req != 0)) { + cpu->tcg_exit_req = 0; + return ibtc_ret_addr; + } + + /* A match of 'pc', 'cs_base' and 'flags' results in a CPBL hit. Since + * cs_base is only meaningful with x86 guest and system mode (cs_base is + * always 0 for user-mode emulation and non-x86 guest), we only compare + * cs_base with system mode emulation of x86 guest. */ + + target_ulong pc = cpu_get_pc(env); + TranslationBlock *next_tb = cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)]; + + PROFILE( cpu_get_cpbl(env).incTotal() ); + + if (likely(next_tb && next_tb->pc == pc)) + if (likely(cpu_check_state(env, next_tb->cs_base, next_tb->flags))) { + cpu->current_tb = next_tb; + return next_tb->opt_ptr; + } + + PROFILE( cpu_get_cpbl(env).incMiss() ); + + return ibtc_ret_addr; +} + +int helper_validate_cpbl(CPUArchState *env, target_ulong pc, int id) +{ + TranslationBlock *tb = &tbs[id]; + + PROFILE( cpu_get_cpbl(env).incValidateTotal() ); + if (tb->page_addr[1] == (tb_page_addr_t)-1 && + likely(itlb_lookup(env, pc, tb->page_addr[0]))) + return 1; + if (likely(itlb_lookup(env, pc + TARGET_PAGE_SIZE, tb->page_addr[1]))) + return 1; + PROFILE( cpu_get_cpbl(env).incValidateMiss() ); + return 0; +} + +#else +void *helper_lookup_cpbl(CPUArchState *env) { return ibtc_ret_addr; } +int helper_validate_cpbl(CPUArchState *env, target_ulong pc, int id) { return 0; } +#endif /* ENABLE_CPBL */ + + +#if defined(ENABLE_LPAGE) +int lpt_reset(CPUArchState *env) +{ + if (env->opt_link == nullptr) + return 0; + LargePageTable &lpt = cpu_get_lpt(env); + lpt.reset(); + return 1; +} +/* Add a large page to LPT. */ +int lpt_add_page(CPUArchState *env, target_ulong addr, target_ulong size) +{ + LargePageTable &lpt = cpu_get_lpt(env); + lpt.insert(addr, size); + return 1; +} + +/* Given an address, return 1 if this address overlaps with any tracked + * large page and return 0 otherwise. The large page record is NOT removed + * if it is found. */ +int lpt_search_page(CPUArchState *env, target_ulong addr, target_ulong *addrp, + target_ulong *sizep) +{ + LargePageTable &lpt = cpu_get_lpt(env); + return lpt.search(addr, LargePageTable::SEARCH, addrp, sizep); +} + +/* Given an address, return the pte index if this address overlaps with + * any tracked large page and return -1 otherwise. If a large page is found, + * remove it from the list. 
*/ +int lpt_flush_page(CPUArchState *env, target_ulong addr, target_ulong *addrp, + target_ulong *sizep) +{ + LargePageTable &lpt = cpu_get_lpt(env); + PROFILE( lpt.incTotal() ); + if (lpt.search(addr, LargePageTable::FLUSH, addrp, sizep)) + return 1; + PROFILE( lpt.incMiss() ); + return 0; +} +#else +int lpt_reset(CPUArchState *env) { return 0; } +int lpt_add_page(CPUArchState *env, target_ulong addr, target_ulong size) { return 0; } +int lpt_search_page(CPUArchState *env, target_ulong addr, + target_ulong *addrp, target_ulong *sizep) { return 0; } +int lpt_flush_page(CPUArchState *env, target_ulong addr, + target_ulong *addrp, target_ulong *sizep) { return 0; } +#endif + +/* Initialize the optimization schemes. */ +int optimization_init(CPUArchState *env) +{ + CPUState *cpu = ENV_GET_CPU(env); + if (cpu->cpu_index == 0) { + tbs = tcg_ctx.tb_ctx->tbs; + if (!tbs) { + std::cerr << __func__ << ": fatal error.\n"; + exit(0); + } + if (get_cpu_size() != sizeof(CPUArchState)) { + std::cerr << "Inconsistent CPUArchState size in C and C++.\n" + "This may be because sizeof empty struct in C is " + "different with C++. Please fix this.\n"; + exit(0); + } + } + + /* Create a processor tracer for each env. */ + BaseTracer *Tracer = BaseTracer::CreateTracer(env); + + /* Create optimization facilities. */ + CPUOptimization *Opt = new CPUOptimization(cpu, Tracer); + + /* Make an uplink to the optimizaiton facility object. */ + env->opt_link = Opt; + return 1; +} + +/* Finalize the optimization schemes. */ +int optimization_finalize(CPUArchState *env) +{ + if (env->opt_link == nullptr) + return 0; + + PROFILE( cpu_get_ibtc(env).dump() ); +#if defined(CONFIG_SOFTMMU) + PROFILE( cpu_get_cpbl(env).dump() ); + PROFILE( cpu_get_lpt(env).dump() ); +#endif + + BaseTracer::DeleteTracer(env); + delete (CPUOptimization *)env->opt_link; + return 1; +} + +/* Reset to default values of the optimizatiion schemes. */ +int optimization_reset(CPUArchState *env, int force_flush) +{ + if (env->opt_link == nullptr) + return 0; + + ITLB &itlb = cpu_get_itlb(env); + IBTC &ibtc = cpu_get_ibtc(env); + + itlb.reset(); + if (force_flush) + ibtc.reset(); + + tracer_reset(env); + return 1; +} + +int optimization_remove_entry(CPUArchState *env, TranslationBlock *tb) +{ + IBTC &ibtc = cpu_get_ibtc(env); + ibtc.remove(tb); + return 1; +} + +int optimization_flush_page(CPUArchState *env, target_ulong pc) +{ +#if defined(CONFIG_SOFTMMU) + ITLB &itlb = cpu_get_itlb(env); + itlb.flush(pc); +#else + IBTC &ibtc = cpu_get_ibtc(env); + ibtc.reset(); +#endif + return 1; +} + +int optimization_init_tb(TranslationBlock *tb, int id) +{ + tb->id = id; + tb->tid = -1; + tb->mode = BLOCK_NONE; + tb->opt_ptr = nullptr; + tb->exec_count = 0; + tb->patch_jmp = 0; + tb->patch_next = 0; + tb->jmp_pc[0] = tb->jmp_pc[1] = (target_ulong)-1; + tb->image = nullptr; + tb->state = nullptr; + tb->chain = nullptr; + return 1; +} + +} + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/llvm/pass/CombineCasts.cpp b/llvm/pass/CombineCasts.cpp new file mode 100644 index 0000000..71a74ff --- /dev/null +++ b/llvm/pass/CombineCasts.cpp @@ -0,0 +1,321 @@ +/* + * (C) 2015 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. 
+ */ + +#include "llvm/Transforms/Utils/Local.h" +#include "llvm-target.h" +#include "llvm-opc.h" +#include "llvm-pass.h" +#include "utils.h" + +#define PASS_NAME "CombineCasts" + +/* + * CombineCasts Pass + */ +class CombineCasts : public FunctionPass { + IRFactory *IF; + const DataLayout *DL; + MDFactory *MF; + IntegerType *Int8Ty; + IntegerType *Int32Ty; + IntegerType *Int64Ty; + IntegerType *IntPtrTy; + PointerType *Int8PtrTy; + PointerType *Int32PtrTy; + PointerType *Int64PtrTy; + Type *FloatTy; + Type *DoubleTy; + IVec toErase; + +public: + static char ID; + explicit CombineCasts() : FunctionPass(ID) {} + explicit CombineCasts(IRFactory *IF) + : FunctionPass(ID), IF(IF), DL(IF->getDL()), MF(IF->getMDFactory()) + { + LLVMContext &Context = IF->getContext();; + Int8Ty = IntegerType::get(Context, 8); + Int32Ty = IntegerType::get(Context, 32); + Int64Ty = IntegerType::get(Context, 64); + IntPtrTy = DL->getIntPtrType(Context); + Int8PtrTy = Type::getInt8PtrTy(Context, 0); + Int32PtrTy = Type::getInt32PtrTy(Context, 0); + Int64PtrTy = Type::getInt64PtrTy(Context, 0); + FloatTy = Type::getFloatTy(Context); + DoubleTy = Type::getDoubleTy(Context); + } + + Instruction *getUniqueUser(Instruction *I) { + if (I->hasOneUse()) + return I->user_back(); + return nullptr; + }; + + bool combineLoadCast(LoadInst *LI); + bool combineStoreCast(StoreInst *SI); + bool combineCastCast(Function &F); + bool simplifySignChange(Function &F); + bool runOnFunction(Function &F); +}; + +char CombineCasts::ID = 0; +INITIALIZE_PASS(CombineCasts, "combinecast", + "Combine bitcast with guest memory loads/stores", false, false) + +FunctionPass *llvm::createCombineCasts(IRFactory *IF) +{ + return new CombineCasts(IF); +} + +static bool hasSameCastingTy(ArrayRef<BitCastInst *> IL) { + Type *SrcTy = IL[0]->getSrcTy(); + Type *DstTy = IL[0]->getDestTy(); + for (BitCastInst *I : IL) { + if (I->getSrcTy() != SrcTy) + return false; + if (I->getDestTy() != DstTy) + return false; + } + return true; +} + +/* This function aims to change the load type if (1) the type of loaded data is + * casted to another type, (2) only one user of the load instruction is bitcast, + * and (3) all other users of the load instruction are stores. + * + * For example: + * %0 = load <typeA>* %0 = load <typeB>* + * %1 = bitcast %0, <typeB> %1 = bitcast %0, <typeA> + * + * %2 = op <typeB> %1, ... => %2 = op <typeB> %0, ... + * + * store %0, <typeA>* store %1, <typeA>* + * store %1, <typeB>* store %0, <typeB>* + */ +bool CombineCasts::combineLoadCast(LoadInst *LI) +{ + Instruction *Ptr = dyn_cast<Instruction>(LI->getPointerOperand()); + + if (!Ptr) + return false; + + /* Find all bitcast users of this load. 
*/ + SmallVector<BitCastInst *, 4> BCIs; + for (User *U : LI->users()) { + Instruction *UI = cast<Instruction>(U); + switch (UI->getOpcode()) { + default: + return false; + case Instruction::PHI: + case Instruction::Load: + case Instruction::Store: + break; + case Instruction::BitCast: + BCIs.push_back(cast<BitCastInst>(UI)); + break; + } + } + + if (BCIs.empty() || !hasSameCastingTy(BCIs)) + return false; + + Instruction *InsertPos = LI; + unsigned Alignment = LI->getAlignment(); + unsigned Volatile = LI->isVolatile(); + Type *SrcTy = LI->getType(); + Type *DstTy = BCIs[0]->getDestTy(); + + Type *PtrTy = PointerType::get(DstTy, LI->getPointerAddressSpace()); + if (isa<IntToPtrInst>(Ptr)) + Ptr = new IntToPtrInst(Ptr->getOperand(0), PtrTy, "", InsertPos); + else + Ptr = new BitCastInst(Ptr, PtrTy, "", InsertPos); + + Instruction *NewLI = new LoadInst(Ptr, "", Volatile, Alignment, InsertPos); + Instruction *NewBCI = new BitCastInst(NewLI, SrcTy, "", InsertPos); + + if (MF->isGuestMemory(LI)) + MF->setGuestMemory(NewLI); + for (BitCastInst *BCI : BCIs) + BCI->replaceAllUsesWith(NewLI); + LI->replaceAllUsesWith(NewBCI); + + toErase.push_back(LI); + for (BitCastInst *BCI : BCIs) + toErase.push_back(BCI); + + return true; +} + +/* This function aims to change the store type if stored data is casted from + * another type. + * + * For example: + * %0 = <typeA> + * %1 = bitcast %0, <typeB> => store %0, <typeA>* + * store %1, <typeB>* + */ +bool CombineCasts::combineStoreCast(StoreInst *SI) +{ + Instruction *Ptr = dyn_cast<Instruction>(SI->getPointerOperand()); + Instruction *Data = dyn_cast<Instruction>(SI->getValueOperand()); + + if (!Ptr || !Data || !isa<BitCastInst>(Data)) + return false; + + Instruction *InsertPos = SI; + unsigned Alignment = SI->getAlignment(); + unsigned Volatile = SI->isVolatile(); + BitCastInst *BCI = cast<BitCastInst>(Data); + Value *V = BCI->getOperand(0); + Type *SrcTy = V->getType(); + + Type *PtrTy = PointerType::get(SrcTy, SI->getPointerAddressSpace()); + if (isa<IntToPtrInst>(Ptr)) + Ptr = new IntToPtrInst(Ptr->getOperand(0), PtrTy, "", InsertPos); + else + Ptr = new BitCastInst(Ptr, PtrTy, "", InsertPos); + + Instruction *NewSI = new StoreInst(V, Ptr, Volatile, Alignment, InsertPos); + + if (MF->isGuestMemory(SI)) + MF->setGuestMemory(NewSI); + + toErase.push_back(SI); + return true; +} + +/* This function aims to eliminate redundant casts. + * For example: + * %0 = <typeA> %0 = <typeA> + * %1 = bitcast %0, <typeB> => + * %2 = bitcast %1, <typeC> %2 = bitcast %0, <typeC> + * = op <typeC> %2, ... = op <typeC> %2, ... + * + * And if <typeA> is the same as <typeC>, the code is further optimized to + * %0 = <typeA> %0 = <typeA> + * %1 = bitcast %0, <typeB> => + * %2 = bitcast %1, <typeC> + * = op <typeC> %2, ... = op <typeA> %0, ... + */ +bool CombineCasts::combineCastCast(Function &F) +{ + SmallVector<Instruction*, 4> Worklist; + for (auto II = inst_begin(F), EE = inst_end(F); II != EE; II++) { + Instruction *I = &*II; + if (isa<BitCastInst>(I)) + Worklist.push_back(I); + } + + for (auto I : Worklist) { + BitCastInst *CI = cast<BitCastInst>(I); + BitCastInst *CSrc = dyn_cast<BitCastInst>(CI->getOperand(0)); + if (!CSrc) + continue; + + Type *SrcTy = CSrc->getOperand(0)->getType(); + Type *DstTy = CI->getType(); + Value *Result = (SrcTy == DstTy) ? 
CSrc->getOperand(0) : + new BitCastInst(CSrc->getOperand(0), CI->getType(), "", CI); + I->replaceAllUsesWith(Result); + toErase.push_back(I); + } + + if (toErase.empty()) + return false; + + ProcessErase(toErase); + return true; +} + +/* This function converts sign change of float/double data (i.e., -num), + * which is implemented with integer operations, to use float/double ops. + * For example: + * %0 = bitcast float %num to i32 + * %1 = xor i32 %0, 0x80000000 => %0 = fsub float 0, %num + * %2 = bitcast %1, float + */ +bool CombineCasts::simplifySignChange(Function &F) +{ + SmallVector<BitCastInst*, 16> Worklist; + + for (auto II = inst_begin(F), EE = inst_end(F); II != EE; II++) { + Instruction *I = &*II; + if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) { + Type *SrcTy = BCI->getSrcTy(); + Type *DstTy = BCI->getDestTy(); + if (SrcTy == FloatTy && DstTy == Int32Ty) + Worklist.push_back(BCI); + else if (SrcTy == DoubleTy && DstTy == Int64Ty) + Worklist.push_back(BCI); + } + } + + for (auto I : Worklist) { + Type *Ty = I->getSrcTy(); + Value *C = (Ty == FloatTy) ? CONST32(0x80000000) + : CONST64(0x8000000000000000LL); + + /* Check whether the single user of this bitcast is Xor. */ + Instruction *UI = getUniqueUser(I); + if (UI && UI->getOpcode() == Instruction::Xor && UI->getOperand(1) == C) { + /* Check whether the single user of this Xor is a bitcast + * instruction that casts the type back to the src type. */ + Instruction *UUI = getUniqueUser(UI); + if (UUI && UUI->getOpcode() == Instruction::BitCast && + cast<BitCastInst>(UUI)->getDestTy() == Ty) { + Value *V = BinaryOperator::Create(Instruction::FSub, + ConstantFP::get(Ty, 0), + I->getOperand(0), "", I); + UUI->replaceAllUsesWith(V); + toErase.push_back(UUI); + } + } + } + + if (toErase.empty()) + return false; + + ProcessErase(toErase); + return true; +} + +bool CombineCasts::runOnFunction(Function &F) +{ + bool Changed = false; + SmallVector<LoadInst *, 16> Loads; + SmallVector<StoreInst *, 16> Stores; + + /* Collect all guest memory and non-volatile cpu state loads/stores. */ + for (auto II = inst_begin(F), EE = inst_end(F); II != EE; II++) { + Instruction *I = &*II; + + if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + if (MF->isGuestMemory(LI) || !LI->isVolatile()) + Loads.push_back(LI); + } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { + if (MF->isGuestMemory(SI) || !SI->isVolatile()) + Stores.push_back(SI); + } + } + + for (auto LI : Loads) + Changed |= combineLoadCast(LI); + for (auto SI : Stores) + Changed |= combineStoreCast(SI); + + if (toErase.size()) + ProcessErase(toErase); + + Changed |= combineCastCast(F); + Changed |= simplifySignChange(F); + + return Changed; +} + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/llvm/pass/CombineGuestMemory.cpp b/llvm/pass/CombineGuestMemory.cpp new file mode 100644 index 0000000..0740a8b --- /dev/null +++ b/llvm/pass/CombineGuestMemory.cpp @@ -0,0 +1,389 @@ +/* + * (C) 2015 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. 
+ */ + +#include "llvm-debug.h" +#include "llvm-opc.h" +#include "llvm-target.h" +#include "llvm-pass.h" +#include "utils.h" + +#define PASS_NAME "CombineGuestMemory" + +/* + * CombineGuestMemory Pass + */ +class CombineGuestMemory : public FunctionPass { + + struct StateInfo { + StateInfo() : Ptr(nullptr) {} + StateInfo(Value *ptr, APInt &offset, APInt &size) + : Ptr(ptr), Offset(offset), Size(size) {} + Value *Ptr; + APInt Offset; + APInt Size; + }; + + typedef std::pair<Value *, Value *> ValuePair; + typedef std::map<size_t, size_t> StateMap; + typedef DenseMap<ValuePair, StateInfo> CSMap; + + IRFactory *IF; + const DataLayout *DL; + MDFactory *MF; + IntegerType *Int8Ty; + IntegerType *Int32Ty; + IntegerType *Int64Ty; + IntegerType *IntPtrTy; + PointerType *Int8PtrTy; + PointerType *Int32PtrTy; + PointerType *Int64PtrTy; + Value *CPU; + Value *GuestBase; + Instruction *InitLastInst; + StateMap LegalStates; + IVec toErase; + +public: + static char ID; + explicit CombineGuestMemory() : FunctionPass(ID) {} + explicit CombineGuestMemory(IRFactory *IF) + : FunctionPass(ID), IF(IF), DL(IF->getDL()), MF(IF->getMDFactory()) + { + LLVMContext &Context = IF->getContext();; + Int8Ty = IntegerType::get(Context, 8); + Int32Ty = IntegerType::get(Context, 32); + Int64Ty = IntegerType::get(Context, 64); + IntPtrTy = DL->getIntPtrType(Context); + Int8PtrTy = Type::getInt8PtrTy(Context, 0); + Int32PtrTy = Type::getInt32PtrTy(Context, 0); + Int64PtrTy = Type::getInt64PtrTy(Context, 0); + + GuestBase = IF->getGuestBase(); + + addLegalStates(); + } + + unsigned getAddressSpaceOperand(Value *I) { + if (LoadInst *LI = dyn_cast<LoadInst>(I)) + return LI->getPointerAddressSpace(); + if (StoreInst *SI = dyn_cast<StoreInst>(I)) + return SI->getPointerAddressSpace(); + return -1U; + } + + int getNumUsers(Instruction *I) { + return distance(I->user_begin(), I->user_end()); + } + + void addLegalStates(); + bool isLegalState(Value *Ptr, APInt &Offset, APInt &Size); + bool isConsecutiveAccess(Value *A, Value *B, Value *&Ptr, APInt &Offset, APInt &Size); + bool tryCombineLoad(Value *A, Value *B, CSMap &States); + bool tryCombineStore(Value *A, Value *B, CSMap &States); + bool combineMemory(SmallVector<Value *, 8> &Memory, SmallVector<Value *, 8> &States); + bool runOnFunction(Function &F); +}; + +char CombineGuestMemory::ID = 0; +INITIALIZE_PASS(CombineGuestMemory, "combinegm", + "Combine guest memory loads and stores", false, false) + +FunctionPass *llvm::createCombineGuestMemory(IRFactory *IF) +{ + return new CombineGuestMemory(IF); +} + + +void CombineGuestMemory::addLegalStates() +{ +#if defined(TARGET_I386) + size_t Start = offsetof(CPUArchState, xmm_regs[0]); + size_t Size = sizeof(XMMReg); + for (int i = 0; i < CPU_NB_REGS; ++i) + LegalStates[Start + Size * i] = Size; +#elif defined(TARGET_ARM) + size_t Start = offsetof(CPUArchState, vfp.regs[0]); + size_t Size = sizeof(float64) * 2; + for (int i = 0; i < 32; ++i) + LegalStates[Start + Size * i] = Size; +#endif +} + +bool CombineGuestMemory::isConsecutiveAccess(Value *A, Value *B, Value *&Ptr, + APInt &Offset, APInt &Size) +{ + Value *PtrA = getPointerOperand(A); + Value *PtrB = getPointerOperand(B); + unsigned ASA = getAddressSpaceOperand(A); + unsigned ASB = getAddressSpaceOperand(B); + + if (!PtrA || !PtrB || (ASA != ASB)) + return false; + + Type *TyA = cast<PointerType>(PtrA->getType())->getElementType(); + Type *TyB = cast<PointerType>(PtrB->getType())->getElementType(); + if (DL->getTypeStoreSize(TyA) != DL->getTypeStoreSize(TyB)) + return false; + + 
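+    /* Both accesses have the same store size. Next, strip any constant
+     * offsets (and the guest base, if present) from the two pointers; the
+     * accesses are consecutive when the stripped bases are identical and the
+     * offsets differ by exactly one access size, e.g. base+0x10 followed by
+     * base+0x18 for two 8-byte accesses (the offsets here are illustrative). */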
unsigned PtrBitWidth = DL->getTypeSizeInBits(TyA); + APInt Sz(PtrBitWidth, DL->getTypeStoreSize(TyA)); + + APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0); + PtrA = StripPointerWithConstantOffset(DL, PtrA, OffsetA, GuestBase); + PtrB = StripPointerWithConstantOffset(DL, PtrB, OffsetB, GuestBase); + + APInt OffsetDelta = OffsetB - OffsetA; + if (PtrA == PtrB && OffsetDelta == Sz) { + Ptr = PtrA; + Offset = OffsetA; + Size = Sz + Sz; + return true; + } + + return false; +} + +bool CombineGuestMemory::isLegalState(Value *Ptr, APInt &Offset, APInt &Size) +{ + if (Ptr != CPU) + return false; + uint64_t Start = Offset.getZExtValue(); + if (LegalStates.find(Start) == LegalStates.end() || + Size.getZExtValue() > LegalStates[Start]) + return false; + return true; +} + +static bool hasMemoryViolation(Instruction *SA, Instruction *SB, + Instruction *EA, Instruction *EB) +{ + std::set<Value*> Insts; + Insts.insert(SA); + Insts.insert(SB); + Insts.insert(EA); + Insts.insert(EB); + + BasicBlock::iterator BI = BasicBlock::iterator(SA); + BasicBlock::iterator BE = BasicBlock::iterator(EA); + for (; BI != BE; ++BI) { + Instruction *I = &*BI; + if (isa<CallInst>(I)) + return true; + if (!isa<LoadInst>(I) && !isa<StoreInst>(I)) + continue; + if (Insts.find(I) == Insts.end()) + return true; + } + + BI = BasicBlock::iterator(SB); + BE = BasicBlock::iterator(EB); + for (; BI != BE; ++BI) { + Instruction *I = &*BI; + if (isa<CallInst>(I)) + return true; + if (!isa<LoadInst>(I) && !isa<StoreInst>(I)) + continue; + if (Insts.find(I) == Insts.end()) + return true; + } + return false; +} + +bool CombineGuestMemory::tryCombineLoad(Value *A, Value *B, CSMap &States) +{ + /* First, check if the guest loads are 'only' used by the store instructions + * to consecutive CPU states, and if any other loads/stores occurs between + * the queried operation. */ + LoadInst *LA = cast<LoadInst>(A); + LoadInst *LB = cast<LoadInst>(B); + if (getNumUsers(LA) != 1 || getNumUsers(LB) != 1) + return false; + + Value *VA = *LA->user_begin(); + Value *VB = *LB->user_begin(); + CSMap::iterator CSI = States.find(ValuePair(VA, VB)); + if (CSI == States.end()) + return false; + + Instruction *SA = cast<Instruction>(VA); + Instruction *SB = cast<Instruction>(VB); + + if (hasMemoryViolation(LA, LB, SA, SB)) + return false; + + /* Here we found the guest memory operations are loaded and stored to the + * CPU states immediately. The operations are safe to combine. 
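+     *
+     * Illustrative example (names are made up): two consecutive 8-byte guest
+     * loads whose values are only stored to two consecutive CPU-state slots
+     * become one 16-byte vector load plus a single vector store:
+     *
+     *   %lo = load i64* %g              %v = load <16 x i8>* %g
+     *   %hi = load i64* %g+8       =>   store <16 x i8> %v, <16 x i8>* %state
+     *   store i64 %lo, i64* %state
+     *   store i64 %hi, i64* %state+8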
*/ + Instruction *InsertPos = SA; + StateInfo &SI = CSI->second; + uint64_t Size = SI.Size.getZExtValue(); + unsigned AS = getAddressSpaceOperand(LA); + unsigned Align = Size / 2; + Type *Ty = PointerType::get(VectorType::get(Int8Ty, Size), AS); + Instruction *Ptr = cast<Instruction>(LA->getPointerOperand()); + if (isa<IntToPtrInst>(Ptr)) + Ptr = new IntToPtrInst(Ptr->getOperand(0), Ty, "", InsertPos); + else + Ptr = new BitCastInst(Ptr, Ty, "", InsertPos); + Instruction *NewLI = new LoadInst(Ptr, "", true, Align, InsertPos); + MF->setGuestMemory(NewLI); + + Ty = PointerType::getUnqual(VectorType::get(Int8Ty, Size)); + Value *Offset = ConstantInt::get(Ty->getContext(), SI.Offset); + Ptr = GetElementPtrInst::CreateInBounds(CPU, Offset, "", InitLastInst); + Ptr = new BitCastInst(Ptr, Ty, "", InitLastInst); + new StoreInst(NewLI, Ptr, false, InsertPos); + + States.erase(CSI); + toErase.push_back(SA); + toErase.push_back(SB); + return true; +} + +bool CombineGuestMemory::tryCombineStore(Value *A, Value *B, CSMap &States) +{ + /* First, check if the CPU state loads are 'only' used by the guest store + * instructions, and if any other loads/stores occurs between the + * queried operation. */ + StoreInst *SA = cast<StoreInst>(A); + StoreInst *SB = cast<StoreInst>(B); + Instruction *LA = dyn_cast<Instruction>(SA->getOperand(0)); + Instruction *LB = dyn_cast<Instruction>(SB->getOperand(0)); + + if (!LA || !LB) + return false; + if (getNumUsers(LA) != 1 || getNumUsers(LB) != 1) + return false; + + CSMap::iterator CSI = States.find(ValuePair(LA, LB)); + if (CSI == States.end()) + return false; + + if (hasMemoryViolation(LA, LB, SA, SB)) + return false; + + /* Here we found the CPU states are loaded and stored to the guest memory + * immediately. The operations are safe to combine. */ + Instruction *InsertPos = SA; + StateInfo &SI = CSI->second; + uint64_t Size = SI.Size.getZExtValue(); + Type *Ty = PointerType::getUnqual(VectorType::get(Int8Ty, Size)); + Value *Offset = ConstantInt::get(Ty->getContext(), SI.Offset); + Instruction *Ptr = GetElementPtrInst::CreateInBounds(CPU, Offset, "", InitLastInst); + Ptr = new BitCastInst(Ptr, Ty, "", InitLastInst); + Value *V = new LoadInst(Ptr, "", false, InsertPos); + + unsigned AS = getAddressSpaceOperand(SA); + unsigned Align = Size / 2; + Ty = PointerType::get(VectorType::get(Int8Ty, Size), AS); + Ptr = cast<Instruction>(SA->getPointerOperand()); + if (isa<IntToPtrInst>(Ptr)) + Ptr = new IntToPtrInst(Ptr->getOperand(0), Ty, "", InsertPos); + else + Ptr = new BitCastInst(Ptr, Ty, "", InsertPos); + Instruction *NewSI = new StoreInst(V, Ptr, true, Align, InsertPos); + MF->setGuestMemory(NewSI); + + States.erase(CSI); + toErase.push_back(SA); + toErase.push_back(SB); + return true; +} + +bool CombineGuestMemory::combineMemory(SmallVector<Value *, 8> &Memory, + SmallVector<Value *, 8> &States) +{ + bool Changed = false; + SmallPtrSet<Value *, 4> Used; + CSMap ConsecutiveStates; + Value *Ptr; + APInt Offset, Size; + + /* Find consecutive CPU states. */ + for (unsigned i = 1, e = States.size(); i != e; i++) { + if (!isConsecutiveAccess(States[i-1], States[i], Ptr, Offset, Size)) + continue; + + if (!isLegalState(Ptr, Offset, Size)) + continue; + + ConsecutiveStates[ValuePair(States[i-1], States[i])] = + StateInfo(Ptr, Offset, Size); + } + + if (ConsecutiveStates.size() == 0) + return false; + + /* Find and combine consecutive guest memory accesses if their referrenced + * CPU states are also consecutive. 
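+     * Memory[] and States[] are scanned pairwise in program order; a pair of
+     * guest accesses is merged only when the CPU-state accesses it is tied to
+     * were recorded as consecutive above, and every instruction takes part in
+     * at most one combination (tracked by the Used set).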
*/ + for (unsigned i = 1, e = Memory.size(); i != e; i++) { + if (Used.count(Memory[i-1]) || Used.count(Memory[i])) + continue; + if (!isConsecutiveAccess(Memory[i-1], Memory[i], Ptr, Offset, Size)) + continue; + + bool ret = false; + if (isa<LoadInst>(Memory[i-1]) && isa<LoadInst>(Memory[i])) { + ret = tryCombineLoad(Memory[i-1], Memory[i], ConsecutiveStates); + } else if (isa<StoreInst>(Memory[i-1]) && isa<StoreInst>(Memory[i])) { + ret = tryCombineStore(Memory[i-1], Memory[i], ConsecutiveStates); + } + if (ret) { + Used.insert(Memory[i-1]); + Used.insert(Memory[i]); + Changed = true; + } + } + return Changed; +} + +bool CombineGuestMemory::runOnFunction(Function &F) +{ + bool Changed = false; + +#if defined(CONFIG_SOFTMMU) + return Changed; +#endif + + /* Skip if no state is allowed to be combined. */ + if (LegalStates.empty()) + return Changed; + + CPU = IF->getDefaultCPU(F); + if (!CPU) { + dbg() << DEBUG_PASS << "CombineGuestMemory: Cannot find CPU pointer.\n"; + return false; + } + + InitLastInst = F.getEntryBlock().getTerminator(); + + for (auto FI = F.begin(), FE = F.end(); FI != FE; ++FI) { + SmallVector<Value *, 8> Memory; + SmallVector<Value *, 8> States; + for (auto BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) { + Instruction *I = &*BI; + if (MF->isGuestMemory(I)) { + Memory.push_back(I); + } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + if (!LI->isVolatile()) + States.push_back(LI); + } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { + if (!SI->isVolatile()) + States.push_back(SI); + } + } + if (Memory.size() >= 2 && States.size() >= 2) + Changed |= combineMemory(Memory, States); + } + + if (!toErase.empty()) + ProcessErase(toErase); + + return Changed; +} + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/llvm/pass/CombineZExtTrunc.cpp b/llvm/pass/CombineZExtTrunc.cpp new file mode 100644 index 0000000..de9a87f --- /dev/null +++ b/llvm/pass/CombineZExtTrunc.cpp @@ -0,0 +1,70 @@ +/* + * (C) 2015 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. 
+ */ + +#include "llvm/Transforms/Utils/Local.h" +#include "llvm-target.h" +#include "llvm-opc.h" +#include "llvm-pass.h" +#include "utils.h" + +#define PASS_NAME "CombineZExtTrunc" + +/* + * CombineZExtTrunc Pass + */ +class CombineZExtTrunc : public FunctionPass { +public: + static char ID; + explicit CombineZExtTrunc() : FunctionPass(ID) {} + bool runOnFunction(Function &F); +}; + +char CombineZExtTrunc::ID = 0; +INITIALIZE_PASS(CombineZExtTrunc, "combinezet", + "Combine ZExt followed by Trunc", false, false) + +FunctionPass *llvm::createCombineZExtTrunc() +{ + return new CombineZExtTrunc; +} + +bool CombineZExtTrunc::runOnFunction(Function &F) +{ + bool Changed = false; + IVec toErase; + + SmallVector<Instruction*, 4> Worklist; + for (auto II = inst_begin(F), EE = inst_end(F); II != EE; II++) { + Instruction *I = &*II; + if (isa<TruncInst>(I)) + Worklist.push_back(I); + } + + for (auto I : Worklist) { + TruncInst *TI = cast<TruncInst>(I); + ZExtInst *ZI = dyn_cast<ZExtInst>(TI->getOperand(0)); + if (!ZI) + continue; + + Type *SrcTy = ZI->getOperand(0)->getType(); + Type *DstTy = TI->getType(); + if (SrcTy == DstTy) { + I->replaceAllUsesWith(ZI->getOperand(0)); + if (TI->use_empty()) + toErase.push_back(TI); + Changed = true; + } + } + + if (toErase.size()) + ProcessErase(toErase); + + return Changed; +} + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/llvm/pass/FastMathPass.cpp b/llvm/pass/FastMathPass.cpp new file mode 100644 index 0000000..2b6a592 --- /dev/null +++ b/llvm/pass/FastMathPass.cpp @@ -0,0 +1,87 @@ +/* + * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm-target.h" +#include "llvm-pass.h" +#include "fpu/softfloat-native-def.h" + +#define PASS_DEBUG "FastMathPass" + +class FastMathPass : public FunctionPass { +public: + static char ID; + std::map<std::string, std::string> FPUNameMap; + + explicit FastMathPass() : FunctionPass(ID) + { + TCGHelperInfo *FPUHelper = (TCGHelperInfo *)get_native_fpu_helpers(); + for (int i = 0, e = num_native_fpu_helpers(); i != e; ++i) { + /* ex: llvm_int32_to_float32 --> int32_to_float32 */ + TCGHelperInfo &fpu = FPUHelper[i]; + const char *native = fpu.name; + const char *soft = native + 5; + FPUNameMap[soft] = native; + } + } + bool runOnFunction(Function &F); +}; + +bool FastMathPass::runOnFunction(Function &F) +{ + IVec toErase; + SmallVector<CallInst *, 16> InlineCalls; + Module *Mod = F.getParent(); + + for (auto I = inst_begin(F), E = inst_end(F); I != E; ++I) { + if (CallInst *CI = dyn_cast<CallInst>(&*I)) { + if (CI->isInlineAsm() || + CI->getCalledFunction() == nullptr || + CI->getCalledFunction()->isIntrinsic()) + continue; + + std::string Fname = CI->getCalledFunction()->getName(); + if (FPUNameMap.count(Fname) == 0) + continue; + + Function *Fn = Mod->getFunction(FPUNameMap[Fname]); + FunctionType *FTy = cast<FunctionType>( + cast<PointerType>(Fn->getType())->getElementType()); + + unsigned NumArgs = FTy->getNumParams(); + assert(NumArgs <= CI->getNumArgOperands()); + + SmallVector<Value *, 4> Params; + for (unsigned i = 0; i != NumArgs; ++i) + Params.push_back(CI->getArgOperand(i)); + + CallInst *NewCI = CallInst::Create(Fn, Params, "", CI); + CI->replaceAllUsesWith(NewCI); + InlineCalls.push_back(NewCI); + toErase.push_back(CI); + } + } + + ProcessErase(toErase); + + while (!InlineCalls.empty()) + InlineFunc(InlineCalls.pop_back_val()); + + return false; +} + +char FastMathPass::ID = 0; 
+INITIALIZE_PASS(FastMathPass, "fastmath", + "Transform softfloat subroutines to native FP operations", false, false) + +FunctionPass *llvm::createFastMathPass() +{ + return new FastMathPass(); +} + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/llvm/pass/ProfileExec.cpp b/llvm/pass/ProfileExec.cpp new file mode 100644 index 0000000..56a68e1 --- /dev/null +++ b/llvm/pass/ProfileExec.cpp @@ -0,0 +1,172 @@ +/* + * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm-debug.h" +#include "llvm-soft-perfmon.h" +#include "llvm-pass.h" +#include "llvm-opc.h" +#include "llvm.h" +#include "utils.h" + +#define PASS_NAME "ProfileExec" + +extern LLVMEnv *LLEnv; + +/* + * Profile Pass + */ +class ProfileExec : public FunctionPass { + enum { + IDX_LOOP = 0, + IDX_EXIT, + IDX_INBR, + }; + + IRFactory *IF; + const DataLayout *DL; + MDFactory *MF; + IntegerType *Int8Ty; + IntegerType *Int32Ty; + IntegerType *Int64Ty; + IntegerType *IntPtrTy; + PointerType *Int8PtrTy; + PointerType *Int32PtrTy; + PointerType *Int64PtrTy; + +public: + static char ID; + explicit ProfileExec() : FunctionPass(ID) {} + explicit ProfileExec(IRFactory *IF) + : FunctionPass(ID), IF(IF), DL(IF->getDL()), MF(IF->getMDFactory()) + { + LLVMContext &Context = IF->getContext();; + Int8Ty = IntegerType::get(Context, 8); + Int32Ty = IntegerType::get(Context, 32); + Int64Ty = IntegerType::get(Context, 64); + IntPtrTy = DL->getIntPtrType(Context); + Int8PtrTy = Type::getInt8PtrTy(Context, 0); + Int32PtrTy = Type::getInt32PtrTy(Context, 0); + Int64PtrTy = Type::getInt64PtrTy(Context, 0); + } + bool runOnFunction(Function &F); + + Instruction *getInsertPos(BasicBlock *BB) { + if (BB == &BB->getParent()->getEntryBlock()) + return &*++BB->begin(); + return BB->getFirstNonPHI(); + } +}; + +char ProfileExec::ID = 0; +INITIALIZE_PASS(ProfileExec, "profile", "Profile trace execution", false, false) + +FunctionPass *llvm::createProfileExec(IRFactory *IF) +{ + return new ProfileExec(IF); +} + +bool ProfileExec::runOnFunction(Function &F) +{ + if (!LLEnv->isTraceMode()) + return false; + if (!SP->isEnabled()) + return false; + + Instruction *CPU = IF->getDefaultCPU(F); + if (!CPU) { + dbg() << DEBUG_PASS << PASS_NAME << ": Cannot find CPU pointer.\n"; + return false; + } + + TraceInfo *Trace = IF->getTrace(); + + for (auto FI = F.begin(), FE = F.end(); FI != FE; FI++) { + BasicBlock *BB = &*FI; + if (distance(succ_begin(BB), succ_end(BB)) != 0) + continue; + + /* Find exit points and indirect branches. */ + Trace->NumExit++; + if (isa<IndirectBrInst>(BB->getTerminator())) + Trace->NumIndirectBr++; + } + + /* Insert code to profile trace exit counts. */ + if (SP->Mode & SPM_EXIT) { + Instruction *InsertPos = &*++BasicBlock::iterator(CPU); + Value *NumExitPtr = GetElementPtrInst::CreateInBounds(CPU, + CONSTPtr(offsetof(CPUArchState, num_trace_exits)), + "", InsertPos); + NumExitPtr = new BitCastInst(NumExitPtr, Int64PtrTy, "", InsertPos); + Instruction *NumExits = new LoadInst(NumExitPtr, "", true, InsertPos); + NumExits = BinaryOperator::Create(Instruction::Add, NumExits, + CONST64(1), "", InsertPos); + new StoreInst(NumExits, NumExitPtr, true, InsertPos); + } + + if (!(SP->Mode & SPM_TRACE)) + return false; + + SmallVector<CallInst*, 16> InlineCalls; + Function *Helper = IF->ResolveFunction("helper_profile_exec"); + + /* Prepare counter structures. 
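+     * ExecCount is allocated lazily as one triple of 64-bit counters per SPM
+     * thread, indexed by IDX_LOOP, IDX_EXIT and IDX_INBR; the index chosen for
+     * each profiling point is passed to helper_profile_exec as its third
+     * argument below.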
*/ + if (!Trace->ExecCount) { + Trace->ExecCount = new uint64_t *[MAX_SPM_THREADS]; + for (int i = 0; i < MAX_SPM_THREADS; i++) + Trace->ExecCount[i] = new uint64_t[3] {0, 0, 0}; + } + + /* Find all profiling point. */ + std::vector<std::pair<Instruction *, int> > ProfilePoint; + + SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> BackEdges; + FindFunctionBackedges(F, BackEdges); + for (unsigned i = 0, e = BackEdges.size(); i != e; ++i) { + auto BackEdgeBB = const_cast<BasicBlock*>(BackEdges[i].first); + ProfilePoint.push_back(std::make_pair(BackEdgeBB->getTerminator(), IDX_LOOP)); + } + + for (auto FI = F.begin(), FE = F.end(); FI != FE; FI++) { + BasicBlock *BB = &*FI; + if (distance(succ_begin(BB), succ_end(BB)) != 0) + continue; + bool isIndirectBr = isa<IndirectBrInst>(BB->getTerminator()); + ProfilePoint.push_back(std::make_pair(getInsertPos(BB), + isIndirectBr ? IDX_INBR : IDX_EXIT)); + } + + /* Insert profiling routines. */ + for (unsigned i = 0, e = ProfilePoint.size(); i != e; ++i) { + Instruction *InsertPos = ProfilePoint[i].first; + Value *Ty = CONST32(ProfilePoint[i].second); + + Value *Counter = ConstantExpr::getIntToPtr( + CONSTPtr((uintptr_t)Trace->ExecCount), + PointerType::getUnqual(Int8Ty)); + + SmallVector<Value *, 4> Params; + Type *ParamTy = Helper->getFunctionType()->getParamType(0); + Value *Env = new BitCastInst(CPU, ParamTy, "", InsertPos); + Params.push_back(Env); + Params.push_back(Counter); + Params.push_back(Ty); + + CallInst *CI = CallInst::Create(Helper, Params, "", InsertPos); + MF->setConst(CI); + InlineCalls.push_back(CI); + } + + while (!InlineCalls.empty()) + InlineFunc(InlineCalls.pop_back_val()); + + return true; +} + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/llvm/pass/RedundantStateElimination.cpp b/llvm/pass/RedundantStateElimination.cpp new file mode 100644 index 0000000..2e5f715 --- /dev/null +++ b/llvm/pass/RedundantStateElimination.cpp @@ -0,0 +1,179 @@ +/* + * (C) 2017 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#include "llvm-debug.h" +#include "llvm-opc.h" +#include "llvm-target.h" +#include "llvm-pass.h" +#include "utils.h" + +#define PASS_NAME "RedundantStateElimination" + +/* + * The RedundantStateElimination pass aims to remove + * (1) redundant stores to PC, and + * (2) redundant loads and stores enclosed by two helper function calls. + */ +class RedundantStateElimination : public FunctionPass { + IRFactory *IF; + MDFactory *MF; + const DataLayout *DL; + Value *CPU; + IVec toErase; + +public: + static char ID; + explicit RedundantStateElimination() : FunctionPass(ID) {} + explicit RedundantStateElimination(IRFactory *IF) + : FunctionPass(ID), IF(IF), MF(IF->getMDFactory()), DL(IF->getDL()) {} + + int getNumUsers(Instruction *I) { + return distance(I->user_begin(), I->user_end()); + } + + bool isStateOfPC(Value *Ptr) { + intptr_t Off = 0; + Value *Base = getBaseWithConstantOffset(DL, Ptr, Off); + if (Base == CPU && IRFactory::isStateOfPC(Off)) + return true; + return false; + } + + bool isDirectDominator(LoadInst *LI, StoreInst *SI) { + Instruction *A = LI, *B = SI; + if (A->getParent() != B->getParent()) + return false; + for (auto II = BasicBlock::iterator(A), EE = A->getParent()->end(); + II != EE; ++II) { + if (&*II == B) + return true; + /* If a non-const helper function is between the two instructions, + * this is not a direct domination because the helper function could + * cause side effect. 
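+             * Calls that carry the const marker from MDFactory are assumed to
+             * be free of such side effects, so only non-const calls break the
+             * domination here.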
*/ + auto CI = dyn_cast<CallInst>(II); + if (CI && !MDFactory::isConst(CI)) + return false; + } + return false; + } + + bool removePCState(Function &F); + bool removeHelperState(Function &F); + bool runOnFunction(Function &F); +}; + +char RedundantStateElimination::ID = 0; +INITIALIZE_PASS(RedundantStateElimination, "rse", + "Eliminate redundant CPU state loads/stores", false, false) + +FunctionPass *llvm::createRedundantStateElimination(IRFactory *IF) +{ + return new RedundantStateElimination(IF); +} + +/* Eliminate redundant stores to PC for each basic block. */ +bool RedundantStateElimination::removePCState(Function &F) +{ + for (auto FI = F.begin(), FE = F.end(); FI != FE; ++FI) { + bool Found = false; + + for (auto BI = FI->rbegin(), BE = FI->rend(); BI != BE; ++BI) { + Instruction *I = &*BI; + if (MF->isGuestMemory(I)) + continue; + + if (StoreInst *SI = dyn_cast<StoreInst>(I)) { + if (isStateOfPC(getPointerOperand(SI))) { + if (!Found) + Found = true; + else + toErase.push_back(SI); + } + } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + if (isStateOfPC(getPointerOperand(LI))) + Found = false; + } + } + } + + if (toErase.empty()) + return false; + + ProcessErase(toErase); + return true; + +} + +/* Eliminate redundant loads/stores enclosed by two helper function calls. + * The redundant loads and stores are generated by StateMappingPass for + * handling synchronization of CPU states around helper function calls. + * A load and store can be removed if a state value is loaded and immediately + * stored back to the same state. For example: + * + * Before optimization: After optimization: + * instructions to sync states instructions to sync states + * call void @helper_function1() call void @helper_function1() + * + * %v0 = load i32, i32* %state0 + * %v1 = load i32, i32* %state1 + * store i32 %v0, i32* %state0 + * store i32 %v1, i32* %state1 + * + * call void @helper_function2() call void @helper_function2() + * instructions to reload states instructions to reload states + */ +bool RedundantStateElimination::removeHelperState(Function &F) +{ + for (auto FI = F.begin(), FE = F.end(); FI != FE; ++FI) { + for (auto BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) { + Instruction *I = &*BI; + if (MF->isGuestMemory(I)) + continue; + + StoreInst *SI = dyn_cast<StoreInst>(I); + if (!SI || SI->isVolatile()) + continue; + + LoadInst *LI = dyn_cast<LoadInst>(getValueOperand(SI)); + if (LI && isDirectDominator(LI, SI)) { + /* We can try removing the store instruction if LI is a direct + * dominator of SI. */ + Value *PtrA = getPointerOperand(LI); + Value *PtrB = getPointerOperand(SI); + if (StripPointer(PtrA) == CPU && PtrA == PtrB) + toErase.push_back(SI); + } + } + } + + if (toErase.empty()) + return false; + + ProcessErase(toErase); + return true; +} + +bool RedundantStateElimination::runOnFunction(Function &F) +{ + bool Changed = false; + + CPU = IF->getDefaultCPU(F); + if (!CPU) { + dbg() << DEBUG_PASS << "RedundantStateElimination: Cannot find CPU pointer.\n"; + return false; + } + + Changed |= removePCState(F); +#if 0 + Changed |= removeHelperState(F); +#endif + + return Changed; +} + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/llvm/pass/ReplaceIntrinsic.cpp b/llvm/pass/ReplaceIntrinsic.cpp new file mode 100644 index 0000000..62505f4 --- /dev/null +++ b/llvm/pass/ReplaceIntrinsic.cpp @@ -0,0 +1,137 @@ +/* + * (C) 2016 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. 
+ */
+
+#include "llvm-types.h"
+#include "llvm-debug.h"
+#include "llvm-target.h"
+#include "llvm-pass.h"
+
+
+#define PASS_NAME "ReplaceIntrinsic"
+
+/*
+ * HQEMU does not allow helpers to contain any memory or debug intrinsics.
+ * This pass replaces memory intrinsics with load/store instructions and
+ * removes debug intrinsics (generated by Clang with the -g flag).
+ */
+class ReplaceIntrinsic : public FunctionPass {
+    IVec toErase;
+public:
+    static char ID;
+    explicit ReplaceIntrinsic() : FunctionPass(ID) {}
+
+    Value *ConvertType(Value *V, Type *T, Instruction *InsertPos) {
+        if (likely(V->getType() == T))
+            return V;
+        return new BitCastInst(V, T, "", InsertPos);
+    }
+
+    bool replaceMemoryIntrinsic(IntrinsicInst *I);
+    bool runOnFunction(Function &F);
+};
+
+char ReplaceIntrinsic::ID = 0;
+INITIALIZE_PASS(ReplaceIntrinsic, "replaceintrinsic",
+                "Replace memory and debug intrinsics generated by clang",
+                false, false)
+
+FunctionPass *llvm::createReplaceIntrinsic()
+{
+    return new ReplaceIntrinsic();
+}
+
+
+/*
+ * Transform memcpy/memmove/memset into load/store instructions.
+ * Clang attempts to move memory data using LLVM memory intrinsic instructions.
+ * This causes the state mapping pass to miss some guest states, because state
+ * mapping only considers guest states accessed by ordinary load/store
+ * instructions. So, we simply rewrite the memory intrinsics as load/store
+ * instructions.
+ */
+bool ReplaceIntrinsic::replaceMemoryIntrinsic(IntrinsicInst *I)
+{
+    switch (I->getIntrinsicID()) {
+    case Intrinsic::memset:
+    case Intrinsic::memcpy:
+    case Intrinsic::memmove:
+        break;
+    default:
+        return false;
+    }
+
+    LLVMContext &Context = I->getContext();
+    Type *Int8PtrTy = Type::getInt8PtrTy(Context);
+    CallInst *CI = cast<CallInst>(I);
+
+    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) {
+        /* memcpy/memmove */
+        Value *Src = MTI->getSource();
+        Value *Dst = MTI->getDest();
+        Value *NumBytes = MTI->getLength();
+
+        if (CI->getArgOperand(0)->getType() != Int8PtrTy ||
+            CI->getArgOperand(1)->getType() != Int8PtrTy ||
+            !isa<ConstantInt>(NumBytes) ||
+            MTI->isVolatile())
+            return false;
+
+        /* Remove this instruction if the access size is zero.
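+         * Otherwise the constant-length copy is rewritten as a single wide
+         * integer load/store pair; e.g. an 8-byte memcpy becomes, roughly:
+         *   %v = load i64* %src
+         *   store i64 %v, i64* %dst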
*/ + size_t Len = cast<ConstantInt>(NumBytes)->getZExtValue(); + if (Len == 0) + goto done; + + Type *Ty = Type::getIntNPtrTy(Context, Len * 8); + Src = ConvertType(Src, Ty, I); + Dst = ConvertType(Dst, Ty, I); + Src = new LoadInst(Src, "", false, I); + new StoreInst(Src, Dst, false, I); + } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(I)) { + /* memset */ + Value *Src = MSI->getValue(); + Value *Dst = MSI->getDest(); + Value *NumBytes = MSI->getLength(); + + if (CI->getArgOperand(0)->getType() != Int8PtrTy || + !isa<ConstantInt>(Src) || + !isa<ConstantInt>(NumBytes) || + MSI->isVolatile()) + return false; + + size_t Val = cast<ConstantInt>(Src)->getZExtValue(); + size_t Len = cast<ConstantInt>(NumBytes)->getZExtValue(); + if (Val != 0) + return false; + if (Len == 0) + goto done; + + Type *Ty = Type::getIntNPtrTy(Context, Len * 8); + Src = ConstantInt::get(Type::getIntNTy(Context, Len * 8), 0); + Dst = ConvertType(Dst, Ty, I); + new StoreInst(Src, Dst, false, I); + } + +done: + toErase.push_back(I); + return true; +} + +bool ReplaceIntrinsic::runOnFunction(Function &F) +{ + for (auto I = inst_begin(&F), E = inst_end(&F); I != E; ++I) { + Instruction *Inst = &*I; + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { + if (replaceMemoryIntrinsic(II)) + continue; + if (isa<DbgInfoIntrinsic>(II)) + toErase.push_back(II); + } + } + ProcessErase(toErase); + return true; +} + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/llvm/pass/SimplifyPointer.cpp b/llvm/pass/SimplifyPointer.cpp new file mode 100644 index 0000000..87afbdd --- /dev/null +++ b/llvm/pass/SimplifyPointer.cpp @@ -0,0 +1,334 @@ +//===- SimplifyPointer.cpp - Reassociate guest pointer arithmetic ---------===// +// +// The HQEMU Dynamic Binary Translator Infrastructure +// +// (C) 2016 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. +// COVART Laboratory, CSIE Department, National Taiwan University, Taiwan. +// See COPYRIGHT in top-level directory. +// +//===----------------------------------------------------------------------===// +// This pass implements a simple pointer arithmetic reassociator for easier +// pointer stripping. It gets scalar evolution results of all guest pointers +// which are in simplest form. Next, it inserts new instructions to evaluate the +// simplified expressions to construct new pointers, and rewrites corresponding +// guest load/store with new pointers. +// +//===----------------------------------------------------------------------===// +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/InstIterator.h" + +#include "llvm-opc.h" +#include "llvm-pass.h" +#include "llvm-target.h" +#include "utils.h" + +#define PASS_NAME "SIMPTR" +#define DEBUG_TYPE "SIMPTR" + +//#define VERBOSE + +/// \brief Dump pass debug message with pass name mark. +static inline llvm::raw_ostream &pout() { + return dbg() << DEBUG_PASS << PASS_NAME ": "; +} + +/// \returns True if \p A dominates \p B. 
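+/// Only instruction-to-instruction queries are answered; if either value is
+/// not an Instruction (e.g. a function argument or a constant), the helper
+/// conservatively returns false.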
+static bool dominates(Value *A, Value *B, DominatorTree *DT) { + auto *AI = dyn_cast<Instruction>(A); + auto *BI = dyn_cast<Instruction>(B); + if (AI && BI) + return DT->dominates(AI, BI); + return false; +} + +class SimplifyPointer : public FunctionPass { +public: + using ValueList = SmallVector<Value *, 32>; + using InstrList = SmallVector<Instruction *, 32>; + using ExprValMap = DenseMap<const SCEV *, Value *>; + + // Pass identification, replacement for type id. + static char ID; + + // LLVM pass constructor and destructor. + explicit SimplifyPointer() : FunctionPass(ID){}; + explicit SimplifyPointer(IRFactory *IF) + : FunctionPass(ID), IF(IF), MF(IF->getMDFactory()), DL(IF->getDL()) { + // Initialize all. + initializeSimplifyPointerPass(*PassRegistry::getPassRegistry()); + } + + // LLVM pass public interfaces. + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnFunction(Function &F) override; + +private: + /// \return The evaluation result of expression \p S or null if not cached. + Value *lookupBaseExpressionCache(const SCEV *S) const { + auto V = BaseExprVal.find(S); + if (V != BaseExprVal.end()) + return V->second; + return nullptr; + } + + /// \returns True if spread constants in the expression tree of \p S can be + /// collected by reassociation and reduced to \p FoldVal. + /// + /// It traverses the expression tree of \p S and propagates constant nodes + /// from add, multiply and recurrent add nodes, i.e., (%1 + %2 + 5) * (%3 - 7) + /// should return 5 * -7 = -35. + bool foldConstantExpression(const SCEV *S, int64_t &FoldVal) const; + + /// \returns The first non-pointer value traced along the use-define chain of + /// casting which starts from \p V and ends with a IntToPtrInst, or null if + /// the length of searching chain exceeds \p MaxLookup. + /// + /// In the context of DBT, pointer type is represented and manipulated as + /// integer data until used as a pointer. Therefore, it follows: + /// + /// [Expression Tree] + /// + + + + + /// \ / \ / + /// - ... - + /// \ / + /// + + /// [Scalar Root] + /// | + /// [Casting] + /// | + /// [Load/Store] + /// + /// This method targets the scalar root value. + Value *findPointerScalarRoot(Value *V, unsigned MaxLookup = 4); + + /// \brief Simplify the pointer arithmetic of \p LSI based on scalar evolution + /// results which folds constants into simplest form. After extracting the + /// folded constant from the expression, the rest nodes can form a base + /// expression which is likely a common sub-expression of other \p LSI. + /// + /// It assumes \p LSI has the following use-define chain starting from its + /// pointer and containing only add, multiply and recurrent add nodes. + /// + /// [Expression Tree] [Expression Tree] [Expression Tree] + /// + A B + + + B A + + + /// \ / \ / \ / \ / \ / + /// - ... - - ... - - (B-A) + /// \ / \ / \ / + /// + + + + /// [Scalar Root] >> [Scalar Root] >> [Scalar Root] + /// | | | + /// [Casting] [Casting] [Casting] + /// | | | + /// [LSI] [LSI] [LSI] + /// + /// Suppose A and B are constants, they can be folded into (B-A) with scalar + /// evolution results. Need to insert instructions for other operations in + /// tree (e.g., the new sub in the right-most figure). + /// + /// First it tries to find the folded constant and substract it from root + /// expression to form the base expression. Then it generates instructions to + /// evaluate the base expression. + bool tryToSimplifyPointer(Instruction *I); + + // HQEMU internal infrastructure. 
+ IRFactory *IF = nullptr; + MDFactory *MF = nullptr; + // LLVM analysis and data type info. + const DataLayout *DL = nullptr; + DominatorTree *DT = nullptr; + ScalarEvolution *SE = nullptr; + + /// The cache of base expression to corresponding evaluated value map. + ExprValMap BaseExprVal; +}; + +bool SimplifyPointer::foldConstantExpression(const SCEV *S, + int64_t &FoldVal) const { + // Handle expression tree of scalar root containing only add, multiply and + // recurrent add nodes. + if (auto *AddSE = dyn_cast<SCEVAddExpr>(S)) { + FoldVal = 0; + for (auto Op : AddSE->operands()) { + int64_t Val; + if (foldConstantExpression(Op, Val)) + FoldVal += Val; + } + return true; + } else if (auto *MulSE = dyn_cast<SCEVMulExpr>(S)) { + FoldVal = 1; + for (auto Op : MulSE->operands()) { + int64_t Val; + // If one operand of multiplication fails to report a constant, entire + // expression becomes non-constant as well. + if (foldConstantExpression(Op, Val)) + FoldVal *= Val; + else + return false; + } + return true; + } else if (auto *RecSE = dyn_cast<SCEVAddRecExpr>(S)) { + // Trace only the start expression, because the step expression must be + // multiplied by the loop trip count which is unlikely constant. + return foldConstantExpression(RecSE->getStart(), FoldVal); + } else if (auto *ConstSE = dyn_cast<SCEVConstant>(S)) { + FoldVal = ConstSE->getValue()->getValue().getSExtValue(); + return true; + } + return false; +} + +Value *SimplifyPointer::findPointerScalarRoot(Value *V, unsigned MaxLookup) { + if (!V || !V->getType()->isPointerTy()) + return V; + + for (unsigned i = 0; i < MaxLookup; ++i) { + if (BitCastInst *Cast = dyn_cast<BitCastInst>(V)) { + V = Cast->getOperand(0); + } else if (IntToPtrInst *Cast = dyn_cast<IntToPtrInst>(V)) { + // Found first scalar, return it. + V = Cast->getOperand(0); + return V; + } + } + return nullptr; +} + +bool SimplifyPointer::tryToSimplifyPointer(Instruction *LSI) { + Value *Ptr = getPointerOperand(LSI); + Value *Root = findPointerScalarRoot(Ptr); + Type *RootTy = Root->getType(); + Type *PtrTy = Ptr->getType(); + if (!Ptr || !Root || !RootTy->isIntegerTy()) + return false; + +#ifdef VERBOSE + if (DM.getDebugMode() & DEBUG_PASS) { + pout() << "Visiting memory instruction.\n"; + pout() << "- " << *LSI << ".\n"; + } +#endif + + // Traverse the simplest form expression tree and collect folded constants. + // Note the folded constant can be zero (base = root) if no folded constant + // is found. + auto *RootSE = SE->getSCEV(Root); + int64_t FoldConst = 0; + foldConstantExpression(RootSE, FoldConst); + + // Substract offset constant from root expression to get the base expression, + // then query base expression cache to find whether it has been evaluated. + auto *BaseSE = SE->getMinusSCEV(RootSE, + SE->getConstant(RootTy, FoldConst, true)); + Value *Base = lookupBaseExpressionCache(BaseSE); + + // Create instructions to evaluate base expression if cache miss or previously + // computed value doesn't dominate load/store instruction. + if (!Base || !dominates(Base, LSI, DT)) { +#ifdef VERBOSE + pout() << " Need to build base expression.\n"; + pout() << " - Base " << *BaseSE << ".\n"; + pout() << " - Offset " << FoldConst << ".\n"; +#endif + // Expand the base expression if it is safe. 
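+    // SCEVExpander materializes the base expression as new instructions right
+    // before the load/store; its constructor signature changed after LLVM 3.5,
+    // hence the version guard below.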
+ if (isSafeToExpand(BaseSE, *SE)) { +#if defined(LLVM_V35) + SCEVExpander Expander(*SE, ""); +#else + SCEVExpander Expander(*SE, *DL, ""); +#endif + Base = Expander.expandCodeFor(BaseSE, RootTy, LSI); + } + } else { +#ifdef VERBOSE + pout() << " Use cached base expression value.\n"; + pout() << " - Base " << *BaseSE << ".\n"; + pout() << " - Offset " << FoldConst << ".\n"; +#endif + } + + // Neither using cached value nor re-computing works, abort. + if (!Base) + return false; + + // Add back folded constant (offset) to new root value and feed the result as + // new pointer to load/store instruction. + IRBuilder<> Builder(IF->getContext()); + + bool FoldZero = (FoldConst == 0); + Value *Offset = ConstantInt::get(RootTy, FoldConst); + + Builder.SetInsertPoint(LSI); + Value *NewRoot = FoldZero ? Base : Builder.CreateAdd(Base, Offset); + Value *NewPtr = Builder.CreateIntToPtr(NewRoot, PtrTy); + LSI->replaceUsesOfWith(Ptr, NewPtr); + + // Cache base expression value. + BaseExprVal[BaseSE] = Base; + + return true; +} + +void SimplifyPointer::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<DominatorTreeWrapperPass>(); +#if defined(LLVM_V35) + AU.addRequired<ScalarEvolution>(); +#else + AU.addRequired<ScalarEvolutionWrapperPass>(); +#endif +} + +bool SimplifyPointer::runOnFunction(Function &F) { + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); +#if defined(LLVM_V35) + SE = &getAnalysis<ScalarEvolution>(); +#else + SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); +#endif + + bool Changed = false; + + InstrList MemoryInstrs; + for (auto FI = F.begin(), FE = F.end(); FI != FE; ++FI) { + BasicBlock *BB = &*FI; + + // Skip dead basic blocks. + if (!DT->isReachableFromEntry(BB)) + continue; + + // Collect all guest memory instructions. + for (auto BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) { + Instruction *I = &*BI; + if (MDFactory::isGuestMemory(I)) + MemoryInstrs.push_back(I); + } + } + + // Try to simplify pointers of collected load/store instructions. + for (Instruction *I : MemoryInstrs) + Changed |= tryToSimplifyPointer(I); + + return Changed; +} + +char SimplifyPointer::ID = 0; +INITIALIZE_PASS_BEGIN(SimplifyPointer, "simplifypointer", + "Reassiciate pointer arithmetic", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +#if defined(LLVM_V35) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +#else +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) +#endif +INITIALIZE_PASS_END(SimplifyPointer, "simplifypointer", + "Reassiciate pointer arithmetic", false, false) + +FunctionPass *llvm::createSimplifyPointer(IRFactory *IF) { + return new SimplifyPointer(IF); +} + +/* + * vim: ts=2 sts=2 sw=2 expandtab + */ diff --git a/llvm/pass/StateMappingPass.cpp b/llvm/pass/StateMappingPass.cpp new file mode 100644 index 0000000..0d9dd9b --- /dev/null +++ b/llvm/pass/StateMappingPass.cpp @@ -0,0 +1,885 @@ +/* + * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#include "llvm-debug.h" +#include "llvm-opc.h" +#include "llvm-target.h" +#include "llvm-pass.h" + +#define PASS_NAME "StateMapping" + + +/* + * StateMappingPass is used to eliminate the redundant loads and stores to the + * CPUArchState. The loads and stores of the guest memory operations are not + * removed in order not to violate the memory model of the guest architecture. + * + * The state mapping rules are: + * - A guest state is not overlapped: (i.e., same access size) + * - Same type: map to this type. 
+ * - Different type: select type in the order: vector, float and integer; + * use bitcast to convert between different types. + * - A guest state is overlapped with other state(s): + * - Query StateType to find state size (i.e., boundary) and type: + * - Vector type: use insert/extract to manipulate a vector element. + * - Other types: use shift to manipulate a vector element. + */ +class StateMappingPass : public FunctionPass { + IRFactory *IF; /* Uplink to the IRFactory */ + +public: + static char ID; + explicit StateMappingPass() : FunctionPass(ID) {} + explicit StateMappingPass(IRFactory *IF) : FunctionPass(ID), IF(IF) {} + + bool runOnFunction(Function &F); +}; + +struct StateMapping { + StateMapping() + : State(nullptr), Addr(nullptr), Ty(nullptr), AI(nullptr), + hasLoad(false), hasStore(false) {} + + StateData *State; + Value *Addr; + Type *Ty; + AllocaInst *AI; + bool hasLoad; + bool hasStore; + + intptr_t getSize() { return State->End - State->Start; } + intptr_t getStart() { return State->Start; } + intptr_t getEnd() { return State->End; } + Value *getAddr() { return Addr; } + Type *getType() { return Ty; } + bool isVector() { return Ty->isVectorTy(); } + + bool overlap(StateRange &Range) { + if (Range.empty()) + return false; + intptr_t Start = getStart(); + intptr_t End = getEnd(); + auto I = --Range.upper_bound(Start); + for (; I != Range.end() && I->first < End; ++I) { + if (I->second > Start) + return true; + } + return false; + } +}; + +struct ElementInfo { + ElementInfo() : Shift(0), NumElts(0), EltTy(nullptr), StateTy(nullptr) {} + + intptr_t Shift; + unsigned NumElts; + Type *EltTy; + Type *StateTy; +}; + +class StateMapper { + typedef std::vector<StateMapping> StateMapList; + + IRFactory *IF; + const DataLayout *DL; + Instruction *CPU; /* The CPU pointer */ + Instruction *PreCastPos; /* The position to cast CPU states */ + Instruction *PreLoadPos; /* The position to preload CPU states */ + IVec toErase; /* The instructions to be removed */ + + FlatType &StateType; + StateAnalyzer Analyzer; + StateMapList StateMaps; + +public: + StateMapper(IRFactory *IF) + : IF(IF), DL(IF->getDL()), StateType(IF->getTranslator().getStateType()), + Analyzer(DL) {} + + bool run(Function &F) { + if (!init(F)) + return false; + + AnalyzeState(F); + if (!StateMaps.empty()) + PromoteState(F); + + ProcessErase(toErase); + return true; + } + + /* Rearrange instructions in the 'init' block. */ + bool init(Function &F); + + /* Analyze instructions in a Function that access CPU states. */ + void AnalyzeState(Function &F); + + /* Compute state mapping information. */ + void ComputeStateMap(StateMapping &StateMap, StateData &State); + + /* Determine if the state can be operated as a vector. */ + Type *TryVectorState(StateData &State, Type *Ty); + + /* Map state references to the virtual states. */ + void PromoteState(Function &F); + + /* Rewrite state loads and stores. */ + void RewriteLoad(StateMapping &StateMap, StateRef &Ref); + void RewriteStore(StateMapping &StateMap, StateRef &Ref); + void RewriteLoadVector(StateMapping &StateMap, StateRef &Ref); + void RewriteStoreVector(StateMapping &StateMap, StateRef &Ref); + + /* Compute state and element types for element insertion and extraction. */ + void getElementInfo(StateMapping &StateMap, StateRef &Ref, ElementInfo &Info); + + /* Sync CPU states around helper calls. */ + void SyncHelperState(); + + /* Store dirty states at the leaf blocks. */ + void ProcessExitBB(BasicBlock *BB); + + /* Get the pointer without GEP and BitCast. 
*/ + void StripPointer(Value *V, IVec &IV); + + /* Move the pointer before InsertPos. */ + void MoveStatePointer(Value *V); + + /* Load state from Src and store it to Dest. */ + void CopyState(Value *Dest, Value *Src, Instruction *InsertPos); + + bool isLegalState(Value *Ptr, intptr_t &Off); + + /* Return true if the input is alias of a state pointer. */ + bool isStatePointer(Value *V) { + if (auto BCI = dyn_cast<BitCastInst>(V)) { + if (BCI->getOperand(0) == CPU) + return true; + return isStatePointer(BCI->getOperand(0)); + } else if (auto GEP = dyn_cast<GetElementPtrInst>(V)) + return GEP->getOperand(0) == CPU; + return false; + } + + bool isSimpleFunction(Function *F) { + HelperMap &Helpers = IF->getHelpers(); + if (Helpers.find(F->getName()) == Helpers.end() || + Helpers[F->getName()]->hasNestedCall) + return false; + return true; + } + + Value *ConvertType(Value *V, Type *Ty, Instruction *InsertPos) { + return V->getType() == Ty ? V : new BitCastInst(V, Ty, "", InsertPos); + } +}; + +/* Return a pre-defined state name. */ +static std::string getStateName(intptr_t Off) +{ +#if defined(TARGET_I386) + if (Off == offsetof(CPUArchState,xmm_regs[0])) return "xmm0"; + if (Off == offsetof(CPUArchState,xmm_regs[1])) return "xmm1"; + if (Off == offsetof(CPUArchState,xmm_regs[2])) return "xmm2"; + if (Off == offsetof(CPUArchState,xmm_regs[3])) return "xmm3"; + if (Off == offsetof(CPUArchState,xmm_regs[4])) return "xmm4"; + if (Off == offsetof(CPUArchState,xmm_regs[5])) return "xmm5"; + if (Off == offsetof(CPUArchState,xmm_regs[6])) return "xmm6"; + if (Off == offsetof(CPUArchState,xmm_regs[7])) return "xmm7"; + if (Off == offsetof(CPUArchState,xmm_t0)) return "xmm_t0"; +#endif + return ""; +} + +/* Determine if the offset is to access the temporary state. */ +static inline bool isLocalState(intptr_t Off) +{ +#if defined(TARGET_I386) + if (Off == offsetof(CPUArchState, xmm_t0)) + return true; +#endif + return false; +} + +/* Return states that should be ignored during state mapping. */ +static bool isSkipState(intptr_t Off) +{ + if (Off == (intptr_t)(offsetof(CPUState, tcg_exit_req) - ENV_OFFSET)) + return true; + +#define stateof(X) \ + (Off >= (intptr_t)offsetof(CPUArchState,X) && \ + Off < (intptr_t)(offsetof(CPUArchState,X) + sizeof(((CPUArchState*)0)->X))) +#define is_fpstatus(X) \ + (stateof(X.float_detect_tininess) || \ + stateof(X.float_rounding_mode) || \ + stateof(X.float_exception_flags) || \ + stateof(X.floatx80_rounding_precision) || \ + stateof(X.flush_to_zero) || \ + stateof(X.flush_inputs_to_zero) || \ + stateof(X.default_nan_mode)) + +#if defined(TARGET_ARM) + if (is_fpstatus(vfp.fp_status) || is_fpstatus(vfp.standard_fp_status)) + return true; +#elif defined(TARGET_I386) + if (is_fpstatus(fp_status)) + return true; +#endif + return false; + +#undef stateof +#undef is_fpstatus +} + +/* Check if the state is legal for state mapping. A legal state must have CPU + * as the base pointer, plus a positive constant offset. */ +bool StateMapper::isLegalState(Value *Ptr, intptr_t &Off) +{ + Value *Base = getBaseWithConstantOffset(DL, Ptr, Off); + if (Off < 0) + return false; + if (Base == CPU && !isSkipState(Off) && !IRFactory::isStateOfPC(Off)) + return true; + return false; +} + +/* Get the pointer without GEP and BitCast. The stripped GEP and BitCast + * instructions are returned to the caller. 
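+ * A Visited set protects the walk against cyclic pointer chains, so the loop
+ * below always terminates.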
*/ +void StateMapper::StripPointer(Value *V, IVec &IV) +{ + std::set<Value *> Visited; + Visited.insert(V); + do { + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) { + IV.push_back(GEP); + V = GEP->getOperand(0); + } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) { + IV.push_back(BCI); + V = BCI->getOperand(0); + } else + return; + if (Visited.find(V) != Visited.end()) + break; + Visited.insert(V); + } while (true); +} + +/* Move the pointer before InsertPos. */ +void StateMapper::MoveStatePointer(Value *V) +{ + IVec toMove; + StripPointer(V, toMove); + while (!toMove.empty()) { + Instruction *I = toMove.back(); + toMove.pop_back(); + if (I->getParent() == CPU->getParent()) + continue; + I->moveBefore(PreCastPos); + } +} + +/* Copy state data from src address to destination address. */ +void StateMapper::CopyState(Value *Dest, Value *Src, Instruction *InsertPos) +{ + if (!isa<AllocaInst>(Src)) { + MoveStatePointer(Src); + LoadInst *LI = new LoadInst(Src, "", false, InsertPos); + new StoreInst(LI, Dest, false, InsertPos); + + if (Src->getType()->getPointerElementType()->isVectorTy()) + LI->setAlignment(4); + } else { + MoveStatePointer(Dest); + LoadInst *LI = new LoadInst(Src, "", false, InsertPos); + StoreInst *SI = new StoreInst(LI, Dest, false, InsertPos); + + if (Dest->getType()->getPointerElementType()->isVectorTy()) + SI->setAlignment(4); + } +} + +/* Store dirty states at the leaf blocks. */ +void StateMapper::ProcessExitBB(BasicBlock *BB) +{ + Instruction *InsertPos = nullptr; + for (auto BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) { + if (MDFactory::isExit(&*BI)) { + InsertPos = &*BI; + break; + } + } + if (!InsertPos) + InsertPos = BB->getTerminator(); + + for (auto &StateMap : StateMaps) { + if (!StateMap.hasStore || isLocalState(StateMap.getStart())) + continue; + CopyState(StateMap.Addr, StateMap.AI, InsertPos); + } +} + +/* Sync CPU states around helper calls. */ +void StateMapper::SyncHelperState() +{ + CallList &Calls = Analyzer.getCalls(); + if (Calls.empty()) + return; + + /* + * Rules of syncing states around calls: + * 1. Dirty states (i.e., stores) are written back before calls. + * 2. All states, including loads and stores, are read back after calls. + * + * If the helper is a simple function, only dependent states are synced. + * If the helper is a complicated function, all states are synced. + */ + HelperMap &Helpers = IF->getHelpers(); + DenseMap<CallInst*, std::set<unsigned> > StoreBeforeCall; + DenseMap<CallInst*, std::set<unsigned> > LoadAfterCall; + + for (auto CI : Calls) { + Function *Func = CI->getCalledFunction(); + std::string Name = Func->getName(); + + if (isSimpleFunction(Func)) { + /* A pre-defined helper without nested call. 
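+             * Only mapped states that overlap the helper's recorded use set
+             * are written back before the call, and only those overlapping its
+             * def set are reloaded afterwards; when the helper may take
+             * pointers into the CPU state (mayConflictArg), states that such
+             * an argument could touch are synced as well.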
*/ + HelperInfo *Helper = Helpers[Name]; + for (unsigned i = 0, e = StateMaps.size(); i != e; ++i) { + auto &StateMap = StateMaps[i]; + if (StateMap.hasStore && StateMap.overlap(Helper->StateUse)) + StoreBeforeCall[CI].insert(i); + + if (StateMap.overlap(Helper->StateDef)) + LoadAfterCall[CI].insert(i); + + if (Helper->mayConflictArg) { + unsigned NumArgs = CI->getNumArgOperands(); + for (unsigned j = 1; j < NumArgs; ++j) { + intptr_t Off = 0; + Value *Arg = CI->getArgOperand(j); + if (!isLegalState(Arg, Off)) + continue; + if (Off + Helper->ConflictSize <= StateMap.getStart() || + Off >= StateMap.getEnd()) + continue; + if (StateMap.hasStore) + StoreBeforeCall[CI].insert(i); + LoadAfterCall[CI].insert(i); + } + } + } + } else { + /* Sync states for a complicated function (an unknown helper or a + * helper with nested calls). */ + for (unsigned i = 0, e = StateMaps.size(); i != e; ++i) { + auto &StateMap = StateMaps[i]; + if (StateMap.hasStore) + StoreBeforeCall[CI].insert(i); + LoadAfterCall[CI].insert(i); + } + } + } + + /* Perform state syncing. */ + for (auto CI : Calls) { + Instruction *InsertPos = CI; + + if (!StoreBeforeCall.empty()) { + for (auto i : StoreBeforeCall[CI]) { + auto &StateMap = StateMaps[i]; + CopyState(StateMap.Addr, StateMap.AI, InsertPos); + } + } + + InsertPos = &*std::next(BasicBlock::iterator(InsertPos)); + if (isa<UnreachableInst>(InsertPos)) { + /* No read back is required after tail call. */ + continue; + } + + if (!LoadAfterCall.empty()) { + for (auto i : LoadAfterCall[CI]) { + auto &StateMap = StateMaps[i]; + CopyState(StateMap.AI, StateMap.Addr, InsertPos); + } + } + } +} + +static inline bool isSameSize(StateMapping &StateMap, StateRef &Ref) +{ + return StateMap.getSize() == Ref.getSize(); +} + +/* Compute state and element types for element insertion and extraction. */ +void StateMapper::getElementInfo(StateMapping &StateMap, StateRef &Ref, + ElementInfo &Info) +{ + intptr_t StateSize = StateMap.getSize(); + intptr_t Size = Ref.getSize(); + intptr_t Shift = Ref.Start - StateMap.getStart(); + Type *StateTy = StateMap.getType(); + LLVMContext &Context = StateTy->getContext(); + + if (!StateMap.isVector()) { + /* Use int-N to emulate the state. */ + Info.NumElts = 1; + Info.EltTy = Type::getIntNTy(Context, Size * 8); + Info.StateTy = Type::getIntNTy(Context, StateSize * 8); + Info.Shift = Shift; + return; + } + + /* The state is emulated as a vector. */ + if (StateSize % Size == 0 && Shift % Size == 0) { + Type *EltTy = Type::getIntNTy(Context, Size * 8); + + Info.NumElts = 1; + Info.EltTy = EltTy; + Info.StateTy = VectorType::get(EltTy, StateSize / Size); + Info.Shift = Shift / Size; + } else { + VectorType *VecTy = cast<VectorType>(StateTy); + Type *EltTy = VecTy->getScalarType(); + intptr_t EltSize = DL->getTypeSizeInBits(EltTy) / 8; + + Info.NumElts = Size / EltSize; + Info.EltTy = VectorType::get(EltTy, Info.NumElts); + Info.StateTy = StateTy; + Info.Shift = Shift / EltSize; + } +} + +void StateMapper::RewriteLoad(StateMapping &StateMap, StateRef &Ref) +{ + LoadInst *LI = cast<LoadInst>(Ref.I); + Type *Ty = LI->getType(); + Instruction *InsertPos = LI; + + /* The same reference size as the state size. */ + if (isSameSize(StateMap, Ref)) { + Value *V = new LoadInst(StateMap.AI, "", false, InsertPos); + V = ConvertType(V, Ty, InsertPos); + LI->replaceAllUsesWith(V); + toErase.push_back(LI); + return; + } + + if (StateMap.isVector()) { + RewriteLoadVector(StateMap, Ref); + return; + } + + /* This is a non-vector state. 
Transform the state to the type of Int-N + * and use logical shift to extract/insert element data. */ + ElementInfo Info; + getElementInfo(StateMap, Ref, Info); + + Value *V = new LoadInst(StateMap.AI, "", false, InsertPos); + V = ConvertType(V, Info.StateTy, InsertPos); + + /* Extract the element. */ + if (Info.Shift) { + Value *Shift = ConstantInt::get(V->getType(), Info.Shift * 8); + V = BinaryOperator::Create(Instruction::LShr, V, Shift, "", InsertPos); + } + V = new TruncInst(V, Info.EltTy, "", InsertPos); + V = ConvertType(V, Ty, InsertPos); + + LI->replaceAllUsesWith(V); + toErase.push_back(LI); +} + +void StateMapper::RewriteStore(StateMapping &StateMap, StateRef &Ref) +{ + StoreInst *SI = cast<StoreInst>(Ref.I); + Value *Data = SI->getValueOperand(); + Instruction *InsertPos = SI; + + /* The same reference size as the state size. */ + if (isSameSize(StateMap, Ref)) { + Value *V = ConvertType(Data, StateMap.getType(), InsertPos); + new StoreInst(V, StateMap.AI, false, InsertPos); + toErase.push_back(SI); + return; + } + + if (StateMap.isVector()) { + RewriteStoreVector(StateMap, Ref); + return; + } + + /* This is a non-vector state. Transform the state to the type of Int-N + * and use logical shift to extract/insert element data. */ + ElementInfo Info; + getElementInfo(StateMap, Ref, Info); + + Value *V = new LoadInst(StateMap.AI, "", false, InsertPos); + V = ConvertType(V, Info.StateTy, InsertPos); + + /* Insert the element. */ + Data = ConvertType(Data, Info.EltTy, InsertPos); + Data = new ZExtInst(Data, Info.StateTy, "", InsertPos); + + if (Info.Shift) { + Value *Shift = ConstantInt::get(Data->getType(), Info.Shift * 8); + Data = BinaryOperator::Create(Instruction::Shl, Data, Shift, "", InsertPos); + } + + unsigned numBits = StateMap.getSize() * 8; + unsigned loBit = Info.Shift * 8, hiBit = loBit + Ref.getSize() * 8; + APInt mask = ~APInt::getBitsSet(numBits, loBit, hiBit); + Value *Mask = ConstantInt::get(Data->getContext(), mask); + + V = BinaryOperator::Create(Instruction::And, V, Mask, "", InsertPos); + V = BinaryOperator::Create(Instruction::Or, V, Data, "", InsertPos); + V = ConvertType(V, StateMap.getType(), InsertPos); + + new StoreInst(V, StateMap.AI, false, InsertPos); + toErase.push_back(SI); +} + +void StateMapper::RewriteLoadVector(StateMapping &StateMap, StateRef &Ref) +{ + LoadInst *LI = cast<LoadInst>(Ref.I); + Type *Ty = LI->getType(); + Instruction *InsertPos = LI; + + /* Compute offset, size and element type of this vector operation. */ + ElementInfo Info; + getElementInfo(StateMap, Ref, Info); + + Value *V = new LoadInst(StateMap.AI, "", false, InsertPos); + V = ConvertType(V, Info.StateTy, InsertPos); + + /* Extract the element(s) from the vector value. */ + IntegerType *I32 = IntegerType::get(V->getContext(), 32); + + if (Info.EltTy->isVectorTy()) { + /* Multiple elements to load. Use shufflevector. */ + Value *UndefVal = UndefValue::get(Info.StateTy); + SmallVector<Constant*, 8> Indices; + for (unsigned i = 0, e = Info.Shift; i != e; ++i) + Indices.push_back(ConstantInt::get(I32, Info.Shift + i)); + Value *CV = ConstantVector::get(Indices); + V = new ShuffleVectorInst(V, UndefVal, CV, "", InsertPos); + } else { + /* Only one element. Use extractelement. 
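+ * For example (hypothetical layout): a 16-byte state mapped as <4 x i32>
+ * and a 4-byte load at byte offset 8 from the state start give
+ * EltTy = i32 and Shift = 8/4 = 2 in getElementInfo(), so the load turns
+ * into
+ *   %v   = load from %state.a               ; whole <4 x i32> local copy
+ *   %elt = extractelement <4 x i32> %v, i32 2
+ * where %state.a stands for the per-state alloca created in
+ * PromoteState() below.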
*/ + V = ExtractElementInst::Create(V, + ConstantInt::get(I32, Info.Shift), "", InsertPos); + } + + V = ConvertType(V, Ty, InsertPos); + + LI->replaceAllUsesWith(V); + toErase.push_back(LI); +} + +void StateMapper::RewriteStoreVector(StateMapping &StateMap, StateRef &Ref) +{ + StoreInst *SI = cast<StoreInst>(Ref.I); + Value *Data = SI->getValueOperand(); + Instruction *InsertPos = SI; + + /* Compute offset, size and element type of this vector operation. */ + ElementInfo Info; + getElementInfo(StateMap, Ref, Info); + + Value *V = new LoadInst(StateMap.AI, "", false, InsertPos); + V = ConvertType(V, Info.StateTy, InsertPos); + Data = ConvertType(Data, Info.EltTy, InsertPos); + + /* Extract element(s) from data and insert it into the vector value. */ + IntegerType *I32 = IntegerType::get(V->getContext(), 32); + + if (Info.EltTy->isVectorTy()) { + SmallVector<Value *, 8> Partial; + for (unsigned i = 0, e = Info.NumElts; i != e; ++i) { + Partial.push_back(ExtractElementInst::Create(Data, + ConstantInt::get(I32, i), "", InsertPos)); + } + for (unsigned i = 0, e = Info.NumElts; i != e; ++i) { + V = InsertElementInst::Create(V, Partial[i], + ConstantInt::get(I32, Info.Shift + i), "", InsertPos); + } + } else { + /* Only one element. Use insertelement. */ + V = InsertElementInst::Create(V, Data, + ConstantInt::get(I32, Info.Shift), "", InsertPos); + } + + V = ConvertType(V, StateMap.getType(), InsertPos); + + new StoreInst(V, StateMap.AI, false, InsertPos); + toErase.push_back(SI); +} + +/* Map state references to the virtual states. */ +void StateMapper::PromoteState(Function &F) +{ + /* Pre-load CPU states. */ + Type *IntPtrTy = DL->getIntPtrType(CPU->getContext()); + for (auto &StateMap : StateMaps) { + if (!StateMap.Addr) { + Value *Off = ConstantInt::get(IntPtrTy, StateMap.getStart()); + Value *GEP = GetElementPtrInst::CreateInBounds(CPU, Off, "", + PreCastPos); + StateMap.Addr = new BitCastInst(GEP, + PointerType::getUnqual(StateMap.getType()), "", + PreCastPos); + } + + std::string StateName = StateMap.Addr->getName(); + if (StateName == "") + StateName = getStateName(StateMap.getStart()); + if (StateName == "") + StateName = "state"; + StateName.append(".a"); + + StateMap.AI = CreateAlloca(StateMap.getType(), 0, StateName, PreCastPos); + CopyState(StateMap.AI, StateMap.Addr, PreLoadPos); + } + + /* Rewrite loads and stores. */ + for (auto &StateMap : StateMaps) { + for (auto Ref : StateMap.State->Refs) { + if (isa<LoadInst>(Ref->I)) + RewriteLoad(StateMap, *Ref); + else + RewriteStore(StateMap, *Ref); + } + } + + /* Sync CPU states around helper calls. */ + SyncHelperState(); + + /* Post-store dirty values back to CPU states for each exiting block. */ + for (auto BI = F.begin(), BE = F.end(); BI != BE; ++BI) { + BasicBlock *BB = &*BI; + if (distance(succ_begin(BB), succ_end(BB)) == 0) /* leaf node */ + ProcessExitBB(BB); + } +} + +/* Determine if the state can be operated as a vector. */ +Type *StateMapper::TryVectorState(StateData &State, Type *Ty) +{ + intptr_t StateStart = State.Start; + intptr_t StateEnd = State.End; + intptr_t StateSize = StateEnd - StateStart; + + /* If the reference type (from the IR) is already a vector type, use it. + * Otherwise, query StateType to determine if it is a vector state. 
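+ * For example (hypothetical refs): a 16-byte state accessed by one
+ * <4 x float> reference at offset 0 and one 4-byte scalar reference at
+ * offset 4 passes the checks below, since the element size (4 bytes)
+ * divides the state size and every ref's size and offset; the whole state
+ * is then mapped as <4 x float>.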
*/ + VectorType *VecTy = dyn_cast<VectorType>(Ty); + if (!VecTy) { + auto TI = --StateType.upper_bound(StateStart); + for (; TI != StateType.end() && TI->first < StateEnd; ++TI) { + if (TI->second->isVectorTy()) { + VecTy = cast<VectorType>(TI->second); + break; + } + } + } + + if (!VecTy) + return nullptr; + + /* This is a vector state. Now, we need to check whether all state refs can + * be composed by the vector element type: (a) the state size is a multiple + * of the vector element size, and (b) the size and shift of each state ref + * are both a multiple of the vector element size. */ + Type *ElementTy = VecTy->getScalarType(); + intptr_t ElementSize = DL->getTypeSizeInBits(ElementTy) / 8; + if (StateSize % ElementSize != 0) + return nullptr; + + for (auto Ref : State.Refs) { + if (Ref->getSize() % ElementSize != 0 || + (Ref->Start - StateStart) % ElementSize != 0) + return nullptr; + } + return VectorType::get(ElementTy, StateSize / ElementSize); +} + +/* Compute state mapping information based on the state mapping rules. */ +void StateMapper::ComputeStateMap(StateMapping &StateMap, StateData &State) +{ + /* State mapping rule: + * - A guest state is not overlapped: (i.e., same access size) + * - Same type: map to this type. + * - Different type: select type in the order: vector, float and integer; + * use bitcast to convert between different types. + * - A guest state is overlapped with other state(s): + * - Query StateType to find state size (i.e., boundary) and type: + * - Vector type: use insert/extract to manipulate a vector element. + * - Other types: use shift to manipulate a sub-register element. */ + bool sameSize = true; + bool hasLoad = false; + bool hasStore = false; + + for (auto Ref : State.Refs) { + hasLoad |= isa<LoadInst>(Ref->I); + hasStore |= isa<StoreInst>(Ref->I); + } + + StateRef *Ref = State.Refs.front(); + Type *Ty = Ref->getType(); + Value *Addr = getPointerOperand(Ref->I); + intptr_t Size = Ref->getSize(); + + for (unsigned i = 1, e = State.Refs.size(); i != e; ++i) { + StateRef *NextRef = State.Refs[i]; + Type *NextTy = NextRef->getType(); + Value *NextAddr = getPointerOperand(NextRef->I); + + /* Check type. */ + if (Ty != NextTy) { + /* Select type in the order: vector, float and integer. */ + bool Swap = false; + if (Ty->isVectorTy() && NextTy->isVectorTy()) { + /* We prefer a vector type of small element type. */ + Type *ATy = cast<VectorType>(Ty)->getScalarType(); + Type *BTy = cast<VectorType>(NextTy)->getScalarType(); + if (DL->getTypeSizeInBits(BTy) < DL->getTypeSizeInBits(ATy)) + Swap = true; + } else if (!Ty->isVectorTy() && NextTy->isVectorTy()) { + Swap = true; + } else if (Ty->isIntegerTy() && NextTy->isFloatTy()) { + Swap = true; + } + + if (Swap) { + std::swap(Ty, NextTy); + std::swap(Addr, NextAddr); + } + } + + /* Check size. */ + if (Size != NextRef->getSize()) + sameSize = false; + } + + if (sameSize) { + /* The same reference size as the state size. */ + StateMap.Ty = Ty; + StateMap.Addr = Addr; + } else { + /* Different reference sizes. */ + intptr_t StateSize = State.End - State.Start; + Type *VecTy = TryVectorState(State, Ty); + StateMap.Ty = VecTy ? VecTy + : Type::getIntNTy(Ty->getContext(), StateSize * 8); + StateMap.Addr = nullptr; + } + StateMap.State = &State; + StateMap.hasLoad = hasLoad; + StateMap.hasStore = hasStore; +} + +/* Analyze instructions in a Function that access CPU states. */ +void StateMapper::AnalyzeState(Function &F) +{ + /* Collect instructions (load/store/call) that access CPU states. 
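+ * A state access here means a load/store whose address is derived from the
+ * CPU (env) pointer at a constant offset; helper calls are collected as
+ * well and handled separately in SyncHelperState().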
+ * Loads/stores that access guest memory or are tagged with volatile + * (e.g., accessing the states: %pc and %tcg_exit_req) are ignored. */ + + for (auto II = inst_begin(F), EE = inst_end(F); II != EE; ++II) { + Instruction *I = &*II; + intptr_t Off = 0; + if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + if (MDFactory::isGuestMemory(I) || LI->isVolatile()) + continue; + + if (isLegalState(LI->getPointerOperand(), Off)) + Analyzer.addStateRef(I, Off); + } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { + if (MDFactory::isGuestMemory(I) || SI->isVolatile()) + continue; + + if (isLegalState(SI->getPointerOperand(), Off)) + Analyzer.addStateRef(I, Off); + } else if (CallInst *CI = dyn_cast<CallInst>(I)) { + /* Skip const helper, inlineasm and intrinsic function call. */ + if (MDFactory::isConst(CI)) + continue; + if (CI->isInlineAsm() || isa<IntrinsicInst>(CI)) + continue; + + Analyzer.addCall(CI); + } + } + + /* Ask Analyzer to put state references into groups. */ + Analyzer.computeState(); + + StateList &States = Analyzer.getStateList(); + if (States.empty()) + return; + + /* Compute state mapping info. */ + StateMaps.resize(States.size()); + for (unsigned i = 0, e = States.size(); i != e; ++i) + ComputeStateMap(StateMaps[i], States[i]); +} + +/* Rearrange instructions in the 'init' block. */ +bool StateMapper::init(Function &F) +{ + /* + * We would like to rearrange the instructions in the 'init' block, in which + * gep/cast instructions are in front of other instructions in the block. + * For example: + * %0 = getelementptr i8* %cpu, i64 0 + * %1 = bitcast i8* %0 to i32* # gep/cast insns + * -------------------------------------- # precast_pos + * -------------------------------------- # preload_pos + * %2 = load i32, i32* %1 # the other insns + * br label %entry + */ + CPU = IF->getDefaultCPU(F); + if (!CPU || CPU->getParent() != &F.getEntryBlock()) + return false; + + Instruction *InsertPos = &*std::next(BasicBlock::iterator(CPU)); + PreLoadPos = new UnreachableInst(CPU->getContext(), InsertPos); + PreCastPos = new UnreachableInst(CPU->getContext(), PreLoadPos); + + toErase.push_back(PreLoadPos); + toErase.push_back(PreCastPos); + + /* Move gep/cast instructions. */ + IVec toMove; + BasicBlock *BB = CPU->getParent(); + for (auto BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) { + Instruction *I = &*BI; + if (isStatePointer(I)) + toMove.push_back(I); + } + for (auto I : toMove) + I->moveBefore(PreCastPos); + + return true; +} + +/* + * StateMappingPass + */ +bool StateMappingPass::runOnFunction(Function &F) +{ + return StateMapper(IF).run(F); +} + +char StateMappingPass::ID = 0; +INITIALIZE_PASS(StateMappingPass, "statemap", + "Eliminate redundant loads/stores by mapping CPU states", false, false) + +FunctionPass *llvm::createStateMappingPass(IRFactory *IF) +{ + return new StateMappingPass(IF); +} + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/llvm/pmu/arm/arm-events.cpp b/llvm/pmu/arm/arm-events.cpp new file mode 100644 index 0000000..3da7339 --- /dev/null +++ b/llvm/pmu/arm/arm-events.cpp @@ -0,0 +1,42 @@ +/* + * (C) 2018 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#include "pmu/pmu-global.h" + +namespace pmu { + +/* ARMv8 recommended implementation defined event types. + * (copied from linux-4.x/arch/arm64/kernel/perf_event.c) */ +#define ICACHE_MISS_CONFIG (0x01) +#define MEM_LOADS_CONFIG (0x06) +#define MEM_STORES_CONFIG (0x07) + + +extern EventID PreEvents[PMU_EVENT_MAX]; /* Pre-defined events. 
*/ + +static void ARMSetupEventCode() +{ +#define SetupEvent(_Event,_Config) \ + PreEvents[_Event].Type = PERF_TYPE_RAW; \ + PreEvents[_Event].Config = _Config; + + SetupEvent(PMU_ICACHE_MISSES, ICACHE_MISS_CONFIG); + SetupEvent(PMU_MEM_LOADS, MEM_LOADS_CONFIG); + SetupEvent(PMU_MEM_STORES, MEM_STORES_CONFIG); + +#undef SetEventCode +} + +int ARMInit() +{ + ARMSetupEventCode(); + return PMU_OK; +} + +} /* namespace pmu */ + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/llvm/pmu/pmu-events.cpp b/llvm/pmu/pmu-events.cpp new file mode 100644 index 0000000..d3f2d08 --- /dev/null +++ b/llvm/pmu/pmu-events.cpp @@ -0,0 +1,414 @@ +/* + * (C) 2018 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#include <algorithm> +#include <signal.h> +#include <sys/time.h> +#include "llvm-soft-perfmon.h" +#include "pmu/pmu-global.h" +#include "pmu/pmu-events.h" + + +namespace { + +/* Mutex */ +class Mutex { + pthread_mutex_t M; +public: + Mutex() { pthread_mutex_init(&M, nullptr); } + inline void acquire() { pthread_mutex_lock(&M); } + inline void release() { pthread_mutex_unlock(&M); } + inline bool trylock() { return pthread_mutex_trylock(&M) == 0; } +}; + +class MutexGuard { + Mutex &M; +public: + MutexGuard(Mutex &M) : M(M) { M.acquire(); } + ~MutexGuard() { M.release(); } +}; + +} + +/* + * Performance Monitoring Unit (PMU). + */ +namespace pmu { + +static Mutex Lock; + +SampleList *ReadSampleData(PMUEvent *Event); + +/* The timer interrupt handler. */ +void DefaultHandler(int signum, siginfo_t *info, void *data) +{ + /* If the thread is signaled while it is currently holding the lock, we + * might enter deadlock if we attempt to acquire the lock. Use trylock to + * detect such a condition and return from this handler if we cannot + * successfully acquire the lock. */ + if (Lock.trylock() == false) + return; + + /* We have hold the lock. Iterate over all sampling events and process + * the sample buffer. */ + + auto &SampleEvents = EventMgr->SampleEvents; + if (SampleEvents.empty()) { + Lock.release(); + return; + } + + struct timeval Start, End, Elapse; + if (SP->Mode & SPM_HPM) + gettimeofday(&Start, nullptr); + + for (auto I = SampleEvents.begin(), E = SampleEvents.end(); I != E; ++I) { + PMUEvent *Event = *I; + if (Event->Mode & MODE_SAMPLE) { + SampleList *Data = ReadSampleData(Event); + if (Data) + Event->SampleHandler(Event->Hndl, SampleDataPtr(Data), + Event->Opaque); + } + } + + auto &ChangedEvents = EventMgr->ChangedEvents; + if (!ChangedEvents.empty()) { + for (auto *Event : ChangedEvents) { + if (Event->State == STATE_GOTO_STOP) { + Event->State = STATE_STOP; + SampleEvents.remove(Event); + } else if (Event->State == STATE_GOTO_START) { + Event->State = STATE_START; + SampleEvents.push_back(Event); + } + } + ChangedEvents.clear(); + } + + if (SP->Mode & SPM_HPM) { + gettimeofday(&End, nullptr); + timersub(&End, &Start, &Elapse); + SP->SampleTime += Elapse.tv_sec * 1e6 + Elapse.tv_usec; + } + + if (!SampleEvents.empty()) + EventMgr->EventTimer->Start(); + Lock.release(); +} + +/* + * Event Manager + */ +EventManager::EventManager() +{ + for (unsigned i = 0; i < PMU_MAX_EVENTS; ++i) { + Events[i].Hndl = i; + FreeEvents.push_back(&Events[i]); + } + + /* Install the signal handler for the timer. 
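+ * The signal (PMU_SIGNAL_NUM) is routed to the single thread given by
+ * SysConfig.SignalReceiver through SIGEV_THREAD_ID (see the Timer
+ * constructor below), so DefaultHandler() above always runs in that
+ * thread's context.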
*/ + struct sigaction act; + memset(&act, 0, sizeof(struct sigaction)); + act.sa_sigaction = DefaultHandler; + act.sa_flags = SA_SIGINFO; + sigaction(PMU_SIGNAL_NUM, &act, 0); + + EventTimer = new Timer(PMU_SIGNAL_NUM, SysConfig.SignalReceiver); +} + +EventManager::~EventManager() +{ + EventTimer->Stop(); + delete EventTimer; +} + +/* Return the event of the input handle. */ +PMUEvent *EventManager::GetEvent(Handle Hndl) +{ + if (Hndl >= PMU_MAX_EVENTS) + return nullptr; + return &Events[Hndl]; +} + +/* Add a counting event and return its handle. */ +Handle EventManager::AddEvent(int fd) +{ + MutexGuard Locked(Lock); + + if (FreeEvents.empty()) + return PMU_INVALID_HNDL; + + auto Event = FreeEvents.front(); + FreeEvents.pop_front(); + + Event->FD.push_back(fd); + Event->Data.Base = nullptr; + Event->Aux.Base = nullptr; + Event->OverflowHandler = nullptr; + + Event->Mode = MODE_COUNTER; + Event->State = STATE_STOP; + + return Event->Hndl; +} + +/* Add a sampling event and return its handle. */ +Handle EventManager::AddSampleEvent(unsigned NumFDs, int *FD, uint64_t DataSize, + void *Data, uint32_t Mode, + SampleConfig &Config) +{ + MutexGuard Locked(Lock); + + if (FreeEvents.empty()) + return PMU_INVALID_HNDL; + + auto Event = FreeEvents.front(); + FreeEvents.pop_front(); + + for (unsigned i = 0; i < NumFDs; ++i) + Event->FD.push_back(FD[i]); + + Event->Data.Base = Data; + Event->Data.Size = DataSize; + Event->Data.Prev = 0; + Event->Aux.Base = nullptr; + Event->Aux.Size = 0; + Event->Aux.Prev = 0; + Event->Watermark = std::min(Config.Watermark, DataSize); + Event->SampleHandler = Config.SampleHandler; + Event->Opaque = Config.Opaque; + + Event->Mode = MODE_SAMPLE | Mode; + Event->State = STATE_STOP; + + return Event->Hndl; +} + +/* Notify that an event is started. */ +void EventManager::StartEvent(PMUEvent *Event, bool ShouldLock) +{ + if (ShouldLock) { + MutexGuard Locked(Lock); + + /* We don't add this event to the sampling event list if user doesn't + * provide a valid overflow handler for a sampling event. */ + if (Event->State == STATE_STOP && Event->OverflowHandler) { + SampleEvents.push_back(Event); + EventTimer->Start(); + } + Event->State = STATE_START; + } else { + /* We are within the overflow handling and it's not safe to change the + * structure of the sampling event list. Here we only change the state + * of the event and the event list will be fixed at the end of the + * overflow handling. */ + if (Event->State == STATE_STOP && Event->OverflowHandler) { + Event->State = STATE_GOTO_START; + ChangedEvents.push_back(Event); + } + } +} + +/* Notify that an event is stopped. */ +void EventManager::StopEvent(PMUEvent *Event, bool ShouldLock) +{ + if (ShouldLock) { + /* If this is a sampling event and is currently under sampling, remove + * it from the sampling event list. */ + Lock.acquire(); + if (Event->State == STATE_START && Event->OverflowHandler) { + SampleEvents.remove(Event); + if (SampleEvents.empty()) + EventTimer->Stop(); + } + Event->State = STATE_STOP; + Lock.release(); + } else { + /* We are within the overflow handling and it's not safe to change the + * structure of the sampling event list. Here we only change the state + * of the event and the event list will be fixed at the end of the + * overflow handling. */ + if (Event->State == STATE_START && Event->OverflowHandler) { + Event->State = STATE_GOTO_STOP; + ChangedEvents.push_back(Event); + } + } +} + +/* Notify that an event is deleted. 
*/ +void EventManager::DeleteEvent(PMUEvent *Event) +{ + MutexGuard Locked(Lock); + + Event->FD.clear(); + FreeEvents.push_back(Event); +} + +/* Stop the event manager. */ +void EventManager::Pause() +{ + MutexGuard Locked(Lock); + if (!SampleEvents.empty()) + EventTimer->Stop(); +} + +/* Restart the event manager. */ +void EventManager::Resume() +{ + MutexGuard Locked(Lock); + if (!SampleEvents.empty()) + EventTimer->Start(); +} + +/* + * Buffer processing + */ +static uint8_t *CopyData(uint8_t *Data, uint64_t DataSize, uint64_t Head, uint64_t Tail) { + uint64_t Mask = DataSize - 1; + uint64_t Size = Head - Tail; + uint64_t HeadOff = Head & Mask; + uint64_t TailOff = Tail & Mask; + uint8_t *Buf = new uint8_t[Size]; + + if (HeadOff > TailOff) { + memcpy(Buf, Data + TailOff, Size); + } else { + uint64_t UpperSize = DataSize - TailOff; + memcpy(Buf, Data + TailOff, UpperSize); + memcpy(&Buf[UpperSize], Data, HeadOff); + } + return Buf; +} + +/* Process and decode the sample buffer. */ +SampleList *ReadSampleData(PMUEvent *Event) +{ + uint64_t Head = perf_read_data_head(Event->Data.Base); + uint64_t Old = Event->Data.Prev; + uint64_t Size = Head - Old; + uint8_t *Data = (uint8_t *)Event->Data.Base + SysConfig.PageSize; + uint64_t DataSize = Event->Data.Size - SysConfig.PageSize; + SampleList *OutData = nullptr; + + if (Size < Event->Watermark) + return OutData; + + OutData = new SampleList; + if (Size == 0) + return OutData; + + /* Overwrite head if we failed to keep up with the mmap data. */ + if (Size > DataSize) { + Event->Data.Prev = Head; + perf_write_data_tail(Event->Data.Base, Head); + return OutData; + } + + /* Process the buffer. */ + uint8_t *Buf = CopyData(Data, DataSize, Head, Old); + uint8_t *Orig = Buf, *BufEnd = Buf + Size; + bool SampleIP = Event->Mode & MODE_SAMPLE_IP; + bool ReadFormat = Event->Mode & MODE_SAMPLE_READ; + bool ReadGroup = Event->FD.size() > 1; + + while (1) { + /* Check if we have enough size for the event header. */ + if (Buf + sizeof(struct perf_event_header) > BufEnd) + break; + + auto *Header = (struct perf_event_header *)Buf; + Buf += sizeof(struct perf_event_header); + + /* Check if we have enough size for the sample payload. */ + if (Buf + Header->size > BufEnd) + break; + + if (Header->size == 0) + continue; + + /* Skip this sample if it's not a PERF_RECORD_SAMPLE type. */ + if (Header->type != PERF_RECORD_SAMPLE) { + Buf += Header->size; + continue; + } + + if (SampleIP) { /* if PERF_SAMPLE_IP */ + uint64_t ip = *(uint64_t *)Buf; + Buf += 8; + OutData->push_back(ip); + } + if (ReadFormat) { /* if PERF_SAMPLE_READ */ + if (ReadGroup) { + uint64_t nr = *(uint64_t *)Buf; + Buf += 8; + while (nr--) { + uint64_t value = *(uint64_t *)Buf; + Buf += 8; + OutData->push_back(value); + } + } else { + uint64_t value = *(uint64_t *)Buf; + Buf += 8; + OutData->push_back(value); + } + } + } + + delete [] Orig; + + /* We have finished the buffer. Update data tail. */ + Event->Data.Prev = Head; + perf_write_data_tail(Event->Data.Base, Head); + + return OutData; +} + +/* + * Timer + */ +Timer::Timer(int Signum, int TID) +{ + struct sigevent ev; + memset(&ev, 0, sizeof(ev)); + ev.sigev_value.sival_int = 0; + ev.sigev_notify = SIGEV_SIGNAL | SIGEV_THREAD_ID; + ev.sigev_signo = Signum; + ev._sigev_un._tid = TID; + timer_create(CLOCK_REALTIME, &ev, &T); +} + +Timer::~Timer() +{ + Stop(); + timer_delete(T); +} + +/* Fire a timer which expires just once. 
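+ * it_interval is left zero below, so the timer does not auto-reload;
+ * DefaultHandler() re-arms it via EventTimer->Start() as long as sampling
+ * events remain, giving a sampling period of roughly SysConfig.Timeout
+ * nanoseconds per handler invocation.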
*/ +void Timer::Start() +{ + struct itimerspec Timeout; + Timeout.it_interval.tv_sec = 0; + Timeout.it_interval.tv_nsec = 0; /* 0 for one-shot timer */ + Timeout.it_value.tv_sec = 0; + Timeout.it_value.tv_nsec = SysConfig.Timeout; + timer_settime(T, 0 /* RELATIVE */, &Timeout, NULL); +} + +void Timer::Stop() +{ + struct itimerspec Timeout; + Timeout.it_interval.tv_sec = 0; + Timeout.it_interval.tv_nsec = 0; /* 0 for one-shot timer */ + Timeout.it_value.tv_sec = 0; + Timeout.it_value.tv_nsec = 0; + timer_settime(T, 0 /* RELATIVE */, &Timeout, NULL); +} + +} /* namespace pmu */ + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/llvm/pmu/pmu.cpp b/llvm/pmu/pmu.cpp new file mode 100644 index 0000000..640997f --- /dev/null +++ b/llvm/pmu/pmu.cpp @@ -0,0 +1,491 @@ +/* + * (C) 2018 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#include <errno.h> +#include <sys/mman.h> +#include "pmu/pmu-global.h" +#include "pmu/pmu-events.h" + +/* + * Performance Monitoring Unit (PMU) tools. + */ +namespace pmu { + +static bool InitOnce; + +EventManager *EventMgr; /* Event manager. */ +GlobalConfig SysConfig; /* System-wide configuration. */ +EventID PreEvents[PMU_EVENT_MAX]; /* Pre-defined events. */ + + +/* Initialize system-wide configuration. */ +static void SetupGlobalConfig(PMUConfig &Config) +{ + /* Get page size. */ + SysConfig.PageSize = getpagesize(); + + /* Configure timeout and signal receiver for the timer. */ + SysConfig.SignalReceiver = Config.SignalReceiver; + if (SysConfig.SignalReceiver <= 0) + SysConfig.SignalReceiver = getpid(); + + SysConfig.Timeout = Config.Timeout; + if (SysConfig.Timeout == 0) + SysConfig.Timeout = PMU_TIMER_PERIOD; + + SysConfig.Timeout *= 1000; /* nanosecond */ + + /* Determine the Linux Perf version used by this tool and the kernel. + * We set the last few bytes of the perf_event_attr structure and see the + * size field returned from the kernel. */ + + SysConfig.PerfVersion = 0; + SysConfig.OSPerfVersion = 0; + + struct perf_event_attr attr; + perf_attr_init(&attr, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES); + attr.aux_watermark = 1; + int fd = sys_perf_event_open(&attr, 0, -1, -1, 0); + close(fd); + +#define CheckPerfVersion(_Ver) \ + do { \ + SysConfig.PerfVersion = _Ver; \ + if (attr.size == PERF_ATTR_SIZE_VER##_Ver) \ + SysConfig.OSPerfVersion = _Ver; \ + } while(0) + + CheckPerfVersion(1); + CheckPerfVersion(2); + CheckPerfVersion(3); + CheckPerfVersion(4); + CheckPerfVersion(5); + +#undef CheckPerfVersion +} + +/* Initialize pre-defined events. */ +static void SetupDefaultEvent() +{ + for (unsigned i = 0; i < PMU_EVENT_MAX; ++i) { + PreEvents[i].Type = -1; + PreEvents[i].Config = -1; + } + +#define SetupEvent(_Event,_Config) \ + PreEvents[_Event].Type = PERF_TYPE_HARDWARE; \ + PreEvents[_Event].Config = _Config; + + /* Basic events. */ + SetupEvent(PMU_CPU_CYCLES, PERF_COUNT_HW_CPU_CYCLES); + SetupEvent(PMU_REF_CPU_CYCLES, PERF_COUNT_HW_REF_CPU_CYCLES); + SetupEvent(PMU_INSTRUCTIONS, PERF_COUNT_HW_INSTRUCTIONS); + SetupEvent(PMU_LLC_REFERENCES, PERF_COUNT_HW_CACHE_REFERENCES); + SetupEvent(PMU_LLC_MISSES, PERF_COUNT_HW_CACHE_MISSES); + SetupEvent(PMU_BRANCH_INSTRUCTIONS, PERF_COUNT_HW_BRANCH_INSTRUCTIONS); + SetupEvent(PMU_BRANCH_MISSES, PERF_COUNT_HW_BRANCH_MISSES); + +#undef SetEventCode +} + +/* Initialize the PMU module. */ +int PMU::Init(PMUConfig &Config) +{ + if (InitOnce == true) + return PMU_OK; + + /* Set the global configuration. 
*/ + SetupGlobalConfig(Config); + + /* Initialize pre-defined event codes. */ + SetupDefaultEvent(); + + /* Allocate event manager. */ + EventMgr = new EventManager; + + /* Initialize target-specific events. */ +#if defined(__i386__) || defined(__x86_64__) + X86Init(); +#elif defined(__arm__) || defined (__aarch64__) + ARMInit(); +#elif defined(_ARCH_PPC) || defined(_ARCH_PPC64) + PPCInit(); +#endif + + Config.PerfVersion = SysConfig.PerfVersion; + Config.OSPerfVersion = SysConfig.OSPerfVersion; + + InitOnce = true; + return PMU_OK; +} + +/* Finalize the PMU module. */ +int PMU::Finalize(void) +{ + if (InitOnce == false) + return PMU_OK; + + delete EventMgr; + + InitOnce = false; + return PMU_OK; +} + +/* Stop the PMU module. */ +int PMU::Pause(void) +{ + EventMgr->Pause(); + return PMU_OK; +} + +/* Restart the PMU module. */ +int PMU::Resume(void) +{ + EventMgr->Resume(); + return PMU_OK; +} + +/* Start a counting/sampling/tracing event. */ +int PMU::Start(Handle Hndl) +{ + auto Event = EventMgr->GetEvent(Hndl); + if (!Event) + return PMU_EINVAL; + + if (perf_event_start(Event->getFD()) != 0) + return PMU_EEVENT; + + EventMgr->StartEvent(Event); + + return PMU_OK; +} + +/* Stop a counting/sampling/tracing event. */ +int PMU::Stop(Handle Hndl) +{ + auto Event = EventMgr->GetEvent(Hndl); + if (!Event) + return PMU_EINVAL; + + if (perf_event_stop(Event->getFD()) != 0) + return PMU_EEVENT; + + EventMgr->StopEvent(Event); + + return PMU_OK; +} + +/* Reset the hardware counter. */ +int PMU::Reset(Handle Hndl) +{ + auto Event = EventMgr->GetEvent(Hndl); + if (!Event) + return PMU_EINVAL; + + if (perf_event_reset(Event->getFD()) != 0) + return PMU_EEVENT; + return PMU_OK; +} + +/* Remove an event. */ +int PMU::Cleanup(Handle Hndl) +{ + auto Event = EventMgr->GetEvent(Hndl); + if (!Event) + return PMU_EINVAL; + + /* Do stop the event if the user hasn't called it. */ + if (Event->State != STATE_STOP) { + int EC = Stop(Hndl); + if (EC != PMU_OK) + return EC; + } + + /* At this point, this event has been removed from the sampling list and we + * no longer get overflow handling (if this is a sampling event). We are + * now able to release all resources. */ + + /* Stop all events in a group. */ + for (auto fd : Event->FD) + perf_event_stop(fd); + + /* Release allocated buffers. */ + if (Event->Data.Base) + munmap(Event->Data.Base, Event->Data.Size); + if (Event->Aux.Base) + munmap(Event->Aux.Base, Event->Aux.Size); + + for (auto fd : Event->FD) + close(fd); + + EventMgr->DeleteEvent(Event); + return PMU_OK; +} + +/* Start/stop a sampling/tracing event without acquiring a lock. + * Note that these two function should only be used within the overflow + * handler. Since the overflow handling is already in a locked section, + * acquiring a lock is not required. */ +int PMU::StartUnlocked(Handle Hndl) +{ + auto Event = EventMgr->GetEvent(Hndl); + if (!Event) + return PMU_EINVAL; + if (Event->Mode & MODE_COUNTER) + return PMU_EINVAL; + + if (perf_event_start(Event->getFD()) != 0) + return PMU_EEVENT; + + EventMgr->StartEvent(Event, false); + + return PMU_OK; +} + +int PMU::StopUnlocked(Handle Hndl) +{ + auto Event = EventMgr->GetEvent(Hndl); + if (!Event) + return PMU_EINVAL; + if (Event->Mode & MODE_COUNTER) + return PMU_EINVAL; + + if (perf_event_stop(Event->getFD()) != 0) + return PMU_EEVENT; + + EventMgr->StopEvent(Event, false); + + return PMU_OK; +} + +/* Open an event using the pre-defined event code. 
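+ * A minimal counting round-trip with this interface looks roughly as
+ * follows (a sketch only; namespace qualifiers and the exact declarations
+ * from the pmu headers are omitted):
+ *
+ *   PMUConfig Cfg = {};                // zeroed: default receiver/timeout
+ *   PMU::Init(Cfg);
+ *   Handle Hndl;
+ *   if (PMU::CreateEvent(PMU_CPU_CYCLES, Hndl) == PMU_OK) {
+ *       PMU::Start(Hndl);
+ *       // ... code being measured ...
+ *       uint64_t Cycles = 0;
+ *       PMU::ReadEvent(Hndl, Cycles);
+ *       PMU::Stop(Hndl);
+ *       PMU::Cleanup(Hndl);
+ *   }
+ *   PMU::Finalize();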
*/ +int PMU::CreateEvent(unsigned EventCode, Handle &Hndl) +{ + int fd; + struct perf_event_attr Attr; + + Hndl = PMU_INVALID_HNDL; + + if (EventCode >= PMU_EVENT_MAX) + return PMU_EINVAL; + if (PreEvents[EventCode].Type == -1) + return PMU_ENOEVENT; + + perf_attr_init(&Attr, PreEvents[EventCode].Type, PreEvents[EventCode].Config); + fd = sys_perf_event_open(&Attr, 0, -1, -1, 0); + if (fd < 0) + return ErrorCode(errno); + + Hndl = EventMgr->AddEvent(fd); + if (Hndl == PMU_INVALID_HNDL) { + close(fd); + return PMU_ENOMEM; + } + return PMU_OK; +} + +/* Open an event using the raw event number and umask value. + * The raw event code is computed as (RawEvent | (Umask << 8)). */ +int PMU::CreateRawEvent(unsigned RawEvent, unsigned Umask, Handle &Hndl) +{ + int fd; + struct perf_event_attr Attr; + + Hndl = PMU_INVALID_HNDL; + + perf_attr_init(&Attr, PERF_TYPE_RAW, RawEvent | (Umask << 8)); + fd = sys_perf_event_open(&Attr, 0, -1, -1, 0); + if (fd < 0) + return ErrorCode(errno); + + Hndl = EventMgr->AddEvent(fd); + if (Hndl == PMU_INVALID_HNDL) { + close(fd); + return PMU_ENOMEM; + } + return PMU_OK; +} + +/* Open a sampling event, with the 1st EventCode as the interrupt event. + * The sample data will be recorded in a vector of type 'uint64_t'. + * The following vector shows the data format of sampling with N events: + * { pc, val1, val2, ..., valN, # 1st sample + * ... + * pc, val1, val2, ..., valN }; # nth sample + * + * Note that ownwership of the output vector is transferred to the user. + * It is the user's responsibility to free the resource of the vector. */ +int PMU::CreateSampleEvent(SampleConfig &Config, Handle &Hndl) +{ + unsigned i, NumEvents = Config.NumEvents; + unsigned NumPages = Config.NumPages; + uint64_t Period = Config.Period; + int fds[PMU_GROUP_EVENTS], EC = PMU_ENOMEM; + uint64_t DataSize; + void *Data; + + Hndl = PMU_INVALID_HNDL; + + if (NumPages == 0) + NumPages = PMU_SAMPLE_PAGES; + if (Period < 1e3) + Period = PMU_SAMPLE_PERIOD; + + if (NumEvents == 0 || NumEvents > PMU_GROUP_EVENTS || !isPowerOf2(NumPages)) + return PMU_EINVAL; + + /* Check event codes. */ + for (i = 0; i < NumEvents; ++i) { + unsigned EventCode = Config.EventCode[i]; + if (EventCode >= PMU_EVENT_MAX) + return PMU_EINVAL; + if (PreEvents[EventCode].Type == -1) + return PMU_ENOEVENT; + } + + /* Open the events. If more than one event is requested, set read_format + * to PERF_FORMAT_GROUP. */ + fds[0] = -1; + for (i = 0; i < NumEvents; ++i) { + struct perf_event_attr Attr; + unsigned EventCode = Config.EventCode[i]; + perf_attr_init(&Attr, PreEvents[EventCode].Type, PreEvents[EventCode].Config); + + Attr.disabled = !i; + if (i == 0) { + Attr.sample_period = Period; + Attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_READ; + Attr.read_format = (NumEvents > 1) ? PERF_FORMAT_GROUP : 0; + } + + fds[i] = sys_perf_event_open(&Attr, 0, -1, fds[0], 0); + if (fds[i] < 0) { + EC = ErrorCode(errno); + goto failed; + } + } + + /* Allocate buffer for the sampling data. */ + DataSize = (1 + NumPages) * SysConfig.PageSize; + Data = mmap(nullptr, DataSize, PROT_READ|PROT_WRITE, MAP_SHARED, fds[0], 0); + if (Data == MAP_FAILED) + goto failed; + + Hndl = EventMgr->AddSampleEvent(NumEvents, fds, DataSize, Data, + MODE_SAMPLE_IP | MODE_SAMPLE_READ, Config); + if (Hndl == PMU_INVALID_HNDL) { + munmap(Data, DataSize); + goto failed; + } + return PMU_OK; + +failed: + while (--i) + close(fds[i]); + return EC; +} + +/* Generate an IP histogram using EventCode as the interrupt event. 
+ * The IP histogram will be recorded in a vector of type 'uint64_t' with + * the format: { pc1, pc2, pc3, ..., pcN }. + * Note that ownwership of the output vector is transferred to the user. + * It is the user's responsibility to free the resource of the vector. */ +int PMU::CreateSampleIP(Sample1Config &Config, Handle &Hndl) +{ + int fd; + unsigned EventCode = Config.EventCode; + unsigned NumPages = Config.NumPages; + uint64_t Period = Config.Period; + uint64_t DataSize; + void *Data; + + Hndl = PMU_INVALID_HNDL; + + if (NumPages == 0) + NumPages = PMU_SAMPLE_PAGES; + if (Period < 1e3) + Period = PMU_SAMPLE_PERIOD; + + if (!isPowerOf2(NumPages)) + return PMU_EINVAL; + + /* Check the events. */ + if (EventCode >= PMU_EVENT_MAX) + return PMU_EINVAL; + if (PreEvents[EventCode].Type == -1) + return PMU_ENOEVENT; + + struct perf_event_attr Attr; + perf_attr_init(&Attr, PreEvents[EventCode].Type, PreEvents[EventCode].Config); + + Attr.disabled = 1; + Attr.sample_period = Period; + Attr.sample_type = PERF_SAMPLE_IP; + + fd = sys_perf_event_open(&Attr, 0, -1, -1, 0); + if (fd < 0) + return ErrorCode(errno); + + /* Allocate buffer for the sampling data. */ + DataSize = (1 + NumPages) * SysConfig.PageSize; + Data = mmap(nullptr, DataSize, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); + if (Data == MAP_FAILED) + goto failed; + + /* Set the sampling config. */ + SampleConfig SConfig; + SConfig.NumEvents = 1; + SConfig.EventCode[0] = Config.EventCode; + SConfig.NumPages = NumPages; + SConfig.Period = Period; + SConfig.Watermark = Config.Watermark; + SConfig.SampleHandler = Config.SampleHandler; + SConfig.Opaque = Config.Opaque; + + Hndl = EventMgr->AddSampleEvent(1, &fd, DataSize, Data, MODE_SAMPLE_IP, SConfig); + if (Hndl == PMU_INVALID_HNDL) { + munmap(Data, DataSize); + goto failed; + } + return PMU_OK; + +failed: + close(fd); + return PMU_ENOMEM; +} + +/* Read value from the hardware counter. */ +int PMU::ReadEvent(Handle Hndl, uint64_t &Value) +{ + auto Event = EventMgr->GetEvent(Hndl); + if (!Event) + return PMU_EINVAL; + + if (read(Event->getFD(), &Value, sizeof(uint64_t)) != sizeof(uint64_t)) + return PMU_EEVENT; + return PMU_OK; +} + +/* Convert error code to string. */ +const char *PMU::strerror(int ErrCode) +{ + switch (ErrCode) { + case PMU_OK: return "Success"; + case PMU_EINVAL: return "Invalid argument"; + case PMU_ENOMEM: return "Insufficient memory"; + case PMU_ENOEVENT: return "Pre-defined event not available"; + case PMU_EEVENT: return "Hardware event error"; + case PMU_EPERM: return "Permission denied"; + case PMU_EINTER: return "Internal error"; + case PMU_EDECODER: return "Decoder error"; + default: return "Unknown error"; + } +} + +} /* namespace pmu */ + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/llvm/pmu/ppc/ppc-events.cpp b/llvm/pmu/ppc/ppc-events.cpp new file mode 100644 index 0000000..249de52 --- /dev/null +++ b/llvm/pmu/ppc/ppc-events.cpp @@ -0,0 +1,37 @@ +/* + * (C) 2018 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#include "pmu/pmu-global.h" + +namespace pmu { + +#define ICACHE_MISS_CONFIG (0x200fd) +#define MEM_LOADS_CONFIG (0x100fc) + +extern EventID PreEvents[PMU_EVENT_MAX]; /* Pre-defined events. 
*/ + +static void PPCSetupEventCode() +{ +#define SetupEvent(_Event,_Config) \ + PreEvents[_Event].Type = PERF_TYPE_RAW; \ + PreEvents[_Event].Config = _Config; + + SetupEvent(PMU_ICACHE_MISSES, ICACHE_MISS_CONFIG); + SetupEvent(PMU_MEM_LOADS, MEM_LOADS_CONFIG); + +#undef SetEventCode +} + +int PPCInit() +{ + PPCSetupEventCode(); + return PMU_OK; +} + +} /* namespace pmu */ + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/llvm/pmu/x86/x86-events.cpp b/llvm/pmu/x86/x86-events.cpp new file mode 100644 index 0000000..fe25f70 --- /dev/null +++ b/llvm/pmu/x86/x86-events.cpp @@ -0,0 +1,41 @@ +/* + * (C) 2018 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#include "pmu/pmu-global.h" + +namespace pmu { + +#define ICACHE_HIT_CONFIG (0x83 | (0x1 << 8)) /* skylake/event=0x83,umask=0x1/ */ +#define ICACHE_MISS_CONFIG (0x83 | (0x2 << 8)) /* skylake/event=0x83,umask=0x2/ */ +#define MEM_LOADS_CONFIG (0xd0 | (0x81 << 8 )) /* skylake/event=0xd0,umask=0x81/ */ +#define MEM_STORES_CONFIG (0xd0 | (0x82 << 8 )) /* skylake/event=0xd0,umask=0x82/ */ + +extern EventID PreEvents[PMU_EVENT_MAX]; /* Pre-defined events. */ + +static void X86SetupEventCode() +{ +#define SetupEvent(_Event,_Config) \ + PreEvents[_Event].Type = PERF_TYPE_RAW; \ + PreEvents[_Event].Config = _Config; + + SetupEvent(PMU_ICACHE_HITS, ICACHE_HIT_CONFIG); + SetupEvent(PMU_ICACHE_MISSES, ICACHE_MISS_CONFIG); + SetupEvent(PMU_MEM_LOADS, MEM_LOADS_CONFIG); + SetupEvent(PMU_MEM_STORES, MEM_STORES_CONFIG); + +#undef SetEventCode +} + +int X86Init() +{ + X86SetupEventCode(); + return PMU_OK; +} + +} /* namespace pmu */ + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/llvm/tracer.cpp b/llvm/tracer.cpp new file mode 100644 index 0000000..9e37442 --- /dev/null +++ b/llvm/tracer.cpp @@ -0,0 +1,365 @@ +/* + * (C) 2016 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + * + * This file implements the trace/region formation algorithm. + */ + + +#include "utils.h" +#include "tracer.h" +#include "llvm-state.h" + +#define USE_RELAXED_NET + + +unsigned ProfileThreshold = NET_PROFILE_THRESHOLD; +unsigned PredictThreshold = NET_PREDICT_THRESHOLD; + +static inline void start_trace_profiling(TranslationBlock *tb) +{ + /* Turn on trace profiling by jumping to the next instruction. */ + uintptr_t jmp_addr = tb_get_jmp_entry(tb); +#if defined(TCG_TARGET_I386) + patch_jmp(jmp_addr, jmp_addr + 5); +#elif defined(TCG_TARGET_ARM) || defined(TCG_TARGET_AARCH64) + patch_jmp(jmp_addr, jmp_addr + 4); +#elif defined(TCG_TARGET_PPC64) + patch_jmp(jmp_addr, jmp_addr + 16); +#endif +} + +static inline void copy_image(CPUArchState *env, TranslationBlock *tb) +{ +#if defined(CONFIG_LLVM) && defined(CONFIG_SOFTMMU) + char *p = new char[tb->size]; + for (int i = 0, e = tb->size; i != e; ++i) + p[i] = cpu_ldub_code(env, tb->pc + i); + tb->image = (void *)p; +#endif +} + +static inline void tracer_handle_chaining(uintptr_t next_tb, TranslationBlock *tb) +{ +#if defined(CONFIG_LLVM) + llvm_handle_chaining(next_tb, tb); +#else + /* see if we can patch the calling TB. When the TB spans two pages, we + * cannot safely do a direct jump. 
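+ * (A TB with page_addr[1] != -1 spans two guest pages; the mapping or
+ * contents of the second page may change between executions, and a direct
+ * jump would bypass the lookup that re-validates it, so such TBs are left
+ * unchained.)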
*/ + if (next_tb != 0 && tb->page_addr[1] == (tb_page_addr_t)-1 + && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) { + tb_add_jump((TranslationBlock *)(next_tb & ~TB_EXIT_MASK), + next_tb & TB_EXIT_MASK, tb); + } +#endif +} + + +#if defined(CONFIG_LLVM) +#include "llvm.h" +#include "llvm-soft-perfmon.h" +#include "llvm-hard-perfmon.h" +static inline void OptimizeBlock(CPUArchState *env, TranslationBlock *TB) +{ + auto Request = OptimizationInfo::CreateRequest(TB); + LLVMEnv::OptimizeBlock(env, std::move(Request)); +} +static inline void OptimizeTrace(CPUArchState *env, NETTracer::TBVec &TBs, + int LoopHeadIdx) +{ + auto Request = OptimizationInfo::CreateRequest(TBs, LoopHeadIdx); + LLVMEnv::OptimizeTrace(env, std::move(Request)); +} +static inline void RegisterThread(CPUArchState *env, BaseTracer *tracer) +{ + if (ENV_GET_CPU(env)->cpu_index < 0) + return; + HP->RegisterThread(tracer); +} +static inline void UnregisterThread(CPUArchState *env, BaseTracer *tracer) +{ + if (ENV_GET_CPU(env)->cpu_index < 0) + return; + HP->UnregisterThread(tracer); + SP->NumTraceExits += env->num_trace_exits; +} +static inline void NotifyCacheEnter(CPUArchState *env) +{ + if (ENV_GET_CPU(env)->cpu_index < 0) + return; + HP->NotifyCacheEnter(cpu_get_tracer(env)); +} +static inline void NotifyCacheLeave(CPUArchState *env) +{ + if (ENV_GET_CPU(env)->cpu_index < 0) + return; + HP->NotifyCacheLeave(cpu_get_tracer(env)); +} +#else +static inline void OptimizeBlock(CPUArchState *, TranslationBlock *) {} +static inline void OptimizeTrace(CPUArchState *, NETTracer::TBVec &, int) {} +static inline void RegisterThread(CPUArchState *, BaseTracer *) {} +static inline void UnregisterThread(CPUArchState *, BaseTracer *) {} +static inline void NotifyCacheEnter(CPUArchState *) {} +static inline void NotifyCacheLeave(CPUArchState *) {} +#endif + + +/* + * BaseTracer + */ +BaseTracer *BaseTracer::CreateTracer(CPUArchState *env) +{ +#if defined(CONFIG_LLVM) + switch (LLVMEnv::TransMode) { + case TRANS_MODE_NONE: + return new BaseTracer(env); + case TRANS_MODE_BLOCK: + return new SingleBlockTracer(env); + case TRANS_MODE_HYBRIDS: + return new NETTracer(env, TRANS_MODE_HYBRIDS); + case TRANS_MODE_HYBRIDM: + return new NETTracer(env, TRANS_MODE_HYBRIDM); + default: + break; + } +#endif + return new BaseTracer(env); +} + +void BaseTracer::DeleteTracer(CPUArchState *env) +{ + auto Tracer = cpu_get_tracer(env); + if (Tracer) { + delete Tracer; + Tracer = nullptr; + } +} + + +/* + * SingleBlockTracer + */ +SingleBlockTracer::SingleBlockTracer(CPUArchState *env) : BaseTracer(env) +{ + if (tracer_mode == TRANS_MODE_NONE) + tracer_mode = TRANS_MODE_BLOCK; +} + +void SingleBlockTracer::Record(uintptr_t next_tb, TranslationBlock *tb) +{ + /* Optimize the block if we see this block for the first time. */ + if (update_tb_mode(tb, BLOCK_NONE, BLOCK_ACTIVE)) + OptimizeBlock(Env, tb); + TB = tb; +} + + +/* + * NETTracer + */ +NETTracer::NETTracer(CPUArchState *env, int Mode) : BaseTracer(env) +{ + if (tracer_mode == TRANS_MODE_NONE) + tracer_mode = Mode; + RegisterThread(Env, this); +} + +NETTracer::~NETTracer() +{ + UnregisterThread(Env, this); +} + +void NETTracer::Reset() +{ + TBs.clear(); + Env->start_trace_prediction = 0; +} + +void NETTracer::Record(uintptr_t next_tb, TranslationBlock *tb) +{ + bool NewTB = (tb->mode == BLOCK_NONE); + + /* Promote tb to the active state before any checks if it is a new tb. 
*/ + if (update_tb_mode(tb, BLOCK_NONE, BLOCK_ACTIVE)) { + tcg_save_state(Env, tb); + copy_image(Env, tb); + } + + if (isTraceHead(next_tb, tb, NewTB)) { + if (update_tb_mode(tb, BLOCK_ACTIVE, BLOCK_TRACEHEAD)) + start_trace_profiling(tb); + } + + Env->fallthrough = 0; +} + +/* Determine whether tb is a potential trace head. tb is a trace head if it is + * (1) a target of an existing trace exit, + * (2) a target of an indirect branch, + * (3) (relaxed NET) a block in a cyclic path (i.e., seen more than once), or + * (original NET) a target of a backward branch. */ +bool NETTracer::isTraceHead(uintptr_t next_tb, TranslationBlock *tb, bool NewTB) +{ + /* Rule 1: a target of an existing trace exit. */ + if ((next_tb & TB_EXIT_MASK) == TB_EXIT_LLVM) + return true; + + /* Rule 2: a target of an indirect branch. + * Here we check 'next_tb == 0', which can cover the cases other than the + * indirect branches (e.g., system calls and exceptions). It is fine to + * also start trace formation from the successors of these blocks. */ + if (next_tb == 0 && Env->fallthrough == 0) + return true; + +#ifdef USE_RELAXED_NET + /* Rule 3: a block in a cyclic path (i.e., seen more than once). */ + if (!NewTB) + return true; +#else + /* Rule 3: a target of a backward branch. */ + if (next_tb != 0) { + TranslationBlock *pred = (TranslationBlock *)(next_tb & ~TB_EXIT_MASK); + if (tb->pc <= pred->pc) + return true; + } +#endif + return false; +} + +void NETTracer::Profile(TranslationBlock *tb) +{ + if (Atomic<uint32_t>::inc_return(&tb->exec_count) != ProfileThreshold) + return; + +#if 0 + /* If the execution is already in the prediction mode, process the + * previously recorded trace. */ + if (Env->start_trace_prediction && !TBs.empty()) { + OptimizeTrace(Env, TBs, -1); + Reset(); + } +#endif + + /* We reach a profile threshold, stop trace profiling and start trace tail + * prediction. The profiling is disabled by setting the jump directly to + * trace prediction stub. */ + patch_jmp(tb_get_jmp_entry(tb), tb_get_jmp_next(tb)); + Env->start_trace_prediction = 1; +} + +void NETTracer::Predict(TranslationBlock *tb) +{ + /* The trace prediction will terminate if a cyclic path is detected. + * (i.e., current tb has existed in the tracing butter either in the + * head or middle of the buffer.) */ + int LoopHeadIdx = -1; + +#if defined(CONFIG_LLVM) + /* Skip this trace if the next block is an annotated loop head and + * is going to be included in the middle of a trace. */ + if (!TBs.empty() && TBs[0] != tb && + llvm_has_annotation(tb->pc, ANNOTATION_LOOP)) { + goto trace_building; + } +#endif + +#if defined(USE_TRACETREE_ONLY) + /* We would like to have a straight-line or O-shape trace. + * (the 6-shape trace is excluded) */ + if (!TBs.empty() && tb == TBs[0]) { + LoopHeadIdx = 0; + goto trace_building; + } +#elif defined(USE_RELAXED_NET) + /* Find any cyclic path in recently recorded blocks. */ + for (int i = 0, e = TBs.size(); i != e; ++i) { + if (tb == TBs[i]) { + LoopHeadIdx = i; + goto trace_building; + } + } +#else + if (!TBs.empty()) { + if (tb == TBs[0]) { + /* Cyclic path. */ + LoopHeadIdx = 0; + goto trace_building; + } + if (tb->pc <= TBs[TBs.size() - 1]->pc) { + /* Backward branch. */ + goto trace_building; + } + } +#endif + + TBs.push_back(tb); + + /* Stop if the maximum prediction length is reached. 
*/ + if (TBs.size() == PredictThreshold) + goto trace_building; + + return; + +trace_building: + /* If the trace is a loop with a branch to the middle of the loop body, + * we forms two sub-traces: (1) the loop starting from the loopback to + * the end of the trace and (2) the original trace. */ + /* NOTE: We want to find more traces so the original trace is included. */ + + if (LoopHeadIdx > 0) { + /* Loopback at the middle. The sub-trace (1) is optimized first. */ + TBVec Loop(TBs.begin() + LoopHeadIdx, TBs.end()); + update_tb_mode(Loop[0], BLOCK_ACTIVE, BLOCK_TRACEHEAD); + OptimizeTrace(Env, Loop, 0); + } + OptimizeTrace(Env, TBs, LoopHeadIdx); + + Reset(); +} + + +/* The follows implement routines of the C interfaces for QEMU. */ +extern "C" { + +int tracer_mode = TRANS_MODE_NONE; + +void tracer_reset(CPUArchState *env) +{ + auto Tracer = cpu_get_tracer(env); + Tracer->Reset(); +} + +/* This routine is called when QEMU is going to leave the dispatcher and enter + * the code cache to execute block code `tb'. Here, we determine whether tb is + * a potential trace head and should perform trace formation. */ +void tracer_exec_tb(CPUArchState *env, uintptr_t next_tb, TranslationBlock *tb) +{ + auto Tracer = cpu_get_tracer(env); + Tracer->Record(next_tb, tb); + + tracer_handle_chaining(next_tb, tb); +} + + +/* Helper function to perform trace profiling. */ +void helper_NET_profile(CPUArchState *env, int id) +{ + auto &Tracer = getNETTracer(env); + Tracer.Profile(&tbs[id]); +} + +/* Helper function to perform trace prediction. */ +void helper_NET_predict(CPUArchState *env, int id) +{ + auto &Tracer = getNETTracer(env); + Tracer.Predict(&tbs[id]); +} + +} /* extern "C" */ + + + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/llvm/utils.cpp b/llvm/utils.cpp new file mode 100644 index 0000000..69e77af --- /dev/null +++ b/llvm/utils.cpp @@ -0,0 +1,223 @@ +/* + * (C) 2016 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#include <unistd.h> +#include <sys/syscall.h> +#include "utils.h" + + +/* Remove a CFG starting from Root. */ +void GraphNode::DeleteCFG(GraphNode *Root) +{ + NodeVec VisitStack; + NodeSet Visited; + VisitStack.push_back(Root); + do { + GraphNode *Parent = VisitStack.back(); + VisitStack.pop_back(); + if (Visited.find(Parent) == Visited.end()) { + Visited.insert(Parent); + for (auto Child : Parent->getChildren()) + VisitStack.push_back(Child); + } + } while(!VisitStack.empty()); + + for (auto I = Visited.begin(), E = Visited.end(); I != E; ++I) + delete *I; +} + +#ifdef LOCK_FREE +/* Lock-free FIFO queue algorithm of Michael and Scott (MS-queue). + * The code is based on the paper published in PODC'96: + * Maged M. Michael and Michael L. Scott, "Simple, Fast, and Practical + * Non-Blocking and Blocking Concurrent Queue Algorithms," Proc. 15th ACM + * Symp. on Principles of Distributed Computing, pages 267-275, 1996. 
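+ *
+ * Each queue pointer below is paired with a modification counter
+ * (struct pointer_t), and on x86 CAS2() compares and swaps the whole
+ * (pointer, counter) pair atomically with cmpxchg8b/cmpxchg16b; the
+ * counter is what guards the algorithm against the classic ABA problem
+ * when node addresses get reused.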
+ */ +static inline char CAS2(volatile struct pointer_t *ptr, + struct pointer_t _old, + struct pointer_t _new) +{ + char flag = 0; + +#if defined(__i386__) + asm volatile("lock; cmpxchg8b %0; setz %1;" + : "=m" (*ptr), "=q" (flag) + : "d" (_old.count), "a" (_old.ptr), "c" (_new.count), "b" (_new.ptr) + : "memory", "cc"); +#elif defined(__x86_64__) + asm volatile("lock; cmpxchg16b %0; setz %1;" + : "=m" (*ptr), "=q" (flag) + : "d" (_old.count), "a" (_old.ptr), "c" (_new.count), "b" (_new.ptr) + : "memory", "cc"); +#elif defined(__arm__) + unsigned long oldval, res; + asm volatile("ldrex %1, [%3]\n" + "mov %0, #0\n" + "teq %1, %4\n" + "strexeq %0, %5, [%3]\n" + : "=&r" (res), "=&r" (oldval), "+Qo" (*ptr->ptr) + : "r" (ptr->ptr), "Ir" (_old.ptr), "r" (_new.ptr) + : "cc"); + flag = !res; +#endif + return flag; +} + +Queue::Queue() +{ + node_t *dummy = new_node(nullptr); + Q.head.ptr = Q.tail.ptr = dummy; + Q.head.count = Q.tail.count = 0; +} + +void Queue::enqueue(void *data) +{ + pointer_t tail, next, insert; + node_t *node = new_node(data); + insert.ptr = node; + + for (;;) { + tail = Q.tail; + next = tail.ptr->next; + + /* If Tail is consistent (addresses and versions are not changed), + continue to enqueue. */ + if (CAS2(&Q.tail, tail, Q.tail)) { + /* If Tail is pointing to the last node, continue to enqueue. + Otherwise, try to advance Tail because it might be pointing + to the second last node. */ + if (next.ptr == nullptr) { /* Last node */ + /* Try to insert node at the end of the linked list. + if it succeeds, exit the loop. */ + insert.count = next.count + 1; + if (CAS2(&(tail.ptr->next), next, insert)) + break; + } else { + next.count = tail.count + 1; + CAS2(&Q.tail, tail, next); + } + } + } + + /* Enqueue is done, try to swing Tail to the inserted node. */ + insert.count = tail.count + 1; + CAS2(&Q.tail, tail, insert); +} + +void *Queue::dequeue() +{ + pointer_t head, tail, next; + void *data; + + for (;;) { + head = Q.head; + tail = Q.tail; + next = head.ptr->next; + + /* If Head is consistent (addresses and versions are not changed), + continue to dequeue. */ + if (CAS2(&Q.head, head, Q.head)) { + /* If Queue is empty, stop dequeueing. If Tail falling behind, + try to advance it. Otherwise, continue to dequeue. */ + if (head.ptr == tail.ptr) { + if (next.ptr == nullptr) /* Queue is empty */ + return nullptr; + + /* Tail is falling behand, try to advance it. */ + next.count = tail.count + 1; + CAS2(&Q.tail, tail, next); + } else { + /* We must read value before CAS, otherwise another dequeue + might free the next node. */ + data = next.ptr->value; + next.count = head.count + 1; + if (CAS2(&Q.head, head, next)) + break; + } + } + } + + /* Dequeue succeeded. It is safe to free the dummy node. + Node pointed by Head becomes the new dummy node */ + delete_node(head.ptr); + + return data; +} +#else +Queue::Queue(void) +{ + node_t *dummy = new node_t(nullptr); + Q.head = Q.tail = dummy; + pthread_mutex_init(&lock, nullptr); +} + +void Queue::enqueue(void *data) +{ + node_t *node = new node_t(data); + + pthread_mutex_lock(&lock); + Q.tail->next = node; + Q.tail = node; + pthread_mutex_unlock(&lock); +} + +void *Queue::dequeue() +{ + node_t *node, *new_head; + void *data; + + pthread_mutex_lock(&lock); + node = Q.head; + new_head = node->next; + if (new_head == nullptr) { + pthread_mutex_unlock(&lock); + return nullptr; + } + + data = new_head->value; + Q.head = new_head; + pthread_mutex_unlock(&lock); + + delete node; + return data; +} +#endif + +/* Get the thread ID. 
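+ * (A kernel TID such as this is what SIGEV_THREAD_ID-based delivery
+ * expects; for example, it can be passed as PMUConfig.SignalReceiver so
+ * the PMU timer signal is routed to one specific thread.)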
*/ +pid_t gettid() +{ +#ifdef SYS_gettid + return (pid_t)syscall(SYS_gettid); +#elif defined(__NR_gettid) + return (pid_t)syscall(__NR_gettid); +#else + return -1; +#endif +} + + +/* Patch a direct jump from patch_addr to addr. */ +void patch_jmp(volatile uintptr_t patch_addr, volatile uintptr_t addr) +{ +#if defined(__i386__) || defined(__x86_64__) + tb_set_jmp_target1(patch_addr + 1, addr); +#elif defined(__aarch64__) + tb_set_jmp_target1(patch_addr, addr); +#elif defined(__arm__) + *(uintptr_t *)patch_addr = addr; +#elif defined(_ARCH_PPC) || defined(_ARCH_PPC64) + tb_set_jmp_target1(patch_addr, addr); +#endif +} + +void patch_jmp(volatile uintptr_t patch_addr, volatile void *addr) +{ + patch_jmp(patch_addr, (uintptr_t)addr); +} + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/llvm/xml/tinyxml2.cpp b/llvm/xml/tinyxml2.cpp new file mode 100644 index 0000000..354200c --- /dev/null +++ b/llvm/xml/tinyxml2.cpp @@ -0,0 +1,2013 @@ +/* +Original code by Lee Thomason (www.grinninglizard.com) + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any +damages arising from the use of this software. + +Permission is granted to anyone to use this software for any +purpose, including commercial applications, and to alter it and +redistribute it freely, subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must +not claim that you wrote the original software. If you use this +software in a product, an acknowledgment in the product documentation +would be appreciated but is not required. + +2. Altered source versions must be plainly marked as such, and +must not be misrepresented as being the original software. + +3. This notice may not be removed or altered from any source +distribution. 
+*/ +#include "tinyxml2.h" + +#include <cstdio> +#include <cstdlib> +#include <new> +#include <cstddef> + +#include <fcntl.h> +using namespace tinyxml2; + +static const char LINE_FEED = (char)0x0a; // all line endings are normalized to LF +static const char LF = LINE_FEED; +static const char CARRIAGE_RETURN = (char)0x0d; // CR gets filtered out +static const char CR = CARRIAGE_RETURN; +static const char SINGLE_QUOTE = '\''; +static const char DOUBLE_QUOTE = '\"'; + +// Bunch of unicode info at: +// http://www.unicode.org/faq/utf_bom.html +// ef bb bf (Microsoft "lead bytes") - designates UTF-8 + +static const unsigned char TIXML_UTF_LEAD_0 = 0xefU; +static const unsigned char TIXML_UTF_LEAD_1 = 0xbbU; +static const unsigned char TIXML_UTF_LEAD_2 = 0xbfU; + + +#define DELETE_NODE( node ) { \ + if ( node ) { \ + MemPool* pool = node->memPool; \ + node->~XMLNode(); \ + pool->Free( node ); \ + } \ +} +#define DELETE_ATTRIBUTE( attrib ) { \ + if ( attrib ) { \ + MemPool* pool = attrib->memPool; \ + attrib->~XMLAttribute(); \ + pool->Free( attrib ); \ + } \ +} + +struct Entity { + const char* pattern; + int length; + char value; +}; + +static const int NUM_ENTITIES = 5; +static const Entity entities[NUM_ENTITIES] = +{ + { "quot", 4, DOUBLE_QUOTE }, + { "amp", 3, '&' }, + { "apos", 4, SINGLE_QUOTE }, + { "lt", 2, '<' }, + { "gt", 2, '>' } +}; + + +StrPair::~StrPair() +{ + Reset(); +} + + +void StrPair::Reset() +{ + if ( flags & NEEDS_DELETE ) { + delete [] start; + } + flags = 0; + start = 0; + end = 0; +} + + +void StrPair::SetStr( const char* str, int flags ) +{ + Reset(); + size_t len = strlen( str ); + start = new char[ len+1 ]; + memcpy( start, str, len+1 ); + end = start + len; + this->flags = flags | NEEDS_DELETE; +} + + +char* StrPair::ParseText( char* p, const char* endTag, int strFlags ) +{ + TIXMLASSERT( endTag && *endTag ); + + char* start = p; // fixme: hides a member + char endChar = *endTag; + size_t length = strlen( endTag ); + + // Inner loop of text parsing. + while ( *p ) { + if ( *p == endChar && strncmp( p, endTag, length ) == 0 ) { + Set( start, p, strFlags ); + return p + length; + } + ++p; + } + return 0; +} + + +char* StrPair::ParseName( char* p ) +{ + char* start = p; + + if ( !start || !(*start) ) { + return 0; + } + + if ( !XMLUtil::IsAlpha( *p ) ) { + return 0; + } + + while( *p && ( + XMLUtil::IsAlphaNum( (unsigned char) *p ) + || *p == '_' + || *p == '-' + || *p == '.' 
+ || *p == ':' )) + { + ++p; + } + + if ( p > start ) { + Set( start, p, 0 ); + return p; + } + return 0; +} + + + +const char* StrPair::GetStr() +{ + if ( flags & NEEDS_FLUSH ) { + *end = 0; + flags ^= NEEDS_FLUSH; + + if ( flags ) { + char* p = start; // the read pointer + char* q = start; // the write pointer + + while( p < end ) { + if ( (flags & NEEDS_NEWLINE_NORMALIZATION) && *p == CR ) { + // CR-LF pair becomes LF + // CR alone becomes LF + // LF-CR becomes LF + if ( *(p+1) == LF ) { + p += 2; + } + else { + ++p; + } + *q++ = LF; + } + else if ( (flags & NEEDS_NEWLINE_NORMALIZATION) && *p == LF ) { + if ( *(p+1) == CR ) { + p += 2; + } + else { + ++p; + } + *q++ = LF; + } + else if ( (flags & NEEDS_ENTITY_PROCESSING) && *p == '&' ) { + // Entities handled by tinyXML2: + // - special entities in the entity table [in/out] + // - numeric character reference [in] + // 中 or 中 + + if ( *(p+1) == '#' ) { + char buf[10] = { 0 }; + int len; + p = const_cast<char*>( XMLUtil::GetCharacterRef( p, buf, &len ) ); + for( int i=0; i<len; ++i ) { + *q++ = buf[i]; + } + TIXMLASSERT( q <= p ); + } + else { + int i=0; + for(; i<NUM_ENTITIES; ++i ) { + if ( strncmp( p+1, entities[i].pattern, entities[i].length ) == 0 + && *(p+entities[i].length+1) == ';' ) + { + // Found an entity convert; + *q = entities[i].value; + ++q; + p += entities[i].length + 2; + break; + } + } + if ( i == NUM_ENTITIES ) { + // fixme: treat as error? + ++p; + ++q; + } + } + } + else { + *q = *p; + ++p; + ++q; + } + } + *q = 0; + } + flags = (flags & NEEDS_DELETE); + } + return start; +} + + + + +// --------- XMLUtil ----------- // + +const char* XMLUtil::ReadBOM( const char* p, bool* bom ) +{ + *bom = false; + const unsigned char* pu = reinterpret_cast<const unsigned char*>(p); + // Check for BOM: + if ( *(pu+0) == TIXML_UTF_LEAD_0 + && *(pu+1) == TIXML_UTF_LEAD_1 + && *(pu+2) == TIXML_UTF_LEAD_2 ) + { + *bom = true; + p += 3; + } + return p; +} + + +void XMLUtil::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length ) +{ + int i; + const unsigned long BYTE_MASK = 0xBF; + const unsigned long BYTE_MARK = 0x80; + const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; + + if (input < 0x80) + *length = 1; + else if ( input < 0x800 ) + *length = 2; + else if ( input < 0x10000 ) + *length = 3; + else if ( input < 0x200000 ) + *length = 4; + else + { *length = 0; return; } // This code won't covert this correctly anyway. + + output += *length; + + for (i = *length; i > 0; --i) { + if (i == 1) { + --output; + *output = (char)(input | FIRST_BYTE_MARK[*length]); + } else { + --output; + *output = (char)((input | BYTE_MARK) & BYTE_MASK); + input >>= 6; + } + } +} + + +const char* XMLUtil::GetCharacterRef( const char* p, char* value, int* length ) +{ + // Presume an entity, and pull it out. + *length = 0; + + if ( *(p+1) == '#' && *(p+2) ) + { + unsigned long ucs = 0; + ptrdiff_t delta = 0; + unsigned mult = 1; + + if ( *(p+2) == 'x' ) + { + // Hexadecimal. + if ( !*(p+3) ) return 0; + + const char* q = p+3; + q = strchr( q, ';' ); + + if ( !q || !*q ) return 0; + + delta = q-p; + --q; + + while ( *q != 'x' ) + { + if ( *q >= '0' && *q <= '9' ) + ucs += mult * (*q - '0'); + else if ( *q >= 'a' && *q <= 'f' ) + ucs += mult * (*q - 'a' + 10); + else if ( *q >= 'A' && *q <= 'F' ) + ucs += mult * (*q - 'A' + 10 ); + else + return 0; + mult *= 16; + --q; + } + } + else + { + // Decimal. 
+ if ( !*(p+2) ) return 0; + + const char* q = p+2; + q = strchr( q, ';' ); + + if ( !q || !*q ) return 0; + + delta = q-p; + --q; + + while ( *q != '#' ) + { + if ( *q >= '0' && *q <= '9' ) + ucs += mult * (*q - '0'); + else + return 0; + mult *= 10; + --q; + } + } + // convert the UCS to UTF-8 + ConvertUTF32ToUTF8( ucs, value, length ); + return p + delta + 1; + } + return p+1; +} + + +void XMLUtil::ToStr( int v, char* buffer, int bufferSize ) +{ + TIXML_SNPRINTF( buffer, bufferSize, "%d", v ); +} + + +void XMLUtil::ToStr( unsigned v, char* buffer, int bufferSize ) +{ + TIXML_SNPRINTF( buffer, bufferSize, "%u", v ); +} + + +void XMLUtil::ToStr( bool v, char* buffer, int bufferSize ) +{ + TIXML_SNPRINTF( buffer, bufferSize, "%d", v ? 1 : 0 ); +} + + +void XMLUtil::ToStr( float v, char* buffer, int bufferSize ) +{ + TIXML_SNPRINTF( buffer, bufferSize, "%f", v ); +} + + +void XMLUtil::ToStr( double v, char* buffer, int bufferSize ) +{ + TIXML_SNPRINTF( buffer, bufferSize, "%f", v ); +} + + +bool XMLUtil::ToInt( const char* str, int* value ) +{ + if ( TIXML_SSCANF( str, "%d", value ) == 1 ) + return true; + return false; +} + +bool XMLUtil::ToUnsigned( const char* str, unsigned *value ) +{ + if ( TIXML_SSCANF( str, "%u", value ) == 1 ) + return true; + return false; +} + +bool XMLUtil::ToBool( const char* str, bool* value ) +{ + int ival = 0; + if ( ToInt( str, &ival )) { + *value = (ival==0) ? false : true; + return true; + } + if ( StringEqual( str, "true" ) ) { + *value = true; + return true; + } + else if ( StringEqual( str, "false" ) ) { + *value = false; + return true; + } + return false; +} + + +bool XMLUtil::ToFloat( const char* str, float* value ) +{ + if ( TIXML_SSCANF( str, "%f", value ) == 1 ) { + return true; + } + return false; +} + +bool XMLUtil::ToDouble( const char* str, double* value ) +{ + if ( TIXML_SSCANF( str, "%lf", value ) == 1 ) { + return true; + } + return false; +} + + +char* XMLDocument::Identify( char* p, XMLNode** node ) +{ + XMLNode* returnNode = 0; + char* start = p; + p = XMLUtil::SkipWhiteSpace( p ); + if( !p || !*p ) + { + return p; + } + + // What is this thing? + // - Elements start with a letter or underscore, but xml is reserved. + // - Comments: <!-- + // - Decleration: <? + // - Everthing else is unknown to tinyxml. + // + + static const char* xmlHeader = { "<?" }; + static const char* commentHeader = { "<!--" }; + static const char* dtdHeader = { "<!" }; + static const char* cdataHeader = { "<![CDATA[" }; + static const char* elementHeader = { "<" }; // and a header for everything else; check last. 
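
Editor's note: the character-reference helpers above (GetCharacterRef and ConvertUTF32ToUTF8) are the densest part of this file, so a minimal sketch of how they behave may help. It is an illustration only, not part of the patch, and the literal "&#xE4;" is an arbitrary example.

    #include <cstdio>
    #include "tinyxml2.h"

    int main()
    {
        char buf[10] = { 0 };
        int len = 0;
        // Decode the numeric character reference for U+00E4 ("a" with diaeresis).
        const char* rest = tinyxml2::XMLUtil::GetCharacterRef( "&#xE4;", buf, &len );
        // GetCharacterRef fills buf with the UTF-8 bytes 0xC3 0xA4 via
        // ConvertUTF32ToUTF8, sets len to 2, and returns a pointer just past ';'.
        printf( "decoded %d byte(s): %s (remainder: \"%s\")\n", len, buf, rest );
        return 0;
    }
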
+ + static const int xmlHeaderLen = 2; + static const int commentHeaderLen = 4; + static const int dtdHeaderLen = 2; + static const int cdataHeaderLen = 9; + static const int elementHeaderLen = 1; + +#if defined(_MSC_VER) +#pragma warning ( push ) +#pragma warning ( disable : 4127 ) +#endif + TIXMLASSERT( sizeof( XMLComment ) == sizeof( XMLUnknown ) ); // use same memory pool + TIXMLASSERT( sizeof( XMLComment ) == sizeof( XMLDeclaration ) ); // use same memory pool +#if defined(_MSC_VER) +#pragma warning (pop) +#endif + if ( XMLUtil::StringEqual( p, xmlHeader, xmlHeaderLen ) ) { + returnNode = new (commentPool.Alloc()) XMLDeclaration( this ); + returnNode->memPool = &commentPool; + p += xmlHeaderLen; + } + else if ( XMLUtil::StringEqual( p, commentHeader, commentHeaderLen ) ) { + returnNode = new (commentPool.Alloc()) XMLComment( this ); + returnNode->memPool = &commentPool; + p += commentHeaderLen; + } + else if ( XMLUtil::StringEqual( p, cdataHeader, cdataHeaderLen ) ) { + XMLText* text = new (textPool.Alloc()) XMLText( this ); + returnNode = text; + returnNode->memPool = &textPool; + p += cdataHeaderLen; + text->SetCData( true ); + } + else if ( XMLUtil::StringEqual( p, dtdHeader, dtdHeaderLen ) ) { + returnNode = new (commentPool.Alloc()) XMLUnknown( this ); + returnNode->memPool = &commentPool; + p += dtdHeaderLen; + } + else if ( XMLUtil::StringEqual( p, elementHeader, elementHeaderLen ) ) { + returnNode = new (elementPool.Alloc()) XMLElement( this ); + returnNode->memPool = &elementPool; + p += elementHeaderLen; + } + else { + returnNode = new (textPool.Alloc()) XMLText( this ); + returnNode->memPool = &textPool; + p = start; // Back it up, all the text counts. + } + + *node = returnNode; + return p; +} + + +bool XMLDocument::Accept( XMLVisitor* visitor ) const +{ + if ( visitor->VisitEnter( *this ) ) + { + for ( const XMLNode* node=FirstChild(); node; node=node->NextSibling() ) + { + if ( !node->Accept( visitor ) ) + break; + } + } + return visitor->VisitExit( *this ); +} + + +// --------- XMLNode ----------- // + +XMLNode::XMLNode( XMLDocument* doc ) : + document( doc ), + parent( 0 ), + firstChild( 0 ), lastChild( 0 ), + prev( 0 ), next( 0 ) +{ +} + + +XMLNode::~XMLNode() +{ + DeleteChildren(); + if ( parent ) { + parent->Unlink( this ); + } +} + + +void XMLNode::SetValue( const char* str, bool staticMem ) +{ + if ( staticMem ) + value.SetInternedStr( str ); + else + value.SetStr( str ); +} + + +void XMLNode::DeleteChildren() +{ + while( firstChild ) { + XMLNode* node = firstChild; + Unlink( node ); + + DELETE_NODE( node ); + } + firstChild = lastChild = 0; +} + + +void XMLNode::Unlink( XMLNode* child ) +{ + TIXMLASSERT( child->parent == this ); + if ( child == firstChild ) + firstChild = firstChild->next; + if ( child == lastChild ) + lastChild = lastChild->prev; + + if ( child->prev ) { + child->prev->next = child->next; + } + if ( child->next ) { + child->next->prev = child->prev; + } + child->parent = 0; +} + + +void XMLNode::DeleteChild( XMLNode* node ) +{ + TIXMLASSERT( node->parent == this ); + DELETE_NODE( node ); +} + + +XMLNode* XMLNode::InsertEndChild( XMLNode* addThis ) +{ + if ( lastChild ) { + TIXMLASSERT( firstChild ); + TIXMLASSERT( lastChild->next == 0 ); + lastChild->next = addThis; + addThis->prev = lastChild; + lastChild = addThis; + + addThis->next = 0; + } + else { + TIXMLASSERT( firstChild == 0 ); + firstChild = lastChild = addThis; + + addThis->prev = 0; + addThis->next = 0; + } + addThis->parent = this; + return addThis; +} + + +XMLNode* 
XMLNode::InsertFirstChild( XMLNode* addThis ) +{ + if ( firstChild ) { + TIXMLASSERT( lastChild ); + TIXMLASSERT( firstChild->prev == 0 ); + + firstChild->prev = addThis; + addThis->next = firstChild; + firstChild = addThis; + + addThis->prev = 0; + } + else { + TIXMLASSERT( lastChild == 0 ); + firstChild = lastChild = addThis; + + addThis->prev = 0; + addThis->next = 0; + } + addThis->parent = this; + return addThis; +} + + +XMLNode* XMLNode::InsertAfterChild( XMLNode* afterThis, XMLNode* addThis ) +{ + TIXMLASSERT( afterThis->parent == this ); + if ( afterThis->parent != this ) + return 0; + + if ( afterThis->next == 0 ) { + // The last node or the only node. + return InsertEndChild( addThis ); + } + addThis->prev = afterThis; + addThis->next = afterThis->next; + afterThis->next->prev = addThis; + afterThis->next = addThis; + addThis->parent = this; + return addThis; +} + + + + +const XMLElement* XMLNode::FirstChildElement( const char* value ) const +{ + for( XMLNode* node=firstChild; node; node=node->next ) { + XMLElement* element = node->ToElement(); + if ( element ) { + if ( !value || XMLUtil::StringEqual( element->Name(), value ) ) { + return element; + } + } + } + return 0; +} + + +const XMLElement* XMLNode::LastChildElement( const char* value ) const +{ + for( XMLNode* node=lastChild; node; node=node->prev ) { + XMLElement* element = node->ToElement(); + if ( element ) { + if ( !value || XMLUtil::StringEqual( element->Name(), value ) ) { + return element; + } + } + } + return 0; +} + + +const XMLElement* XMLNode::NextSiblingElement( const char* value ) const +{ + for( XMLNode* element=this->next; element; element = element->next ) { + if ( element->ToElement() + && (!value || XMLUtil::StringEqual( value, element->Value() ))) + { + return element->ToElement(); + } + } + return 0; +} + + +const XMLElement* XMLNode::PreviousSiblingElement( const char* value ) const +{ + for( XMLNode* element=this->prev; element; element = element->prev ) { + if ( element->ToElement() + && (!value || XMLUtil::StringEqual( value, element->Value() ))) + { + return element->ToElement(); + } + } + return 0; +} + + +char* XMLNode::ParseDeep( char* p, StrPair* parentEnd ) +{ + // This is a recursive method, but thinking about it "at the current level" + // it is a pretty simple flat list: + // <foo/> + // <!-- comment --> + // + // With a special case: + // <foo> + // </foo> + // <!-- comment --> + // + // Where the closing element (/foo) *must* be the next thing after the opening + // element, and the names must match. BUT the tricky bit is that the closing + // element will be read by the child. + // + // 'endTag' is the end tag for this node, it is returned by a call to a child. + // 'parentEnd' is the end tag for the parent, which is filled in and returned. + + while( p && *p ) { + XMLNode* node = 0; + + p = document->Identify( p, &node ); + if ( p == 0 || node == 0 ) { + break; + } + + StrPair endTag; + p = node->ParseDeep( p, &endTag ); + if ( !p ) { + DELETE_NODE( node ); + node = 0; + if ( !document->Error() ) { + document->SetError( XML_ERROR_PARSING, 0, 0 ); + } + break; + } + + // We read the end tag. Return it to the parent. + if ( node->ToElement() && node->ToElement()->ClosingType() == XMLElement::CLOSING ) { + if ( parentEnd ) { + *parentEnd = static_cast<XMLElement*>(node)->value; + } + DELETE_NODE( node ); + return p; + } + + // Handle an end tag returned to this level. + // And handle a bunch of annoying errors. 
+ XMLElement* ele = node->ToElement(); + if ( ele ) { + if ( endTag.Empty() && ele->ClosingType() == XMLElement::OPEN ) { + document->SetError( XML_ERROR_MISMATCHED_ELEMENT, node->Value(), 0 ); + p = 0; + } + else if ( !endTag.Empty() && ele->ClosingType() != XMLElement::OPEN ) { + document->SetError( XML_ERROR_MISMATCHED_ELEMENT, node->Value(), 0 ); + p = 0; + } + else if ( !endTag.Empty() ) { + if ( !XMLUtil::StringEqual( endTag.GetStr(), node->Value() )) { + document->SetError( XML_ERROR_MISMATCHED_ELEMENT, node->Value(), 0 ); + p = 0; + } + } + } + if ( p == 0 ) { + DELETE_NODE( node ); + node = 0; + } + if ( node ) { + this->InsertEndChild( node ); + } + } + return 0; +} + +// --------- XMLText ---------- // +char* XMLText::ParseDeep( char* p, StrPair* ) +{ + const char* start = p; + if ( this->CData() ) { + p = value.ParseText( p, "]]>", StrPair::NEEDS_NEWLINE_NORMALIZATION ); + if ( !p ) { + document->SetError( XML_ERROR_PARSING_CDATA, start, 0 ); + } + return p; + } + else { + p = value.ParseText( p, "<", document->ProcessEntities() ? StrPair::TEXT_ELEMENT : StrPair::TEXT_ELEMENT_LEAVE_ENTITIES ); + if ( !p ) { + document->SetError( XML_ERROR_PARSING_TEXT, start, 0 ); + } + if ( p && *p ) { + return p-1; + } + } + return 0; +} + + +XMLNode* XMLText::ShallowClone( XMLDocument* doc ) const +{ + if ( !doc ) { + doc = document; + } + XMLText* text = doc->NewText( Value() ); // fixme: this will always allocate memory. Intern? + text->SetCData( this->CData() ); + return text; +} + + +bool XMLText::ShallowEqual( const XMLNode* compare ) const +{ + return ( compare->ToText() && XMLUtil::StringEqual( compare->ToText()->Value(), Value() )); +} + + +bool XMLText::Accept( XMLVisitor* visitor ) const +{ + return visitor->Visit( *this ); +} + + +// --------- XMLComment ---------- // + +XMLComment::XMLComment( XMLDocument* doc ) : XMLNode( doc ) +{ +} + + +XMLComment::~XMLComment() +{ + //printf( "~XMLComment\n" ); +} + + +char* XMLComment::ParseDeep( char* p, StrPair* ) +{ + // Comment parses as text. + const char* start = p; + p = value.ParseText( p, "-->", StrPair::COMMENT ); + if ( p == 0 ) { + document->SetError( XML_ERROR_PARSING_COMMENT, start, 0 ); + } + return p; +} + + +XMLNode* XMLComment::ShallowClone( XMLDocument* doc ) const +{ + if ( !doc ) { + doc = document; + } + XMLComment* comment = doc->NewComment( Value() ); // fixme: this will always allocate memory. Intern? + return comment; +} + + +bool XMLComment::ShallowEqual( const XMLNode* compare ) const +{ + return ( compare->ToComment() && XMLUtil::StringEqual( compare->ToComment()->Value(), Value() )); +} + + +bool XMLComment::Accept( XMLVisitor* visitor ) const +{ + return visitor->Visit( *this ); +} + + +// --------- XMLDeclaration ---------- // + +XMLDeclaration::XMLDeclaration( XMLDocument* doc ) : XMLNode( doc ) +{ +} + + +XMLDeclaration::~XMLDeclaration() +{ + //printf( "~XMLDeclaration\n" ); +} + + +char* XMLDeclaration::ParseDeep( char* p, StrPair* ) +{ + // Declaration parses as text. + const char* start = p; + p = value.ParseText( p, "?>", StrPair::NEEDS_NEWLINE_NORMALIZATION ); + if ( p == 0 ) { + document->SetError( XML_ERROR_PARSING_DECLARATION, start, 0 ); + } + return p; +} + + +XMLNode* XMLDeclaration::ShallowClone( XMLDocument* doc ) const +{ + if ( !doc ) { + doc = document; + } + XMLDeclaration* dec = doc->NewDeclaration( Value() ); // fixme: this will always allocate memory. Intern? 
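
Editor's note: a minimal sketch (not part of the patch) of how the insertion helpers above (InsertEndChild, InsertFirstChild) combine with the factory methods defined further down (NewElement, NewComment) and the printer; the element and attribute names are made up.

    #include <cstdio>
    #include "tinyxml2.h"
    using namespace tinyxml2;

    int main()
    {
        XMLDocument doc;
        XMLElement* root = doc.NewElement( "profile" );
        doc.InsertEndChild( root );

        XMLElement* entry = doc.NewElement( "entry" );
        entry->SetAttribute( "id", "1" );
        root->InsertEndChild( entry );                            // appended as the last child
        root->InsertFirstChild( doc.NewComment( "generated" ) ); // prepended before <entry>

        XMLPrinter printer( stdout, false );                      // pretty-printed to stdout
        doc.Print( &printer );
        return 0;
    }
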
+ return dec; +} + + +bool XMLDeclaration::ShallowEqual( const XMLNode* compare ) const +{ + return ( compare->ToDeclaration() && XMLUtil::StringEqual( compare->ToDeclaration()->Value(), Value() )); +} + + + +bool XMLDeclaration::Accept( XMLVisitor* visitor ) const +{ + return visitor->Visit( *this ); +} + +// --------- XMLUnknown ---------- // + +XMLUnknown::XMLUnknown( XMLDocument* doc ) : XMLNode( doc ) +{ +} + + +XMLUnknown::~XMLUnknown() +{ +} + + +char* XMLUnknown::ParseDeep( char* p, StrPair* ) +{ + // Unknown parses as text. + const char* start = p; + + p = value.ParseText( p, ">", StrPair::NEEDS_NEWLINE_NORMALIZATION ); + if ( !p ) { + document->SetError( XML_ERROR_PARSING_UNKNOWN, start, 0 ); + } + return p; +} + + +XMLNode* XMLUnknown::ShallowClone( XMLDocument* doc ) const +{ + if ( !doc ) { + doc = document; + } + XMLUnknown* text = doc->NewUnknown( Value() ); // fixme: this will always allocate memory. Intern? + return text; +} + + +bool XMLUnknown::ShallowEqual( const XMLNode* compare ) const +{ + return ( compare->ToUnknown() && XMLUtil::StringEqual( compare->ToUnknown()->Value(), Value() )); +} + + +bool XMLUnknown::Accept( XMLVisitor* visitor ) const +{ + return visitor->Visit( *this ); +} + +// --------- XMLAttribute ---------- // +char* XMLAttribute::ParseDeep( char* p, bool processEntities ) +{ + // Parse using the name rules: bug fix, was using ParseText before + p = name.ParseName( p ); + if ( !p || !*p ) return 0; + + // Skip white space before = + p = XMLUtil::SkipWhiteSpace( p ); + if ( !p || *p != '=' ) return 0; + + ++p; // move up to opening quote + p = XMLUtil::SkipWhiteSpace( p ); + if ( *p != '\"' && *p != '\'' ) return 0; + + char endTag[2] = { *p, 0 }; + ++p; // move past opening quote + + p = value.ParseText( p, endTag, processEntities ? 
StrPair::ATTRIBUTE_VALUE : StrPair::ATTRIBUTE_VALUE_LEAVE_ENTITIES ); + return p; +} + + +void XMLAttribute::SetName( const char* n ) +{ + name.SetStr( n ); +} + + +int XMLAttribute::QueryIntValue( int* value ) const +{ + if ( XMLUtil::ToInt( Value(), value )) + return XML_NO_ERROR; + return XML_WRONG_ATTRIBUTE_TYPE; +} + + +int XMLAttribute::QueryUnsignedValue( unsigned int* value ) const +{ + if ( XMLUtil::ToUnsigned( Value(), value )) + return XML_NO_ERROR; + return XML_WRONG_ATTRIBUTE_TYPE; +} + + +int XMLAttribute::QueryBoolValue( bool* value ) const +{ + if ( XMLUtil::ToBool( Value(), value )) { + return XML_NO_ERROR; + } + return XML_WRONG_ATTRIBUTE_TYPE; +} + + +int XMLAttribute::QueryFloatValue( float* value ) const +{ + if ( XMLUtil::ToFloat( Value(), value )) + return XML_NO_ERROR; + return XML_WRONG_ATTRIBUTE_TYPE; +} + + +int XMLAttribute::QueryDoubleValue( double* value ) const +{ + if ( XMLUtil::ToDouble( Value(), value )) + return XML_NO_ERROR; + return XML_WRONG_ATTRIBUTE_TYPE; +} + + +void XMLAttribute::SetAttribute( const char* v ) +{ + value.SetStr( v ); +} + + +void XMLAttribute::SetAttribute( int v ) +{ + char buf[BUF_SIZE]; + XMLUtil::ToStr( v, buf, BUF_SIZE ); + value.SetStr( buf ); +} + + +void XMLAttribute::SetAttribute( unsigned v ) +{ + char buf[BUF_SIZE]; + XMLUtil::ToStr( v, buf, BUF_SIZE ); + value.SetStr( buf ); +} + + +void XMLAttribute::SetAttribute( bool v ) +{ + char buf[BUF_SIZE]; + XMLUtil::ToStr( v, buf, BUF_SIZE ); + value.SetStr( buf ); +} + +void XMLAttribute::SetAttribute( double v ) +{ + char buf[BUF_SIZE]; + XMLUtil::ToStr( v, buf, BUF_SIZE ); + value.SetStr( buf ); +} + +void XMLAttribute::SetAttribute( float v ) +{ + char buf[BUF_SIZE]; + XMLUtil::ToStr( v, buf, BUF_SIZE ); + value.SetStr( buf ); +} + + +// --------- XMLElement ---------- // +XMLElement::XMLElement( XMLDocument* doc ) : XMLNode( doc ), + closingType( 0 ), + rootAttribute( 0 ) +{ +} + + +XMLElement::~XMLElement() +{ + while( rootAttribute ) { + XMLAttribute* next = rootAttribute->next; + DELETE_ATTRIBUTE( rootAttribute ); + rootAttribute = next; + } +} + + +XMLAttribute* XMLElement::FindAttribute( const char* name ) +{ + XMLAttribute* a = 0; + for( a=rootAttribute; a; a = a->next ) { + if ( XMLUtil::StringEqual( a->Name(), name ) ) + return a; + } + return 0; +} + + +const XMLAttribute* XMLElement::FindAttribute( const char* name ) const +{ + XMLAttribute* a = 0; + for( a=rootAttribute; a; a = a->next ) { + if ( XMLUtil::StringEqual( a->Name(), name ) ) + return a; + } + return 0; +} + + +const char* XMLElement::Attribute( const char* name, const char* value ) const +{ + const XMLAttribute* a = FindAttribute( name ); + if ( !a ) + return 0; + if ( !value || XMLUtil::StringEqual( a->Value(), value )) + return a->Value(); + return 0; +} + + +const char* XMLElement::GetText() const +{ + if ( FirstChild() && FirstChild()->ToText() ) { + return FirstChild()->ToText()->Value(); + } + return 0; +} + + +int XMLElement::QueryIntText( int* _value ) const +{ + if ( FirstChild() && FirstChild()->ToText() ) { + const char* t = FirstChild()->ToText()->Value(); + if ( XMLUtil::ToInt( t, _value ) ) { + return XML_SUCCESS; + } + return XML_CAN_NOT_CONVERT_TEXT; + } + return XML_NO_TEXT_NODE; +} + + +int XMLElement::QueryUnsignedText( unsigned* _value ) const +{ + if ( FirstChild() && FirstChild()->ToText() ) { + const char* t = FirstChild()->ToText()->Value(); + if ( XMLUtil::ToUnsigned( t, _value ) ) { + return XML_SUCCESS; + } + return XML_CAN_NOT_CONVERT_TEXT; + } + return XML_NO_TEXT_NODE; 
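
Editor's note: a minimal sketch (not part of the patch) of the typed attribute/text query API implemented above; the document string, attribute names, and values are invented for illustration.

    #include <cstdio>
    #include "tinyxml2.h"
    using namespace tinyxml2;

    int main()
    {
        XMLDocument doc;
        doc.Parse( "<opt count=\"4\" enable=\"true\">7</opt>" );

        const XMLElement* opt = doc.FirstChildElement( "opt" );
        int count = 0;
        bool enable = false;
        int weight = 0;

        // Each query returns XML_NO_ERROR on success; otherwise an error code
        // such as XML_WRONG_ATTRIBUTE_TYPE or XML_NO_TEXT_NODE is returned and
        // the output variable is left unchanged. The attributes are known to
        // exist here, so the FindAttribute() results are not null-checked.
        opt->FindAttribute( "count" )->QueryIntValue( &count );
        opt->FindAttribute( "enable" )->QueryBoolValue( &enable );
        opt->QueryIntText( &weight );

        printf( "count=%d enable=%d weight=%d\n", count, enable, weight );
        return 0;
    }
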
+} + + +int XMLElement::QueryBoolText( bool* _value ) const +{ + if ( FirstChild() && FirstChild()->ToText() ) { + const char* t = FirstChild()->ToText()->Value(); + if ( XMLUtil::ToBool( t, _value ) ) { + return XML_SUCCESS; + } + return XML_CAN_NOT_CONVERT_TEXT; + } + return XML_NO_TEXT_NODE; +} + + +int XMLElement::QueryDoubleText( double* _value ) const +{ + if ( FirstChild() && FirstChild()->ToText() ) { + const char* t = FirstChild()->ToText()->Value(); + if ( XMLUtil::ToDouble( t, _value ) ) { + return XML_SUCCESS; + } + return XML_CAN_NOT_CONVERT_TEXT; + } + return XML_NO_TEXT_NODE; +} + + +int XMLElement::QueryFloatText( float* _value ) const +{ + if ( FirstChild() && FirstChild()->ToText() ) { + const char* t = FirstChild()->ToText()->Value(); + if ( XMLUtil::ToFloat( t, _value ) ) { + return XML_SUCCESS; + } + return XML_CAN_NOT_CONVERT_TEXT; + } + return XML_NO_TEXT_NODE; +} + + + +XMLAttribute* XMLElement::FindOrCreateAttribute( const char* name ) +{ + XMLAttribute* last = 0; + XMLAttribute* attrib = 0; + for( attrib = rootAttribute; + attrib; + last = attrib, attrib = attrib->next ) + { + if ( XMLUtil::StringEqual( attrib->Name(), name ) ) { + break; + } + } + if ( !attrib ) { + attrib = new (document->attributePool.Alloc() ) XMLAttribute(); + attrib->memPool = &document->attributePool; + if ( last ) { + last->next = attrib; + } + else { + rootAttribute = attrib; + } + attrib->SetName( name ); + } + return attrib; +} + + +void XMLElement::DeleteAttribute( const char* name ) +{ + XMLAttribute* prev = 0; + for( XMLAttribute* a=rootAttribute; a; a=a->next ) { + if ( XMLUtil::StringEqual( name, a->Name() ) ) { + if ( prev ) { + prev->next = a->next; + } + else { + rootAttribute = a->next; + } + DELETE_ATTRIBUTE( a ); + break; + } + prev = a; + } +} + + +char* XMLElement::ParseAttributes( char* p ) +{ + const char* start = p; + XMLAttribute* prevAttribute = 0; + + // Read the attributes. + while( p ) { + p = XMLUtil::SkipWhiteSpace( p ); + if ( !p || !(*p) ) { + document->SetError( XML_ERROR_PARSING_ELEMENT, start, Name() ); + return 0; + } + + // attribute. + if ( XMLUtil::IsAlpha( *p ) ) { + XMLAttribute* attrib = new (document->attributePool.Alloc() ) XMLAttribute(); + attrib->memPool = &document->attributePool; + + p = attrib->ParseDeep( p, document->ProcessEntities() ); + if ( !p || Attribute( attrib->Name() ) ) { + DELETE_ATTRIBUTE( attrib ); + document->SetError( XML_ERROR_PARSING_ATTRIBUTE, start, p ); + return 0; + } + // There is a minor bug here: if the attribute in the source xml + // document is duplicated, it will not be detected and the + // attribute will be doubly added. However, tracking the 'prevAttribute' + // avoids re-scanning the attribute list. Preferring performance for + // now, may reconsider in the future. + if ( prevAttribute ) { + prevAttribute->next = attrib; + } + else { + rootAttribute = attrib; + } + prevAttribute = attrib; + } + // end of the tag + else if ( *p == '/' && *(p+1) == '>' ) { + closingType = CLOSED; + return p+2; // done; sealed element. + } + // end of the tag + else if ( *p == '>' ) { + ++p; + break; + } + else { + document->SetError( XML_ERROR_PARSING_ELEMENT, start, p ); + return 0; + } + } + return p; +} + + +// +// <ele></ele> +// <ele>foo<b>bar</b></ele> +// +char* XMLElement::ParseDeep( char* p, StrPair* strPair ) +{ + // Read the element name. + p = XMLUtil::SkipWhiteSpace( p ); + if ( !p ) return 0; + + // The closing element is the </element> form. 
It is + // parsed just like a regular element then deleted from + // the DOM. + if ( *p == '/' ) { + closingType = CLOSING; + ++p; + } + + p = value.ParseName( p ); + if ( value.Empty() ) return 0; + + p = ParseAttributes( p ); + if ( !p || !*p || closingType ) + return p; + + p = XMLNode::ParseDeep( p, strPair ); + return p; +} + + + +XMLNode* XMLElement::ShallowClone( XMLDocument* doc ) const +{ + if ( !doc ) { + doc = document; + } + XMLElement* element = doc->NewElement( Value() ); // fixme: this will always allocate memory. Intern? + for( const XMLAttribute* a=FirstAttribute(); a; a=a->Next() ) { + element->SetAttribute( a->Name(), a->Value() ); // fixme: this will always allocate memory. Intern? + } + return element; +} + + +bool XMLElement::ShallowEqual( const XMLNode* compare ) const +{ + const XMLElement* other = compare->ToElement(); + if ( other && XMLUtil::StringEqual( other->Value(), Value() )) { + + const XMLAttribute* a=FirstAttribute(); + const XMLAttribute* b=other->FirstAttribute(); + + while ( a && b ) { + if ( !XMLUtil::StringEqual( a->Value(), b->Value() ) ) { + return false; + } + a = a->Next(); + b = b->Next(); + } + if ( a || b ) { + // different count + return false; + } + return true; + } + return false; +} + + +bool XMLElement::Accept( XMLVisitor* visitor ) const +{ + if ( visitor->VisitEnter( *this, rootAttribute ) ) + { + for ( const XMLNode* node=FirstChild(); node; node=node->NextSibling() ) + { + if ( !node->Accept( visitor ) ) + break; + } + } + return visitor->VisitExit( *this ); +} + + +// --------- XMLDocument ----------- // +XMLDocument::XMLDocument( bool _processEntities ) : + XMLNode( 0 ), + writeBOM( false ), + processEntities( _processEntities ), + errorID( 0 ), + errorStr1( 0 ), + errorStr2( 0 ), + charBuffer( 0 ) +{ + document = this; // avoid warning about 'this' in initializer list +} + + +XMLDocument::~XMLDocument() +{ + DeleteChildren(); + delete [] charBuffer; + +#if 0 + textPool.Trace( "text" ); + elementPool.Trace( "element" ); + commentPool.Trace( "comment" ); + attributePool.Trace( "attribute" ); +#endif + + TIXMLASSERT( textPool.CurrentAllocs() == 0 ); + TIXMLASSERT( elementPool.CurrentAllocs() == 0 ); + TIXMLASSERT( commentPool.CurrentAllocs() == 0 ); + TIXMLASSERT( attributePool.CurrentAllocs() == 0 ); +} + + +void XMLDocument::InitDocument() +{ + errorID = XML_NO_ERROR; + errorStr1 = 0; + errorStr2 = 0; + + delete [] charBuffer; + charBuffer = 0; + +} + + +XMLElement* XMLDocument::NewElement( const char* name ) +{ + XMLElement* ele = new (elementPool.Alloc()) XMLElement( this ); + ele->memPool = &elementPool; + ele->SetName( name ); + return ele; +} + + +XMLComment* XMLDocument::NewComment( const char* str ) +{ + XMLComment* comment = new (commentPool.Alloc()) XMLComment( this ); + comment->memPool = &commentPool; + comment->SetValue( str ); + return comment; +} + + +XMLText* XMLDocument::NewText( const char* str ) +{ + XMLText* text = new (textPool.Alloc()) XMLText( this ); + text->memPool = &textPool; + text->SetValue( str ); + return text; +} + + +XMLDeclaration* XMLDocument::NewDeclaration( const char* str ) +{ + XMLDeclaration* dec = new (commentPool.Alloc()) XMLDeclaration( this ); + dec->memPool = &commentPool; + dec->SetValue( str ? 
str : "xml version=\"1.0\" encoding=\"UTF-8\"" ); + return dec; +} + + +XMLUnknown* XMLDocument::NewUnknown( const char* str ) +{ + XMLUnknown* unk = new (commentPool.Alloc()) XMLUnknown( this ); + unk->memPool = &commentPool; + unk->SetValue( str ); + return unk; +} + + +int XMLDocument::LoadFile( const char* filename ) +{ + DeleteChildren(); + InitDocument(); + +#if defined(_MSC_VER) +#pragma warning ( push ) +#pragma warning ( disable : 4996 ) // Fail to see a compelling reason why this should be deprecated. +#endif + FILE* fp = fopen( filename, "rb" ); +#if defined(_MSC_VER) +#pragma warning ( pop ) +#endif + if ( !fp ) { + SetError( XML_ERROR_FILE_NOT_FOUND, filename, 0 ); + return errorID; + } + LoadFile( fp ); + fclose( fp ); + return errorID; +} + + +int XMLDocument::LoadFile( FILE* fp ) +{ + DeleteChildren(); + InitDocument(); + + fseek( fp, 0, SEEK_END ); + unsigned size = ftell( fp ); + fseek( fp, 0, SEEK_SET ); + + if ( size == 0 ) { + return errorID; + } + + charBuffer = new char[size+1]; + size_t read = fread( charBuffer, 1, size, fp ); + if ( read != size ) { + SetError( XML_ERROR_FILE_READ_ERROR, 0, 0 ); + return errorID; + } + + charBuffer[size] = 0; + + const char* p = charBuffer; + p = XMLUtil::SkipWhiteSpace( p ); + p = XMLUtil::ReadBOM( p, &writeBOM ); + if ( !p || !*p ) { + SetError( XML_ERROR_EMPTY_DOCUMENT, 0, 0 ); + return errorID; + } + + ParseDeep( charBuffer + (p-charBuffer), 0 ); + return errorID; +} + + +int XMLDocument::SaveFile( const char* filename ) +{ +#if defined(_MSC_VER) +#pragma warning ( push ) +#pragma warning ( disable : 4996 ) // Fail to see a compelling reason why this should be deprecated. +#endif + int fd = open(filename, O_RDWR|O_CREAT, 0644); + FILE* fp = fdopen(fd, "w"); + //FILE* fp = fopen( filename, "w" ); +#if defined(_MSC_VER) +#pragma warning ( pop ) +#endif + if ( !fp ) { + SetError( XML_ERROR_FILE_COULD_NOT_BE_OPENED, filename, 0 ); + return errorID; + } + SaveFile(fp); + fclose( fp ); + return errorID; +} + + +int XMLDocument::SaveFile( FILE* fp ) +{ + XMLPrinter stream( fp ); + Print( &stream ); + return errorID; +} + + +int XMLDocument::Parse( const char* p ) +{ + DeleteChildren(); + InitDocument(); + + if ( !p || !*p ) { + SetError( XML_ERROR_EMPTY_DOCUMENT, 0, 0 ); + return errorID; + } + p = XMLUtil::SkipWhiteSpace( p ); + p = XMLUtil::ReadBOM( p, &writeBOM ); + if ( !p || !*p ) { + SetError( XML_ERROR_EMPTY_DOCUMENT, 0, 0 ); + return errorID; + } + + size_t len = strlen( p ); + charBuffer = new char[ len+1 ]; + memcpy( charBuffer, p, len+1 ); + + + ParseDeep( charBuffer, 0 ); + return errorID; +} + + +void XMLDocument::Print( XMLPrinter* streamer ) +{ + XMLPrinter stdStreamer( stdout ); + if ( !streamer ) + streamer = &stdStreamer; + Accept( streamer ); +} + + +void XMLDocument::SetError( int error, const char* str1, const char* str2 ) +{ + errorID = error; + errorStr1 = str1; + errorStr2 = str2; +} + + +void XMLDocument::PrintError() const +{ + if ( errorID ) { + static const int LEN = 20; + char buf1[LEN] = { 0 }; + char buf2[LEN] = { 0 }; + + if ( errorStr1 ) { + TIXML_SNPRINTF( buf1, LEN, "%s", errorStr1 ); + } + if ( errorStr2 ) { + TIXML_SNPRINTF( buf2, LEN, "%s", errorStr2 ); + } + + printf( "XMLDocument error id=%d str1=%s str2=%s\n", + errorID, buf1, buf2 ); + } +} + + +XMLPrinter::XMLPrinter( FILE* file, bool compact ) : + elementJustOpened( false ), + firstElement( true ), + fp( file ), + depth( 0 ), + textDepth( -1 ), + processEntities( true ), + compactMode( compact ) +{ + for( int i=0; i<ENTITY_RANGE; ++i ) { + 
entityFlag[i] = false; + restrictedEntityFlag[i] = false; + } + for( int i=0; i<NUM_ENTITIES; ++i ) { + TIXMLASSERT( entities[i].value < ENTITY_RANGE ); + if ( entities[i].value < ENTITY_RANGE ) { + entityFlag[ (int)entities[i].value ] = true; + } + } + restrictedEntityFlag[(int)'&'] = true; + restrictedEntityFlag[(int)'<'] = true; + restrictedEntityFlag[(int)'>'] = true; // not required, but consistency is nice + buffer.Push( 0 ); +} + + +void XMLPrinter::Print( const char* format, ... ) +{ + va_list va; + va_start( va, format ); + + if ( fp ) { + vfprintf( fp, format, va ); + } + else { + // This seems brutally complex. Haven't figured out a better + // way on windows. + #ifdef _MSC_VER + int len = -1; + int expand = 1000; + while ( len < 0 ) { + len = vsnprintf_s( accumulator.Mem(), accumulator.Capacity(), _TRUNCATE, format, va ); + if ( len < 0 ) { + expand *= 3/2; + accumulator.PushArr( expand ); + } + } + char* p = buffer.PushArr( len ) - 1; + memcpy( p, accumulator.Mem(), len+1 ); + #else + int len = vsnprintf( 0, 0, format, va ); + // Close out and re-start the va-args + va_end( va ); + va_start( va, format ); + char* p = buffer.PushArr( len ) - 1; + vsnprintf( p, len+1, format, va ); + #endif + } + va_end( va ); +} + + +void XMLPrinter::PrintSpace( int depth ) +{ + for( int i=0; i<depth; ++i ) { + Print( " " ); + } +} + + +void XMLPrinter::PrintString( const char* p, bool restricted ) +{ + // Look for runs of bytes between entities to print. + const char* q = p; + const bool* flag = restricted ? restrictedEntityFlag : entityFlag; + + if ( processEntities ) { + while ( *q ) { + // Remember, char is sometimes signed. (How many times has that bitten me?) + if ( *q > 0 && *q < ENTITY_RANGE ) { + // Check for entities. If one is found, flush + // the stream up until the entity, write the + // entity, and keep looking. + if ( flag[(unsigned)(*q)] ) { + while ( p < q ) { + Print( "%c", *p ); + ++p; + } + for( int i=0; i<NUM_ENTITIES; ++i ) { + if ( entities[i].value == *q ) { + Print( "&%s;", entities[i].pattern ); + break; + } + } + ++p; + } + } + ++q; + } + } + // Flush the remaining string. This will be the entire + // string if an entity wasn't found. 
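
Editor's note: a minimal sketch (not part of the patch) of the document I/O entry points defined above; "config.xml" and "config.out.xml" are made-up paths. Note that this vendored copy routes SaveFile() through open()/fdopen() rather than fopen().

    #include "tinyxml2.h"
    using namespace tinyxml2;

    int main()
    {
        XMLDocument doc;
        if ( doc.LoadFile( "config.xml" ) != XML_NO_ERROR ) {
            doc.PrintError();   // prints errorID plus the two context strings
            return 1;
        }
        doc.SaveFile( "config.out.xml" );
        return 0;
    }
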
+ if ( !processEntities || (q-p > 0) ) { + Print( "%s", p ); + } +} + + +void XMLPrinter::PushHeader( bool writeBOM, bool writeDec ) +{ + static const unsigned char bom[] = { TIXML_UTF_LEAD_0, TIXML_UTF_LEAD_1, TIXML_UTF_LEAD_2, 0 }; + if ( writeBOM ) { + Print( "%s", bom ); + } + if ( writeDec ) { + PushDeclaration( "xml version=\"1.0\"" ); + } +} + + +void XMLPrinter::OpenElement( const char* name ) +{ + if ( elementJustOpened ) { + SealElement(); + } + stack.Push( name ); + + if ( textDepth < 0 && !firstElement && !compactMode ) { + Print( "\n" ); + PrintSpace( depth ); + } + + Print( "<%s", name ); + elementJustOpened = true; + firstElement = false; + ++depth; +} + + +void XMLPrinter::PushAttribute( const char* name, const char* value ) +{ + TIXMLASSERT( elementJustOpened ); + Print( " %s=\"", name ); + PrintString( value, false ); + Print( "\"" ); +} + + +void XMLPrinter::PushAttribute( const char* name, int v ) +{ + char buf[BUF_SIZE]; + XMLUtil::ToStr( v, buf, BUF_SIZE ); + PushAttribute( name, buf ); +} + + +void XMLPrinter::PushAttribute( const char* name, unsigned v ) +{ + char buf[BUF_SIZE]; + XMLUtil::ToStr( v, buf, BUF_SIZE ); + PushAttribute( name, buf ); +} + + +void XMLPrinter::PushAttribute( const char* name, bool v ) +{ + char buf[BUF_SIZE]; + XMLUtil::ToStr( v, buf, BUF_SIZE ); + PushAttribute( name, buf ); +} + + +void XMLPrinter::PushAttribute( const char* name, double v ) +{ + char buf[BUF_SIZE]; + XMLUtil::ToStr( v, buf, BUF_SIZE ); + PushAttribute( name, buf ); +} + + +void XMLPrinter::CloseElement() +{ + --depth; + const char* name = stack.Pop(); + + if ( elementJustOpened ) { + Print( "/>" ); + } + else { + if ( textDepth < 0 && !compactMode) { + Print( "\n" ); + PrintSpace( depth ); + } + Print( "</%s>", name ); + } + + if ( textDepth == depth ) + textDepth = -1; + if ( depth == 0 && !compactMode) + Print( "\n" ); + elementJustOpened = false; +} + + +void XMLPrinter::SealElement() +{ + elementJustOpened = false; + Print( ">" ); +} + + +void XMLPrinter::PushText( const char* text, bool cdata ) +{ + textDepth = depth-1; + + if ( elementJustOpened ) { + SealElement(); + } + if ( cdata ) { + Print( "<![CDATA[" ); + Print( "%s", text ); + Print( "]]>" ); + } + else { + PrintString( text, true ); + } +} + +void XMLPrinter::PushText( int value ) +{ + char buf[BUF_SIZE]; + XMLUtil::ToStr( value, buf, BUF_SIZE ); + PushText( buf, false ); +} + + +void XMLPrinter::PushText( unsigned value ) +{ + char buf[BUF_SIZE]; + XMLUtil::ToStr( value, buf, BUF_SIZE ); + PushText( buf, false ); +} + + +void XMLPrinter::PushText( bool value ) +{ + char buf[BUF_SIZE]; + XMLUtil::ToStr( value, buf, BUF_SIZE ); + PushText( buf, false ); +} + + +void XMLPrinter::PushText( float value ) +{ + char buf[BUF_SIZE]; + XMLUtil::ToStr( value, buf, BUF_SIZE ); + PushText( buf, false ); +} + + +void XMLPrinter::PushText( double value ) +{ + char buf[BUF_SIZE]; + XMLUtil::ToStr( value, buf, BUF_SIZE ); + PushText( buf, false ); +} + + +void XMLPrinter::PushComment( const char* comment ) +{ + if ( elementJustOpened ) { + SealElement(); + } + if ( textDepth < 0 && !firstElement && !compactMode) { + Print( "\n" ); + PrintSpace( depth ); + } + firstElement = false; + Print( "<!--%s-->", comment ); +} + + +void XMLPrinter::PushDeclaration( const char* value ) +{ + if ( elementJustOpened ) { + SealElement(); + } + if ( textDepth < 0 && !firstElement && !compactMode) { + Print( "\n" ); + PrintSpace( depth ); + } + firstElement = false; + Print( "<?%s?>", value ); +} + + +void XMLPrinter::PushUnknown( const 
char* value ) +{ + if ( elementJustOpened ) { + SealElement(); + } + if ( textDepth < 0 && !firstElement && !compactMode) { + Print( "\n" ); + PrintSpace( depth ); + } + firstElement = false; + Print( "<!%s>", value ); +} + + +bool XMLPrinter::VisitEnter( const XMLDocument& doc ) +{ + processEntities = doc.ProcessEntities(); + if ( doc.HasBOM() ) { + PushHeader( true, false ); + } + return true; +} + + +bool XMLPrinter::VisitEnter( const XMLElement& element, const XMLAttribute* attribute ) +{ + OpenElement( element.Name() ); + while ( attribute ) { + PushAttribute( attribute->Name(), attribute->Value() ); + attribute = attribute->Next(); + } + return true; +} + + +bool XMLPrinter::VisitExit( const XMLElement& ) +{ + CloseElement(); + return true; +} + + +bool XMLPrinter::Visit( const XMLText& text ) +{ + PushText( text.Value(), text.CData() ); + return true; +} + + +bool XMLPrinter::Visit( const XMLComment& comment ) +{ + PushComment( comment.Value() ); + return true; +} + +bool XMLPrinter::Visit( const XMLDeclaration& declaration ) +{ + PushDeclaration( declaration.Value() ); + return true; +} + + +bool XMLPrinter::Visit( const XMLUnknown& unknown ) +{ + PushUnknown( unknown.Value() ); + return true; +} diff --git a/llvm/xml/tinyxml2.h b/llvm/xml/tinyxml2.h new file mode 100644 index 0000000..80e076d --- /dev/null +++ b/llvm/xml/tinyxml2.h @@ -0,0 +1,1480 @@ +/* +Original code by Lee Thomason (www.grinninglizard.com) + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any +damages arising from the use of this software. + +Permission is granted to anyone to use this software for any +purpose, including commercial applications, and to alter it and +redistribute it freely, subject to the following restrictions: + +1. The origin of this software must not be misrepresented; you must +not claim that you wrote the original software. If you use this +software in a product, an acknowledgment in the product documentation +would be appreciated but is not required. + +2. Altered source versions must be plainly marked as such, and +must not be misrepresented as being the original software. + +3. This notice may not be removed or altered from any source +distribution. +*/ + +#ifndef TINYXML2_INCLUDED +#define TINYXML2_INCLUDED + +#include <cctype> +#include <climits> +#include <cstdio> +#include <cstring> +//#include <cstdarg> +#include <stdarg.h> +/* + TODO: intern strings instead of allocation. +*/ +/* + gcc: g++ -Wall tinyxml2.cpp xmltest.cpp -o gccxmltest.exe +*/ + +#if defined( _DEBUG ) || defined( DEBUG ) || defined (__DEBUG__) + #ifndef DEBUG + #define DEBUG + #endif +#endif + + +#if defined(DEBUG) + #if defined(_MSC_VER) + #define TIXMLASSERT( x ) if ( !(x)) { __debugbreak(); } //if ( !(x)) WinDebugBreak() + #elif defined (ANDROID_NDK) + #include <android/log.h> + #define TIXMLASSERT( x ) if ( !(x)) { __android_log_assert( "assert", "grinliz", "ASSERT in '%s' at %d.", __FILE__, __LINE__ ); } + #else + #include <assert.h> + #define TIXMLASSERT assert + #endif +#else + #define TIXMLASSERT( x ) {} +#endif + + +#if defined(_MSC_VER) && (_MSC_VER >= 1400 ) + // Microsoft visual studio, version 2005 and higher. + /*int _snprintf_s( + char *buffer, + size_t sizeOfBuffer, + size_t count, + const char *format [, + argument] ... + );*/ + inline int TIXML_SNPRINTF( char* buffer, size_t size, const char* format, ... 
) { + va_list va; + va_start( va, format ); + int result = vsnprintf_s( buffer, size, _TRUNCATE, format, va ); + va_end( va ); + return result; + } + #define TIXML_SSCANF sscanf_s +#else + // GCC version 3 and higher + //#warning( "Using sn* functions." ) + #define TIXML_SNPRINTF snprintf + #define TIXML_SSCANF sscanf +#endif + +static const int TIXML2_MAJOR_VERSION = 1; +static const int TIXML2_MINOR_VERSION = 0; +static const int TIXML2_PATCH_VERSION = 6; + +namespace tinyxml2 +{ +class XMLDocument; +class XMLElement; +class XMLAttribute; +class XMLComment; +class XMLNode; +class XMLText; +class XMLDeclaration; +class XMLUnknown; + +class XMLPrinter; + +/* + A class that wraps strings. Normally stores the start and end + pointers into the XML file itself, and will apply normalization + and entity translation if actually read. Can also store (and memory + manage) a traditional char[] +*/ +class StrPair +{ +public: + enum { + NEEDS_ENTITY_PROCESSING = 0x01, + NEEDS_NEWLINE_NORMALIZATION = 0x02, + + TEXT_ELEMENT = NEEDS_ENTITY_PROCESSING | NEEDS_NEWLINE_NORMALIZATION, + TEXT_ELEMENT_LEAVE_ENTITIES = NEEDS_NEWLINE_NORMALIZATION, + ATTRIBUTE_NAME = 0, + ATTRIBUTE_VALUE = NEEDS_ENTITY_PROCESSING | NEEDS_NEWLINE_NORMALIZATION, + ATTRIBUTE_VALUE_LEAVE_ENTITIES = NEEDS_NEWLINE_NORMALIZATION, + COMMENT = NEEDS_NEWLINE_NORMALIZATION + }; + + StrPair() : flags( 0 ), start( 0 ), end( 0 ) {} + ~StrPair(); + + void Set( char* _start, char* _end, int _flags ) { + Reset(); + this->start = _start; this->end = _end; this->flags = _flags | NEEDS_FLUSH; + } + const char* GetStr(); + bool Empty() const { return start == end; } + + void SetInternedStr( const char* str ) { Reset(); this->start = const_cast<char*>(str); } + void SetStr( const char* str, int flags=0 ); + + char* ParseText( char* in, const char* endTag, int strFlags ); + char* ParseName( char* in ); + + +private: + void Reset(); + + enum { + NEEDS_FLUSH = 0x100, + NEEDS_DELETE = 0x200 + }; + + // After parsing, if *end != 0, it can be set to zero. + int flags; + char* start; + char* end; +}; + + +/* + A dynamic array of Plain Old Data. Doesn't support constructors, etc. 
+ Has a small initial memory pool, so that low or no usage will not + cause a call to new/delete +*/ +template <class T, int INIT> +class DynArray +{ +public: + DynArray< T, INIT >() + { + mem = pool; + allocated = INIT; + size = 0; + } + ~DynArray() + { + if ( mem != pool ) { + delete [] mem; + } + } + void Push( T t ) + { + EnsureCapacity( size+1 ); + mem[size++] = t; + } + + T* PushArr( int count ) + { + EnsureCapacity( size+count ); + T* ret = &mem[size]; + size += count; + return ret; + } + T Pop() { + return mem[--size]; + } + void PopArr( int count ) + { + TIXMLASSERT( size >= count ); + size -= count; + } + + bool Empty() const { return size == 0; } + T& operator[](int i) { TIXMLASSERT( i>= 0 && i < size ); return mem[i]; } + const T& operator[](int i) const { TIXMLASSERT( i>= 0 && i < size ); return mem[i]; } + int Size() const { return size; } + int Capacity() const { return allocated; } + const T* Mem() const { return mem; } + T* Mem() { return mem; } + + +private: + void EnsureCapacity( int cap ) { + if ( cap > allocated ) { + int newAllocated = cap * 2; + T* newMem = new T[newAllocated]; + memcpy( newMem, mem, sizeof(T)*size ); // warning: not using constructors, only works for PODs + if ( mem != pool ) delete [] mem; + mem = newMem; + allocated = newAllocated; + } + } + + T* mem; + T pool[INIT]; + int allocated; // objects allocated + int size; // number objects in use +}; + + +/* + Parent virtual class of a pool for fast allocation + and deallocation of objects. +*/ +class MemPool +{ +public: + MemPool() {} + virtual ~MemPool() {} + + virtual int ItemSize() const = 0; + virtual void* Alloc() = 0; + virtual void Free( void* ) = 0; +}; + + +/* + Template child class to create pools of the correct type. +*/ +template< int SIZE > +class MemPoolT : public MemPool +{ +public: + MemPoolT() : root(0), currentAllocs(0), nAllocs(0), maxAllocs(0) {} + ~MemPoolT() { + // Delete the blocks. + for( int i=0; i<blockPtrs.Size(); ++i ) { + delete blockPtrs[i]; + } + } + + virtual int ItemSize() const { return SIZE; } + int CurrentAllocs() const { return currentAllocs; } + + virtual void* Alloc() { + if ( !root ) { + // Need a new block. + Block* block = new Block(); + blockPtrs.Push( block ); + + for( int i=0; i<COUNT-1; ++i ) { + block->chunk[i].next = &block->chunk[i+1]; + } + block->chunk[COUNT-1].next = 0; + root = block->chunk; + } + void* result = root; + root = root->next; + + ++currentAllocs; + if ( currentAllocs > maxAllocs ) maxAllocs = currentAllocs; + nAllocs++; + return result; + } + virtual void Free( void* mem ) { + if ( !mem ) return; + --currentAllocs; + Chunk* chunk = (Chunk*)mem; + memset( chunk, 0xfe, sizeof(Chunk) ); + chunk->next = root; + root = chunk; + } + void Trace( const char* name ) { + printf( "Mempool %s watermark=%d [%dk] current=%d size=%d nAlloc=%d blocks=%d\n", + name, maxAllocs, maxAllocs*SIZE/1024, currentAllocs, SIZE, nAllocs, blockPtrs.Size() ); + } + +private: + enum { COUNT = 1024/SIZE }; + union Chunk { + Chunk* next; + char mem[SIZE]; + }; + struct Block { + Chunk chunk[COUNT]; + }; + DynArray< Block*, 10 > blockPtrs; + Chunk* root; + + int currentAllocs; + int nAllocs; + int maxAllocs; +}; + + + +/** + Implements the interface to the "Visitor pattern" (see the Accept() method.) + If you call the Accept() method, it requires being passed a XMLVisitor + class to handle callbacks. For nodes that contain other nodes (Document, Element) + you will get called with a VisitEnter/VisitExit pair. Nodes that are always leafs + are simply called with Visit(). 
+ + If you return 'true' from a Visit method, recursive parsing will continue. If you return + false, <b>no children of this node or its sibilings</b> will be visited. + + All flavors of Visit methods have a default implementation that returns 'true' (continue + visiting). You need to only override methods that are interesting to you. + + Generally Accept() is called on the TiXmlDocument, although all nodes support visiting. + + You should never change the document from a callback. + + @sa XMLNode::Accept() +*/ +class XMLVisitor +{ +public: + virtual ~XMLVisitor() {} + + /// Visit a document. + virtual bool VisitEnter( const XMLDocument& /*doc*/ ) { return true; } + /// Visit a document. + virtual bool VisitExit( const XMLDocument& /*doc*/ ) { return true; } + + /// Visit an element. + virtual bool VisitEnter( const XMLElement& /*element*/, const XMLAttribute* /*firstAttribute*/ ) { return true; } + /// Visit an element. + virtual bool VisitExit( const XMLElement& /*element*/ ) { return true; } + + /// Visit a declaration. + virtual bool Visit( const XMLDeclaration& /*declaration*/ ) { return true; } + /// Visit a text node. + virtual bool Visit( const XMLText& /*text*/ ) { return true; } + /// Visit a comment node. + virtual bool Visit( const XMLComment& /*comment*/ ) { return true; } + /// Visit an unknown node. + virtual bool Visit( const XMLUnknown& /*unknown*/ ) { return true; } +}; + + +/* + Utility functionality. +*/ +class XMLUtil +{ +public: + // Anything in the high order range of UTF-8 is assumed to not be whitespace. This isn't + // correct, but simple, and usually works. + static const char* SkipWhiteSpace( const char* p ) { while( !IsUTF8Continuation(*p) && isspace( *reinterpret_cast<const unsigned char*>(p) ) ) { ++p; } return p; } + static char* SkipWhiteSpace( char* p ) { while( !IsUTF8Continuation(*p) && isspace( *reinterpret_cast<unsigned char*>(p) ) ) { ++p; } return p; } + + inline static bool StringEqual( const char* p, const char* q, int nChar=INT_MAX ) { + int n = 0; + if ( p == q ) { + return true; + } + while( *p && *q && *p == *q && n<nChar ) { + ++p; ++q; ++n; + } + if ( (n == nChar) || ( *p == 0 && *q == 0 ) ) { + return true; + } + return false; + } + inline static int IsUTF8Continuation( const char p ) { return p & 0x80; } + inline static int IsAlphaNum( unsigned char anyByte ) { return ( anyByte < 128 ) ? isalnum( anyByte ) : 1; } + inline static int IsAlpha( unsigned char anyByte ) { return ( anyByte < 128 ) ? isalpha( anyByte ) : 1; } + + static const char* ReadBOM( const char* p, bool* hasBOM ); + // p is the starting location, + // the UTF-8 value of the entity will be placed in value, and length filled in. 
+ static const char* GetCharacterRef( const char* p, char* value, int* length ); + static void ConvertUTF32ToUTF8( unsigned long input, char* output, int* length ); + + // converts primitive types to strings + static void ToStr( int v, char* buffer, int bufferSize ); + static void ToStr( unsigned v, char* buffer, int bufferSize ); + static void ToStr( bool v, char* buffer, int bufferSize ); + static void ToStr( float v, char* buffer, int bufferSize ); + static void ToStr( double v, char* buffer, int bufferSize ); + + // converts strings to primitive types + static bool ToInt( const char* str, int* value ); + static bool ToUnsigned( const char* str, unsigned* value ); + static bool ToBool( const char* str, bool* value ); + static bool ToFloat( const char* str, float* value ); + static bool ToDouble( const char* str, double* value ); +}; + + +/** XMLNode is a base class for every object that is in the + XML Document Object Model (DOM), except XMLAttributes. + Nodes have siblings, a parent, and children which can + be navigated. A node is always in a XMLDocument. + The type of a XMLNode can be queried, and it can + be cast to its more defined type. + + A XMLDocument allocates memory for all its Nodes. + When the XMLDocument gets deleted, all its Nodes + will also be deleted. + + @verbatim + A Document can contain: Element (container or leaf) + Comment (leaf) + Unknown (leaf) + Declaration( leaf ) + + An Element can contain: Element (container or leaf) + Text (leaf) + Attributes (not on tree) + Comment (leaf) + Unknown (leaf) + + @endverbatim +*/ +class XMLNode +{ + friend class XMLDocument; + friend class XMLElement; +public: + + /// Get the XMLDocument that owns this XMLNode. + const XMLDocument* GetDocument() const { return document; } + /// Get the XMLDocument that owns this XMLNode. + XMLDocument* GetDocument() { return document; } + + virtual XMLElement* ToElement() { return 0; } ///< Safely cast to an Element, or null. + virtual XMLText* ToText() { return 0; } ///< Safely cast to Text, or null. + virtual XMLComment* ToComment() { return 0; } ///< Safely cast to a Comment, or null. + virtual XMLDocument* ToDocument() { return 0; } ///< Safely cast to a Document, or null. + virtual XMLDeclaration* ToDeclaration() { return 0; } ///< Safely cast to a Declaration, or null. + virtual XMLUnknown* ToUnknown() { return 0; } ///< Safely cast to an Unknown, or null. + + virtual const XMLElement* ToElement() const { return 0; } + virtual const XMLText* ToText() const { return 0; } + virtual const XMLComment* ToComment() const { return 0; } + virtual const XMLDocument* ToDocument() const { return 0; } + virtual const XMLDeclaration* ToDeclaration() const { return 0; } + virtual const XMLUnknown* ToUnknown() const { return 0; } + + /** The meaning of 'value' changes for the specific type. + @verbatim + Document: empty + Element: name of the element + Comment: the comment text + Unknown: the tag contents + Text: the text string + @endverbatim + */ + const char* Value() const { return value.GetStr(); } + /** Set the Value of an XML node. + @sa Value() + */ + void SetValue( const char* val, bool staticMem=false ); + + /// Get the parent of this node on the DOM. + const XMLNode* Parent() const { return parent; } + XMLNode* Parent() { return parent; } + + /// Returns true if this node has no children. + bool NoChildren() const { return !firstChild; } + + /// Get the first child node, or null if none exists. 
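
Editor's note: a minimal sketch (not part of the patch) of the child/sibling navigation accessors declared in this class; the "modules"/"mod" names are invented.

    #include <cstdio>
    #include "tinyxml2.h"
    using namespace tinyxml2;

    int main()
    {
        XMLDocument doc;
        doc.Parse( "<modules><mod name=\"a\"/><mod name=\"b\"/></modules>" );

        const XMLElement* root = doc.FirstChildElement( "modules" );
        for ( const XMLElement* m = root->FirstChildElement( "mod" );
              m;
              m = m->NextSiblingElement( "mod" ) ) {
            printf( "module: %s\n", m->Attribute( "name" ) );
        }
        return 0;
    }
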
+ const XMLNode* FirstChild() const { return firstChild; } + XMLNode* FirstChild() { return firstChild; } + /** Get the first child element, or optionally the first child + element with the specified name. + */ + const XMLElement* FirstChildElement( const char* value=0 ) const; + XMLElement* FirstChildElement( const char* _value=0 ) { return const_cast<XMLElement*>(const_cast<const XMLNode*>(this)->FirstChildElement( _value )); } + + /// Get the last child node, or null if none exists. + const XMLNode* LastChild() const { return lastChild; } + XMLNode* LastChild() { return const_cast<XMLNode*>(const_cast<const XMLNode*>(this)->LastChild() ); } + + /** Get the last child element or optionally the last child + element with the specified name. + */ + const XMLElement* LastChildElement( const char* value=0 ) const; + XMLElement* LastChildElement( const char* _value=0 ) { return const_cast<XMLElement*>(const_cast<const XMLNode*>(this)->LastChildElement(_value) ); } + + /// Get the previous (left) sibling node of this node. + const XMLNode* PreviousSibling() const { return prev; } + XMLNode* PreviousSibling() { return prev; } + + /// Get the previous (left) sibling element of this node, with an opitionally supplied name. + const XMLElement* PreviousSiblingElement( const char* value=0 ) const ; + XMLElement* PreviousSiblingElement( const char* _value=0 ) { return const_cast<XMLElement*>(const_cast<const XMLNode*>(this)->PreviousSiblingElement( _value ) ); } + + /// Get the next (right) sibling node of this node. + const XMLNode* NextSibling() const { return next; } + XMLNode* NextSibling() { return next; } + + /// Get the next (right) sibling element of this node, with an opitionally supplied name. + const XMLElement* NextSiblingElement( const char* value=0 ) const; + XMLElement* NextSiblingElement( const char* _value=0 ) { return const_cast<XMLElement*>(const_cast<const XMLNode*>(this)->NextSiblingElement( _value ) ); } + + /** + Add a child node as the last (right) child. + */ + XMLNode* InsertEndChild( XMLNode* addThis ); + + XMLNode* LinkEndChild( XMLNode* addThis ) { return InsertEndChild( addThis ); } + /** + Add a child node as the first (left) child. + */ + XMLNode* InsertFirstChild( XMLNode* addThis ); + /** + Add a node after the specified child node. + */ + XMLNode* InsertAfterChild( XMLNode* afterThis, XMLNode* addThis ); + + /** + Delete all the children of this node. + */ + void DeleteChildren(); + + /** + Delete a child of this node. + */ + void DeleteChild( XMLNode* node ); + + /** + Make a copy of this node, but not its children. + You may pass in a Document pointer that will be + the owner of the new Node. If the 'document' is + null, then the node returned will be allocated + from the current Document. (this->GetDocument()) + + Note: if called on a XMLDocument, this will return null. + */ + virtual XMLNode* ShallowClone( XMLDocument* document ) const = 0; + + /** + Test if 2 nodes are the same, but don't test children. + The 2 nodes do not need to be in the same Document. + + Note: if called on a XMLDocument, this will return false. + */ + virtual bool ShallowEqual( const XMLNode* compare ) const = 0; + + /** Accept a hierarchical visit of the nodes in the TinyXML DOM. Every node in the + XML tree will be conditionally visited and the host will be called back + via the TiXmlVisitor interface. + + This is essentially a SAX interface for TinyXML. 
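
Editor's note: a minimal sketch (not part of the patch) of the visitor interface described here, counting element nodes through XMLDocument::Accept(); the input string is invented.

    #include <cstdio>
    #include "tinyxml2.h"
    using namespace tinyxml2;

    class ElementCounter : public XMLVisitor
    {
    public:
        ElementCounter() : count( 0 ) {}
        virtual bool VisitEnter( const XMLElement&, const XMLAttribute* )
        {
            ++count;
            return true;    // returning false would skip this element's children
        }
        int count;
    };

    int main()
    {
        XMLDocument doc;
        doc.Parse( "<a><b/><c><d/></c></a>" );

        ElementCounter counter;
        doc.Accept( &counter );
        printf( "%d element(s)\n", counter.count );  // prints 4
        return 0;
    }
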
(Note however it doesn't re-parse + the XML for the callbacks, so the performance of TinyXML is unchanged by using this + interface versus any other.) + + The interface has been based on ideas from: + + - http://www.saxproject.org/ + - http://c2.com/cgi/wiki?HierarchicalVisitorPattern + + Which are both good references for "visiting". + + An example of using Accept(): + @verbatim + TiXmlPrinter printer; + tinyxmlDoc.Accept( &printer ); + const char* xmlcstr = printer.CStr(); + @endverbatim + */ + virtual bool Accept( XMLVisitor* visitor ) const = 0; + + // internal + virtual char* ParseDeep( char*, StrPair* ); + +protected: + XMLNode( XMLDocument* ); + virtual ~XMLNode(); + XMLNode( const XMLNode& ); // not supported + XMLNode& operator=( const XMLNode& ); // not supported + + XMLDocument* document; + XMLNode* parent; + mutable StrPair value; + + XMLNode* firstChild; + XMLNode* lastChild; + + XMLNode* prev; + XMLNode* next; + +private: + MemPool* memPool; + void Unlink( XMLNode* child ); +}; + + +/** XML text. + + Note that a text node can have child element nodes, for example: + @verbatim + <root>This is <b>bold</b></root> + @endverbatim + + A text node can have 2 ways to output the next. "normal" output + and CDATA. It will default to the mode it was parsed from the XML file and + you generally want to leave it alone, but you can change the output mode with + SetCDATA() and query it with CDATA(). +*/ +class XMLText : public XMLNode +{ + friend class XMLBase; + friend class XMLDocument; +public: + virtual bool Accept( XMLVisitor* visitor ) const; + + virtual XMLText* ToText() { return this; } + virtual const XMLText* ToText() const { return this; } + + /// Declare whether this should be CDATA or standard text. + void SetCData( bool _isCData ) { this->isCData = _isCData; } + /// Returns true if this is a CDATA text element. + bool CData() const { return isCData; } + + char* ParseDeep( char*, StrPair* endTag ); + virtual XMLNode* ShallowClone( XMLDocument* document ) const; + virtual bool ShallowEqual( const XMLNode* compare ) const; + + +protected: + XMLText( XMLDocument* doc ) : XMLNode( doc ), isCData( false ) {} + virtual ~XMLText() {} + XMLText( const XMLText& ); // not supported + XMLText& operator=( const XMLText& ); // not supported + +private: + bool isCData; +}; + + +/** An XML Comment. */ +class XMLComment : public XMLNode +{ + friend class XMLDocument; +public: + virtual XMLComment* ToComment() { return this; } + virtual const XMLComment* ToComment() const { return this; } + + virtual bool Accept( XMLVisitor* visitor ) const; + + char* ParseDeep( char*, StrPair* endTag ); + virtual XMLNode* ShallowClone( XMLDocument* document ) const; + virtual bool ShallowEqual( const XMLNode* compare ) const; + +protected: + XMLComment( XMLDocument* doc ); + virtual ~XMLComment(); + XMLComment( const XMLComment& ); // not supported + XMLComment& operator=( const XMLComment& ); // not supported + +private: +}; + + +/** In correct XML the declaration is the first entry in the file. + @verbatim + <?xml version="1.0" standalone="yes"?> + @endverbatim + + TinyXML2 will happily read or write files without a declaration, + however. + + The text of the declaration isn't interpreted. It is parsed + and written as a string. 
+*/ +class XMLDeclaration : public XMLNode +{ + friend class XMLDocument; +public: + virtual XMLDeclaration* ToDeclaration() { return this; } + virtual const XMLDeclaration* ToDeclaration() const { return this; } + + virtual bool Accept( XMLVisitor* visitor ) const; + + char* ParseDeep( char*, StrPair* endTag ); + virtual XMLNode* ShallowClone( XMLDocument* document ) const; + virtual bool ShallowEqual( const XMLNode* compare ) const; + +protected: + XMLDeclaration( XMLDocument* doc ); + virtual ~XMLDeclaration(); + XMLDeclaration( const XMLDeclaration& ); // not supported + XMLDeclaration& operator=( const XMLDeclaration& ); // not supported +}; + + +/** Any tag that tinyXml doesn't recognize is saved as an + unknown. It is a tag of text, but should not be modified. + It will be written back to the XML, unchanged, when the file + is saved. + + DTD tags get thrown into TiXmlUnknowns. +*/ +class XMLUnknown : public XMLNode +{ + friend class XMLDocument; +public: + virtual XMLUnknown* ToUnknown() { return this; } + virtual const XMLUnknown* ToUnknown() const { return this; } + + virtual bool Accept( XMLVisitor* visitor ) const; + + char* ParseDeep( char*, StrPair* endTag ); + virtual XMLNode* ShallowClone( XMLDocument* document ) const; + virtual bool ShallowEqual( const XMLNode* compare ) const; + +protected: + XMLUnknown( XMLDocument* doc ); + virtual ~XMLUnknown(); + XMLUnknown( const XMLUnknown& ); // not supported + XMLUnknown& operator=( const XMLUnknown& ); // not supported +}; + + +enum { + XML_NO_ERROR = 0, + XML_SUCCESS = 0, + + XML_NO_ATTRIBUTE, + XML_WRONG_ATTRIBUTE_TYPE, + + XML_ERROR_FILE_NOT_FOUND, + XML_ERROR_FILE_COULD_NOT_BE_OPENED, + XML_ERROR_FILE_READ_ERROR, + XML_ERROR_ELEMENT_MISMATCH, + XML_ERROR_PARSING_ELEMENT, + XML_ERROR_PARSING_ATTRIBUTE, + XML_ERROR_IDENTIFYING_TAG, + XML_ERROR_PARSING_TEXT, + XML_ERROR_PARSING_CDATA, + XML_ERROR_PARSING_COMMENT, + XML_ERROR_PARSING_DECLARATION, + XML_ERROR_PARSING_UNKNOWN, + XML_ERROR_EMPTY_DOCUMENT, + XML_ERROR_MISMATCHED_ELEMENT, + XML_ERROR_PARSING, + + XML_CAN_NOT_CONVERT_TEXT, + XML_NO_TEXT_NODE +}; + + +/** An attribute is a name-value pair. Elements have an arbitrary + number of attributes, each with a unique name. + + @note The attributes are not XMLNodes. You may only query the + Next() attribute in a list. +*/ +class XMLAttribute +{ + friend class XMLElement; +public: + const char* Name() const { return name.GetStr(); } ///< The name of the attribute. + const char* Value() const { return value.GetStr(); } ///< The value of the attribute. + const XMLAttribute* Next() const { return next; } ///< The next attribute in the list. + + /** IntAttribute interprets the attribute as an integer, and returns the value. + If the value isn't an integer, 0 will be returned. There is no error checking; + use QueryIntAttribute() if you need error checking. + */ + int IntValue() const { int i=0; QueryIntValue( &i ); return i; } + /// Query as an unsigned integer. See IntAttribute() + unsigned UnsignedValue() const { unsigned i=0; QueryUnsignedValue( &i ); return i; } + /// Query as a boolean. See IntAttribute() + bool BoolValue() const { bool b=false; QueryBoolValue( &b ); return b; } + /// Query as a double. See IntAttribute() + double DoubleValue() const { double d=0; QueryDoubleValue( &d ); return d; } + /// Query as a float. See IntAttribute() + float FloatValue() const { float f=0; QueryFloatValue( &f ); return f; } + + /** QueryIntAttribute interprets the attribute as an integer, and returns the value + in the provided paremeter. 
The function will return XML_NO_ERROR on success, + and XML_WRONG_ATTRIBUTE_TYPE if the conversion is not successful. + */ + int QueryIntValue( int* value ) const; + /// See QueryIntAttribute + int QueryUnsignedValue( unsigned int* value ) const; + /// See QueryIntAttribute + int QueryBoolValue( bool* value ) const; + /// See QueryIntAttribute + int QueryDoubleValue( double* value ) const; + /// See QueryIntAttribute + int QueryFloatValue( float* value ) const; + + /// Set the attribute to a string value. + void SetAttribute( const char* value ); + /// Set the attribute to value. + void SetAttribute( int value ); + /// Set the attribute to value. + void SetAttribute( unsigned value ); + /// Set the attribute to value. + void SetAttribute( bool value ); + /// Set the attribute to value. + void SetAttribute( double value ); + /// Set the attribute to value. + void SetAttribute( float value ); + +private: + enum { BUF_SIZE = 200 }; + + XMLAttribute() : next( 0 ) {} + virtual ~XMLAttribute() {} + XMLAttribute( const XMLAttribute& ); // not supported + void operator=( const XMLAttribute& ); // not supported + void SetName( const char* name ); + + char* ParseDeep( char* p, bool processEntities ); + + mutable StrPair name; + mutable StrPair value; + XMLAttribute* next; + MemPool* memPool; +}; + + +/** The element is a container class. It has a value, the element name, + and can contain other elements, text, comments, and unknowns. + Elements also contain an arbitrary number of attributes. +*/ +class XMLElement : public XMLNode +{ + friend class XMLBase; + friend class XMLDocument; +public: + /// Get the name of an element (which is the Value() of the node.) + const char* Name() const { return Value(); } + /// Set the name of the element. + void SetName( const char* str, bool staticMem=false ) { SetValue( str, staticMem ); } + + virtual XMLElement* ToElement() { return this; } + virtual const XMLElement* ToElement() const { return this; } + virtual bool Accept( XMLVisitor* visitor ) const; + + /** Given an attribute name, Attribute() returns the value + for the attribute of that name, or null if none + exists. For example: + + @verbatim + const char* value = ele->Attribute( "foo" ); + @endverbatim + + The 'value' parameter is normally null. However, if specified, + the attribute will only be returned if the 'name' and 'value' + match. This allow you to write code: + + @verbatim + if ( ele->Attribute( "foo", "bar" ) ) callFooIsBar(); + @endverbatim + + rather than: + @verbatim + if ( ele->Attribute( "foo" ) ) { + if ( strcmp( ele->Attribute( "foo" ), "bar" ) == 0 ) callFooIsBar(); + } + @endverbatim + */ + const char* Attribute( const char* name, const char* value=0 ) const; + + /** Given an attribute name, IntAttribute() returns the value + of the attribute interpreted as an integer. 0 will be + returned if there is an error. 
For a method with error + checking, see QueryIntAttribute() + */ + int IntAttribute( const char* name ) const { int i=0; QueryIntAttribute( name, &i ); return i; } + /// See IntAttribute() + unsigned UnsignedAttribute( const char* name ) const{ unsigned i=0; QueryUnsignedAttribute( name, &i ); return i; } + /// See IntAttribute() + bool BoolAttribute( const char* name ) const { bool b=false; QueryBoolAttribute( name, &b ); return b; } + /// See IntAttribute() + double DoubleAttribute( const char* name ) const { double d=0; QueryDoubleAttribute( name, &d ); return d; } + /// See IntAttribute() + float FloatAttribute( const char* name ) const { float f=0; QueryFloatAttribute( name, &f ); return f; } + + /** Given an attribute name, QueryIntAttribute() returns + XML_NO_ERROR, XML_WRONG_ATTRIBUTE_TYPE if the conversion + can't be performed, or XML_NO_ATTRIBUTE if the attribute + doesn't exist. If successful, the result of the conversion + will be written to 'value'. If not successful, nothing will + be written to 'value'. This allows you to provide default + value: + + @verbatim + int value = 10; + QueryIntAttribute( "foo", &value ); // if "foo" isn't found, value will still be 10 + @endverbatim + */ + int QueryIntAttribute( const char* name, int* _value ) const { const XMLAttribute* a = FindAttribute( name ); if ( !a ) return XML_NO_ATTRIBUTE; return a->QueryIntValue( _value ); } + /// See QueryIntAttribute() + int QueryUnsignedAttribute( const char* name, unsigned int* _value ) const { const XMLAttribute* a = FindAttribute( name ); if ( !a ) return XML_NO_ATTRIBUTE; return a->QueryUnsignedValue( _value ); } + /// See QueryIntAttribute() + int QueryBoolAttribute( const char* name, bool* _value ) const { const XMLAttribute* a = FindAttribute( name ); if ( !a ) return XML_NO_ATTRIBUTE; return a->QueryBoolValue( _value ); } + /// See QueryIntAttribute() + int QueryDoubleAttribute( const char* name, double* _value ) const { const XMLAttribute* a = FindAttribute( name ); if ( !a ) return XML_NO_ATTRIBUTE; return a->QueryDoubleValue( _value ); } + /// See QueryIntAttribute() + int QueryFloatAttribute( const char* name, float* _value ) const { const XMLAttribute* a = FindAttribute( name ); if ( !a ) return XML_NO_ATTRIBUTE; return a->QueryFloatValue( _value ); } + + /// Sets the named attribute to value. + void SetAttribute( const char* name, const char* _value ) { XMLAttribute* a = FindOrCreateAttribute( name ); a->SetAttribute( _value ); } + /// Sets the named attribute to value. + void SetAttribute( const char* name, int _value ) { XMLAttribute* a = FindOrCreateAttribute( name ); a->SetAttribute( _value ); } + /// Sets the named attribute to value. + void SetAttribute( const char* name, unsigned _value ) { XMLAttribute* a = FindOrCreateAttribute( name ); a->SetAttribute( _value ); } + /// Sets the named attribute to value. + void SetAttribute( const char* name, bool _value ) { XMLAttribute* a = FindOrCreateAttribute( name ); a->SetAttribute( _value ); } + /// Sets the named attribute to value. + void SetAttribute( const char* name, double _value ) { XMLAttribute* a = FindOrCreateAttribute( name ); a->SetAttribute( _value ); } + + /** + Delete an attribute. + */ + void DeleteAttribute( const char* name ); + + /// Return the first attribute in the list. + const XMLAttribute* FirstAttribute() const { return rootAttribute; } + /// Query a specific attribute in the list. 
+ const XMLAttribute* FindAttribute( const char* name ) const; + + /** Convenience function for easy access to the text inside an element. Although easy + and concise, GetText() is limited compared to getting the TiXmlText child + and accessing it directly. + + If the first child of 'this' is a TiXmlText, the GetText() + returns the character string of the Text node, else null is returned. + + This is a convenient method for getting the text of simple contained text: + @verbatim + <foo>This is text</foo> + const char* str = fooElement->GetText(); + @endverbatim + + 'str' will be a pointer to "This is text". + + Note that this function can be misleading. If the element foo was created from + this XML: + @verbatim + <foo><b>This is text</b></foo> + @endverbatim + + then the value of str would be null. The first child node isn't a text node, it is + another element. From this XML: + @verbatim + <foo>This is <b>text</b></foo> + @endverbatim + GetText() will return "This is ". + */ + const char* GetText() const; + + /** + Convenience method to query the value of a child text node. This is probably best + shown by example. Given you have a document is this form: + @verbatim + <point> + <x>1</x> + <y>1.4</y> + </point> + @endverbatim + + The QueryIntText() and similar functions provide a safe and easier way to get to the + "value" of x and y. + + @verbatim + int x = 0; + float y = 0; // types of x and y are contrived for example + const XMLElement* xElement = pointElement->FirstChildElement( "x" ); + const XMLElement* yElement = pointElement->FirstChildElement( "y" ); + xElement->QueryIntText( &x ); + yElement->QueryFloatText( &y ); + @endverbatim + + @returns XML_SUCCESS (0) on success, XML_CAN_NOT_CONVERT_TEXT if the text cannot be converted + to the requested type, and XML_NO_TEXT_NODE if there is no child text to query. + + */ + int QueryIntText( int* _value ) const; + /// See QueryIntText() + int QueryUnsignedText( unsigned* _value ) const; + /// See QueryIntText() + int QueryBoolText( bool* _value ) const; + /// See QueryIntText() + int QueryDoubleText( double* _value ) const; + /// See QueryIntText() + int QueryFloatText( float* _value ) const; + + // internal: + enum { + OPEN, // <foo> + CLOSED, // <foo/> + CLOSING // </foo> + }; + int ClosingType() const { return closingType; } + char* ParseDeep( char* p, StrPair* endTag ); + virtual XMLNode* ShallowClone( XMLDocument* document ) const; + virtual bool ShallowEqual( const XMLNode* compare ) const; + +private: + XMLElement( XMLDocument* doc ); + virtual ~XMLElement(); + XMLElement( const XMLElement& ); // not supported + void operator=( const XMLElement& ); // not supported + + XMLAttribute* FindAttribute( const char* name ); + XMLAttribute* FindOrCreateAttribute( const char* name ); + //void LinkAttribute( XMLAttribute* attrib ); + char* ParseAttributes( char* p ); + + int closingType; + // The attribute list is ordered; there is no 'lastAttribute' + // because the list needs to be scanned for dupes before adding + // a new attribute. + XMLAttribute* rootAttribute; +}; + + +/** A Document binds together all the functionality. + It can be saved, loaded, and printed to the screen. + All Nodes are connected and allocated to a Document. + If the Document is deleted, all its Nodes are also deleted. 
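+    A typical round trip, sketched under the assumption that a file named
+    "settings.xml" exists on disk:
+    @verbatim
+    XMLDocument doc;
+    if ( doc.LoadFile( "settings.xml" ) == XML_NO_ERROR ) {
+        XMLElement* root = doc.RootElement();
+        if ( root ) {
+            root->SetAttribute( "version", 2 );
+            doc.SaveFile( "settings.xml" );
+        }
+    }
+    @endverbatim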
+*/ +class XMLDocument : public XMLNode +{ + friend class XMLElement; +public: + /// constructor + XMLDocument( bool processEntities = true ); + ~XMLDocument(); + + virtual XMLDocument* ToDocument() { return this; } + virtual const XMLDocument* ToDocument() const { return this; } + + /** + Parse an XML file from a character string. + Returns XML_NO_ERROR (0) on success, or + an errorID. + */ + int Parse( const char* xml ); + + /** + Load an XML file from disk. + Returns XML_NO_ERROR (0) on success, or + an errorID. + */ + int LoadFile( const char* filename ); + + /** + Load an XML file from disk. You are responsible + for providing and closing the FILE*. + + Returns XML_NO_ERROR (0) on success, or + an errorID. + */ + int LoadFile( FILE* ); + + /** + Save the XML file to disk. + Returns XML_NO_ERROR (0) on success, or + an errorID. + */ + int SaveFile( const char* filename ); + + /** + Save the XML file to disk. You are responsible + for providing and closing the FILE*. + + Returns XML_NO_ERROR (0) on success, or + an errorID. + */ + int SaveFile( FILE* ); + + bool ProcessEntities() const { return processEntities; } + + /** + Returns true if this document has a leading Byte Order Mark of UTF8. + */ + bool HasBOM() const { return writeBOM; } + /** Sets whether to write the BOM when writing the file. + */ + void SetBOM( bool useBOM ) { writeBOM = useBOM; } + + /** Return the root element of DOM. Equivalent to FirstChildElement(). + To get the first node, use FirstChild(). + */ + XMLElement* RootElement() { return FirstChildElement(); } + const XMLElement* RootElement() const { return FirstChildElement(); } + + /** Print the Document. If the Printer is not provided, it will + print to stdout. If you provide Printer, this can print to a file: + @verbatim + XMLPrinter printer( fp ); + doc.Print( &printer ); + @endverbatim + + Or you can use a printer to print to memory: + @verbatim + XMLPrinter printer; + doc->Print( &printer ); + // printer.CStr() has a const char* to the XML + @endverbatim + */ + void Print( XMLPrinter* streamer=0 ); + virtual bool Accept( XMLVisitor* visitor ) const; + + /** + Create a new Element associated with + this Document. The memory for the Element + is managed by the Document. + */ + XMLElement* NewElement( const char* name ); + /** + Create a new Comment associated with + this Document. The memory for the Comment + is managed by the Document. + */ + XMLComment* NewComment( const char* comment ); + /** + Create a new Text associated with + this Document. The memory for the Text + is managed by the Document. + */ + XMLText* NewText( const char* text ); + /** + Create a new Declaration associated with + this Document. The memory for the object + is managed by the Document. + + If the 'text' param is null, the standard + declaration is used.: + @verbatim + <?xml version="1.0" encoding="UTF-8"?> + @endverbatim + */ + XMLDeclaration* NewDeclaration( const char* text=0 ); + /** + Create a new Unknown associated with + this Document. The memory for the object + is managed by the Document. + */ + XMLUnknown* NewUnknown( const char* text ); + + /** + Delete a node associated with this document. + It will be unlinked from the DOM. + */ + void DeleteNode( XMLNode* node ) { node->parent->DeleteChild( node ); } + + void SetError( int error, const char* str1, const char* str2 ); + + /// Return true if there was an error parsing the document. + bool Error() const { return errorID != XML_NO_ERROR; } + /// Return the errorID. 
+ int ErrorID() const { return errorID; } + /// Return a possibly helpful diagnostic location or string. + const char* GetErrorStr1() const { return errorStr1; } + /// Return a possibly helpful secondary diagnostic location or string. + const char* GetErrorStr2() const { return errorStr2; } + /// If there is an error, print it to stdout. + void PrintError() const; + + // internal + char* Identify( char* p, XMLNode** node ); + + virtual XMLNode* ShallowClone( XMLDocument* /*document*/ ) const { return 0; } + virtual bool ShallowEqual( const XMLNode* /*compare*/ ) const { return false; } + +private: + XMLDocument( const XMLDocument& ); // not supported + void operator=( const XMLDocument& ); // not supported + void InitDocument(); + + bool writeBOM; + bool processEntities; + int errorID; + const char* errorStr1; + const char* errorStr2; + char* charBuffer; + + MemPoolT< sizeof(XMLElement) > elementPool; + MemPoolT< sizeof(XMLAttribute) > attributePool; + MemPoolT< sizeof(XMLText) > textPool; + MemPoolT< sizeof(XMLComment) > commentPool; +}; + + +/** + A XMLHandle is a class that wraps a node pointer with null checks; this is + an incredibly useful thing. Note that XMLHandle is not part of the TinyXML + DOM structure. It is a separate utility class. + + Take an example: + @verbatim + <Document> + <Element attributeA = "valueA"> + <Child attributeB = "value1" /> + <Child attributeB = "value2" /> + </Element> + </Document> + @endverbatim + + Assuming you want the value of "attributeB" in the 2nd "Child" element, it's very + easy to write a *lot* of code that looks like: + + @verbatim + XMLElement* root = document.FirstChildElement( "Document" ); + if ( root ) + { + XMLElement* element = root->FirstChildElement( "Element" ); + if ( element ) + { + XMLElement* child = element->FirstChildElement( "Child" ); + if ( child ) + { + XMLElement* child2 = child->NextSiblingElement( "Child" ); + if ( child2 ) + { + // Finally do something useful. + @endverbatim + + And that doesn't even cover "else" cases. XMLHandle addresses the verbosity + of such code. A XMLHandle checks for null pointers so it is perfectly safe + and correct to use: + + @verbatim + XMLHandle docHandle( &document ); + XMLElement* child2 = docHandle.FirstChild( "Document" ).FirstChild( "Element" ).FirstChild().NextSibling().ToElement(); + if ( child2 ) + { + // do something useful + @endverbatim + + Which is MUCH more concise and useful. + + It is also safe to copy handles - internally they are nothing more than node pointers. + @verbatim + XMLHandle handleCopy = handle; + @endverbatim + + See also XMLConstHandle, which is the same as XMLHandle, but operates on const objects. +*/ +class XMLHandle +{ +public: + /// Create a handle from any node (at any depth of the tree.) This can be a null pointer. + XMLHandle( XMLNode* _node ) { node = _node; } + /// Create a handle from a node. + XMLHandle( XMLNode& _node ) { node = &_node; } + /// Copy constructor + XMLHandle( const XMLHandle& ref ) { node = ref.node; } + /// Assignment + XMLHandle& operator=( const XMLHandle& ref ) { node = ref.node; return *this; } + + /// Get the first child of this handle. + XMLHandle FirstChild() { return XMLHandle( node ? node->FirstChild() : 0 ); } + /// Get the first child element of this handle. + XMLHandle FirstChildElement( const char* value=0 ) { return XMLHandle( node ? node->FirstChildElement( value ) : 0 ); } + /// Get the last child of this handle. + XMLHandle LastChild() { return XMLHandle( node ? 
node->LastChild() : 0 ); } + /// Get the last child element of this handle. + XMLHandle LastChildElement( const char* _value=0 ) { return XMLHandle( node ? node->LastChildElement( _value ) : 0 ); } + /// Get the previous sibling of this handle. + XMLHandle PreviousSibling() { return XMLHandle( node ? node->PreviousSibling() : 0 ); } + /// Get the previous sibling element of this handle. + XMLHandle PreviousSiblingElement( const char* _value=0 ) { return XMLHandle( node ? node->PreviousSiblingElement( _value ) : 0 ); } + /// Get the next sibling of this handle. + XMLHandle NextSibling() { return XMLHandle( node ? node->NextSibling() : 0 ); } + /// Get the next sibling element of this handle. + XMLHandle NextSiblingElement( const char* _value=0 ) { return XMLHandle( node ? node->NextSiblingElement( _value ) : 0 ); } + + /// Safe cast to XMLNode. This can return null. + XMLNode* ToNode() { return node; } + /// Safe cast to XMLElement. This can return null. + XMLElement* ToElement() { return ( ( node && node->ToElement() ) ? node->ToElement() : 0 ); } + /// Safe cast to XMLText. This can return null. + XMLText* ToText() { return ( ( node && node->ToText() ) ? node->ToText() : 0 ); } + /// Safe cast to XMLUnknown. This can return null. + XMLUnknown* ToUnknown() { return ( ( node && node->ToUnknown() ) ? node->ToUnknown() : 0 ); } + /// Safe cast to XMLDeclaration. This can return null. + XMLDeclaration* ToDeclaration() { return ( ( node && node->ToDeclaration() ) ? node->ToDeclaration() : 0 ); } + +private: + XMLNode* node; +}; + + +/** + A variant of the XMLHandle class for working with const XMLNodes and Documents. It is the + same in all regards, except for the 'const' qualifiers. See XMLHandle for API. +*/ +class XMLConstHandle +{ +public: + XMLConstHandle( const XMLNode* _node ) { node = _node; } + XMLConstHandle( const XMLNode& _node ) { node = &_node; } + XMLConstHandle( const XMLConstHandle& ref ) { node = ref.node; } + + XMLConstHandle& operator=( const XMLConstHandle& ref ) { node = ref.node; return *this; } + + const XMLConstHandle FirstChild() const { return XMLConstHandle( node ? node->FirstChild() : 0 ); } + const XMLConstHandle FirstChildElement( const char* value=0 ) const { return XMLConstHandle( node ? node->FirstChildElement( value ) : 0 ); } + const XMLConstHandle LastChild() const { return XMLConstHandle( node ? node->LastChild() : 0 ); } + const XMLConstHandle LastChildElement( const char* _value=0 ) const { return XMLConstHandle( node ? node->LastChildElement( _value ) : 0 ); } + const XMLConstHandle PreviousSibling() const { return XMLConstHandle( node ? node->PreviousSibling() : 0 ); } + const XMLConstHandle PreviousSiblingElement( const char* _value=0 ) const { return XMLConstHandle( node ? node->PreviousSiblingElement( _value ) : 0 ); } + const XMLConstHandle NextSibling() const { return XMLConstHandle( node ? node->NextSibling() : 0 ); } + const XMLConstHandle NextSiblingElement( const char* _value=0 ) const { return XMLConstHandle( node ? node->NextSiblingElement( _value ) : 0 ); } + + + const XMLNode* ToNode() const { return node; } + const XMLElement* ToElement() const { return ( ( node && node->ToElement() ) ? node->ToElement() : 0 ); } + const XMLText* ToText() const { return ( ( node && node->ToText() ) ? node->ToText() : 0 ); } + const XMLUnknown* ToUnknown() const { return ( ( node && node->ToUnknown() ) ? node->ToUnknown() : 0 ); } + const XMLDeclaration* ToDeclaration() const { return ( ( node && node->ToDeclaration() ) ? 
node->ToDeclaration() : 0 ); } + +private: + const XMLNode* node; +}; + + +/** + Printing functionality. The XMLPrinter gives you more + options than the XMLDocument::Print() method. + + It can: + -# Print to memory. + -# Print to a file you provide. + -# Print XML without a XMLDocument. + + Print to Memory + + @verbatim + XMLPrinter printer; + doc->Print( &printer ); + SomeFunction( printer.CStr() ); + @endverbatim + + Print to a File + + You provide the file pointer. + @verbatim + XMLPrinter printer( fp ); + doc.Print( &printer ); + @endverbatim + + Print without a XMLDocument + + When loading, an XML parser is very useful. However, sometimes + when saving, it just gets in the way. The code is often set up + for streaming, and constructing the DOM is just overhead. + + The Printer supports the streaming case. The following code + prints out a trivially simple XML file without ever creating + an XML document. + + @verbatim + XMLPrinter printer( fp ); + printer.OpenElement( "foo" ); + printer.PushAttribute( "foo", "bar" ); + printer.CloseElement(); + @endverbatim +*/ +class XMLPrinter : public XMLVisitor +{ +public: + /** Construct the printer. If the FILE* is specified, + this will print to the FILE. Else it will print + to memory, and the result is available in CStr(). + If 'compact' is set to true, then output is created + with only required whitespace and newlines. + */ + XMLPrinter( FILE* file=0, bool compact = false ); + ~XMLPrinter() {} + + /** If streaming, write the BOM and declaration. */ + void PushHeader( bool writeBOM, bool writeDeclaration ); + /** If streaming, start writing an element. + The element must be closed with CloseElement() + */ + void OpenElement( const char* name ); + /// If streaming, add an attribute to an open element. + void PushAttribute( const char* name, const char* value ); + void PushAttribute( const char* name, int value ); + void PushAttribute( const char* name, unsigned value ); + void PushAttribute( const char* name, bool value ); + void PushAttribute( const char* name, double value ); + /// If streaming, close the Element. + void CloseElement(); + + /// Add a text node. + void PushText( const char* text, bool cdata=false ); + /// Add a text node from an integer. + void PushText( int value ); + /// Add a text node from an unsigned. + void PushText( unsigned value ); + /// Add a text node from a bool. + void PushText( bool value ); + /// Add a text node from a float. + void PushText( float value ); + /// Add a text node from a double. + void PushText( double value ); + + /// Add a comment + void PushComment( const char* comment ); + + void PushDeclaration( const char* value ); + void PushUnknown( const char* value ); + + virtual bool VisitEnter( const XMLDocument& /*doc*/ ); + virtual bool VisitExit( const XMLDocument& /*doc*/ ) { return true; } + + virtual bool VisitEnter( const XMLElement& element, const XMLAttribute* attribute ); + virtual bool VisitExit( const XMLElement& element ); + + virtual bool Visit( const XMLText& text ); + virtual bool Visit( const XMLComment& comment ); + virtual bool Visit( const XMLDeclaration& declaration ); + virtual bool Visit( const XMLUnknown& unknown ); + + /** + If in print to memory mode, return a pointer to + the XML file in memory. + */ + const char* CStr() const { return buffer.Mem(); } + /** + If in print to memory mode, return the size + of the XML file in memory. (Note the size returned + includes the terminating null.) 
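+        As a sketch, CStr() and CStrSize() can feed an API that wants an explicit
+        length; the -1 below drops that terminating null ('doc' and the open FILE*
+        'fp' are assumed to exist):
+        @verbatim
+        XMLPrinter printer;
+        doc.Print( &printer );
+        fwrite( printer.CStr(), 1, printer.CStrSize() - 1, fp );
+        @endverbatim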
+ */ + int CStrSize() const { return buffer.Size(); } + +private: + void SealElement(); + void PrintSpace( int depth ); + void PrintString( const char*, bool restrictedEntitySet ); // prints out, after detecting entities. + void Print( const char* format, ... ); + + bool elementJustOpened; + bool firstElement; + FILE* fp; + int depth; + int textDepth; + bool processEntities; + bool compactMode; + + enum { + ENTITY_RANGE = 64, + BUF_SIZE = 200 + }; + bool entityFlag[ENTITY_RANGE]; + bool restrictedEntityFlag[ENTITY_RANGE]; + + DynArray< const char*, 10 > stack; + DynArray< char, 20 > buffer; +#ifdef _MSC_VER + DynArray< char, 20 > accumulator; +#endif +}; + + +} // tinyxml2 + + +#endif // TINYXML2_INCLUDED diff --git a/patch/llvm/llvm-3.5.patch b/patch/llvm/llvm-3.5.patch new file mode 100644 index 0000000..8ade786 --- /dev/null +++ b/patch/llvm/llvm-3.5.patch @@ -0,0 +1,864 @@ +diff --git a/include/llvm/MC/MCInst.h b/include/llvm/MC/MCInst.h +index 6918280..8883165 100644 +--- a/include/llvm/MC/MCInst.h ++++ b/include/llvm/MC/MCInst.h +@@ -20,6 +20,7 @@ + #include "llvm/ADT/StringRef.h" + #include "llvm/Support/DataTypes.h" + #include "llvm/Support/SMLoc.h" ++#include "llvm/IR/DebugLoc.h" + + namespace llvm { + class raw_ostream; +@@ -151,6 +152,7 @@ class MCInst { + unsigned Opcode; + SMLoc Loc; + SmallVector<MCOperand, 8> Operands; ++ DebugLoc DbgLoc; + public: + MCInst() : Opcode(0) {} + +@@ -160,6 +162,9 @@ public: + void setLoc(SMLoc loc) { Loc = loc; } + SMLoc getLoc() const { return Loc; } + ++ void setDebugLoc(DebugLoc &Loc) { DbgLoc = Loc; } ++ DebugLoc getDebugLoc() const { return DbgLoc; } ++ + const MCOperand &getOperand(unsigned i) const { return Operands[i]; } + MCOperand &getOperand(unsigned i) { return Operands[i]; } + unsigned getNumOperands() const { return Operands.size(); } +diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h +index 5dda8bd..0bbd7fb 100644 +--- a/include/llvm/Target/TargetRegisterInfo.h ++++ b/include/llvm/Target/TargetRegisterInfo.h +@@ -238,6 +238,8 @@ protected: + virtual ~TargetRegisterInfo(); + public: + ++ std::vector<unsigned> HQEMUReservedRegs; ++ + // Register numbers can represent physical registers, virtual registers, and + // sometimes stack slots. The unsigned values are divided into these ranges: + // +@@ -452,6 +454,10 @@ public: + /// used by register scavenger to determine what registers are free. + virtual BitVector getReservedRegs(const MachineFunction &MF) const = 0; + ++ /// Get/Set extra reserved register(s) by HQEMU. ++ virtual void getHQEMUReservedRegs(BitVector &Reserved) const { } ++ virtual void setHQEMUReservedRegs(std::string RegName) { } ++ + /// getMatchingSuperReg - Return a super-register of the specified register + /// Reg so its sub-register of index SubIdx is Reg. + unsigned getMatchingSuperReg(unsigned Reg, unsigned SubIdx, +diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp +index 2ba1f86..f727dd6 100644 +--- a/lib/ExecutionEngine/JIT/JITEmitter.cpp ++++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp +@@ -365,7 +365,10 @@ namespace { + + } + ~JITEmitter() { ++#if 0 ++ // HQEMU has the ownership of the memory manager. Do not delete it. 
+ delete MemMgr; ++#endif + } + + JITResolver &getJITResolver() { return Resolver; } +diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h +index 100e9a2..fc9fcfc 100644 +--- a/lib/ExecutionEngine/MCJIT/MCJIT.h ++++ b/lib/ExecutionEngine/MCJIT/MCJIT.h +@@ -77,7 +77,11 @@ public: + + private: + MCJIT *ParentEngine; ++#if 0 + std::unique_ptr<RTDyldMemoryManager> ClientMM; ++#endif ++ // HQEMU has the ownership of the memory manager. Do not delete it. ++ RTDyldMemoryManager *ClientMM; + }; + + // About Module states: added->loaded->finalized. +diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp +index 32b5f4a..bb873a9 100644 +--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp ++++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp +@@ -149,9 +149,39 @@ getReservedRegs(const MachineFunction &MF) const { + for (MCSubRegIterator SI(*I, this); SI.isValid(); ++SI) + if (Reserved.test(*SI)) Reserved.set(*I); + ++ getHQEMUReservedRegs(Reserved); + return Reserved; + } + ++void ARMBaseRegisterInfo::getHQEMUReservedRegs(BitVector &Reserved) const { ++ for (unsigned i = 0, e = HQEMUReservedRegs.size(); i != e; ++i) ++ Reserved.set(HQEMUReservedRegs[i]); ++} ++ ++void ARMBaseRegisterInfo::setHQEMUReservedRegs(std::string RegName) { ++#define RESERVE(x) \ ++ do { \ ++ HQEMUReservedRegs.push_back(ARM::R ## x); \ ++ return; \ ++ } while(0) ++ ++ if (RegName == "r0") RESERVE(0); ++ if (RegName == "r1") RESERVE(1); ++ if (RegName == "r2") RESERVE(2); ++ if (RegName == "r3") RESERVE(3); ++ if (RegName == "r4") RESERVE(4); ++ if (RegName == "r5") RESERVE(5); ++ if (RegName == "r6") RESERVE(6); ++ if (RegName == "r7") RESERVE(7); ++ if (RegName == "r8") RESERVE(8); ++ if (RegName == "r9") RESERVE(9); ++ if (RegName == "r10") RESERVE(10); ++ if (RegName == "r11") RESERVE(11); ++ if (RegName == "r12") RESERVE(12); ++ ++#undef RESERVE ++} ++ + const TargetRegisterClass* + ARMBaseRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) + const { +diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h +index 833d3f2..fdcc6be 100644 +--- a/lib/Target/ARM/ARMBaseRegisterInfo.h ++++ b/lib/Target/ARM/ARMBaseRegisterInfo.h +@@ -117,6 +117,9 @@ public: + + BitVector getReservedRegs(const MachineFunction &MF) const override; + ++ void getHQEMUReservedRegs(BitVector &Reserved) const; ++ void setHQEMUReservedRegs(std::string RegName); ++ + const TargetRegisterClass * + getPointerRegClass(const MachineFunction &MF, + unsigned Kind = 0) const override; +diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +index 075db11..8b469c5 100644 +--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp ++++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +@@ -164,6 +164,9 @@ public: + const MCInst &MI, const MCInstrDesc &Desc, + const MCSubtargetInfo &STI, + raw_ostream &OS) const; ++ ++ bool EmitHQEMUInstruction(const MCInst &MI, raw_ostream &OS, ++ SmallVectorImpl<MCFixup> &Fixups) const; + }; + + } // end anonymous namespace +@@ -1151,6 +1154,50 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, + } + } + ++bool X86MCCodeEmitter:: ++EmitHQEMUInstruction(const MCInst &MI, raw_ostream &OS, ++ SmallVectorImpl<MCFixup> &Fixups) const { ++ /* NOTE: the following flags must be synchronized with those in file ++ * llvm-opc.h of the HQEMU source tree. 
*/ ++ enum { ++ PATCH_HQEMU = 0x4182U, ++ PATCH_DUMMY, ++ PATCH_EXIT_TB, ++ PATCH_DIRECT_JUMP, ++ PATCH_TRACE_BLOCK_CHAINING, ++ PATCH_QMMU ++ }; ++ ++ unsigned Opcode = MI.getOpcode(); ++ switch (Opcode) { ++ case X86::TRAP: ++ case X86::RETQ: ++ break; ++ default: return false; ++ } ++ ++ unsigned CurByte = 0; ++ DebugLoc Loc = MI.getDebugLoc(); ++ if (Loc.isUnknown()) ++ return false; ++ ++ unsigned PatchType = Loc.getLine(); ++ if (PatchType < PATCH_HQEMU || PatchType > PATCH_QMMU) ++ return false; ++ ++ if (Opcode == X86::TRAP) { ++ for (unsigned i = 0; i != 8; ++i) ++ EmitByte(0x90, CurByte, OS); ++ return true; ++ } ++ if (Opcode == X86::RETQ) { ++ for (unsigned i = 0; i != 5; ++i) ++ EmitByte(0x90, CurByte, OS); ++ return true; ++ } ++ return false; ++} ++ + void X86MCCodeEmitter:: + EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, +@@ -1159,6 +1206,9 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS, + const MCInstrDesc &Desc = MCII.get(Opcode); + uint64_t TSFlags = Desc.TSFlags; + ++ if (EmitHQEMUInstruction(MI, OS, Fixups)) ++ return; ++ + // Pseudo instructions don't get encoded. + if ((TSFlags & X86II::FormMask) == X86II::Pseudo) + return; +diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp +index a3ae7ee..1555712 100644 +--- a/lib/Target/X86/X86CodeEmitter.cpp ++++ b/lib/Target/X86/X86CodeEmitter.cpp +@@ -105,6 +105,8 @@ namespace { + void emitMemModRMByte(const MachineInstr &MI, + unsigned Op, unsigned RegOpcodeField, + intptr_t PCAdj = 0); ++ void emitQMMU(MachineInstr &MI, const MCInstrDesc *Desc); ++ bool emitHQEMUInstruction(MachineInstr &MI, const MCInstrDesc *Desc); + + unsigned getX86RegNum(unsigned RegNo) const { + const TargetRegisterInfo *TRI = TM.getRegisterInfo(); +@@ -113,6 +115,13 @@ namespace { + + unsigned char getVEXRegisterEncoding(const MachineInstr &MI, + unsigned OpNum) const; ++ unsigned char getWriteMaskRegisterEncoding(const MachineInstr &MI, ++ unsigned OpNum) const { ++ assert(X86::K0 != MI.getOperand(OpNum).getReg() && ++ "Invalid mask register as write-mask!"); ++ unsigned MaskRegNum = getX86RegNum(MI.getOperand(OpNum).getReg()); ++ return MaskRegNum; ++ } + }; + + template<class CodeEmitter> +@@ -748,9 +757,11 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags, + const MCInstrDesc *Desc) const { + unsigned char Encoding = (TSFlags & X86II::EncodingMask) >> + X86II::EncodingShift; ++ bool HasEVEX_K = ((TSFlags >> X86II::VEXShift) & X86II::EVEX_K); + bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V; + bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3; + bool HasMemOp4 = (TSFlags >> X86II::VEXShift) & X86II::MemOp4; ++ bool HasEVEX_RC = (TSFlags >> X86II::VEXShift) & X86II::EVEX_RC; + + // VEX_R: opcode externsion equivalent to REX.R in + // 1's complement (inverted) form +@@ -759,6 +770,7 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags, + // 0: Same as REX_R=1 (64 bit mode only) + // + unsigned char VEX_R = 0x1; ++ unsigned char EVEX_R2 = 0x1; + + // VEX_X: equivalent to REX.X, only used when a + // register is used for index in SIB Byte. +@@ -793,6 +805,7 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags, + // VEX_4V (VEX vvvv field): a register specifier + // (in 1's complement form) or 1111 if unused. 
+ unsigned char VEX_4V = 0xf; ++ unsigned char EVEX_V2 = 0x1; + + // VEX_L (Vector Length): + // +@@ -800,6 +813,7 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags, + // 1: 256-bit vector + // + unsigned char VEX_L = 0; ++ unsigned char EVEX_L2 = 0; + + // VEX_PP: opcode extension providing equivalent + // functionality of a SIMD prefix +@@ -811,11 +825,36 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags, + // + unsigned char VEX_PP = 0; + ++ // EVEX_U ++ unsigned char EVEX_U = 1; // Always '1' so far ++ ++ // EVEX_z ++ unsigned char EVEX_z = 0; ++ ++ // EVEX_b ++ unsigned char EVEX_b = 0; ++ ++ // EVEX_rc ++ unsigned char EVEX_rc = 0; ++ ++ // EVEX_aaa ++ unsigned char EVEX_aaa = 0; ++ ++ bool EncodeRC = false; ++ + if ((TSFlags >> X86II::VEXShift) & X86II::VEX_W) + VEX_W = 1; + + if ((TSFlags >> X86II::VEXShift) & X86II::VEX_L) + VEX_L = 1; ++ if (((TSFlags >> X86II::VEXShift) & X86II::EVEX_L2)) ++ EVEX_L2 = 1; ++ ++ if (HasEVEX_K && ((TSFlags >> X86II::VEXShift) & X86II::EVEX_Z)) ++ EVEX_z = 1; ++ ++ if (((TSFlags >> X86II::VEXShift) & X86II::EVEX_B)) ++ EVEX_b = 1; + + switch (TSFlags & X86II::OpPrefixMask) { + default: break; // VEX_PP already correct +@@ -836,15 +875,7 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags, + + // Classify VEX_B, VEX_4V, VEX_R, VEX_X + unsigned NumOps = Desc->getNumOperands(); +- unsigned CurOp = 0; +- if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) == 0) +- ++CurOp; +- else if (NumOps > 3 && Desc->getOperandConstraint(2, MCOI::TIED_TO) == 0) { +- assert(Desc->getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1); +- // Special case for GATHER with 2 TIED_TO operands +- // Skip the first 2 operands: dst, mask_wb +- CurOp += 2; +- } ++ unsigned CurOp = X86II::getOperandBias(*Desc); + + switch (TSFlags & X86II::FormMask) { + default: llvm_unreachable("Unexpected form in emitVEXOpcodePrefix!"); +@@ -860,14 +891,28 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags, + VEX_B = 0x0; + if (X86II::isX86_64ExtendedReg(MI.getOperand(X86::AddrIndexReg).getReg())) + VEX_X = 0x0; ++ if (X86II::is32ExtendedReg(MI.getOperand(X86::AddrIndexReg).getReg())) ++ EVEX_V2 = 0x0; + + CurOp = X86::AddrNumOperands; +- if (HasVEX_4V) +- VEX_4V = getVEXRegisterEncoding(MI, CurOp++); ++ ++ if (HasEVEX_K) ++ EVEX_aaa = getWriteMaskRegisterEncoding(MI, CurOp++); ++ ++ if (HasVEX_4V) { ++ VEX_4V = getVEXRegisterEncoding(MI, CurOp); ++ if (X86II::is32ExtendedReg(MI.getOperand(CurOp).getReg())) ++ EVEX_V2 = 0x0; ++ CurOp++; ++ } + + const MachineOperand &MO = MI.getOperand(CurOp); +- if (MO.isReg() && X86II::isX86_64ExtendedReg(MO.getReg())) +- VEX_R = 0x0; ++ if (MO.isReg()) { ++ if (X86II::isX86_64ExtendedReg(MO.getReg())) ++ VEX_R = 0x0; ++ if (X86II::is32ExtendedReg(MO.getReg())) ++ EVEX_R2 = 0x0; ++ } + break; + } + case X86II::MRMSrcMem: +@@ -882,10 +927,17 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags, + // dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M), + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) + VEX_R = 0x0; ++ if (X86II::is32ExtendedReg(MI.getOperand(CurOp).getReg())) ++ EVEX_R2 = 0x0; + CurOp++; + ++ if (HasEVEX_K) ++ EVEX_aaa = getWriteMaskRegisterEncoding(MI, CurOp++); ++ + if (HasVEX_4V) { + VEX_4V = getVEXRegisterEncoding(MI, CurOp); ++ if (X86II::is32ExtendedReg(MI.getOperand(CurOp).getReg())) ++ EVEX_V2 = 0x0; + CurOp++; + } + +@@ -896,6 +948,10 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags, + 
MI.getOperand(MemOperand+X86::AddrIndexReg).getReg())) + VEX_X = 0x0; + ++ if (X86II::is32ExtendedReg( ++ MI.getOperand(MemOperand+X86::AddrIndexReg).getReg())) ++ EVEX_V2 = 0x0; ++ + if (HasVEX_4VOp3) + VEX_4V = getVEXRegisterEncoding(MI, CurOp+X86::AddrNumOperands); + break; +@@ -906,8 +962,15 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags, + // MRM[0-9]m instructions forms: + // MemAddr + // src1(VEX_4V), MemAddr +- if (HasVEX_4V) +- VEX_4V = getVEXRegisterEncoding(MI, CurOp++); ++ if (HasVEX_4V) { ++ VEX_4V = getVEXRegisterEncoding(MI, CurOp); ++ if (X86II::is32ExtendedReg(MI.getOperand(CurOp).getReg())) ++ EVEX_V2 = 0x0; ++ CurOp++; ++ } ++ ++ if (HasEVEX_K) ++ EVEX_aaa = getWriteMaskRegisterEncoding(MI, CurOp++); + + if (X86II::isX86_64ExtendedReg( + MI.getOperand(MemOperand+X86::AddrBaseReg).getReg())) +@@ -925,19 +988,38 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags, + // + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) + VEX_R = 0x0; ++ if (X86II::is32ExtendedReg(MI.getOperand(CurOp).getReg())) ++ EVEX_R2 = 0x0; + CurOp++; + +- if (HasVEX_4V) +- VEX_4V = getVEXRegisterEncoding(MI, CurOp++); ++ if (HasEVEX_K) ++ EVEX_aaa = getWriteMaskRegisterEncoding(MI, CurOp++); ++ ++ if (HasVEX_4V) { ++ VEX_4V = getVEXRegisterEncoding(MI, CurOp); ++ if (X86II::is32ExtendedReg(MI.getOperand(CurOp).getReg())) ++ EVEX_V2 = 0x0; ++ CurOp++; ++ } + + if (HasMemOp4) // Skip second register source (encoded in I8IMM) + CurOp++; + + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) + VEX_B = 0x0; ++ if (X86II::is32ExtendedReg(MI.getOperand(CurOp).getReg())) ++ VEX_X = 0x0; + CurOp++; + if (HasVEX_4VOp3) + VEX_4V = getVEXRegisterEncoding(MI, CurOp); ++ if (EVEX_b) { ++ if (HasEVEX_RC) { ++ unsigned RcOperand = NumOps-1; ++ assert(RcOperand >= CurOp); ++ EVEX_rc = MI.getOperand(RcOperand).getImm() & 0x3; ++ } ++ EncodeRC = true; ++ } + break; + case X86II::MRMDestReg: + // MRMDestReg instructions forms: +@@ -946,13 +1028,26 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags, + // dst(ModR/M), src1(VEX_4V), src2(ModR/M) + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) + VEX_B = 0x0; ++ if (X86II::is32ExtendedReg(MI.getOperand(CurOp).getReg())) ++ VEX_X = 0x0; + CurOp++; + +- if (HasVEX_4V) +- VEX_4V = getVEXRegisterEncoding(MI, CurOp++); ++ if (HasEVEX_K) ++ EVEX_aaa = getWriteMaskRegisterEncoding(MI, CurOp++); ++ ++ if (HasVEX_4V) { ++ VEX_4V = getVEXRegisterEncoding(MI, CurOp); ++ if (X86II::is32ExtendedReg(MI.getOperand(CurOp).getReg())) ++ EVEX_V2 = 0x0; ++ CurOp++; ++ } + + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) + VEX_R = 0x0; ++ if (X86II::is32ExtendedReg(MI.getOperand(CurOp).getReg())) ++ EVEX_R2 = 0x0; ++ if (EVEX_b) ++ EncodeRC = true; + break; + case X86II::MRM0r: case X86II::MRM1r: + case X86II::MRM2r: case X86II::MRM3r: +@@ -960,45 +1055,190 @@ void Emitter<CodeEmitter>::emitVEXOpcodePrefix(uint64_t TSFlags, + case X86II::MRM6r: case X86II::MRM7r: + // MRM0r-MRM7r instructions forms: + // dst(VEX_4V), src(ModR/M), imm8 +- VEX_4V = getVEXRegisterEncoding(MI, CurOp); +- CurOp++; ++ if (HasVEX_4V) { ++ VEX_4V = getVEXRegisterEncoding(MI, CurOp); ++ if (X86II::is32ExtendedReg(MI.getOperand(CurOp).getReg())) ++ EVEX_V2 = 0x0; ++ CurOp++; ++ } ++ if (HasEVEX_K) ++ EVEX_aaa = getWriteMaskRegisterEncoding(MI, CurOp++); + + if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg())) + VEX_B = 0x0; ++ if (X86II::is32ExtendedReg(MI.getOperand(CurOp).getReg())) ++ VEX_X = 0x0; + 
break; + } + +- // Emit segment override opcode prefix as needed. +- emitSegmentOverridePrefix(TSFlags, MemOperand, MI); ++ if (Encoding == X86II::VEX || Encoding == X86II::XOP) { ++ // Emit segment override opcode prefix as needed. ++ emitSegmentOverridePrefix(TSFlags, MemOperand, MI); ++ ++ // VEX opcode prefix can have 2 or 3 bytes ++ // ++ // 3 bytes: ++ // +-----+ +--------------+ +-------------------+ ++ // | C4h | | RXB | m-mmmm | | W | vvvv | L | pp | ++ // +-----+ +--------------+ +-------------------+ ++ // 2 bytes: ++ // +-----+ +-------------------+ ++ // | C5h | | R | vvvv | L | pp | ++ // +-----+ +-------------------+ ++ // ++ // XOP uses a similar prefix: ++ // +-----+ +--------------+ +-------------------+ ++ // | 8Fh | | RXB | m-mmmm | | W | vvvv | L | pp | ++ // +-----+ +--------------+ +-------------------+ ++ unsigned char LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3); ++ ++ // Can this use the 2 byte VEX prefix? ++ if (Encoding == X86II::VEX && VEX_B && VEX_X && !VEX_W && (VEX_5M == 1)) { ++ MCE.emitByte(0xC5); ++ MCE.emitByte(LastByte | (VEX_R << 7)); ++ return; ++ } ++ ++ // 3 byte VEX prefix ++ MCE.emitByte(Encoding == X86II::XOP ? 0x8F : 0xC4); ++ MCE.emitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M); ++ MCE.emitByte(LastByte | (VEX_W << 7)); ++ } else { ++ assert(Encoding == X86II::EVEX && "unknown encoding!"); ++ // EVEX opcode prefix can have 4 bytes ++ // ++ // +-----+ +--------------+ +-------------------+ +------------------------+ ++ // | 62h | | RXBR' | 00mm | | W | vvvv | U | pp | | z | L'L | b | v' | aaa | ++ // +-----+ +--------------+ +-------------------+ +------------------------+ ++ assert((VEX_5M & 0x3) == VEX_5M ++ && "More than 2 significant bits in VEX.m-mmmm fields for EVEX!"); ++ ++ VEX_5M &= 0x3; ++ ++ MCE.emitByte(0x62); ++ MCE.emitByte((VEX_R << 7) | ++ (VEX_X << 6) | ++ (VEX_B << 5) | ++ (EVEX_R2 << 4) | ++ VEX_5M); ++ MCE.emitByte((VEX_W << 7) | ++ (VEX_4V << 3) | ++ (EVEX_U << 2) | ++ VEX_PP); ++ if (EncodeRC) ++ MCE.emitByte((EVEX_z << 7) | ++ (EVEX_rc << 5) | ++ (EVEX_b << 4) | ++ (EVEX_V2 << 3) | ++ EVEX_aaa); ++ else ++ MCE.emitByte((EVEX_z << 7) | ++ (EVEX_L2 << 6) | ++ (VEX_L << 5) | ++ (EVEX_b << 4) | ++ (EVEX_V2 << 3) | ++ EVEX_aaa); ++ } ++} + +- // VEX opcode prefix can have 2 or 3 bytes +- // +- // 3 bytes: +- // +-----+ +--------------+ +-------------------+ +- // | C4h | | RXB | m-mmmm | | W | vvvv | L | pp | +- // +-----+ +--------------+ +-------------------+ +- // 2 bytes: +- // +-----+ +-------------------+ +- // | C5h | | R | vvvv | L | pp | +- // +-----+ +-------------------+ +- // +- // XOP uses a similar prefix: +- // +-----+ +--------------+ +-------------------+ +- // | 8Fh | | RXB | m-mmmm | | W | vvvv | L | pp | +- // +-----+ +--------------+ +-------------------+ +- unsigned char LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3); +- +- // Can this use the 2 byte VEX prefix? 
+- if (Encoding == X86II::VEX && VEX_B && VEX_X && !VEX_W && (VEX_5M == 1)) { +- MCE.emitByte(0xC5); +- MCE.emitByte(LastByte | (VEX_R << 7)); +- return; ++template<class CodeEmitter> ++void Emitter<CodeEmitter>::emitQMMU(MachineInstr &MI, ++ const MCInstrDesc *Desc) { ++ // QMMU stub is as follows: ++ // jmp QMMUExit ++ // nop ++ // jmp QMMUMiss ++ MachineBasicBlock *MBB = MI.getParent(); ++ if (MBB->succ_size() != 2) ++ llvm_unreachable("Unhandled QMMU stub!"); ++ ++ MachineBasicBlock* QMMUExit = *MBB->succ_begin(); ++ MachineBasicBlock* QMMUMiss = *(++MBB->succ_begin()); ++ MachineInstr *MRI = &*QMMUMiss->rbegin(); ++ if (MRI->getDesc().getOpcode() != X86::TRAP) { ++ MachineBasicBlock *tmp = QMMUExit; ++ QMMUExit = QMMUMiss; ++ QMMUMiss = tmp; + } + +- // 3 byte VEX prefix +- MCE.emitByte(Encoding == X86II::XOP ? 0x8F : 0xC4); +- MCE.emitByte(VEX_R << 7 | VEX_X << 6 | VEX_B << 5 | VEX_5M); +- MCE.emitByte(LastByte | (VEX_W << 7)); ++ MRI = &*QMMUMiss->rbegin(); ++ if (MRI->getDesc().getOpcode() != X86::TRAP) ++ llvm_unreachable("Unknown QMMU CFG!"); ++ ++ MCE.emitByte(0xE9); ++ emitPCRelativeBlockAddress(QMMUExit); ++ MCE.emitByte(0x90); ++ if (QMMUMiss != ++MachineFunction::iterator(MBB)) { ++ MCE.emitByte(0xE9); ++ emitPCRelativeBlockAddress(QMMUMiss); ++ } ++} ++ ++template<class CodeEmitter> ++bool Emitter<CodeEmitter>::emitHQEMUInstruction(MachineInstr &MI, ++ const MCInstrDesc *Desc) ++{ ++ /* NOTE: the following flags must be synchronized with those in file ++ * llvm-opc.h of the HQEMU source tree. */ ++ enum { ++ PATCH_HQEMU = 0x4182U, ++ PATCH_DUMMY, ++ PATCH_EXIT_TB, ++ PATCH_DIRECT_JUMP, ++ PATCH_TRACE_BLOCK_CHAINING, ++ PATCH_QMMU ++ }; ++ ++ unsigned Opcode = Desc->Opcode; ++ ++ switch (Opcode) { ++ case X86::TRAP: ++ case X86::RETQ: ++ case X86::JMP32r: ++ case X86::JMP64r: ++ break; ++ default: return false; ++ } ++ ++ LLVMContext &Ctx = MI.getParent()->getParent()->getFunction()->getContext(); ++ MDNode *M = MI.getDebugLoc().getScope(Ctx); ++ if (!M || !isa<ConstantInt>(M->getOperand(1))) ++ return false; ++ ++ uint64_t flag = cast<ConstantInt>(M->getOperand(1))->getZExtValue(); ++ if (flag < PATCH_HQEMU || flag > PATCH_QMMU) ++ return false; ++ ++ if (Opcode == X86::TRAP) { ++ if (flag == PATCH_QMMU) ++ return true; ++ ++ unsigned NumNOP = 3 - MCE.getCurrentPCValue() % 4; ++ for (unsigned i = 0; i != NumNOP; ++i) ++ MCE.emitByte(0x90); ++ ++ uintptr_t *ChainPoint = (uintptr_t *)cast<ConstantInt>(M->getOperand(2))->getZExtValue(); ++ *ChainPoint = (uintptr_t) MCE.getCurrentPCValue(); ++ MCE.emitByte(0xE9); ++ emitConstant(0, 4); ++ return true; ++ } ++ ++ if (Opcode == X86::RETQ) { ++ uintptr_t ExitAddr = (uintptr_t)cast<ConstantInt>(M->getOperand(2))->getZExtValue(); ++ uintptr_t Disp = ExitAddr - ((uintptr_t) MCE.getCurrentPCValue() + 5); ++ MCE.emitByte(0xE9); ++ emitConstant(Disp, 4); ++ return true; ++ } ++ ++ if (Opcode == X86::JMP32r || Opcode == X86::JMP64r) { ++ if (flag == PATCH_QMMU) { ++ emitQMMU(MI, Desc); ++ return true; ++ } ++ } ++ return false; + } + + template<class CodeEmitter> +@@ -1032,6 +1272,11 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, + + unsigned Opcode = Desc->Opcode; + ++ if (emitHQEMUInstruction(MI, Desc)) { ++ MCE.processDebugLoc(MI.getDebugLoc(), false); ++ return; ++ } ++ + // If this is a two-address instruction, skip one of the register operands. 
+ unsigned NumOps = Desc->getNumOperands(); + unsigned CurOp = 0; +diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp +index 2bd70a9..7e83c66 100644 +--- a/lib/Target/X86/X86MCInstLower.cpp ++++ b/lib/Target/X86/X86MCInstLower.cpp +@@ -345,6 +345,10 @@ static unsigned getRetOpcode(const X86Subtarget &Subtarget) + void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { + OutMI.setOpcode(MI->getOpcode()); + ++ DebugLoc Loc = MI->getDebugLoc(); ++ if (!Loc.isUnknown()) ++ OutMI.setDebugLoc(Loc); ++ + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + +diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp +index e8a7e84..a0b425e 100644 +--- a/lib/Target/X86/X86RegisterInfo.cpp ++++ b/lib/Target/X86/X86RegisterInfo.cpp +@@ -395,9 +395,65 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { + } + } + ++ getHQEMUReservedRegs(Reserved); + return Reserved; + } + ++void X86RegisterInfo::getHQEMUReservedRegs(BitVector &Reserved) const { ++ for (unsigned i = 0, e = HQEMUReservedRegs.size(); i != e; ++i) ++ Reserved.set(HQEMUReservedRegs[i]); ++} ++ ++void X86RegisterInfo::setHQEMUReservedRegs(std::string RegName) { ++#define RESERVE1(x) \ ++ do { \ ++ HQEMUReservedRegs.push_back(X86::x ## L); \ ++ HQEMUReservedRegs.push_back(X86::x ## H); \ ++ HQEMUReservedRegs.push_back(X86::x ## X);\ ++ HQEMUReservedRegs.push_back(X86::E ## x ## X);\ ++ HQEMUReservedRegs.push_back(X86::R ## x ## X);\ ++ return; \ ++ } while(0) ++ ++#define RESERVE2(x) \ ++ do { \ ++ HQEMUReservedRegs.push_back(X86::R ## x); \ ++ HQEMUReservedRegs.push_back(X86::R ## x ## B);\ ++ HQEMUReservedRegs.push_back(X86::R ## x ## D);\ ++ HQEMUReservedRegs.push_back(X86::R ## x ## W);\ ++ return; \ ++ } while(0) ++ ++ if (RegName == "ebp") { ++ // 32-bit registers ++ HQEMUReservedRegs.push_back(X86::EBP); ++ // 16-bit registers ++ HQEMUReservedRegs.push_back(X86::BP); ++#if defined(__x86_64__) ++ // X86-64 only ++ HQEMUReservedRegs.push_back(X86::BPL); ++#endif ++ return; ++ } ++#if defined(__x86_64__) ++ if (RegName == "rax") RESERVE1(A); ++ if (RegName == "rbx") RESERVE1(B); ++ if (RegName == "rcx") RESERVE1(C); ++ if (RegName == "rdx") RESERVE1(D); ++ if (RegName == "r8") RESERVE2(8); ++ if (RegName == "r9") RESERVE2(9); ++ if (RegName == "r10") RESERVE2(10); ++ if (RegName == "r11") RESERVE2(11); ++ if (RegName == "r12") RESERVE2(12); ++ if (RegName == "r13") RESERVE2(13); ++ if (RegName == "r14") RESERVE2(14); ++ if (RegName == "r15") RESERVE2(15); ++#endif ++ ++#undef RESERVE1 ++#undef RESERVE2 ++} ++ + //===----------------------------------------------------------------------===// + // Stack Frame Processing methods + //===----------------------------------------------------------------------===// +diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h +index 74efd1f..d709505 100644 +--- a/lib/Target/X86/X86RegisterInfo.h ++++ b/lib/Target/X86/X86RegisterInfo.h +@@ -107,6 +107,9 @@ public: + /// register scavenger to determine what registers are free. 
+ BitVector getReservedRegs(const MachineFunction &MF) const override; + ++ void getHQEMUReservedRegs(BitVector &Reserved) const override; ++ void setHQEMUReservedRegs(std::string RegName) override; ++ + bool hasBasePointer(const MachineFunction &MF) const; + + bool canRealignStack(const MachineFunction &MF) const; +diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp +index a5e443f..cd4f57a 100644 +--- a/lib/Transforms/Utils/Local.cpp ++++ b/lib/Transforms/Utils/Local.cpp +@@ -1188,12 +1188,15 @@ static bool markAliveBlocks(BasicBlock *BB, + // If we found a call to a no-return function, insert an unreachable + // instruction after it. Make sure there isn't *already* one there + // though. ++#if 0 ++ // HQEMU: do not delete instructions after llvm.trap. + ++BBI; + if (!isa<UnreachableInst>(BBI)) { + // Don't insert a call to llvm.trap right before the unreachable. + changeToUnreachable(BBI, false); + Changed = true; + } ++#endif + break; + } + } +diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp +index 1c62559..8375529 100644 +--- a/lib/Transforms/Utils/SimplifyCFG.cpp ++++ b/lib/Transforms/Utils/SimplifyCFG.cpp +@@ -1028,6 +1028,11 @@ static bool HoistThenElseCodeToIf(BranchInst *BI, const DataLayout *DL) { + + bool Changed = false; + do { ++ // HQEMU: skip hoisting instructions from llvm.trap to the terminator ++ // instruction. ++ if (isa<IntrinsicInst>(I1) || I1->hasMetadata()) ++ return Changed; ++ + // If we are hoisting the terminator instruction, don't move one (making a + // broken BB), instead clone it, and remove BI. + if (isa<TerminatorInst>(I1)) +@@ -3968,6 +3973,10 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { + BasicBlock *BB = IBI->getParent(); + bool Changed = false; + ++ // HQEMU: LLVM tries to remove the indirectbr with no successors. ++ // Disable it because we use indirectbr to implement IBTC. ++ return false; ++ + // Eliminate redundant destinations. + SmallPtrSet<Value *, 8> Succs; + for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { diff --git a/patch/llvm/llvm-3.8.patch b/patch/llvm/llvm-3.8.patch new file mode 100644 index 0000000..a2f8968 --- /dev/null +++ b/patch/llvm/llvm-3.8.patch @@ -0,0 +1,247 @@ +diff --git a/include/llvm/ExecutionEngine/ExecutionEngine.h b/include/llvm/ExecutionEngine/ExecutionEngine.h +index a730260..5102344 100644 +--- a/include/llvm/ExecutionEngine/ExecutionEngine.h ++++ b/include/llvm/ExecutionEngine/ExecutionEngine.h +@@ -550,6 +550,7 @@ public: + /// is called and is successful, the created engine takes ownership of the + /// memory manager. This option defaults to NULL. + EngineBuilder &setMCJITMemoryManager(std::unique_ptr<RTDyldMemoryManager> mcjmm); ++ EngineBuilder &setMCJITMemoryManager(std::shared_ptr<RTDyldMemoryManager> mcjmm); + + EngineBuilder& + setMemoryManager(std::unique_ptr<MCJITMemoryManager> MM); +diff --git a/include/llvm/MC/MCInst.h b/include/llvm/MC/MCInst.h +index 4688b5f..e3124bf 100644 +--- a/include/llvm/MC/MCInst.h ++++ b/include/llvm/MC/MCInst.h +@@ -27,6 +27,7 @@ class MCAsmInfo; + class MCInstPrinter; + class MCExpr; + class MCInst; ++class DebugLoc; + + /// \brief Instances of this class represent operands of the MCInst class. + /// This is a simple discriminated union. 
+@@ -151,9 +152,10 @@ class MCInst { + unsigned Opcode; + SMLoc Loc; + SmallVector<MCOperand, 8> Operands; ++ const DebugLoc *DbgLoc; + + public: +- MCInst() : Opcode(0) {} ++ MCInst() : Opcode(0), DbgLoc(nullptr) {} + + void setOpcode(unsigned Op) { Opcode = Op; } + unsigned getOpcode() const { return Opcode; } +@@ -161,6 +163,9 @@ public: + void setLoc(SMLoc loc) { Loc = loc; } + SMLoc getLoc() const { return Loc; } + ++ void setDebugLoc(const DebugLoc *Loc) { DbgLoc = Loc; } ++ const DebugLoc *getDebugLoc() const { return DbgLoc; } ++ + const MCOperand &getOperand(unsigned i) const { return Operands[i]; } + MCOperand &getOperand(unsigned i) { return Operands[i]; } + unsigned getNumOperands() const { return Operands.size(); } +diff --git a/include/llvm/MC/MCInstrInfo.h b/include/llvm/MC/MCInstrInfo.h +index 70c8658..69a6427 100644 +--- a/include/llvm/MC/MCInstrInfo.h ++++ b/include/llvm/MC/MCInstrInfo.h +@@ -26,6 +26,7 @@ class MCInstrInfo { + const unsigned *InstrNameIndices; // Array for name indices in InstrNameData + const char *InstrNameData; // Instruction name string pool + unsigned NumOpcodes; // Number of entries in the desc array ++ unsigned long HQEMUExitAddr; + + public: + /// \brief Initialize MCInstrInfo, called by TableGen auto-generated routines. +@@ -52,6 +53,9 @@ public: + assert(Opcode < NumOpcodes && "Invalid opcode!"); + return &InstrNameData[InstrNameIndices[Opcode]]; + } ++ ++ void setHQEMUExitAddr(unsigned long Addr) { HQEMUExitAddr = Addr; } ++ unsigned long getHQEMUExitAddr() const { return HQEMUExitAddr; } + }; + + } // End llvm namespace +diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp +index 41c8da4..ffca9ea 100644 +--- a/lib/ExecutionEngine/ExecutionEngine.cpp ++++ b/lib/ExecutionEngine/ExecutionEngine.cpp +@@ -497,6 +497,13 @@ EngineBuilder &EngineBuilder::setMCJITMemoryManager( + return *this; + } + ++EngineBuilder &EngineBuilder::setMCJITMemoryManager( ++ std::shared_ptr<RTDyldMemoryManager> mcjmm) { ++ MemMgr = mcjmm; ++ Resolver = mcjmm; ++ return *this; ++} ++ + EngineBuilder& + EngineBuilder::setMemoryManager(std::unique_ptr<MCJITMemoryManager> MM) { + MemMgr = std::shared_ptr<MCJITMemoryManager>(std::move(MM)); +diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +index dfab6ec..8a9752f 100644 +--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp ++++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +@@ -23,6 +23,7 @@ + #include "llvm/MC/MCSubtargetInfo.h" + #include "llvm/MC/MCSymbol.h" + #include "llvm/Support/raw_ostream.h" ++#include "llvm/IR/DebugLoc.h" + + using namespace llvm; + +@@ -164,6 +165,9 @@ public: + const MCInst &MI, const MCInstrDesc &Desc, + const MCSubtargetInfo &STI, + raw_ostream &OS) const; ++ ++ bool EmitHQEMUInstruction(const MCInst &MI, raw_ostream &OS, ++ SmallVectorImpl<MCFixup> &Fixups) const; + }; + + } // end anonymous namespace +@@ -1158,6 +1162,52 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, + } + } + ++bool X86MCCodeEmitter:: ++EmitHQEMUInstruction(const MCInst &MI, raw_ostream &OS, ++ SmallVectorImpl<MCFixup> &Fixups) const { ++ /* NOTE: the following flags must be synchronized with those in file ++ * llvm-opc.h of the HQEMU source tree. 
*/ ++ enum { ++ PATCH_HQEMU = 0x4182U, ++ PATCH_DUMMY, ++ PATCH_EXIT_TB, ++ PATCH_DIRECT_JUMP, ++ PATCH_TRACE_BLOCK_CHAINING, ++ PATCH_QMMU ++ }; ++ ++ unsigned Opcode = MI.getOpcode(); ++ switch (Opcode) { ++ case X86::TRAP: ++ case X86::RETQ: ++ break; ++ default: return false; ++ } ++ ++ unsigned CurByte = 0; ++ const DebugLoc *Loc = MI.getDebugLoc(); ++ if (!Loc) ++ return false; ++ ++ unsigned PatchType = Loc->getLine(); ++ if (PatchType < PATCH_HQEMU || PatchType > PATCH_QMMU) ++ return false; ++ ++ if (Opcode == X86::TRAP) { ++ for (unsigned i = 0; i != 8; ++i) ++ EmitByte(0x90, CurByte, OS); ++ return true; ++ } ++ if (Opcode == X86::RETQ) { ++ uintptr_t ExitAddr = MCII.getHQEMUExitAddr(); ++ EmitByte(0xE9, CurByte, OS); ++ EmitImmediate(MCOperand::createImm(ExitAddr), MI.getLoc(), 4, FK_PCRel_4, ++ CurByte, OS, Fixups); ++ return true; ++ } ++ return false; ++} ++ + void X86MCCodeEmitter:: + encodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, +@@ -1166,6 +1216,9 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS, + const MCInstrDesc &Desc = MCII.get(Opcode); + uint64_t TSFlags = Desc.TSFlags; + ++ if (EmitHQEMUInstruction(MI, OS, Fixups)) ++ return; ++ + // Pseudo instructions don't get encoded. + if ((TSFlags & X86II::FormMask) == X86II::Pseudo) + return; +diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp +index e1ca558..c3acaec 100644 +--- a/lib/Target/X86/X86MCInstLower.cpp ++++ b/lib/Target/X86/X86MCInstLower.cpp +@@ -437,6 +437,9 @@ X86MCInstLower::LowerMachineOperand(const MachineInstr *MI, + void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { + OutMI.setOpcode(MI->getOpcode()); + ++ if (MI->getDebugLoc()) ++ OutMI.setDebugLoc(&MI->getDebugLoc()); ++ + for (const MachineOperand &MO : MI->operands()) + if (auto MaybeMCOp = LowerMachineOperand(MI, MO)) + OutMI.addOperand(MaybeMCOp.getValue()); +diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp +index 274b566..dbb4fec 100644 +--- a/lib/Target/X86/X86RegisterInfo.cpp ++++ b/lib/Target/X86/X86RegisterInfo.cpp +@@ -473,6 +473,19 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { + } + } + ++ // Reserve registers for HQEMU. ++ if (MF.getFunction()->hasFnAttribute("hqemu")) { ++ if (!Is64Bit) { ++ Reserved.set(X86::EBP); ++ Reserved.set(X86::BP); ++ Reserved.set(X86::BPL); ++ } else { ++ Reserved.set(X86::R14); ++ Reserved.set(X86::R14B); ++ Reserved.set(X86::R14D); ++ Reserved.set(X86::R14W); ++ } ++ } + return Reserved; + } + +diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp +index abc9b65..39241c2 100644 +--- a/lib/Transforms/Utils/Local.cpp ++++ b/lib/Transforms/Utils/Local.cpp +@@ -1302,7 +1302,8 @@ static bool markAliveBlocks(Function &F, + } + + if (CallInst *CI = dyn_cast<CallInst>(BBI)) { +- if (CI->doesNotReturn()) { ++ // HQEMU: do not delete instructions after llvm.trap. ++ if (!F.hasFnAttribute("hqemu") && CI->doesNotReturn()) { + // If we found a call to a no-return function, insert an unreachable + // instruction after it. Make sure there isn't *already* one there + // though. 
+diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp +index e484b69..6ac6033 100644 +--- a/lib/Transforms/Utils/SimplifyCFG.cpp ++++ b/lib/Transforms/Utils/SimplifyCFG.cpp +@@ -1120,6 +1120,9 @@ static bool HoistThenElseCodeToIf(BranchInst *BI, + + bool Changed = false; + do { ++ if (BIParent->getParent()->hasFnAttribute("hqemu")) ++ if (isa<IntrinsicInst>(I1) || I1->hasMetadata()) ++ return Changed; + // If we are hoisting the terminator instruction, don't move one (making a + // broken BB), instead clone it, and remove BI. + if (isa<TerminatorInst>(I1)) +@@ -4898,6 +4901,9 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { + BasicBlock *BB = IBI->getParent(); + bool Changed = false; + ++ if (BB->getParent()->hasFnAttribute("hqemu")) ++ return false; ++ + // Eliminate redundant destinations. + SmallPtrSet<Value *, 8> Succs; + for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { diff --git a/patch/llvm/llvm-3.9.patch b/patch/llvm/llvm-3.9.patch new file mode 100644 index 0000000..38fa566 --- /dev/null +++ b/patch/llvm/llvm-3.9.patch @@ -0,0 +1,404 @@ +diff --git a/include/llvm/ExecutionEngine/ExecutionEngine.h b/include/llvm/ExecutionEngine/ExecutionEngine.h +index ab13028..810f403 100644 +--- a/include/llvm/ExecutionEngine/ExecutionEngine.h ++++ b/include/llvm/ExecutionEngine/ExecutionEngine.h +@@ -550,6 +550,7 @@ public: + /// is called and is successful, the created engine takes ownership of the + /// memory manager. This option defaults to NULL. + EngineBuilder &setMCJITMemoryManager(std::unique_ptr<RTDyldMemoryManager> mcjmm); ++ EngineBuilder &setMCJITMemoryManager(std::shared_ptr<RTDyldMemoryManager> mcjmm); + + EngineBuilder& + setMemoryManager(std::unique_ptr<MCJITMemoryManager> MM); +diff --git a/include/llvm/MC/MCInst.h b/include/llvm/MC/MCInst.h +index 4688b5f..e3124bf 100644 +--- a/include/llvm/MC/MCInst.h ++++ b/include/llvm/MC/MCInst.h +@@ -27,6 +27,7 @@ class MCAsmInfo; + class MCInstPrinter; + class MCExpr; + class MCInst; ++class DebugLoc; + + /// \brief Instances of this class represent operands of the MCInst class. + /// This is a simple discriminated union. +@@ -151,9 +152,10 @@ class MCInst { + unsigned Opcode; + SMLoc Loc; + SmallVector<MCOperand, 8> Operands; ++ const DebugLoc *DbgLoc; + + public: +- MCInst() : Opcode(0) {} ++ MCInst() : Opcode(0), DbgLoc(nullptr) {} + + void setOpcode(unsigned Op) { Opcode = Op; } + unsigned getOpcode() const { return Opcode; } +@@ -161,6 +163,9 @@ public: + void setLoc(SMLoc loc) { Loc = loc; } + SMLoc getLoc() const { return Loc; } + ++ void setDebugLoc(const DebugLoc *Loc) { DbgLoc = Loc; } ++ const DebugLoc *getDebugLoc() const { return DbgLoc; } ++ + const MCOperand &getOperand(unsigned i) const { return Operands[i]; } + MCOperand &getOperand(unsigned i) { return Operands[i]; } + unsigned getNumOperands() const { return Operands.size(); } +diff --git a/include/llvm/MC/MCInstrInfo.h b/include/llvm/MC/MCInstrInfo.h +index 70c8658..69a6427 100644 +--- a/include/llvm/MC/MCInstrInfo.h ++++ b/include/llvm/MC/MCInstrInfo.h +@@ -26,6 +26,7 @@ class MCInstrInfo { + const unsigned *InstrNameIndices; // Array for name indices in InstrNameData + const char *InstrNameData; // Instruction name string pool + unsigned NumOpcodes; // Number of entries in the desc array ++ unsigned long HQEMUExitAddr; + + public: + /// \brief Initialize MCInstrInfo, called by TableGen auto-generated routines. 
+@@ -52,6 +53,9 @@ public: + assert(Opcode < NumOpcodes && "Invalid opcode!"); + return &InstrNameData[InstrNameIndices[Opcode]]; + } ++ ++ void setHQEMUExitAddr(unsigned long Addr) { HQEMUExitAddr = Addr; } ++ unsigned long getHQEMUExitAddr() const { return HQEMUExitAddr; } + }; + + } // End llvm namespace +diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp +index a8e68bf..a4f1d99 100644 +--- a/lib/ExecutionEngine/ExecutionEngine.cpp ++++ b/lib/ExecutionEngine/ExecutionEngine.cpp +@@ -492,6 +492,13 @@ EngineBuilder &EngineBuilder::setMCJITMemoryManager( + return *this; + } + ++EngineBuilder &EngineBuilder::setMCJITMemoryManager( ++ std::shared_ptr<RTDyldMemoryManager> mcjmm) { ++ MemMgr = mcjmm; ++ Resolver = mcjmm; ++ return *this; ++} ++ + EngineBuilder& + EngineBuilder::setMemoryManager(std::unique_ptr<MCJITMemoryManager> MM) { + MemMgr = std::shared_ptr<MCJITMemoryManager>(std::move(MM)); +diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp +index 2b4cdf1..0e09232 100644 +--- a/lib/Target/AArch64/AArch64MCInstLower.cpp ++++ b/lib/Target/AArch64/AArch64MCInstLower.cpp +@@ -207,6 +207,9 @@ bool AArch64MCInstLower::lowerOperand(const MachineOperand &MO, + void AArch64MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { + OutMI.setOpcode(MI->getOpcode()); + ++ if (MI->getDebugLoc()) ++ OutMI.setDebugLoc(&MI->getDebugLoc()); ++ + for (const MachineOperand &MO : MI->operands()) { + MCOperand MCOp; + if (lowerOperand(MO, MCOp)) +diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp +index af867da..1755863 100644 +--- a/lib/Target/AArch64/AArch64RegisterInfo.cpp ++++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp +@@ -138,6 +138,14 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const { + Reserved.set(AArch64::W19); + } + ++ // Reserve registers for HQEMU. 
++ if (MF.getFunction()->hasFnAttribute("hqemu")) { ++ Reserved.set(AArch64::X19); ++ Reserved.set(AArch64::W19); ++ Reserved.set(AArch64::X28); ++ Reserved.set(AArch64::W28); ++ } ++ + return Reserved; + } + +diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +index 7b9ff8f..7d724cb 100644 +--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp ++++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +@@ -24,6 +24,7 @@ + #include "llvm/MC/MCSubtargetInfo.h" + #include "llvm/Support/EndianStream.h" + #include "llvm/Support/raw_ostream.h" ++#include "llvm/IR/DebugLoc.h" + using namespace llvm; + + #define DEBUG_TYPE "mccodeemitter" +@@ -35,11 +36,13 @@ namespace { + + class AArch64MCCodeEmitter : public MCCodeEmitter { + MCContext &Ctx; ++ const MCInstrInfo &MCII; + + AArch64MCCodeEmitter(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT + void operator=(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT + public: +- AArch64MCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx) : Ctx(ctx) {} ++ AArch64MCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx) ++ : Ctx(ctx), MCII(mcii) {} + + ~AArch64MCCodeEmitter() override {} + +@@ -170,6 +173,10 @@ public: + + unsigned fixOneOperandFPComparison(const MCInst &MI, unsigned EncodedValue, + const MCSubtargetInfo &STI) const; ++ ++ bool EmitHQEMUInstruction(const MCInst &MI, raw_ostream &OS, ++ SmallVectorImpl<MCFixup> &Fixups, ++ const MCSubtargetInfo &STI) const; + }; + + } // end anonymous namespace +@@ -536,9 +543,85 @@ unsigned AArch64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue, + return EncodedValue & ~(1u << 30); + } + ++bool AArch64MCCodeEmitter:: ++EmitHQEMUInstruction(const MCInst &MI, raw_ostream &OS, ++ SmallVectorImpl<MCFixup> &Fixups, ++ const MCSubtargetInfo &STI) const { ++ /* NOTE: the following flags must be synchronized with those in file ++ * llvm-opc.h of the HQEMU source tree. 
*/ ++ enum { ++ PATCH_HQEMU = 0x4182U, ++ PATCH_DUMMY, ++ PATCH_EXIT_TB, ++ PATCH_DIRECT_JUMP, ++ PATCH_TRACE_BLOCK_CHAINING, ++ PATCH_QMMU ++ }; ++ ++ unsigned Opcode = MI.getOpcode(); ++ switch (Opcode) { ++ case AArch64::BRK: ++ case AArch64::RET: ++ break; ++ default: return false; ++ } ++ ++ const DebugLoc *Loc = MI.getDebugLoc(); ++ if (!Loc) ++ return false; ++ ++ unsigned PatchType = Loc->getLine(); ++ if (PatchType < PATCH_HQEMU || PatchType > PATCH_QMMU) ++ return false; ++ ++ if (Opcode == AArch64::BRK) { ++ uint64_t Binary = 0; ++ MCOperand Operand = MCOperand::createImm(1); ++ MCInst Jump; ++ ++ Jump.setOpcode(AArch64::B); ++ Jump.addOperand(Operand); ++ Binary = getBinaryCodeForInstr(Jump, Fixups, STI); ++ support::endian::Writer<support::little>(OS).write<uint32_t>(Binary); ++ ++MCNumEmitted; ++ return true; ++ } ++ if (Opcode == AArch64::RET) { ++ uint64_t ExitAddr = MCII.getHQEMUExitAddr(); ++ uint32_t Binary[4]; ++ MCOperand Reg = MCOperand::createReg(AArch64::X1); ++ MCInst Jump, Mov; ++ ++ // mov w0, ExitAddr[15:0] ++ Binary[0] = (0x2 << 29) | 0x1; ++ Binary[0] |= (0x25 << 23); ++ Binary[0] |= ((ExitAddr & 0xFFFF) << 5); ++ ++ // movk w0, ExitAddr[31:16] ++ Binary[1] = (0x3 << 29) | 0x1; ++ Binary[1] |= (0x25 << 23); ++ Binary[1] |= (0x1 << 21); ++ Binary[1] |= ((ExitAddr & 0xFFFF0000) >> 11); ++ ++ Jump.setOpcode(AArch64::BR); ++ Jump.addOperand(Reg); ++ Binary[2] = getBinaryCodeForInstr(Jump, Fixups, STI); ++ ++ for (int i = 0; i < 3; ++i) { ++ support::endian::Writer<support::little>(OS).write<uint32_t>(Binary[i]); ++ ++MCNumEmitted; ++ } ++ return true; ++ } ++ return false; ++} ++ + void AArch64MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { ++ if (EmitHQEMUInstruction(MI, OS, Fixups, STI)) ++ return; ++ + if (MI.getOpcode() == AArch64::TLSDESCCALL) { + // This is a directive which applies an R_AARCH64_TLSDESC_CALL to the + // following (BLR) instruction. It doesn't emit any code itself so it +diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +index 96c2e81..504b3eb 100644 +--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp ++++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +@@ -23,6 +23,7 @@ + #include "llvm/MC/MCSubtargetInfo.h" + #include "llvm/MC/MCSymbol.h" + #include "llvm/Support/raw_ostream.h" ++#include "llvm/IR/DebugLoc.h" + + using namespace llvm; + +@@ -142,6 +143,9 @@ public: + const MCInst &MI, const MCInstrDesc &Desc, + const MCSubtargetInfo &STI, raw_ostream &OS) const; + ++ bool EmitHQEMUInstruction(const MCInst &MI, raw_ostream &OS, ++ SmallVectorImpl<MCFixup> &Fixups) const; ++ + uint8_t DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags, + int MemOperand, const MCInstrDesc &Desc) const; + }; +@@ -1110,6 +1114,52 @@ bool X86MCCodeEmitter::emitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, + return Ret; + } + ++bool X86MCCodeEmitter:: ++EmitHQEMUInstruction(const MCInst &MI, raw_ostream &OS, ++ SmallVectorImpl<MCFixup> &Fixups) const { ++ /* NOTE: the following flags must be synchronized with those in file ++ * llvm-opc.h of the HQEMU source tree. 
*/ ++ enum { ++ PATCH_HQEMU = 0x4182U, ++ PATCH_DUMMY, ++ PATCH_EXIT_TB, ++ PATCH_DIRECT_JUMP, ++ PATCH_TRACE_BLOCK_CHAINING, ++ PATCH_QMMU ++ }; ++ ++ unsigned Opcode = MI.getOpcode(); ++ switch (Opcode) { ++ case X86::TRAP: ++ case X86::RETQ: ++ break; ++ default: return false; ++ } ++ ++ unsigned CurByte = 0; ++ const DebugLoc *Loc = MI.getDebugLoc(); ++ if (!Loc) ++ return false; ++ ++ unsigned PatchType = Loc->getLine(); ++ if (PatchType < PATCH_HQEMU || PatchType > PATCH_QMMU) ++ return false; ++ ++ if (Opcode == X86::TRAP) { ++ for (unsigned i = 0; i != 8; ++i) ++ EmitByte(0x90, CurByte, OS); ++ return true; ++ } ++ if (Opcode == X86::RETQ) { ++ uintptr_t ExitAddr = MCII.getHQEMUExitAddr(); ++ EmitByte(0xE9, CurByte, OS); ++ EmitImmediate(MCOperand::createImm(ExitAddr), MI.getLoc(), 4, FK_PCRel_4, ++ CurByte, OS, Fixups); ++ return true; ++ } ++ return false; ++} ++ + void X86MCCodeEmitter:: + encodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, +@@ -1118,6 +1168,9 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS, + const MCInstrDesc &Desc = MCII.get(Opcode); + uint64_t TSFlags = Desc.TSFlags; + ++ if (EmitHQEMUInstruction(MI, OS, Fixups)) ++ return; ++ + // Pseudo instructions don't get encoded. + if ((TSFlags & X86II::FormMask) == X86II::Pseudo) + return; +diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp +index 906e342..8f7db6b 100644 +--- a/lib/Target/X86/X86MCInstLower.cpp ++++ b/lib/Target/X86/X86MCInstLower.cpp +@@ -389,6 +389,9 @@ X86MCInstLower::LowerMachineOperand(const MachineInstr *MI, + void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { + OutMI.setOpcode(MI->getOpcode()); + ++ if (MI->getDebugLoc()) ++ OutMI.setDebugLoc(&MI->getDebugLoc()); ++ + for (const MachineOperand &MO : MI->operands()) + if (auto MaybeMCOp = LowerMachineOperand(MI, MO)) + OutMI.addOperand(MaybeMCOp.getValue()); +diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp +index 8675063..e1d0e19 100644 +--- a/lib/Target/X86/X86RegisterInfo.cpp ++++ b/lib/Target/X86/X86RegisterInfo.cpp +@@ -503,6 +503,19 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { + } + } + ++ // Reserve registers for HQEMU. ++ if (MF.getFunction()->hasFnAttribute("hqemu")) { ++ if (!Is64Bit) { ++ Reserved.set(X86::EBP); ++ Reserved.set(X86::BP); ++ Reserved.set(X86::BPL); ++ } else { ++ Reserved.set(X86::R14); ++ Reserved.set(X86::R14B); ++ Reserved.set(X86::R14D); ++ Reserved.set(X86::R14W); ++ } ++ } + return Reserved; + } + +diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp +index f1838d8..3d4d3b9 100644 +--- a/lib/Transforms/Utils/Local.cpp ++++ b/lib/Transforms/Utils/Local.cpp +@@ -1413,7 +1413,8 @@ static bool markAliveBlocks(Function &F, + Changed = true; + break; + } +- if (CI->doesNotReturn()) { ++ // HQEMU: do not delete instructions after llvm.trap. ++ if (!F.hasFnAttribute("hqemu") && CI->doesNotReturn()) { + // If we found a call to a no-return function, insert an unreachable + // instruction after it. Make sure there isn't *already* one there + // though. 
+diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp +index 0504646..92291c3 100644 +--- a/lib/Transforms/Utils/SimplifyCFG.cpp ++++ b/lib/Transforms/Utils/SimplifyCFG.cpp +@@ -1201,6 +1201,9 @@ static bool HoistThenElseCodeToIf(BranchInst *BI, + + bool Changed = false; + do { ++ if (BIParent->getParent()->hasFnAttribute("hqemu")) ++ if (isa<IntrinsicInst>(I1) || I1->hasMetadata()) ++ return Changed; + // If we are hoisting the terminator instruction, don't move one (making a + // broken BB), instead clone it, and remove BI. + if (isa<TerminatorInst>(I1)) +@@ -5088,6 +5091,9 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { + BasicBlock *BB = IBI->getParent(); + bool Changed = false; + ++ if (BB->getParent()->hasFnAttribute("hqemu")) ++ return false; ++ + // Eliminate redundant destinations. + SmallPtrSet<Value *, 8> Succs; + for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { diff --git a/patch/llvm/llvm-5.0.patch b/patch/llvm/llvm-5.0.patch new file mode 100644 index 0000000..bb89779 --- /dev/null +++ b/patch/llvm/llvm-5.0.patch @@ -0,0 +1,652 @@ +diff --git a/include/llvm/ExecutionEngine/ExecutionEngine.h b/include/llvm/ExecutionEngine/ExecutionEngine.h +index 2830a26..8c9c09e 100644 +--- a/include/llvm/ExecutionEngine/ExecutionEngine.h ++++ b/include/llvm/ExecutionEngine/ExecutionEngine.h +@@ -566,6 +566,7 @@ public: + /// is called and is successful, the created engine takes ownership of the + /// memory manager. This option defaults to NULL. + EngineBuilder &setMCJITMemoryManager(std::unique_ptr<RTDyldMemoryManager> mcjmm); ++ EngineBuilder &setMCJITMemoryManager(std::shared_ptr<RTDyldMemoryManager> mcjmm); + + EngineBuilder& + setMemoryManager(std::unique_ptr<MCJITMemoryManager> MM); +diff --git a/include/llvm/MC/MCInst.h b/include/llvm/MC/MCInst.h +index 9bf440e..4f0250c 100644 +--- a/include/llvm/MC/MCInst.h ++++ b/include/llvm/MC/MCInst.h +@@ -29,6 +29,7 @@ class MCExpr; + class MCInst; + class MCInstPrinter; + class raw_ostream; ++class DebugLoc; + + /// \brief Instances of this class represent operands of the MCInst class. + /// This is a simple discriminated union. +@@ -160,6 +161,7 @@ class MCInst { + unsigned Opcode = 0; + SMLoc Loc; + SmallVector<MCOperand, 8> Operands; ++ const DebugLoc *DbgLoc = nullptr; + + public: + MCInst() = default; +@@ -170,6 +172,9 @@ public: + void setLoc(SMLoc loc) { Loc = loc; } + SMLoc getLoc() const { return Loc; } + ++ void setDebugLoc(const DebugLoc *Loc) { DbgLoc = Loc; } ++ const DebugLoc *getDebugLoc() const { return DbgLoc; } ++ + const MCOperand &getOperand(unsigned i) const { return Operands[i]; } + MCOperand &getOperand(unsigned i) { return Operands[i]; } + unsigned getNumOperands() const { return Operands.size(); } +diff --git a/include/llvm/MC/MCInstrInfo.h b/include/llvm/MC/MCInstrInfo.h +index 80f1f32..e5056cb 100644 +--- a/include/llvm/MC/MCInstrInfo.h ++++ b/include/llvm/MC/MCInstrInfo.h +@@ -26,6 +26,7 @@ class MCInstrInfo { + const unsigned *InstrNameIndices; // Array for name indices in InstrNameData + const char *InstrNameData; // Instruction name string pool + unsigned NumOpcodes; // Number of entries in the desc array ++ unsigned long HQEMUExitAddr; + + public: + /// \brief Initialize MCInstrInfo, called by TableGen auto-generated routines. 
+@@ -52,6 +53,9 @@ public: + assert(Opcode < NumOpcodes && "Invalid opcode!"); + return StringRef(&InstrNameData[InstrNameIndices[Opcode]]); + } ++ ++ void setHQEMUExitAddr(unsigned long Addr) { HQEMUExitAddr = Addr; } ++ unsigned long getHQEMUExitAddr() const { return HQEMUExitAddr; } + }; + + } // End llvm namespace +diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp +index 3c439e6..536e776 100644 +--- a/lib/CodeGen/BranchFolding.cpp ++++ b/lib/CodeGen/BranchFolding.cpp +@@ -169,6 +169,12 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, + MachineModuleInfo *mmi, + MachineLoopInfo *mli, bool AfterPlacement) { + if (!tii) return false; ++ if (MF.getFunction()->hasFnAttribute("hqemu")) { ++ switch (MF.getTarget().getTargetTriple().getArch()) { ++ case Triple::x86: case Triple::x86_64: break; ++ default: return false; ++ } ++ } + + TriedMerging.clear(); + +diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp +index 2ee72f9..701c21b 100644 +--- a/lib/ExecutionEngine/ExecutionEngine.cpp ++++ b/lib/ExecutionEngine/ExecutionEngine.cpp +@@ -496,6 +496,13 @@ EngineBuilder &EngineBuilder::setMCJITMemoryManager( + return *this; + } + ++EngineBuilder &EngineBuilder::setMCJITMemoryManager( ++ std::shared_ptr<RTDyldMemoryManager> mcjmm) { ++ MemMgr = mcjmm; ++ Resolver = mcjmm; ++ return *this; ++} ++ + EngineBuilder& + EngineBuilder::setMemoryManager(std::unique_ptr<MCJITMemoryManager> MM) { + MemMgr = std::shared_ptr<MCJITMemoryManager>(std::move(MM)); +diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp +index f82b9db..c42ac7f 100644 +--- a/lib/Target/AArch64/AArch64MCInstLower.cpp ++++ b/lib/Target/AArch64/AArch64MCInstLower.cpp +@@ -219,6 +219,9 @@ bool AArch64MCInstLower::lowerOperand(const MachineOperand &MO, + void AArch64MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { + OutMI.setOpcode(MI->getOpcode()); + ++ if (MI->getDebugLoc()) ++ OutMI.setDebugLoc(&MI->getDebugLoc()); ++ + for (const MachineOperand &MO : MI->operands()) { + MCOperand MCOp; + if (lowerOperand(MO, MCOp)) +diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp +index 9f7dcb3..0e56bb6 100644 +--- a/lib/Target/AArch64/AArch64RegisterInfo.cpp ++++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp +@@ -130,6 +130,12 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const { + if (hasBasePointer(MF)) + markSuperRegs(Reserved, AArch64::W19); + ++ // Reserve registers for HQEMU. 
++ if (MF.getFunction()->hasFnAttribute("hqemu")) { ++ markSuperRegs(Reserved, AArch64::W19); ++ markSuperRegs(Reserved, AArch64::W28); ++ } ++ + assert(checkAllSuperRegsMarked(Reserved)); + return Reserved; + } +diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +index 33698d2..9735e88 100644 +--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp ++++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +@@ -29,6 +29,7 @@ + #include "llvm/Support/EndianStream.h" + #include "llvm/Support/ErrorHandling.h" + #include "llvm/Support/raw_ostream.h" ++#include "llvm/IR/DebugLoc.h" + #include <cassert> + #include <cstdint> + +@@ -180,6 +181,10 @@ public: + unsigned fixOneOperandFPComparison(const MCInst &MI, unsigned EncodedValue, + const MCSubtargetInfo &STI) const; + ++ bool EmitHQEMUInstruction(const MCInst &MI, raw_ostream &OS, ++ SmallVectorImpl<MCFixup> &Fixups, ++ const MCSubtargetInfo &STI) const; ++ + private: + uint64_t computeAvailableFeatures(const FeatureBitset &FB) const; + void verifyInstructionPredicates(const MCInst &MI, +@@ -552,9 +557,85 @@ unsigned AArch64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue, + return EncodedValue & ~(1u << 30); + } + ++bool AArch64MCCodeEmitter:: ++EmitHQEMUInstruction(const MCInst &MI, raw_ostream &OS, ++ SmallVectorImpl<MCFixup> &Fixups, ++ const MCSubtargetInfo &STI) const { ++ /* NOTE: the following flags must be synchronized with those in file ++ * llvm-opc.h of the HQEMU source tree. */ ++ enum { ++ PATCH_HQEMU = 0x4182U, ++ PATCH_DUMMY, ++ PATCH_EXIT_TB, ++ PATCH_DIRECT_JUMP, ++ PATCH_TRACE_BLOCK_CHAINING, ++ PATCH_QMMU ++ }; ++ ++ unsigned Opcode = MI.getOpcode(); ++ switch (Opcode) { ++ case AArch64::BRK: ++ case AArch64::RET: ++ break; ++ default: return false; ++ } ++ ++ const DebugLoc *Loc = MI.getDebugLoc(); ++ if (!Loc) ++ return false; ++ ++ unsigned PatchType = Loc->getLine(); ++ if (PatchType < PATCH_HQEMU || PatchType > PATCH_QMMU) ++ return false; ++ ++ if (Opcode == AArch64::BRK) { ++ uint64_t Binary = 0; ++ MCOperand Operand = MCOperand::createImm(1); ++ MCInst Jump; ++ ++ Jump.setOpcode(AArch64::B); ++ Jump.addOperand(Operand); ++ Binary = getBinaryCodeForInstr(Jump, Fixups, STI); ++ support::endian::Writer<support::little>(OS).write<uint32_t>(Binary); ++ ++MCNumEmitted; ++ return true; ++ } ++ if (Opcode == AArch64::RET) { ++ uint64_t ExitAddr = MCII.getHQEMUExitAddr(); ++ uint32_t Binary[4]; ++ MCOperand Reg = MCOperand::createReg(AArch64::X1); ++ MCInst Jump, Mov; ++ ++ // mov w0, ExitAddr[15:0] ++ Binary[0] = (0x2 << 29) | 0x1; ++ Binary[0] |= (0x25 << 23); ++ Binary[0] |= ((ExitAddr & 0xFFFF) << 5); ++ ++ // movk w0, ExitAddr[31:16] ++ Binary[1] = (0x3 << 29) | 0x1; ++ Binary[1] |= (0x25 << 23); ++ Binary[1] |= (0x1 << 21); ++ Binary[1] |= ((ExitAddr & 0xFFFF0000) >> 11); ++ ++ Jump.setOpcode(AArch64::BR); ++ Jump.addOperand(Reg); ++ Binary[2] = getBinaryCodeForInstr(Jump, Fixups, STI); ++ ++ for (int i = 0; i < 3; ++i) { ++ support::endian::Writer<support::little>(OS).write<uint32_t>(Binary[i]); ++ ++MCNumEmitted; ++ } ++ return true; ++ } ++ return false; ++} ++ + void AArch64MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { ++ if (EmitHQEMUInstruction(MI, OS, Fixups, STI)) ++ return; ++ + verifyInstructionPredicates(MI, + computeAvailableFeatures(STI.getFeatureBits())); + +diff --git 
a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +index 92c8c22..befec89 100644 +--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp ++++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +@@ -30,6 +30,7 @@ + #include "llvm/Support/ErrorHandling.h" + #include "llvm/Support/MathExtras.h" + #include "llvm/Support/raw_ostream.h" ++#include "llvm/IR/DebugLoc.h" + #include <cassert> + #include <cstdint> + +@@ -109,9 +110,16 @@ public: + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + ++ bool EmitHQEMUInstruction(const MCInst &MI, raw_ostream &OS, ++ SmallVectorImpl<MCFixup> &Fixups, ++ const MCSubtargetInfo &STI) const; ++ + void encodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const override { ++ if (EmitHQEMUInstruction(MI, OS, Fixups, STI)) ++ return; ++ + verifyInstructionPredicates(MI, + computeAvailableFeatures(STI.getFeatureBits())); + +@@ -386,5 +394,75 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, + return MO.getImm(); + } + ++bool PPCMCCodeEmitter:: ++EmitHQEMUInstruction(const MCInst &MI, raw_ostream &OS, ++ SmallVectorImpl<MCFixup> &Fixups, ++ const MCSubtargetInfo &STI) const { ++ /* NOTE: the following flags must be synchronized with those in file ++ * llvm-opc.h of the HQEMU source tree. */ ++ enum { ++ PATCH_HQEMU = 0x4182U, ++ PATCH_DUMMY, ++ PATCH_EXIT_TB, ++ PATCH_DIRECT_JUMP, ++ PATCH_TRACE_BLOCK_CHAINING, ++ PATCH_QMMU ++ }; ++ ++ unsigned Opcode = MI.getOpcode(); ++ switch (Opcode) { ++ case PPC::TRAP: ++ case PPC::BLR: ++ case PPC::BLR8: ++ break; ++ default: return false; ++ } ++ ++ const DebugLoc *Loc = MI.getDebugLoc(); ++ if (!Loc) ++ return false; ++ ++ unsigned PatchType = Loc->getLine(); ++ if (PatchType < PATCH_HQEMU || PatchType > PATCH_QMMU) ++ return false; ++ ++ if (Opcode == PPC::TRAP) { ++ uint64_t Bits = 0; ++ MCInst NopInst; ++ NopInst.setOpcode(PPC::NOP); ++ Bits = getBinaryCodeForInstr(NopInst, Fixups, STI); ++ for (unsigned i = 0; i != 5; ++i) { ++ if (IsLittleEndian) { ++ support::endian::Writer<support::little>(OS).write<uint32_t>(Bits); ++ } else { ++ support::endian::Writer<support::big>(OS).write<uint32_t>(Bits); ++ } ++ } ++ MCNumEmitted += 5; ++ return true; ++ } ++ if (Opcode == PPC::BLR || Opcode == PPC::BLR8) { ++ uint64_t Bits[2]; ++ MCInst Inst[2]; ++ Inst[0].setOpcode(PPC::MTCTR); ++ Inst[0].addOperand(MCOperand::createReg(PPC::R31)); ++ Inst[1].setOpcode(PPC::BCTR); ++ Bits[0] = getBinaryCodeForInstr(Inst[0], Fixups, STI); ++ Bits[1] = getBinaryCodeForInstr(Inst[1], Fixups, STI); ++ ++ if (IsLittleEndian) { ++ support::endian::Writer<support::little>(OS).write<uint32_t>(Bits[0]); ++ support::endian::Writer<support::little>(OS).write<uint32_t>(Bits[1]); ++ } else { ++ support::endian::Writer<support::big>(OS).write<uint32_t>(Bits[0]); ++ support::endian::Writer<support::big>(OS).write<uint32_t>(Bits[1]); ++ } ++ ++ MCNumEmitted += 2; ++ return true; ++ } ++ return false; ++} ++ + #define ENABLE_INSTR_PREDICATE_VERIFIER + #include "PPCGenMCCodeEmitter.inc" +diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp +index b3a3c73..05c8cac 100644 +--- a/lib/Target/PowerPC/PPCISelLowering.cpp ++++ b/lib/Target/PowerPC/PPCISelLowering.cpp +@@ -2422,10 +2422,11 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, + EVT PtrVT = Op.getValueType(); + ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); + const Constant *C = 
CP->getConstVal(); ++ bool isHQEMU = DAG.getMachineFunction().getFunction()->hasFnAttribute("hqemu"); + + // 64-bit SVR4 ABI code is always position-independent. + // The actual address of the GlobalValue is stored in the TOC. +- if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { ++ if (Subtarget.isSVR4ABI() && Subtarget.isPPC64() && !isHQEMU) { + setUsesTOCBasePtr(DAG); + SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0); + return getTOCEntry(DAG, SDLoc(CP), true, GA); +@@ -2435,7 +2436,7 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, + bool IsPIC = isPositionIndependent(); + getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); + +- if (IsPIC && Subtarget.isSVR4ABI()) { ++ if (IsPIC && Subtarget.isSVR4ABI() && !isHQEMU) { + SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), + PPCII::MO_PIC_FLAG); + return getTOCEntry(DAG, SDLoc(CP), false, GA); +@@ -2500,10 +2501,11 @@ PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF, + SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { + EVT PtrVT = Op.getValueType(); + JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); ++ bool isHQEMU = DAG.getMachineFunction().getFunction()->hasFnAttribute("hqemu"); + + // 64-bit SVR4 ABI code is always position-independent. + // The actual address of the GlobalValue is stored in the TOC. +- if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { ++ if (Subtarget.isSVR4ABI() && Subtarget.isPPC64() && !isHQEMU) { + setUsesTOCBasePtr(DAG); + SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); + return getTOCEntry(DAG, SDLoc(JT), true, GA); +@@ -2513,7 +2515,7 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { + bool IsPIC = isPositionIndependent(); + getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); + +- if (IsPIC && Subtarget.isSVR4ABI()) { ++ if (IsPIC && Subtarget.isSVR4ABI() && !isHQEMU) { + SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, + PPCII::MO_PIC_FLAG); + return getTOCEntry(DAG, SDLoc(GA), false, GA); +@@ -2529,10 +2531,11 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, + EVT PtrVT = Op.getValueType(); + BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op); + const BlockAddress *BA = BASDN->getBlockAddress(); ++ bool isHQEMU = DAG.getMachineFunction().getFunction()->hasFnAttribute("hqemu"); + + // 64-bit SVR4 ABI code is always position-independent. + // The actual BlockAddress is stored in the TOC. +- if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { ++ if (Subtarget.isSVR4ABI() && Subtarget.isPPC64() && !isHQEMU) { + setUsesTOCBasePtr(DAG); + SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()); + return getTOCEntry(DAG, SDLoc(BASDN), true, GA); +@@ -2642,10 +2645,11 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, + GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); + SDLoc DL(GSDN); + const GlobalValue *GV = GSDN->getGlobal(); ++ bool isHQEMU = DAG.getMachineFunction().getFunction()->hasFnAttribute("hqemu"); + + // 64-bit SVR4 ABI code is always position-independent. + // The actual address of the GlobalValue is stored in the TOC. 
+- if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { ++ if (Subtarget.isSVR4ABI() && Subtarget.isPPC64() && !isHQEMU) { + setUsesTOCBasePtr(DAG); + SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset()); + return getTOCEntry(DAG, DL, true, GA); +@@ -2655,7 +2659,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, + bool IsPIC = isPositionIndependent(); + getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV); + +- if (IsPIC && Subtarget.isSVR4ABI()) { ++ if (IsPIC && Subtarget.isSVR4ABI() && !isHQEMU) { + SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, + GSDN->getOffset(), + PPCII::MO_PIC_FLAG); +diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp +index b310493..afc6c81 100644 +--- a/lib/Target/PowerPC/PPCMCInstLower.cpp ++++ b/lib/Target/PowerPC/PPCMCInstLower.cpp +@@ -141,7 +141,10 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, + void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, + AsmPrinter &AP, bool isDarwin) { + OutMI.setOpcode(MI->getOpcode()); +- ++ ++ if (MI->getDebugLoc()) ++ OutMI.setDebugLoc(&MI->getDebugLoc()); ++ + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + +diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp +index 9207165..286c2cb 100644 +--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp ++++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp +@@ -269,6 +269,13 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { + IE = PPC::VRRCRegClass.end(); I != IE; ++I) + markSuperRegs(Reserved, *I); + ++ // Reserve registers for HQEMU. ++ if (MF.getFunction()->hasFnAttribute("hqemu")) { ++ markSuperRegs(Reserved, PPC::R27); ++ if (TM.isPPC64()) ++ markSuperRegs(Reserved, PPC::R31); ++ } ++ + assert(checkAllSuperRegsMarked(Reserved)); + return Reserved; + } +@@ -882,6 +889,11 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + if (!MF.getFunction()->hasFnAttribute(Attribute::Naked)) { + if (!(hasBasePointer(MF) && FrameIndex < 0)) + Offset += MFI.getStackSize(); ++ } else { ++ if (MF.getFunction()->hasFnAttribute("hqemu") && FrameIndex >= 0) { ++ const PPCFrameLowering *TFI = getFrameLowering(MF); ++ Offset += TFI->determineFrameLayout(MF, false, false); ++ } + } + + // If we can, encode the offset directly into the instruction. 
If this is a +diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +index 10e2bbc..e6e6a66 100644 +--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp ++++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +@@ -27,6 +27,7 @@ + #include "llvm/MC/MCSymbol.h" + #include "llvm/Support/ErrorHandling.h" + #include "llvm/Support/raw_ostream.h" ++#include "llvm/IR/DebugLoc.h" + #include <cassert> + #include <cstdint> + #include <cstdlib> +@@ -150,6 +151,9 @@ public: + const MCInst &MI, const MCInstrDesc &Desc, + const MCSubtargetInfo &STI, raw_ostream &OS) const; + ++ bool EmitHQEMUInstruction(const MCInst &MI, raw_ostream &OS, ++ SmallVectorImpl<MCFixup> &Fixups) const; ++ + uint8_t DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags, + int MemOperand, const MCInstrDesc &Desc) const; + }; +@@ -1152,6 +1156,52 @@ bool X86MCCodeEmitter::emitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, + return Ret; + } + ++bool X86MCCodeEmitter:: ++EmitHQEMUInstruction(const MCInst &MI, raw_ostream &OS, ++ SmallVectorImpl<MCFixup> &Fixups) const { ++ /* NOTE: the following flags must be synchronized with those in file ++ * llvm-opc.h of the HQEMU source tree. */ ++ enum { ++ PATCH_HQEMU = 0x4182U, ++ PATCH_DUMMY, ++ PATCH_EXIT_TB, ++ PATCH_DIRECT_JUMP, ++ PATCH_TRACE_BLOCK_CHAINING, ++ PATCH_QMMU ++ }; ++ ++ unsigned Opcode = MI.getOpcode(); ++ switch (Opcode) { ++ case X86::TRAP: ++ case X86::RETQ: ++ break; ++ default: return false; ++ } ++ ++ unsigned CurByte = 0; ++ const DebugLoc *Loc = MI.getDebugLoc(); ++ if (!Loc) ++ return false; ++ ++ unsigned PatchType = Loc->getLine(); ++ if (PatchType < PATCH_HQEMU || PatchType > PATCH_QMMU) ++ return false; ++ ++ if (Opcode == X86::TRAP) { ++ for (unsigned i = 0; i != 8; ++i) ++ EmitByte(0x90, CurByte, OS); ++ return true; ++ } ++ if (Opcode == X86::RETQ) { ++ uintptr_t ExitAddr = MCII.getHQEMUExitAddr(); ++ EmitByte(0xE9, CurByte, OS); ++ EmitImmediate(MCOperand::createImm(ExitAddr), MI.getLoc(), 4, FK_PCRel_4, ++ CurByte, OS, Fixups); ++ return true; ++ } ++ return false; ++} ++ + void X86MCCodeEmitter:: + encodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, +@@ -1160,6 +1210,9 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS, + const MCInstrDesc &Desc = MCII.get(Opcode); + uint64_t TSFlags = Desc.TSFlags; + ++ if (EmitHQEMUInstruction(MI, OS, Fixups)) ++ return; ++ + // Pseudo instructions don't get encoded. + if ((TSFlags & X86II::FormMask) == X86II::Pseudo) + return; +diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp +index f294e81..10a22ae 100644 +--- a/lib/Target/X86/X86FrameLowering.cpp ++++ b/lib/Target/X86/X86FrameLowering.cpp +@@ -83,6 +83,10 @@ X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const { + /// or if frame pointer elimination is disabled. + bool X86FrameLowering::hasFP(const MachineFunction &MF) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); ++ // HQEMU does not use FramePtr for stack accesses, so return false when ++ // running in HQEMU mode. 
++ if (MF.getFunction()->hasFnAttribute("hqemu")) ++ return false; + return (MF.getTarget().Options.DisableFramePointerElim(MF) || + TRI->needsStackRealignment(MF) || + MFI.hasVarSizedObjects() || +diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp +index fd2837b..51d6e5b 100644 +--- a/lib/Target/X86/X86MCInstLower.cpp ++++ b/lib/Target/X86/X86MCInstLower.cpp +@@ -389,6 +389,9 @@ X86MCInstLower::LowerMachineOperand(const MachineInstr *MI, + void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { + OutMI.setOpcode(MI->getOpcode()); + ++ if (MI->getDebugLoc()) ++ OutMI.setDebugLoc(&MI->getDebugLoc()); ++ + for (const MachineOperand &MO : MI->operands()) + if (auto MaybeMCOp = LowerMachineOperand(MI, MO)) + OutMI.addOperand(MaybeMCOp.getValue()); +diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp +index 343da25..72550a0 100644 +--- a/lib/Target/X86/X86RegisterInfo.cpp ++++ b/lib/Target/X86/X86RegisterInfo.cpp +@@ -573,6 +573,20 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { + } + } + ++ // Reserve registers for HQEMU. ++ if (MF.getFunction()->hasFnAttribute("hqemu")) { ++ if (!Is64Bit) { ++ Reserved.set(X86::EBP); ++ Reserved.set(X86::BP); ++ Reserved.set(X86::BPL); ++ } else { ++ Reserved.set(X86::R14); ++ Reserved.set(X86::R14B); ++ Reserved.set(X86::R14D); ++ Reserved.set(X86::R14W); ++ } ++ } ++ + assert(checkAllSuperRegsMarked(Reserved, + {X86::SIL, X86::DIL, X86::BPL, X86::SPL})); + return Reserved; +diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp +index 7461061..cb2e665 100644 +--- a/lib/Transforms/Utils/Local.cpp ++++ b/lib/Transforms/Utils/Local.cpp +@@ -1531,7 +1531,8 @@ static bool markAliveBlocks(Function &F, + Changed = true; + break; + } +- if (CI->doesNotReturn()) { ++ // Do not delete instructions after llvm.trap in HQEMU mode. ++ if (!F.hasFnAttribute("hqemu") && CI->doesNotReturn()) { + // If we found a call to a no-return function, insert an unreachable + // instruction after it. Make sure there isn't *already* one there + // though. +diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp +index 8784b97..c4cf1cc 100644 +--- a/lib/Transforms/Utils/SimplifyCFG.cpp ++++ b/lib/Transforms/Utils/SimplifyCFG.cpp +@@ -1250,6 +1250,10 @@ static bool HoistThenElseCodeToIf(BranchInst *BI, + + bool Changed = false; + do { ++ // Do not hoist llvm::trap and debug instructions in HQEMU mode. ++ if (BIParent->getParent()->hasFnAttribute("hqemu")) ++ if (isa<IntrinsicInst>(I1) || I1->hasMetadata()) ++ return Changed; + // If we are hoisting the terminator instruction, don't move one (making a + // broken BB), instead clone it, and remove BI. + if (isa<TerminatorInst>(I1)) +@@ -5542,6 +5546,10 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { + BasicBlock *BB = IBI->getParent(); + bool Changed = false; + ++ // Do not delete indirectbrs of no successors in HQEMU mode. ++ if (BB->getParent()->hasFnAttribute("hqemu")) ++ return false; ++ + // Eliminate redundant destinations. 
+ SmallPtrSet<Value *, 8> Succs; + for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { diff --git a/patch/llvm/llvm-6.0.patch b/patch/llvm/llvm-6.0.patch new file mode 100644 index 0000000..12fde6d --- /dev/null +++ b/patch/llvm/llvm-6.0.patch @@ -0,0 +1,652 @@ +diff --git a/include/llvm/ExecutionEngine/ExecutionEngine.h b/include/llvm/ExecutionEngine/ExecutionEngine.h +index 77c23b4..85fa6d4 100644 +--- a/include/llvm/ExecutionEngine/ExecutionEngine.h ++++ b/include/llvm/ExecutionEngine/ExecutionEngine.h +@@ -567,6 +567,7 @@ public: + /// is called and is successful, the created engine takes ownership of the + /// memory manager. This option defaults to NULL. + EngineBuilder &setMCJITMemoryManager(std::unique_ptr<RTDyldMemoryManager> mcjmm); ++ EngineBuilder &setMCJITMemoryManager(std::shared_ptr<RTDyldMemoryManager> mcjmm); + + EngineBuilder& + setMemoryManager(std::unique_ptr<MCJITMemoryManager> MM); +diff --git a/include/llvm/MC/MCInst.h b/include/llvm/MC/MCInst.h +index db28fd0..574b66e 100644 +--- a/include/llvm/MC/MCInst.h ++++ b/include/llvm/MC/MCInst.h +@@ -29,6 +29,7 @@ class MCExpr; + class MCInst; + class MCInstPrinter; + class raw_ostream; ++class DebugLoc; + + /// \brief Instances of this class represent operands of the MCInst class. + /// This is a simple discriminated union. +@@ -160,6 +161,7 @@ class MCInst { + unsigned Opcode = 0; + SMLoc Loc; + SmallVector<MCOperand, 8> Operands; ++ const DebugLoc *DbgLoc = nullptr; + // These flags could be used to pass some info from one target subcomponent + // to another, for example, from disassembler to asm printer. The values of + // the flags have any sense on target level only (e.g. prefixes on x86). +@@ -177,6 +179,9 @@ public: + void setLoc(SMLoc loc) { Loc = loc; } + SMLoc getLoc() const { return Loc; } + ++ void setDebugLoc(const DebugLoc *Loc) { DbgLoc = Loc; } ++ const DebugLoc *getDebugLoc() const { return DbgLoc; } ++ + const MCOperand &getOperand(unsigned i) const { return Operands[i]; } + MCOperand &getOperand(unsigned i) { return Operands[i]; } + unsigned getNumOperands() const { return Operands.size(); } +diff --git a/include/llvm/MC/MCInstrInfo.h b/include/llvm/MC/MCInstrInfo.h +index 80f1f32..e5056cb 100644 +--- a/include/llvm/MC/MCInstrInfo.h ++++ b/include/llvm/MC/MCInstrInfo.h +@@ -26,6 +26,7 @@ class MCInstrInfo { + const unsigned *InstrNameIndices; // Array for name indices in InstrNameData + const char *InstrNameData; // Instruction name string pool + unsigned NumOpcodes; // Number of entries in the desc array ++ unsigned long HQEMUExitAddr; + + public: + /// \brief Initialize MCInstrInfo, called by TableGen auto-generated routines. 
+@@ -52,6 +53,9 @@ public: + assert(Opcode < NumOpcodes && "Invalid opcode!"); + return StringRef(&InstrNameData[InstrNameIndices[Opcode]]); + } ++ ++ void setHQEMUExitAddr(unsigned long Addr) { HQEMUExitAddr = Addr; } ++ unsigned long getHQEMUExitAddr() const { return HQEMUExitAddr; } + }; + + } // End llvm namespace +diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp +index 7f358a6..5ef804f 100644 +--- a/lib/CodeGen/BranchFolding.cpp ++++ b/lib/CodeGen/BranchFolding.cpp +@@ -175,6 +175,12 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF, + MachineModuleInfo *mmi, + MachineLoopInfo *mli, bool AfterPlacement) { + if (!tii) return false; ++ if (MF.getFunction().hasFnAttribute("hqemu")) { ++ switch (MF.getTarget().getTargetTriple().getArch()) { ++ case Triple::x86: case Triple::x86_64: break; ++ default: return false; ++ } ++ } + + TriedMerging.clear(); + +diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp +index c598857..c7ecd12 100644 +--- a/lib/ExecutionEngine/ExecutionEngine.cpp ++++ b/lib/ExecutionEngine/ExecutionEngine.cpp +@@ -496,6 +496,13 @@ EngineBuilder &EngineBuilder::setMCJITMemoryManager( + return *this; + } + ++EngineBuilder &EngineBuilder::setMCJITMemoryManager( ++ std::shared_ptr<RTDyldMemoryManager> mcjmm) { ++ MemMgr = mcjmm; ++ Resolver = mcjmm; ++ return *this; ++} ++ + EngineBuilder& + EngineBuilder::setMemoryManager(std::unique_ptr<MCJITMemoryManager> MM) { + MemMgr = std::shared_ptr<MCJITMemoryManager>(std::move(MM)); +diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp +index 65dae03..09e5858 100644 +--- a/lib/Target/AArch64/AArch64MCInstLower.cpp ++++ b/lib/Target/AArch64/AArch64MCInstLower.cpp +@@ -239,6 +239,9 @@ bool AArch64MCInstLower::lowerOperand(const MachineOperand &MO, + void AArch64MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { + OutMI.setOpcode(MI->getOpcode()); + ++ if (MI->getDebugLoc()) ++ OutMI.setDebugLoc(&MI->getDebugLoc()); ++ + for (const MachineOperand &MO : MI->operands()) { + MCOperand MCOp; + if (lowerOperand(MO, MCOp)) +diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp +index 88dd297..4b2ccd8 100644 +--- a/lib/Target/AArch64/AArch64RegisterInfo.cpp ++++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp +@@ -132,6 +132,12 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const { + if (hasBasePointer(MF)) + markSuperRegs(Reserved, AArch64::W19); + ++ // Reserve registers for HQEMU. 
++ if (MF.getFunction().hasFnAttribute("hqemu")) { ++ markSuperRegs(Reserved, AArch64::W19); ++ markSuperRegs(Reserved, AArch64::W28); ++ } ++ + assert(checkAllSuperRegsMarked(Reserved)); + return Reserved; + } +diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +index 33698d2..9735e88 100644 +--- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp ++++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +@@ -29,6 +29,7 @@ + #include "llvm/Support/EndianStream.h" + #include "llvm/Support/ErrorHandling.h" + #include "llvm/Support/raw_ostream.h" ++#include "llvm/IR/DebugLoc.h" + #include <cassert> + #include <cstdint> + +@@ -180,6 +181,10 @@ public: + unsigned fixOneOperandFPComparison(const MCInst &MI, unsigned EncodedValue, + const MCSubtargetInfo &STI) const; + ++ bool EmitHQEMUInstruction(const MCInst &MI, raw_ostream &OS, ++ SmallVectorImpl<MCFixup> &Fixups, ++ const MCSubtargetInfo &STI) const; ++ + private: + uint64_t computeAvailableFeatures(const FeatureBitset &FB) const; + void verifyInstructionPredicates(const MCInst &MI, +@@ -552,9 +557,85 @@ unsigned AArch64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue, + return EncodedValue & ~(1u << 30); + } + ++bool AArch64MCCodeEmitter:: ++EmitHQEMUInstruction(const MCInst &MI, raw_ostream &OS, ++ SmallVectorImpl<MCFixup> &Fixups, ++ const MCSubtargetInfo &STI) const { ++ /* NOTE: the following flags must be synchronized with those in file ++ * llvm-opc.h of the HQEMU source tree. */ ++ enum { ++ PATCH_HQEMU = 0x4182U, ++ PATCH_DUMMY, ++ PATCH_EXIT_TB, ++ PATCH_DIRECT_JUMP, ++ PATCH_TRACE_BLOCK_CHAINING, ++ PATCH_QMMU ++ }; ++ ++ unsigned Opcode = MI.getOpcode(); ++ switch (Opcode) { ++ case AArch64::BRK: ++ case AArch64::RET: ++ break; ++ default: return false; ++ } ++ ++ const DebugLoc *Loc = MI.getDebugLoc(); ++ if (!Loc) ++ return false; ++ ++ unsigned PatchType = Loc->getLine(); ++ if (PatchType < PATCH_HQEMU || PatchType > PATCH_QMMU) ++ return false; ++ ++ if (Opcode == AArch64::BRK) { ++ uint64_t Binary = 0; ++ MCOperand Operand = MCOperand::createImm(1); ++ MCInst Jump; ++ ++ Jump.setOpcode(AArch64::B); ++ Jump.addOperand(Operand); ++ Binary = getBinaryCodeForInstr(Jump, Fixups, STI); ++ support::endian::Writer<support::little>(OS).write<uint32_t>(Binary); ++ ++MCNumEmitted; ++ return true; ++ } ++ if (Opcode == AArch64::RET) { ++ uint64_t ExitAddr = MCII.getHQEMUExitAddr(); ++ uint32_t Binary[4]; ++ MCOperand Reg = MCOperand::createReg(AArch64::X1); ++ MCInst Jump, Mov; ++ ++ // mov w0, ExitAddr[15:0] ++ Binary[0] = (0x2 << 29) | 0x1; ++ Binary[0] |= (0x25 << 23); ++ Binary[0] |= ((ExitAddr & 0xFFFF) << 5); ++ ++ // movk w0, ExitAddr[31:16] ++ Binary[1] = (0x3 << 29) | 0x1; ++ Binary[1] |= (0x25 << 23); ++ Binary[1] |= (0x1 << 21); ++ Binary[1] |= ((ExitAddr & 0xFFFF0000) >> 11); ++ ++ Jump.setOpcode(AArch64::BR); ++ Jump.addOperand(Reg); ++ Binary[2] = getBinaryCodeForInstr(Jump, Fixups, STI); ++ ++ for (int i = 0; i < 3; ++i) { ++ support::endian::Writer<support::little>(OS).write<uint32_t>(Binary[i]); ++ ++MCNumEmitted; ++ } ++ return true; ++ } ++ return false; ++} ++ + void AArch64MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const { ++ if (EmitHQEMUInstruction(MI, OS, Fixups, STI)) ++ return; ++ + verifyInstructionPredicates(MI, + computeAvailableFeatures(STI.getFeatureBits())); + +diff --git 
a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +index 92c8c22..befec89 100644 +--- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp ++++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +@@ -30,6 +30,7 @@ + #include "llvm/Support/ErrorHandling.h" + #include "llvm/Support/MathExtras.h" + #include "llvm/Support/raw_ostream.h" ++#include "llvm/IR/DebugLoc.h" + #include <cassert> + #include <cstdint> + +@@ -109,9 +110,16 @@ public: + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const; + ++ bool EmitHQEMUInstruction(const MCInst &MI, raw_ostream &OS, ++ SmallVectorImpl<MCFixup> &Fixups, ++ const MCSubtargetInfo &STI) const; ++ + void encodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, + const MCSubtargetInfo &STI) const override { ++ if (EmitHQEMUInstruction(MI, OS, Fixups, STI)) ++ return; ++ + verifyInstructionPredicates(MI, + computeAvailableFeatures(STI.getFeatureBits())); + +@@ -386,5 +394,75 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, + return MO.getImm(); + } + ++bool PPCMCCodeEmitter:: ++EmitHQEMUInstruction(const MCInst &MI, raw_ostream &OS, ++ SmallVectorImpl<MCFixup> &Fixups, ++ const MCSubtargetInfo &STI) const { ++ /* NOTE: the following flags must be synchronized with those in file ++ * llvm-opc.h of the HQEMU source tree. */ ++ enum { ++ PATCH_HQEMU = 0x4182U, ++ PATCH_DUMMY, ++ PATCH_EXIT_TB, ++ PATCH_DIRECT_JUMP, ++ PATCH_TRACE_BLOCK_CHAINING, ++ PATCH_QMMU ++ }; ++ ++ unsigned Opcode = MI.getOpcode(); ++ switch (Opcode) { ++ case PPC::TRAP: ++ case PPC::BLR: ++ case PPC::BLR8: ++ break; ++ default: return false; ++ } ++ ++ const DebugLoc *Loc = MI.getDebugLoc(); ++ if (!Loc) ++ return false; ++ ++ unsigned PatchType = Loc->getLine(); ++ if (PatchType < PATCH_HQEMU || PatchType > PATCH_QMMU) ++ return false; ++ ++ if (Opcode == PPC::TRAP) { ++ uint64_t Bits = 0; ++ MCInst NopInst; ++ NopInst.setOpcode(PPC::NOP); ++ Bits = getBinaryCodeForInstr(NopInst, Fixups, STI); ++ for (unsigned i = 0; i != 5; ++i) { ++ if (IsLittleEndian) { ++ support::endian::Writer<support::little>(OS).write<uint32_t>(Bits); ++ } else { ++ support::endian::Writer<support::big>(OS).write<uint32_t>(Bits); ++ } ++ } ++ MCNumEmitted += 5; ++ return true; ++ } ++ if (Opcode == PPC::BLR || Opcode == PPC::BLR8) { ++ uint64_t Bits[2]; ++ MCInst Inst[2]; ++ Inst[0].setOpcode(PPC::MTCTR); ++ Inst[0].addOperand(MCOperand::createReg(PPC::R31)); ++ Inst[1].setOpcode(PPC::BCTR); ++ Bits[0] = getBinaryCodeForInstr(Inst[0], Fixups, STI); ++ Bits[1] = getBinaryCodeForInstr(Inst[1], Fixups, STI); ++ ++ if (IsLittleEndian) { ++ support::endian::Writer<support::little>(OS).write<uint32_t>(Bits[0]); ++ support::endian::Writer<support::little>(OS).write<uint32_t>(Bits[1]); ++ } else { ++ support::endian::Writer<support::big>(OS).write<uint32_t>(Bits[0]); ++ support::endian::Writer<support::big>(OS).write<uint32_t>(Bits[1]); ++ } ++ ++ MCNumEmitted += 2; ++ return true; ++ } ++ return false; ++} ++ + #define ENABLE_INSTR_PREDICATE_VERIFIER + #include "PPCGenMCCodeEmitter.inc" +diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp +index f0e8b11..a96a36d 100644 +--- a/lib/Target/PowerPC/PPCISelLowering.cpp ++++ b/lib/Target/PowerPC/PPCISelLowering.cpp +@@ -2442,10 +2442,11 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, + EVT PtrVT = Op.getValueType(); + ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); + const Constant *C = 
CP->getConstVal(); ++ bool isHQEMU = DAG.getMachineFunction().getFunction().hasFnAttribute("hqemu"); + + // 64-bit SVR4 ABI code is always position-independent. + // The actual address of the GlobalValue is stored in the TOC. +- if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { ++ if (Subtarget.isSVR4ABI() && Subtarget.isPPC64() && !isHQEMU) { + setUsesTOCBasePtr(DAG); + SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0); + return getTOCEntry(DAG, SDLoc(CP), true, GA); +@@ -2455,7 +2456,7 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, + bool IsPIC = isPositionIndependent(); + getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); + +- if (IsPIC && Subtarget.isSVR4ABI()) { ++ if (IsPIC && Subtarget.isSVR4ABI() && !isHQEMU) { + SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), + PPCII::MO_PIC_FLAG); + return getTOCEntry(DAG, SDLoc(CP), false, GA); +@@ -2518,10 +2519,11 @@ PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF, + SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { + EVT PtrVT = Op.getValueType(); + JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); ++ bool isHQEMU = DAG.getMachineFunction().getFunction().hasFnAttribute("hqemu"); + + // 64-bit SVR4 ABI code is always position-independent. + // The actual address of the GlobalValue is stored in the TOC. +- if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { ++ if (Subtarget.isSVR4ABI() && Subtarget.isPPC64() && !isHQEMU) { + setUsesTOCBasePtr(DAG); + SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); + return getTOCEntry(DAG, SDLoc(JT), true, GA); +@@ -2531,7 +2533,7 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { + bool IsPIC = isPositionIndependent(); + getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag); + +- if (IsPIC && Subtarget.isSVR4ABI()) { ++ if (IsPIC && Subtarget.isSVR4ABI() && !isHQEMU) { + SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, + PPCII::MO_PIC_FLAG); + return getTOCEntry(DAG, SDLoc(GA), false, GA); +@@ -2547,10 +2549,11 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, + EVT PtrVT = Op.getValueType(); + BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op); + const BlockAddress *BA = BASDN->getBlockAddress(); ++ bool isHQEMU = DAG.getMachineFunction().getFunction().hasFnAttribute("hqemu"); + + // 64-bit SVR4 ABI code is always position-independent. + // The actual BlockAddress is stored in the TOC. +- if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { ++ if (Subtarget.isSVR4ABI() && Subtarget.isPPC64() && !isHQEMU) { + setUsesTOCBasePtr(DAG); + SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()); + return getTOCEntry(DAG, SDLoc(BASDN), true, GA); +@@ -2660,10 +2663,11 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, + GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); + SDLoc DL(GSDN); + const GlobalValue *GV = GSDN->getGlobal(); ++ bool isHQEMU = DAG.getMachineFunction().getFunction().hasFnAttribute("hqemu"); + + // 64-bit SVR4 ABI code is always position-independent. + // The actual address of the GlobalValue is stored in the TOC. 
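
For context, a minimal sketch (not part of this patch) of how a string function attribute like "hqemu" is attached and later queried with the LLVM C++ API; the PowerPC lowering hunks here and below key all of their !isHQEMU checks off this attribute. The helper names and the exact place where HQEMU tags its trace functions are assumptions — only the hasFnAttribute("hqemu") test itself appears in this diff.

#include "llvm/IR/Function.h"
#include "llvm/CodeGen/MachineFunction.h"

// Assumed helper: mark an LLVM function as HQEMU-generated trace code.
void tagTraceFunction(llvm::Function &F) {
  F.addFnAttr("hqemu");
}

// The same test the backend hunks in this patch perform on the MachineFunction.
bool isHQEMUFunction(const llvm::MachineFunction &MF) {
  return MF.getFunction().hasFnAttribute("hqemu");
}
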
+- if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { ++ if (Subtarget.isSVR4ABI() && Subtarget.isPPC64() && !isHQEMU) { + setUsesTOCBasePtr(DAG); + SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset()); + return getTOCEntry(DAG, DL, true, GA); +@@ -2673,7 +2677,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, + bool IsPIC = isPositionIndependent(); + getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV); + +- if (IsPIC && Subtarget.isSVR4ABI()) { ++ if (IsPIC && Subtarget.isSVR4ABI() && !isHQEMU) { + SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, + GSDN->getOffset(), + PPCII::MO_PIC_FLAG); +diff --git a/lib/Target/PowerPC/PPCMCInstLower.cpp b/lib/Target/PowerPC/PPCMCInstLower.cpp +index 1e40711..496238a 100644 +--- a/lib/Target/PowerPC/PPCMCInstLower.cpp ++++ b/lib/Target/PowerPC/PPCMCInstLower.cpp +@@ -141,7 +141,10 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, + void llvm::LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI, + AsmPrinter &AP, bool isDarwin) { + OutMI.setOpcode(MI->getOpcode()); +- ++ ++ if (MI->getDebugLoc()) ++ OutMI.setDebugLoc(&MI->getDebugLoc()); ++ + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MCOperand MCOp; + if (LowerPPCMachineOperandToMCOperand(MI->getOperand(i), MCOp, AP, +diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp +index 6b62a82..cc5a73b 100644 +--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp ++++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp +@@ -279,6 +279,13 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { + IE = PPC::VRRCRegClass.end(); I != IE; ++I) + markSuperRegs(Reserved, *I); + ++ // Reserve registers for HQEMU. ++ if (MF.getFunction().hasFnAttribute("hqemu")) { ++ markSuperRegs(Reserved, PPC::R27); ++ if (TM.isPPC64()) ++ markSuperRegs(Reserved, PPC::R31); ++ } ++ + assert(checkAllSuperRegsMarked(Reserved)); + return Reserved; + } +@@ -904,6 +911,11 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, + if (!MF.getFunction().hasFnAttribute(Attribute::Naked)) { + if (!(hasBasePointer(MF) && FrameIndex < 0)) + Offset += MFI.getStackSize(); ++ } else { ++ if (MF.getFunction().hasFnAttribute("hqemu") && FrameIndex >= 0) { ++ const PPCFrameLowering *TFI = getFrameLowering(MF); ++ Offset += TFI->determineFrameLayout(MF, false, false); ++ } + } + + // If we can, encode the offset directly into the instruction. 
If this is a +diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +index 4ddc1f0..c564e71 100644 +--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp ++++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +@@ -27,6 +27,7 @@ + #include "llvm/MC/MCSymbol.h" + #include "llvm/Support/ErrorHandling.h" + #include "llvm/Support/raw_ostream.h" ++#include "llvm/IR/DebugLoc.h" + #include <cassert> + #include <cstdint> + #include <cstdlib> +@@ -150,6 +151,9 @@ public: + const MCInst &MI, const MCInstrDesc &Desc, + const MCSubtargetInfo &STI, raw_ostream &OS) const; + ++ bool EmitHQEMUInstruction(const MCInst &MI, raw_ostream &OS, ++ SmallVectorImpl<MCFixup> &Fixups) const; ++ + uint8_t DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags, + int MemOperand, const MCInstrDesc &Desc) const; + }; +@@ -1158,6 +1162,52 @@ bool X86MCCodeEmitter::emitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, + return Ret; + } + ++bool X86MCCodeEmitter:: ++EmitHQEMUInstruction(const MCInst &MI, raw_ostream &OS, ++ SmallVectorImpl<MCFixup> &Fixups) const { ++ /* NOTE: the following flags must be synchronized with those in file ++ * llvm-opc.h of the HQEMU source tree. */ ++ enum { ++ PATCH_HQEMU = 0x4182U, ++ PATCH_DUMMY, ++ PATCH_EXIT_TB, ++ PATCH_DIRECT_JUMP, ++ PATCH_TRACE_BLOCK_CHAINING, ++ PATCH_QMMU ++ }; ++ ++ unsigned Opcode = MI.getOpcode(); ++ switch (Opcode) { ++ case X86::TRAP: ++ case X86::RETQ: ++ break; ++ default: return false; ++ } ++ ++ unsigned CurByte = 0; ++ const DebugLoc *Loc = MI.getDebugLoc(); ++ if (!Loc) ++ return false; ++ ++ unsigned PatchType = Loc->getLine(); ++ if (PatchType < PATCH_HQEMU || PatchType > PATCH_QMMU) ++ return false; ++ ++ if (Opcode == X86::TRAP) { ++ for (unsigned i = 0; i != 8; ++i) ++ EmitByte(0x90, CurByte, OS); ++ return true; ++ } ++ if (Opcode == X86::RETQ) { ++ uintptr_t ExitAddr = MCII.getHQEMUExitAddr(); ++ EmitByte(0xE9, CurByte, OS); ++ EmitImmediate(MCOperand::createImm(ExitAddr), MI.getLoc(), 4, FK_PCRel_4, ++ CurByte, OS, Fixups); ++ return true; ++ } ++ return false; ++} ++ + void X86MCCodeEmitter:: + encodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl<MCFixup> &Fixups, +@@ -1167,6 +1217,9 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS, + uint64_t TSFlags = Desc.TSFlags; + unsigned Flags = MI.getFlags(); + ++ if (EmitHQEMUInstruction(MI, OS, Fixups)) ++ return; ++ + // Pseudo instructions don't get encoded. + if ((TSFlags & X86II::FormMask) == X86II::Pseudo) + return; +diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp +index 11808f8..bd10b86 100644 +--- a/lib/Target/X86/X86FrameLowering.cpp ++++ b/lib/Target/X86/X86FrameLowering.cpp +@@ -83,6 +83,10 @@ X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const { + /// or if frame pointer elimination is disabled. + bool X86FrameLowering::hasFP(const MachineFunction &MF) const { + const MachineFrameInfo &MFI = MF.getFrameInfo(); ++ // HQEMU does not use FramePtr for stack accesses, so return false when ++ // running in HQEMU mode. 
++ if (MF.getFunction().hasFnAttribute("hqemu")) ++ return false; + return (MF.getTarget().Options.DisableFramePointerElim(MF) || + TRI->needsStackRealignment(MF) || + MFI.hasVarSizedObjects() || +diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp +index 730ba74..c1b3ef6 100644 +--- a/lib/Target/X86/X86MCInstLower.cpp ++++ b/lib/Target/X86/X86MCInstLower.cpp +@@ -389,6 +389,9 @@ X86MCInstLower::LowerMachineOperand(const MachineInstr *MI, + void X86MCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const { + OutMI.setOpcode(MI->getOpcode()); + ++ if (MI->getDebugLoc()) ++ OutMI.setDebugLoc(&MI->getDebugLoc()); ++ + for (const MachineOperand &MO : MI->operands()) + if (auto MaybeMCOp = LowerMachineOperand(MI, MO)) + OutMI.addOperand(MaybeMCOp.getValue()); +diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp +index bc31e95..893ff41 100644 +--- a/lib/Target/X86/X86RegisterInfo.cpp ++++ b/lib/Target/X86/X86RegisterInfo.cpp +@@ -570,6 +570,20 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { + } + } + ++ // Reserve registers for HQEMU. ++ if (MF.getFunction().hasFnAttribute("hqemu")) { ++ if (!Is64Bit) { ++ Reserved.set(X86::EBP); ++ Reserved.set(X86::BP); ++ Reserved.set(X86::BPL); ++ } else { ++ Reserved.set(X86::R14); ++ Reserved.set(X86::R14B); ++ Reserved.set(X86::R14D); ++ Reserved.set(X86::R14W); ++ } ++ } ++ + assert(checkAllSuperRegsMarked(Reserved, + {X86::SIL, X86::DIL, X86::BPL, X86::SPL})); + return Reserved; +diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp +index acccf7a..f2ab12d 100644 +--- a/lib/Transforms/Utils/Local.cpp ++++ b/lib/Transforms/Utils/Local.cpp +@@ -1587,7 +1587,8 @@ static bool markAliveBlocks(Function &F, + Changed = true; + break; + } +- if (CI->doesNotReturn()) { ++ // Do not delete instructions after llvm.trap in HQEMU mode. ++ if (!F.hasFnAttribute("hqemu") && CI->doesNotReturn()) { + // If we found a call to a no-return function, insert an unreachable + // instruction after it. Make sure there isn't *already* one there + // though. +diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp +index 7c19578..30f3481 100644 +--- a/lib/Transforms/Utils/SimplifyCFG.cpp ++++ b/lib/Transforms/Utils/SimplifyCFG.cpp +@@ -1271,6 +1271,10 @@ static bool HoistThenElseCodeToIf(BranchInst *BI, + + bool Changed = false; + do { ++ // Do not hoist llvm::trap and debug instructions in HQEMU mode. ++ if (BI->getParent()->getParent()->hasFnAttribute("hqemu")) ++ if (isa<IntrinsicInst>(I1) || I1->hasMetadata()) ++ return Changed; + // If we are hoisting the terminator instruction, don't move one (making a + // broken BB), instead clone it, and remove BI. + if (isa<TerminatorInst>(I1)) +@@ -5600,6 +5604,10 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) { + BasicBlock *BB = IBI->getParent(); + bool Changed = false; + ++ // Do not delete indirectbrs of no successors in HQEMU mode. ++ if (BB->getParent()->hasFnAttribute("hqemu")) ++ return false; ++ + // Eliminate redundant destinations. 
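
For context, the SimplifyIndirectBr guard above exists because HQEMU-generated functions can contain an indirectbr with no IR-level successors — the branch target is a host code address computed only at run time (e.g. by the IBTC lookup helpers that appear later in this patch) — and SimplifyCFG would otherwise discard such a branch. A minimal sketch under that assumption; the function name and the HostPC parameter are illustrative, not taken from the HQEMU sources.

#include "llvm/IR/IRBuilder.h"

// Assumed shape of an HQEMU trace exit: jump to a host address that is only
// known at run time, so the indirectbr deliberately carries zero successors.
void emitComputedExit(llvm::IRBuilder<> &Builder, llvm::Value *HostPC) {
  Builder.CreateIndirectBr(HostPC, /*NumDests=*/0);
}
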
+ SmallPtrSet<Value *, 8> Succs; + for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { diff --git a/qga/commands-posix.c b/qga/commands-posix.c index c2ff970..e6c9f51 100644 --- a/qga/commands-posix.c +++ b/qga/commands-posix.c @@ -15,6 +15,7 @@ #include <sys/types.h> #include <sys/ioctl.h> #include <sys/wait.h> +#include <sys/sysmacros.h> #include <unistd.h> #include <errno.h> #include <fcntl.h> diff --git a/qom/object.c b/qom/object.c index d751569..deb182f 100644 --- a/qom/object.c +++ b/qom/object.c @@ -28,6 +28,10 @@ #include "qapi/qmp/qint.h" #include "qapi/qmp/qstring.h" +#define Type QEMUType +#define class QEMUclass +#define typename QEMUtypename + #define MAX_INTERFACES 32 typedef struct InterfaceImpl InterfaceImpl; @@ -2126,3 +2130,7 @@ static void register_types(void) } type_init(register_types) + +#undef Type +#undef class +#undef typename diff --git a/softmmu_template.h b/softmmu_template.h index 6803890..4574545 100644 --- a/softmmu_template.h +++ b/softmmu_template.h @@ -24,6 +24,7 @@ #include "qemu/timer.h" #include "exec/address-spaces.h" #include "exec/memory.h" +#include "hqemu-config.h" #define DATA_SIZE (1 << SHIFT) @@ -116,6 +117,16 @@ # define helper_te_st_name helper_le_st_name #endif +#if defined(ENABLE_TLBVERSION) +#define TLB_IO_MASK (TLB_NOTDIRTY | TLB_MMIO) +#define TLB_NONIO_MASK (TARGET_PAGE_MASK | TLB_INVALID_MASK | TLB_VERSION_MASK) +#define page_val(addr, env) (((tlbaddr_t)addr & TARGET_PAGE_MASK) | tlb_version(env)) +#else +#define TLB_IO_MASK (~TARGET_PAGE_MASK) +#define TLB_NONIO_MASK (TARGET_PAGE_MASK | TLB_INVALID_MASK) +#define page_val(addr, env) ((addr & TARGET_PAGE_MASK)) +#endif + /* macro to check the victim tlb */ #define VICTIM_TLB_HIT(ty) \ ({ \ @@ -126,7 +137,7 @@ CPUIOTLBEntry tmpiotlb; \ CPUTLBEntry tmptlb; \ for (vidx = CPU_VTLB_SIZE-1; vidx >= 0; --vidx) { \ - if (env->tlb_v_table[mmu_idx][vidx].ty == (addr & TARGET_PAGE_MASK)) {\ + if (env->tlb_v_table[mmu_idx][vidx].ty == page_val(addr, env)) { \ /* found entry in victim tlb, swap tlb and iotlb */ \ tmptlb = env->tlb_table[mmu_idx][index]; \ env->tlb_table[mmu_idx][index] = env->tlb_v_table[mmu_idx][vidx]; \ @@ -170,7 +181,7 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, { unsigned mmu_idx = get_mmuidx(oi); int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; + tlbaddr_t tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; uintptr_t haddr; DATA_TYPE res; @@ -178,8 +189,7 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, retaddr -= GETPC_ADJ; /* If the TLB entry is for a different page, reload and try again. */ - if ((addr & TARGET_PAGE_MASK) - != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) { + if (page_val(addr, env) != (tlb_addr & TLB_NONIO_MASK)) { if ((addr & (DATA_SIZE - 1)) != 0 && (get_memop(oi) & MO_AMASK) == MO_ALIGN) { cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE, @@ -193,7 +203,7 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr, } /* Handle an IO access. 
*/ - if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { + if (unlikely(tlb_addr & TLB_IO_MASK)) { CPUIOTLBEntry *iotlbentry; if ((addr & (DATA_SIZE - 1)) != 0) { goto do_unaligned_access; @@ -254,7 +264,7 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, { unsigned mmu_idx = get_mmuidx(oi); int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; + tlbaddr_t tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; uintptr_t haddr; DATA_TYPE res; @@ -262,8 +272,7 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, retaddr -= GETPC_ADJ; /* If the TLB entry is for a different page, reload and try again. */ - if ((addr & TARGET_PAGE_MASK) - != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) { + if (page_val(addr, env) != (tlb_addr & TLB_NONIO_MASK)) { if ((addr & (DATA_SIZE - 1)) != 0 && (get_memop(oi) & MO_AMASK) == MO_ALIGN) { cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE, @@ -277,7 +286,7 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr, } /* Handle an IO access. */ - if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { + if (unlikely(tlb_addr & TLB_IO_MASK)) { CPUIOTLBEntry *iotlbentry; if ((addr & (DATA_SIZE - 1)) != 0) { goto do_unaligned_access; @@ -375,15 +384,14 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, { unsigned mmu_idx = get_mmuidx(oi); int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write; + tlbaddr_t tlb_addr = env->tlb_table[mmu_idx][index].addr_write; uintptr_t haddr; /* Adjust the given return address. */ retaddr -= GETPC_ADJ; /* If the TLB entry is for a different page, reload and try again. */ - if ((addr & TARGET_PAGE_MASK) - != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) { + if (page_val(addr, env) != (tlb_addr & TLB_NONIO_MASK)) { if ((addr & (DATA_SIZE - 1)) != 0 && (get_memop(oi) & MO_AMASK) == MO_ALIGN) { cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE, @@ -396,7 +404,7 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, } /* Handle an IO access. */ - if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { + if (unlikely(tlb_addr & TLB_IO_MASK)) { CPUIOTLBEntry *iotlbentry; if ((addr & (DATA_SIZE - 1)) != 0) { goto do_unaligned_access; @@ -455,15 +463,14 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, { unsigned mmu_idx = get_mmuidx(oi); int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write; + tlbaddr_t tlb_addr = env->tlb_table[mmu_idx][index].addr_write; uintptr_t haddr; /* Adjust the given return address. */ retaddr -= GETPC_ADJ; /* If the TLB entry is for a different page, reload and try again. */ - if ((addr & TARGET_PAGE_MASK) - != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) { + if (page_val(addr, env) != (tlb_addr & TLB_NONIO_MASK)) { if ((addr & (DATA_SIZE - 1)) != 0 && (get_memop(oi) & MO_AMASK) == MO_ALIGN) { cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE, @@ -476,7 +483,7 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, } /* Handle an IO access. 
*/ - if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) { + if (unlikely(tlb_addr & TLB_IO_MASK)) { CPUIOTLBEntry *iotlbentry; if ((addr & (DATA_SIZE - 1)) != 0) { goto do_unaligned_access; @@ -537,10 +544,9 @@ void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx, uintptr_t retaddr) { int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); - target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write; + tlbaddr_t tlb_addr = env->tlb_table[mmu_idx][index].addr_write; - if ((addr & TARGET_PAGE_MASK) - != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) { + if (page_val(addr, env) != (tlb_addr & TLB_NONIO_MASK)) { /* TLB entry is for a different page */ if (!VICTIM_TLB_HIT(addr_write)) { tlb_fill(ENV_GET_CPU(env), addr, MMU_DATA_STORE, mmu_idx, retaddr); @@ -550,6 +556,11 @@ void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx, #endif #endif /* !defined(SOFTMMU_CODE_ACCESS) */ +#include "softmmu_template_llvm.h" + +#undef TLB_IO_MASK +#undef TLB_NONIO_MASK +#undef page_val #undef READ_ACCESS_TYPE #undef SHIFT #undef DATA_TYPE diff --git a/softmmu_template_llvm.h b/softmmu_template_llvm.h new file mode 100644 index 0000000..0a5f4bf --- /dev/null +++ b/softmmu_template_llvm.h @@ -0,0 +1,384 @@ +/* + * Software MMU support for LLVM + */ + +#if DATA_SIZE == 1 +# define llvm_le_ld_name glue(glue(llvm_ret_ld, USUFFIX), MMUSUFFIX) +# define llvm_be_ld_name llvm_le_ld_name +# define llvm_le_lds_name glue(glue(llvm_ret_ld, SSUFFIX), MMUSUFFIX) +# define llvm_be_lds_name llvm_le_lds_name +# define llvm_le_st_name glue(glue(llvm_ret_st, SUFFIX), MMUSUFFIX) +# define llvm_be_st_name llvm_le_st_name +#else +# define llvm_le_ld_name glue(glue(llvm_le_ld, USUFFIX), MMUSUFFIX) +# define llvm_be_ld_name glue(glue(llvm_be_ld, USUFFIX), MMUSUFFIX) +# define llvm_le_lds_name glue(glue(llvm_le_ld, SSUFFIX), MMUSUFFIX) +# define llvm_be_lds_name glue(glue(llvm_be_ld, SSUFFIX), MMUSUFFIX) +# define llvm_le_st_name glue(glue(llvm_le_st, SUFFIX), MMUSUFFIX) +# define llvm_be_st_name glue(glue(llvm_be_st, SUFFIX), MMUSUFFIX) +#endif + +#ifdef TARGET_WORDS_BIGENDIAN +# define llvm_te_ld_name llvm_be_ld_name +# define llvm_te_st_name llvm_be_st_name +#else +# define llvm_te_ld_name llvm_le_ld_name +# define llvm_te_st_name llvm_le_st_name +#endif + + +#ifndef SOFTMMU_CODE_ACCESS +WORD_TYPE llvm_le_ld_name(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi) +{ + unsigned mmu_idx = get_mmuidx((uint16_t)oi); + int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); + tlbaddr_t tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; + uintptr_t haddr; + DATA_TYPE res; + uintptr_t retaddr; + + env->restore_val = oi >> 16; + + /* Adjust the given return address. */ + retaddr = GETPC(); + + /* If the TLB entry is for a different page, reload and try again. */ + if (page_val(addr, env) != (tlb_addr & TLB_NONIO_MASK)) { + if ((addr & (DATA_SIZE - 1)) != 0 + && (get_memop(oi) & MO_AMASK) == MO_ALIGN) { + cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE, + mmu_idx, retaddr); + } + if (!VICTIM_TLB_HIT(ADDR_READ)) { + tlb_fill(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE, + mmu_idx, retaddr); + } + tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; + } + + /* Handle an IO access. */ + if (unlikely(tlb_addr & TLB_IO_MASK)) { + CPUIOTLBEntry *iotlbentry; + if ((addr & (DATA_SIZE - 1)) != 0) { + goto do_unaligned_access; + } + iotlbentry = &env->iotlb[mmu_idx][index]; + + /* ??? Note that the io helpers always read data in the target + byte ordering. 
We should push the LE/BE request down into io. */ + res = glue(io_read, SUFFIX)(env, iotlbentry, addr, retaddr); + res = TGT_LE(res); + return res; + } + + /* Handle slow unaligned access (it spans two pages or IO). */ + if (DATA_SIZE > 1 + && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1 + >= TARGET_PAGE_SIZE)) { + target_ulong addr1, addr2; + DATA_TYPE res1, res2; + unsigned shift; + do_unaligned_access: + if ((get_memop(oi) & MO_AMASK) == MO_ALIGN) { + cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE, + mmu_idx, retaddr); + } + addr1 = addr & ~(DATA_SIZE - 1); + addr2 = addr1 + DATA_SIZE; + /* Note the adjustment at the beginning of the function. + Undo that for the recursion. */ + res1 = helper_le_ld_name(env, addr1, oi, retaddr + GETPC_ADJ); + res2 = helper_le_ld_name(env, addr2, oi, retaddr + GETPC_ADJ); + shift = (addr & (DATA_SIZE - 1)) * 8; + + /* Little-endian combine. */ + res = (res1 >> shift) | (res2 << ((DATA_SIZE * 8) - shift)); + return res; + } + + /* Handle aligned access or unaligned access in the same page. */ + if ((addr & (DATA_SIZE - 1)) != 0 + && (get_memop(oi) & MO_AMASK) == MO_ALIGN) { + cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE, + mmu_idx, retaddr); + } + + haddr = addr + env->tlb_table[mmu_idx][index].addend; +#if DATA_SIZE == 1 + res = glue(glue(ld, LSUFFIX), _p)((uint8_t *)haddr); +#else + res = glue(glue(ld, LSUFFIX), _le_p)((uint8_t *)haddr); +#endif + return res; +} + +#if DATA_SIZE > 1 +WORD_TYPE llvm_be_ld_name(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi) +{ + unsigned mmu_idx = get_mmuidx((uint16_t)oi); + int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); + tlbaddr_t tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; + uintptr_t haddr; + DATA_TYPE res; + uintptr_t retaddr; + + env->restore_val = oi >> 16; + + /* Adjust the given return address. */ + retaddr = GETPC(); + + /* If the TLB entry is for a different page, reload and try again. */ + if (page_val(addr, env) != (tlb_addr & TLB_NONIO_MASK)) { + if ((addr & (DATA_SIZE - 1)) != 0 + && (get_memop(oi) & MO_AMASK) == MO_ALIGN) { + cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE, + mmu_idx, retaddr); + } + if (!VICTIM_TLB_HIT(ADDR_READ)) { + tlb_fill(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE, + mmu_idx, retaddr); + } + tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ; + } + + /* Handle an IO access. */ + if (unlikely(tlb_addr & TLB_IO_MASK)) { + CPUIOTLBEntry *iotlbentry; + if ((addr & (DATA_SIZE - 1)) != 0) { + goto do_unaligned_access; + } + iotlbentry = &env->iotlb[mmu_idx][index]; + + /* ??? Note that the io helpers always read data in the target + byte ordering. We should push the LE/BE request down into io. */ + res = glue(io_read, SUFFIX)(env, iotlbentry, addr, retaddr); + res = TGT_BE(res); + return res; + } + + /* Handle slow unaligned access (it spans two pages or IO). */ + if (DATA_SIZE > 1 + && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1 + >= TARGET_PAGE_SIZE)) { + target_ulong addr1, addr2; + DATA_TYPE res1, res2; + unsigned shift; + do_unaligned_access: + if ((get_memop(oi) & MO_AMASK) == MO_ALIGN) { + cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE, + mmu_idx, retaddr); + } + addr1 = addr & ~(DATA_SIZE - 1); + addr2 = addr1 + DATA_SIZE; + /* Note the adjustment at the beginning of the function. + Undo that for the recursion. 
*/ + res1 = helper_be_ld_name(env, addr1, oi, retaddr + GETPC_ADJ); + res2 = helper_be_ld_name(env, addr2, oi, retaddr + GETPC_ADJ); + shift = (addr & (DATA_SIZE - 1)) * 8; + + /* Big-endian combine. */ + res = (res1 << shift) | (res2 >> ((DATA_SIZE * 8) - shift)); + return res; + } + + /* Handle aligned access or unaligned access in the same page. */ + if ((addr & (DATA_SIZE - 1)) != 0 + && (get_memop(oi) & MO_AMASK) == MO_ALIGN) { + cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE, + mmu_idx, retaddr); + } + + haddr = addr + env->tlb_table[mmu_idx][index].addend; + res = glue(glue(ld, LSUFFIX), _be_p)((uint8_t *)haddr); + return res; +} +#endif /* DATA_SIZE > 1 */ + +/* Provide signed versions of the load routines as well. We can of course + avoid this for 64-bit data, or for 32-bit data on 32-bit host. */ +#if DATA_SIZE * 8 < TCG_TARGET_REG_BITS +WORD_TYPE llvm_le_lds_name(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi) +{ + env->restore_val = oi >> 16; + return (SDATA_TYPE)helper_le_ld_name(env, addr, (uint16_t)oi, GETRA()); +} + +# if DATA_SIZE > 1 +WORD_TYPE llvm_be_lds_name(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi) +{ + env->restore_val = oi >> 16; + return (SDATA_TYPE)helper_be_ld_name(env, addr, (uint16_t)oi, GETRA()); +} +# endif +#endif + +void llvm_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, + TCGMemOpIdx oi) +{ + unsigned mmu_idx = get_mmuidx((uint16_t)oi); + int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); + tlbaddr_t tlb_addr = env->tlb_table[mmu_idx][index].addr_write; + uintptr_t haddr; + uintptr_t retaddr; + + env->restore_val = oi >> 16; + + /* Adjust the given return address. */ + retaddr = GETPC(); + + /* If the TLB entry is for a different page, reload and try again. */ + if (page_val(addr, env) != (tlb_addr & TLB_NONIO_MASK)) { + if ((addr & (DATA_SIZE - 1)) != 0 + && (get_memop(oi) & MO_AMASK) == MO_ALIGN) { + cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE, + mmu_idx, retaddr); + } + if (!VICTIM_TLB_HIT(addr_write)) { + tlb_fill(ENV_GET_CPU(env), addr, MMU_DATA_STORE, mmu_idx, retaddr); + } + tlb_addr = env->tlb_table[mmu_idx][index].addr_write; + } + + /* Handle an IO access. */ + if (unlikely(tlb_addr & TLB_IO_MASK)) { + CPUIOTLBEntry *iotlbentry; + if ((addr & (DATA_SIZE - 1)) != 0) { + goto do_unaligned_access; + } + iotlbentry = &env->iotlb[mmu_idx][index]; + + /* ??? Note that the io helpers always read data in the target + byte ordering. We should push the LE/BE request down into io. */ + val = TGT_LE(val); + glue(io_write, SUFFIX)(env, iotlbentry, val, addr, retaddr); + return; + } + + /* Handle slow unaligned access (it spans two pages or IO). */ + if (DATA_SIZE > 1 + && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1 + >= TARGET_PAGE_SIZE)) { + int i; + do_unaligned_access: + if ((get_memop(oi) & MO_AMASK) == MO_ALIGN) { + cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE, + mmu_idx, retaddr); + } + /* XXX: not efficient, but simple */ + /* Note: relies on the fact that tlb_fill() does not remove the + * previous page from the TLB cache. */ + for (i = DATA_SIZE - 1; i >= 0; i--) { + /* Little-endian extract. */ + uint8_t val8 = val >> (i * 8); + /* Note the adjustment at the beginning of the function. + Undo that for the recursion. */ + glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8, + oi, retaddr + GETPC_ADJ); + } + return; + } + + /* Handle aligned access or unaligned access in the same page. 
*/ + if ((addr & (DATA_SIZE - 1)) != 0 + && (get_memop(oi) & MO_AMASK) == MO_ALIGN) { + cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE, + mmu_idx, retaddr); + } + + haddr = addr + env->tlb_table[mmu_idx][index].addend; +#if DATA_SIZE == 1 + glue(glue(st, SUFFIX), _p)((uint8_t *)haddr, val); +#else + glue(glue(st, SUFFIX), _le_p)((uint8_t *)haddr, val); +#endif +} + +#if DATA_SIZE > 1 +void llvm_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val, + TCGMemOpIdx oi) +{ + unsigned mmu_idx = get_mmuidx((uint16_t)oi); + int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); + tlbaddr_t tlb_addr = env->tlb_table[mmu_idx][index].addr_write; + uintptr_t haddr; + uintptr_t retaddr; + + env->restore_val = oi >> 16; + + /* Adjust the given return address. */ + retaddr = GETPC(); + + /* If the TLB entry is for a different page, reload and try again. */ + if (page_val(addr, env) != (tlb_addr & TLB_NONIO_MASK)) { + if ((addr & (DATA_SIZE - 1)) != 0 + && (get_memop(oi) & MO_AMASK) == MO_ALIGN) { + cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE, + mmu_idx, retaddr); + } + if (!VICTIM_TLB_HIT(addr_write)) { + tlb_fill(ENV_GET_CPU(env), addr, MMU_DATA_STORE, mmu_idx, retaddr); + } + tlb_addr = env->tlb_table[mmu_idx][index].addr_write; + } + + /* Handle an IO access. */ + if (unlikely(tlb_addr & TLB_IO_MASK)) { + CPUIOTLBEntry *iotlbentry; + if ((addr & (DATA_SIZE - 1)) != 0) { + goto do_unaligned_access; + } + iotlbentry = &env->iotlb[mmu_idx][index]; + + /* ??? Note that the io helpers always read data in the target + byte ordering. We should push the LE/BE request down into io. */ + val = TGT_BE(val); + glue(io_write, SUFFIX)(env, iotlbentry, val, addr, retaddr); + return; + } + + /* Handle slow unaligned access (it spans two pages or IO). */ + if (DATA_SIZE > 1 + && unlikely((addr & ~TARGET_PAGE_MASK) + DATA_SIZE - 1 + >= TARGET_PAGE_SIZE)) { + int i; + do_unaligned_access: + if ((get_memop(oi) & MO_AMASK) == MO_ALIGN) { + cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE, + mmu_idx, retaddr); + } + /* XXX: not efficient, but simple */ + /* Note: relies on the fact that tlb_fill() does not remove the + * previous page from the TLB cache. */ + for (i = DATA_SIZE - 1; i >= 0; i--) { + /* Big-endian extract. */ + uint8_t val8 = val >> (((DATA_SIZE - 1) * 8) - (i * 8)); + /* Note the adjustment at the beginning of the function. + Undo that for the recursion. */ + glue(helper_ret_stb, MMUSUFFIX)(env, addr + i, val8, + oi, retaddr + GETPC_ADJ); + } + return; + } + + /* Handle aligned access or unaligned access in the same page. */ + if ((addr & (DATA_SIZE - 1)) != 0 + && (get_memop(oi) & MO_AMASK) == MO_ALIGN) { + cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE, + mmu_idx, retaddr); + } + + haddr = addr + env->tlb_table[mmu_idx][index].addend; + glue(glue(st, SUFFIX), _be_p)((uint8_t *)haddr, val); +} +#endif /* DATA_SIZE > 1 */ + +#endif /* !defined(SOFTMMU_CODE_ACCESS) */ + +#undef llvm_le_ld_name +#undef llvm_be_ld_name +#undef llvm_le_lds_name +#undef llvm_be_lds_name +#undef llvm_le_st_name +#undef llvm_be_st_name +#undef llvm_te_ld_name +#undef llvm_te_st_name diff --git a/target-arm/cpu.h b/target-arm/cpu.h index 815fef8..1087075 100644 --- a/target-arm/cpu.h +++ b/target-arm/cpu.h @@ -437,7 +437,7 @@ typedef struct CPUARMState { * the two execution states, and means we do not need to explicitly * map these registers when changing states. 
*/ - float64 regs[64]; + float64 regs[64] __attribute__((aligned(16))); uint32_t xregs[16]; /* We store these fpcsr fields separately for convenience. */ @@ -496,6 +496,8 @@ typedef struct CPUARMState { /* Internal CPU feature flags. */ uint64_t features; + CPU_OPTIMIZATION_COMMON + /* PMSAv7 MPU */ struct { uint32_t *drbar; @@ -1509,7 +1511,7 @@ bool write_cpustate_to_list(ARMCPU *cpu); /* The ARM MMU allows 1k pages. */ /* ??? Linux doesn't actually use these, and they're deprecated in recent architecture revisions. Maybe a configure option to disable them. */ -#define TARGET_PAGE_BITS 10 +#define TARGET_PAGE_BITS 12 #endif #if defined(TARGET_AARCH64) @@ -1523,7 +1525,7 @@ bool write_cpustate_to_list(ARMCPU *cpu); static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx, unsigned int target_el) { - CPUARMState *env = cs->env_ptr; + CPUARMState *env = (CPUARMState *)cs->env_ptr; unsigned int cur_el = arm_current_el(env); bool secure = arm_is_secure(env); bool pstate_unmasked; @@ -1983,6 +1985,62 @@ static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc, *cs_base = 0; } +static inline target_ulong cpu_get_pc(CPUARMState *env) +{ +#if defined(TARGET_AARCH64) + return env->pc; +#else + return env->regs[15]; +#endif +} + +static inline int cpu_check_state(CPUARMState *env, + target_ulong cs_base, int flags) +{ + int f; + if (is_a64(env)) { + f = ARM_TBFLAG_AARCH64_STATE_MASK; + } else { + f = (env->thumb << ARM_TBFLAG_THUMB_SHIFT) + | (env->vfp.vec_len << ARM_TBFLAG_VECLEN_SHIFT) + | (env->vfp.vec_stride << ARM_TBFLAG_VECSTRIDE_SHIFT) + | (env->condexec_bits << ARM_TBFLAG_CONDEXEC_SHIFT) + | (env->bswap_code << ARM_TBFLAG_BSWAP_CODE_SHIFT); + if (!(access_secure_reg(env))) { + f |= ARM_TBFLAG_NS_MASK; + } + if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30) + || arm_el_is_aa64(env, 1)) { + f |= ARM_TBFLAG_VFPEN_MASK; + } + f |= (extract32(env->cp15.c15_cpar, 0, 2) + << ARM_TBFLAG_XSCALE_CPAR_SHIFT); + } + + f |= (cpu_mmu_index(env, false) << ARM_TBFLAG_MMUIDX_SHIFT); + /* The SS_ACTIVE and PSTATE_SS bits correspond to the state machine + * states defined in the ARM ARM for software singlestep: + * SS_ACTIVE PSTATE.SS State + * 0 x Inactive (the TB flag for SS is always 0) + * 1 0 Active-pending + * 1 1 Active-not-pending + */ + if (arm_singlestep_active(env)) { + f |= ARM_TBFLAG_SS_ACTIVE_MASK; + if (is_a64(env)) { + if (env->pstate & PSTATE_SS) { + f |= ARM_TBFLAG_PSTATE_SS_MASK; + } + } else { + if (env->uncached_cpsr & PSTATE_SS) { + f |= ARM_TBFLAG_PSTATE_SS_MASK; + } + } + } + f |= fp_exception_el(env) << ARM_TBFLAG_FPEXC_EL_SHIFT; + return f == flags; +} + #include "exec/exec-all.h" enum { diff --git a/target-arm/helper.c b/target-arm/helper.c index 1743e37..8e862d9 100644 --- a/target-arm/helper.c +++ b/target-arm/helper.c @@ -11,6 +11,7 @@ #include "arm_ldst.h" #include <zlib.h> /* For crc32 */ #include "exec/semihost.h" +#include "hqemu.h" #define ARM_CPU_FREQ 1000000000 /* FIXME: 1 GHz, should be configurable */ @@ -2225,6 +2226,8 @@ static void vmsa_ttbr_write(CPUARMState *env, const ARMCPRegInfo *ri, tlb_flush(CPU(cpu), 1); } raw_write(env, ri, value); + + pcid = (target_ulong)value >> 12; } static void vttbr_write(CPUARMState *env, const ARMCPRegInfo *ri, @@ -8091,29 +8094,23 @@ float64 VFP_HELPER(sqrt, d)(float64 a, CPUARMState *env) /* XXX: check quiet/signaling case */ #define DO_VFP_cmp(p, type) \ -void VFP_HELPER(cmp, p)(type a, type b, CPUARMState *env) \ +uint32_t VFP_HELPER(cmp, p)(type a, type b, CPUARMState *env) \ { \ - uint32_t flags; \ 
- switch(type ## _compare_quiet(a, b, &env->vfp.fp_status)) { \ - case 0: flags = 0x6; break; \ - case -1: flags = 0x8; break; \ - case 1: flags = 0x2; break; \ - default: case 2: flags = 0x3; break; \ - } \ - env->vfp.xregs[ARM_VFP_FPSCR] = (flags << 28) \ - | (env->vfp.xregs[ARM_VFP_FPSCR] & 0x0fffffff); \ + uint32_t flags = 0x3; \ + int ret = type ## _compare_quiet(a, b, &env->vfp.fp_status); \ + if (ret == 0) flags = 0x6; \ + else if (ret == -1) flags = 0x8; \ + else if (ret == 1) flags = 0x2; \ + return flags << 28; \ } \ -void VFP_HELPER(cmpe, p)(type a, type b, CPUARMState *env) \ +uint32_t VFP_HELPER(cmpe, p)(type a, type b, CPUARMState *env) \ { \ - uint32_t flags; \ - switch(type ## _compare(a, b, &env->vfp.fp_status)) { \ - case 0: flags = 0x6; break; \ - case -1: flags = 0x8; break; \ - case 1: flags = 0x2; break; \ - default: case 2: flags = 0x3; break; \ - } \ - env->vfp.xregs[ARM_VFP_FPSCR] = (flags << 28) \ - | (env->vfp.xregs[ARM_VFP_FPSCR] & 0x0fffffff); \ + uint32_t flags = 0x3; \ + int ret = type ## _compare(a, b, &env->vfp.fp_status); \ + if (ret == 0) flags = 0x6; \ + else if (ret == -1) flags = 0x8; \ + else if (ret == 1) flags = 0x2; \ + return flags << 28; \ } DO_VFP_cmp(s, float32) DO_VFP_cmp(d, float64) @@ -8891,3 +8888,12 @@ uint32_t HELPER(crc32c)(uint32_t acc, uint32_t val, uint32_t bytes) /* Linux crc32c converts the output to one's complement. */ return crc32c(acc, buf, bytes) ^ 0xffffffff; } + +CPUState *cpu_create(void) +{ + ARMCPU *cpu = g_malloc0(sizeof(ARMCPU)); + CPUState *cs = CPU(cpu); + memcpy(cpu, ARM_CPU(first_cpu), sizeof(ARMCPU)); + cs->env_ptr = &cpu->env; + return cs; +} diff --git a/target-arm/helper.h b/target-arm/helper.h index c2a85c7..41c2c6d 100644 --- a/target-arm/helper.h +++ b/target-arm/helper.h @@ -56,6 +56,7 @@ DEF_HELPER_2(pre_smc, void, env, i32) DEF_HELPER_1(check_breakpoints, void, env) +DEF_HELPER_3(cpsr_write_nzcv, void, env, i32, i32) DEF_HELPER_3(cpsr_write, void, env, i32, i32) DEF_HELPER_1(cpsr_read, i32, env) @@ -103,10 +104,10 @@ DEF_HELPER_1(vfp_abss, f32, f32) DEF_HELPER_1(vfp_absd, f64, f64) DEF_HELPER_2(vfp_sqrts, f32, f32, env) DEF_HELPER_2(vfp_sqrtd, f64, f64, env) -DEF_HELPER_3(vfp_cmps, void, f32, f32, env) -DEF_HELPER_3(vfp_cmpd, void, f64, f64, env) -DEF_HELPER_3(vfp_cmpes, void, f32, f32, env) -DEF_HELPER_3(vfp_cmped, void, f64, f64, env) +DEF_HELPER_3(vfp_cmps, i32, f32, f32, env) +DEF_HELPER_3(vfp_cmpd, i32, f64, f64, env) +DEF_HELPER_3(vfp_cmpes, i32, f32, f32, env) +DEF_HELPER_3(vfp_cmped, i32, f64, f64, env) DEF_HELPER_2(vfp_fcvtds, f64, f32, env) DEF_HELPER_2(vfp_fcvtsd, f32, f64, env) @@ -535,3 +536,5 @@ DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64) #ifdef TARGET_AARCH64 #include "helper-a64.h" #endif + +#include "hqemu-helper.h" diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c index 6cd54c8..fdea907 100644 --- a/target-arm/op_helper.c +++ b/target-arm/op_helper.c @@ -386,6 +386,16 @@ void HELPER(cpsr_write)(CPUARMState *env, uint32_t val, uint32_t mask) cpsr_write(env, val, mask); } +void HELPER(cpsr_write_nzcv)(CPUARMState *env, uint32_t val, uint32_t mask) +{ + if (mask & CPSR_NZCV) { + env->ZF = (~val) & CPSR_Z; + env->NF = val; + env->CF = (val >> 29) & 1; + env->VF = (val << 3) & 0x80000000; + } +} + /* Access to user mode registers from privileged modes. 
*/ uint32_t HELPER(get_user_reg)(CPUARMState *env, uint32_t regno) { diff --git a/target-arm/simd_helper.h b/target-arm/simd_helper.h new file mode 100644 index 0000000..186a7bd --- /dev/null +++ b/target-arm/simd_helper.h @@ -0,0 +1,91 @@ + +static inline void gen_vector_op3(TCGOpcode opc, TCGArg arg1, TCGArg arg2, + TCGArg arg3) +{ + int pi = tcg_ctx.gen_next_parm_idx; + tcg_emit_op(&tcg_ctx, opc, pi); + *tcg_ctx.vec_opparam_ptr++ = arg1; + *tcg_ctx.vec_opparam_ptr++ = arg2; + *tcg_ctx.vec_opparam_ptr++ = arg3; +} + +#define gen_vector_arith(op,etype,size) \ +do { \ + TCGOpcode _opc = 0; \ + TCGArg _rd = offsetof(CPUARMState, vfp.regs[rd]); \ + TCGArg _rn = offsetof(CPUARMState, vfp.regs[rn]); \ + TCGArg _rm = offsetof(CPUARMState, vfp.regs[rm]); \ + if (q == 1) { \ + switch(size) { \ + case 0: _opc = INDEX_op_##op##_##etype##8_128; break; \ + case 1: _opc = INDEX_op_##op##_##etype##16_128; break; \ + case 2: _opc = INDEX_op_##op##_##etype##32_128; break; \ + case 3: _opc = INDEX_op_##op##_##etype##64_128; break; \ + default: \ + fprintf(stderr, "%s:%d: tcg fatal error: size=%d q=%d\n", \ + __FILE__, __LINE__, size, q); \ + exit(0); \ + break; \ + } \ + } else { \ + switch(size) { \ + case 0: _opc = INDEX_op_##op##_##etype##8_64; break; \ + case 1: _opc = INDEX_op_##op##_##etype##16_64; break; \ + case 2: _opc = INDEX_op_##op##_##etype##32_64; break; \ + default: \ + fprintf(stderr, "%s:%d: tcg fatal error: size=%d q=%d\n", \ + __FILE__, __LINE__, size, q); \ + exit(0); \ + break; \ + } \ + } \ + gen_vector_op3(_opc, _rd, _rn, _rm); \ +} while (0) + +#define gen_vector_fop(op) \ +do { \ + TCGOpcode _opc = 0; \ + TCGArg _rd = offsetof(CPUARMState, vfp.regs[rd]); \ + TCGArg _rn = offsetof(CPUARMState, vfp.regs[rn]); \ + TCGArg _rm = offsetof(CPUARMState, vfp.regs[rm]); \ + if(q == 1) \ + _opc = INDEX_op_##op##_f32_128;\ + else \ + _opc = INDEX_op_##op##_f32_64; \ + gen_vector_op3(_opc, _rd, _rn, _rm); \ +} while (0) + +#define gen_vector_fop2(op) \ +do { \ + TCGOpcode _opc = 0; \ + TCGArg _rd = offsetof(CPUARMState, vfp.regs[rd]); \ + TCGArg _rn = offsetof(CPUARMState, vfp.regs[rn]); \ + TCGArg _rm = offsetof(CPUARMState, vfp.regs[rm]); \ + if(q == 1) \ + _opc = (size) ? 
INDEX_op_##op##_f64_128 : INDEX_op_##op##_f32_128;\ + else \ + _opc = INDEX_op_##op##_f32_64; \ + gen_vector_op3(_opc, _rd, _rn, _rm); \ +} while (0) + +#define gen_vector_logical(op) \ +do { \ + TCGOpcode _opc = 0; \ + TCGArg _rd = offsetof(CPUARMState, vfp.regs[rd]); \ + TCGArg _rn = offsetof(CPUARMState, vfp.regs[rn]); \ + TCGArg _rm = offsetof(CPUARMState, vfp.regs[rm]); \ + if(q == 1) \ + _opc = INDEX_op_##op##_128; \ + else \ + _opc = INDEX_op_##op##_64; \ + gen_vector_op3(_opc, _rd, _rn, _rm); \ +} while (0) + +#define gen_vector_cvt(op,size) \ +do { \ + TCGOpcode _opc = INDEX_op_##op##_128; \ + TCGArg _rd = offsetof(CPUARMState, vfp.regs[rd]); \ + TCGArg _rm = offsetof(CPUARMState, vfp.regs[rm]); \ + gen_vector_op3(_opc, _rd, _rm, size); \ +} while (0) + diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c index 14e8131..21cf214 100644 --- a/target-arm/translate-a64.c +++ b/target-arm/translate-a64.c @@ -37,10 +37,17 @@ #include "exec/helper-gen.h" #include "trace-tcg.h" +#include "hqemu.h" static TCGv_i64 cpu_X[32]; static TCGv_i64 cpu_pc; +#if defined(CONFIG_USER_ONLY) +#define IS_USER(s) 1 +#else +#define IS_USER(s) (s->user) +#endif + /* Load/store exclusive handling */ static TCGv_i64 cpu_exclusive_high; @@ -119,6 +126,31 @@ static inline ARMMMUIdx get_a64_user_mem_index(DisasContext *s) } } +static inline void gen_ibtc_stub(DisasContext *s) +{ +#ifdef ENABLE_IBTC + if (!build_llvm(s->env)) { + TCGv_ptr ibtc_host_pc = tcg_temp_new_ptr(); + gen_helper_lookup_ibtc(ibtc_host_pc, cpu_env); + tcg_gen_op1i(INDEX_op_jmp, GET_TCGV_PTR(ibtc_host_pc)); + tcg_temp_free_ptr(ibtc_host_pc); + s->gen_ibtc = 0; + } +#endif +} + +static inline void gen_cpbl_stub(DisasContext *s) +{ +#ifdef ENABLE_CPBL + if (!build_llvm(s->env)) { + TCGv_ptr cpbl_host_pc = tcg_temp_new_ptr(); + gen_helper_lookup_cpbl(cpbl_host_pc, cpu_env); + tcg_gen_op1i(INDEX_op_jmp, GET_TCGV_PTR(cpbl_host_pc)); + tcg_temp_free_ptr(cpbl_host_pc); + } +#endif +} + void aarch64_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf, int flags) { @@ -285,12 +317,38 @@ static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest) return true; } +#if defined(CONFIG_USER_ONLY) +static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest) +{ + TranslationBlock *tb; + + tb = s->tb; + tcg_gen_goto_tb(n); + gen_a64_set_pc_im(dest); + tcg_gen_exit_tb((intptr_t)tb + n); + s->is_jmp = DISAS_TB_JUMP; + tb->jmp_pc[n] = dest; +} +#else +static int try_link_pages(DisasContext *s, TranslationBlock *tb, target_ulong dest) +{ +#ifdef ENABLE_LPAGE + if (!build_llvm(s->env)) { + target_ulong addr, size; + int ret = lpt_search_page(s->env, dest, &addr, &size); + if (ret == 1 && (tb->pc & ~(size - 1)) == addr) + return 1; + } +#endif + return 0; +} + static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest) { TranslationBlock *tb; tb = s->tb; - if (use_goto_tb(s, n, dest)) { + if (use_goto_tb(s, n, dest) || try_link_pages(s, tb, dest) == 1) { tcg_gen_goto_tb(n); gen_a64_set_pc_im(dest); tcg_gen_exit_tb((intptr_t)tb + n); @@ -302,11 +360,14 @@ static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest) } else if (s->singlestep_enabled) { gen_exception_internal(EXCP_DEBUG); } else { + gen_cpbl_stub(s); tcg_gen_exit_tb(0); s->is_jmp = DISAS_TB_JUMP; } } + tb->jmp_pc[n] = dest; } +#endif static void unallocated_encoding(DisasContext *s) { @@ -568,6 +629,7 @@ static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) tcg_gen_movi_i64(tmp, 0); tcg_gen_add2_i64(result, flag, 
t0, tmp, t1, tmp); + tcg_gen_annotate(A_SetCC); tcg_gen_extrl_i64_i32(cpu_CF, flag); @@ -614,6 +676,7 @@ static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1) result = tcg_temp_new_i64(); flag = tcg_temp_new_i64(); tcg_gen_sub_i64(result, t0, t1); + tcg_gen_annotate(A_SetCC); gen_set_NZ64(result); @@ -764,11 +827,51 @@ static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr, get_mem_index(s)); } +#ifdef ENABLE_TCG_VECTOR +#include "simd_helper.h" + +#define VFP_DREG(reg) \ +do { \ + reg = reg * 2; \ +} while (0) +#define tcg_vector_abort() \ +do {\ + fprintf(stderr, "%s:%d: tcg fatal error - unhandled vector op.\n", __FILE__, __LINE__);\ + exit(0);\ +} while (0) + +/* + * disas_neon_ls_vector() + * return true if the neon instruction is successfully translated to tcg vector opc. + */ +static int disas_neon_ls_vector(DisasContext *s, int reg, int is_load, + TCGv_i64 tcg_addr) +{ + TCGArg vop, alignment = 32; + + if (!build_llvm(s->env)) + return 0; + + VFP_DREG(reg); + vop = (is_load) ? INDEX_op_vload_128 : INDEX_op_vstore_128; + gen_vector_op3(vop, + offsetof(CPUARMState, vfp.regs[reg]), + GET_TCGV_I64(tcg_addr), + alignment); + return 1; +} +#endif + /* * Store from FP register to memory */ static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size) { +#ifdef ENABLE_TCG_VECTOR + if (size >= 4 && disas_neon_ls_vector(s, srcidx, 0, tcg_addr) == 1) + return; +#endif + /* This writes the bottom N bits of a 128 bit wide vector to memory */ TCGv_i64 tmp = tcg_temp_new_i64(); tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64)); @@ -791,6 +894,11 @@ static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size) */ static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size) { +#ifdef ENABLE_TCG_VECTOR + if (size >= 4 && disas_neon_ls_vector(s, destidx, 1, tcg_addr) == 1) + return; +#endif + /* This always zero-extends and writes to a full 128 bit wide vector */ TCGv_i64 tmplo = tcg_temp_new_i64(); TCGv_i64 tmphi; @@ -1653,6 +1761,7 @@ static void disas_uncond_b_reg(DisasContext *s, uint32_t insn) } s->is_jmp = DISAS_JUMP; + s->gen_ibtc = 1; } /* C3.2 Branches, exception generating and system instructions */ @@ -3624,6 +3733,8 @@ static void disas_cc(DisasContext *s, uint32_t insn) TCGv_i64 tcg_tmp, tcg_y, tcg_rn; DisasCompare c; + tcg_gen_annotate(A_NoSIMDization); + if (!extract32(insn, 29, 1)) { unallocated_encoding(s); return; @@ -8854,6 +8965,153 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn) } } +#ifdef ENABLE_TCG_VECTOR +static int disas_neon_misc(DisasContext *s, uint32_t insn) +{ + if (!build_llvm(s->env)) + return 0; + + int size = extract32(insn, 22, 2); + int opcode = extract32(insn, 12, 5); + bool u = extract32(insn, 29, 1); + bool is_q = extract32(insn, 30, 1); + int rm = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + + VFP_DREG(rm); + VFP_DREG(rd); + + switch (opcode) { + case 0xc ... 0xf: + case 0x16 ... 0x1d: + case 0x1f: + { + /* Floating point: U, size[1] and opcode indicate operation; + * size[0] indicates single or double precision. + */ + int is_double = extract32(size, 0, 1); + opcode |= (extract32(size, 1, 1) << 5) | (u << 6); + size = is_double ? 
64 : 32; + + switch (opcode) { + case 0x1d: /* SCVTF */ + case 0x5d: /* UCVTF */ + { + if (is_double && !is_q) { + unallocated_encoding(s); + return 0; + } + if (!fp_access_check(s)) { + return 0; + } + if (opcode == 0x1d) + gen_vector_cvt(vsitofp, size); + else + gen_vector_cvt(vuitofp, size); + break; + } + case 0x1a: /* FCVTNS */ + case 0x1b: /* FCVTMS */ + case 0x1c: /* FCVTAS */ + case 0x3a: /* FCVTPS */ + case 0x3b: /* FCVTZS */ + if (is_double && !is_q) { + unallocated_encoding(s); + return 0; + } + gen_vector_cvt(vfptosi, size); + break; + case 0x5a: /* FCVTNU */ + case 0x5b: /* FCVTMU */ + case 0x5c: /* FCVTAU */ + case 0x7a: /* FCVTPU */ + case 0x7b: /* FCVTZU */ + if (is_double && !is_q) { + unallocated_encoding(s); + return 0; + } + gen_vector_cvt(vfptoui, size); + break; + default: + return 0; + } + break; + } + default: + return 0; + } + + return 1; +} + +/* + * disas_neon_data_vector() + * return true if the neon instruction is successfully translated to tcg vector opc. + */ +static int disas_neon_data_vector(DisasContext *s, uint32_t insn) +{ + if (!build_llvm(s->env)) + return 0; + + int q = extract32(insn, 30, 1); + int u = extract32(insn, 29, 1); + int size = extract32(insn, 22, 2); + int op = extract32(insn, 11, 5); + int rm = extract32(insn, 16, 5); + int rn = extract32(insn, 5, 5); + int rd = extract32(insn, 0, 5); + + VFP_DREG(rm); + VFP_DREG(rn); + VFP_DREG(rd); + + switch(op) { + case 0x10: /* ADD, SUB */ + if(!u) /* ADD */ + gen_vector_arith(vadd, i, size); + else /* SUB */ + gen_vector_arith(vsub, i, size); + break; + case 0x3: /* logic ops */ + switch ((u << 2) | size) { + case 0: gen_vector_logical(vand); break; /* AND */ + case 1: gen_vector_logical(vbic); break; /* BIC rd = rn&(~rm)*/ + case 2: gen_vector_logical(vorr); break; /* ORR */ + case 3: gen_vector_logical(vorn); break; /* ORN */ + case 4: gen_vector_logical(veor); break; /* EOR */ + case 5: gen_vector_logical(vbsl); break; /* BSL */ + case 6: gen_vector_logical(vbit); break; /* BIT */ + case 7: gen_vector_logical(vbif); break; /* BIF */ + default: + return 0; + } + break; + case 0x18 ... 0x31: + { + int fpopcode = extract32(insn, 11, 5) + | (extract32(insn, 23, 1) << 5) + | (extract32(insn, 29, 1) << 6); + int size = extract32(insn, 22, 1); + switch (fpopcode) { + case 0x1a: gen_vector_fop2(vadd); break; /* FADD */ + case 0x3a: gen_vector_fop2(vsub); break; /* FSUB */ + case 0x5b: gen_vector_fop2(vmul); break; /* FMUL */ + case 0x5f: gen_vector_fop2(vdiv); break; /* FDIV */ + case 0x19: gen_vector_fop2(vmla); break; /* FMLA */ + case 0x39: gen_vector_fop2(vmls); break; /* FMLS */ + default: + return 0; + } + break; + } + default: + return 0; + } + + return 1; +} +#endif + /* Logic op (opcode == 3) subgroup of C3.6.16. 
*/ static void disas_simd_3same_logic(DisasContext *s, uint32_t insn) { @@ -8870,6 +9128,11 @@ static void disas_simd_3same_logic(DisasContext *s, uint32_t insn) return; } +#ifdef ENABLE_TCG_VECTOR + if (disas_neon_data_vector(s, insn) == 1) + return; +#endif + tcg_op1 = tcg_temp_new_i64(); tcg_op2 = tcg_temp_new_i64(); tcg_res[0] = tcg_temp_new_i64(); @@ -9138,6 +9401,11 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn) return; } +#ifdef ENABLE_TCG_VECTOR + if (disas_neon_data_vector(s, insn) == 1) + return; +#endif + switch (fpopcode) { case 0x58: /* FMAXNMP */ case 0x5a: /* FADDP */ @@ -9232,6 +9500,11 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn) return; } +#ifdef ENABLE_TCG_VECTOR + if (disas_neon_data_vector(s, insn) == 1) + return; +#endif + if (size == 3) { assert(is_q); for (pass = 0; pass < 2; pass++) { @@ -9778,6 +10051,11 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn) TCGv_i32 tcg_rmode; TCGv_ptr tcg_fpstatus; +#ifdef ENABLE_TCG_VECTOR + if (disas_neon_misc(s, insn) == 1) + return; +#endif + switch (opcode) { case 0x0: /* REV64, REV32 */ case 0x1: /* REV16 */ @@ -11018,6 +11296,8 @@ void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb) pc_start = tb->pc; + dc->gen_ibtc = 0; + dc->env = env; dc->tb = tb; dc->is_jmp = DISAS_NEXT; @@ -11078,7 +11358,12 @@ void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb) max_insns = TCG_MAX_INSNS; } - gen_tb_start(tb); + if (!build_llvm(env)) { + gen_tb_start(tb); + if (tracer_mode != TRANS_MODE_NONE) + tcg_gen_hotpatch(IS_USER(dc), tracer_mode == TRANS_MODE_HYBRIDS || + tracer_mode == TRANS_MODE_HYBRIDM); + } tcg_clear_temp_count(); @@ -11144,6 +11429,9 @@ void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb) * Also stop translation when a page boundary is reached. This * ensures prefetch aborts occur at the right place. */ + + if (build_llvm(env) && num_insns == tb->icount) + break; } while (!dc->is_jmp && !tcg_op_buf_full() && !cs->singlestep_enabled && !singlestep && @@ -11155,6 +11443,15 @@ void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb) gen_io_end(); } + if (build_llvm(env) && tb->size != dc->pc - pc_start) { + /* consistency check with tb info. we must make sure + * guest basic blocks are the same. skip this trace if inconsistent */ + fprintf(stderr, "inconsistent block with pc 0x"TARGET_FMT_lx" size=%d" + " icount=%d (error size="TARGET_FMT_ld")\n", + tb->pc, tb->size, tb->icount, dc->pc - pc_start); + exit(0); + } + if (unlikely(cs->singlestep_enabled || dc->ss_active) && dc->is_jmp != DISAS_EXC) { /* Note that this means single stepping WFI doesn't halt the CPU. @@ -11182,6 +11479,8 @@ void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb) /* fall through */ case DISAS_JUMP: /* indicate that the hash table must be used to find the next TB */ + if (dc->gen_ibtc == 1) + gen_ibtc_stub(dc); tcg_gen_exit_tb(0); break; case DISAS_TB_JUMP: @@ -11211,10 +11510,15 @@ void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb) } done_generating: - gen_tb_end(tb, num_insns); + if (build_llvm(env)) { + /* Terminate the linked list. 
*/ + tcg_ctx.gen_op_buf[tcg_ctx.gen_last_op_idx].next = -1; + } else { + gen_tb_end(tb, num_insns); + } #ifdef DEBUG_DISAS - if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) { + if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) && !build_llvm(env)) { qemu_log("----------------\n"); qemu_log("IN: %s\n", lookup_symbol(pc_start)); log_target_disas(cs, pc_start, dc->pc - pc_start, @@ -11222,6 +11526,8 @@ done_generating: qemu_log("\n"); } #endif - tb->size = dc->pc - pc_start; - tb->icount = num_insns; + if (!build_llvm(env)) { + tb->size = dc->pc - pc_start; + tb->icount = num_insns; + } } diff --git a/target-arm/translate.c b/target-arm/translate.c index 5d22879..256227b 100644 --- a/target-arm/translate.c +++ b/target-arm/translate.c @@ -36,6 +36,7 @@ #include "exec/helper-gen.h" #include "trace-tcg.h" +#include "hqemu.h" #define ENABLE_ARCH_4T arm_dc_feature(s, ARM_FEATURE_V4T) @@ -110,6 +111,33 @@ void arm_translate_init(void) #endif a64_translate_init(); + + copy_tcg_context_global(); +} + +static inline void gen_ibtc_stub(DisasContext *s) +{ +#ifdef ENABLE_IBTC + if (!build_llvm(s->env)) { + TCGv_ptr ibtc_host_pc = tcg_temp_new_ptr(); + gen_helper_lookup_ibtc(ibtc_host_pc, cpu_env); + tcg_gen_op1i(INDEX_op_jmp, GET_TCGV_PTR(ibtc_host_pc)); + tcg_temp_free_ptr(ibtc_host_pc); + s->gen_ibtc = 0; + } +#endif +} + +static inline void gen_cpbl_stub(DisasContext *s) +{ +#ifdef ENABLE_CPBL + if (!build_llvm(s->env)) { + TCGv_ptr cpbl_host_pc = tcg_temp_new_ptr(); + gen_helper_lookup_cpbl(cpbl_host_pc, cpu_env); + tcg_gen_op1i(INDEX_op_jmp, GET_TCGV_PTR(cpbl_host_pc)); + tcg_temp_free_ptr(cpbl_host_pc); + } +#endif } static inline ARMMMUIdx get_a32_user_mem_index(DisasContext *s) @@ -201,7 +229,10 @@ static void store_reg(DisasContext *s, int reg, TCGv_i32 var) static inline void gen_set_cpsr(TCGv_i32 var, uint32_t mask) { TCGv_i32 tmp_mask = tcg_const_i32(mask); - gen_helper_cpsr_write(cpu_env, var, tmp_mask); + if (mask & ~CPSR_NZCV) + gen_helper_cpsr_write(cpu_env, var, tmp_mask); + else + gen_helper_cpsr_write_nzcv(cpu_env, var, tmp_mask); tcg_temp_free_i32(tmp_mask); } /* Set NZCV flags from the high 4 bits of var. */ @@ -493,6 +524,7 @@ static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) { TCGv_i32 tmp; tcg_gen_sub_i32(cpu_NF, t0, t1); + tcg_gen_annotate(A_SetCC); tcg_gen_mov_i32(cpu_ZF, cpu_NF); tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1); tcg_gen_xor_i32(cpu_VF, cpu_NF, t0); @@ -878,6 +910,7 @@ static inline void gen_bx_im(DisasContext *s, uint32_t addr) tcg_temp_free_i32(tmp); } tcg_gen_movi_i32(cpu_R[15], addr & ~1); + s->gen_ibtc = 1; } /* Set PC and Thumb state from var. var is marked as dead. 
*/ @@ -887,6 +920,7 @@ static inline void gen_bx(DisasContext *s, TCGv_i32 var) tcg_gen_andi_i32(cpu_R[15], var, ~1); tcg_gen_andi_i32(var, var, 1); store_cpu_field(var, thumb); + s->gen_ibtc = 1; } /* Variant of store_reg which uses branch&exchange logic when storing @@ -1199,20 +1233,38 @@ static inline void gen_vfp_sqrt(int dp) gen_helper_vfp_sqrts(cpu_F0s, cpu_F0s, cpu_env); } +static inline void gen_update_fpscr(TCGv_i32 flags) +{ + TCGv_i32 tmp; + tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]); + tcg_gen_andi_i32(tmp, tmp, 0x0fffffff); + tcg_gen_or_i32(tmp, tmp, flags); + store_cpu_field(tmp, vfp.xregs[ARM_VFP_FPSCR]); + tcg_temp_free_i32(tmp); +} + static inline void gen_vfp_cmp(int dp) { + TCGv_i32 flags = tcg_temp_new_i32(); if (dp) - gen_helper_vfp_cmpd(cpu_F0d, cpu_F1d, cpu_env); + gen_helper_vfp_cmpd(flags, cpu_F0d, cpu_F1d, cpu_env); else - gen_helper_vfp_cmps(cpu_F0s, cpu_F1s, cpu_env); + gen_helper_vfp_cmps(flags, cpu_F0s, cpu_F1s, cpu_env); + + gen_update_fpscr(flags); + tcg_temp_free_i32(flags); } static inline void gen_vfp_cmpe(int dp) { + TCGv_i32 flags = tcg_temp_new_i32(); if (dp) - gen_helper_vfp_cmped(cpu_F0d, cpu_F1d, cpu_env); + gen_helper_vfp_cmped(flags, cpu_F0d, cpu_F1d, cpu_env); else - gen_helper_vfp_cmpes(cpu_F0s, cpu_F1s, cpu_env); + gen_helper_vfp_cmpes(flags, cpu_F0s, cpu_F1s, cpu_env); + + gen_update_fpscr(flags); + tcg_temp_free_i32(flags); } static inline void gen_vfp_F1_ld0(int dp) @@ -3977,20 +4029,49 @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn) return 0; } +#if defined(CONFIG_USER_ONLY) +static inline void gen_goto_tb(DisasContext *s, int n, target_ulong dest) +{ + TranslationBlock *tb; + + tb = s->tb; + tcg_gen_goto_tb(n); + gen_set_pc_im(s, dest); + tcg_gen_exit_tb((uintptr_t)tb + n); + tb->jmp_pc[n] = dest; +} +#else +static int try_link_pages(DisasContext *s, TranslationBlock *tb, target_ulong dest) +{ +#ifdef ENABLE_LPAGE + if (!build_llvm(s->env)) { + target_ulong addr, size; + int ret = lpt_search_page(s->env, dest, &addr, &size); + if (ret == 1 && (tb->pc & ~(size - 1)) == addr) + return 1; + } +#endif + return 0; +} + static inline void gen_goto_tb(DisasContext *s, int n, target_ulong dest) { TranslationBlock *tb; tb = s->tb; - if ((tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK)) { + if ((tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK) || + try_link_pages(s, tb, dest) == 1) { tcg_gen_goto_tb(n); gen_set_pc_im(s, dest); tcg_gen_exit_tb((uintptr_t)tb + n); } else { gen_set_pc_im(s, dest); + gen_cpbl_stub(s); tcg_gen_exit_tb(0); } + tb->jmp_pc[n] = dest; } +#endif static inline void gen_jmp (DisasContext *s, uint32_t dest) { @@ -4372,6 +4453,54 @@ static struct { {2, 1, 1} }; +#ifdef ENABLE_TCG_VECTOR +#include "simd_helper.h" + +#define tcg_vector_abort() \ +do {\ + fprintf(stderr, "%s:%d: tcg fatal error - unhandled vector op.\n", __FILE__, __LINE__);\ + exit(0);\ +} while (0) + +/* + * disas_neon_ls_vector() + * return true if the neon instruction is successfully translated to tcg vector opc. + */ +static int disas_neon_ls_vector(DisasContext *s, uint32_t insn, TCGv_i32 addr) +{ + int rd, op, load; + int nregs, reg; + int interleave, spacing; + TCGArg vop, alignment = 32; + + if (!build_llvm(s->env)) + return 0; + + /* Load store all elements. 
*/ + op = (insn >> 8) & 0xf; + nregs = neon_ls_element_type[op].nregs; + interleave = neon_ls_element_type[op].interleave; + spacing = neon_ls_element_type[op].spacing; + + if (interleave != 1 || nregs % 2 != 0) + return 0; + + VFP_DREG_D(rd, insn); + load = (insn & (1 << 21)) != 0; + vop = (load) ? INDEX_op_vload_128 : INDEX_op_vstore_128; + + for (reg = 0; reg < nregs; reg += 2) { + gen_vector_op3(vop, + offsetof(CPUARMState, vfp.regs[rd]), + GET_TCGV_I32(addr), + alignment); + rd += spacing * 2; + tcg_gen_addi_i32(addr, addr, 16); + } + return 1; +} +#endif + /* Translate a NEON load/store element instruction. Return nonzero if the instruction is invalid. */ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) @@ -4438,6 +4567,11 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) addr = tcg_temp_new_i32(); load_reg_var(s, addr, rn); stride = (1 << size) * interleave; + +#ifdef ENABLE_TCG_VECTOR + if (disas_neon_ls_vector(s, insn, addr) == 1) + goto vector_done; +#endif for (reg = 0; reg < nregs; reg++) { if (interleave > 2 || (interleave == 2 && nregs == 2)) { load_reg_var(s, addr, rn); @@ -4529,6 +4663,9 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn) } rd += spacing; } +#ifdef ENABLE_TCG_VECTOR +vector_done: +#endif tcg_temp_free_i32(addr); stride = nregs * 8; } else { @@ -5111,6 +5248,131 @@ static const uint8_t neon_2rm_sizes[] = { [NEON_2RM_VCVT_UF] = 0x4, }; +#ifdef ENABLE_TCG_VECTOR +static int disas_neon_misc(DisasContext *s, uint32_t insn) +{ + int op, rd, rm; + + if (!build_llvm(s->env)) + return 0; + + op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf); + VFP_DREG_D(rd, insn); + VFP_DREG_M(rm, insn); + + switch (op) { + case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */ + gen_vector_cvt(vsitofp, 32); + break; + case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */ + gen_vector_cvt(vuitofp, 32); + break; + case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */ + gen_vector_cvt(vfptosi, 32); + break; + case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */ + gen_vector_cvt(vfptoui, 32); + break; + default: + return 0; + } + + return 1; +} + +/* + * disas_neon_data_vector() + * return true if the neon instruction is successfully translated to tcg vector opc. + */ +static int disas_neon_data_vector(DisasContext *s, uint32_t insn) +{ + int op, q, u, size; + int rd, rn, rm; + + if (!build_llvm(s->env)) + return 0; + + /* Three register same length. */ + q = (insn & (1 << 6)) != 0; + u = (insn >> 24) & 1; + VFP_DREG_D(rd, insn); + VFP_DREG_N(rn, insn); + VFP_DREG_M(rm, insn); + size = (insn >> 20) & 3; + op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1); + + switch (op) { + case NEON_3R_VSHL: + case NEON_3R_VQSHL: + case NEON_3R_VRSHL: + case NEON_3R_VQRSHL: + { + int rtmp; + /* Shift instruction operands are reversed. 
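disas_neon_ls_vector() above only handles the interleave == 1 forms with an even register count; everything else falls through to the original element-by-element code. For a four-register load such as vld1 {d0-d3} (nregs = 4, spacing = 1) the loop runs twice, which unrolls to roughly the calls below. gen_vector_op3() is assumed here to mirror the x86 simd_helper.h version added later in this patch, writing its three operands to the separate vector-operand buffer rather than the normal TCG argument stream:

    /* Unrolled form of the emission loop for nregs = 4, interleave = 1. */
    gen_vector_op3(INDEX_op_vload_128,
                   offsetof(CPUARMState, vfp.regs[rd]),     GET_TCGV_I32(addr), 32);
    tcg_gen_addi_i32(addr, addr, 16);
    gen_vector_op3(INDEX_op_vload_128,
                   offsetof(CPUARMState, vfp.regs[rd + 2]), GET_TCGV_I32(addr), 32);
    tcg_gen_addi_i32(addr, addr, 16);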
*/ + rtmp = rn; + rn = rm; + rm = rtmp; + } + break; + default: + break; + } + + switch(op) { + case NEON_3R_VADD_VSUB: + if(!u) /* VADD */ + gen_vector_arith(vadd, i, size); + else /* VSUB */ + gen_vector_arith(vsub, i, size); + break; + case NEON_3R_LOGIC: + switch ((u << 2) | size) { + case 0: gen_vector_logical(vand); break; /* VAND */ + case 1: gen_vector_logical(vbic); break; /* BIC rd = rn&(~rm)*/ + case 2: gen_vector_logical(vorr); break; /* VORR */ + case 3: gen_vector_logical(vorn); break; /* VORN OR NOT */ + case 4: gen_vector_logical(veor); break; /* VEOR Vector Bitwise Exclusive OR*/ + case 5: gen_vector_logical(vbsl); break; /* VBSL */ + case 6: gen_vector_logical(vbit); break; /* VBIT */ + case 7: gen_vector_logical(vbif); break; /* VBIF */ + } + break; + case NEON_3R_VFM: + if (size) /* VFMS */ + gen_vector_fop(vfms); + else /* VFMA */ + gen_vector_fop(vfma); + break; + case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. */ + switch ((u << 2) | size) { + case 0: gen_vector_fop(vadd); break; /* VADD */ + case 4: gen_vector_fop(vpadd); break; /* VPADD */ + case 2: gen_vector_fop(vsub); break; /* VSUB */ + case 6: gen_vector_fop(vabd); break; /* VABD */ + default: + tcg_vector_abort(); + break; + } + break; + case NEON_3R_FLOAT_MULTIPLY: /* float VMLA, VMLS, VMUL */ + if(u) + gen_vector_fop(vmul); + else if (!u) { + if (size == 0) + gen_vector_fop(vmla); + else + gen_vector_fop(vmls); + } else + tcg_vector_abort(); + break; + default: + return 0; + } + + return 1; +} +#endif + /* Translate a NEON data processing instruction. Return nonzero if the instruction is invalid. We process data in a mixture of 32-bit and 64-bit chunks. @@ -5341,6 +5603,11 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) return 1; } +#ifdef ENABLE_TCG_VECTOR + if (!pairwise && disas_neon_data_vector(s, insn) == 1) + return 0; +#endif + for (pass = 0; pass < (q ? 4 : 2); pass++) { if (pairwise) { @@ -6741,6 +7008,10 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn) break; default: elementwise: +#ifdef ENABLE_TCG_VECTOR + if (disas_neon_misc(s, insn) == 1) + return 0; +#endif for (pass = 0; pass < (q ? 4 : 2); pass++) { if (neon_2rm_is_float_op(op)) { tcg_gen_ld_f32(cpu_F0s, cpu_env, @@ -11234,6 +11505,8 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb) pc_start = tb->pc; + dc->gen_ibtc = 0; + dc->env = env; dc->tb = tb; dc->is_jmp = DISAS_NEXT; @@ -11303,7 +11576,12 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb) max_insns = TCG_MAX_INSNS; } - gen_tb_start(tb); + if (!build_llvm(env)) { + gen_tb_start(tb); + if (tracer_mode != TRANS_MODE_NONE) + tcg_gen_hotpatch(IS_USER(dc), tracer_mode == TRANS_MODE_HYBRIDS || + tracer_mode == TRANS_MODE_HYBRIDM); + } tcg_clear_temp_count(); @@ -11460,6 +11738,12 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb) end_of_page = (dc->pc >= next_page_start) || ((dc->pc >= next_page_start - 3) && insn_crosses_page(env, dc)); +#if defined(CONFIG_LLVM) && defined(CONFIG_USER_ONLY) + if (llvm_has_annotation(dc->pc, ANNOTATION_LOOP)) + break; +#endif + if (build_llvm(env) && num_insns == tb->icount) + break; } while (!dc->is_jmp && !tcg_op_buf_full() && !cs->singlestep_enabled && !singlestep && @@ -11476,6 +11760,15 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb) gen_io_end(); } + if (build_llvm(env) && tb->size != dc->pc - pc_start) { + /* consistency check with tb info. we must make sure + * guest basic blocks are the same. 
skip this trace if inconsistent */ + fprintf(stderr, "inconsistent block with pc 0x"TARGET_FMT_lx" size=%d" + " icount=%d (error size="TARGET_FMT_ld")\n", + tb->pc, tb->size, tb->icount, dc->pc - pc_start); + exit(0); + } + /* At this stage dc->condjmp will only be set when the skipped instruction was a conditional branch or trap, and the PC has already been written. */ @@ -11543,6 +11836,8 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb) case DISAS_JUMP: default: /* indicate that the hash table must be used to find the next TB */ + if (dc->gen_ibtc == 1) + gen_ibtc_stub(dc); tcg_gen_exit_tb(0); break; case DISAS_TB_JUMP: @@ -11581,10 +11876,15 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb) } done_generating: - gen_tb_end(tb, num_insns); + if (build_llvm(env)) { + /* Terminate the linked list. */ + tcg_ctx.gen_op_buf[tcg_ctx.gen_last_op_idx].next = -1; + } else { + gen_tb_end(tb, num_insns); + } #ifdef DEBUG_DISAS - if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) { + if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) && !build_llvm(env)) { qemu_log("----------------\n"); qemu_log("IN: %s\n", lookup_symbol(pc_start)); log_target_disas(cs, pc_start, dc->pc - pc_start, @@ -11592,8 +11892,10 @@ done_generating: qemu_log("\n"); } #endif - tb->size = dc->pc - pc_start; - tb->icount = num_insns; + if (!build_llvm(env)) { + tb->size = dc->pc - pc_start; + tb->icount = num_insns; + } } static const char *cpu_mode_names[16] = { diff --git a/target-arm/translate.h b/target-arm/translate.h index 53ef971..10f6a05 100644 --- a/target-arm/translate.h +++ b/target-arm/translate.h @@ -61,6 +61,8 @@ typedef struct DisasContext { #define TMP_A64_MAX 16 int tmp_a64_count; TCGv_i64 tmp_a64[TMP_A64_MAX]; + int gen_ibtc; + CPUArchState *env; } DisasContext; typedef struct DisasCompare { diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 84edfd0..cbd8b2a 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -845,7 +845,7 @@ typedef struct CPUX86State { uint64_t efer; /* Beginning of state preserved by INIT (dummy marker). */ - struct {} start_init_save; + struct { int dummy; } start_init_save; /* FPU state */ unsigned int fpstt; /* top of stack index */ @@ -865,8 +865,8 @@ typedef struct CPUX86State { float_status mmx_status; /* for 3DNow! float ops */ float_status sse_status; uint32_t mxcsr; - XMMReg xmm_regs[CPU_NB_REGS == 8 ? 8 : 32]; - XMMReg xmm_t0; + XMMReg xmm_regs[CPU_NB_REGS == 8 ? 8 : 32] __attribute__((aligned(64))); + XMMReg xmm_t0 __attribute__((aligned(64))); MMXReg mmx_t0; uint64_t opmask_regs[NB_OPMASK_REGS]; @@ -906,7 +906,7 @@ typedef struct CPUX86State { uint32_t smbase; /* End of state preserved by INIT (dummy marker). 
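The two INIT markers are changed from empty structs to structs with a dummy member. A zero-sized struct is a GNU C extension (ISO C requires at least one member, and C++ gives an empty class size 1), so the empty form breaks down once this header is also pulled into the C++/LLVM side of HQEMU; that motivation is an assumption, since the patch does not state it. A minimal illustration:

    /* Not from the patch: why the marker needs a member.
     * GNU C: sizeof(struct {}) == 0 (extension); ISO C: ill-formed;
     * C++:   an empty class has size 1.
     * One int member gives the same well-defined layout everywhere. */
    struct init_marker { int dummy; };
    _Static_assert(sizeof(struct init_marker) > 0, "marker is a real object");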
*/ - struct {} end_init_save; + struct { int dummy; } end_init_save; uint64_t system_time_msr; uint64_t wall_clock_msr; @@ -966,6 +966,8 @@ typedef struct CPUX86State { uint64_t mtrr_deftype; MTRRVar mtrr_var[MSR_MTRRcap_VCNT]; + CPU_OPTIMIZATION_COMMON + /* For KVM */ uint32_t mp_state; int32_t exception_injected; @@ -1237,6 +1239,19 @@ static inline void cpu_get_tb_cpu_state(CPUX86State *env, target_ulong *pc, (env->eflags & (IOPL_MASK | TF_MASK | RF_MASK | VM_MASK | AC_MASK)); } +static inline target_ulong cpu_get_pc(CPUX86State *env) +{ + return env->eip + env->segs[R_CS].base; +} + +static inline int cpu_check_state(CPUX86State *env, + target_ulong cs_base, int flags) +{ + int mask = IOPL_MASK | TF_MASK | RF_MASK | VM_MASK | AC_MASK; + return (cs_base == env->segs[R_CS].base) && + ((uint32_t)flags == (env->hflags | (env->eflags & mask))); +} + void do_cpu_init(X86CPU *cpu); void do_cpu_sipi(X86CPU *cpu); @@ -1297,7 +1312,9 @@ static inline void cpu_load_efer(CPUX86State *env, uint64_t val) static inline MemTxAttrs cpu_get_mem_attrs(CPUX86State *env) { - return ((MemTxAttrs) { .secure = (env->hflags & HF_SMM_MASK) != 0 }); + MemTxAttrs attrs = { 0 }; + attrs.secure = (env->hflags & HF_SMM_MASK) != 0; + return attrs; } /* fpu_helper.c */ diff --git a/target-i386/fpu_helper.c b/target-i386/fpu_helper.c index d421a47..4f50cd9 100644 --- a/target-i386/fpu_helper.c +++ b/target-i386/fpu_helper.c @@ -385,7 +385,7 @@ void helper_fxchg_ST0_STN(CPUX86State *env, int st_index) /* FPU operations */ -static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500}; +const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500}; void helper_fcom_ST0_FT0(CPUX86State *env) { diff --git a/target-i386/helper.c b/target-i386/helper.c index d18be95..4bc1e13 100644 --- a/target-i386/helper.c +++ b/target-i386/helper.c @@ -25,6 +25,7 @@ #include "monitor/monitor.h" #include "hw/i386/apic_internal.h" #endif +#include "hqemu.h" static void cpu_x86_version(CPUX86State *env, int *family, int *model) { @@ -641,6 +642,8 @@ void cpu_x86_update_cr3(CPUX86State *env, target_ulong new_cr3) "CR3 update: CR3=" TARGET_FMT_lx "\n", new_cr3); tlb_flush(CPU(cpu), 0); } + + pcid = new_cr3 >> 12; } void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4) @@ -1432,3 +1435,12 @@ void x86_stq_phys(CPUState *cs, hwaddr addr, uint64_t val) NULL); } #endif + +CPUState *cpu_create(void) +{ + X86CPU *cpu = g_malloc0(sizeof(X86CPU)); + CPUState *cs = CPU(cpu); + memcpy(cpu, X86_CPU(first_cpu), sizeof(X86CPU)); + cs->env_ptr = &cpu->env; + return cs; +} diff --git a/target-i386/helper.h b/target-i386/helper.h index ecfcfd1..8fbdde6 100644 --- a/target-i386/helper.h +++ b/target-i386/helper.h @@ -219,3 +219,6 @@ DEF_HELPER_3(rcrl, tl, env, tl, tl) DEF_HELPER_3(rclq, tl, env, tl, tl) DEF_HELPER_3(rcrq, tl, env, tl, tl) #endif + +#include "hqemu-helper.h" +#include "atomic-helper.h" diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 36fa3f0..2639ba5 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -2786,13 +2786,13 @@ static int kvm_handle_debug(X86CPU *cpu, case 0x1: ret = EXCP_DEBUG; cs->watchpoint_hit = &hw_watchpoint; - hw_watchpoint.vaddr = hw_breakpoint[n].addr; + hw_watchpoint.addr = hw_breakpoint[n].addr; hw_watchpoint.flags = BP_MEM_WRITE; break; case 0x3: ret = EXCP_DEBUG; cs->watchpoint_hit = &hw_watchpoint; - hw_watchpoint.vaddr = hw_breakpoint[n].addr; + hw_watchpoint.addr = hw_breakpoint[n].addr; hw_watchpoint.flags = BP_MEM_ACCESS; break; } diff --git a/target-i386/misc_helper.c b/target-i386/misc_helper.c 
index 13bd4f5..b446daa 100644 --- a/target-i386/misc_helper.c +++ b/target-i386/misc_helper.c @@ -599,3 +599,7 @@ void helper_debug(CPUX86State *env) cs->exception_index = EXCP_DEBUG; cpu_loop_exit(cs); } + +#ifdef CONFIG_COREMU +#include "atomic-x86.c" +#endif diff --git a/target-i386/ops_sse.h b/target-i386/ops_sse.h index 1780d1d..4a96ed7 100644 --- a/target-i386/ops_sse.h +++ b/target-i386/ops_sse.h @@ -995,7 +995,7 @@ SSE_HELPER_CMP(cmpnlt, FPU_CMPNLT) SSE_HELPER_CMP(cmpnle, FPU_CMPNLE) SSE_HELPER_CMP(cmpord, FPU_CMPORD) -static const int comis_eflags[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C}; +const int comis_eflags[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C}; void helper_ucomiss(CPUX86State *env, Reg *d, Reg *s) { diff --git a/target-i386/simd_helper.h b/target-i386/simd_helper.h new file mode 100644 index 0000000..dce0d59 --- /dev/null +++ b/target-i386/simd_helper.h @@ -0,0 +1,65 @@ + +static inline void gen_vector_op3(TCGOpcode opc, TCGArg arg1, TCGArg arg2, + TCGArg arg3) +{ + int pi = tcg_ctx.gen_next_parm_idx; + tcg_emit_op(&tcg_ctx, opc, pi); + *tcg_ctx.vec_opparam_ptr++ = arg1; + *tcg_ctx.vec_opparam_ptr++ = arg2; + *tcg_ctx.vec_opparam_ptr++ = arg3; +} + +#define gen_vector_arith(op,etype,size) \ +do { \ + TCGOpcode _opc = 0; \ + TCGArg _rd = offsetof(CPUX86State, xmm_regs[rd]); \ + TCGArg _rn = offsetof(CPUX86State, xmm_regs[rn]); \ + TCGArg _rm = (rm == -1) ? offsetof(CPUX86State, xmm_t0) : \ + offsetof(CPUX86State, xmm_regs[rm]); \ + switch(size) { \ + case 0: _opc = INDEX_op_##op##_##etype##8_128; break; \ + case 1: _opc = INDEX_op_##op##_##etype##16_128; break; \ + case 2: _opc = INDEX_op_##op##_##etype##32_128; break; \ + case 3: _opc = INDEX_op_##op##_##etype##64_128; break; \ + default: \ + fprintf(stderr, "%s:%d: tcg fatal error: size=%d\n", \ + __FILE__, __LINE__, size); \ + exit(0); \ + break; \ + } \ + gen_vector_op3(_opc, _rd, _rn, _rm); \ +} while (0) + +#define gen_vector_fop(op,size) \ +do { \ + TCGOpcode _opc = 0; \ + TCGArg _rd = offsetof(CPUX86State, xmm_regs[rd]); \ + TCGArg _rn = offsetof(CPUX86State, xmm_regs[rn]); \ + TCGArg _rm = (rm == -1) ? offsetof(CPUX86State, xmm_t0) : \ + offsetof(CPUX86State, xmm_regs[rm]); \ + if(size == 0) \ + _opc = INDEX_op_##op##_f32_128;\ + else \ + _opc = INDEX_op_##op##_f64_128;\ + gen_vector_op3(_opc, _rd, _rn, _rm); \ +} while (0) + +#define gen_vector_logical(op) \ +do { \ + TCGOpcode _opc = INDEX_op_##op##_128; \ + TCGArg _rd = offsetof(CPUX86State, xmm_regs[rd]); \ + TCGArg _rn = (rn == -1) ? offsetof(CPUX86State, xmm_t0) : \ + offsetof(CPUX86State, xmm_regs[rn]); \ + TCGArg _rm = (rm == -1) ? offsetof(CPUX86State, xmm_t0) : \ + offsetof(CPUX86State, xmm_regs[rm]); \ + gen_vector_op3(_opc, _rd, _rn, _rm); \ +} while (0) + +#define gen_vector_cvt(op,size) \ +do { \ + TCGOpcode _opc = INDEX_op_##op##_128; \ + TCGArg _rd = offsetof(CPUX86State, xmm_regs[rd]); \ + TCGArg _rm = (rm == -1) ? 
offsetof(CPUX86State, xmm_t0) : \ + offsetof(CPUX86State, xmm_regs[rm]); \ + gen_vector_op3(_opc, _rd, _rm, size); \ +} while (0) diff --git a/target-i386/translate.c b/target-i386/translate.c index a3dd167..7204635 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -32,7 +32,13 @@ #include "exec/helper-gen.h" #include "trace-tcg.h" +#include "hqemu.h" +#if defined(CONFIG_USER_ONLY) +#define IS_USER(s) 1 +#else +#define IS_USER(s) (s->cpl == 3) +#endif #define PREFIX_REPZ 0x01 #define PREFIX_REPNZ 0x02 @@ -59,26 +65,35 @@ # define clztl clz32 #endif +#ifdef CONFIG_COREMU +#ifdef TARGET_X86_64 +#define X86_64_HREGS x86_64_hregs +#else +#define X86_64_HREGS 0 +#endif +#endif + //#define MACRO_TEST 1 /* global register indexes */ static TCGv_ptr cpu_env; -static TCGv cpu_A0; -static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT; +static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2; static TCGv_i32 cpu_cc_op; static TCGv cpu_regs[CPU_NB_REGS]; /* local temps */ -static TCGv cpu_T[2]; +static __thread TCGv cpu_T[2]; /* local register indexes (only used inside old micro ops) */ -static TCGv cpu_tmp0, cpu_tmp4; -static TCGv_ptr cpu_ptr0, cpu_ptr1; -static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32; -static TCGv_i64 cpu_tmp1_i64; +static __thread TCGv cpu_A0; +static __thread TCGv cpu_tmp0, cpu_tmp4; +static __thread TCGv_ptr cpu_ptr0, cpu_ptr1; +static __thread TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32; +static __thread TCGv_i64 cpu_tmp1_i64; +static __thread TCGv cpu_cc_srcT; #include "exec/gen-icount.h" #ifdef TARGET_X86_64 -static int x86_64_hregs; +static __thread int x86_64_hregs; #endif typedef struct DisasContext { @@ -123,6 +138,10 @@ typedef struct DisasContext { int cpuid_ext2_features; int cpuid_ext3_features; int cpuid_7_0_ebx_features; + int fallthrough; + int gen_ibtc; + int gen_cpbl; + CPUX86State *env; } DisasContext; static void gen_eob(DisasContext *s); @@ -209,6 +228,36 @@ static const uint8_t cc_op_live[CC_OP_NB] = { [CC_OP_CLR] = 0, }; +static inline void gen_ibtc_stub(DisasContext *s) +{ +#ifdef ENABLE_IBTC + if (!build_llvm(s->env)) { + TCGv_ptr ibtc_host_pc = tcg_temp_new_ptr(); + if (s->fallthrough) { + tcg_gen_st_i32(tcg_const_i32(1), cpu_env, offsetof(CPUX86State, fallthrough)); + s->fallthrough = 0; + } + gen_helper_lookup_ibtc(ibtc_host_pc, cpu_env); + tcg_gen_op1i(INDEX_op_jmp, GET_TCGV_PTR(ibtc_host_pc)); + tcg_temp_free_ptr(ibtc_host_pc); + s->gen_ibtc = 0; + } +#endif +} + +static inline void gen_cpbl_stub(DisasContext *s) +{ +#ifdef ENABLE_CPBL + if (!build_llvm(s->env)) { + TCGv_ptr cpbl_host_pc = tcg_temp_new_ptr(); + gen_helper_lookup_cpbl(cpbl_host_pc, cpu_env); + tcg_gen_op1i(INDEX_op_jmp, GET_TCGV_PTR(cpbl_host_pc)); + tcg_temp_free_ptr(cpbl_host_pc); + s->gen_cpbl = 0; + } +#endif +} + static void set_cc_op(DisasContext *s, CCOp op) { int dead; @@ -1312,6 +1361,30 @@ static void gen_helper_fp_arith_STN_ST0(int op, int opreg) /* if d == OR_TMP0, it means memory operand (address in A0) */ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d) { +#ifdef CONFIG_COREMU + if (s1->prefix & PREFIX_LOCK) { + gen_update_cc_op(s1); + + switch (ot & 3) { + case 0: + gen_helper_atomic_opb(cpu_env, cpu_A0, cpu_T[1], tcg_const_i32(op)); + break; + case 1: + gen_helper_atomic_opw(cpu_env, cpu_A0, cpu_T[1], tcg_const_i32(op)); + break; + case 2: + gen_helper_atomic_opl(cpu_env, cpu_A0, cpu_T[1], tcg_const_i32(op)); + break; +#ifdef TARGET_X86_64 + case 3: + gen_helper_atomic_opq(cpu_env, cpu_A0, cpu_T[1], tcg_const_i32(op)); +#endif + } + set_cc_op(s1, 
CC_OP_EFLAGS); + return; + } +#endif + if (d != OR_TMP0) { gen_op_mov_v_reg(ot, cpu_T[0], d); } else { @@ -1378,6 +1451,35 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d) /* if d == OR_TMP0, it means memory operand (address in A0) */ static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c) { +#ifdef CONFIG_COREMU + /* with lock prefix */ + if (s1->prefix & PREFIX_LOCK) { + assert(d == OR_TMP0); + + /* The helper will use CAS1 as a unified way to + implement atomic inc (locked inc) */ + gen_update_cc_op(s1); + + switch(ot & 3) { + case 0: + gen_helper_atomic_incb(cpu_env, cpu_A0, tcg_const_i32(c)); + break; + case 1: + gen_helper_atomic_incw(cpu_env, cpu_A0, tcg_const_i32(c)); + break; + case 2: + gen_helper_atomic_incl(cpu_env, cpu_A0, tcg_const_i32(c)); + break; +#ifdef TARGET_X86_64 + case 3: + gen_helper_atomic_incq(cpu_env, cpu_A0, tcg_const_i32(c)); +#endif + } + set_cc_op(s1, CC_OP_EFLAGS); + return; + } +#endif + if (d != OR_TMP0) { gen_op_mov_v_reg(ot, cpu_T[0], d); } else { @@ -2205,6 +2307,31 @@ static inline int insn_const_size(TCGMemOp ot) } } +#if defined(CONFIG_USER_ONLY) +static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip) +{ + TranslationBlock *tb; + + tb = s->tb; + tcg_gen_goto_tb(tb_num); + gen_jmp_im(eip); + tcg_gen_exit_tb((uintptr_t)tb + tb_num); + tb->jmp_pc[tb_num] = tb->cs_base + eip; +} +#else +static int try_link_pages(DisasContext *s, TranslationBlock *tb, target_ulong dest) +{ +#ifdef ENABLE_LPAGE + if (!build_llvm(s->env)) { + target_ulong addr, size; + int ret = lpt_search_page(s->env, dest, &addr, &size); + if (ret == 1 && (tb->pc & ~(size - 1)) == addr) + return 1; + } +#endif + return 0; +} + static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip) { TranslationBlock *tb; @@ -2214,7 +2341,8 @@ static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip) tb = s->tb; /* NOTE: we handle the case where the TB spans two pages here */ if ((pc & TARGET_PAGE_MASK) == (tb->pc & TARGET_PAGE_MASK) || - (pc & TARGET_PAGE_MASK) == ((s->pc - 1) & TARGET_PAGE_MASK)) { + (pc & TARGET_PAGE_MASK) == ((s->pc - 1) & TARGET_PAGE_MASK) || + try_link_pages(s, tb, pc) == 1) { /* jump to same page: we can use a direct jump */ tcg_gen_goto_tb(tb_num); gen_jmp_im(eip); @@ -2222,9 +2350,12 @@ static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip) } else { /* jump to another page: currently not optimized */ gen_jmp_im(eip); + s->gen_cpbl = 1; gen_eob(s); } + tb->jmp_pc[tb_num] = pc; } +#endif static inline void gen_jcc(DisasContext *s, int b, target_ulong val, target_ulong next_eip) @@ -2561,6 +2692,10 @@ static void gen_eob(DisasContext *s) } else if (s->tf) { gen_helper_single_step(cpu_env); } else { + if (s->gen_ibtc == 1) + gen_ibtc_stub(s); + if (s->gen_cpbl == 1) + gen_cpbl_stub(s); tcg_gen_exit_tb(0); } s->is_jmp = DISAS_TB_JUMP; @@ -2974,6 +3109,192 @@ static const struct SSEOpHelper_eppi sse_op_table7[256] = { [0xdf] = AESNI_OP(aeskeygenassist), }; +#ifdef ENABLE_TCG_VECTOR +#include "simd_helper.h" + +#define tcg_vector_abort() \ +do {\ + fprintf(stderr, "%s:%d: tcg fatal error - unhandled vector op.\n", __FILE__, __LINE__);\ + exit(0);\ +} while (0) + +static int gen_vload(DisasContext *s, int op, int mod, int modrm, int reg) +{ + int rm; + TCGArg alignment = 128; + CPUX86State *env = s->env; + + if (!build_llvm(env)) + return 0; + + switch (op) { + case 0x010: /* movups */ + case 0x110: /* movupd */ + case 0x26f: /* movdqu xmm, ea */ + alignment = (TCGArg)-1; + 
break; + default: + break; + } + + if (mod != 3) { + gen_lea_modrm(env, s, modrm); + gen_vector_op3(INDEX_op_vload_128, + offsetof(CPUX86State, xmm_regs[reg]), + (TCGArg)cpu_A0, + alignment); + } else { + rm = (modrm & 7) | REX_B(s); + gen_vector_op3(INDEX_op_vmov_128, + offsetof(CPUX86State, xmm_regs[reg]), + offsetof(CPUX86State, xmm_regs[rm]), + alignment); + } + + return 1; +} + +static int gen_vstore(DisasContext *s, int op, int mod, int modrm, int reg) +{ + int rm; + TCGArg alignment = 128; + CPUX86State *env = s->env; + + if (!build_llvm(env)) + return 0; + + switch (op) { + case 0x011: /* movups */ + case 0x111: /* movupd */ + case 0x27f: /* movdqu ea, xmm */ + alignment = (TCGArg)-1; + break; + default: + break; + } + + if (mod != 3) { + gen_lea_modrm(env, s, modrm); + gen_vector_op3(INDEX_op_vstore_128, + offsetof(CPUX86State, xmm_regs[reg]), + (TCGArg)cpu_A0, + alignment); + } else { + rm = (modrm & 7) | REX_B(s); + gen_vector_op3(INDEX_op_vmov_128, + offsetof(CPUX86State, xmm_regs[rm]), + offsetof(CPUX86State, xmm_regs[reg]), + alignment); + } + + return 1; +} + +static int gen_tcg_vector(DisasContext *s, int op, int b1, int mod, int modrm, int reg) +{ + int rd, rm, rn; + TCGArg alignment = 128; + CPUX86State *env = s->env; + + if (!build_llvm(env)) + return 0; + + switch(op) { + case 0x54 ... 0x59: + case 0x5b: /* cvtdq2ps cvtps2dq cvttps2dq */ + case 0x5c: + case 0x5e: + case 0xd4: + case 0xdb: + case 0xdf: + case 0xeb: + case 0xef: + case 0xf8 ... 0xfe: + break; + default: /* unhandled op */ + return 0; + } + + switch (op) { + case 0x50 ... 0x5a: + case 0x5c ... 0x5f: + case 0xc2: + /* Most sse scalar operations. */ + if (b1 == 2 || b1 == 3) + return 0; + break; + } + + rd = rn = reg; + if (mod != 3) { + gen_lea_modrm(env, s, modrm); + gen_vector_op3(INDEX_op_vload_128, + offsetof(CPUX86State, xmm_t0), + (TCGArg)cpu_A0, + alignment); + rm = -1; + } else { + rm = (modrm & 7) | REX_B(s); + } + + switch(op) { + case 0x54: /* andps, andpd */ + case 0xdb: /* MMX_OP2(pand) */ + gen_vector_logical(vand); break; + case 0x55: /* andnps, andnpd */ + case 0xdf: /* MMX_OP2(pandn) */ + { + int rtmp = rn; + rn = rm; + rm = rtmp; + gen_vector_logical(vbic); break; + } + case 0x56: /* orps, orpd */ + case 0xeb: /* por */ + gen_vector_logical(vorr); break; + case 0x57: /* xorps, xorpd */ + case 0xef: /* pxor */ + gen_vector_logical(veor); break; + case 0x58: /* SSE_FOP(add) */ + gen_vector_fop(vadd, b1); break; + case 0x59: /* SSE_FOP(mul) */ + gen_vector_fop(vmul, b1); break; + case 0x5c: /* SSE_FOP(sub) */ + gen_vector_fop(vsub, b1); break; + case 0x5e: /* SSE_FOP(div) */ + gen_vector_fop(vdiv, b1); break; + case 0x5b: /* cvtdq2ps cvtps2dq cvttps2dq */ + if(b1 == 0) + gen_vector_cvt(vsitofp, 32); + else if(b1 == 1) + gen_vector_cvt(vfptosi, 32); + else if(b1 == 2) + gen_vector_cvt(vfptosi, 32); + else + tcg_vector_abort(); + break; + case 0xd4: /* MMX_OP2(paddq) */ + if (b1 != 1) + tcg_vector_abort(); + gen_vector_arith(vadd, i, 3); break; + case 0xf8 ... 0xfb: /* MMX_OP2(psubb ... psubq) */ + if (b1 != 1) + tcg_vector_abort(); + gen_vector_arith(vsub, i, (op-0xf8)); break; + case 0xfc ... 0xfe: /* MMX_OP2(paddb ... 
paddl) */ + if (b1 != 1) + tcg_vector_abort(); + gen_vector_arith(vadd, i, (op-0xfc)); break; + default: + tcg_vector_abort(); + break; + } + + return 1; +} + +#endif + static void gen_sse(CPUX86State *env, DisasContext *s, int b, target_ulong pc_start, int rex_r) { @@ -3131,6 +3452,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, case 0x128: /* movapd */ case 0x16f: /* movdqa xmm, ea */ case 0x26f: /* movdqu xmm, ea */ +#ifdef ENABLE_TCG_VECTOR + if (gen_vload(s, b, mod, modrm, reg) == 1) + break; +#endif if (mod != 3) { gen_lea_modrm(env, s, modrm); gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg])); @@ -3317,6 +3642,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, case 0x129: /* movapd */ case 0x17f: /* movdqa ea, xmm */ case 0x27f: /* movdqu ea, xmm */ +#ifdef ENABLE_TCG_VECTOR + if (gen_vstore(s, b, mod, modrm, reg) == 1) + break; +#endif if (mod != 3) { gen_lea_modrm(env, s, modrm); gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg])); @@ -4283,6 +4612,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b, default: break; } +#ifdef ENABLE_TCG_VECTOR + if (is_xmm && gen_tcg_vector(s, b, b1, mod, modrm, reg) == 1) + return; +#endif if (is_xmm) { op1_offset = offsetof(CPUX86State,xmm_regs[reg]); if (mod != 3) { @@ -4565,9 +4898,11 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, s->aflag = aflag; s->dflag = dflag; +#ifndef CONFIG_COREMU /* lock generation */ if (prefixes & PREFIX_LOCK) gen_helper_lock(); +#endif /* now check op code */ reswitch: @@ -4719,6 +5054,29 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, set_cc_op(s, CC_OP_LOGICB + ot); break; case 2: /* not */ +#ifdef CONFIG_COREMU + if (s->prefix & PREFIX_LOCK) { + if (mod == 3) + goto illegal_op; + + switch(ot & 3) { + case 0: + gen_helper_atomic_notb(cpu_env, cpu_A0); + break; + case 1: + gen_helper_atomic_notw(cpu_env, cpu_A0); + break; + case 2: + gen_helper_atomic_notl(cpu_env, cpu_A0); + break; +#ifdef TARGET_X86_64 + case 3: + gen_helper_atomic_notq(cpu_env, cpu_A0); +#endif + } + break; + } +#endif tcg_gen_not_tl(cpu_T[0], cpu_T[0]); if (mod != 3) { gen_op_st_v(s, ot, cpu_T[0], cpu_A0); @@ -4727,6 +5085,32 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } break; case 3: /* neg */ +#ifdef CONFIG_COREMU + if (s->prefix & PREFIX_LOCK) { + if (mod == 3) + goto illegal_op; + + gen_update_cc_op(s); + + switch(ot & 3) { + case 0: + gen_helper_atomic_negb(cpu_env, cpu_A0); + break; + case 1: + gen_helper_atomic_negw(cpu_env, cpu_A0); + break; + case 2: + gen_helper_atomic_negl(cpu_env, cpu_A0); + break; +#ifdef TARGET_X86_64 + case 3: + gen_helper_atomic_negq(cpu_env, cpu_A0); +#endif + } + set_cc_op(s, CC_OP_EFLAGS); + break; + } +#endif tcg_gen_neg_tl(cpu_T[0], cpu_T[0]); if (mod != 3) { gen_op_st_v(s, ot, cpu_T[0], cpu_A0); @@ -4936,6 +5320,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, tcg_gen_movi_tl(cpu_T[1], next_eip); gen_push_v(s, cpu_T[1]); gen_op_jmp_v(cpu_T[0]); + s->gen_ibtc = 1; gen_eob(s); break; case 3: /* lcall Ev */ @@ -4954,6 +5339,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, tcg_const_i32(dflag - 1), tcg_const_i32(s->pc - s->cs_base)); } + s->gen_ibtc = 1; gen_eob(s); break; case 4: /* jmp Ev */ @@ -4961,6 +5347,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]); } gen_op_jmp_v(cpu_T[0]); + s->gen_ibtc = 1; gen_eob(s); break; case 5: /* ljmp Ev */ @@ -4976,6 +5363,7 @@ static 
target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_op_movl_seg_T0_vm(R_CS); gen_op_jmp_v(cpu_T[1]); } + s->gen_ibtc = 1; gen_eob(s); break; case 6: /* push Ev */ @@ -5124,7 +5512,36 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]); gen_op_mov_reg_v(ot, reg, cpu_T[1]); gen_op_mov_reg_v(ot, rm, cpu_T[0]); - } else { + } else +#ifdef CONFIG_COREMU + if (s->prefix & PREFIX_LOCK) { + gen_lea_modrm(env, s, modrm); + gen_update_cc_op(s); + + switch (ot & 3) { + case 0: + gen_helper_atomic_xaddb(cpu_env, cpu_A0, tcg_const_i32(reg), + tcg_const_i32(X86_64_HREGS)); + break; + case 1: + gen_helper_atomic_xaddw(cpu_env, cpu_A0, tcg_const_i32(reg), + tcg_const_i32(X86_64_HREGS)); + break; + case 2: + gen_helper_atomic_xaddl(cpu_env, cpu_A0, tcg_const_i32(reg), + tcg_const_i32(X86_64_HREGS)); + break; +#ifdef TARGET_X86_64 + case 3: + gen_helper_atomic_xaddq(cpu_env, cpu_A0, tcg_const_i32(reg), + tcg_const_i32(x86_64_hregs)); +#endif + } + set_cc_op(s, CC_OP_EFLAGS); + break; + } else +#endif + { gen_lea_modrm(env, s, modrm); gen_op_mov_v_reg(ot, cpu_T[0], reg); gen_op_ld_v(s, ot, cpu_T[1], cpu_A0); @@ -5145,6 +5562,38 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, modrm = cpu_ldub_code(env, s->pc++); reg = ((modrm >> 3) & 7) | rex_r; mod = (modrm >> 6) & 3; + +#ifdef CONFIG_COREMU + if (s->prefix & PREFIX_LOCK) { + if (mod == 3) + goto illegal_op; + + gen_lea_modrm(env, s, modrm); + gen_update_cc_op(s); + + switch(ot & 3) { + case 0: + gen_helper_atomic_cmpxchgb(cpu_env, cpu_A0, tcg_const_i32(reg), + tcg_const_i32(X86_64_HREGS)); + break; + case 1: + gen_helper_atomic_cmpxchgw(cpu_env, cpu_A0, tcg_const_i32(reg), + tcg_const_i32(X86_64_HREGS)); + break; + case 2: + gen_helper_atomic_cmpxchgl(cpu_env, cpu_A0, tcg_const_i32(reg), + tcg_const_i32(X86_64_HREGS)); + break; +#ifdef TARGET_X86_64 + case 3: + gen_helper_atomic_cmpxchgq(cpu_env, cpu_A0, tcg_const_i32(reg), + tcg_const_i32(x86_64_hregs)); +#endif + } + set_cc_op(s, CC_OP_EFLAGS); + break; + } +#endif t0 = tcg_temp_local_new(); t1 = tcg_temp_local_new(); t2 = tcg_temp_local_new(); @@ -5201,6 +5650,11 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, if (!(s->cpuid_ext_features & CPUID_EXT_CX16)) goto illegal_op; gen_lea_modrm(env, s, modrm); +#ifdef CONFIG_COREMU + if (s->prefix | PREFIX_LOCK) { + gen_helper_atomic_cmpxchg16b(cpu_env, cpu_A0); + } else +#endif gen_helper_cmpxchg16b(cpu_env, cpu_A0); } else #endif @@ -5208,6 +5662,11 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, if (!(s->cpuid_features & CPUID_CX8)) goto illegal_op; gen_lea_modrm(env, s, modrm); +#ifdef CONFIG_COREMU + if (s->prefix | PREFIX_LOCK) { + gen_helper_atomic_cmpxchg8b(cpu_env, cpu_A0); + } else +#endif gen_helper_cmpxchg8b(cpu_env, cpu_A0); } set_cc_op(s, CC_OP_EFLAGS); @@ -5550,15 +6009,41 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_op_mov_reg_v(ot, reg, cpu_T[1]); } else { gen_lea_modrm(env, s, modrm); +#ifdef CONFIG_COREMU + /* for xchg, lock is implicit. + XXX: none flag is affected! 
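Under CONFIG_COREMU the lock-prefixed read-modify-write instructions (the inc/not/neg cases earlier in this file, and xadd/cmpxchg here) stop serializing through gen_helper_lock()/gen_helper_unlock(); each operand width instead gets a helper that performs the whole update atomically on guest memory and then patches up the guest register file and EFLAGS, which is why every call site ends with set_cc_op(s, CC_OP_EFLAGS). The gen_inc() comment notes that these helpers are built on a compare-and-swap primitive; the sketch below shows that shape for a 32-bit xadd, with an invented name and without the register/EFLAGS write-back the real helpers do:

    /* Illustrative CAS loop in the spirit of the COREMU atomic helpers. */
    static uint32_t atomic_xaddl_sketch(uint32_t *haddr, uint32_t val)
    {
        uint32_t old, sum;
        do {
            old = *haddr;
            sum = old + val;
        } while (!__sync_bool_compare_and_swap(haddr, old, sum));
        return old;   /* xadd hands the previous memory value back to the guest */
    }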
*/ + switch (ot & 3) { + case 0: + gen_helper_xchgb(cpu_env, cpu_A0, tcg_const_i32(reg), + tcg_const_i32(X86_64_HREGS)); + break; + case 1: + gen_helper_xchgw(cpu_env, cpu_A0, tcg_const_i32(reg), + tcg_const_i32(X86_64_HREGS)); + break; + case 2: + gen_helper_xchgl(cpu_env, cpu_A0, tcg_const_i32(reg), + tcg_const_i32(X86_64_HREGS)); + break; +#ifdef TARGET_X86_64 + case 3: + gen_helper_xchgq(cpu_env, cpu_A0, tcg_const_i32(reg), + tcg_const_i32(x86_64_hregs)); +#endif + } +#else gen_op_mov_v_reg(ot, cpu_T[0], reg); /* for xchg, lock is implicit */ if (!(prefixes & PREFIX_LOCK)) gen_helper_lock(); gen_op_ld_v(s, ot, cpu_T[1], cpu_A0); gen_op_st_v(s, ot, cpu_T[0], cpu_A0); +#ifndef CONFIG_COREMU if (!(prefixes & PREFIX_LOCK)) gen_helper_unlock(); +#endif gen_op_mov_reg_v(ot, reg, cpu_T[1]); +#endif } break; case 0xc4: /* les Gv */ @@ -6360,6 +6845,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_stack_update(s, val + (1 << ot)); /* Note that gen_pop_T0 uses a zero-extending load. */ gen_op_jmp_v(cpu_T[0]); + s->gen_ibtc = 1; gen_eob(s); break; case 0xc3: /* ret */ @@ -6367,6 +6853,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, gen_pop_update(s, ot); /* Note that gen_pop_T0 uses a zero-extending load. */ gen_op_jmp_v(cpu_T[0]); + s->gen_ibtc = 1; gen_eob(s); break; case 0xca: /* lret im */ @@ -6392,6 +6879,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, /* add stack offset */ gen_stack_update(s, val + (2 << dflag)); } + s->gen_ibtc = 1; gen_eob(s); break; case 0xcb: /* lret */ @@ -6415,6 +6903,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, tcg_const_i32(s->pc - s->cs_base)); set_cc_op(s, CC_OP_EFLAGS); } + s->gen_ibtc = 1; gen_eob(s); break; case 0xe8: /* call im */ @@ -6680,6 +7169,27 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, } bt_op: tcg_gen_andi_tl(cpu_T[1], cpu_T[1], (1 << (3 + ot)) - 1); +#ifdef CONFIG_COREMU + if (s->prefix & PREFIX_LOCK) { + gen_update_cc_op(s); + + switch (op) { + case 0: + goto illegal_op; + break; + case 1: + gen_helper_atomic_bts(cpu_env, cpu_A0, cpu_T[1], tcg_const_i32(ot)); + break; + case 2: + gen_helper_atomic_btr(cpu_env, cpu_A0, cpu_T[1], tcg_const_i32(ot)); + break; + case 3: + gen_helper_atomic_btc(cpu_env, cpu_A0, cpu_T[1], tcg_const_i32(ot)); + } + set_cc_op(s, CC_OP_EFLAGS); + break; + } +#endif tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]); switch(op) { case 0: @@ -7818,12 +8328,16 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s, goto illegal_op; } /* lock generation */ +#ifndef CONFIG_COREMU if (s->prefix & PREFIX_LOCK) gen_helper_unlock(); +#endif return s->pc; illegal_op: +#ifndef CONFIG_COREMU if (s->prefix & PREFIX_LOCK) gen_helper_unlock(); +#endif /* XXX: ensure that no lock was generated */ gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base); return s->pc; @@ -7879,6 +8393,8 @@ void optimize_flags_init(void) } helper_lock_init(); + + copy_tcg_context_global(); } /* generate intermediate code in gen_opc_buf and gen_opparam_buf for @@ -7900,6 +8416,10 @@ void gen_intermediate_code(CPUX86State *env, TranslationBlock *tb) cs_base = tb->cs_base; flags = tb->flags; + dc->fallthrough = 0; + dc->gen_ibtc = 0; + dc->gen_cpbl = 0; + dc->env = env; dc->pe = (flags >> HF_PE_SHIFT) & 1; dc->code32 = (flags >> HF_CS32_SHIFT) & 1; dc->ss32 = (flags >> HF_SS32_SHIFT) & 1; @@ -7977,7 +8497,12 @@ void gen_intermediate_code(CPUX86State *env, TranslationBlock *tb) max_insns = TCG_MAX_INSNS; } - gen_tb_start(tb); + if 
(!build_llvm(env)) { + gen_tb_start(tb); + if (tracer_mode != TRANS_MODE_NONE) + tcg_gen_hotpatch(IS_USER(dc), tracer_mode == TRANS_MODE_HYBRIDS || + tracer_mode == TRANS_MODE_HYBRIDM); + } for(;;) { tcg_gen_insn_start(pc_ptr, dc->cc_op); num_insns++; @@ -8027,12 +8552,27 @@ void gen_intermediate_code(CPUX86State *env, TranslationBlock *tb) gen_eob(dc); break; } + +#if defined(CONFIG_LLVM) && defined(CONFIG_USER_ONLY) + if (llvm_has_annotation(pc_ptr, ANNOTATION_LOOP)) + break; +#endif + if (build_llvm(env) && num_insns == tb->icount) { + gen_jmp_im(pc_ptr - dc->cs_base); + gen_eob(dc); + break; + } + /* if too long translation, stop generation too */ if (tcg_op_buf_full() || (pc_ptr - pc_start) >= (TARGET_PAGE_SIZE - 32) || num_insns >= max_insns) { gen_jmp_im(pc_ptr - dc->cs_base); + dc->fallthrough = 1; + dc->gen_ibtc = 1; gen_eob(dc); + + tb->jmp_pc[0] = pc_ptr; break; } if (singlestep) { @@ -8041,13 +8581,28 @@ void gen_intermediate_code(CPUX86State *env, TranslationBlock *tb) break; } } + if (build_llvm(env) && tb->size != dc->pc - pc_start) { + /* consistency check with tb info. we must make sure + * guest basic blocks are the same. skip this trace if inconsistent */ + fprintf(stderr, "inconsistent block with pc 0x"TARGET_FMT_lx" size=%d" + " icount=%d (error size="TARGET_FMT_ld")\n", + tb->pc, tb->size, tb->icount, dc->pc - pc_start); + exit(0); + } + if (tb->cflags & CF_LAST_IO) gen_io_end(); done_generating: - gen_tb_end(tb, num_insns); + + if (build_llvm(env)) { + /* Terminate the linked list. */ + tcg_ctx.gen_op_buf[tcg_ctx.gen_last_op_idx].next = -1; + } else { + gen_tb_end(tb, num_insns); + } #ifdef DEBUG_DISAS - if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) { + if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) && !build_llvm(env)) { int disas_flags; qemu_log("----------------\n"); qemu_log("IN: %s\n", lookup_symbol(pc_start)); @@ -8062,8 +8617,10 @@ done_generating: } #endif - tb->size = pc_ptr - pc_start; - tb->icount = num_insns; + if (!build_llvm(env)) { + tb->size = pc_ptr - pc_start; + tb->icount = num_insns; + } } void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb, diff --git a/target-ppc/Makefile.objs b/target-ppc/Makefile.objs index e667e69..363a701 100644 --- a/target-ppc/Makefile.objs +++ b/target-ppc/Makefile.objs @@ -1,5 +1,5 @@ obj-y += cpu-models.o -obj-y += translate.o +obj-y += translate.o helper.o ifeq ($(CONFIG_SOFTMMU),y) obj-y += machine.o mmu_helper.o mmu-hash32.o monitor.o obj-$(TARGET_PPC64) += mmu-hash64.o arch_dump.o diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h index 9706000..bf1481a 100644 --- a/target-ppc/cpu.h +++ b/target-ppc/cpu.h @@ -88,7 +88,6 @@ /*****************************************************************************/ /* MMU model */ -typedef enum powerpc_mmu_t powerpc_mmu_t; enum powerpc_mmu_t { POWERPC_MMU_UNKNOWN = 0x00000000, /* Standard 32 bits PowerPC MMU */ @@ -133,10 +132,10 @@ enum powerpc_mmu_t { | 0x00000004, #endif /* defined(TARGET_PPC64) */ }; +typedef enum powerpc_mmu_t powerpc_mmu_t; /*****************************************************************************/ /* Exception model */ -typedef enum powerpc_excp_t powerpc_excp_t; enum powerpc_excp_t { POWERPC_EXCP_UNKNOWN = 0, /* Standard PowerPC exception model */ @@ -170,6 +169,7 @@ enum powerpc_excp_t { POWERPC_EXCP_POWER7, #endif /* defined(TARGET_PPC64) */ }; +typedef enum powerpc_excp_t powerpc_excp_t; /*****************************************************************************/ /* Exception vectors definitions */ @@ -298,7 +298,6 @@ enum { 
/*****************************************************************************/ /* Input pins model */ -typedef enum powerpc_input_t powerpc_input_t; enum powerpc_input_t { PPC_FLAGS_INPUT_UNKNOWN = 0, /* PowerPC 6xx bus */ @@ -316,6 +315,7 @@ enum powerpc_input_t { /* Freescale RCPU bus */ PPC_FLAGS_INPUT_RCPU, }; +typedef enum powerpc_input_t powerpc_input_t; #define PPC_INPUT(env) (env->bus_model) @@ -1168,6 +1168,8 @@ struct CPUPPCState { uint32_t tm_vscr; uint64_t tm_dscr; uint64_t tm_tar; + + CPU_OPTIMIZATION_COMMON }; #define SET_FIT_PERIOD(a_, b_, c_, d_) \ @@ -2226,6 +2228,17 @@ static inline void cpu_get_tb_cpu_state(CPUPPCState *env, target_ulong *pc, *flags = env->hflags; } +static inline target_ulong cpu_get_pc(CPUPPCState *env) +{ + return env->nip; +} + +static inline int cpu_check_state(CPUPPCState *env, + target_ulong cs_base, int flags) +{ + return cs_base == 0 && (uint32_t)flags == env->hflags; +} + #if !defined(CONFIG_USER_ONLY) static inline int booke206_tlbm_id(CPUPPCState *env, ppcmas_tlb_t *tlbm) { @@ -2311,7 +2324,7 @@ static inline uint32_t booke206_tlbnps(CPUPPCState *env, const int tlbn) uint32_t tlbncfg = env->spr[SPR_BOOKE_TLB0CFG + tlbn]; uint32_t min = (tlbncfg & TLBnCFG_MINSIZE) >> TLBnCFG_MINSIZE_SHIFT; uint32_t max = (tlbncfg & TLBnCFG_MAXSIZE) >> TLBnCFG_MAXSIZE_SHIFT; - int i; + unsigned i; for (i = min; i <= max; i++) { ret |= (1 << (i << 1)); } diff --git a/target-ppc/helper.c b/target-ppc/helper.c new file mode 100644 index 0000000..5ec684b --- /dev/null +++ b/target-ppc/helper.c @@ -0,0 +1,9 @@ +#include "cpu.h" +CPUState *cpu_create(void) +{ + PowerPCCPU *cpu = g_malloc0(sizeof(PowerPCCPU)); + CPUState *cs = CPU(cpu); + memcpy(cpu, POWERPC_CPU(first_cpu), sizeof(PowerPCCPU)); + cs->env_ptr = &cpu->env; + return cs; +} diff --git a/target-ppc/helper.h b/target-ppc/helper.h index 869be15..c96f51b 100644 --- a/target-ppc/helper.h +++ b/target-ppc/helper.h @@ -667,3 +667,5 @@ DEF_HELPER_4(dscli, void, env, fprp, fprp, i32) DEF_HELPER_4(dscliq, void, env, fprp, fprp, i32) DEF_HELPER_1(tbegin, void, env) + +#include "hqemu-helper.h" diff --git a/target-ppc/translate.c b/target-ppc/translate.c index 41a7258..15cedc5 100644 --- a/target-ppc/translate.c +++ b/target-ppc/translate.c @@ -28,7 +28,13 @@ #include "exec/helper-gen.h" #include "trace-tcg.h" +#include "hqemu.h" +#if defined(CONFIG_USER_ONLY) +#define IS_USER(s) 1 +#else +#define IS_USER(s) (s->mem_idx == MMU_USER_IDX) +#endif #define CPU_SINGLE_STEP 0x1 #define CPU_BRANCH_STEP 0x2 @@ -180,6 +186,8 @@ void ppc_translate_init(void) offsetof(CPUPPCState, access_type), "access_type"); done_init = 1; + + copy_tcg_context_global(); } /* internal defines */ @@ -11479,7 +11487,12 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb) max_insns = TCG_MAX_INSNS; } - gen_tb_start(tb); + if (!build_llvm(env)) { + gen_tb_start(tb); + if (tracer_mode != TRANS_MODE_NONE) + tcg_gen_hotpatch(IS_USER(ctxp), tracer_mode == TRANS_MODE_HYBRIDS || + tracer_mode == TRANS_MODE_HYBRIDM); + } tcg_clear_temp_count(); /* Set env in case of segfault during code fetch */ while (ctx.exception == POWERPC_EXCP_NONE && !tcg_op_buf_full()) { @@ -11553,6 +11566,9 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb) #if defined(DO_PPC_STATISTICS) handler->count++; #endif + if (build_llvm(env) && num_insns == tb->icount) + break; + /* Check trace mode exceptions */ if (unlikely(ctx.singlestep_enabled & CPU_SINGLE_STEP && (ctx.nip <= 0x100 || ctx.nip > 0xF00) && @@ -11576,6 +11592,16 @@ 
void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb) exit(1); } } + + if (build_llvm(env) && tb->size != ctx.nip - pc_start) { + /* consistency check with tb info. we must make sure + * guest basic blocks are the same */ + fprintf(stderr, "inconsistent block with pc 0x"TARGET_FMT_lx" size %d" + " icount=%d (error size="TARGET_FMT_ld")\n", + tb->pc, tb->size, tb->icount, ctx.nip - pc_start); + exit(0); + } + if (tb->cflags & CF_LAST_IO) gen_io_end(); if (ctx.exception == POWERPC_EXCP_NONE) { @@ -11587,13 +11613,18 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb) /* Generate the return instruction */ tcg_gen_exit_tb(0); } - gen_tb_end(tb, num_insns); - tb->size = ctx.nip - pc_start; - tb->icount = num_insns; + if (build_llvm(env)) { + /* Terminate the linked list. */ + tcg_ctx.gen_op_buf[tcg_ctx.gen_last_op_idx].next = -1; + } else { + gen_tb_end(tb, num_insns); + tb->size = ctx.nip - pc_start; + tb->icount = num_insns; + } #if defined(DEBUG_DISAS) - if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) { + if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) && !build_llvm(env)) { int flags; flags = env->bfd_mach; flags |= ctx.le_mode << 16; diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c index 0ed10a9..05e26af 100644 --- a/tcg/aarch64/tcg-target.c +++ b/tcg/aarch64/tcg-target.c @@ -1264,7 +1264,56 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg, #endif /* CONFIG_SOFTMMU */ } -static tcg_insn_unit *tb_ret_addr; +tcg_insn_unit *tb_ret_addr; +tcg_insn_unit *ibtc_ret_addr; + +/* + * Emit trace profiling/prediction stubs. The code sequence is as following: + * S1: direct jump (the reloc part requires 4-byte alignment) + * S2: trace profiling stub + * S3: trace prediction stub + * S4: beginning of QEMU emulation code + * + * The jump inst of S1 is initially set to jump to S3 (i.e. skipping S2). + * Remember the offset of S3 (patch_next) which is used to turn the + * trace profiling off. Also remember the offset of S4 (patch_skip) + * so that the trace stubs can be skipped quickly while searching pc.
+ */ +static void tcg_out_hotpatch(TCGContext *s, int is_user, int emit_helper) +{ + tcg_insn_unit *label_ptr[2]; + TranslationBlock *tb = s->tb; + + tb->patch_jmp = (uint16_t)((intptr_t)s->code_ptr - (intptr_t)s->code_buf); + + /* S1: Direct Jump */ + if (is_user == 0 || emit_helper == 0) { + tcg_out_goto(s, s->code_ptr + 1); + tb->patch_next = (uint16_t)((intptr_t)s->code_ptr - (intptr_t)s->code_buf); + return; + } + + label_ptr[0] = s->code_ptr; + tcg_out_goto_noaddr(s); + /* S2: Trace Profiling Stub */ + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[1], tb->id); + tcg_out_call(s, (tcg_insn_unit *)helper_NET_profile); + reloc_pc26(label_ptr[0], s->code_ptr); + + /* S3: Trace Prediction stub */ + tb->patch_next = (uint16_t)((intptr_t)s->code_ptr - (intptr_t)s->code_buf); + + tcg_out_ld(s, TCG_TYPE_I32, tcg_target_reg_alloc_order[0], + TCG_AREG0, offsetof(CPUArchState, start_trace_prediction)); + tcg_out_cmp(s, 0, tcg_target_reg_alloc_order[0], 0, 1); + label_ptr[1] = s->code_ptr; + tcg_out_goto_cond_noaddr(s, TCG_COND_EQ); + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[1], tb->id); + tcg_out_call(s, (tcg_insn_unit *)helper_NET_predict); + reloc_pc19(label_ptr[1], s->code_ptr); +} static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg args[TCG_MAX_OP_ARGS], @@ -1302,6 +1351,16 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, s->tb_next_offset[a0] = tcg_current_code_size(s); break; + case INDEX_op_hotpatch: + tcg_out_hotpatch(s, args[0], args[1]); + break; + case INDEX_op_jmp: + if (const_args[0]) { + tcg_out_goto(s, (tcg_insn_unit *)args[0]); + } else { + tcg_out_insn(s, 3207, BR, args[0]); + } + break; case INDEX_op_br: tcg_out_goto_label(s, arg_label(a0)); break; @@ -1637,6 +1696,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, } static const TCGTargetOpDef aarch64_op_defs[] = { + { INDEX_op_hotpatch, { "i", "i" } }, + { INDEX_op_jmp, { "ri" } }, { INDEX_op_exit_tb, { } }, { INDEX_op_goto_tb, { } }, { INDEX_op_br, { } }, @@ -1748,6 +1809,10 @@ static const TCGTargetOpDef aarch64_op_defs[] = { { INDEX_op_muluh_i64, { "r", "r", "r" } }, { INDEX_op_mulsh_i64, { "r", "r", "r" } }, +#define DEF(name,a1,a2,a3,a4) { INDEX_op_##name, {} }, +#include "tcg-opc-vector.h" +#undef DEF + { -1 }, }; @@ -1777,12 +1842,24 @@ static void tcg_target_init(TCGContext *s) tcg_add_target_add_op_defs(aarch64_op_defs); } +static void tcg_out_epilogue(TCGContext *s) +{ + /* IBTC exit entry */ + ibtc_ret_addr = s->code_ptr; + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_X0, 0); +} + +#if defined(CONFIG_LLVM) +#define STACK_SIZE 0x800 +#else +#define STACK_SIZE TCG_STATIC_CALL_ARGS_SIZE +#endif /* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */ #define PUSH_SIZE ((30 - 19 + 1) * 8) #define FRAME_SIZE \ ((PUSH_SIZE \ - + TCG_STATIC_CALL_ARGS_SIZE \ + + STACK_SIZE \ + CPU_TEMP_BUF_NLONGS * sizeof(long) \ + TCG_TARGET_STACK_ALIGN - 1) \ & ~(TCG_TARGET_STACK_ALIGN - 1)) @@ -1828,6 +1905,7 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]); + tcg_out_epilogue(s); tb_ret_addr = s->code_ptr; /* Remove TCG locals stack space. 
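tcg_out_hotpatch() records two offsets in the TranslationBlock: patch_jmp (the S1 jump) and patch_next (the start of S3). Per the comment above, S1 initially branches to S3, so the profiling call in S2 is skipped until the runtime rewrites the jump, and patch_next is what it writes back later to turn profiling off again. That runtime side is not part of this hunk; the sketch below is only one plausible shape for it, and patch_branch_target() and JUMP_INSN_SIZE are invented names:

    /* Hypothetical consumer of the recorded offsets (not in this patch). */
    static void set_trace_profiling(TranslationBlock *tb, bool enable)
    {
        char *s1 = (char *)tb->tc_ptr + tb->patch_jmp;   /* the S1 jump insn        */
        char *s2 = s1 + JUMP_INSN_SIZE;                  /* fall through -> profile */
        char *s3 = (char *)tb->tc_ptr + tb->patch_next;  /* skip the profiling stub */

        patch_branch_target(s1, enable ? s2 : s3);
        flush_icache_range((uintptr_t)s1, (uintptr_t)(s1 + JUMP_INSN_SIZE));
    }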
*/ diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c index 9187d34..b95f5fb 100644 --- a/tcg/i386/tcg-target.c +++ b/tcg/i386/tcg-target.c @@ -139,7 +139,8 @@ static bool have_bmi2; # define have_bmi2 0 #endif -static tcg_insn_unit *tb_ret_addr; +tcg_insn_unit *tb_ret_addr; +tcg_insn_unit *ibtc_ret_addr; static void patch_reloc(tcg_insn_unit *code_ptr, int type, intptr_t value, intptr_t addend) @@ -323,6 +324,7 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type, #define OPC_MOVB_EvGv (0x88) /* stores, more or less */ #define OPC_MOVL_EvGv (0x89) /* stores, more or less */ #define OPC_MOVL_GvEv (0x8b) /* loads, more or less */ +#define OPC_NOP (0x90) #define OPC_MOVB_EvIz (0xc6) #define OPC_MOVL_EvIz (0xc7) #define OPC_MOVL_Iv (0xb8) @@ -1150,6 +1152,62 @@ static void * const qemu_st_helpers[16] = { [MO_BEQ] = helper_be_stq_mmu, }; +/* helpers for LLVM */ +void * const llvm_ld_helpers[16] = { + [MO_UB] = llvm_ret_ldub_mmu, + [MO_LEUW] = llvm_le_lduw_mmu, + [MO_LEUL] = llvm_le_ldul_mmu, + [MO_LEQ] = llvm_le_ldq_mmu, + [MO_BEUW] = llvm_be_lduw_mmu, + [MO_BEUL] = llvm_be_ldul_mmu, + [MO_BEQ] = llvm_be_ldq_mmu, +}; + +void * const llvm_st_helpers[16] = { + [MO_UB] = llvm_ret_stb_mmu, + [MO_LEUW] = llvm_le_stw_mmu, + [MO_LEUL] = llvm_le_stl_mmu, + [MO_LEQ] = llvm_le_stq_mmu, + [MO_BEUW] = llvm_be_stw_mmu, + [MO_BEUL] = llvm_be_stl_mmu, + [MO_BEQ] = llvm_be_stq_mmu, +}; + +static inline void tcg_out_compute_gva(TCGContext *s, TCGReg addrlo, + TCGMemOp opc, int trexw, int tv_hrexw) +{ + const TCGReg r1 = TCG_REG_L1; + int s_mask = (1 << (opc & MO_SIZE)) - 1; + +#if defined(ALIGNED_ONLY) + TCGType ttype = TCG_TYPE_I32; + bool aligned = (opc & MO_AMASK) == MO_ALIGN || s_mask == 0; + if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 64) + ttype = TCG_TYPE_I64; + if (aligned) { + tcg_out_mov(s, ttype, r1, addrlo); + } else { + /* For unaligned access check that we don't cross pages using + the page address of the last byte. */ + tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask); + } + tgen_arithi(s, ARITH_AND + trexw, r1, + TARGET_PAGE_MASK | (aligned ? s_mask : 0), 0); +#elif defined(ENABLE_TLBVERSION) + /* the following code is as equivalent to + * (((addr + (size - 1)) & TARGET_PAGE_MASK) | env->tlb_version) */ + tcg_out_modrm_sib_offset(s, OPC_LEA + trexw, r1, addrlo, -1, 0, s_mask); + tgen_arithi(s, ARITH_AND + trexw, r1, TARGET_PAGE_MASK, 0); + tcg_out_modrm_offset(s, (OPC_ARITH_GvEv | (ARITH_OR << 3)) + trexw + tv_hrexw, + r1, TCG_AREG0, offsetof(CPUArchState, tlb_version)); +#else + /* the following code is as equivalent to + * ((addr + (size - 1)) & TARGET_PAGE_MASK) */ + tcg_out_modrm_sib_offset(s, OPC_LEA + trexw, r1, addrlo, -1, 0, s_mask); + tgen_arithi(s, ARITH_AND + trexw, r1, TARGET_PAGE_MASK, 0); +#endif +} + /* Perform the TLB load and compare. 
Inputs: @@ -1179,9 +1237,7 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, const TCGReg r1 = TCG_REG_L1; TCGType ttype = TCG_TYPE_I32; TCGType tlbtype = TCG_TYPE_I32; - int trexw = 0, hrexw = 0, tlbrexw = 0; - int s_mask = (1 << (opc & MO_SIZE)) - 1; - bool aligned = (opc & MO_AMASK) == MO_ALIGN || s_mask == 0; + int trexw = 0, hrexw = 0, tlbrexw = 0, tv_hrexw = 0; if (TCG_TARGET_REG_BITS == 64) { if (TARGET_LONG_BITS == 64) { @@ -1197,20 +1253,18 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, } } +#if defined(ENABLE_TLBVERSION_EXT) + trexw = 0; + tv_hrexw = P_REXW; +#endif + tcg_out_mov(s, tlbtype, r0, addrlo); - if (aligned) { - tcg_out_mov(s, ttype, r1, addrlo); - } else { - /* For unaligned access check that we don't cross pages using - the page address of the last byte. */ - tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask); - } tcg_out_shifti(s, SHIFT_SHR + tlbrexw, r0, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS); - tgen_arithi(s, ARITH_AND + trexw, r1, - TARGET_PAGE_MASK | (aligned ? s_mask : 0), 0); + tcg_out_compute_gva(s, addrlo, opc, trexw, tv_hrexw); + tgen_arithi(s, ARITH_AND + tlbrexw, r0, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0); @@ -1219,7 +1273,7 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, + which); /* cmp 0(r0), r1 */ - tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0); + tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw + tv_hrexw, r1, r0, 0); /* Prepare for both the fast path add of the tlb addend, and the slow path function argument setup. There are two cases worth note: @@ -1754,6 +1808,73 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) #endif } +/* + * Emit trace profiling/prediction stubs. The code sequence is as following: + * S1: direct jump (the reloc part requires 4-byte alignment) + * S2: trace profiling stub + * S3: trace prediction stub + * S4: beginning of QEMU emulation code + * + * The jump inst of S1 is initially set to jump to S3 (i.e. skipping S2). + * Remember the offset of S3 (patch_next) which is used to turn the + * trace profiling off. Also remember the offset of S4 (patch_skip) + * so that the trace stubs can be skipped quickly while searching pc. 
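With ENABLE_TLBVERSION, tcg_out_compute_gva() earlier in this file builds the value compared against the softmmu TLB entry as (((addr + (size - 1)) & TARGET_PAGE_MASK) | env->tlb_version). Folding the version into the comparison presumably lets the runtime invalidate the whole TLB by bumping env->tlb_version, since entries filled under an older version then simply stop matching. Restated in plain C (not part of the patch):

    /* C restatement of the ENABLE_TLBVERSION compare value (sketch). */
    static target_ulong tlb_compare_value(target_ulong addr, int access_size,
                                          target_ulong tlb_version)
    {
        target_ulong page = (addr + (access_size - 1)) & TARGET_PAGE_MASK;
        return page | tlb_version;   /* matched against the versioned entry */
    }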
+ */ +static void tcg_out_hotpatch(TCGContext *s, uint32_t is_user, uint32_t emit_helper) +{ + uint8_t *label_ptr[2]; + TranslationBlock *tb = s->tb; + + /* S1: direct jump */ + while (((uintptr_t)s->code_ptr + 1) % 4) + tcg_out8(s, OPC_NOP); + + tb->patch_jmp = (uint16_t)(s->code_ptr - s->code_buf); + + tcg_out8(s, OPC_JMP_long); + label_ptr[0] = s->code_ptr; + s->code_ptr += 4; + + if (is_user == 0 || emit_helper == 0) { + *(uint32_t *)label_ptr[0] = s->code_ptr - label_ptr[0] - 4; + tb->patch_next = (uint16_t)(s->code_ptr - s->code_buf); + return; + } + + /* S2: trace profiling stub */ + if (TCG_TARGET_REG_BITS == 32) { + tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, 0); + tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, 4, tb->id); + } else { + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[1], tb->id); + } + tcg_out_call(s, (tcg_insn_unit *)helper_NET_profile); + *(uint32_t *)label_ptr[0] = s->code_ptr - label_ptr[0] - 4; + + /* S3: trace prediction stub */ + tb->patch_next = (uint16_t)(s->code_ptr - s->code_buf); + + tcg_out_ld(s, TCG_TYPE_I32, tcg_target_reg_alloc_order[0], + TCG_AREG0, offsetof(CPUArchState, start_trace_prediction)); + tcg_out_cmp(s, tcg_target_reg_alloc_order[0], 0, 1, 0); + tcg_out_opc(s, OPC_JCC_long + JCC_JE, 0, 0, 0); + label_ptr[1] = s->code_ptr; + s->code_ptr += 4; + + if (TCG_TARGET_REG_BITS == 32) { + tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, 0); + tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, 4, tb->id); + } else { + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[1], tb->id); + } + tcg_out_call(s, (tcg_insn_unit *)helper_NET_predict); + *(uint32_t *)label_ptr[1] = s->code_ptr - label_ptr[1] - 4; + + /* S4: QEMU emulation code */ +} + static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *const_args) { @@ -1777,6 +1898,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_goto_tb: if (s->tb_jmp_offset) { /* direct jump method */ +#if defined(CONFIG_USER_ONLY) + while (((uintptr_t)s->code_ptr + 1) % 4) /* need 4-byte aligned */ + tcg_out8(s, OPC_NOP); +#endif tcg_out8(s, OPC_JMP_long); /* jmp im */ s->tb_jmp_offset[args[0]] = tcg_current_code_size(s); tcg_out32(s, 0); @@ -1787,6 +1912,17 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, } s->tb_next_offset[args[0]] = tcg_current_code_size(s); break; + case INDEX_op_hotpatch: + tcg_out_hotpatch(s, args[0], args[1]); + break; + case INDEX_op_jmp: + if (const_args[0]) { + tcg_out_jmp(s, (tcg_insn_unit *)args[0]); + } else { + /* jmp *reg */ + tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, args[0]); + } + break; case INDEX_op_br: tcg_out_jxx(s, JCC_JMP, arg_label(args[0]), 0); break; @@ -2110,6 +2246,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, } static const TCGTargetOpDef x86_op_defs[] = { + { INDEX_op_hotpatch, { "i", "i" } }, + { INDEX_op_jmp, { "ri" } }, { INDEX_op_exit_tb, { } }, { INDEX_op_goto_tb, { } }, { INDEX_op_br, { } }, @@ -2238,6 +2376,11 @@ static const TCGTargetOpDef x86_op_defs[] = { { INDEX_op_qemu_ld_i64, { "r", "r", "L", "L" } }, { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } }, #endif + +#define DEF(name,a1,a2,a3,a4) { INDEX_op_##name, {} }, +#include "tcg-opc-vector.h" +#undef DEF + { -1 }, }; @@ -2261,16 +2404,29 @@ static int tcg_target_callee_save_regs[] = { #endif }; +static void tcg_out_epilogue(TCGContext *s) +{ + /* IBTC exit 
entry */ + ibtc_ret_addr = s->code_ptr; + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, 0); +} + /* Compute frame size via macros, to share between tcg_target_qemu_prologue and tcg_register_jit. */ +#if defined(CONFIG_LLVM) +#define STACK_SIZE 0x2000 +#else +#define STACK_SIZE TCG_STATIC_CALL_ARGS_SIZE +#endif + #define PUSH_SIZE \ ((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \ * (TCG_TARGET_REG_BITS / 8)) #define FRAME_SIZE \ ((PUSH_SIZE \ - + TCG_STATIC_CALL_ARGS_SIZE \ + + STACK_SIZE \ + CPU_TEMP_BUF_NLONGS * sizeof(long) \ + TCG_TARGET_STACK_ALIGN - 1) \ & ~(TCG_TARGET_STACK_ALIGN - 1)) @@ -2279,10 +2435,12 @@ static int tcg_target_callee_save_regs[] = { static void tcg_target_qemu_prologue(TCGContext *s) { int i, stack_addend; + tcg_target_long stack_align_mask; /* TB prologue */ /* Reserve some stack space, also for TCG temps. */ + stack_align_mask = ~(TCG_TARGET_STACK_ALIGN - 1); stack_addend = FRAME_SIZE - PUSH_SIZE; tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE, CPU_TEMP_BUF_NLONGS * sizeof(long)); @@ -2296,6 +2454,9 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, (ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4); tcg_out_addi(s, TCG_REG_ESP, -stack_addend); + tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_ESP, TCG_AREG0, + offsetof(CPUArchState, sp)); + tgen_arithi(s, ARITH_AND, TCG_REG_ESP, stack_align_mask, 0); /* jmp *tb. */ tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP, (ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4 @@ -2303,13 +2464,19 @@ static void tcg_target_qemu_prologue(TCGContext *s) #else tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]); tcg_out_addi(s, TCG_REG_ESP, -stack_addend); + tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_ESP, TCG_AREG0, + offsetof(CPUArchState, sp)); + tgen_arithi(s, ARITH_AND + P_REXW, TCG_REG_ESP, stack_align_mask, 0); /* jmp *tb. 
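The TB code pointer is passed as the second argument to tcg_qemu_tb_exec, so jump through that register once the frame is set up.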
*/ tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]); #endif /* TB epilogue */ + tcg_out_epilogue(s); tb_ret_addr = s->code_ptr; + tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_ESP, TCG_AREG0, + offsetof(CPUArchState, sp)); tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend); for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) { diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h index 92be341..c5715dc 100644 --- a/tcg/i386/tcg-target.h +++ b/tcg/i386/tcg-target.h @@ -67,7 +67,7 @@ typedef enum { /* used for function call generation */ #define TCG_REG_CALL_STACK TCG_REG_ESP -#define TCG_TARGET_STACK_ALIGN 16 +#define TCG_TARGET_STACK_ALIGN 32 #if defined(_WIN64) #define TCG_TARGET_CALL_STACK_OFFSET 32 #else diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c index 2c72565..ca5c7a4 100644 --- a/tcg/ppc/tcg-target.c +++ b/tcg/ppc/tcg-target.c @@ -78,7 +78,8 @@ #define TCG_CT_CONST_ZERO 0x1000 #define TCG_CT_CONST_MONE 0x2000 -static tcg_insn_unit *tb_ret_addr; +tcg_insn_unit *tb_ret_addr; +tcg_insn_unit *ibtc_ret_addr; #include "elf.h" static bool have_isa_2_06; @@ -1785,8 +1786,14 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) #define CPU_TEMP_BUF_SIZE (CPU_TEMP_BUF_NLONGS * (int)sizeof(long)) #define REG_SAVE_SIZE ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR) +#if defined(CONFIG_LLVM) +#define STACK_SIZE 0x800 +#else +#define STACK_SIZE TCG_STATIC_CALL_ARGS_SIZE +#endif + #define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET \ - + TCG_STATIC_CALL_ARGS_SIZE \ + + STACK_SIZE \ + CPU_TEMP_BUF_SIZE \ + REG_SAVE_SIZE \ + TCG_TARGET_STACK_ALIGN - 1) \ @@ -1794,6 +1801,14 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64) #define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE) +static unsigned num_epilogue_insns = 1; +static void tcg_out_epilogue(TCGContext *s) +{ + /* IBTC exit entry */ + ibtc_ret_addr = s->code_ptr; + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, 0); +} + static void tcg_target_qemu_prologue(TCGContext *s) { int i; @@ -1832,27 +1847,29 @@ static void tcg_target_qemu_prologue(TCGContext *s) if (USE_REG_RA) { #ifdef _CALL_AIX /* Make the caller load the value as the TOC into R2. */ - tb_ret_addr = s->code_ptr + 2; + tb_ret_addr = s->code_ptr + 2 + num_epilogue_insns; desc[1] = tb_ret_addr; tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_RA, TCG_REG_R2); tcg_out32(s, BCCTR | BO_ALWAYS); #elif defined(_CALL_ELF) && _CALL_ELF == 2 /* Compute from the incoming R12 value. */ - tb_ret_addr = s->code_ptr + 2; + tb_ret_addr = s->code_ptr + 2 + num_epilogue_insns; tcg_out32(s, ADDI | TAI(TCG_REG_RA, TCG_REG_R12, tcg_ptr_byte_diff(tb_ret_addr, s->code_buf))); tcg_out32(s, BCCTR | BO_ALWAYS); #else /* Reserve max 5 insns for the constant load. 
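On ppc64 a full 64-bit constant can take up to five instructions to materialize, hence the NOP padding below that keeps tb_ret_addr fixed.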
*/ - tb_ret_addr = s->code_ptr + 6; + tb_ret_addr = s->code_ptr + 6 + num_epilogue_insns; tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (intptr_t)tb_ret_addr); tcg_out32(s, BCCTR | BO_ALWAYS); while (s->code_ptr < tb_ret_addr) { tcg_out32(s, NOP); } #endif + tcg_out_epilogue(s); } else { tcg_out32(s, BCCTR | BO_ALWAYS); + tcg_out_epilogue(s); tb_ret_addr = s->code_ptr; } @@ -1869,6 +1886,85 @@ static void tcg_target_qemu_prologue(TCGContext *s) tcg_out32(s, BCLR | BO_ALWAYS); } +static void tcg_out_jmp_short(uintptr_t jmp_addr, uintptr_t addr) +{ + tcg_insn_unit i1, i2; + uint64_t pair; + intptr_t diff = addr - jmp_addr; + + if (!in_range_b(diff)) + tcg_abort(); + + i1 = B | (diff & 0x3fffffc); + i2 = NOP; +#ifdef HOST_WORDS_BIGENDIAN + pair = (uint64_t)i1 << 32 | i2; +#else + pair = (uint64_t)i2 << 32 | i1; +#endif + *(uint64_t *)jmp_addr = pair; +} + +/* + * Emit trace profiling/prediction stubs. The code sequence is as following: + * S1: direct jump (the reloc part requires 4-byte alignment) + * S2: trace profiling stub + * S3: trace prediction stub + * S4: beginning of QEMU emulation code + * + * The jump inst of S1 is initially set to jump to S3 (i.e. skipping S2). + * Remember the offset of S3 (patch_next) which is used to turn the + * trace profiling off. Also remember the offset of S4 (patch_skip) + * so that the trace stubs can be skipped quickly while searching pc. + */ +static void tcg_out_hotpatch(TCGContext *s, int is_user, int emit_helper) +{ + tcg_insn_unit *label_ptr[2]; + TranslationBlock *tb = s->tb; + + /* S1: direct jump. Ensure the next insns are 8-byte aligned. */ + if ((uintptr_t)s->code_ptr & 7) + tcg_out32(s, NOP); + + tb->patch_jmp = (uint16_t)((intptr_t)s->code_ptr - (intptr_t)s->code_buf); + + /* S1: Direct Jump */ + if (is_user == 0 || emit_helper == 0) { + tcg_out_jmp_short((uintptr_t)s->code_ptr, (uintptr_t)(s->code_ptr + 4)); + s->code_ptr += 2; + tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR); + tcg_out32(s, BCCTR | BO_ALWAYS); + tb->patch_next = (uint16_t)((intptr_t)s->code_ptr - (intptr_t)s->code_buf); + return; + } + + label_ptr[0] = s->code_ptr; + s->code_ptr += 2; + tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR); + tcg_out32(s, BCCTR | BO_ALWAYS); + + /* S2: Trace Profiling Stub */ + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[1], tb->id); + tcg_out_call(s, (tcg_insn_unit *)helper_NET_profile); + tcg_out_jmp_short((uintptr_t)label_ptr[0], (uintptr_t)s->code_ptr); + + /* S3: Trace Prediction stub */ + tb->patch_next = (uint16_t)((intptr_t)s->code_ptr - (intptr_t)s->code_buf); + + tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_AREG0, + offsetof(CPUArchState, start_trace_prediction)); + tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_TMP1, 0, 1, 7, TCG_TYPE_I32); + label_ptr[1] = s->code_ptr; + tcg_out_bc_noaddr(s, tcg_to_bc[TCG_COND_EQ]); + tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[1], tb->id); + tcg_out_call(s, (tcg_insn_unit *)helper_NET_predict); + reloc_pc14(label_ptr[1], s->code_ptr); + + /* S4: QEMU emulation code */ +} + static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, const int *const_args) { @@ -1906,6 +2002,17 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, tcg_out32(s, BCCTR | BO_ALWAYS); s->tb_next_offset[args[0]] = tcg_current_code_size(s); break; + case INDEX_op_hotpatch: + tcg_out_hotpatch(s, args[0], args[1]); + break; + case INDEX_op_jmp: 
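+        /* INDEX_op_jmp: jump to an absolute host address, taken either
+           from an immediate operand or from a register. */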
+ if (const_args[0]) { + tcg_out_b(s, 0, (tcg_insn_unit *)args[0]); + } else { + tcg_out32(s, MTSPR | RS(args[0]) | CTR); + tcg_out32(s, BCCTR | BO_ALWAYS); + } + break; case INDEX_op_br: { TCGLabel *l = arg_label(args[0]); @@ -2436,6 +2543,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args, } static const TCGTargetOpDef ppc_op_defs[] = { + { INDEX_op_hotpatch, { "i", "i" } }, + { INDEX_op_jmp, { "ri" } }, { INDEX_op_exit_tb, { } }, { INDEX_op_goto_tb, { } }, { INDEX_op_br, { } }, @@ -2572,6 +2681,10 @@ static const TCGTargetOpDef ppc_op_defs[] = { { INDEX_op_qemu_st_i64, { "S", "S", "S", "S" } }, #endif +#define DEF(name,a1,a2,a3,a4) { INDEX_op_##name, {} }, +#include "tcg-opc-vector.h" +#undef DEF + { -1 }, }; diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 0b9dd8f..3773253 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -39,7 +39,7 @@ extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64); Up to and including filling in the forward link immediately. We'll do proper termination of the end of the list after we finish translation. */ -static void tcg_emit_op(TCGContext *ctx, TCGOpcode opc, int args) +void tcg_emit_op(TCGContext *ctx, TCGOpcode opc, int args) { int oi = ctx->gen_next_op_idx; int ni = oi + 1; diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h index 4e20dc1..17d31df 100644 --- a/tcg/tcg-op.h +++ b/tcg/tcg-op.h @@ -28,6 +28,7 @@ /* Basic output routines. Not for general consumption. */ +void tcg_emit_op(TCGContext *ctx, TCGOpcode opc, int args); void tcg_gen_op1(TCGContext *, TCGOpcode, TCGArg); void tcg_gen_op2(TCGContext *, TCGOpcode, TCGArg, TCGArg); void tcg_gen_op3(TCGContext *, TCGOpcode, TCGArg, TCGArg, TCGArg); @@ -311,6 +312,16 @@ void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg); void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg); void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg); +static inline void tcg_gen_hotpatch(uint32_t arg1, uint32_t arg2) +{ + tcg_gen_op2(&tcg_ctx, INDEX_op_hotpatch, arg1, arg2); +} + +static inline void tcg_gen_annotate(uint32_t arg) +{ + tcg_gen_op1(&tcg_ctx, INDEX_op_annotate, arg); +} + static inline void tcg_gen_discard_i32(TCGv_i32 arg) { tcg_gen_op1_i32(INDEX_op_discard, arg); diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h index 6d0410c..5ba1e05 100644 --- a/tcg/tcg-opc.h +++ b/tcg/tcg-opc.h @@ -26,12 +26,16 @@ * DEF(name, oargs, iargs, cargs, flags) */ +DEF(hotpatch, 0, 0, 2, 0) +DEF(annotate, 0, 0, 1, TCG_OPF_NOT_PRESENT) + /* predefined ops */ DEF(discard, 1, 0, 0, TCG_OPF_NOT_PRESENT) DEF(set_label, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_NOT_PRESENT) /* variable number of parameters */ DEF(call, 0, 0, 3, TCG_OPF_CALL_CLOBBER | TCG_OPF_NOT_PRESENT) +DEF(jmp, 0, 1, 0, TCG_OPF_BB_END) DEF(br, 0, 0, 1, TCG_OPF_BB_END) @@ -191,6 +195,8 @@ DEF(qemu_ld_i64, DATA64_ARGS, TLADDR_ARGS, 1, DEF(qemu_st_i64, 0, TLADDR_ARGS + DATA64_ARGS, 1, TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT) +#include "tcg-opc-vector.h" + #undef TLADDR_ARGS #undef DATA64_ARGS #undef IMPL @@ -304,19 +304,22 @@ void tcg_pool_reset(TCGContext *s) s->pool_current = NULL; } -typedef struct TCGHelperInfo { - void *func; - const char *name; - unsigned flags; - unsigned sizemask; -} TCGHelperInfo; - #include "exec/helper-proto.h" -static const TCGHelperInfo all_helpers[] = { +const TCGHelperInfo all_helpers[] = { #include "exec/helper-tcg.h" }; +int tcg_num_helpers(void) +{ + return ARRAY_SIZE(all_helpers); +} + +const TCGHelperInfo *get_tcg_helpers(void) +{ + return all_helpers; +} + void tcg_context_init(TCGContext *s) { int op, total_args, n, i; @@ 
-413,7 +416,7 @@ void tcg_set_frame(TCGContext *s, int reg, intptr_t start, intptr_t size) s->frame_reg = reg; } -void tcg_func_start(TCGContext *s) +void tcg_func_start(TCGContext *s, TranslationBlock *tb) { tcg_pool_reset(s); s->nb_temps = s->nb_globals; @@ -432,8 +435,10 @@ void tcg_func_start(TCGContext *s) s->gen_last_op_idx = -1; s->gen_next_op_idx = 0; s->gen_next_parm_idx = 0; + s->vec_opparam_ptr = s->vec_opparam_buf; s->be = tcg_malloc(sizeof(TCGBackendData)); + s->tb = tb; } static inline void tcg_temp_alloc(TCGContext *s, int n) @@ -1004,6 +1009,7 @@ void tcg_dump_ops(TCGContext *s) char buf[128]; TCGOp *op; int oi; + const TCGArg *vec_args = s->vec_opparam_buf; for (oi = s->gen_first_op_idx; oi >= 0; oi = op->next) { int i, k, nb_oargs, nb_iargs, nb_cargs; @@ -1051,8 +1057,29 @@ void tcg_dump_ops(TCGContext *s) qemu_log(",%s", t); } } else { + int is_vec = 0; qemu_log(" %s ", def->name); + /* print vector opc */ + switch (c) { + case INDEX_op_vector_start ... INDEX_op_vector_end: + is_vec = 1; + break; + default: + break; + } + if (is_vec) { + qemu_log("$0x%" TCG_PRIlx, vec_args[0]); + if (c == INDEX_op_vload_128 || c == INDEX_op_vstore_128) + qemu_log(",%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), vec_args[1])); + else + qemu_log(",$0x%" TCG_PRIlx, vec_args[1]); + qemu_log(",$0x%" TCG_PRIlx, vec_args[2]); + qemu_log("\n"); + vec_args += 3; + continue; + } + nb_oargs = def->nb_oargs; nb_iargs = def->nb_iargs; nb_cargs = def->nb_cargs; @@ -1138,6 +1165,172 @@ void tcg_dump_ops(TCGContext *s) } } +void tcg_dump_ops_fn(TCGContext *s, void (*fn)(const char *)) +{ + char buf[128]; + char outbuf[128]; + TCGOp *op; + int oi; + const TCGArg *vec_args = s->vec_opparam_buf; + +#define printops(args...) \ + do { snprintf(outbuf, 128, ##args); (*fn)(outbuf); } while(0) + + for (oi = s->gen_first_op_idx; oi >= 0; oi = op->next) { + int i, k, nb_oargs, nb_iargs, nb_cargs; + const TCGOpDef *def; + const TCGArg *args; + TCGOpcode c; + + op = &s->gen_op_buf[oi]; + c = op->opc; + def = &tcg_op_defs[c]; + args = &s->gen_opparam_buf[op->args]; + + if (c == INDEX_op_insn_start) { + printops("%s ----", oi != s->gen_first_op_idx ? "\n" : ""); + + for (i = 0; i < TARGET_INSN_START_WORDS; ++i) { + target_ulong a; +#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS + a = ((target_ulong)args[i * 2 + 1] << 32) | args[i * 2]; +#else + a = args[i]; +#endif + printops(" " TARGET_FMT_lx, a); + } + } else if (c == INDEX_op_call) { + /* variable number of arguments */ + nb_oargs = op->callo; + nb_iargs = op->calli; + nb_cargs = def->nb_cargs; + + /* function name, flags, out args */ + printops(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name, + tcg_find_helper(s, args[nb_oargs + nb_iargs]), + args[nb_oargs + nb_iargs + 1], nb_oargs); + for (i = 0; i < nb_oargs; i++) { + printops(",%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), + args[i])); + } + for (i = 0; i < nb_iargs; i++) { + TCGArg arg = args[nb_oargs + i]; + const char *t = "<dummy>"; + if (arg != TCG_CALL_DUMMY_ARG) { + t = tcg_get_arg_str_idx(s, buf, sizeof(buf), arg); + } + printops(",%s", t); + } + } else { + int is_vec = 0; + printops(" %s ", def->name); + + /* print vector opc */ + switch (c) { + case INDEX_op_vector_start ... 
INDEX_op_vector_end: + is_vec = 1; + break; + default: + break; + } + if (is_vec) { + printops("$0x%" TCG_PRIlx, vec_args[0]); + if (c == INDEX_op_vload_128 || c == INDEX_op_vstore_128) + printops(",%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), vec_args[1])); + else + printops(",$0x%" TCG_PRIlx, vec_args[1]); + printops(",$0x%" TCG_PRIlx, vec_args[2]); + printops("\n"); + vec_args += 3; + continue; + } + + nb_oargs = def->nb_oargs; + nb_iargs = def->nb_iargs; + nb_cargs = def->nb_cargs; + + k = 0; + for (i = 0; i < nb_oargs; i++) { + if (k != 0) { + printops(","); + } + printops("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), + args[k++])); + } + for (i = 0; i < nb_iargs; i++) { + if (k != 0) { + printops(","); + } + printops("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), + args[k++])); + } + switch (c) { + case INDEX_op_brcond_i32: + case INDEX_op_setcond_i32: + case INDEX_op_movcond_i32: + case INDEX_op_brcond2_i32: + case INDEX_op_setcond2_i32: + case INDEX_op_brcond_i64: + case INDEX_op_setcond_i64: + case INDEX_op_movcond_i64: + if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]]) { + printops(",%s", cond_name[args[k++]]); + } else { + printops(",$0x%" TCG_PRIlx, args[k++]); + } + i = 1; + break; + case INDEX_op_qemu_ld_i32: + case INDEX_op_qemu_st_i32: + case INDEX_op_qemu_ld_i64: + case INDEX_op_qemu_st_i64: + { + TCGMemOpIdx oi = args[k++]; + TCGMemOp op = get_memop(oi); + unsigned ix = get_mmuidx(oi); + + if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) { + printops(",$0x%x,%u", op, ix); + } else { + const char *s_al = "", *s_op; + if (op & MO_AMASK) { + if ((op & MO_AMASK) == MO_ALIGN) { + s_al = "al+"; + } else { + s_al = "un+"; + } + } + s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)]; + printops(",%s%s,%u", s_al, s_op, ix); + } + i = 1; + } + break; + default: + i = 0; + break; + } + switch (c) { + case INDEX_op_set_label: + case INDEX_op_br: + case INDEX_op_brcond_i32: + case INDEX_op_brcond_i64: + case INDEX_op_brcond2_i32: + printops("%s$L%d", k ? "," : "", arg_label(args[k])->id); + i++, k++; + break; + default: + break; + } + for (; i < nb_cargs; i++, k++) { + printops("%s$0x%" TCG_PRIlx, k ? "," : "", args[k]); + } + } + printops("\n"); + } +#undef printops +} + /* we give more priority to constraints with less registers */ static int get_constraint_priority(const TCGOpDef *def, int k) { @@ -1334,10 +1527,11 @@ static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps, /* Liveness analysis : update the opc_dead_args array to tell if a given input arguments is dead. Instructions updating dead temporaries are removed. */ -static void tcg_liveness_analysis(TCGContext *s) +void tcg_liveness_analysis(TCGContext *s) { uint8_t *dead_temps, *mem_temps; int oi, oi_prev, nb_ops; + TCGArg *vec_args = s->vec_opparam_ptr; nb_ops = s->gen_next_op_idx; s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t)); @@ -1427,6 +1621,7 @@ static void tcg_liveness_analysis(TCGContext *s) } } break; + case INDEX_op_annotate: case INDEX_op_insn_start: break; case INDEX_op_discard: @@ -1434,7 +1629,11 @@ static void tcg_liveness_analysis(TCGContext *s) dead_temps[args[0]] = 1; mem_temps[args[0]] = 0; break; - + case INDEX_op_vector_start ... 
INDEX_op_vector_end: + vec_args -= 3; + if (opc == INDEX_op_vload_128 || opc == INDEX_op_vstore_128) + dead_temps[vec_args[1]] = 0; + break; case INDEX_op_add2_i32: opc_new = INDEX_op_add_i32; goto do_addsub2; @@ -1577,7 +1776,7 @@ static void tcg_liveness_analysis(TCGContext *s) } #else /* dummy liveness analysis */ -static void tcg_liveness_analysis(TCGContext *s) +void tcg_liveness_analysis(TCGContext *s) { int nb_ops; nb_ops = s->gen_opc_ptr - s->gen_opc_buf; @@ -2418,6 +2617,8 @@ int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf) s->gen_insn_data[num_insns][i] = a; } break; + case INDEX_op_annotate: + break; case INDEX_op_discard: temp_dead(s, args[0]); break; @@ -2554,15 +2755,15 @@ struct jit_descriptor { struct jit_code_entry *first_entry; }; -void __jit_debug_register_code(void) __attribute__((noinline)); -void __jit_debug_register_code(void) +void qemu_jit_debug_register_code(void) __attribute__((noinline)); +void qemu_jit_debug_register_code(void) { asm(""); } /* Must statically initialize the version, because GDB may check the version before we can set it. */ -struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 }; +struct jit_descriptor qemu_jit_debug_descriptor = { 1, 0, 0, 0 }; /* End GDB interface. */ @@ -2771,10 +2972,10 @@ static void tcg_register_jit_int(void *buf_ptr, size_t buf_size, one_entry.symfile_addr = img; one_entry.symfile_size = img_size; - __jit_debug_descriptor.action_flag = JIT_REGISTER_FN; - __jit_debug_descriptor.relevant_entry = &one_entry; - __jit_debug_descriptor.first_entry = &one_entry; - __jit_debug_register_code(); + qemu_jit_debug_descriptor.action_flag = JIT_REGISTER_FN; + qemu_jit_debug_descriptor.relevant_entry = &one_entry; + qemu_jit_debug_descriptor.first_entry = &one_entry; + qemu_jit_debug_register_code(); } #else /* No support for the feature. Provide the entry point expected by exec.c, @@ -2790,3 +2991,34 @@ void tcg_register_jit(void *buf, size_t buf_size) { } #endif /* ELF_HOST_MACHINE */ + + +/* + * copy_tcg_context_global() + * Copy thread's local TCG context to the global TCG context. + * + * We first initialize main thread's tcg_ctx and copy it to tcg_ctx_global + * at this point. The tcg_ctx_global is copied to each thread's local + * tcg_ctx later using copy_tcg_context(). + * + * Note: This copy must be done after tcg_ctx is completely initialized + * and should be setup by the main thread. + */ +void copy_tcg_context_global(void) +{ + static int init_once = 0; + if (init_once == 1) + return; + + memcpy(&tcg_ctx_global, &tcg_ctx, sizeof(TCGContext)); + init_once = 1; +} + +/* + * copy_tcg_context() + * Copy the global TCG context to the thread's local TCG context. + */ +void copy_tcg_context(void) +{ + memcpy(&tcg_ctx, &tcg_ctx_global, sizeof(TCGContext)); +} @@ -193,6 +193,7 @@ typedef struct TCGPool { #define TCG_POOL_CHUNK_SIZE 32768 +#define TCG_MAX_LABELS 512 #define TCG_MAX_TEMPS 512 #define TCG_MAX_INSNS 512 @@ -564,7 +565,7 @@ struct TCGContext { /* Threshold to flush the translated code buffer. */ void *code_gen_highwater; - TBContext tb_ctx; + TBContext *tb_ctx; /* The TCGBackendData structure is private to tcg-target.c. 
*/ struct TCGBackendData *be; @@ -578,12 +579,33 @@ struct TCGContext { TCGOp gen_op_buf[OPC_BUF_SIZE]; TCGArg gen_opparam_buf[OPPARAM_BUF_SIZE]; + TCGArg vec_opparam_buf[OPPARAM_BUF_SIZE]; + TCGArg *vec_opparam_ptr; uint16_t gen_insn_end_off[TCG_MAX_INSNS]; target_ulong gen_insn_data[TCG_MAX_INSNS][TARGET_INSN_START_WORDS]; + + TranslationBlock *tb; }; -extern TCGContext tcg_ctx; +extern TCGContext tcg_ctx_global; +extern __thread TCGContext tcg_ctx; + +typedef struct TCGHelperInfo { + void *func; + const char *name; + unsigned flags; + unsigned sizemask; +} TCGHelperInfo; + +void copy_tcg_context_global(void); +void copy_tcg_context(void); +int tcg_num_helpers(void); +const TCGHelperInfo *get_tcg_helpers(void); +void tcg_liveness_analysis(TCGContext *s); +void tcg_dump_ops_fn(TCGContext *s, void (*fn)(const char *)); +target_long decode_sleb128(uint8_t **pp); + /* The number of opcodes emitted so far. */ static inline int tcg_op_buf_count(void) @@ -624,7 +646,7 @@ static inline void *tcg_malloc(int size) void tcg_context_init(TCGContext *s); void tcg_prologue_init(TCGContext *s); -void tcg_func_start(TCGContext *s); +void tcg_func_start(TCGContext *s, TranslationBlock *tb); int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf); @@ -822,7 +844,7 @@ static inline TCGLabel *arg_label(TCGArg i) static inline ptrdiff_t tcg_ptr_byte_diff(void *a, void *b) { - return a - b; + return (ptrdiff_t)a - (ptrdiff_t)b; } /** @@ -876,7 +898,7 @@ static inline TCGMemOpIdx make_memop_idx(TCGMemOp op, unsigned idx) */ static inline TCGMemOp get_memop(TCGMemOpIdx oi) { - return oi >> 4; + return (TCGMemOp)(oi >> 4); } /** @@ -939,6 +961,7 @@ static inline unsigned get_mmuidx(TCGMemOpIdx oi) #define TB_EXIT_IDX1 1 #define TB_EXIT_ICOUNT_EXPIRED 2 #define TB_EXIT_REQUESTED 3 +#define TB_EXIT_LLVM TB_EXIT_ICOUNT_EXPIRED #ifdef HAVE_TCG_QEMU_TB_EXEC uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr); @@ -1011,6 +1034,31 @@ uint32_t helper_be_ldl_cmmu(CPUArchState *env, target_ulong addr, uint64_t helper_be_ldq_cmmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi, uintptr_t retaddr); + +/* Value zero-extended to tcg register size. */ +tcg_target_ulong llvm_ret_ldub_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi); +tcg_target_ulong llvm_le_lduw_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi); +tcg_target_ulong llvm_le_ldul_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi); +uint64_t llvm_le_ldq_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi); +tcg_target_ulong llvm_be_lduw_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi); +tcg_target_ulong llvm_be_ldul_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi); +uint64_t llvm_be_ldq_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi); + +/* Value sign-extended to tcg register size. 
*/ +tcg_target_ulong llvm_ret_ldsb_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi); +tcg_target_ulong llvm_le_ldsw_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi); +tcg_target_ulong llvm_le_ldsl_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi); +tcg_target_ulong llvm_be_ldsw_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi); +tcg_target_ulong llvm_be_ldsl_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi); + +void llvm_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val, TCGMemOpIdx oi); +void llvm_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, TCGMemOpIdx oi); +void llvm_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, TCGMemOpIdx oi); +void llvm_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, TCGMemOpIdx oi); +void llvm_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, TCGMemOpIdx oi); +void llvm_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, TCGMemOpIdx oi); +void llvm_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, TCGMemOpIdx oi); + /* Temporary aliases until backends are converted. */ #ifdef TARGET_WORDS_BIGENDIAN # define helper_ret_ldsw_mmu helper_be_ldsw_mmu diff --git a/translate-all.c b/translate-all.c index 042a857..bf05326 100644 --- a/translate-all.c +++ b/translate-all.c @@ -63,6 +63,10 @@ #include "qemu/bitmap.h" #include "qemu/timer.h" +#include "hqemu.h" + +size_t get_cpu_size(void) { return sizeof(CPUArchState); } + //#define DEBUG_TB_INVALIDATE //#define DEBUG_FLUSH /* make various TB consistency checks */ @@ -124,7 +128,8 @@ intptr_t qemu_host_page_mask; static void *l1_map[V_L1_SIZE]; /* code generation context */ -TCGContext tcg_ctx; +TCGContext tcg_ctx_global; +__thread TCGContext tcg_ctx; /* translation block context */ #ifdef CONFIG_USER_ONLY @@ -135,7 +140,7 @@ void tb_lock(void) { #ifdef CONFIG_USER_ONLY assert(!have_tb_lock); - qemu_mutex_lock(&tcg_ctx.tb_ctx.tb_lock); + qemu_mutex_lock(&tcg_ctx.tb_ctx->tb_lock); have_tb_lock++; #endif } @@ -145,7 +150,7 @@ void tb_unlock(void) #ifdef CONFIG_USER_ONLY assert(have_tb_lock); have_tb_lock--; - qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock); + qemu_mutex_unlock(&tcg_ctx.tb_ctx->tb_lock); #endif } @@ -153,7 +158,7 @@ void tb_lock_reset(void) { #ifdef CONFIG_USER_ONLY if (have_tb_lock) { - qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock); + qemu_mutex_unlock(&tcg_ctx.tb_ctx->tb_lock); have_tb_lock = 0; } #endif @@ -161,11 +166,12 @@ void tb_lock_reset(void) static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, tb_page_addr_t phys_page2); -static TranslationBlock *tb_find_pc(uintptr_t tc_ptr); +static TranslationBlock *tb_find_pc(CPUState *cpu, uintptr_t tc_ptr); void cpu_gen_init(void) { tcg_context_init(&tcg_ctx); + tcg_ctx.tb_ctx = g_malloc0(sizeof(TBContext)); } /* Encode VAL as a signed leb128 sequence at P. @@ -190,7 +196,7 @@ static uint8_t *encode_sleb128(uint8_t *p, target_long val) /* Decode a signed leb128 sequence at *PP; increment *PP past the decoded value. Return the decoded value. 
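   For example, the two-byte sequence 0x81 0x7f decodes to -127: the low
   seven bits of each byte are accumulated little-endian
   (0x01 | 0x7f << 7 = 0x3f81) and, since bit 6 of the final byte is set,
   the value is sign-extended from bit 14, yielding -127.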
*/ -static target_long decode_sleb128(uint8_t **pp) +target_long decode_sleb128(uint8_t **pp) { uint8_t *p = *pp; target_long val = 0; @@ -268,6 +274,11 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, int64_t ti = profile_getclock(); #endif +#if defined(CONFIG_LLVM) + if (llvm_locate_trace(searched_pc)) + return llvm_restore_state(cpu, tb, searched_pc); +#endif + if (searched_pc < host_pc) { return -1; } @@ -297,8 +308,8 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, restore_state_to_opc(env, tb, data); #ifdef CONFIG_PROFILER - tcg_ctx.restore_time += profile_getclock() - ti; - tcg_ctx.restore_count++; + tcg_ctx_global.restore_time += profile_getclock() - ti; + tcg_ctx_global.restore_count++; #endif return 0; } @@ -307,7 +318,7 @@ bool cpu_restore_state(CPUState *cpu, uintptr_t retaddr) { TranslationBlock *tb; - tb = tb_find_pc(retaddr); + tb = tb_find_pc(cpu, retaddr); if (tb) { cpu_restore_state_from_tb(cpu, tb, retaddr); if (tb->cflags & CF_NOCACHE) { @@ -485,7 +496,13 @@ static inline PageDesc *page_find(tb_page_addr_t index) # define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1) #endif +/* Note: The size of the code buffer is doubled. We steal half of the buffer + * acting as the trace code cache. */ +#if defined(CONFIG_LLVM) +#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024 * 2) +#else #define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024) +#endif #define DEFAULT_CODE_GEN_BUFFER_SIZE \ (DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \ @@ -503,6 +520,9 @@ static inline size_t size_code_gen_buffer(size_t tb_size) static buffer, we could size this on RESERVED_VA, on the text segment size of the executable, or continue to use the default. */ tb_size = (unsigned long)(ram_size / 4); +#if defined(CONFIG_LLVM) + tb_size = (unsigned long)(ram_size / 2); +#endif #endif } if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) { @@ -730,15 +750,18 @@ static inline void code_gen_alloc(size_t tb_size) fprintf(stderr, "Could not allocate dynamic translator buffer\n"); exit(1); } +#if defined(CONFIG_LLVM) + llvm_alloc_cache(); +#endif /* Estimate a good size for the number of TBs we can support. We still haven't deducted the prologue from the buffer size here, but that's minimal and won't affect the estimate much. */ tcg_ctx.code_gen_max_blocks = tcg_ctx.code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE; - tcg_ctx.tb_ctx.tbs = g_new(TranslationBlock, tcg_ctx.code_gen_max_blocks); + tcg_ctx.tb_ctx->tbs = g_new(TranslationBlock, tcg_ctx.code_gen_max_blocks); - qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock); + qemu_mutex_init(&tcg_ctx.tb_ctx->tb_lock); } /* Must be called before using the QEMU cpus. 'tb_size' is the size @@ -765,26 +788,35 @@ bool tcg_enabled(void) too many translation blocks or too much generated code. */ static TranslationBlock *tb_alloc(target_ulong pc) { + TCGContext *s = &tcg_ctx_global; TranslationBlock *tb; - if (tcg_ctx.tb_ctx.nb_tbs >= tcg_ctx.code_gen_max_blocks) { + if (s->tb_ctx->nb_tbs >= s->code_gen_max_blocks) { return NULL; } - tb = &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs++]; +#if defined(CONFIG_LLVM) + if (llvm_check_cache() == 1) + return NULL; +#endif + + tb = &s->tb_ctx->tbs[s->tb_ctx->nb_tbs++]; tb->pc = pc; tb->cflags = 0; + + optimization_init_tb(tb, s->tb_ctx->nb_tbs - 1); return tb; } void tb_free(TranslationBlock *tb) { + TCGContext *s = &tcg_ctx_global; /* In practice this is mostly used for single use temporary TB Ignore the hard cases and just back up if this TB happens to be the last one generated. 
*/ - if (tcg_ctx.tb_ctx.nb_tbs > 0 && - tb == &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs - 1]) { - tcg_ctx.code_gen_ptr = tb->tc_ptr; - tcg_ctx.tb_ctx.nb_tbs--; + if (s->tb_ctx->nb_tbs > 0 && + tb == &s->tb_ctx->tbs[s->tb_ctx->nb_tbs - 1]) { + s->code_gen_ptr = tb->tc_ptr; + s->tb_ctx->nb_tbs--; } } @@ -832,42 +864,49 @@ static void page_flush_tb(void) /* XXX: tb_flush is currently not thread safe */ void tb_flush(CPUState *cpu) { + TCGContext *s = &tcg_ctx_global; #if defined(DEBUG_FLUSH) printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n", - (unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer), - tcg_ctx.tb_ctx.nb_tbs, tcg_ctx.tb_ctx.nb_tbs > 0 ? - ((unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer)) / - tcg_ctx.tb_ctx.nb_tbs : 0); + (unsigned long)(s->code_gen_ptr - s->code_gen_buffer), + s->tb_ctx->nb_tbs, s->tb_ctx->nb_tbs > 0 ? + ((unsigned long)(s->code_gen_ptr - s->code_gen_buffer)) / + s->tb_ctx->nb_tbs : 0); #endif - if ((unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer) - > tcg_ctx.code_gen_buffer_size) { + if ((unsigned long)(s->code_gen_ptr - s->code_gen_buffer) + > s->code_gen_buffer_size) { cpu_abort(cpu, "Internal error: code buffer overflow\n"); } - tcg_ctx.tb_ctx.nb_tbs = 0; +#if defined(CONFIG_LLVM) + llvm_tb_flush(); +#endif + + s->tb_ctx->nb_tbs = 0; CPU_FOREACH(cpu) { memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache)); + optimization_reset(cpu->env_ptr, 1); } - memset(tcg_ctx.tb_ctx.tb_phys_hash, 0, sizeof(tcg_ctx.tb_ctx.tb_phys_hash)); + memset(s->tb_ctx->tb_phys_hash, 0, sizeof(s->tb_ctx->tb_phys_hash)); page_flush_tb(); - tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer; + s->code_gen_ptr = s->code_gen_buffer; /* XXX: flush processor icache at this point if cache flush is expensive */ - tcg_ctx.tb_ctx.tb_flush_count++; + s->tb_ctx->tb_flush_count++; } #ifdef DEBUG_TB_CHECK static void tb_invalidate_check(target_ulong address) { + TCGContext *s = &tcg_ctx_global; TranslationBlock *tb; int i; address &= TARGET_PAGE_MASK; for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) { - for (tb = tb_ctx.tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) { + for (tb = s->tb_ctx->tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) { if (!(address + TARGET_PAGE_SIZE <= tb->pc || address >= tb->pc + tb->size)) { printf("ERROR invalidate: address=" TARGET_FMT_lx @@ -881,11 +920,12 @@ static void tb_invalidate_check(target_ulong address) /* verify that all the pages have correct rights for code */ static void tb_page_check(void) { + TCGContext *s = &tcg_ctx_global; TranslationBlock *tb; int i, flags1, flags2; for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) { - for (tb = tcg_ctx.tb_ctx.tb_phys_hash[i]; tb != NULL; + for (tb = s->tb_ctx->tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) { flags1 = page_get_flags(tb->pc); flags2 = page_get_flags(tb->pc + tb->size - 1); @@ -911,6 +951,10 @@ static inline void tb_hash_remove(TranslationBlock **ptb, TranslationBlock *tb) } ptb = &tb1->phys_hash_next; } +#if defined(CONFIG_LLVM) + tb->mode = BLOCK_INVALID; + llvm_tb_remove(tb); +#endif } static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb) @@ -969,16 +1013,15 @@ static inline void tb_reset_jump(TranslationBlock *tb, int n) /* invalidate one TB */ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) { + TCGContext *s = &tcg_ctx_global; CPUState *cpu; PageDesc *p; unsigned int h, n1; - tb_page_addr_t phys_pc; TranslationBlock *tb1, *tb2; /* remove the TB from the hash list */ - phys_pc = 
tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK); - h = tb_phys_hash_func(phys_pc); - tb_hash_remove(&tcg_ctx.tb_ctx.tb_phys_hash[h], tb); + h = tb_phys_hash_func(tb->pc); + tb_hash_remove(&s->tb_ctx->tb_phys_hash[h], tb); /* remove the TB from the page list */ if (tb->page_addr[0] != page_addr) { @@ -992,7 +1035,7 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) invalidate_page_bitmap(p); } - tcg_ctx.tb_ctx.tb_invalidated_flag = 1; + s->tb_ctx->tb_invalidated_flag = 1; /* remove the TB from the hash list */ h = tb_jmp_cache_hash_func(tb->pc); @@ -1000,6 +1043,7 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) if (cpu->tb_jmp_cache[h] == tb) { cpu->tb_jmp_cache[h] = NULL; } + optimization_remove_entry(cpu->env_ptr, tb); } /* suppress this TB from the two jump lists */ @@ -1021,7 +1065,7 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr) } tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */ - tcg_ctx.tb_ctx.tb_phys_invalidate_count++; + s->tb_ctx->tb_phys_invalidate_count++; } static void build_page_bitmap(PageDesc *p) @@ -1058,6 +1102,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, target_ulong pc, target_ulong cs_base, int flags, int cflags) { + TCGContext *s = &tcg_ctx_global; CPUArchState *env = cpu->env_ptr; TranslationBlock *tb; tb_page_addr_t phys_pc, phys_page2; @@ -1082,22 +1127,22 @@ TranslationBlock *tb_gen_code(CPUState *cpu, tb = tb_alloc(pc); assert(tb != NULL); /* Don't forget to invalidate previous TB info. */ - tcg_ctx.tb_ctx.tb_invalidated_flag = 1; + s->tb_ctx->tb_invalidated_flag = 1; } - gen_code_buf = tcg_ctx.code_gen_ptr; - tb->tc_ptr = gen_code_buf; + gen_code_buf = s->code_gen_ptr; + tb->tc_ptr = tb->opt_ptr = gen_code_buf; tb->cs_base = cs_base; tb->flags = flags; tb->cflags = cflags; #ifdef CONFIG_PROFILER - tcg_ctx.tb_count1++; /* includes aborted translations because of + s->tb_count1++; /* includes aborted translations because of exceptions */ ti = profile_getclock(); #endif - tcg_func_start(&tcg_ctx); + tcg_func_start(&tcg_ctx, tb); gen_intermediate_code(env, tb); @@ -1116,9 +1161,9 @@ TranslationBlock *tb_gen_code(CPUState *cpu, #endif #ifdef CONFIG_PROFILER - tcg_ctx.tb_count++; - tcg_ctx.interm_time += profile_getclock() - ti; - tcg_ctx.code_time -= profile_getclock(); + s->tb_count++; + s->interm_time += profile_getclock() - ti; + s->code_time -= profile_getclock(); #endif /* ??? Overflow could be handled better here. 
In particular, we @@ -1136,10 +1181,10 @@ TranslationBlock *tb_gen_code(CPUState *cpu, } #ifdef CONFIG_PROFILER - tcg_ctx.code_time += profile_getclock(); - tcg_ctx.code_in_len += tb->size; - tcg_ctx.code_out_len += gen_code_size; - tcg_ctx.search_out_len += search_size; + s->code_time += profile_getclock(); + s->code_in_len += tb->size; + s->code_out_len += gen_code_size; + s->search_out_len += search_size; #endif #ifdef DEBUG_DISAS @@ -1151,7 +1196,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, } #endif - tcg_ctx.code_gen_ptr = (void *) + s->code_gen_ptr = (void *) ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size, CODE_GEN_ALIGN); @@ -1247,7 +1292,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, current_tb = NULL; if (cpu->mem_io_pc) { /* now we have a real cpu fault */ - current_tb = tb_find_pc(cpu->mem_io_pc); + current_tb = tb_find_pc(cpu, cpu->mem_io_pc); } } if (current_tb == tb && @@ -1365,7 +1410,7 @@ static void tb_invalidate_phys_page(tb_page_addr_t addr, tb = p->first_tb; #ifdef TARGET_HAS_PRECISE_SMC if (tb && pc != 0) { - current_tb = tb_find_pc(pc); + current_tb = tb_find_pc(cpu, pc); } if (cpu != NULL) { env = cpu->env_ptr; @@ -1475,12 +1520,13 @@ static inline void tb_alloc_page(TranslationBlock *tb, static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, tb_page_addr_t phys_page2) { + TCGContext *s = &tcg_ctx_global; unsigned int h; TranslationBlock **ptb; /* add in the physical hash table */ - h = tb_phys_hash_func(phys_pc); - ptb = &tcg_ctx.tb_ctx.tb_phys_hash[h]; + h = tb_phys_hash_func(tb->pc); + ptb = &s->tb_ctx->tb_phys_hash[h]; tb->phys_hash_next = *ptb; *ptb = tb; @@ -1511,25 +1557,31 @@ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, /* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr < tb[1].tc_ptr. Return NULL if not found */ -static TranslationBlock *tb_find_pc(uintptr_t tc_ptr) +static TranslationBlock *tb_find_pc(CPUState *cpu, uintptr_t tc_ptr) { + TCGContext *s = &tcg_ctx_global; int m_min, m_max, m; uintptr_t v; TranslationBlock *tb; - if (tcg_ctx.tb_ctx.nb_tbs <= 0) { + if (s->tb_ctx->nb_tbs <= 0) { return NULL; } - if (tc_ptr < (uintptr_t)tcg_ctx.code_gen_buffer || - tc_ptr >= (uintptr_t)tcg_ctx.code_gen_ptr) { +#if defined(CONFIG_LLVM) + tb = llvm_find_pc(cpu, tc_ptr); + if (tb) + return tb; +#endif + if (tc_ptr < (uintptr_t)s->code_gen_buffer || + tc_ptr >= (uintptr_t)s->code_gen_ptr) { return NULL; } /* binary search (cf Knuth) */ m_min = 0; - m_max = tcg_ctx.tb_ctx.nb_tbs - 1; + m_max = s->tb_ctx->nb_tbs - 1; while (m_min <= m_max) { m = (m_min + m_max) >> 1; - tb = &tcg_ctx.tb_ctx.tbs[m]; + tb = &s->tb_ctx->tbs[m]; v = (uintptr_t)tb->tc_ptr; if (v == tc_ptr) { return tb; @@ -1539,7 +1591,7 @@ static TranslationBlock *tb_find_pc(uintptr_t tc_ptr) m_min = m + 1; } } - return &tcg_ctx.tb_ctx.tbs[m_max]; + return &s->tb_ctx->tbs[m_max]; } #if !defined(CONFIG_USER_ONLY) @@ -1567,7 +1619,7 @@ void tb_check_watchpoint(CPUState *cpu) { TranslationBlock *tb; - tb = tb_find_pc(cpu->mem_io_pc); + tb = tb_find_pc(cpu, cpu->mem_io_pc); if (tb) { /* We can use retranslation to find the PC. 
*/ cpu_restore_state_from_tb(cpu, tb, cpu->mem_io_pc); @@ -1599,7 +1651,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) target_ulong pc, cs_base; uint64_t flags; - tb = tb_find_pc(retaddr); + tb = tb_find_pc(cpu, retaddr); if (!tb) { cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p", (void *)retaddr); @@ -1675,6 +1727,7 @@ void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr) void dump_exec_info(FILE *f, fprintf_function cpu_fprintf) { + TCGContext *s = &tcg_ctx_global; int i, target_code_size, max_target_code_size; int direct_jmp_count, direct_jmp2_count, cross_page; TranslationBlock *tb; @@ -1684,8 +1737,8 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf) cross_page = 0; direct_jmp_count = 0; direct_jmp2_count = 0; - for (i = 0; i < tcg_ctx.tb_ctx.nb_tbs; i++) { - tb = &tcg_ctx.tb_ctx.tbs[i]; + for (i = 0; i < s->tb_ctx->nb_tbs; i++) { + tb = &s->tb_ctx->tbs[i]; target_code_size += tb->size; if (tb->size > max_target_code_size) { max_target_code_size = tb->size; @@ -1703,35 +1756,35 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf) /* XXX: avoid using doubles ? */ cpu_fprintf(f, "Translation buffer state:\n"); cpu_fprintf(f, "gen code size %td/%zd\n", - tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer, - tcg_ctx.code_gen_highwater - tcg_ctx.code_gen_buffer); + s->code_gen_ptr - s->code_gen_buffer, + s->code_gen_highwater - s->code_gen_buffer); cpu_fprintf(f, "TB count %d/%d\n", - tcg_ctx.tb_ctx.nb_tbs, tcg_ctx.code_gen_max_blocks); + s->tb_ctx->nb_tbs, s->code_gen_max_blocks); cpu_fprintf(f, "TB avg target size %d max=%d bytes\n", - tcg_ctx.tb_ctx.nb_tbs ? target_code_size / - tcg_ctx.tb_ctx.nb_tbs : 0, + s->tb_ctx->nb_tbs ? target_code_size / + s->tb_ctx->nb_tbs : 0, max_target_code_size); cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n", - tcg_ctx.tb_ctx.nb_tbs ? (tcg_ctx.code_gen_ptr - - tcg_ctx.code_gen_buffer) / - tcg_ctx.tb_ctx.nb_tbs : 0, - target_code_size ? (double) (tcg_ctx.code_gen_ptr - - tcg_ctx.code_gen_buffer) / + s->tb_ctx->nb_tbs ? (s->code_gen_ptr - + s->code_gen_buffer) / + s->tb_ctx->nb_tbs : 0, + target_code_size ? (double) (s->code_gen_ptr - + s->code_gen_buffer) / target_code_size : 0); cpu_fprintf(f, "cross page TB count %d (%d%%)\n", cross_page, - tcg_ctx.tb_ctx.nb_tbs ? (cross_page * 100) / - tcg_ctx.tb_ctx.nb_tbs : 0); + s->tb_ctx->nb_tbs ? (cross_page * 100) / + s->tb_ctx->nb_tbs : 0); cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n", direct_jmp_count, - tcg_ctx.tb_ctx.nb_tbs ? (direct_jmp_count * 100) / - tcg_ctx.tb_ctx.nb_tbs : 0, + s->tb_ctx->nb_tbs ? (direct_jmp_count * 100) / + s->tb_ctx->nb_tbs : 0, direct_jmp2_count, - tcg_ctx.tb_ctx.nb_tbs ? (direct_jmp2_count * 100) / - tcg_ctx.tb_ctx.nb_tbs : 0); + s->tb_ctx->nb_tbs ? 
(direct_jmp2_count * 100) / + s->tb_ctx->nb_tbs : 0); cpu_fprintf(f, "\nStatistics:\n"); - cpu_fprintf(f, "TB flush count %d\n", tcg_ctx.tb_ctx.tb_flush_count); + cpu_fprintf(f, "TB flush count %d\n", s->tb_ctx->tb_flush_count); cpu_fprintf(f, "TB invalidate count %d\n", - tcg_ctx.tb_ctx.tb_phys_invalidate_count); + s->tb_ctx->tb_phys_invalidate_count); cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count); tcg_dump_info(f, cpu_fprintf); } diff --git a/user-exec.c b/user-exec.c index 8ad89a4..dbf04be 100644 --- a/user-exec.c +++ b/user-exec.c @@ -58,7 +58,7 @@ static void exception_action(CPUState *cpu) void cpu_resume_from_signal(CPUState *cpu, void *puc) { #ifdef __linux__ - struct ucontext *uc = puc; + ucontext_t *uc = puc; #elif defined(__OpenBSD__) struct sigcontext *uc = puc; #endif @@ -172,7 +172,7 @@ int cpu_signal_handler(int host_signum, void *pinfo, #elif defined(__OpenBSD__) struct sigcontext *uc = puc; #else - struct ucontext *uc = puc; + ucontext_t *uc = puc; #endif unsigned long pc; int trapno; @@ -227,7 +227,7 @@ int cpu_signal_handler(int host_signum, void *pinfo, #elif defined(__OpenBSD__) struct sigcontext *uc = puc; #else - struct ucontext *uc = puc; + ucontext_t *uc = puc; #endif pc = PC_sig(uc); @@ -332,7 +332,7 @@ int cpu_signal_handler(int host_signum, void *pinfo, #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) ucontext_t *uc = puc; #else - struct ucontext *uc = puc; + ucontext_t *uc = puc; #endif unsigned long pc; int is_write; @@ -359,7 +359,7 @@ int cpu_signal_handler(int host_signum, void *pinfo, void *puc) { siginfo_t *info = pinfo; - struct ucontext *uc = puc; + ucontext_t *uc = puc; uint32_t *pc = uc->uc_mcontext.sc_pc; uint32_t insn = *pc; int is_write = 0; @@ -457,7 +457,7 @@ int cpu_signal_handler(int host_signum, void *pinfo, #if defined(__NetBSD__) ucontext_t *uc = puc; #else - struct ucontext *uc = puc; + ucontext_t *uc = puc; #endif unsigned long pc; int is_write; @@ -484,7 +484,7 @@ int cpu_signal_handler(int host_signum, void *pinfo, int cpu_signal_handler(int host_signum, void *pinfo, void *puc) { siginfo_t *info = pinfo; - struct ucontext *uc = puc; + ucontext_t *uc = puc; uintptr_t pc = uc->uc_mcontext.pc; uint32_t insn = *(uint32_t *)pc; bool is_write; @@ -513,7 +513,7 @@ int cpu_signal_handler(int host_signum, void *pinfo, void *puc) { siginfo_t *info = pinfo; - struct ucontext *uc = puc; + ucontext_t *uc = puc; unsigned long pc; int is_write; @@ -535,7 +535,7 @@ int cpu_signal_handler(int host_signum, void *pinfo, int cpu_signal_handler(int host_signum, void *pinfo, void *puc) { siginfo_t *info = pinfo; - struct ucontext *uc = puc; + ucontext_t *uc = puc; unsigned long ip; int is_write = 0; @@ -566,7 +566,7 @@ int cpu_signal_handler(int host_signum, void *pinfo, void *puc) { siginfo_t *info = pinfo; - struct ucontext *uc = puc; + ucontext_t *uc = puc; unsigned long pc; uint16_t *pinsn; int is_write = 0; @@ -619,7 +619,7 @@ int cpu_signal_handler(int host_signum, void *pinfo, void *puc) { siginfo_t *info = pinfo; - struct ucontext *uc = puc; + ucontext_t *uc = puc; greg_t pc = uc->uc_mcontext.pc; int is_write; @@ -635,7 +635,7 @@ int cpu_signal_handler(int host_signum, void *pinfo, void *puc) { siginfo_t *info = pinfo; - struct ucontext *uc = puc; + ucontext_t *uc = puc; unsigned long pc = uc->uc_mcontext.sc_iaoq[0]; uint32_t insn = *(uint32_t *)pc; int is_write = 0; diff --git a/util/memfd.c b/util/memfd.c index 7c40691..587ef5a 100644 --- a/util/memfd.c +++ b/util/memfd.c @@ -40,7 +40,7 @@ #include <sys/syscall.h> #include 
<asm/unistd.h> -static int memfd_create(const char *name, unsigned int flags) +static int qemu_memfd_create(const char *name, unsigned int flags) { #ifdef __NR_memfd_create return syscall(__NR_memfd_create, name, flags); @@ -74,12 +74,12 @@ void *qemu_memfd_alloc(const char *name, size_t size, unsigned int seals, #ifdef CONFIG_LINUX if (seals) { - mfd = memfd_create(name, MFD_ALLOW_SEALING | MFD_CLOEXEC); + mfd = qemu_memfd_create(name, MFD_ALLOW_SEALING | MFD_CLOEXEC); } if (mfd == -1) { /* some systems have memfd without sealing */ - mfd = memfd_create(name, MFD_CLOEXEC); + mfd = qemu_memfd_create(name, MFD_CLOEXEC); seals = 0; } #endif @@ -4705,6 +4705,7 @@ int main(int argc, char **argv, char **envp) #ifdef CONFIG_TPM tpm_cleanup(); #endif + qemu_end_cpu_loop(); return 0; } |