summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--COPYRIGHT28
-rw-r--r--Makefile.target9
-rwxr-xr-xconfigure61
-rw-r--r--cpu-exec.c45
-rw-r--r--cpus.c25
-rw-r--r--cputlb.c135
-rw-r--r--exec.c10
-rw-r--r--gdbstub.c2
-rw-r--r--include/exec/cpu-all.h6
-rw-r--r--include/exec/cpu-common.h4
-rw-r--r--include/exec/cpu-defs.h11
-rw-r--r--include/exec/cpu_ldst.h11
-rw-r--r--include/exec/cpu_ldst_template.h31
-rw-r--r--include/exec/exec-all.h9
-rw-r--r--include/exec/memory.h4
-rw-r--r--include/hw/qdev-core.h4
-rw-r--r--include/qemu-common.h2
-rw-r--r--include/qemu/atomic.h10
-rw-r--r--include/qemu/bitmap.h10
-rw-r--r--include/qemu/compiler.h4
-rw-r--r--include/qemu/queue.h2
-rw-r--r--include/qemu/rcu.h6
-rw-r--r--include/qemu/timer.h4
-rw-r--r--include/qom/cpu.h7
-rw-r--r--include/qom/object.h7
-rw-r--r--include/sysemu/cpus.h1
-rw-r--r--linux-user/elfload.c4
-rw-r--r--linux-user/main.c75
-rw-r--r--linux-user/strace.c1
-rw-r--r--linux-user/syscall.c13
-rw-r--r--llvm/analysis/InnerLoopAnalysis.cpp631
-rw-r--r--llvm/atomic/atomic-arm.c158
-rw-r--r--llvm/atomic/atomic-helper.h74
-rw-r--r--llvm/atomic/atomic-x86.c504
-rw-r--r--llvm/atomic/coremu-atomic.h412
-rw-r--r--llvm/atomic/coremu-template.h101
-rw-r--r--llvm/fpu/softfloat-native-def.h127
-rw-r--r--llvm/fpu/softfloat-native.h248
-rw-r--r--llvm/hqemu-helper.c77
-rw-r--r--llvm/hqemu.mk191
-rw-r--r--llvm/include/InnerLoopAnalysis.h291
-rw-r--r--llvm/include/JIT.h228
-rw-r--r--llvm/include/JITMemoryManager.h318
-rw-r--r--llvm/include/MCJITMemoryManager.h213
-rw-r--r--llvm/include/hqemu-config.h142
-rw-r--r--llvm/include/hqemu-helper.h8
-rw-r--r--llvm/include/hqemu.h84
-rw-r--r--llvm/include/llvm-annotate.h51
-rw-r--r--llvm/include/llvm-debug.h247
-rw-r--r--llvm/include/llvm-hard-perfmon.h87
-rw-r--r--llvm/include/llvm-helper.h755
-rw-r--r--llvm/include/llvm-macro.h88
-rw-r--r--llvm/include/llvm-opc.h494
-rw-r--r--llvm/include/llvm-pass.h205
-rw-r--r--llvm/include/llvm-soft-perfmon.h74
-rw-r--r--llvm/include/llvm-state.h194
-rw-r--r--llvm/include/llvm-target.h116
-rw-r--r--llvm/include/llvm-translator.h270
-rw-r--r--llvm/include/llvm-types.h127
-rw-r--r--llvm/include/llvm.h278
-rw-r--r--llvm/include/optimization.h261
-rw-r--r--llvm/include/pmu/arm/arm-events.h35
-rw-r--r--llvm/include/pmu/perf_event.h992
-rw-r--r--llvm/include/pmu/pmu-events.h131
-rw-r--r--llvm/include/pmu/pmu-global.h52
-rw-r--r--llvm/include/pmu/pmu-utils.h106
-rw-r--r--llvm/include/pmu/pmu.h170
-rw-r--r--llvm/include/pmu/ppc/ppc-events.h30
-rw-r--r--llvm/include/pmu/x86/x86-events.h38
-rw-r--r--llvm/include/qemu-types.h33
-rw-r--r--llvm/include/tcg-opc-vector.h80
-rw-r--r--llvm/include/tracer.h109
-rw-r--r--llvm/include/utils.h260
-rw-r--r--llvm/llvm-annotate.cpp136
-rw-r--r--llvm/llvm-debug.cpp229
-rw-r--r--llvm/llvm-hard-perfmon.cpp289
-rw-r--r--llvm/llvm-opc-mmu.cpp344
-rw-r--r--llvm/llvm-opc-vector.cpp943
-rw-r--r--llvm/llvm-opc.cpp4431
-rw-r--r--llvm/llvm-soft-perfmon.cpp357
-rw-r--r--llvm/llvm-target.cpp812
-rw-r--r--llvm/llvm-translator.cpp924
-rw-r--r--llvm/llvm.cpp1251
-rw-r--r--llvm/optimization.cpp317
-rw-r--r--llvm/pass/CombineCasts.cpp321
-rw-r--r--llvm/pass/CombineGuestMemory.cpp389
-rw-r--r--llvm/pass/CombineZExtTrunc.cpp70
-rw-r--r--llvm/pass/FastMathPass.cpp87
-rw-r--r--llvm/pass/ProfileExec.cpp172
-rw-r--r--llvm/pass/RedundantStateElimination.cpp179
-rw-r--r--llvm/pass/ReplaceIntrinsic.cpp137
-rw-r--r--llvm/pass/SimplifyPointer.cpp334
-rw-r--r--llvm/pass/StateMappingPass.cpp885
-rw-r--r--llvm/pmu/arm/arm-events.cpp42
-rw-r--r--llvm/pmu/pmu-events.cpp414
-rw-r--r--llvm/pmu/pmu.cpp491
-rw-r--r--llvm/pmu/ppc/ppc-events.cpp37
-rw-r--r--llvm/pmu/x86/x86-events.cpp41
-rw-r--r--llvm/tracer.cpp365
-rw-r--r--llvm/utils.cpp223
-rw-r--r--llvm/xml/tinyxml2.cpp2013
-rw-r--r--llvm/xml/tinyxml2.h1480
-rw-r--r--patch/llvm/llvm-3.5.patch864
-rw-r--r--patch/llvm/llvm-3.8.patch247
-rw-r--r--patch/llvm/llvm-3.9.patch404
-rw-r--r--patch/llvm/llvm-5.0.patch652
-rw-r--r--patch/llvm/llvm-6.0.patch652
-rw-r--r--qga/commands-posix.c1
-rw-r--r--qom/object.c8
-rw-r--r--softmmu_template.h51
-rw-r--r--softmmu_template_llvm.h384
-rw-r--r--target-arm/cpu.h64
-rw-r--r--target-arm/helper.c46
-rw-r--r--target-arm/helper.h11
-rw-r--r--target-arm/op_helper.c10
-rw-r--r--target-arm/simd_helper.h91
-rw-r--r--target-arm/translate-a64.c318
-rw-r--r--target-arm/translate.c324
-rw-r--r--target-arm/translate.h2
-rw-r--r--target-i386/cpu.h27
-rw-r--r--target-i386/fpu_helper.c2
-rw-r--r--target-i386/helper.c12
-rw-r--r--target-i386/helper.h3
-rw-r--r--target-i386/kvm.c4
-rw-r--r--target-i386/misc_helper.c4
-rw-r--r--target-i386/ops_sse.h2
-rw-r--r--target-i386/simd_helper.h65
-rw-r--r--target-i386/translate.c587
-rw-r--r--target-ppc/Makefile.objs2
-rw-r--r--target-ppc/cpu.h21
-rw-r--r--target-ppc/helper.c9
-rw-r--r--target-ppc/helper.h2
-rw-r--r--target-ppc/translate.c41
-rw-r--r--tcg/aarch64/tcg-target.c82
-rw-r--r--tcg/i386/tcg-target.c197
-rw-r--r--tcg/i386/tcg-target.h2
-rw-r--r--tcg/ppc/tcg-target.c123
-rw-r--r--tcg/tcg-op.c2
-rw-r--r--tcg/tcg-op.h11
-rw-r--r--tcg/tcg-opc.h6
-rw-r--r--tcg/tcg.c270
-rw-r--r--tcg/tcg.h58
-rw-r--r--translate-all.c215
-rw-r--r--user-exec.c24
-rw-r--r--util/memfd.c6
-rw-r--r--vl.c1
146 files changed, 32918 insertions, 333 deletions
diff --git a/COPYRIGHT b/COPYRIGHT
new file mode 100644
index 0000000..75a9c94
--- /dev/null
+++ b/COPYRIGHT
@@ -0,0 +1,28 @@
+
+
+
+ COPYRIGHT
+
+The following is a notice of limited availability of the code, and disclaimer
+which must be included in the prologue of the code and in all source listings
+of the code.
+
+Copyright Notice
+ + 2010 Computer System Laboratory, Institute of Information Science,
+ Academia Sinica, Taiwan
+ + 2016 COVART Laboratory, Department of Computer Science and Information
+ Engineering, National Taiwan University, Taiwan.
+
+
+Permission is hereby granted to use, reproduce, prepare derivative works, and
+to redistribute to others. This software was authored by the following authors,
+sorted by surname:
+
+Name: Sheng-Yu Fu
+Email: d03922013@csie.ntu.edu.tw
+
+Name: Ding-Yong Hong
+Email: dyhong@iis.sinica.edu.tw
+
+Name: Yu-Ping Liu
+Email: r04922005@csie.ntu.edu.tw
diff --git a/Makefile.target b/Makefile.target
index 962d004..4e4b1fe 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -158,6 +158,8 @@ GENERATED_HEADERS += hmp-commands.h hmp-commands-info.h qmp-commands-old.h
endif # CONFIG_SOFTMMU
+include $(SRC_PATH)/llvm/hqemu.mk
+
# Workaround for http://gcc.gnu.org/PR55489, see configure.
%/translate.o: QEMU_CFLAGS += $(TRANSLATE_OPT_CFLAGS)
@@ -189,8 +191,8 @@ all-obj-$(CONFIG_SOFTMMU) += $(crypto-obj-y)
$(QEMU_PROG_BUILD): config-devices.mak
# build either PROG or PROGW
-$(QEMU_PROG_BUILD): $(all-obj-y) ../libqemuutil.a ../libqemustub.a
- $(call LINK, $(filter-out %.mak, $^))
+$(QEMU_PROG_BUILD): $(all-obj-y) ../libqemuutil.a ../libqemustub.a $(LLVM_BITCODE)
+ $(call LINK, $(filter-out %.mak %.bc, $^))
ifdef CONFIG_DARWIN
$(call quiet-command,Rez -append $(SRC_PATH)/pc-bios/qemu.rsrc -o $@," REZ $(TARGET_DIR)$@")
$(call quiet-command,SetFile -a C $@," SETFILE $(TARGET_DIR)$@")
@@ -225,6 +227,9 @@ ifdef CONFIG_TRACE_SYSTEMTAP
$(INSTALL_DATA) $(QEMU_PROG).stp-installed "$(DESTDIR)$(qemu_datadir)/../systemtap/tapset/$(QEMU_PROG).stp"
$(INSTALL_DATA) $(QEMU_PROG)-simpletrace.stp "$(DESTDIR)$(qemu_datadir)/../systemtap/tapset/$(QEMU_PROG)-simpletrace.stp"
endif
+ifneq ($(LLVM_BITCODE),)
+ $(INSTALL) -m 644 $(LLVM_BITCODE) "$(DESTDIR)$(bindir)"
+endif
GENERATED_HEADERS += config-target.h
Makefile: $(GENERATED_HEADERS)
diff --git a/configure b/configure
index 6ca6c64..80c9396 100755
--- a/configure
+++ b/configure
@@ -345,6 +345,9 @@ vhdx=""
numa=""
tcmalloc="no"
jemalloc="no"
+llvm="no"
+bcflags=""
+libopencsd=""
# parse CC options first
for opt do
@@ -1169,6 +1172,12 @@ for opt do
;;
--enable-jemalloc) jemalloc="yes"
;;
+ --enable-llvm) llvm="yes"
+ ;;
+ --clang-flags=*) bcflags="$optarg"
+ ;;
+ --with-libopencsd=*) libopencsd="$optarg"
+ ;;
*)
echo "ERROR: unknown option $opt"
echo "Try '$0 --help' for more information"
@@ -1391,12 +1400,26 @@ disabled with --disable-FEATURE, default is enabled if available:
numa libnuma support
tcmalloc tcmalloc support
jemalloc jemalloc support
+ llvm enable LLVM optimization
+ --clang-flags flags for clang compiler
+ --with-libopencsd path to libopencsd library
NOTE: The object files are built at the place where configure is launched
EOF
exit 0
fi
+if test "$llvm" != "no" ; then
+ llvm-config --version > /dev/null 2>&1 || { echo >&2 "llvm-config is not in the PATH"; exit 1; }
+ llvm_major=`llvm-config --version | cut -d'.' -f1`
+ llvm_minor=`llvm-config --version | cut -d'.' -f2`
+ if test "$llvm_major" -lt "3" ; then
+ error_exit "LLVM version too old. Version 3.5 or later is required."
+ elif test "$llvm_major" -eq "3" && test "$llvm_minor" -lt "5" ; then
+ error_exit "LLVM version too old. Version 3.5 or later is required."
+ fi
+fi
+
# Now we have handled --enable-tcg-interpreter and know we're not just
# printing the help message, bail out if the host CPU isn't supported.
if test "$ARCH" = "unknown"; then
@@ -1469,6 +1492,7 @@ gcc_flags="-Wmissing-include-dirs -Wempty-body -Wnested-externs $gcc_flags"
gcc_flags="-Wendif-labels $gcc_flags"
gcc_flags="-Wno-initializer-overrides $gcc_flags"
gcc_flags="-Wno-string-plus-int $gcc_flags"
+gcc_flags="-Wno-format-truncation $gcc_flags"
# Note that we do not add -Werror to gcc_flags here, because that would
# enable it for all configure tests. If a configure test failed due
# to -Werror this would just silently disable some features,
@@ -4847,6 +4871,11 @@ echo "bzip2 support $bzip2"
echo "NUMA host support $numa"
echo "tcmalloc support $tcmalloc"
echo "jemalloc support $jemalloc"
+# Only query llvm-config when LLVM support was requested.  With the default
+# llvm="no" the tool may not be installed, and running it here would print a
+# "command not found" error and an empty version in the summary.
+if test "$llvm" != "no" ; then
+ echo "LLVM enabled $llvm (version `llvm-config --version`)"
+else
+ echo "LLVM enabled $llvm"
+fi
+
+if test "$libopencsd" != ""; then
+ echo "libopencsd $libopencsd"
+fi
if test "$sdl_too_old" = "yes"; then
echo "-> Your SDL version is too old - please upgrade to have SDL support"
@@ -5252,6 +5281,21 @@ if test "$seccomp" = "yes"; then
echo "CONFIG_SECCOMP=y" >> $config_host_mak
fi
+if test "$llvm" != "no" ; then
+ echo "CONFIG_LLVM=y" >> $config_host_mak
+ echo "BCFLAGS=$bcflags" >> $config_host_mak
+ echo "LLVM_VERSION=LLVM_V`llvm-config --version | sed -e "s/\.//g" | cut -c 1-2`" >> $config_host_mak
+ echo "LLVM_CFLAGS=`llvm-config --cflags`" >> $config_host_mak
+ echo "LLVM_CXXFLAGS=`llvm-config --cxxflags`" >> $config_host_mak
+ echo "LLVM_LDFLAGS=`llvm-config --ldflags`" >> $config_host_mak
+ echo "LLVM_LIBS=`llvm-config --libs`" >> $config_host_mak
+fi
+
+if test "$libopencsd" != "" ; then
+ echo "CONFIG_LIBOPENCSD=y" >> $config_host_mak
+ echo "LIBOPENCSD=$libopencsd" >> $config_host_mak
+fi
+
# XXX: suppress that
if [ "$bsd" = "yes" ] ; then
echo "CONFIG_BSD=y" >> $config_host_mak
@@ -5852,6 +5896,23 @@ fi
echo "LDFLAGS+=$ldflags" >> $config_target_mak
echo "QEMU_CFLAGS+=$cflags" >> $config_target_mak
+if test "$cpu" = "i386" -o "$cpu" = "x86_64" -o "$cpu" = "arm" ; then
+ case "$target_name" in
+ i386|x86_64)
+ echo "CONFIG_COREMU=y" >> $config_target_mak
+ ;;
+ esac
+fi
+
+if test "$llvm" != "no" ; then
+ bitcode="llvm_helper_$target_name"
+ if test "$target_softmmu" = "yes" ; then
+ bitcode=$bitcode"_softmmu"
+ fi
+ echo "LLVM_EXTRA_FLAGS+=-I. -I\$(SRC_PATH) $cflags $LLVM_EXTRA_FLAGS" >> $config_target_mak
+ echo "CONFIG_LLVM_BITCODE=\"$prefix/bin/$bitcode.bc\"" >> $config_target_mak
+fi
+
done # for target in $targets
if [ "$pixman" = "internal" ]; then
diff --git a/cpu-exec.c b/cpu-exec.c
index c88d0ff..5e1f380 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -31,6 +31,7 @@
#include "hw/i386/apic.h"
#endif
#include "sysemu/replay.h"
+#include "hqemu.h"
/* -icount align implementation. */
@@ -104,6 +105,7 @@ static void print_delay(const SyncClocks *sc)
static void init_delay_params(SyncClocks *sc,
const CPUState *cpu)
{
+ memset(sc, 0, sizeof(SyncClocks));
if (!icount_align_option) {
return;
}
@@ -159,6 +161,10 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, uint8_t *tb_ptr)
trace_exec_tb_exit((void *) (next_tb & ~TB_EXIT_MASK),
next_tb & TB_EXIT_MASK);
+#if defined(CONFIG_LLVM)
+ if ((next_tb & TB_EXIT_MASK) == TB_EXIT_LLVM)
+ return next_tb;
+#endif
if ((next_tb & TB_EXIT_MASK) > TB_EXIT_IDX1) {
/* We didn't start executing this TB (eg because the instruction
* counter hit zero); we must restore the guest PC to the address
@@ -197,7 +203,7 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
tb = tb_gen_code(cpu, orig_tb->pc, orig_tb->cs_base, orig_tb->flags,
max_cycles | CF_NOCACHE
| (ignore_icount ? CF_IGNORE_ICOUNT : 0));
- tb->orig_tb = tcg_ctx.tb_ctx.tb_invalidated_flag ? NULL : orig_tb;
+ tb->orig_tb = tcg_ctx.tb_ctx->tb_invalidated_flag ? NULL : orig_tb;
cpu->current_tb = tb;
/* execute the generated code */
trace_exec_tb_nocache(tb, tb->pc);
@@ -218,13 +224,13 @@ static TranslationBlock *tb_find_physical(CPUState *cpu,
tb_page_addr_t phys_pc, phys_page1;
target_ulong virt_page2;
- tcg_ctx.tb_ctx.tb_invalidated_flag = 0;
+ tcg_ctx.tb_ctx->tb_invalidated_flag = 0;
/* find translated block using physical mappings */
phys_pc = get_page_addr_code(env, pc);
phys_page1 = phys_pc & TARGET_PAGE_MASK;
- h = tb_phys_hash_func(phys_pc);
- ptb1 = &tcg_ctx.tb_ctx.tb_phys_hash[h];
+ h = tb_phys_hash_func(pc);
+ ptb1 = &tcg_ctx.tb_ctx->tb_phys_hash[h];
for(;;) {
tb = *ptb1;
if (!tb) {
@@ -253,8 +259,8 @@ static TranslationBlock *tb_find_physical(CPUState *cpu,
/* Move the TB to the head of the list */
*ptb1 = tb->phys_hash_next;
- tb->phys_hash_next = tcg_ctx.tb_ctx.tb_phys_hash[h];
- tcg_ctx.tb_ctx.tb_phys_hash[h] = tb;
+ tb->phys_hash_next = tcg_ctx.tb_ctx->tb_phys_hash[h];
+ tcg_ctx.tb_ctx->tb_phys_hash[h] = tb;
return tb;
}
@@ -315,6 +321,10 @@ static inline TranslationBlock *tb_find_fast(CPUState *cpu)
tb->flags != flags)) {
tb = tb_find_slow(cpu, pc, cs_base, flags);
}
+
+ itlb_update_entry(env, tb);
+ ibtc_update_entry(env, tb);
+
return tb;
}
@@ -492,29 +502,23 @@ int cpu_exec(CPUState *cpu)
tb = tb_find_fast(cpu);
/* Note: we do it here to avoid a gcc bug on Mac OS X when
doing it in tb_find_slow */
- if (tcg_ctx.tb_ctx.tb_invalidated_flag) {
+ if (tcg_ctx.tb_ctx->tb_invalidated_flag) {
/* as some TB could have been invalidated because
of memory exceptions while generating the code, we
must recompute the hash index here */
next_tb = 0;
- tcg_ctx.tb_ctx.tb_invalidated_flag = 0;
+ tcg_ctx.tb_ctx->tb_invalidated_flag = 0;
}
if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
qemu_log("Trace %p [" TARGET_FMT_lx "] %s\n",
tb->tc_ptr, tb->pc, lookup_symbol(tb->pc));
}
- /* see if we can patch the calling TB. When the TB
- spans two pages, we cannot safely do a direct
- jump. */
- if (next_tb != 0 && tb->page_addr[1] == -1
- && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
- tb_add_jump((TranslationBlock *)(next_tb & ~TB_EXIT_MASK),
- next_tb & TB_EXIT_MASK, tb);
- }
+
+ tracer_exec_tb(cpu->env_ptr, next_tb, tb);
tb_unlock();
if (likely(!cpu->exit_request)) {
trace_exec_tb(tb, tb->pc);
- tc_ptr = tb->tc_ptr;
+ tc_ptr = tb->opt_ptr;
/* execute the generated code */
cpu->current_tb = tb;
next_tb = cpu_tb_exec(cpu, tc_ptr);
@@ -533,9 +537,14 @@ int cpu_exec(CPUState *cpu)
*/
smp_rmb();
next_tb = 0;
+
+ tracer_reset(cpu->env_ptr);
break;
case TB_EXIT_ICOUNT_EXPIRED:
{
+#if defined(CONFIG_LLVM)
+ break;
+#endif
/* Instruction counter expired. */
int insns_left = cpu->icount_decr.u32;
if (cpu->icount_extra && insns_left >= 0) {
@@ -590,6 +599,8 @@ int cpu_exec(CPUState *cpu)
#endif /* buggy compiler */
cpu->can_do_io = 1;
tb_lock_reset();
+
+ tracer_reset(cpu->env_ptr);
}
} /* for(;;) */
diff --git a/cpus.c b/cpus.c
index 747f14d..04da0ad 100644
--- a/cpus.c
+++ b/cpus.c
@@ -66,6 +66,9 @@
#endif /* CONFIG_LINUX */
+#include "tcg.h"
+#include "hqemu.h"
+
static CPUState *next_cpu;
int64_t max_delay;
int64_t max_advance;
@@ -892,6 +895,18 @@ void qemu_init_cpu_loop(void)
qemu_thread_get_self(&io_thread);
}
+/* Release the optimization resources of every CPU, then shut LLVM down.
+ *
+ * optimization_finalize() is called once per CPU in the CPU list with that
+ * CPU's env pointer; llvm_finalize() is only called when the build defines
+ * CONFIG_LLVM.
+ * NOTE(review): presumably invoked once on emulator exit as the counterpart
+ * of the optimization_init()/llvm_init() calls made in the TCG CPU thread --
+ * the caller is not part of this hunk, confirm. */
+void qemu_end_cpu_loop(void)
+{
+ CPUState *cpu;
+
+ CPU_FOREACH(cpu)
+ optimization_finalize(cpu->env_ptr);
+
+#if defined(CONFIG_LLVM)
+ llvm_finalize();
+#endif
+}
+
void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
struct qemu_work_item wi;
@@ -1134,6 +1149,16 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
/* process any pending work */
atomic_mb_set(&exit_request, 1);
+#if defined(CONFIG_LLVM)
+ llvm_init();
+#endif
+ /* we can safely initialize optimization resources after
+ * the setup of CPUArchState is completed. */
+ CPU_FOREACH(cpu) {
+ copy_tcg_context();
+ optimization_init(cpu->env_ptr);
+ }
+
while (1) {
tcg_exec_all();
diff --git a/cputlb.c b/cputlb.c
index bf1d50a..c81c3be 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -19,6 +19,7 @@
#include "config.h"
#include "cpu.h"
+#include "exec/tb-hash.h"
#include "exec/exec-all.h"
#include "exec/memory.h"
#include "exec/address-spaces.h"
@@ -30,12 +31,38 @@
#include "exec/ram_addr.h"
#include "tcg/tcg.h"
+#include "hqemu.h"
+
+/* TLB_NONIO_MASK: bits of a TLB address word that are compared when looking
+ * for the entry of a given page (page number, invalid bit and, with
+ * ENABLE_TLBVERSION, the version field).
+ * page_val(addr, env): value a matching TLB address word is compared against
+ * for the page containing addr; with ENABLE_TLBVERSION it also carries
+ * tlb_version(env).
+ * The addr argument is parenthesized so that an expression argument (for
+ * example "a | b") is cast and masked as a whole. */
+#if defined(ENABLE_TLBVERSION)
+#define TLB_NONIO_MASK (TARGET_PAGE_MASK | TLB_INVALID_MASK | TLB_VERSION_MASK)
+#define page_val(addr, env) (((tlbaddr_t)(addr) & TARGET_PAGE_MASK) | tlb_version(env))
+#else
+#define TLB_NONIO_MASK (TARGET_PAGE_MASK | TLB_INVALID_MASK)
+#define page_val(addr, env) ((addr) & TARGET_PAGE_MASK)
+#endif
+
//#define DEBUG_TLB
//#define DEBUG_TLB_CHECK
/* statistics */
int tlb_flush_count;
+/* Reset the main TLB (env->tlb_table) and the victim TLB (env->tlb_v_table).
+ *
+ * Without ENABLE_TLBVERSION both tables are filled with -1 on every call.
+ * With ENABLE_TLBVERSION the version number stored in env->tlb_version is
+ * incremented instead, and the tables are only filled with -1 when that
+ * number reaches TLB_VERSION_SIZE and wraps back to 0.
+ * NOTE(review): this relies on lookups comparing against the current version
+ * (see page_val() above) so that entries stamped with an older version stop
+ * matching -- tlb_version() itself is defined outside this file, confirm. */
+static inline void tlb_reset(CPUArchState *env)
+{
+#if defined(ENABLE_TLBVERSION)
+ /* tlb_version holds the version already shifted into position */
+ tlbaddr_t version = env->tlb_version >> TLB_VERSION_SHIFT;
+ if (++version == TLB_VERSION_SIZE) {
+ /* version space exhausted: restart at 0 and really wipe the tables */
+ version = 0;
+ memset(env->tlb_table, -1, sizeof(env->tlb_table));
+ memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table));
+ }
+ env->tlb_version = version << TLB_VERSION_SHIFT;
+#else
+ memset(env->tlb_table, -1, sizeof(env->tlb_table));
+ memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table));
+#endif
+}
+
/* NOTE:
* If flush_global is true (the usual case), flush all tlb entries.
* If flush_global is false, flush (at least) all tlb entries not
@@ -59,10 +86,12 @@ void tlb_flush(CPUState *cpu, int flush_global)
links while we are modifying them */
cpu->current_tb = NULL;
- memset(env->tlb_table, -1, sizeof(env->tlb_table));
- memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table));
+ tlb_reset(env);
memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));
+ optimization_reset(env, 0);
+ lpt_reset(env);
+
env->vtlb_index = 0;
env->tlb_flush_addr = -1;
env->tlb_flush_mask = 0;
@@ -110,18 +139,67 @@ void tlb_flush_by_mmuidx(CPUState *cpu, ...)
va_end(argp);
}
-static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
+static inline void tlb_flush_entry(CPUArchState *env, CPUTLBEntry *tlb_entry,
+ target_ulong addr)
{
- if (addr == (tlb_entry->addr_read &
- (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
- addr == (tlb_entry->addr_write &
- (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
- addr == (tlb_entry->addr_code &
- (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
+ if (page_val(addr, env) == (tlb_entry->addr_read & TLB_NONIO_MASK) ||
+ page_val(addr, env) == (tlb_entry->addr_write & TLB_NONIO_MASK) ||
+ page_val(addr, env) == (tlb_entry->addr_code & TLB_NONIO_MASK)) {
memset(tlb_entry, -1, sizeof(*tlb_entry));
}
}
+#ifdef ENABLE_LPAGE
+/* Flush TLB state for a large page looked up through lpt_flush_page().
+ *
+ * Returns 0 when lpt_flush_page() returns 0; in that case only
+ * cpu->current_tb has been cleared and the caller is expected to carry on.
+ * Returns 1 after either a full tlb_flush() (page spans more base pages than
+ * max_flush_pages) or a per-base-page flush of the TLB, the victim TLB and
+ * the TB jump cache followed by optimization_reset(). */
+static int tlb_flush_large_page(CPUState *cpu, target_ulong addr)
+{
+ int i, j, k, ret, mmu_idx, num_base_pages, max_flush_pages;
+ target_ulong page_addr, page_size, flush_addr;
+ CPUArchState *env = cpu->env_ptr;
+
+#if defined(DEBUG_TLB)
+ printf("tlb_flush:\n");
+#endif
+ /* must reset current TB so that interrupts cannot modify the
+ links while we are modifying them */
+ cpu->current_tb = NULL;
+
+ /* NOTE(review): page_addr is filled in here but never read afterwards;
+ * both loops below start from addr.  If lpt_flush_page() reports the base
+ * address of the large page in page_addr, the loops may be intended to
+ * start there instead -- confirm against lpt_flush_page(). */
+ ret = lpt_flush_page(env, addr, &page_addr, &page_size);
+ if (ret == 0)
+ return 0;
+
+ /* If the large page occupies a small set of the tlb, do a partial flush
+ * optimization, otherwise, do a full flush.  The threshold is the smaller
+ * of a quarter of the TLB entries and 1024 base pages. */
+ num_base_pages = page_size / TARGET_PAGE_SIZE;
+ max_flush_pages = (CPU_TLB_SIZE / 4 < 1024) ? CPU_TLB_SIZE / 4 : 1024;
+ if (num_base_pages > max_flush_pages) {
+ tlb_flush(cpu, 1);
+ return 1;
+ }
+
+ /* flush one base page at a time, for every MMU mode */
+ for (i = 0; i < num_base_pages; i++) {
+ flush_addr = addr + i * TARGET_PAGE_SIZE;
+ j = (flush_addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+ for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
+ tlb_flush_entry(env, &env->tlb_table[mmu_idx][j], flush_addr);
+
+ /* check whether there are entries that need to be flushed in the vtlb */
+ for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
+ for (k = 0; k < CPU_VTLB_SIZE; k++)
+ tlb_flush_entry(env, &env->tlb_v_table[mmu_idx][k], flush_addr);
+ }
+ }
+
+ /* starting at i = -1 also clears the jump-cache bucket of the page just
+ * before addr (presumably for TBs that begin there and run into the
+ * flushed range -- confirm) */
+ for (i = -1; i < num_base_pages; i++) {
+ j = tb_jmp_cache_hash_page(addr + i * TARGET_PAGE_SIZE);
+ memset(&cpu->tb_jmp_cache[j], 0,
+ TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
+ }
+ optimization_reset(env, 0);
+
+ return 1;
+}
+#endif
+
void tlb_flush_page(CPUState *cpu, target_ulong addr)
{
CPUArchState *env = cpu->env_ptr;
@@ -138,8 +216,14 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr)
TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
env->tlb_flush_addr, env->tlb_flush_mask);
#endif
+
+#ifdef ENABLE_LPAGE
+ if (tlb_flush_large_page(cpu, addr))
+ return;
+#else
tlb_flush(cpu, 1);
return;
+#endif
}
/* must reset current TB so that interrupts cannot modify the
links while we are modifying them */
@@ -148,18 +232,19 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr)
addr &= TARGET_PAGE_MASK;
i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
- tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
+ tlb_flush_entry(env, &env->tlb_table[mmu_idx][i], addr);
}
/* check whether there are entries that need to be flushed in the vtlb */
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
int k;
for (k = 0; k < CPU_VTLB_SIZE; k++) {
- tlb_flush_entry(&env->tlb_v_table[mmu_idx][k], addr);
+ tlb_flush_entry(env, &env->tlb_v_table[mmu_idx][k], addr);
}
}
tb_flush_jmp_cache(cpu, addr);
+ optimization_flush_page(env, addr);
}
void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, ...)
@@ -202,11 +287,11 @@ void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, ...)
printf(" %d", mmu_idx);
#endif
- tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
+ tlb_flush_entry(env, &env->tlb_table[mmu_idx][i], addr);
/* check whether there are vltb entries that need to be flushed */
for (k = 0; k < CPU_VTLB_SIZE; k++) {
- tlb_flush_entry(&env->tlb_v_table[mmu_idx][k], addr);
+ tlb_flush_entry(env, &env->tlb_v_table[mmu_idx][k], addr);
}
}
va_end(argp);
@@ -284,10 +369,11 @@ void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
}
}
-static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
+static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr,
+ tlbaddr_t version)
{
- if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) {
- tlb_entry->addr_write = vaddr;
+ if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY | version)) {
+ tlb_entry->addr_write = vaddr | version;
}
}
@@ -302,13 +388,13 @@ void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
vaddr &= TARGET_PAGE_MASK;
i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
- tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
+ tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr, tlb_version(env));
}
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
int k;
for (k = 0; k < CPU_VTLB_SIZE; k++) {
- tlb_set_dirty1(&env->tlb_v_table[mmu_idx][k], vaddr);
+ tlb_set_dirty1(&env->tlb_v_table[mmu_idx][k], vaddr, tlb_version(env));
}
}
}
@@ -360,6 +446,7 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
assert(size >= TARGET_PAGE_SIZE);
if (size != TARGET_PAGE_SIZE) {
tlb_add_large_page(env, vaddr, size);
+ lpt_add_page(env, vaddr, size);
}
sz = size;
@@ -424,6 +511,13 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
} else {
te->addr_write = -1;
}
+
+#ifdef ENABLE_TLBVERSION
+ tlbaddr_t version = tlb_version(env);
+ te->addr_read |= version;
+ te->addr_write |= version;
+ te->addr_code |= version;
+#endif
}
/* Add a new TLB entry, but without specifying the memory
@@ -452,7 +546,7 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
mmu_idx = cpu_mmu_index(env1, true);
if (unlikely(env1->tlb_table[mmu_idx][page_index].addr_code !=
- (addr & TARGET_PAGE_MASK))) {
+ page_val(addr, env1))) {
cpu_ldub_code(env1, addr);
}
pd = env1->iotlb[mmu_idx][page_index].addr & ~TARGET_PAGE_MASK;
@@ -471,6 +565,9 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
return qemu_ram_addr_from_host_nofail(p);
}
+#undef TLB_NONIO_MASK
+#undef page_val
+
#define MMUSUFFIX _mmu
#define SHIFT 0
diff --git a/exec.c b/exec.c
index 0bf0a6e..30ab09f 100644
--- a/exec.c
+++ b/exec.c
@@ -706,7 +706,7 @@ int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
}
wp = g_malloc(sizeof(*wp));
- wp->vaddr = addr;
+ wp->addr = addr;
wp->len = len;
wp->flags = flags;
@@ -731,7 +731,7 @@ int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
CPUWatchpoint *wp;
QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
- if (addr == wp->vaddr && len == wp->len
+ if (addr == wp->addr && len == wp->len
&& flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
cpu_watchpoint_remove_by_ref(cpu, wp);
return 0;
@@ -745,7 +745,7 @@ void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
{
QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
- tlb_flush_page(cpu, watchpoint->vaddr);
+ tlb_flush_page(cpu, watchpoint->addr);
g_free(watchpoint);
}
@@ -776,10 +776,10 @@ static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
* exactly at the top of the address space and so addr + len
* wraps round to zero.
*/
- vaddr wpend = wp->vaddr + wp->len - 1;
+ vaddr wpend = wp->addr + wp->len - 1;
vaddr addrend = addr + len - 1;
- return !(addr > wpend || wp->vaddr > addrend);
+ return !(addr > wpend || wp->addr > addrend);
}
#endif
diff --git a/gdbstub.c b/gdbstub.c
index 9c29aa0..a24d9ef 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -1267,7 +1267,7 @@ static void gdb_vm_state_change(void *opaque, int running, RunState state)
snprintf(buf, sizeof(buf),
"T%02xthread:%02x;%swatch:" TARGET_FMT_lx ";",
GDB_SIGNAL_TRAP, cpu_index(cpu), type,
- (target_ulong)cpu->watchpoint_hit->vaddr);
+ (target_ulong)cpu->watchpoint_hit->addr);
cpu->watchpoint_hit = NULL;
goto send_packet;
}
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index 83b1781..9471dc6 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -271,12 +271,12 @@ CPUArchState *cpu_copy(CPUArchState *env);
/* Flags stored in the low bits of the TLB virtual address. These are
defined so that fast path ram access is all zeros. */
/* Zero if TLB entry is valid. */
-#define TLB_INVALID_MASK (1 << 3)
+#define TLB_INVALID_MASK (1 << TLB_INVALID_SHIFT)
/* Set if TLB entry references a clean RAM page. The iotlb entry will
contain the page physical address. */
-#define TLB_NOTDIRTY (1 << 4)
+#define TLB_NOTDIRTY (1 << TLB_NOTDIRTY_SHIFT)
/* Set if TLB entry is an IO callback. */
-#define TLB_MMIO (1 << 5)
+#define TLB_MMIO (1 << TLB_MMIO_SHIFT)
void dump_exec_info(FILE *f, fprintf_function cpu_fprintf);
void dump_opcount_info(FILE *f, fprintf_function cpu_fprintf);
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index 85aa403..ce7deb9 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -76,12 +76,12 @@ void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
static inline void cpu_physical_memory_read(hwaddr addr,
void *buf, int len)
{
- cpu_physical_memory_rw(addr, buf, len, 0);
+ cpu_physical_memory_rw(addr, (uint8_t *)buf, len, 0);
}
static inline void cpu_physical_memory_write(hwaddr addr,
const void *buf, int len)
{
- cpu_physical_memory_rw(addr, (void *)buf, len, 1);
+ cpu_physical_memory_rw(addr, (uint8_t *)buf, len, 1);
}
void *cpu_physical_memory_map(hwaddr addr,
hwaddr *plen,
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
index 5093be2..b44e3f2 100644
--- a/include/exec/cpu-defs.h
+++ b/include/exec/cpu-defs.h
@@ -56,6 +56,8 @@ typedef uint64_t target_ulong;
#error TARGET_LONG_SIZE undefined
#endif
+#include "hqemu-config.h"
+
#if !defined(CONFIG_USER_ONLY)
/* use a fully associative victim tlb of 8 entries */
#define CPU_VTLB_SIZE 8
@@ -89,7 +91,7 @@ typedef uint64_t target_ulong;
* of tlb_table inside env (which is non-trivial but not huge).
*/
#define CPU_TLB_BITS \
- MIN(8, \
+ MIN(12, \
TCG_TARGET_TLB_DISPLACEMENT_BITS - CPU_TLB_ENTRY_BITS - \
(NB_MMU_MODES <= 1 ? 0 : \
NB_MMU_MODES <= 2 ? 1 : \
@@ -107,9 +109,9 @@ typedef struct CPUTLBEntry {
*/
union {
struct {
- target_ulong addr_read;
- target_ulong addr_write;
- target_ulong addr_code;
+ tlbaddr_t addr_read;
+ tlbaddr_t addr_write;
+ tlbaddr_t addr_code;
/* Addend to virtual address to get host address. IO accesses
use the corresponding iotlb value. */
uintptr_t addend;
@@ -140,6 +142,7 @@ typedef struct CPUIOTLBEntry {
target_ulong tlb_flush_addr; \
target_ulong tlb_flush_mask; \
target_ulong vtlb_index; \
+ tlbaddr_t tlb_version; \
#else
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
index b573df5..72acce7 100644
--- a/include/exec/cpu_ldst.h
+++ b/include/exec/cpu_ldst.h
@@ -405,7 +405,7 @@ static inline void *tlb_vaddr_to_host(CPUArchState *env, target_ulong addr,
#else
int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
CPUTLBEntry *tlbentry = &env->tlb_table[mmu_idx][index];
- target_ulong tlb_addr;
+ tlbaddr_t tlb_addr;
uintptr_t haddr;
switch (access_type) {
@@ -422,13 +422,22 @@ static inline void *tlb_vaddr_to_host(CPUArchState *env, target_ulong addr,
g_assert_not_reached();
}
+#if defined(ENABLE_TLBVERSION)
+ if (tlb_version(env) != (tlb_addr & TLB_VERSION_MASK))
+ return NULL;
+#endif
+
if ((addr & TARGET_PAGE_MASK)
!= (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
/* TLB entry is for a different page */
return NULL;
}
+#if defined(ENABLE_TLBVERSION)
+ if (tlb_addr & (TLB_NOTDIRTY | TLB_MMIO)) {
+#else
if (tlb_addr & ~TARGET_PAGE_MASK) {
+#endif
/* IO access */
return NULL;
}
diff --git a/include/exec/cpu_ldst_template.h b/include/exec/cpu_ldst_template.h
index 3091c00..2a01c6f 100644
--- a/include/exec/cpu_ldst_template.h
+++ b/include/exec/cpu_ldst_template.h
@@ -67,6 +67,14 @@
#define SRETSUFFIX glue(s, SUFFIX)
#endif
+#include "hqemu.h"
+
+/* page_val(addr, env): value the TLB address word must equal for the inline
+ * fast path below; any mismatch falls back to the helper_ret_* slow path.
+ *  - ENABLE_TLBVERSION: page number of the last byte of the access
+ *    (addr + DATA_SIZE - 1) combined with tlb_version(env), so an access that
+ *    runs into the next page yields a different page number.
+ *  - otherwise: page number plus the low DATA_SIZE - 1 address bits, so an
+ *    address that is not DATA_SIZE-aligned never matches.
+ * The addr argument is parenthesized so that an expression argument is cast
+ * and masked as a whole. */
+#if defined(ENABLE_TLBVERSION)
+#define page_val(addr, env) ((((tlbaddr_t)(addr) + DATA_SIZE - 1) & TARGET_PAGE_MASK) | tlb_version(env))
+#else
+#define page_val(addr, env) ((addr) & (TARGET_PAGE_MASK | (DATA_SIZE - 1)))
+#endif
+
/* generic load/store macros */
static inline RES_TYPE
@@ -80,12 +88,17 @@ glue(glue(glue(cpu_ld, USUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
int mmu_idx;
TCGMemOpIdx oi;
+#ifdef SOFTMMU_CODE_ACCESS
+ if (build_llvm_only(env))
+ return glue(glue(ld, USUFFIX), _p)((uint8_t *)env->image_base + ptr);
+#endif
+
addr = ptr;
page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
mmu_idx = CPU_MMU_INDEX;
if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ !=
- (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
- oi = make_memop_idx(SHIFT, mmu_idx);
+ page_val(addr, env))) {
+ oi = make_memop_idx((TCGMemOp)SHIFT, mmu_idx);
res = glue(glue(helper_ret_ld, URETSUFFIX), MMUSUFFIX)(env, addr,
oi, retaddr);
} else {
@@ -112,12 +125,17 @@ glue(glue(glue(cpu_lds, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
int mmu_idx;
TCGMemOpIdx oi;
+#ifdef SOFTMMU_CODE_ACCESS
+ if (build_llvm_only(env))
+ return glue(glue(lds, SUFFIX), _p)((uint8_t *)env->image_base + ptr);
+#endif
+
addr = ptr;
page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
mmu_idx = CPU_MMU_INDEX;
if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ !=
- (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
- oi = make_memop_idx(SHIFT, mmu_idx);
+ page_val(addr, env))) {
+ oi = make_memop_idx((TCGMemOp)SHIFT, mmu_idx);
res = (DATA_STYPE)glue(glue(helper_ret_ld, SRETSUFFIX),
MMUSUFFIX)(env, addr, oi, retaddr);
} else {
@@ -152,8 +170,8 @@ glue(glue(glue(cpu_st, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
mmu_idx = CPU_MMU_INDEX;
if (unlikely(env->tlb_table[mmu_idx][page_index].addr_write !=
- (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
- oi = make_memop_idx(SHIFT, mmu_idx);
+ page_val(addr, env))) {
+ oi = make_memop_idx((TCGMemOp)SHIFT, mmu_idx);
glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)(env, addr, v, oi,
retaddr);
} else {
@@ -171,6 +189,7 @@ glue(glue(cpu_st, SUFFIX), MEMSUFFIX)(CPUArchState *env, target_ulong ptr,
#endif /* !SOFTMMU_CODE_ACCESS */
+#undef page_val
#undef RES_TYPE
#undef DATA_TYPE
#undef DATA_STYPE
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index d900b0d..a225bea 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -21,6 +21,7 @@
#define _EXEC_ALL_H_
#include "qemu-common.h"
+#include "hqemu-config.h"
/* allow to see translation results - the slowdown should be negligible, so we leave it */
#define DEBUG_DISAS
@@ -59,7 +60,7 @@ typedef struct TranslationBlock TranslationBlock;
* and up to 4 + N parameters on 64-bit archs
* (N = number of input arguments + output arguments). */
#define MAX_OPC_PARAM (4 + (MAX_OPC_PARAM_PER_ARG * MAX_OPC_PARAM_ARGS))
-#define OPC_BUF_SIZE 640
+#define OPC_BUF_SIZE 2048
#define OPC_MAX_SIZE (OPC_BUF_SIZE - MAX_OP_PER_INSTR)
#define OPPARAM_BUF_SIZE (OPC_BUF_SIZE * MAX_OPC_PARAM)
@@ -216,6 +217,8 @@ struct TranslationBlock {
jmp_first */
struct TranslationBlock *jmp_next[2];
struct TranslationBlock *jmp_first;
+
+ TB_OPTIMIZATION_COMMON
};
#include "qemu/thread.h"
@@ -305,7 +308,7 @@ static inline void tb_set_jmp_target(TranslationBlock *tb,
int n, uintptr_t addr)
{
uint16_t offset = tb->tb_jmp_offset[n];
- tb_set_jmp_target1((uintptr_t)(tb->tc_ptr + offset), addr);
+ tb_set_jmp_target1((uintptr_t)((uint8_t *)tb->tc_ptr + offset), addr);
}
#else
@@ -405,4 +408,6 @@ extern int singlestep;
extern CPUState *tcg_current_cpu;
extern bool exit_request;
+size_t get_cpu_size(void);
+
#endif
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 0f07159..c2a1cd3 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -208,9 +208,9 @@ struct MemoryListener {
void (*region_del)(MemoryListener *listener, MemoryRegionSection *section);
void (*region_nop)(MemoryListener *listener, MemoryRegionSection *section);
void (*log_start)(MemoryListener *listener, MemoryRegionSection *section,
- int old, int new);
+ int _old, int _new);
void (*log_stop)(MemoryListener *listener, MemoryRegionSection *section,
- int old, int new);
+ int _old, int _new);
void (*log_sync)(MemoryListener *listener, MemoryRegionSection *section);
void (*log_global_start)(MemoryListener *listener);
void (*log_global_stop)(MemoryListener *listener);
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index c537969..4453e5b 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -10,6 +10,8 @@
#include "qapi/error.h"
#include "hw/hotplug.h"
+#define typename QEMUtypename
+
enum {
DEV_NVECTORS_UNSPECIFIED = -1,
};
@@ -401,4 +403,6 @@ static inline bool qbus_is_hotpluggable(BusState *bus)
void device_listener_register(DeviceListener *listener);
void device_listener_unregister(DeviceListener *listener);
+#undef typename
+
#endif
diff --git a/include/qemu-common.h b/include/qemu-common.h
index 405364f..d0c2e20 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -454,7 +454,7 @@ int mod_utf8_codepoint(const char *s, size_t n, char **end);
void qemu_hexdump(const char *buf, FILE *fp, const char *prefix, size_t size);
/* vector definitions */
-#ifdef __ALTIVEC__
+#if defined(__ALTIVEC__) && !defined(__clang__)
#include <altivec.h>
/* The altivec.h header says we're allowed to undef these for
* C++ compatibility. Here we don't care about C++, but we
diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h
index bd2c075..e2125bd 100644
--- a/include/qemu/atomic.h
+++ b/include/qemu/atomic.h
@@ -158,13 +158,13 @@
#ifndef atomic_rcu_read
#ifdef __ATOMIC_CONSUME
#define atomic_rcu_read(ptr) ({ \
- typeof(*ptr) _val; \
+ __typeof__(*ptr) _val; \
__atomic_load(ptr, &_val, __ATOMIC_CONSUME); \
_val; \
})
#else
#define atomic_rcu_read(ptr) ({ \
- typeof(*ptr) _val = atomic_read(ptr); \
+ __typeof__(*ptr) _val = atomic_read(ptr); \
smp_read_barrier_depends(); \
_val; \
})
@@ -185,7 +185,7 @@
#ifndef atomic_rcu_set
#ifdef __ATOMIC_RELEASE
#define atomic_rcu_set(ptr, i) do { \
- typeof(*ptr) _val = (i); \
+ __typeof__(*ptr) _val = (i); \
__atomic_store(ptr, &_val, __ATOMIC_RELEASE); \
} while(0)
#else
@@ -220,7 +220,7 @@
*/
#ifndef atomic_mb_read
#define atomic_mb_read(ptr) ({ \
- typeof(*ptr) _val = atomic_read(ptr); \
+ __typeof__(*ptr) _val = atomic_read(ptr); \
smp_rmb(); \
_val; \
})
@@ -239,7 +239,7 @@
#define atomic_xchg(ptr, i) __sync_swap(ptr, i)
#elif defined(__ATOMIC_SEQ_CST)
#define atomic_xchg(ptr, i) ({ \
- typeof(*ptr) _new = (i), _old; \
+ __typeof__(*ptr) _new = (i), _old; \
__atomic_exchange(ptr, &_new, &_old, __ATOMIC_SEQ_CST); \
_old; \
})
diff --git a/include/qemu/bitmap.h b/include/qemu/bitmap.h
index 86dd9cd..b53f462 100644
--- a/include/qemu/bitmap.h
+++ b/include/qemu/bitmap.h
@@ -71,7 +71,7 @@
unsigned long name[BITS_TO_LONGS(bits)]
#define small_nbits(nbits) \
- ((nbits) <= BITS_PER_LONG)
+ ((nbits) <= (long)BITS_PER_LONG)
int slow_bitmap_empty(const unsigned long *bitmap, long bits);
int slow_bitmap_full(const unsigned long *bitmap, long bits);
@@ -97,7 +97,7 @@ int slow_bitmap_intersects(const unsigned long *bitmap1,
static inline unsigned long *bitmap_try_new(long nbits)
{
long len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
- return g_try_malloc0(len);
+ return (unsigned long *)g_try_malloc0(len);
}
static inline unsigned long *bitmap_new(long nbits)
@@ -241,9 +241,9 @@ static inline unsigned long *bitmap_zero_extend(unsigned long *old,
long old_nbits, long new_nbits)
{
long new_len = BITS_TO_LONGS(new_nbits) * sizeof(unsigned long);
- unsigned long *new = g_realloc(old, new_len);
- bitmap_clear(new, old_nbits, new_nbits - old_nbits);
- return new;
+ unsigned long *new_bitmap = (unsigned long *)g_realloc(old, new_len);
+ bitmap_clear(new_bitmap, old_nbits, new_nbits - old_nbits);
+ return new_bitmap;
}
#endif /* BITMAP_H */
diff --git a/include/qemu/compiler.h b/include/qemu/compiler.h
index d22eb01..0abf0f8 100644
--- a/include/qemu/compiler.h
+++ b/include/qemu/compiler.h
@@ -60,7 +60,7 @@
#ifndef container_of
#define container_of(ptr, type, member) ({ \
- const typeof(((type *) 0)->member) *__mptr = (ptr); \
+ const __typeof__(((type *) 0)->member) *__mptr = (ptr); \
(type *) ((char *) __mptr - offsetof(type, member));})
#endif
@@ -74,7 +74,7 @@
#define DO_UPCAST(type, field, dev) container_of(dev, type, field)
#endif
-#define typeof_field(type, field) typeof(((type *)0)->field)
+#define typeof_field(type, field) __typeof__(((type *)0)->field)
#define type_check(t1,t2) ((t1*)0 - (t2*)0)
#ifndef always_inline
diff --git a/include/qemu/queue.h b/include/qemu/queue.h
index f781aa2..b56bce5 100644
--- a/include/qemu/queue.h
+++ b/include/qemu/queue.h
@@ -198,7 +198,7 @@ struct { \
} while (/*CONSTCOND*/0)
#define QSLIST_INSERT_HEAD_ATOMIC(head, elm, field) do { \
- typeof(elm) save_sle_next; \
+ __typeof__(elm) save_sle_next; \
do { \
save_sle_next = (elm)->field.sle_next = (head)->slh_first; \
} while (atomic_cmpxchg(&(head)->slh_first, save_sle_next, (elm)) != \
diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h
index f6d1d56..0d9f677 100644
--- a/include/qemu/rcu.h
+++ b/include/qemu/rcu.h
@@ -135,8 +135,8 @@ extern void call_rcu1(struct rcu_head *head, RCUCBFunc *func);
#define call_rcu(head, func, field) \
call_rcu1(({ \
char __attribute__((unused)) \
- offset_must_be_zero[-offsetof(typeof(*(head)), field)], \
- func_type_invalid = (func) - (void (*)(typeof(head)))(func); \
+ offset_must_be_zero[-offsetof(__typeof__(*(head)), field)], \
+ func_type_invalid = (func) - (void (*)(__typeof__(head)))(func); \
&(head)->field; \
}), \
(RCUCBFunc *)(func))
@@ -144,7 +144,7 @@ extern void call_rcu1(struct rcu_head *head, RCUCBFunc *func);
#define g_free_rcu(obj, field) \
call_rcu1(({ \
char __attribute__((unused)) \
- offset_must_be_zero[-offsetof(typeof(*(obj)), field)]; \
+ offset_must_be_zero[-offsetof(__typeof__(*(obj)), field)]; \
&(obj)->field; \
}), \
(RCUCBFunc *)g_free);
diff --git a/include/qemu/timer.h b/include/qemu/timer.h
index d0946cb..a16effa 100644
--- a/include/qemu/timer.h
+++ b/include/qemu/timer.h
@@ -523,7 +523,7 @@ static inline QEMUTimer *timer_new_tl(QEMUTimerList *timer_list,
QEMUTimerCB *cb,
void *opaque)
{
- QEMUTimer *ts = g_malloc0(sizeof(QEMUTimer));
+ QEMUTimer *ts = (QEMUTimer *)g_malloc0(sizeof(QEMUTimer));
timer_init_tl(ts, timer_list, scale, cb, opaque);
return ts;
}
@@ -965,7 +965,7 @@ static inline int64_t cpu_get_host_ticks (void)
#define MIPS_RDHWR(rd, value) { \
__asm__ __volatile__ (".set push\n\t" \
".set mips32r2\n\t" \
- "rdhwr %0, "rd"\n\t" \
+ "rdhwr %0, " rd "\n\t" \
".set pop" \
: "=r" (value)); \
}
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 51a1323..4b005ff 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -30,6 +30,8 @@
#include "qemu/thread.h"
#include "qemu/typedefs.h"
+#define typename QEMUtypename
+
typedef int (*WriteCoreDumpFunction)(const void *buf, size_t size,
void *opaque);
@@ -196,7 +198,7 @@ typedef struct CPUBreakpoint {
} CPUBreakpoint;
typedef struct CPUWatchpoint {
- vaddr vaddr;
+ vaddr addr;
vaddr len;
vaddr hitaddr;
MemTxAttrs hitattrs;
@@ -775,4 +777,7 @@ extern const struct VMStateDescription vmstate_cpu_common;
.offset = 0, \
}
+CPUState *cpu_create(void);
+#undef typename
+
#endif
diff --git a/include/qom/object.h b/include/qom/object.h
index 4509166..118c227 100644
--- a/include/qom/object.h
+++ b/include/qom/object.h
@@ -20,6 +20,10 @@
#include "qemu/queue.h"
#include "qapi/error.h"
+#define Type QEMUType
+#define class QEMUclass
+#define typename QEMUtypename
+
struct Visitor;
struct TypeImpl;
@@ -1570,5 +1574,8 @@ int object_child_foreach_recursive(Object *obj,
*/
Object *container_get(Object *root, const char *path);
+#undef Type
+#undef class
+#undef typename
#endif
diff --git a/include/sysemu/cpus.h b/include/sysemu/cpus.h
index 3d1e5ba..d594ebf 100644
--- a/include/sysemu/cpus.h
+++ b/include/sysemu/cpus.h
@@ -4,6 +4,7 @@
/* cpus.c */
bool qemu_in_vcpu_thread(void);
void qemu_init_cpu_loop(void);
+void qemu_end_cpu_loop(void);
void resume_all_vcpus(void);
void pause_all_vcpus(void);
void cpu_stop_current(void);
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 8b17c0e..7be6e71 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -2001,9 +2001,13 @@ static void load_elf_image(const char *image_name, int image_fd,
info->brk = info->end_code;
}
+#if defined(CONFIG_LLVM)
+ load_symbols(ehdr, image_fd, load_bias);
+#else
if (qemu_log_enabled()) {
load_symbols(ehdr, image_fd, load_bias);
}
+#endif
close(image_fd);
return;
diff --git a/linux-user/main.c b/linux-user/main.c
index 8acfe0f..0f67ad4 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -33,11 +33,12 @@
#include "qemu/timer.h"
#include "qemu/envlist.h"
#include "elf.h"
+#include "hqemu.h"
char *exec_path;
int singlestep;
-static const char *filename;
+const char *filename;
static const char *argv0;
static int gdbstub_port;
static envlist_t *envlist;
@@ -105,7 +106,10 @@ static int pending_cpus;
/* Make sure everything is in a consistent state for calling fork(). */
void fork_start(void)
{
- qemu_mutex_lock(&tcg_ctx.tb_ctx.tb_lock);
+#if defined(CONFIG_LLVM)
+ llvm_fork_start();
+#endif
+ qemu_mutex_lock(&tcg_ctx.tb_ctx->tb_lock);
pthread_mutex_lock(&exclusive_lock);
mmap_fork_start();
}
@@ -127,12 +131,15 @@ void fork_end(int child)
pthread_mutex_init(&cpu_list_mutex, NULL);
pthread_cond_init(&exclusive_cond, NULL);
pthread_cond_init(&exclusive_resume, NULL);
- qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock);
+ qemu_mutex_init(&tcg_ctx.tb_ctx->tb_lock);
gdbserver_fork(thread_cpu);
} else {
pthread_mutex_unlock(&exclusive_lock);
- qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
+ qemu_mutex_unlock(&tcg_ctx.tb_ctx->tb_lock);
}
+#if defined(CONFIG_LLVM)
+ llvm_fork_end(child);
+#endif
}
/* Wait for pending exclusive operations to complete. The exclusive lock
@@ -276,6 +283,9 @@ void cpu_loop(CPUX86State *env)
abi_ulong pc;
target_siginfo_t info;
+ copy_tcg_context();
+ optimization_init(env);
+
for(;;) {
cpu_exec_start(cs);
trapnr = cpu_x86_exec(cs);
@@ -670,6 +680,9 @@ void cpu_loop(CPUARMState *env)
target_siginfo_t info;
uint32_t addr;
+ copy_tcg_context();
+ optimization_init(env);
+
for(;;) {
cpu_exec_start(cs);
trapnr = cpu_arm_exec(cs);
@@ -1001,6 +1014,9 @@ void cpu_loop(CPUARMState *env)
int trapnr, sig;
target_siginfo_t info;
+ copy_tcg_context();
+ optimization_init(env);
+
for (;;) {
cpu_exec_start(cs);
trapnr = cpu_arm_exec(cs);
@@ -1083,6 +1099,9 @@ void cpu_loop(CPUUniCore32State *env)
unsigned int n, insn;
target_siginfo_t info;
+ copy_tcg_context();
+ optimization_init(env);
+
for (;;) {
cpu_exec_start(cs);
trapnr = uc32_cpu_exec(cs);
@@ -1284,6 +1303,9 @@ void cpu_loop (CPUSPARCState *env)
abi_long ret;
target_siginfo_t info;
+ copy_tcg_context();
+ optimization_init(env);
+
while (1) {
cpu_exec_start(cs);
trapnr = cpu_sparc_exec(cs);
@@ -1564,6 +1586,9 @@ void cpu_loop(CPUPPCState *env)
int trapnr;
target_ulong ret;
+ copy_tcg_context();
+ optimization_init(env);
+
for(;;) {
cpu_exec_start(cs);
trapnr = cpu_ppc_exec(cs);
@@ -2416,6 +2441,9 @@ void cpu_loop(CPUMIPSState *env)
unsigned int syscall_num;
# endif
+ copy_tcg_context();
+ optimization_init(env);
+
for(;;) {
cpu_exec_start(cs);
trapnr = cpu_mips_exec(cs);
@@ -2653,6 +2681,9 @@ void cpu_loop(CPUOpenRISCState *env)
CPUState *cs = CPU(openrisc_env_get_cpu(env));
int trapnr, gdbsig;
+ copy_tcg_context();
+ optimization_init(env);
+
for (;;) {
cpu_exec_start(cs);
trapnr = cpu_openrisc_exec(cs);
@@ -2743,6 +2774,9 @@ void cpu_loop(CPUSH4State *env)
int trapnr, ret;
target_siginfo_t info;
+ copy_tcg_context();
+ optimization_init(env);
+
while (1) {
cpu_exec_start(cs);
trapnr = cpu_sh4_exec(cs);
@@ -2805,6 +2839,9 @@ void cpu_loop(CPUCRISState *env)
int trapnr, ret;
target_siginfo_t info;
+ copy_tcg_context();
+ optimization_init(env);
+
while (1) {
cpu_exec_start(cs);
trapnr = cpu_cris_exec(cs);
@@ -2866,6 +2903,9 @@ void cpu_loop(CPUMBState *env)
int trapnr, ret;
target_siginfo_t info;
+ copy_tcg_context();
+ optimization_init(env);
+
while (1) {
cpu_exec_start(cs);
trapnr = cpu_mb_exec(cs);
@@ -2971,6 +3011,9 @@ void cpu_loop(CPUM68KState *env)
target_siginfo_t info;
TaskState *ts = cs->opaque;
+ copy_tcg_context();
+ optimization_init(env);
+
for(;;) {
cpu_exec_start(cs);
trapnr = cpu_m68k_exec(cs);
@@ -3110,6 +3153,9 @@ void cpu_loop(CPUAlphaState *env)
target_siginfo_t info;
abi_long sysret;
+ copy_tcg_context();
+ optimization_init(env);
+
while (1) {
cpu_exec_start(cs);
trapnr = cpu_alpha_exec(cs);
@@ -3298,6 +3344,9 @@ void cpu_loop(CPUS390XState *env)
target_siginfo_t info;
target_ulong addr;
+ copy_tcg_context();
+ optimization_init(env);
+
while (1) {
cpu_exec_start(cs);
trapnr = cpu_s390x_exec(cs);
@@ -3602,6 +3651,9 @@ void cpu_loop(CPUTLGState *env)
CPUState *cs = CPU(tilegx_env_get_cpu(env));
int trapnr;
+ copy_tcg_context();
+ optimization_init(env);
+
while (1) {
cpu_exec_start(cs);
trapnr = cpu_tilegx_exec(cs);
@@ -3711,7 +3763,7 @@ CPUArchState *cpu_copy(CPUArchState *env)
cpu_breakpoint_insert(new_cpu, bp->pc, bp->flags, NULL);
}
QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
- cpu_watchpoint_insert(new_cpu, wp->vaddr, wp->len, wp->flags, NULL);
+ cpu_watchpoint_insert(new_cpu, wp->addr, wp->len, wp->flags, NULL);
}
return new_env;
@@ -4009,6 +4061,12 @@ static void usage(int exitcode)
"Note that if you provide several changes to a single variable\n"
"the last change will stay in effect.\n");
+#if defined(CONFIG_LLVM)
+ printf("\n\nHQEMU ");
+ fflush(stdout);
+ hqemu_help();
+#endif
+
exit(exitcode);
}
@@ -4324,7 +4382,11 @@ int main(int argc, char **argv, char **envp)
/* Now that we've loaded the binary, GUEST_BASE is fixed. Delay
generating the prologue until now so that the prologue can take
the real value of GUEST_BASE into account. */
- tcg_prologue_init(&tcg_ctx);
+ tcg_prologue_init(&tcg_ctx_global);
+
+#if defined(CONFIG_LLVM)
+ llvm_init();
+#endif
#if defined(TARGET_I386)
env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK;
@@ -4663,6 +4725,7 @@ int main(int argc, char **argv, char **envp)
}
gdb_handlesig(cpu, 0);
}
+
cpu_loop(env);
/* never exits */
return 0;
diff --git a/linux-user/strace.c b/linux-user/strace.c
index ea6c1d2..69d5408 100644
--- a/linux-user/strace.c
+++ b/linux-user/strace.c
@@ -7,6 +7,7 @@
#include <sys/types.h>
#include <sys/mount.h>
#include <sys/mman.h>
+#include <sys/sysmacros.h>
#include <unistd.h>
#include <sched.h>
#include "qemu.h"
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 6c64ba6..030eb2a 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -114,6 +114,7 @@ int __clone2(int (*fn)(void *), void *child_stack_base,
#include "uname.h"
#include "qemu.h"
+#include "hqemu.h"
#define CLONE_NPTL_FLAGS2 (CLONE_SETTLS | \
CLONE_PARENT_SETTID | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID)
@@ -4495,7 +4496,7 @@ abi_long do_arch_prctl(CPUX86State *env, int code, abi_ulong addr)
#endif /* defined(TARGET_I386) */
-#define NEW_STACK_SIZE 0x40000
+#define NEW_STACK_SIZE 0x80000
static pthread_mutex_t clone_lock = PTHREAD_MUTEX_INITIALIZER;
@@ -5710,6 +5711,12 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
rcu_unregister_thread();
pthread_exit(NULL);
}
+
+ optimization_finalize((CPUArchState *)cpu_env);
+#if defined(CONFIG_LLVM)
+ llvm_finalize();
+#endif
+
#ifdef TARGET_GPROF
_mcleanup();
#endif
@@ -7615,6 +7622,10 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
#ifdef __NR_exit_group
/* new thread calls */
case TARGET_NR_exit_group:
+ optimization_finalize((CPUArchState *)cpu_env);
+#if defined(CONFIG_LLVM)
+ llvm_finalize();
+#endif
#ifdef TARGET_GPROF
_mcleanup();
#endif
diff --git a/llvm/analysis/InnerLoopAnalysis.cpp b/llvm/analysis/InnerLoopAnalysis.cpp
new file mode 100644
index 0000000..f67d380
--- /dev/null
+++ b/llvm/analysis/InnerLoopAnalysis.cpp
@@ -0,0 +1,631 @@
+/*
+ * (C) 2016 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm-debug.h"
+#include "llvm-opc.h"
+#include "llvm-pass.h"
+#include "InnerLoopAnalysis.h"
+
+
+/*
+ * The InnerLoop class represents a single innermost loop. The shape of
+ * InnerLoop is specific to the DBT decoded guest loop, and its loop definition
+ * is different from a natural loop, e.g., latch and exiting block.
+ * For example, the binary of a natural loop (a) will be translated to the loop
+ * CFG (b), which includes an additional block L(loopback) to check flag
+ * `tcg_exit_req' and exits the loop to block E if the flag is raised, otherwise,
+ * goes back to the loop header A.
+ *
+ * In loop (b), a latch is split into two blocks, B and L. The loop bottom test
+ * is in block B and the backward branch is included in block L (which also
+ * has an exit block E attached to it). We include block L in the loop body and
+ * have the following definitions: (1) block B and L are latch head and tail,
+ * respectively; (2) a latch tail is the source of a backedge; (3) block B is a
+ * loop exiting block, but block L is not, and block E is not included in the
+ * exit blocks.
+ *
+ * (a) A (b) A
+ * || |
+ * B B
+ * / / \
+ * C C L -> A
+ * \
+ * E
+ */
+InnerLoop::InnerLoop(Loop *loop)
+    : TheLoop(*loop), Blocks(TheLoop.getBlocks()), UnknownPhi(false)
+{
+    /* Mirror the loop's block list into the block set. */
+    for (auto Block : Blocks)
+        DenseBlockSet.insert(Block);
+
+    /* Record every latch reported by LLVM. A latch whose terminator is
+     * tagged as a loop branch and which has a single predecessor is a
+     * "latch tail": remember the predecessor as its latch head. */
+    SmallVector<BasicBlock *, 8> LatchList;
+    TheLoop.getLoopLatches(LatchList);
+    for (BasicBlock *Tail : LatchList) {
+        Latches.push_back(Tail);
+
+        if (!MDFactory::isLoop(Tail->getTerminator()))
+            continue;
+
+        if (BasicBlock *Head = Tail->getSinglePredecessor())
+            SplitLatches[Tail] = Head;
+    }
+}
+
+
+/* Return true when BB has at least one successor that lies outside of this
+ * loop. A block registered as a latch tail is never reported as exiting. */
+bool InnerLoop::isLoopExiting(BasicBlock *BB) const
+{
+    if (SplitLatches.find(BB) != SplitLatches.end())
+        return false;
+
+    typedef GraphTraits<const BasicBlock*> SuccTraits;
+    typename SuccTraits::ChildIteratorType Succ = SuccTraits::child_begin(BB);
+    typename SuccTraits::ChildIteratorType SuccEnd = SuccTraits::child_end(BB);
+    while (Succ != SuccEnd) {
+        if (!contains(*Succ))
+            return true;
+        ++Succ;
+    }
+    return false;
+}
+
+/* Count the predecessors of the loop header that are themselves inside the
+ * loop; each such predecessor contributes one back edge. */
+unsigned InnerLoop::getNumBackEdges() const
+{
+    typedef GraphTraits<Inverse<BasicBlock*> > PredTraits;
+
+    BasicBlock *Header = getHeader();
+    unsigned Count = 0;
+
+    typename PredTraits::ChildIteratorType Pred = PredTraits::child_begin(Header);
+    typename PredTraits::ChildIteratorType PredEnd = PredTraits::child_end(Header);
+    for (; Pred != PredEnd; ++Pred) {
+        if (contains(*Pred))
+            Count += 1;
+    }
+
+    return Count;
+}
+
+/* Append to ExitingBlocks every loop block that has a successor outside of
+ * the loop. Latch tail blocks are skipped, and each block is appended at
+ * most once. */
+void InnerLoop::getExitingBlocks(SmallVectorImpl<BasicBlock *> &ExitingBlocks) const
+{
+    typedef GraphTraits<BasicBlock *> SuccTraits;
+
+    block_iterator Cur = block_begin();
+    block_iterator End = block_end();
+    for (; Cur != End; ++Cur) {
+        /* Latch tails are not exiting blocks by definition. */
+        if (SplitLatches.find(*Cur) != SplitLatches.end())
+            continue;
+
+        typename SuccTraits::ChildIteratorType Succ = SuccTraits::child_begin(*Cur);
+        typename SuccTraits::ChildIteratorType SuccEnd = SuccTraits::child_end(*Cur);
+        while (Succ != SuccEnd) {
+            if (!contains(*Succ)) {
+                /* One outside successor is enough; record the block once. */
+                ExitingBlocks.push_back(*Cur);
+                break;
+            }
+            ++Succ;
+        }
+    }
+}
+
+/* Return the unique exiting block of the loop, or null when
+ * getExitingBlocks() yields zero or several blocks. */
+BasicBlock *InnerLoop::getExitingBlock() const
+{
+    SmallVector<BasicBlock *, 8> Found;
+    getExitingBlocks(Found);
+    return (Found.size() == 1) ? Found[0] : nullptr;
+}
+
+/* Append to ExitBlocks every successor block that lies outside of the loop.
+ * Successors of latch tail blocks are not considered. No de-duplication is
+ * performed, so a block reached by several edges is appended several times. */
+void InnerLoop::getExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const
+{
+    typedef GraphTraits<BasicBlock *> SuccTraits;
+
+    block_iterator Cur = block_begin();
+    block_iterator End = block_end();
+    for (; Cur != End; ++Cur) {
+        /* Latch tails do not contribute exit blocks. */
+        if (SplitLatches.find(*Cur) != SplitLatches.end())
+            continue;
+
+        typename SuccTraits::ChildIteratorType Succ = SuccTraits::child_begin(*Cur);
+        typename SuccTraits::ChildIteratorType SuccEnd = SuccTraits::child_end(*Cur);
+        for (; Succ != SuccEnd; ++Succ) {
+            if (contains(*Succ))
+                continue;
+            /* Successor outside the loop: it is an exit block. */
+            ExitBlocks.push_back(*Succ);
+        }
+    }
+}
+
+/* Return the unique exit block of the loop, or null when getExitBlocks()
+ * yields zero or several entries. */
+BasicBlock *InnerLoop::getExitBlock() const
+{
+    SmallVector<BasicBlock *, 8> Found;
+    getExitBlocks(Found);
+    return (Found.size() == 1) ? Found[0] : nullptr;
+}
+
+/* Return the preheader of this loop, or null if it has none.
+ *
+ * The preheader is the single block outside of the loop that branches to the
+ * loop header, provided that this block has exactly one successor. */
+BasicBlock *InnerLoop::getLoopPreheader() const
+{
+    /* The only candidate is the unique outside predecessor of the header. */
+    BasicBlock *Pred = getLoopPredecessor();
+    if (Pred == nullptr)
+        return nullptr;
+
+    /* Step past the first successor; if another one follows, the block has
+     * several ways out and therefore is not a preheader. */
+    typedef GraphTraits<BasicBlock *> SuccTraits;
+    typename SuccTraits::ChildIteratorType Second = SuccTraits::child_begin(Pred);
+    ++Second;
+
+    return (Second == SuccTraits::child_end(Pred)) ? Pred : nullptr;
+}
+
+/* If the given loop's header has exactly one unique predecessor outside the
+ * loop, return it. Otherwise return null.
+ * This is less strict than the loop "preheader" concept, which requires
+ * the predecessor to have exactly one successor. */
+BasicBlock *InnerLoop::getLoopPredecessor() const
+{
+ /* Keep track of nodes outside the loop branching to the header. */
+ BasicBlock *Out = nullptr;
+
+ /* Loop over the predecessors of the header node. Both preprocessor
+ * branches below perform the same scan; the first one is selected for the
+ * LLVM_V35/LLVM_V38/LLVM_V39 builds and spells out the GraphTraits
+ * iterators, the other one uses the children<> range helper. */
+ BasicBlock *Header = getHeader();
+#if defined(LLVM_V35) || defined(LLVM_V38) || defined(LLVM_V39)
+ typedef GraphTraits<Inverse<BasicBlock *> > InvBlockTraits;
+ for (typename InvBlockTraits::ChildIteratorType PI =
+ InvBlockTraits::child_begin(Header),
+ PE = InvBlockTraits::child_end(Header); PI != PE; ++PI) {
+ typename InvBlockTraits::NodeType *N = *PI;
+ if (!contains(N)) { /* If the block is not in the loop. */
+ if (Out && Out != N)
+ return nullptr; /* Multiple predecessors outside the loop */
+ Out = N; /* First (or repeated) outside predecessor seen so far. */
+ }
+ }
+#else
+ for (const auto Pred : children<Inverse<BasicBlock *> >(Header)) {
+ if (!contains(Pred)) { /* If the block is not in the loop. */
+ if (Out && Out != Pred)
+ return nullptr; /* Multiple predecessors outside the loop */
+ Out = Pred; /* First (or repeated) outside predecessor seen so far. */
+ }
+ }
+#endif
+
+ return Out; /* Null when every predecessor of the header is inside the loop. */
+}
+
+/* Return true if To can be reached from From by following def-use (user)
+ * edges through instructions located inside this loop. Both instructions
+ * must belong to the loop; an instruction trivially reaches itself. */
+bool InnerLoop::isReachable(Instruction *From, Instruction *To)
+{
+    if (!(contains(From->getParent()) && contains(To->getParent())))
+        return false;
+    if (From == To)
+        return true;
+
+    SmallPtrSet<Instruction*, 8> Seen;
+    SmallVector<Instruction*, 8> Pending;
+
+    /* Depth-first walk over the users, starting at From. */
+    Pending.push_back(From);
+    while (!Pending.empty()) {
+        Instruction *Cur = Pending.pop_back_val();
+
+        /* Expand every instruction only once. */
+        if (Seen.count(Cur))
+            continue;
+        Seen.insert(Cur);
+
+        for (User *U : Cur->users()) {
+            Instruction *UserInst = cast<Instruction>(U);
+            if (UserInst == To)
+                return true;
+
+            /* Users outside of the loop are not followed. */
+            if (contains(UserInst->getParent()))
+                Pending.push_back(UserInst);
+        }
+    }
+
+    return false;
+}
+
+
+/*
+ * InnerLoopAnalysis
+ */
+/* Collect the innermost loops nested in L (including L itself when it has no
+ * sub-loop) into Loops.
+ *
+ * An innermost loop is dropped if any of its basic blocks already belongs to
+ * a loop recorded earlier. */
+static void addInnerLoop(Loop &L, std::vector<Loop *> &Loops)
+{
+    if (L.empty()) {
+        /* Innermost loop.
+         * If any basic block of current loop has been included in another
+         * loop, skip this loop.
+         *
+         * Iterate over the *blocks* of L: L.begin()/L.end() enumerate
+         * sub-loops, which are always empty here, so scanning them would
+         * never detect an overlap. */
+        for (Loop *InnerL : Loops) {
+            for (auto BI = L.block_begin(), BE = L.block_end(); BI != BE; ++BI) {
+                if (InnerL->contains(*BI))
+                    return;
+            }
+        }
+        Loops.push_back(&L);
+        return;
+    }
+
+    /* Not innermost: descend into every sub-loop. */
+    for (Loop *InnerL : L)
+        addInnerLoop(*InnerL, Loops);
+}
+
+
+/* Find the innermost loops known to LI, wrap each one in a newly allocated
+ * InnerLoop, and then run the phi analysis on every entry of InnerLoops. */
+void InnerLoopAnalysis::analyze(LoopInfo *LI, ScalarEvolution *SE)
+{
+    /* Step 1: gather the innermost loops below every top-level loop. */
+    std::vector<Loop *> Innermost;
+    for (Loop *TopLevel : *LI)
+        addInnerLoop(*TopLevel, Innermost);
+
+    /* Step 2: create one InnerLoop wrapper per collected loop. */
+    for (Loop *Candidate : Innermost)
+        InnerLoops.push_back(new InnerLoop(Candidate));
+
+    /* Step 3: classify the phi nodes of each wrapped loop. */
+    for (auto Wrapped : InnerLoops)
+        analyzePhi(*Wrapped, SE);
+}
+
+/* Try to recognize Phi as an induction variable of TheLoop.
+ *
+ * Phi qualifies when it is of integer or pointer type, every user outside of
+ * the loop is a store, its SCEV is an add-recurrence, and the recurrence step
+ * is either a constant or loop invariant. On success the induction (phi,
+ * start value, step) is registered with TheLoop and true is returned;
+ * otherwise false is returned and TheLoop is left untouched. */
+bool InnerLoopAnalysis::analyzeInduction(InnerLoop &TheLoop,
+                                         ScalarEvolution *SE,
+                                         PHINode *Phi)
+{
+    Type *PhiTy = Phi->getType();
+    /* We only handle integer and pointer induction variables. */
+    if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
+        return false;
+
+    /* We only handle induction that has no outside users (except that the
+     * outside users are all stores.) */
+    for (User *U : Phi->users()) {
+        Instruction *UI = cast<Instruction>(U);
+        if (!TheLoop.contains(UI) && !isa<StoreInst>(UI))
+            return false;
+    }
+
+    const SCEV *PhiScev = SE->getSCEV(Phi);
+    const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
+    if (!AR)
+        return false;
+
+    const SCEV *Step = AR->getStepRecurrence(*SE);
+    const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step);
+    if (!ConstStep && !SE->isLoopInvariant(Step, AR->getLoop()))
+        return false;
+
+    /* The start value is the value flowing in from the preheader. The loop
+     * may have no preheader, or the preheader may not be an incoming block
+     * of this phi; getIncomingValueForBlock() must not be called in either
+     * case, so give up on the phi instead. */
+    BasicBlock *Preheader = AR->getLoop()->getLoopPreheader();
+    if (!Preheader || Phi->getBasicBlockIndex(Preheader) < 0)
+        return false;
+
+    /* We found an induction variable. */
+    Value *StartValue = Phi->getIncomingValueForBlock(Preheader);
+    TheLoop.addInduction(Phi, StartValue, Step);
+
+    return true;
+}
+
+/*
+ * isReductionInstr()
+ *  Decide whether instruction I may take part in a reduction chain.
+ *
+ *  PHI and BitCast instructions are always accepted and leave Kind and Ty
+ *  untouched. For the supported arithmetic/logic opcodes, Ty is set to the
+ *  scalar type of I, and Kind is set on the first such instruction; any later
+ *  instruction must map to the same Kind, otherwise it is rejected. All other
+ *  opcodes are rejected.
+ */
+static bool isReductionInstr(Instruction *I, ReductionDesc::ReductionKind &Kind,
+                             Type *&Ty)
+{
+    ReductionDesc::ReductionKind OpKind = ReductionDesc::NoReduction;
+    switch (I->getOpcode()) {
+    case Instruction::PHI:
+    case Instruction::BitCast:
+        return true;
+    case Instruction::Add:
+    case Instruction::Sub:
+        OpKind = ReductionDesc::IntegerAdd;
+        break;
+    case Instruction::Mul:
+        OpKind = ReductionDesc::IntegerMult;
+        break;
+    case Instruction::And:
+        OpKind = ReductionDesc::IntegerAnd;
+        break;
+    case Instruction::Or:
+        OpKind = ReductionDesc::IntegerOr;
+        break;
+    case Instruction::Xor:
+        OpKind = ReductionDesc::IntegerXor;
+        break;
+    case Instruction::FAdd:
+    case Instruction::FSub:
+        OpKind = ReductionDesc::FloatAdd;
+        break;
+    case Instruction::FMul:
+        OpKind = ReductionDesc::FloatMult;
+        break;
+    default:
+        return false;
+    }
+
+    /* Report the element type for vectors, the type itself otherwise. */
+    VectorType *VecTy = dyn_cast<VectorType>(I->getType());
+    Ty = VecTy ? VecTy->getScalarType() : I->getType();
+
+    /* First reduction operation seen: it fixes the kind. */
+    if (Kind == ReductionDesc::NoReduction) {
+        Kind = OpKind;
+        return true;
+    }
+
+    /* Later operations must agree with the kind already recorded. */
+    return Kind == OpKind;
+}
+
+/* Return true if more than one operand of I is an instruction contained in
+ * Insts. Operands that are not instructions never match. */
+static bool hasMultipleUsesOf(Instruction *I,
+                              SmallPtrSet<Instruction *, 8> &Insts)
+{
+    unsigned Matches = 0;
+    User::op_iterator Op = I->op_begin();
+    User::op_iterator OpEnd = I->op_end();
+    for (; Op != OpEnd; ++Op) {
+        if (Insts.count(dyn_cast<Instruction>(*Op)))
+            Matches += 1;
+        /* Stop as soon as a second matching operand has been seen. */
+        if (Matches > 1)
+            return true;
+    }
+    return false;
+}
+
+/* A user is "legal" when it is a store instruction that
+ * MDFactory::isGuestMemory() does not flag. */
+static bool isLegalUser(Instruction *I)
+{
+    return isa<StoreInst>(I) && !MDFactory::isGuestMemory(I);
+}
+
+bool InnerLoopAnalysis::analyzeReduction(InnerLoop &TheLoop, PHINode *Phi)
+{
+ if (Phi->getNumIncomingValues() != 2)
+ return false;
+
+ /* Reduction variables are only found in the loop header block. */
+ if (Phi->getParent() != TheLoop.getHeader())
+ return false;
+
+ /* Obtain the reduction start value from the loop preheader. */
+ Value *StartValue = Phi->getIncomingValueForBlock(TheLoop.getLoopPreheader());
+
+ /* ExitInstruction is the single value which is used outside the loop.
+ * We only allow for a single reduction value to be used outside the loop.
+ * This includes users of the reduction, variables (which form a cycle
+ * which ends in the phi node). */
+ Instruction *ExitInstruction = nullptr;
+ /* Indicates that we found a reduction operation in our scan. */
+ bool FoundReduxOp = false;
+
+ /* We start with the PHI node and scan for all of the users of this
+ * instruction. All users must be instructions that can be used as reduction
+ * variables (such as ADD). We must have a single out-of-block user. The cycle
+ * must include the original PHI. */
+ bool FoundStartPHI = false;
+
+ ReductionDesc::ReductionKind Kind = ReductionDesc::NoReduction;
+ Type *Ty = nullptr;
+
+ SmallPtrSet<Instruction *, 8> VisitedInsts;
+ SmallVector<Instruction *, 8> Worklist;
+ Worklist.push_back(Phi);
+ VisitedInsts.insert(Phi);
+
+ /* A value in the reduction can be used:
+ * - By the reduction:
+ * - Reduction operation:
+ * - One use of reduction value (safe).
+ * - Multiple use of reduction value (not safe).
+ * - PHI:
+ * - All uses of the PHI must be the reduction (safe).
+ * - Otherwise, not safe.
+ * - By one or no instruction outside of the loop (safe).
+ * - By further instructions outside of the loop (not safe).
+ * - By an instruction that is not part of the reduction (not safe).
+ * This is either:
+ * An instruction type other than PHI or the reduction operation.
+ * A PHI in the header other than the initial PHI. */
+ while (!Worklist.empty()) {
+ Instruction *Cur = Worklist.back();
+ Worklist.pop_back();
+
+ /* No Users.
+ * If the instruction has no users then this is a broken chain and
+ * cannot be a reduction variable. */
+ if (Cur->use_empty())
+ return false;
+
+ bool IsAPhi = isa<PHINode>(Cur);
+ bool IsBitCast = isa<BitCastInst>(Cur);
+
+ /* Currently, we don't handle a reduction used by another PHI other than
+ * the original PHI. */
+ if (IsAPhi && Cur != Phi)
+ return false;
+
+ /* Any reduction instruction must be of one of the allowed kinds. */
+ if (!isReductionInstr(Cur, Kind, Ty))
+ return false;
+
+ /* Reductions of instructions such as Div, and Sub is only possible if the
+ * LHS is the reduction variable. */
+ if (!IsAPhi && !Cur->isCommutative() &&
+ !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
+ return false;
+
+ /* A reduction operation must only have one use of the reduction value. */
+ if (!IsAPhi && hasMultipleUsesOf(Cur, VisitedInsts))
+ return false;
+
+ /* Check whether we found a reduction operator. */
+ FoundReduxOp |= (!IsAPhi && !IsBitCast);
+
+ /* Process users of current instruction. Push non-PHI nodes after PHI
+ * nodes onto the stack. This way we are going to have seen all inputs
+ * to PHI nodes once we get to them. */
+ SmallVector<Instruction *, 8> NonPHIs;
+ SmallVector<Instruction *, 8> PHIs;
+ for (User *U : Cur->users()) {
+ Instruction *UI = cast<Instruction>(U);
+
+ if (isLegalUser(UI))
+ continue;
+
+ /* Check if we found the exit user. */
+ BasicBlock *Parent = UI->getParent();
+ if (!TheLoop.contains(Parent)) {
+ /* Exit if you find multiple outside users or if the header phi node is
+ * being used. In this case the user uses the value of the previous
+ * iteration, in which case we would lose "VF-1" iterations of the
+ * reduction operation if we vectorize. */
+ if (ExitInstruction != nullptr || Cur == Phi)
+ return false;
+
+ /* The instruction used by an outside user must be the last instruction
+ * before we feed back to the reduction phi. Otherwise, we lose VF-1
+ * operations on the value. */
+ if (std::find(Phi->op_begin(), Phi->op_end(), Cur) == Phi->op_end())
+ return false;
+
+ ExitInstruction = Cur;
+ continue;
+ }
+
+ /* Process instructions only once (termination). Each reduction cycle
+ * value must only be used once, except by phi nodes and min/max
+ * reductions which are represented as a cmp followed by a select. */
+ if (!VisitedInsts.count(UI)) {
+ VisitedInsts.insert(UI);
+ if (isa<PHINode>(UI))
+ PHIs.push_back(UI);
+ else
+ NonPHIs.push_back(UI);
+ } else if (!isa<PHINode>(UI))
+ return false;
+
+ /* Remember that we completed the cycle. */
+ if (UI == Phi)
+ FoundStartPHI = true;
+ }
+ Worklist.append(PHIs.begin(), PHIs.end());
+ Worklist.append(NonPHIs.begin(), NonPHIs.end());
+ }
+
+ /* Set the exit instruction to the last instruction feed back to the
+ * reduction phi if we cannot find an exit instruction. */
+ if (!ExitInstruction) {
+ Value *NextValue = Phi->getIncomingValueForBlock(TheLoop.getSingleLatchTail());
+ if (!isa<Instruction>(NextValue))
+ return false;
+ ExitInstruction = cast<Instruction>(NextValue);
+ }
+
+ if (!FoundStartPHI || !FoundReduxOp)
+ return false;
+
+ /* We found a reduction variable. */
+ TheLoop.addReduction(Phi, StartValue, ExitInstruction, Kind, Ty);
+
+ return true;
+}
+
+/*
+ * Walk the leading PHI nodes of every block in TheLoop and try to classify
+ * each one as an induction or, failing that, a reduction. TheLoop.UnknownPhi
+ * is set when a PHI cannot be classified. A PHI outside the header, or a
+ * loop lacking a preheader or a single latch tail, sets the flag and stops
+ * the scan immediately.
+ */
+void InnerLoopAnalysis::analyzePhi(InnerLoop &TheLoop, ScalarEvolution *SE)
+{
+    BasicBlock *LoopHeader = TheLoop.getHeader();
+    for (BasicBlock *Block : TheLoop.blocks()) {
+        auto PhiEnd = BasicBlock::iterator(Block->getFirstNonPHI());
+
+        for (auto It = Block->begin(); It != PhiEnd; ++It) {
+            /* Only header PHIs are supported, and inductions/reductions can
+             * only be analyzed with a preheader and one split latch. The
+             * checks are evaluated in this order, short-circuiting. */
+            bool Analyzable = Block == LoopHeader &&
+                              TheLoop.getLoopPreheader() &&
+                              TheLoop.getSingleLatchTail();
+            if (!Analyzable) {
+                TheLoop.UnknownPhi = true;
+                return;
+            }
+
+            PHINode *Phi = cast<PHINode>(It);
+            if (analyzeInduction(TheLoop, SE, Phi))
+                continue;
+            if (analyzeReduction(TheLoop, Phi))
+                continue;
+
+            /* Neither an induction nor a reduction. */
+            TheLoop.UnknownPhi = true;
+        }
+    }
+}
+
+
+/*
+ * InnerLoopAnalysisWrapperPass Pass
+ */
+/* Pass identifier used when registering the pass below. */
+char InnerLoopAnalysisWrapperPass::ID = 0;
+/* Register the pass together with the analyses it depends on. Under LLVM_V35
+ * LoopInfo and ScalarEvolution are named directly; otherwise their
+ * *WrapperPass counterparts are used. */
+INITIALIZE_PASS_BEGIN(InnerLoopAnalysisWrapperPass, "InnerLoopAnalysis",
+ "Inner Loop Analysis", true, true)
+#if defined(LLVM_V35)
+INITIALIZE_PASS_DEPENDENCY(LoopInfo)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+#else
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+#endif
+INITIALIZE_PASS_END(InnerLoopAnalysisWrapperPass, "InnerLoopAnalysis",
+ "Inner Loop Analysis", true, true)
+
+/* Forward to the wrapped analysis object so it can drop its results. */
+void InnerLoopAnalysisWrapperPass::releaseMemory() {
+ LA.releaseMemory();
+}
+
+/* Declare that this pass preserves all analyses and requires loop info and
+ * scalar evolution (same version split as the registration above). */
+void InnerLoopAnalysisWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+#if defined(LLVM_V35)
+ AU.addRequired<LoopInfo>();
+ AU.addRequired<ScalarEvolution>();
+#else
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+#endif
+}
+/* Print the wrapped analysis to OS; the Module argument is unused. */
+void InnerLoopAnalysisWrapperPass::print(raw_ostream &OS, const Module *) const {
+ LA.print(OS);
+}
+
+/* Delegate verification to the wrapped analysis. */
+void InnerLoopAnalysisWrapperPass::verifyAnalysis() const {
+ LA.verify();
+}
+
+/* Fetch LoopInfo and ScalarEvolution for the current function and run the
+ * wrapped analysis on them. Always returns false; F itself is not referenced
+ * here. */
+bool InnerLoopAnalysisWrapperPass::runOnFunction(Function &F) {
+#if defined(LLVM_V35)
+ ScalarEvolution *SE = &getAnalysis<ScalarEvolution>();
+ LoopInfo *LI = &getAnalysis<LoopInfo>();
+#else
+ ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+#endif
+
+ LA.analyze(LI, SE);
+ return false;
+}
+
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
+
diff --git a/llvm/atomic/atomic-arm.c b/llvm/atomic/atomic-arm.c
new file mode 100644
index 0000000..4176caa
--- /dev/null
+++ b/llvm/atomic/atomic-arm.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright (C) 2010 Parallel Processing Institute (PPI), Fudan Univ.
+ * <http://ppi.fudan.edu.cn/system_research_group>
+ *
+ * Authors:
+ * Zhaoguo Wang <zgwang@fudan.edu.cn>
+ * Yufei Chen <chenyufei@fudan.edu.cn>
+ * Ran Liu <naruilone@gmail.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* We include this file in op_helper.c */
+
+#include <stdlib.h>
+#include <pthread.h>
+#include "coremu-atomic.h"
+
+/* Per-thread state of the emulated exclusive monitor: the value observed by
+ * the last load-exclusive and the guest address it was loaded from. The
+ * address is reset to -1 (no reservation) by the store/clear helpers. */
+__thread uint64_t cm_exclusive_val;
+__thread uint32_t cm_exclusive_addr = -1;
+
+/*
+ * Generate HELPER(load_exclusive<type>): record the guest address in the
+ * per-thread monitor, load a DATA_<type> from the translated host address,
+ * remember the loaded value for the matching store-exclusive and write it
+ * to env->regs[reg]. The TYPE argument is accepted but not used.
+ */
+#define GEN_LOAD_EXCLUSIVE(type, TYPE) \
+void HELPER(load_exclusive##type)(CPUArchState *env, uint32_t reg, \
+                                  uint32_t addr) \
+{ \
+    unsigned long host_addr = 0; \
+    DATA_##type loaded = 0; \
+    \
+    /* The reservation is recorded before the address is translated. */ \
+    cm_exclusive_addr = addr; \
+    CM_GET_QEMU_ADDR(env, host_addr, addr); \
+    loaded = *(DATA_##type *)host_addr; \
+    \
+    cm_exclusive_val = loaded; \
+    env->regs[reg] = loaded; \
+}
+
+GEN_LOAD_EXCLUSIVE(b, B);
+GEN_LOAD_EXCLUSIVE(w, W);
+GEN_LOAD_EXCLUSIVE(l, L);
+//GEN_LOAD_EXCLUSIVE(q, Q);
+
+/*
+ * Generate HELPER(store_exclusive<type>): emulate a store-exclusive with a
+ * compare-and-swap against the value seen by the last load-exclusive.
+ * env->regs[res] receives 0 when the store happened and 1 otherwise (address
+ * mismatch or memory changed). The monitor address is reset to -1 in both
+ * cases. The TYPE argument is accepted but not used.
+ */
+#define GEN_STORE_EXCLUSIVE(type, TYPE) \
+void HELPER(store_exclusive##type)(CPUArchState *env, uint32_t res, \
+                                   uint32_t reg, uint32_t addr) \
+{ \
+    uint32_t status = 1; /* assume failure */ \
+    \
+    if (addr == cm_exclusive_addr) { \
+        unsigned long host_addr = 0; \
+        DATA_##type desired = 0; \
+        DATA_##type seen = 0; \
+        \
+        CM_GET_QEMU_ADDR(env, host_addr, addr); \
+        desired = (DATA_##type)env->regs[reg]; \
+        \
+        seen = atomic_compare_exchange##type((DATA_##type *)host_addr, \
+                        (DATA_##type)cm_exclusive_val, desired); \
+        \
+        /* The CAS returns the previous memory content. */ \
+        if (seen == (DATA_##type)cm_exclusive_val) \
+            status = 0; \
+    } \
+    \
+    env->regs[res] = status; \
+    cm_exclusive_addr = -1; \
+}
+
+GEN_STORE_EXCLUSIVE(b, B);
+GEN_STORE_EXCLUSIVE(w, W);
+GEN_STORE_EXCLUSIVE(l, L);
+//GEN_STORE_EXCLUSIVE(q, Q);
+
+/*
+ * 64-bit load-exclusive: records the monitored address, loads a doubleword
+ * from the translated host address and splits it across env->regs[reg]
+ * (low 32 bits) and env->regs[reg + 1] (high 32 bits).
+ */
+void HELPER(load_exclusiveq)(CPUArchState *env, uint32_t reg, uint32_t addr)
+{
+    unsigned long host_addr = 0;
+    uint64_t loaded;
+
+    cm_exclusive_addr = addr;
+    CM_GET_QEMU_ADDR(env, host_addr, addr);
+    loaded = *(uint64_t *)host_addr;
+
+    cm_exclusive_val = loaded;
+    env->regs[reg] = (uint32_t)loaded;
+    env->regs[reg + 1] = (uint32_t)(loaded >> 32);
+}
+
+/*
+ * 64-bit store-exclusive: builds the doubleword from env->regs[reg] (low)
+ * and env->regs[reg + 1] (high) and stores it with a compare-and-swap
+ * against the value seen by the last load-exclusive. env->regs[res] is 0 on
+ * success and 1 on failure; the monitor address is reset to -1 either way.
+ */
+void HELPER(store_exclusiveq)(CPUArchState *env, uint32_t res, uint32_t reg, uint32_t addr)
+{
+    uint32_t status = 1; /* assume failure */
+
+    if (addr == cm_exclusive_addr) {
+        unsigned long host_addr = 0;
+        uint64_t desired;
+        uint64_t seen;
+
+        CM_GET_QEMU_ADDR(env, host_addr, addr);
+        desired = (uint32_t)env->regs[reg];
+        desired |= ((uint64_t)env->regs[reg + 1]) << 32;
+
+        seen = atomic_compare_exchangeq((uint64_t *)host_addr,
+                                        (uint64_t)cm_exclusive_val, desired);
+
+        /* The CAS returns the previous memory content. */
+        if (seen == (uint64_t)cm_exclusive_val)
+            status = 0;
+    }
+
+    env->regs[res] = status;
+    cm_exclusive_addr = -1;
+}
+
+/* Drop the current thread's reservation by resetting the monitored address
+ * to -1; env is not used. */
+void HELPER(clear_exclusive)(CPUArchState *env)
+{
+ cm_exclusive_addr = -1;
+}
+
+/*
+ * Byte swap helper: atomically exchanges the low byte of env->regs[src] with
+ * the byte at the guest address held in env->regs[addr] and writes the
+ * previous memory byte to env->regs[dst]. Note that dst, src and addr are
+ * register indices, not values.
+ */
+void HELPER(swpb)(CPUArchState *env, uint32_t dst, uint32_t src, uint32_t addr)
+{
+    unsigned long host_addr;
+    uint8_t new_byte, prev_byte;
+
+    CM_GET_QEMU_ADDR(env, host_addr, env->regs[addr]);
+    new_byte = (uint8_t)env->regs[src];
+    prev_byte = atomic_exchangeb((uint8_t *)host_addr, new_byte);
+    env->regs[dst] = prev_byte;
+}
+
+/*
+ * Word swap helper: atomically exchanges env->regs[src] with the 32-bit word
+ * at the guest address held in env->regs[addr] and writes the previous
+ * memory word to env->regs[dst]. Note that dst, src and addr are register
+ * indices, not values.
+ */
+void HELPER(swp)(CPUArchState *env, uint32_t dst, uint32_t src, uint32_t addr)
+{
+    unsigned long host_addr;
+    uint32_t new_word, prev_word;
+
+    CM_GET_QEMU_ADDR(env, host_addr, env->regs[addr]);
+    new_word = env->regs[src];
+    prev_word = atomic_exchangel((uint32_t *)host_addr, new_word);
+    env->regs[dst] = prev_word;
+}
diff --git a/llvm/atomic/atomic-helper.h b/llvm/atomic/atomic-helper.h
new file mode 100644
index 0000000..9e3cedf
--- /dev/null
+++ b/llvm/atomic/atomic-helper.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright (C) 2010 Parallel Processing Institute (PPI), Fudan Univ.
+ * <http://ppi.fudan.edu.cn/system_research_group>
+ *
+ * Authors:
+ * Zhaoguo Wang <zgwang@fudan.edu.cn>
+ * Yufei Chen <chenyufei@fudan.edu.cn>
+ * Ran Liu <naruilone@gmail.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "config-target.h"
+
+/* Helper declarations for the COREMU atomic instruction emulation; only
+ * emitted when CONFIG_COREMU is defined. */
+#ifdef CONFIG_COREMU
+
+#if defined(TARGET_I386)
+/* Per-operand-size x86 helpers; `type` is the size suffix (b/w/l/q). */
+#define __GEN_HEADER(type) \
+DEF_HELPER_3(atomic_inc##type, void, env, tl, int) \
+DEF_HELPER_4(xchg##type, void, env, tl, int, int) \
+DEF_HELPER_4(atomic_op##type, void, env, tl, tl, int) \
+DEF_HELPER_4(atomic_xadd##type, void, env, tl, int, int) \
+DEF_HELPER_4(atomic_cmpxchg##type, void, env, tl, int, int) \
+DEF_HELPER_2(atomic_not##type, void, env, tl) \
+DEF_HELPER_2(atomic_neg##type, void, env, tl)
+
+__GEN_HEADER(b)
+__GEN_HEADER(w)
+__GEN_HEADER(l)
+/* The 64-bit variants exist only for x86-64 guests. */
+#ifdef TARGET_X86_64
+__GEN_HEADER(q)
+#endif
+
+/* Wide compare-and-exchange helpers (declared for all x86 guests). */
+DEF_HELPER_2(atomic_cmpxchg8b, void, env, tl)
+DEF_HELPER_2(atomic_cmpxchg16b, void, env, tl)
+
+/* Bit test-and-modify helpers: (env, address, bit offset, operand size). */
+DEF_HELPER_4(atomic_bts, void, env, tl, tl, int)
+DEF_HELPER_4(atomic_btr, void, env, tl, tl, int)
+DEF_HELPER_4(atomic_btc, void, env, tl, tl, int)
+
+/* fence */
+DEF_HELPER_1(fence, void, env)
+
+#elif defined(TARGET_ARM)
+/* Per-size ARM exclusive load/store helpers; `type` is b/w/l/q. */
+#define __GEN_HEADER(type) \
+DEF_HELPER_3(load_exclusive##type, void, env, i32, i32) \
+DEF_HELPER_4(store_exclusive##type, void, env, i32, i32, i32)
+
+__GEN_HEADER(b)
+__GEN_HEADER(w)
+__GEN_HEADER(l)
+__GEN_HEADER(q)
+
+DEF_HELPER_1(clear_exclusive, void, env)
+
+/* Swap helpers: (env, dst register, src register, address register). */
+DEF_HELPER_4(swpb, void, env, i32, i32, i32)
+DEF_HELPER_4(swp, void, env, i32, i32, i32)
+#else
+#error "unsupported processor type"
+#endif
+
+#endif
+
diff --git a/llvm/atomic/atomic-x86.c b/llvm/atomic/atomic-x86.c
new file mode 100644
index 0000000..dc0baf0
--- /dev/null
+++ b/llvm/atomic/atomic-x86.c
@@ -0,0 +1,504 @@
+/*
+ * Copyright (C) 2010 Parallel Processing Institute (PPI), Fudan Univ.
+ * <http://ppi.fudan.edu.cn/system_research_group>
+ *
+ * Authors:
+ * Zhaoguo Wang <zgwang@fudan.edu.cn>
+ * Yufei Chen <chenyufei@fudan.edu.cn>
+ * Ran Liu <naruilone@gmail.com>
+ * Xi Wu <wuxi@fudan.edu.cn>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* We include this file in op_helper.c */
+
+#include <stdlib.h>
+#include <pthread.h>
+#include <assert.h>
+#include "coremu-atomic.h"
+
+/* Shorthand for guest registers; these expand to lvalues in `env`, so a
+ * variable named env must be in scope. They are #undef'd at the end of the
+ * file. */
+#define EAX (env->regs[R_EAX])
+#define ECX (env->regs[R_ECX])
+#define EDX (env->regs[R_EDX])
+#define EBX (env->regs[R_EBX])
+
+/* These definitions are copied from translate.c */
+/* Byte offsets of the sub-registers inside a target_ulong register slot,
+ * depending on host endianness. */
+#if defined(WORDS_BIGENDIAN)
+#define REG_B_OFFSET (sizeof(target_ulong) - 1)
+#define REG_H_OFFSET (sizeof(target_ulong) - 2)
+#define REG_W_OFFSET (sizeof(target_ulong) - 2)
+#define REG_L_OFFSET (sizeof(target_ulong) - 4)
+#define REG_LH_OFFSET (sizeof(target_ulong) - 8)
+#else
+#define REG_B_OFFSET 0
+#define REG_H_OFFSET 1
+#define REG_W_OFFSET 0
+#define REG_L_OFFSET 0
+#define REG_LH_OFFSET 4
+#endif
+
+/* Expands to its arguments only for x86-64 guests, to nothing otherwise. */
+#ifdef TARGET_X86_64
+#define X86_64_DEF(...) __VA_ARGS__
+#else
+#define X86_64_DEF(...)
+#endif
+
+/* Mask selecting the low 32 bits (0x00000000ffffffff). */
+#define REG_LOW_MASK (~(uint64_t)0x0>>32)
+
+/* gen_op instructions */
+/* i386 arith/logic operations */
+/* Operation selector passed as `op` to helper_atomic_op<type>.
+ * NOTE(review): the numbering presumably has to match the translator's own
+ * enum of the same names; confirm against translate.c. */
+enum {
+ OP_ADDL,
+ OP_ORL,
+ OP_ADCL,
+ OP_SBBL,
+ OP_ANDL,
+ OP_SUBL,
+ OP_XORL,
+ OP_CMPL,
+};
+
+/*
+ * Read guest register `reg` for operand size `ot`.
+ *
+ * For byte operands (ot == 0) register numbers 4..7 denote the high-byte
+ * registers, unless (x86-64 guests only) reg >= 8 or hregs is set; in that
+ * case the single byte at REG_H_OFFSET inside regs[reg - 4] is returned.
+ * Every other combination returns the full env->regs[reg]; callers truncate
+ * it to the operand size themselves.
+ */
+static target_ulong cm_get_reg_val(CPUX86State *env, int ot, int hregs, int reg)
+{
+    const int is_high_byte =
+        ot == 0 /* OT_BYTE */ &&
+        !(reg < 4 X86_64_DEF( || reg >= 8 || hregs));
+
+    if (is_high_byte) {
+        const uint8_t *raw = (const uint8_t *)env;
+        target_ulong pos = offsetof(CPUX86State, regs[reg - 4]) + REG_H_OFFSET;
+
+        return raw[pos];
+    }
+
+    return env->regs[reg];
+}
+
+/*
+ * Write `val` to guest register `reg` with operand size `ot`:
+ *   0 (byte)  - stores one byte, either the low byte of regs[reg] or, for
+ *               the high-byte encodings (reg 4..7 without the x86-64
+ *               overrides), the byte at REG_H_OFFSET of regs[reg - 4];
+ *   1 (word)  - stores 16 bits at REG_W_OFFSET, leaving the rest untouched;
+ *   2 (long)  - stores the low 32 bits and clears the upper bits;
+ *   otherwise - stores the whole target_ulong.
+ */
+static void cm_set_reg_val(CPUX86State *env, int ot, int hregs, int reg, target_ulong val)
+{
+    uint8_t *raw = (uint8_t *)env;
+    target_ulong pos;
+
+    if (ot == 0) {              /* OT_BYTE */
+        if (reg < 4 X86_64_DEF (|| reg >= 8 || hregs))
+            pos = offsetof(CPUX86State, regs[reg]) + REG_B_OFFSET;
+        else
+            pos = offsetof(CPUX86State, regs[reg - 4]) + REG_H_OFFSET;
+        raw[pos] = (uint8_t)val;
+    } else if (ot == 1) {       /* OT_WORD */
+        pos = offsetof(CPUX86State, regs[reg]) + REG_W_OFFSET;
+        *((uint16_t *)(raw + pos)) = (uint16_t)val;
+    } else if (ot == 2) {       /* OT_LONG */
+        env->regs[reg] = REG_LOW_MASK & val;
+    } else {                    /* OT_QUAD and anything else */
+        env->regs[reg] = val;
+    }
+}
+
+/* Size-suffixed aliases for the host-pointer load functions used by TX. */
+#define LD_b ldub_p
+#define LD_w lduw_p
+#define LD_l ldl_p
+#define LD_q ldq_p
+
+/* Lightweight transactional memory. */
+/* TX(vaddr, type, value, command) expands to a compare-and-swap retry loop:
+ * the current memory content is loaded into `value` (and saved in __oldv),
+ * `command` updates `value`, and the result is written back only if memory
+ * still holds __oldv; otherwise the whole sequence is retried, so `command`
+ * may run more than once. After the loop `value` is the value that was
+ * stored. The macro declares __q_addr, __oldv and `value` in the enclosing
+ * scope and requires a variable named env, so it can be used only once per
+ * scope. */
+#define TX(vaddr, type, value, command) \
+ unsigned long __q_addr; \
+ DATA_##type __oldv; \
+ DATA_##type value; \
+ \
+ CM_GET_QEMU_ADDR(env, __q_addr, vaddr); \
+ do { \
+ __oldv = value = LD_##type((DATA_##type *)__q_addr); \
+ {command;}; \
+ mb(); \
+ } while (__oldv != (atomic_compare_exchange##type( \
+ (DATA_##type *)__q_addr, __oldv, value)))
+
+/* Atomically emulate INC instruction using CAS1 and memory transaction. */
+/* Generates helper_atomic_inc<type>(env, a0, c): increments the memory
+ * operand at guest address a0 when c > 0 and decrements it otherwise. The
+ * carry of the previous instruction is computed first and passed on through
+ * CC_SRC; CC_SRC finally holds the eflags computed for the INC/DEC. */
+
+#define GEN_ATOMIC_INC(type, TYPE) \
+void helper_atomic_inc##type(CPUX86State *env, target_ulong a0, int c) \
+{ \
+ int eflags_c, eflags; \
+ int cc_op; \
+ \
+ /* compute the previous instruction c flags */ \
+ eflags_c = helper_cc_compute_c(CC_DST, CC_SRC, CC_SRC2, CC_OP); \
+ \
+ TX(a0, type, value, { \
+ if (c > 0) { \
+ value++; \
+ cc_op = CC_OP_INC##TYPE; \
+ } else { \
+ value--; \
+ cc_op = CC_OP_DEC##TYPE; \
+ } \
+ }); \
+ \
+ CC_SRC = eflags_c; \
+ CC_DST = value; \
+ \
+ eflags = helper_cc_compute_all(CC_DST, CC_SRC, CC_SRC2, cc_op); \
+ CC_SRC = eflags; \
+} \
+
+GEN_ATOMIC_INC(b, B);
+GEN_ATOMIC_INC(w, W);
+GEN_ATOMIC_INC(l, L);
+#ifdef TARGET_X86_64
+GEN_ATOMIC_INC(q, Q);
+#endif
+
+/* Operand-size codes as understood by cm_get_reg_val()/cm_set_reg_val(). */
+#define OT_b 0
+#define OT_w 1
+#define OT_l 2
+#define OT_q 3
+
+/* Generates helper_xchg<type>(env, a0, reg, hreg): atomically exchanges
+ * guest register `reg` with the memory operand at guest address a0. hreg is
+ * forwarded to the register accessors as their `hregs` argument. */
+#define GEN_ATOMIC_XCHG(type) \
+void helper_xchg##type(CPUX86State *env, target_ulong a0, int reg, \
+ int hreg) \
+{ \
+ DATA_##type val, out; \
+ unsigned long q_addr; \
+ \
+ CM_GET_QEMU_ADDR(env, q_addr, a0); \
+ val = (DATA_##type)cm_get_reg_val(env, OT_##type, hreg, reg); \
+ out = atomic_exchange##type((DATA_##type *)q_addr, val); \
+ mb(); \
+ \
+ cm_set_reg_val(env, OT_##type, hreg, reg, out); \
+}
+
+GEN_ATOMIC_XCHG(b);
+GEN_ATOMIC_XCHG(w);
+GEN_ATOMIC_XCHG(l);
+#ifdef TARGET_X86_64
+GEN_ATOMIC_XCHG(q);
+#endif
+
+/* Generates helper_atomic_op<type>(env, a0, t1, op): atomically applies the
+ * arithmetic/logic operation selected by `op` (an OP_* value) with operand
+ * t1 to the memory at guest address a0, then leaves the resulting eflags in
+ * CC_SRC. An unknown `op` is treated like OP_ANDL; OP_CMPL aborts.
+ * NOTE(review): for ADC/SBB with carry-in the flags are computed with
+ * whatever CC_SRC2 currently holds; this helper never sets it. If
+ * helper_cc_compute_all() expects the carry-in in its third argument for
+ * those cc_ops, CC_SRC2 should be loaded with eflags_c - confirm against
+ * the cc helper implementation. */
+#define GEN_ATOMIC_OP(type, TYPE) \
+void helper_atomic_op##type(CPUX86State *env, target_ulong a0, \
+ target_ulong t1, int op) \
+{ \
+ DATA_##type operand; \
+ int eflags_c, eflags; \
+ int cc_op; \
+ \
+ /* compute the previous instruction c flags */ \
+ eflags_c = helper_cc_compute_c(CC_DST, CC_SRC, CC_SRC2, CC_OP); \
+ operand = (DATA_##type)t1; \
+ \
+ TX(a0, type, value, { \
+ switch(op) { \
+ case OP_ADCL: \
+ value += operand + eflags_c; \
+ cc_op = CC_OP_ADD##TYPE + (eflags_c << 2); \
+ CC_SRC = operand; \
+ break; \
+ case OP_SBBL: \
+ value = value - operand - eflags_c; \
+ cc_op = CC_OP_SUB##TYPE + (eflags_c << 2); \
+ CC_SRC = operand; \
+ break; \
+ case OP_ADDL: \
+ value += operand; \
+ cc_op = CC_OP_ADD##TYPE; \
+ CC_SRC = operand; \
+ break; \
+ case OP_SUBL: \
+ value -= operand; \
+ cc_op = CC_OP_SUB##TYPE; \
+ CC_SRC = operand; \
+ break; \
+ default: \
+ case OP_ANDL: \
+ value &= operand; \
+ cc_op = CC_OP_LOGIC##TYPE; \
+ break; \
+ case OP_ORL: \
+ value |= operand; \
+ cc_op = CC_OP_LOGIC##TYPE; \
+ break; \
+ case OP_XORL: \
+ value ^= operand; \
+ cc_op = CC_OP_LOGIC##TYPE; \
+ break; \
+ case OP_CMPL: \
+ abort(); \
+ break; \
+ } \
+ }); \
+ CC_DST = value; \
+ /* successful transaction, compute the eflags */ \
+ eflags = helper_cc_compute_all(CC_DST, CC_SRC, CC_SRC2, cc_op); \
+ CC_SRC = eflags; \
+}
+
+GEN_ATOMIC_OP(b, B);
+GEN_ATOMIC_OP(w, W);
+GEN_ATOMIC_OP(l, L);
+#ifdef TARGET_X86_64
+GEN_ATOMIC_OP(q, Q);
+#endif
+
+/* xadd */
+/* Generates helper_atomic_xadd<type>(env, a0, reg, hreg): atomically adds
+ * guest register `reg` to the memory operand at a0, writes the previous
+ * memory value back to the register, and leaves the eflags of the addition
+ * (old value as source, sum as destination) in CC_SRC. */
+#define GEN_ATOMIC_XADD(type, TYPE) \
+void helper_atomic_xadd##type(CPUX86State *env, target_ulong a0, \
+ int reg, int hreg) \
+{ \
+ DATA_##type operand, oldv; \
+ int eflags; \
+ \
+ operand = (DATA_##type)cm_get_reg_val( \
+ env, OT_##type, hreg, reg); \
+ \
+ TX(a0, type, newv, { \
+ oldv = newv; \
+ newv += operand; \
+ }); \
+ \
+ /* transaction successes */ \
+ /* xchg the register and compute the eflags */ \
+ cm_set_reg_val(env, OT_##type, hreg, reg, oldv); \
+ CC_SRC = oldv; \
+ CC_DST = newv; \
+ \
+ eflags = helper_cc_compute_all(CC_DST, CC_SRC, CC_SRC2, \
+ CC_OP_ADD##TYPE); \
+ CC_SRC = eflags; \
+}
+
+GEN_ATOMIC_XADD(b, B);
+GEN_ATOMIC_XADD(w, W);
+GEN_ATOMIC_XADD(l, L);
+#ifdef TARGET_X86_64
+GEN_ATOMIC_XADD(q, Q);
+#endif
+
+/* cmpxchg */
+/* Generates helper_atomic_cmpxchg<type>(env, a0, reg, hreg): compares the
+ * accumulator (R_EAX at this operand size) with the memory operand at a0
+ * and, if equal, stores guest register `reg` there. When the comparison
+ * fails the accumulator receives the value found in memory. The flags are
+ * those of the subtraction accumulator - memory and end up in CC_SRC. */
+#define GEN_ATOMIC_CMPXCHG(type, TYPE) \
+void helper_atomic_cmpxchg##type(CPUX86State *env, target_ulong a0, \
+ int reg, int hreg) \
+{ \
+ DATA_##type reg_v, eax_v, res; \
+ int eflags; \
+ unsigned long q_addr; \
+ \
+ CM_GET_QEMU_ADDR(env, q_addr, a0); \
+ reg_v = (DATA_##type)cm_get_reg_val(env, OT_##type, hreg, reg); \
+ eax_v = (DATA_##type)cm_get_reg_val(env, OT_##type, 0, R_EAX); \
+ \
+ res = atomic_compare_exchange##type( \
+ (DATA_##type *)q_addr, eax_v, reg_v); \
+ mb(); \
+ \
+ if (res != eax_v) \
+ cm_set_reg_val(env, OT_##type, 0, R_EAX, res); \
+ \
+ CC_SRC = res; \
+ CC_DST = eax_v - res; \
+ \
+ eflags = helper_cc_compute_all(CC_DST, CC_SRC, CC_SRC2, \
+ CC_OP_SUB##TYPE); \
+ CC_SRC = eflags; \
+}
+
+GEN_ATOMIC_CMPXCHG(b, B);
+GEN_ATOMIC_CMPXCHG(w, W);
+GEN_ATOMIC_CMPXCHG(l, L);
+#ifdef TARGET_X86_64
+GEN_ATOMIC_CMPXCHG(q, Q);
+#endif
+
+#if defined(_LP64)
+/* cmpxchgb (8, 16) */
+/*
+ * Emulate LOCK CMPXCHG8B on the 64-bit memory operand at guest address a0:
+ * EDX:EAX is compared with memory and, if equal, ECX:EBX is stored and ZF
+ * is set. Otherwise EDX:EAX receives the value found in memory and ZF is
+ * cleared. All other flags keep the value computed from the previous
+ * instruction; the final eflags are left in CC_SRC.
+ */
+void helper_atomic_cmpxchg8b(CPUX86State *env, target_ulong a0)
+{
+    uint64_t edx_eax, ecx_ebx, res;
+    int eflags;
+    unsigned long q_addr;
+
+    eflags = helper_cc_compute_all(CC_DST, CC_SRC, CC_SRC2, CC_OP);
+    CM_GET_QEMU_ADDR(env, q_addr, a0);
+
+    edx_eax = (((uint64_t)EDX << 32) | (uint32_t)EAX);
+    ecx_ebx = (((uint64_t)ECX << 32) | (uint32_t)EBX);
+
+    res = atomic_compare_exchangeq((uint64_t *)q_addr, edx_eax, ecx_ebx);
+    mb();
+
+    if (res == edx_eax) {
+        eflags |= CC_Z;
+    } else {
+        EDX = (uint32_t)(res >> 32);
+        EAX = (uint32_t)res;
+        eflags &= ~CC_Z;
+    }
+
+    CC_SRC = eflags;
+}
+#else
+/*
+ * Hosts without a 64-bit compare-and-swap cannot emulate CMPXCHG8B.
+ * assert() on a bare string literal is always true, so the message is
+ * negated to make the assertion fire, and abort() replaces exit(0) so the
+ * failure is never reported as a successful exit (also with NDEBUG).
+ */
+void helper_atomic_cmpxchg8b(CPUX86State *env, target_ulong a0)
+{
+    assert(!"helper_atomic_cmpxchg8b: not supported.\n");
+    abort();
+}
+#endif
+
+/* Emulate LOCK CMPXCHG16B on the 16-byte memory operand at guest address
+ * a0. atomic_compare_exchange16b() is called with EAX/EDX as the expected
+ * value and EBX/ECX as the replacement; a non-zero result sets ZF, a zero
+ * result clears ZF and loads EDX:EAX with the old memory content. The final
+ * eflags are left in CC_SRC.
+ * NOTE(review): the "old" value reported on failure is read with two plain
+ * loads before the compare-and-swap, so it is not necessarily the value the
+ * swap actually compared against if another thread writes in between. */
+void helper_atomic_cmpxchg16b(CPUX86State *env, target_ulong a0)
+{
+ uint8_t res;
+ int eflags;
+ unsigned long q_addr;
+
+ eflags = helper_cc_compute_all(CC_DST, CC_SRC, CC_SRC2, CC_OP);
+ CM_GET_QEMU_ADDR(env, q_addr, a0);
+
+ /* Snapshot of the memory operand, taken before the atomic operation. */
+ uint64_t old_rax = *(uint64_t *)q_addr;
+ uint64_t old_rdx = *(uint64_t *)(q_addr + 8);
+ res = atomic_compare_exchange16b((uint64_t *)q_addr, EAX, EDX, EBX, ECX);
+ mb();
+
+ if (res) {
+ eflags |= CC_Z; /* swap success */
+ } else {
+ EDX = old_rdx;
+ EAX = old_rax;
+ eflags &= ~CC_Z; /* read the old value ! */
+ }
+
+ CC_SRC = eflags;
+}
+
+/* not */
+/* Generates helper_atomic_not<type>(env, a0): atomically replaces the
+ * memory operand at guest address a0 with its bitwise complement. No
+ * condition-code state is touched. */
+#define GEN_ATOMIC_NOT(type) \
+void helper_atomic_not##type(CPUX86State *env, \
+ target_ulong a0) \
+{ \
+ TX(a0, type, value, { \
+ value = ~value; \
+ }); \
+}
+
+GEN_ATOMIC_NOT(b);
+GEN_ATOMIC_NOT(w);
+GEN_ATOMIC_NOT(l);
+#ifdef TARGET_X86_64
+GEN_ATOMIC_NOT(q);
+#endif
+
+/* neg */
+/*
+ * Generates helper_atomic_neg<type>(env, a0): atomically replaces the
+ * memory operand at guest address a0 with its two's complement and leaves
+ * the resulting eflags in CC_SRC.
+ *
+ * NEG is flagged as the subtraction 0 - old. The SUB flag computation takes
+ * the result as destination and the subtrahend (the old operand) as source,
+ * so CC_DST must be the negated value and CC_SRC the original one. Passing
+ * the old operand as destination as well computes the wrong flags.
+ */
+#define GEN_ATOMIC_NEG(type, TYPE) \
+void helper_atomic_neg##type(CPUX86State *env, \
+                             target_ulong a0) \
+{ \
+    int eflags; \
+    \
+    TX(a0, type, value, { \
+        value = -value; \
+    }); \
+    \
+    /* `value` is the stored result; negating it again yields the old operand. */ \
+    CC_DST = value; \
+    CC_SRC = (DATA_##type)-value; \
+    \
+    eflags = helper_cc_compute_all(CC_DST, CC_SRC, CC_SRC2, \
+                                   CC_OP_SUB##TYPE); \
+    CC_SRC = eflags; \
+} \
+
+GEN_ATOMIC_NEG(b, B);
+GEN_ATOMIC_NEG(w, W);
+GEN_ATOMIC_NEG(l, L);
+#ifdef TARGET_X86_64
+GEN_ATOMIC_NEG(q, Q);
+#endif
+
+/* This is only used in BTX instruction, with an additional offset.
+ * Note that, when using register bitoffset, the value can be larger than
+ * operand size - 1 (operand size can be 16/32/64), refer to intel manual 2A
+ * page 3-11. */
+/* TX2 is TX with the host address advanced by offset / 8 bytes before the
+ * compare-and-swap loop, so the loop works on the byte containing the
+ * addressed bit.
+ * NOTE(review): `offset` is unsigned where it is used below, so the shift
+ * cannot move the address backwards; confirm that callers never pass a
+ * negative (sign-extended) bit offset. */
+#define TX2(vaddr, type, value, offset, command) \
+ unsigned long __q_addr; \
+ DATA_##type __oldv; \
+ DATA_##type value; \
+ \
+ CM_GET_QEMU_ADDR(env, __q_addr, vaddr); \
+ __q_addr += offset >> 3; \
+ do { \
+ __oldv = value = LD_##type((DATA_##type *)__q_addr); \
+ {command;}; \
+ mb(); \
+ } while (__oldv != (atomic_compare_exchange##type( \
+ (DATA_##type *)__q_addr, __oldv, value)))
+
+/* Generates helper_atomic_<ins>(env, a0, offset, ot): atomically applies
+ * `command` to the byte that holds bit `offset` of the operand at a0. The
+ * previous byte, shifted so that the tested bit is bit 0, becomes CC_SRC
+ * for a flag computation with cc_op CC_OP_SARB + ot; the resulting eflags
+ * are left in CC_SRC. */
+#define GEN_ATOMIC_BTX(ins, command) \
+void helper_atomic_##ins(CPUX86State *env, target_ulong a0, \
+ target_ulong offset, int ot) \
+{ \
+ uint8_t old_byte; \
+ int eflags; \
+ \
+ TX2(a0, b, value, offset, { \
+ old_byte = value; \
+ {command;}; \
+ }); \
+ \
+ CC_SRC = (old_byte >> (offset & 0x7)); \
+ CC_DST = 0; \
+ eflags = helper_cc_compute_all(CC_DST, CC_SRC, CC_SRC2, \
+ CC_OP_SARB + ot); \
+ CC_SRC = eflags; \
+}
+
+/* bts */
+/* Set the addressed bit. */
+GEN_ATOMIC_BTX(bts, {
+ value |= (1 << (offset & 0x7));
+});
+/* btr */
+/* Clear the addressed bit. */
+GEN_ATOMIC_BTX(btr, {
+ value &= ~(1 << (offset & 0x7));
+});
+/* btc */
+/* Toggle the addressed bit. */
+GEN_ATOMIC_BTX(btc, {
+ value ^= (1 << (offset & 0x7));
+});
+
+/* fence **/
+/* Issue a full host memory barrier via mb(); env is not used. */
+void helper_fence(CPUX86State *env)
+{
+ mb();
+}
+
+#undef EAX
+#undef ECX
+#undef EDX
+#undef EBX
diff --git a/llvm/atomic/coremu-atomic.h b/llvm/atomic/coremu-atomic.h
new file mode 100644
index 0000000..998232b
--- /dev/null
+++ b/llvm/atomic/coremu-atomic.h
@@ -0,0 +1,412 @@
+/*
+ * COREMU Parallel Emulator Framework
+ *
+ * Atomic support for COREMU system.
+ * XXX: Now only support x86-64 architecture.
+ *
+ * Copyright (C) 2010 Parallel Processing Institute (PPI), Fudan Univ.
+ * <http://ppi.fudan.edu.cn/system_research_group>
+ *
+ * Authors:
+ * Zhaoguo Wang <zgwang@fudan.edu.cn>
+ * Yufei Chen <chenyufei@fudan.edu.cn>
+ * Ran Liu <naruilone@gmail.com>
+ * Xi Wu <wuxi@fudan.edu.cn>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _COREMU_ATOMIC_H
+#define _COREMU_ATOMIC_H
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <assert.h>
+#include "config-target.h"
+#include "hqemu.h"
+
+/* Given the guest virtual address, get the corresponding host address.
+ * This macro resembles ldxxx in softmmu_template.h
+ * NOTE: This must be inlined since the use of GETPC needs to get the
+ * return address. Using always inline also works, we use macro here to be more
+ * explicit. */
+/* Arguments: __env1 is the CPU environment, q_addr an lvalue receiving the
+ * host address, v_addr the guest virtual address. v_addr is evaluated more
+ * than once and is not parenthesized inside the macro, so pass a simple,
+ * side-effect-free expression. */
+#if defined(CONFIG_USER_ONLY)
+/* User-mode emulation: the host address is the guest address plus
+ * GUEST_BASE; __env1 is unused. */
+#define CM_GET_QEMU_ADDR(__env1, q_addr, v_addr) \
+do { \
+ q_addr = v_addr + GUEST_BASE; \
+} while (0)
+
+#else
+/* System emulation: look the page up in the TLB for the current MMU mode,
+ * calling tlb_fill() for a write access when the entry does not match, then
+ * add the entry's addend to the guest address. */
+#define CM_GET_QEMU_ADDR(__env1, q_addr, v_addr) \
+do { \
+ CPUState *cpu = ENV_GET_CPU(__env1); \
+ int __mmu_idx, __index; \
+ uintptr_t __retaddr; \
+ __index = (v_addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); \
+ /* get the CPL, hence determine the MMU mode */ \
+ __mmu_idx = cpu_mmu_index(__env1, false); \
+ /* We use this function in the implementation of atomic instructions */ \
+ /* and we are going to modify these memory. So we use addr_write. */ \
+ if (unlikely(__env1->tlb_table[__mmu_idx][__index].addr_write \
+ != ((v_addr & TARGET_PAGE_MASK) | tlb_version(__env1)))) { \
+ __retaddr = GETPC(); \
+ tlb_fill(cpu, v_addr, 1, __mmu_idx, __retaddr); \
+ } \
+ q_addr = v_addr + __env1->tlb_table[__mmu_idx][__index].addend; \
+} while(0)
+#endif
+
+/* XXX These are also used by atomic instruction handling.
+ * Put these defines in some other files? */
+/* Map the operand-size suffixes b/w/l/q to fixed-width unsigned types. */
+#define DATA_b uint8_t
+#define DATA_w uint16_t
+#define DATA_l uint32_t
+#define DATA_q uint64_t
+
+/* Every function declared with __inline__ from here on is forced inline. */
+#define __inline__ inline __attribute__((always_inline))
+
+#if defined(__i386__) || defined(__x86_64__)
+// Is this the correct way to detect 64 system?
+#if defined(_LP64)
+/* 128-bit compare-and-swap using LOCK CMPXCHG16B: the expected value is
+ * passed in rdx:rax and the replacement in rcx:rbx. Returns the ZF result
+ * captured by SETZ, i.e. non-zero when the exchange took place. The
+ * rax/rdx values updated by the instruction are discarded because the
+ * parameters are passed by value. */
+static __inline__ uint8_t
+atomic_compare_exchange16b(uint64_t *memp,
+ uint64_t rax, uint64_t rdx,
+ uint64_t rbx, uint64_t rcx)
+{
+ uint8_t z;
+ __asm __volatile__ ( "lock; cmpxchg16b %3\n\t"
+ "setz %2\n\t"
+ : "=a" (rax), "=d" (rdx), "=r" (z), "+m" (*memp)
+ : "a" (rax), "d" (rdx), "b" (rbx), "c" (rcx)
+ : "memory", "cc" );
+ return z;
+}
+#else
+/*
+ * Stub for hosts without CMPXCHG16B (non-LP64 builds): the operation cannot
+ * be emulated, so the process is terminated. The string is negated because
+ * assert() on a bare string literal is always true, and abort() replaces
+ * exit(0) so the failure is never reported as success (also with NDEBUG).
+ * This function does not return.
+ */
+static __inline__ uint8_t
+atomic_compare_exchange16b(uint64_t *memp,
+                           uint64_t rax, uint64_t rdx,
+                           uint64_t rbx, uint64_t rcx)
+{
+    assert(!"atomic_compare_exchange16b: not supported.\n");
+    abort();
+}
+
+/*
+ * Stub for hosts without a 64-bit compare-and-swap (non-LP64 builds). The
+ * return type is uint64_t so the signature matches the 64-bit
+ * implementation that callers use on LP64 hosts. The string is negated so
+ * the assertion actually fires, and abort() replaces exit(0) so the failure
+ * is never reported as success. This function does not return.
+ */
+static __inline__ uint64_t
+atomic_compare_exchangeq(uint64_t *addr,
+                         uint64_t oldval, uint64_t newval)
+{
+    assert(!"atomic_compare_exchangeq: not supported.\n");
+    abort();
+}
+
+#endif
+
+/* Memory Barriers: x86-64 ONLY now */
+/* Full, load and store fences; each also acts as a compiler barrier through
+ * the "memory" clobber. */
+#define mb() asm volatile("mfence":::"memory")
+#define rmb() asm volatile("lfence":::"memory")
+#define wmb() asm volatile("sfence" ::: "memory")
+
+/* Instruction prefix used by the templates to make an operation atomic. */
+#define LOCK_PREFIX "lock; "
+
+/* Token pasting helpers. */
+#define coremu_xglue(a, b) a ## b
+// If a/b is macro, it will expand first, then pass to coremu_xglue
+#define coremu_glue(a, b) coremu_xglue(a, b)
+
+/* Stringification helpers; coremu_str expands its argument first. */
+#define coremu_xstr(s) # s
+#define coremu_str(s) coremu_xstr(s)
+
+#define DATA_BITS 8
+#include "coremu-template.h"
+
+#define DATA_BITS 16
+#include "coremu-template.h"
+
+#define DATA_BITS 32
+#include "coremu-template.h"
+
+#if defined(_LP64)
+#define DATA_BITS 64
+#include "coremu-template.h"
+#else
+/*
+ * Stub for hosts without a 64-bit atomic exchange (non-LP64 builds). The
+ * string is negated because assert() on a bare string literal is always
+ * true, and abort() replaces exit(0) so the failure is never reported as
+ * success (also with NDEBUG). This function does not return.
+ */
+static inline uint64_t atomic_exchangeq(uint64_t *p, uint64_t val)
+{
+    assert(!"atomic_exchangeq: not supported.\n");
+    abort();
+}
+
+#endif
+
+#elif defined(__arm__)
+
+/* Select the LDREX/STREX based implementations when the compiler targets
+ * one of the listed ARMv6/ARMv7 architecture variants. */
+#if defined(__ARM_ARCH_7__) || \
+ defined(__ARM_ARCH_7A__) || \
+ defined(__ARM_ARCH_7EM__) || \
+ defined(__ARM_ARCH_7M__) || \
+ defined(__ARM_ARCH_7R__) || \
+ defined(__ARM_ARCH_6J__) || \
+ defined(__ARM_ARCH_6K__) || \
+ defined(__ARM_ARCH_6T2__) || \
+ defined(__ARM_ARCH_6Z__) || \
+ defined(__ARM_ARCH_6ZK__)
+#define USE_ARMV6_INSTRUCTIONS
+#endif
+
+#ifdef USE_ARMV6_INSTRUCTIONS
+/* mb(): DMB instruction. raw_local_irq_save(x): save CPSR into x and
+ * disable IRQs with CPSID. */
+#define mb() __asm__ __volatile__("dmb" : : : "memory")
+#define raw_local_irq_save(x) \
+ ({ \
+ __asm__ __volatile__( \
+ "mrs %0, cpsr @ local_irq_save\n" \
+ "cpsid i" \
+ : "=r" (x) : : "memory", "cc"); \
+ })
+#else
+/* mb(): compiler barrier only. raw_local_irq_save(x): save CPSR into x
+ * (which must be an unsigned long) and write it back with bit 7 set. */
+#define mb() __asm__ __volatile__("":::"memory")
+#define raw_local_irq_save(x) \
+ ({ \
+ unsigned long temp; \
+ (void) (&temp == &x); \
+ __asm__ __volatile__( \
+ "mrs %0, cpsr @ local_irq_save\n" \
+" orr %1, %0, #128\n" \
+" msr cpsr_c, %1" \
+ : "=r" (x), "=r" (temp) \
+ : \
+ : "memory", "cc"); \
+ })
+#endif
+
+/* Write the saved value x back to the CPSR control field.
+ * NOTE(review): these save/restore sequences are presumably adapted from
+ * kernel code; confirm they provide the intended exclusion in a user-space,
+ * multi-threaded emulator. */
+#define raw_local_irq_restore(x) \
+ __asm__ __volatile( \
+ "msr cpsr_c, %0 @ local_irq_restore\n" \
+ : \
+ : "r" (x) \
+ : "memory", "cc")
+
+/* Byte compare-and-swap: stores newval at addr if the byte there equals
+ * oldval, and returns the byte that was found (equal to oldval when the
+ * swap happened). With USE_ARMV6_INSTRUCTIONS this is an LDREXB/STREXB loop
+ * that retries while the exclusive store fails; otherwise a plain
+ * compare-and-store is bracketed by raw_local_irq_save/restore. */
+static __inline__ uint8_t atomic_compare_exchangeb(uint8_t *addr,
+ uint8_t oldval, uint8_t newval)
+{
+ uint8_t ret;
+#ifdef USE_ARMV6_INSTRUCTIONS
+ unsigned long tmp;
+ __asm__ __volatile__("@ atomic_cmpxchgl\n"
+ "1: ldrexb %1, [%3]\n"
+ " mov %0, #0\n"
+ " teq %1, %4\n"
+ " strexbeq %0, %5, [%3]\n"
+ " teq %0, #0\n"
+ " bne 1b\n"
+ : "=&r" (tmp), "=&r" (ret), "+Qo" (*addr)
+ : "r" (addr), "Ir" (oldval), "r" (newval)
+ : "cc");
+#else
+ unsigned long flags;
+ raw_local_irq_save(flags);
+ ret = *addr;
+ if (likely(ret == oldval))
+ *addr = newval;
+ raw_local_irq_restore(flags);
+#endif
+ return ret;
+}
+
+/* Halfword compare-and-swap: stores newval at addr if the halfword there
+ * equals oldval, and returns the value that was found. With
+ * USE_ARMV6_INSTRUCTIONS this is an LDREXH/STREXH loop that retries while
+ * the exclusive store fails; otherwise a plain compare-and-store is
+ * bracketed by raw_local_irq_save/restore. */
+static __inline__ uint16_t atomic_compare_exchangew(uint16_t *addr,
+ uint16_t oldval, uint16_t newval)
+{
+ uint16_t ret;
+#ifdef USE_ARMV6_INSTRUCTIONS
+ unsigned long tmp;
+ __asm__ __volatile__("@ atomic_cmpxchgl\n"
+ "1: ldrexh %1, [%3]\n"
+ " mov %0, #0\n"
+ " teq %1, %4\n"
+ " strexheq %0, %5, [%3]\n"
+ " teq %0, #0\n"
+ " bne 1b\n"
+ : "=&r" (tmp), "=&r" (ret), "+Qo" (*addr)
+ : "r" (addr), "Ir" (oldval), "r" (newval)
+ : "cc");
+#else
+ unsigned long flags;
+ raw_local_irq_save(flags);
+ ret = *addr;
+ if (likely(ret == oldval))
+ *addr = newval;
+ raw_local_irq_restore(flags);
+#endif
+ return ret;
+}
+
+/* Word compare-and-swap: stores newval at addr if the word there equals
+ * oldval, and returns the value that was found. With
+ * USE_ARMV6_INSTRUCTIONS this is an LDREX/STREX loop that retries while the
+ * exclusive store fails; otherwise a plain compare-and-store is bracketed
+ * by raw_local_irq_save/restore. */
+static __inline__ uint32_t atomic_compare_exchangel(uint32_t *addr,
+ uint32_t oldval, uint32_t newval)
+{
+ uint32_t ret;
+#ifdef USE_ARMV6_INSTRUCTIONS
+ unsigned long tmp;
+ __asm__ __volatile__("@ atomic_cmpxchgl\n"
+ "1: ldrex %1, [%3]\n"
+ " mov %0, #0\n"
+ " teq %1, %4\n"
+ " strexeq %0, %5, [%3]\n"
+ " teq %0, #0\n"
+ " bne 1b\n"
+ : "=&r" (tmp), "=&r" (ret), "+Qo" (*addr)
+ : "r" (addr), "Ir" (oldval), "r" (newval)
+ : "cc");
+#else
+ unsigned long flags;
+ raw_local_irq_save(flags);
+ ret = *addr;
+ if (likely(ret == oldval))
+ *addr = newval;
+ raw_local_irq_restore(flags);
+#endif
+ return ret;
+}
+
+/*
+ * Doubleword compare-and-swap: stores newval at addr if the 64-bit value
+ * there equals oldval, and returns the value that was found (equal to
+ * oldval when the swap happened).
+ *
+ * With USE_ARMV6_INSTRUCTIONS this is an LDREXD/STREXD loop that retries
+ * while the exclusive store fails. oldval is constrained to a register
+ * pair ("r"): the template addresses its high half with %H4, which has no
+ * meaning for the immediate operand that an "I" alternative would permit.
+ * Without those instructions a plain compare-and-store is bracketed by
+ * raw_local_irq_save/restore.
+ */
+static __inline__ uint64_t atomic_compare_exchangeq(uint64_t *addr,
+        uint64_t oldval, uint64_t newval)
+{
+    uint64_t ret;
+#ifdef USE_ARMV6_INSTRUCTIONS
+    unsigned long tmp;
+    __asm__ __volatile__("@ atomic_cmpxchgl\n"
+    "1: ldrexd %1, %H1, [%3]\n"
+    " mov %0, #0\n"
+    " teq %1, %4\n"
+    " teqeq %H1, %H4\n"
+    " strexdeq %0, %5, %H5, [%3]\n"
+    " teq %0, #0\n"
+    " bne 1b\n"
+    : "=&r" (tmp), "=&r" (ret), "+Qo" (*addr)
+    : "r" (addr), "r" (oldval), "r" (newval)
+    : "cc");
+#else
+    unsigned long flags;
+    raw_local_irq_save(flags);
+    ret = *addr;
+    if (likely(ret == oldval))
+        *addr = newval;
+    raw_local_irq_restore(flags);
+#endif
+    return ret;
+}
+
+/*
+ * 128-bit compare-and-swap emulated inside a raw_local_irq_save/restore
+ * bracket. memp[0] is compared with old_less and memp[1] with old_most;
+ * when both match they are replaced by new_less and new_most.
+ *
+ * Returns 1 when the exchange took place and 0 otherwise. The result must
+ * stay 0 on the failure path: callers treat any non-zero value as success,
+ * so no memory content may leak into the return value.
+ */
+static __inline__ uint8_t
+atomic_compare_exchange16b(uint64_t *memp,
+                           uint64_t old_less, uint64_t old_most,
+                           uint64_t new_less, uint64_t new_most)
+{
+    uint8_t ret = 0;
+    unsigned long flags;
+    raw_local_irq_save(flags);
+    if (likely(*memp == old_less && *(memp+1) == old_most))
+    {
+        *memp = new_less;
+        *(memp+1) = new_most;
+        ret = 1;
+    }
+    raw_local_irq_restore(flags);
+    return ret;
+}
+
+/*
+ * Atomically exchange the `size`-byte object at ptr with x and return the
+ * previous content. Sizes 1, 2 and 4 are supported; mb() is issued before
+ * and after the exchange.
+ *
+ * With USE_ARMV6_INSTRUCTIONS each size uses an LDREX/STREX retry loop.
+ * Otherwise sizes 1 and 4 use SWPB/SWP and size 2 uses a plain load/store
+ * bracketed by raw_local_irq_save/restore.
+ *
+ * An unsupported size is a programming error: the process is aborted rather
+ * than exiting with status 0, which would report the failure as success.
+ */
+static __inline__ unsigned long __xchg(unsigned long x, volatile void *ptr, int size)
+{
+    unsigned long ret;
+#ifdef USE_ARMV6_INSTRUCTIONS
+    unsigned int tmp;
+#endif
+
+    mb();
+
+    switch (size) {
+#ifdef USE_ARMV6_INSTRUCTIONS
+    case 1:
+        __asm __volatile("@ __xchg1\n"
+        "1: ldrexb %0, [%3]\n"
+        " strexb %1, %2, [%3]\n"
+        " teq %1, #0\n"
+        " bne 1b"
+        : "=&r" (ret), "=&r" (tmp)
+        : "r" (x), "r" (ptr)
+        : "memory", "cc");
+        break;
+    case 2:
+        __asm __volatile("@ __xchg1\n"
+        "1: ldrexh %0, [%3]\n"
+        " strexh %1, %2, [%3]\n"
+        " teq %1, #0\n"
+        " bne 1b"
+        : "=&r" (ret), "=&r" (tmp)
+        : "r" (x), "r" (ptr)
+        : "memory", "cc");
+        break;
+    case 4:
+        __asm __volatile("@ __xchg4\n"
+        "1: ldrex %0, [%3]\n"
+        " strex %1, %2, [%3]\n"
+        " teq %1, #0\n"
+        " bne 1b"
+        : "=&r" (ret), "=&r" (tmp)
+        : "r" (x), "r" (ptr)
+        : "memory", "cc");
+        break;
+#else
+    case 1:
+        __asm __volatile("@ __xchg1\n"
+        " swpb %0, %1, [%2]"
+        : "=&r" (ret)
+        : "r" (x), "r" (ptr)
+        : "memory", "cc");
+        break;
+
+    case 4:
+        __asm __volatile("@ __xchg4\n"
+        " swp %0, %1, [%2]"
+        : "=&r" (ret)
+        : "r" (x), "r" (ptr)
+        : "memory", "cc");
+        break;
+    case 2:
+    {
+        unsigned long flags = 0;
+        raw_local_irq_save(flags);
+        ret = *(volatile uint16_t *)ptr;
+        *(volatile uint16_t *)ptr = x;
+        raw_local_irq_restore(flags);
+        break;
+    }
+
+#endif
+    default:
+        /* Unsupported operand size. */
+        abort();
+    }
+    mb();
+
+    return ret;
+}
+
+/* Type-generic wrapper around __xchg(): the operand size is taken from the
+ * pointee type of ptr and the result is cast back to that type. */
+#define xchg(ptr,x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
+/* Define atomic_exchange<TYPE>(p, val) in terms of xchg(). */
+#define GEN_ATOMIC_XCHG_HELPER(TYPE) \
+static __inline__ DATA_##TYPE atomic_exchange##TYPE(DATA_##TYPE *p, DATA_##TYPE val) { return xchg(p, val); }
+
+/* Only the 8-, 16- and 32-bit variants are generated here. */
+GEN_ATOMIC_XCHG_HELPER(b);
+GEN_ATOMIC_XCHG_HELPER(w);
+GEN_ATOMIC_XCHG_HELPER(l);
+
+#endif
+
+#endif /* _COREMU_ATOMIC_H */
+
diff --git a/llvm/atomic/coremu-template.h b/llvm/atomic/coremu-template.h
new file mode 100644
index 0000000..66b185c
--- /dev/null
+++ b/llvm/atomic/coremu-template.h
@@ -0,0 +1,101 @@
+/* The following code may be included multiple times in a single file. */
+
+#if DATA_BITS == 64
+# define DATA_TYPE uint64_t
+# define SUFFIX q
+#elif DATA_BITS == 32
+# define DATA_TYPE uint32_t
+# define SUFFIX l
+#elif DATA_BITS == 16
+# define DATA_TYPE uint16_t
+# define SUFFIX w
+#elif DATA_BITS == 8
+# define DATA_TYPE uint8_t
+# define SUFFIX b
+#else
+#error unsupported data size
+#endif
+
+/* Increment *p by one with a single "inc" instruction; nothing is returned.
+ * "+m" makes *p both an input and an output of the instruction, and "cc"
+ * marks the flags as clobbered. LOCK_PREFIX is defined outside this template
+ * (presumably the x86 lock prefix -- confirm against its definition). */
+static __inline__ void coremu_glue(atomic_inc, SUFFIX)(DATA_TYPE *p) {
+ asm volatile(
+ LOCK_PREFIX "inc"coremu_str(SUFFIX)" %0"
+ : "+m"(*p)
+ :
+ : "cc");
+}
+
+/* Decrement *p by one with a single "dec" instruction; nothing is returned.
+ * "+m" makes *p both an input and an output of the instruction, and "cc"
+ * marks the flags as clobbered. LOCK_PREFIX is defined outside this template
+ * (presumably the x86 lock prefix -- confirm against its definition). */
+static __inline__ void coremu_glue(atomic_dec, SUFFIX)(DATA_TYPE *p) {
+ asm volatile(
+ LOCK_PREFIX "dec"coremu_str(SUFFIX)" %0"
+ : "+m"(*p)
+ :
+ : "cc");
+}
+
+/* Add VAL to *addr with a single "add" instruction; nothing is returned.
+ * VAL is passed in the accumulator register (constraint "a"); *addr is a
+ * read-write memory operand and the flags are marked as clobbered. */
+static __inline__ void coremu_glue(atomic_add, SUFFIX)(DATA_TYPE* addr,
+ DATA_TYPE val) {
+ asm volatile(
+ LOCK_PREFIX "add"coremu_str(SUFFIX)" %1, %0"
+ : "+m"(*addr)
+ : "a"(val)
+ : "cc");
+}
+
+/* Swap the value VAL with *p.
+ * Return the value swapped out from memory.
+ *
+ * VAL travels in and out through the accumulator ("+a"), so after the
+ * instruction it holds the previous content of *p. The "memory" clobber
+ * stops the compiler from moving unrelated loads/stores across the
+ * exchange, which callers using it as a lock primitive rely on. */
+static inline DATA_TYPE coremu_glue(atomic_exchange, SUFFIX)(
+        DATA_TYPE *p, DATA_TYPE val)
+{
+    __asm __volatile(
+        "lock; xchg"coremu_str(SUFFIX)" %0,%1 \n\t"
+        : "+a" (val), "+m" (*p)
+        :
+        : "memory"
+        );
+    return val;
+}
+/* Compare *addr with OLDVAL and, if they are equal, store NEWVAL to *addr.
+ * Return the previous value in addr. So if the return value is the same as
+ * oldval, the swap occurred.
+ *
+ * cmpxchg compares against the accumulator and leaves the observed memory
+ * value there on failure, hence OLDVAL is the read-write "+a" operand.
+ * The "memory" clobber keeps the compiler from reordering unrelated memory
+ * accesses across the operation. */
+static __inline__ DATA_TYPE coremu_glue(atomic_compare_exchange, SUFFIX)(DATA_TYPE *addr,
+        DATA_TYPE oldval, DATA_TYPE newval) {
+    asm volatile(
+        LOCK_PREFIX "cmpxchg"coremu_str(SUFFIX)" %2, %1"
+        : "+a"(oldval), "+m"(*addr)
+        : "q"(newval)
+        : "cc", "memory");
+
+    return oldval;
+}
+
+/* AND *addr with MASK in place using a single "and" instruction; nothing is
+ * returned. MASK is a register input, *addr a read-write memory operand, and
+ * the flags are marked as clobbered. */
+static __inline__ void coremu_glue(atomic_and, SUFFIX)(DATA_TYPE *addr,
+ DATA_TYPE mask) {
+ asm volatile(
+ LOCK_PREFIX "and"coremu_str(SUFFIX)" %1, %0"
+ : "+m"(*addr)
+ : "r"(mask)
+ : "cc");
+}
+
+/* OR MASK into *addr in place using a single "or" instruction; nothing is
+ * returned. MASK is a register input, *addr a read-write memory operand, and
+ * the flags are marked as clobbered. */
+static __inline__ void coremu_glue(atomic_or, SUFFIX)(DATA_TYPE *addr,
+ DATA_TYPE mask) {
+ asm volatile(
+ LOCK_PREFIX "or"coremu_str(SUFFIX)" %1, %0"
+ : "+m"(*addr)
+ : "r"(mask)
+ : "cc");
+}
+
+/* Exchange-and-add: add VAL to *addr and return the value *addr held before
+ * the addition (xadd leaves the old memory value in the register operand).
+ * The "memory" clobber keeps the compiler from reordering unrelated memory
+ * accesses across the operation. */
+static __inline__ DATA_TYPE coremu_glue(atomic_xadd, SUFFIX)(
+        DATA_TYPE* addr, DATA_TYPE val) {
+    asm volatile(
+        LOCK_PREFIX "xadd"coremu_str(SUFFIX)" %0, %1"
+        : "+a"(val), "+m"(*addr)
+        :
+        : "cc", "memory");
+
+    return val;
+}
+
+#undef DATA_BITS
+#undef DATA_TYPE
+#undef SUFFIX
diff --git a/llvm/fpu/softfloat-native-def.h b/llvm/fpu/softfloat-native-def.h
new file mode 100644
index 0000000..4b0fd22
--- /dev/null
+++ b/llvm/fpu/softfloat-native-def.h
@@ -0,0 +1,127 @@
+/*
+ * QEMU float support
+ *
+ * Derived from SoftFloat.
+ */
+
+/*============================================================================
+
+This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser. This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704. Funding was partially provided by the
+National Science Foundation under grant MIP-9311980. The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek. More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+=============================================================================*/
+
+#ifndef SOFTFLOAT_NATIVE_DEF_H
+#define SOFTFLOAT_NATIVE_DEF_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include "fpu/softfloat.h"
+
+int num_native_fpu_helpers(void);
+void *get_native_fpu_helpers(void);
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE integer-to-floating-point conversion routines.
+*----------------------------------------------------------------------------*/
+float32 llvm_int32_to_float32(int32_t v);
+float64 llvm_int32_to_float64(int32_t v);
+float32 llvm_uint32_to_float32(uint32_t v);
+float64 llvm_uint32_to_float64(uint32_t v);
+float32 llvm_int64_to_float32(int64_t v);
+float32 llvm_uint64_to_float32(uint64_t v);
+float64 llvm_int64_to_float64(int64_t v);
+float64 llvm_uint64_to_float64(uint64_t v);
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE single-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int32 llvm_float32_to_int32( float32 a );
+int32 llvm_float32_to_int32_round_to_zero( float32 a );
+int64 llvm_float32_to_int64( float32 a );
+int64 llvm_float32_to_int64_round_to_zero( float32 a );
+float64 llvm_float32_to_float64( float32 a );
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE single-precision operations.
+*----------------------------------------------------------------------------*/
+float32 llvm_float32_round_to_int( float32 a );
+float32 llvm_float32_add( float32 a, float32 b );
+float32 llvm_float32_sub( float32 a, float32 b );
+float32 llvm_float32_mul( float32 a, float32 b );
+float32 llvm_float32_div( float32 a, float32 b );
+float32 llvm_float32_rem( float32 a, float32 b );
+float32 llvm_float32_sqrt( float32 a );
+int llvm_float32_eq( float32 a, float32 b );
+int llvm_float32_le( float32 a, float32 b );
+int llvm_float32_lt( float32 a, float32 b );
+int llvm_float32_unordered( float32 a, float32 b );
+float32 llvm_float32_abs(float32 a);
+float32 llvm_float32_chs(float32 a);
+
+float32 llvm_float32_muladd( float32 a, float32 b, float32 c );
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int32 llvm_float64_to_int32( float64 a );
+int32 llvm_float64_to_int32_round_to_zero( float64 a );
+int64 llvm_float64_to_int64( float64 a );
+int64 llvm_float64_to_int64_round_to_zero( float64 a );
+float32 llvm_float64_to_float32( float64 a );
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision operations.
+*----------------------------------------------------------------------------*/
+float64 llvm_float64_round_to_int( float64 a );
+float64 llvm_float64_trunc_to_int( float64 a );
+float64 llvm_float64_add( float64 a, float64 b );
+float64 llvm_float64_sub( float64 a, float64 b );
+float64 llvm_float64_mul( float64 a, float64 b );
+float64 llvm_float64_div( float64 a, float64 b );
+float64 llvm_float64_rem( float64 a, float64 b );
+float64 llvm_float64_sqrt( float64 a );
+int llvm_float64_eq( float64 a, float64 b );
+int llvm_float64_le( float64 a, float64 b );
+int llvm_float64_lt( float64 a, float64 b );
+int llvm_float64_unordered( float64 a, float64 b );
+float64 llvm_float64_abs(float64 a);
+float64 llvm_float64_chs(float64 a);
+
+float64 llvm_float64_muladd( float64 a, float64 b, float64 c );
+
+float32 llvm_float32_maybe_silence_nan( float32 a );
+float64 llvm_float64_maybe_silence_nan( float64 a );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* !SOFTFLOAT_NATIVE_DEF_H */
diff --git a/llvm/fpu/softfloat-native.h b/llvm/fpu/softfloat-native.h
new file mode 100644
index 0000000..c12f62b
--- /dev/null
+++ b/llvm/fpu/softfloat-native.h
@@ -0,0 +1,248 @@
+/*
+ * QEMU float support
+ *
+ * Derived from SoftFloat.
+ */
+
+/*============================================================================
+
+This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser. This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704. Funding was partially provided by the
+National Science Foundation under grant MIP-9311980. The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek. More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+
+=============================================================================*/
+
+#ifndef SOFTFLOAT_NATIVE_H
+#define SOFTFLOAT_NATIVE_H
+
+#include <math.h>
+#include "fpu/softfloat-native-def.h"
+
+/* Overlay of the 32-bit views used by the native helpers: the softfloat
+ * float32 type, signed/unsigned integer bit patterns, and the host float. */
+typedef union {
+ float32 f;
+ int32_t i;
+ uint32_t u;
+ float s;
+} llvm_float32;
+
+/* Overlay of the 64-bit views used by the native helpers: the softfloat
+ * float64 type, signed/unsigned integer bit patterns, and the host double. */
+typedef union {
+ float64 f;
+ int64_t i;
+ uint64_t u;
+ double d;
+} llvm_float64;
+
+#ifdef float32_val
+#undef float32_val
+#endif
+#ifdef float64_val
+#undef float64_val
+#endif
+
+#define float32_val(x) ((llvm_float32)(x)).f
+#define float64_val(x) ((llvm_float64)(x)).f
+#define lfloat(x) ((llvm_float32)(x)).s
+#define ldouble(x) ((llvm_float64)(x)).d
+
+#define DEF_HELPER(name) { (void *)llvm_##name, "llvm_"#name }
+static TCGHelperInfo native_fpu_helpers[] = {
+ DEF_HELPER(int32_to_float32),
+ DEF_HELPER(int32_to_float64),
+ DEF_HELPER(uint32_to_float32),
+ DEF_HELPER(uint32_to_float64),
+ DEF_HELPER(int64_to_float32),
+ DEF_HELPER(uint64_to_float32),
+ DEF_HELPER(int64_to_float64),
+ DEF_HELPER(uint64_to_float64),
+ DEF_HELPER(float32_to_int32),
+ DEF_HELPER(float32_to_int64),
+ DEF_HELPER(float32_to_float64),
+ DEF_HELPER(float32_add),
+ DEF_HELPER(float32_sub),
+ DEF_HELPER(float32_mul),
+ DEF_HELPER(float32_div),
+ DEF_HELPER(float32_rem),
+ DEF_HELPER(float32_sqrt),
+ DEF_HELPER(float32_abs),
+ DEF_HELPER(float32_chs),
+ DEF_HELPER(float64_to_int32),
+ DEF_HELPER(float64_to_int64),
+ DEF_HELPER(float64_to_float32),
+ DEF_HELPER(float64_add),
+ DEF_HELPER(float64_sub),
+ DEF_HELPER(float64_mul),
+ DEF_HELPER(float64_div),
+ DEF_HELPER(float64_rem),
+ DEF_HELPER(float64_sqrt),
+ DEF_HELPER(float64_abs),
+ DEF_HELPER(float64_chs),
+
+ DEF_HELPER(float32_muladd),
+ DEF_HELPER(float64_muladd),
+
+ DEF_HELPER(float32_maybe_silence_nan),
+ DEF_HELPER(float64_maybe_silence_nan),
+#if 0
+ DEF_HELPER(float32_to_int32_round_to_zero),
+ DEF_HELPER(float32_to_int64_round_to_zero),
+ DEF_HELPER(float32_round_to_int),
+ DEF_HELPER(float32_eq),
+ DEF_HELPER(float32_le),
+ DEF_HELPER(float32_lt),
+ DEF_HELPER(float32_unordered),
+ DEF_HELPER(float64_to_int32_round_to_zero),
+ DEF_HELPER(float64_to_int64_round_to_zero),
+ DEF_HELPER(float64_round_to_int),
+ DEF_HELPER(float64_trunc_to_int),
+ DEF_HELPER(float64_eq),
+ DEF_HELPER(float64_le),
+ DEF_HELPER(float64_lt),
+ DEF_HELPER(float64_unordered),
+#endif
+};
+#undef DEF_HELPER
+
+/* Number of entries in the native_fpu_helpers table. */
+int num_native_fpu_helpers(void)
+{
+    const int count = ARRAY_SIZE(native_fpu_helpers);
+
+    return count;
+}
+
+/* Address of the first entry of the native_fpu_helpers table, returned as an
+ * untyped pointer. */
+void *get_native_fpu_helpers(void)
+{
+    return &native_fpu_helpers[0];
+}
+
+/* XXX: this code implements the x86 behaviour, not the IEEE one.
+ *
+ * Narrow a long to int32. When TCG_TARGET_REG_BITS is 32 the value is
+ * returned as is; otherwise a value that does not survive a round trip
+ * through int32_t is replaced by 0x80000000 before being returned. */
+static inline int32 long_to_int32(long a)
+{
+#if TCG_TARGET_REG_BITS != 32
+    if ((int32_t)a != a) {
+        a = 0x80000000;
+    }
+#endif
+    return a;
+}
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE integer-to-floating-point conversion routines.
+*----------------------------------------------------------------------------*/
+float32 llvm_int32_to_float32(int32_t v) { return float32_val((float)v); }
+float64 llvm_int32_to_float64(int32_t v) { return float64_val((double)v); }
+float32 llvm_uint32_to_float32(uint32_t v) { return float32_val((float)v); }
+float64 llvm_uint32_to_float64(uint32_t v) { return float64_val((double)v); }
+float32 llvm_int64_to_float32(int64_t v) { return float32_val((float)v); }
+float32 llvm_uint64_to_float32(uint64_t v) { return float32_val((float)v); }
+float64 llvm_int64_to_float64(int64_t v) { return float64_val((double)v); }
+float64 llvm_uint64_to_float64(uint64_t v) { return float64_val((double)v); }
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE single-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int32 llvm_float32_to_int32( float32 a ) { return long_to_int32(lrintf(lfloat(a))); }
+int32 llvm_float32_to_int32_round_to_zero( float32 a ) { return (int32)lfloat(a); }
+int64 llvm_float32_to_int64( float32 a ) { return llrintf(lfloat(a)); }
+int64 llvm_float32_to_int64_round_to_zero( float32 a ) { return (int64)lfloat(a); }
+float64 llvm_float32_to_float64( float32 a ) { return float64_val((double)lfloat(a)); }
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE single-precision operations.
+*----------------------------------------------------------------------------*/
+float32 llvm_float32_round_to_int( float32 a ) { return float32_val(rintf(lfloat(a))); }
+float32 llvm_float32_add( float32 a, float32 b ) { return float32_val(lfloat(a) + lfloat(b)); }
+float32 llvm_float32_sub( float32 a, float32 b ) { return float32_val(lfloat(a) - lfloat(b)); }
+float32 llvm_float32_mul( float32 a, float32 b ) { return float32_val(lfloat(a) * lfloat(b)); }
+float32 llvm_float32_div( float32 a, float32 b ) { return float32_val(lfloat(a) / lfloat(b)); }
+float32 llvm_float32_rem( float32 a, float32 b ) { return float32_val(remainderf(lfloat(a), lfloat(b))); }
+float32 llvm_float32_sqrt( float32 a ) { return float32_val(sqrtf(lfloat(a))); }
+int llvm_float32_eq( float32 a, float32 b ) { return lfloat(a) == lfloat(b); }
+int llvm_float32_le( float32 a, float32 b ) { return lfloat(a) <= lfloat(b); }
+int llvm_float32_lt( float32 a, float32 b ) { return lfloat(a) < lfloat(b); }
+int llvm_float32_unordered( float32 a, float32 b ) { return isunordered(lfloat(a), lfloat(b)); }
+float32 llvm_float32_abs(float32 a) { return float32_val(fabsf(lfloat(a))); }
+float32 llvm_float32_chs(float32 a) { return float32_val(-lfloat(a)); }
+
+/* Fused multiply-add a * b + c. fmaf() rounds once, whereas the separate
+ * multiply and add round twice unless the compiler contracts them. */
+float32 llvm_float32_muladd( float32 a, float32 b, float32 c ) { return float32_val(fmaf(lfloat(a), lfloat(b), lfloat(c))); }
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int32 llvm_float64_to_int32( float64 a ) { return long_to_int32(lrint(ldouble(a))); }
+int32 llvm_float64_to_int32_round_to_zero( float64 a ) { return (int32)ldouble(a); }
+int64 llvm_float64_to_int64( float64 a ) { return llrint(ldouble(a)); }
+int64 llvm_float64_to_int64_round_to_zero( float64 a ) { return (int64)ldouble(a); }
+float32 llvm_float64_to_float32( float64 a ) { return float32_val((float)ldouble(a)); }
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision operations.
+*----------------------------------------------------------------------------*/
+float64 llvm_float64_round_to_int( float64 a ) { return float64_val(rint(ldouble(a))); }
+float64 llvm_float64_trunc_to_int( float64 a ) { return float64_val(trunc(ldouble(a))); }
+float64 llvm_float64_add( float64 a, float64 b ) { return float64_val(ldouble(a) + ldouble(b)); }
+float64 llvm_float64_sub( float64 a, float64 b ) { return float64_val(ldouble(a) - ldouble(b)); }
+float64 llvm_float64_mul( float64 a, float64 b ) { return float64_val(ldouble(a) * ldouble(b)); }
+float64 llvm_float64_div( float64 a, float64 b ) { return float64_val(ldouble(a) / ldouble(b)); }
+float64 llvm_float64_rem( float64 a, float64 b ) { return float64_val(remainder(ldouble(a), ldouble(b))); }
+float64 llvm_float64_sqrt( float64 a ) { return float64_val(sqrt(ldouble(a))); }
+int llvm_float64_eq( float64 a, float64 b ) { return ldouble(a) == ldouble(b); }
+int llvm_float64_le( float64 a, float64 b ) { return ldouble(a) <= ldouble(b); }
+int llvm_float64_lt( float64 a, float64 b ) { return ldouble(a) < ldouble(b); }
+int llvm_float64_unordered( float64 a, float64 b ) { return isunordered(ldouble(a), ldouble(b)); }
+float64 llvm_float64_abs(float64 a) { return float64_val(fabs(ldouble(a))); }
+float64 llvm_float64_chs(float64 a) { return float64_val(-ldouble(a)); }
+
+/* Fused multiply-add a * b + c. fma() rounds once, whereas the separate
+ * multiply and add round twice unless the compiler contracts them. */
+float64 llvm_float64_muladd( float64 a, float64 b, float64 c ) { return float64_val(fma(ldouble(a), ldouble(b), ldouble(c))); }
+
+/* If A has all exponent bits set, bit 22 clear and a non-zero value in the
+ * low 22 bits, return A with bit 22 set; otherwise return A unchanged. */
+float32 llvm_float32_maybe_silence_nan( float32 a ) {
+    const uint32_t quiet_bit = 1 << 22;
+    uint32_t bits = ((llvm_float32)(a)).u;
+
+    /* Exponent plus bit 22 must read 0x1FE, and the low 22 bits must be
+     * non-zero; anything else is passed through untouched. */
+    if (((bits >> 22) & 0x1FF) != 0x1FE || !(bits & 0x003FFFFF))
+        return a;
+
+    bits |= quiet_bit;
+    return float32_val(bits);
+}
+/* If A has all exponent bits set, bit 51 clear and a non-zero value in the
+ * low 51 bits, return A with bit 51 set; otherwise return A unchanged. */
+float64 llvm_float64_maybe_silence_nan( float64 a ) {
+    const uint64_t quiet_bit = 0x0008000000000000LL;
+    uint64_t bits = ((llvm_float64)(a)).u;
+
+    /* Exponent plus bit 51 must read 0xFFE, and the low 51 bits must be
+     * non-zero; anything else is passed through untouched. */
+    if (((bits >> 51) & 0xFFF) != 0xFFE || !(bits & 0x0007FFFFFFFFFFFFLL))
+        return a;
+
+    bits |= quiet_bit;
+    return float64_val(bits);
+}
+
+#undef float32_val
+#undef float64_val
+#undef lfloat
+#undef ldouble
+
+#endif /* !SOFTFLOAT_NATIVE_H */
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
diff --git a/llvm/hqemu-helper.c b/llvm/hqemu-helper.c
new file mode 100644
index 0000000..6325716
--- /dev/null
+++ b/llvm/hqemu-helper.c
@@ -0,0 +1,77 @@
+#include "cpu.h"
+#include "tcg.h"
+#include "exec/helper-proto.h"
+#include "hqemu.h"
+#include "fpu/softfloat-native.h"
+
+CPUArchState basereg;
+target_ulong pcid;
+
+#if defined(TARGET_I386)
+XMMReg xmm_reg;
+#endif
+
+extern TranslationBlock *tbs;
+
+void *ibtc_lookup(CPUArchState *env);
+void *cpbl_lookup(CPUArchState *env);
+int cpbl_validate(CPUArchState *env, target_ulong pc, int id);
+
+/* This helper is a hack to export the symbols of helper functions into the
+ * LLVM bitcode file. If a target reports a missing function or variable
+ * symbol, reference that symbol from this helper so that it is emitted.
+ * The calls below exist only for that purpose; their arguments are dummies. */
+void helper_export_hqemu(CPUArchState *env)
+{
+ helper_lookup_ibtc(env);
+ helper_lookup_cpbl(env);
+ helper_validate_cpbl(env, 0, 0);
+
+#if defined(CONFIG_SOFTMMU) && defined(CONFIG_LLVM)
+ /* Reference every softmmu load/store helper variant. */
+ target_ulong ptr = 0;
+ llvm_ret_ldub_mmu(env, ptr, 0);
+ llvm_le_lduw_mmu(env, ptr, 0);
+ llvm_le_ldul_mmu(env, ptr, 0);
+ llvm_le_ldq_mmu(env, ptr, 0);
+ llvm_be_lduw_mmu(env, ptr, 0);
+ llvm_be_ldul_mmu(env, ptr, 0);
+ llvm_be_ldq_mmu(env, ptr, 0);
+ llvm_ret_ldsb_mmu(env, ptr, 0);
+ llvm_le_ldsw_mmu(env, ptr, 0);
+ llvm_le_ldsl_mmu(env, ptr, 0);
+ llvm_be_ldsw_mmu(env, ptr, 0);
+ llvm_be_ldsl_mmu(env, ptr, 0);
+ llvm_ret_stb_mmu(env, ptr, 0, 0);
+ llvm_le_stw_mmu(env, ptr, 0, 0);
+ llvm_le_stl_mmu(env, ptr, 0, 0);
+ llvm_le_stq_mmu(env, ptr, 0, 0);
+ llvm_be_stw_mmu(env, ptr, 0, 0);
+ llvm_be_stl_mmu(env, ptr, 0, 0);
+ llvm_be_stq_mmu(env, ptr, 0, 0);
+#endif
+}
+
+/*
+ * Debug helper: if the translation block tbs[id] is marked BLOCK_INVALID,
+ * print its address and pc to stderr together with the pc of the block
+ * seen by the previous call (all ones if there was none). The block is
+ * then remembered for the next call.
+ *
+ * env - unused.
+ * id  - index into the global tbs array; not range-checked here.
+ */
+void helper_verify_tb(CPUArchState *env, int id)
+{
+    static TranslationBlock *last_tb;
+    TranslationBlock *tb = &tbs[id];
+    if (tb->mode == BLOCK_INVALID) {
+        /* Give the sentinel the exact type TARGET_FMT_lx expects, so it is
+         * all ones for 64-bit targets as well; %p requires a void *. */
+        target_ulong last_pc = last_tb ? last_tb->pc : (target_ulong)-1;
+        fprintf(stderr, "%s: tb=%p pc=" TARGET_FMT_lx " last_pc="
+                TARGET_FMT_lx "\n", __func__, (void *)tb, tb->pc, last_pc);
+    }
+    last_tb = tb;
+}
+
+/*
+ * helper_profile_exec is used to profile LLVM translated code: it bumps the
+ * 64-bit counter at slot IDX in the row that COUNTER_P holds for the
+ * executing CPU (selected by its cpu_index).
+ */
+void helper_profile_exec(CPUArchState *env, void *counter_p, int idx)
+{
+    uint64_t **per_cpu = (uint64_t **)counter_p;
+    uint64_t *row = per_cpu[ENV_GET_CPU(env)->cpu_index];
+
+    row[idx] += 1;
+}
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
diff --git a/llvm/hqemu.mk b/llvm/hqemu.mk
new file mode 100644
index 0000000..01de6d6
--- /dev/null
+++ b/llvm/hqemu.mk
@@ -0,0 +1,191 @@
+# Makefile for HQEMU.
+
+QEMU_CFLAGS += -I$(SRC_PATH)/llvm -I$(SRC_PATH)/llvm/include -I$(SRC_PATH)/llvm/atomic
+QEMU_CXXFLAGS += -std=c++11 -Wno-narrowing
+obj-y += llvm/optimization.o llvm/tracer.o llvm/utils.o llvm/hqemu-helper.o
+
+# LLVM
+ifdef CONFIG_LLVM
+
+LLVM_EXTRA_FLAGS += -Wall -DNEED_CPU_H -D$(LLVM_VERSION) -I..
+
+LLVM_CXXFLAGS := $(patsubst -Wcast-qual, ,$(LLVM_CXXFLAGS))
+LLVM_CXXFLAGS := $(patsubst -fno-exceptions, ,$(LLVM_CXXFLAGS))
+LLVM_CXXFLAGS := $(patsubst -pedantic, ,$(LLVM_CXXFLAGS))
+LLVM_CXXFLAGS += -Wno-unused-local-typedefs -Wno-cast-qual -fno-rtti
+LLVM_CFLAGS += $(patsubst -O2, ,$(patsubst -g, ,$(CFLAGS)))
+LLVM_CFLAGS += $(LLVM_EXTRA_FLAGS) $(QEMU_INCLUDES) \
+ -I$(SRC_PATH)/linux-user -I$(SRC_PATH)/linux-user/$(TARGET_ABI_DIR) \
+ -I$(SRC_PATH)/target-$(TARGET_BASE_ARCH) -I$(SRC_PATH)/llvm \
+ -I$(SRC_PATH)/llvm/atomic -I$(SRC_PATH)/llvm/include/pmu
+LLVM_CFLAGS := $(patsubst -pedantic, ,$(LLVM_CFLAGS))
+LLVM_CFLAGS := $(patsubst -g, ,$(LLVM_CFLAGS))
+
+PASS := llvm/pass
+ANALYSIS := llvm/analysis
+HPM := llvm/pmu
+QEMU_CXXFLAGS += $(LLVM_CXXFLAGS) $(LLVM_EXTRA_FLAGS) -Wno-undef
+LDFLAGS += $(LLVM_LDFLAGS)
+LIBS += $(LLVM_LIBS) -ldl -lz -lncurses
+
+
+ifeq ($(CONFIG_WIN32), y)
+LIBS += -lpthread -limagehlp -lpsapi
+endif
+
+obj-y += llvm/xml/tinyxml2.o
+obj-y += llvm/llvm.o \
+ llvm/llvm-translator.o \
+ llvm/llvm-opc.o \
+ llvm/llvm-opc-vector.o \
+ llvm/llvm-opc-mmu.o \
+ llvm/llvm-debug.o \
+ llvm/llvm-target.o \
+ llvm/llvm-soft-perfmon.o \
+ llvm/llvm-hard-perfmon.o \
+ llvm/llvm-annotate.o
+obj-y += $(PASS)/ProfileExec.o \
+ $(PASS)/ReplaceIntrinsic.o \
+ $(PASS)/CombineGuestMemory.o \
+ $(PASS)/CombineCasts.o \
+ $(PASS)/CombineZExtTrunc.o \
+ $(PASS)/FastMathPass.o \
+ $(PASS)/StateMappingPass.o \
+ $(PASS)/RedundantStateElimination.o \
+ $(PASS)/SimplifyPointer.o
+obj-y += $(ANALYSIS)/InnerLoopAnalysis.o
+
+# HPM
+obj-y += $(HPM)/pmu.o \
+ $(HPM)/pmu-events.o
+
+ifeq ($(ARCH),$(filter $(ARCH),i386 x86_64))
+obj-y += $(HPM)/x86/x86-events.o
+endif
+ifeq ($(ARCH),$(filter $(ARCH),arm aarch64))
+obj-y += $(HPM)/arm/arm-events.o
+endif
+ifeq ($(ARCH),$(filter $(ARCH),ppc64))
+obj-y += $(HPM)/ppc/ppc-events.o
+endif
+
+#
+# LLVM Bitcode file
+#
+
+ifdef CONFIG_SOFTMMU
+BCSUF = _softmmu
+MMU_HELPER = $(TARGET_PATH)/mmu_helper.bc
+endif
+
+LLVM_BITCODE = llvm_helper_${TARGET_NAME}${BCSUF}.bc
+TARGET_PATH = target-$(TARGET_BASE_ARCH)
+LLVM_HELPER += tcg-runtime.bc llvm/hqemu-helper.bc $(TARGET_PATH)/helper.bc
+
+ifeq ($(TARGET_I386), y)
+LLVM_HELPER += $(TARGET_PATH)/cc_helper.bc \
+ $(TARGET_PATH)/int_helper.bc \
+ $(TARGET_PATH)/smm_helper.bc \
+ $(TARGET_PATH)/excp_helper.bc \
+ $(TARGET_PATH)/mem_helper.bc \
+ $(TARGET_PATH)/svm_helper.bc \
+ $(TARGET_PATH)/fpu_helper.bc \
+ $(TARGET_PATH)/misc_helper.bc \
+ $(TARGET_PATH)/seg_helper.bc \
+ $(TARGET_PATH)/bpt_helper.bc
+endif
+ifeq ($(TARGET_X86_64), y)
+LLVM_HELPER += $(TARGET_PATH)/cc_helper.bc \
+ $(TARGET_PATH)/int_helper.bc \
+ $(TARGET_PATH)/smm_helper.bc \
+ $(TARGET_PATH)/excp_helper.bc \
+ $(TARGET_PATH)/mem_helper.bc \
+ $(TARGET_PATH)/svm_helper.bc \
+ $(TARGET_PATH)/fpu_helper.bc \
+ $(TARGET_PATH)/misc_helper.bc \
+ $(TARGET_PATH)/seg_helper.bc
+endif
+ifeq ($(TARGET_ALPHA), y)
+LLVM_HELPER += $(TARGET_PATH)/fpu_helper.bc \
+ $(TARGET_PATH)/int_helper.bc \
+ $(TARGET_PATH)/mem_helper.bc \
+ $(TARGET_PATH)/sys_helper.bc
+endif
+ifeq ($(TARGET_ARM), y)
+LLVM_HELPER += $(TARGET_PATH)/op_helper.bc \
+ $(TARGET_PATH)/neon_helper.bc
+endif
+ifeq ($(TARGET_AARCH64), y)
+LLVM_HELPER += $(TARGET_PATH)/op_helper.bc \
+ $(TARGET_PATH)/helper-a64.bc \
+ $(TARGET_PATH)/neon_helper.bc
+endif
+ifeq ($(TARGET_MICROBLAZE), y)
+LLVM_HELPER += $(TARGET_PATH)/op_helper.bc
+endif
+ifeq ($(TARGET_MIPS), y)
+LLVM_HELPER += $(TARGET_PATH)/op_helper.bc \
+ $(TARGET_PATH)/dsp_helper.bc \
+ $(TARGET_PATH)/lmi_helper.bc
+endif
+ifeq ($(TARGET_OPENRISC), y)
+LLVM_HELPER += $(TARGET_PATH)/exception_helper.bc \
+ $(TARGET_PATH)/fpu_helper.bc \
+ $(TARGET_PATH)/interrupt_helper.bc \
+ $(TARGET_PATH)/int_helper.bc \
+ $(TARGET_PATH)/sys_helper.bc \
+ $(MMU_HELPER)
+endif
+ifeq ($(TARGET_PPC), y)
+LLVM_HELPER += $(TARGET_PATH)/excp_helper.bc \
+ $(TARGET_PATH)/int_helper.bc \
+ $(TARGET_PATH)/misc_helper.bc \
+ $(TARGET_PATH)/fpu_helper.bc \
+ $(TARGET_PATH)/mem_helper.bc \
+ $(TARGET_PATH)/timebase_helper.bc \
+ $(MMU_HELPER)
+endif
+ifeq ($(TARGET_PPC64), y)
+LLVM_HELPER += $(TARGET_PATH)/excp_helper.bc \
+ $(TARGET_PATH)/int_helper.bc \
+ $(TARGET_PATH)/misc_helper.bc \
+ $(TARGET_PATH)/fpu_helper.bc \
+ $(TARGET_PATH)/mem_helper.bc \
+ $(TARGET_PATH)/timebase_helper.bc \
+ $(MMU_HELPER)
+endif
+ifeq ($(TARGET_SH4), y)
+LLVM_HELPER += $(TARGET_PATH)/op_helper.bc
+endif
+ifeq ($(TARGET_SPARC), y)
+LLVM_HELPER += $(TARGET_PATH)/cc_helper.bc \
+ $(TARGET_PATH)/fop_helper.bc \
+ $(TARGET_PATH)/int32_helper.bc \
+ $(TARGET_PATH)/ldst_helper.bc \
+ $(TARGET_PATH)/vis_helper.bc \
+ $(TARGET_PATH)/win_helper.bc \
+ $(MMU_HELPER)
+endif
+ifeq ($(TARGET_SPARC64), y)
+LLVM_HELPER += $(TARGET_PATH)/cc_helper.bc \
+ $(TARGET_PATH)/fop_helper.bc \
+ $(TARGET_PATH)/int64_helper.bc \
+ $(TARGET_PATH)/ldst_helper.bc \
+ $(TARGET_PATH)/vis_helper.bc \
+ $(TARGET_PATH)/win_helper.bc \
+ $(MMU_HELPER)
+endif
+
+# Compiler and flags used to turn the helper C sources into LLVM IR files.
+# NOTE(review): with -S -emit-llvm clang is expected to write textual IR even
+# though the outputs are named .bc -- confirm llvm-link input expectations.
+LOCAL_BC := clang
+LOCAL_BC_CFLAGS := -S -emit-llvm $(BCFLAGS) -I$(SRC_PATH)/llvm/include $(LLVM_CFLAGS) \
+ -Wno-missing-prototypes -Wno-sign-compare -Wno-unused-function \
+ -Wno-constant-conversion
+
+# Build one IR file per helper source.
+%.bc: %.c
+ $(call quiet-command,$(LOCAL_BC) $(LOCAL_BC_CFLAGS) -c -o $@ $<, " LCC $(TARGET_DIR)$@")
+
+
+# Link every per-source helper file listed in LLVM_HELPER into the single
+# per-target bitcode file.
+$(LLVM_BITCODE): $(LLVM_HELPER)
+ $(call quiet-command,llvm-link -o $@ $^, " LCC $(TARGET_DIR)$@")
+
+endif
diff --git a/llvm/include/InnerLoopAnalysis.h b/llvm/include/InnerLoopAnalysis.h
new file mode 100644
index 0000000..f11225d
--- /dev/null
+++ b/llvm/include/InnerLoopAnalysis.h
@@ -0,0 +1,291 @@
+/*
+ * (C) 2016 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#ifndef __INNERLOOPANALYSIS_H
+#define __INNERLOOPANALYSIS_H
+
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm-types.h"
+
+
+/* Describes an induction: the value it starts from and its SCEV step. */
+class InductionDesc {
+    /* Start value. */
+    Value *StartValue;
+    /* Step value. */
+    const SCEV *Step;
+
+public:
+    InductionDesc(Value *Start, const SCEV *StepExpr)
+        : StartValue(Start), Step(StepExpr) {}
+    /* An empty descriptor has neither a start value nor a step. */
+    InductionDesc() : InductionDesc(nullptr, nullptr) {}
+
+    const SCEV *getStep() const { return Step; }
+    Value *getStartValue() const { return StartValue; }
+};
+
+/* Describes a reduction: its start value, the instruction whose value is
+ * used outside the loop, the kind of operation and the scalar type.
+ * All accessors are const so that they can be used through const
+ * references and pointers. */
+class ReductionDesc {
+public:
+
+    enum ReductionKind {
+        NoReduction, /* Not a reduction. */
+        IntegerAdd,  /* Sum of numbers. */
+        IntegerMult, /* Product of numbers. */
+        IntegerOr,   /* Bitwise or logical OR of numbers. */
+        IntegerAnd,  /* Bitwise or logical AND of numbers. */
+        IntegerXor,  /* Bitwise or logical XOR of numbers. */
+        FloatAdd,    /* Sum of float numbers. */
+        FloatMult,   /* Product of float numbers. */
+    };
+
+    /* Default: not a reduction, all pointers null. */
+    ReductionDesc()
+        : StartValue(nullptr), LoopExitInstr(nullptr),
+          Kind(ReductionKind::NoReduction), Ty(nullptr) {}
+    ReductionDesc(Value *Start, Instruction *Exit, ReductionKind K, Type *Ty)
+        : StartValue(Start), LoopExitInstr(Exit), Kind(K), Ty(Ty) {}
+
+    Value *getStartValue() const { return StartValue; }
+    /* Same object as getLoopExitInstr(), viewed as a Value. */
+    Value *getNextValue() const { return LoopExitInstr; }
+    Instruction *getLoopExitInstr() const { return LoopExitInstr; }
+    ReductionKind getReductionKind() const { return Kind; }
+    Type *getScalarType() const { return Ty; }
+
+private:
+    /* The starting value of the recurrence. */
+    Value *StartValue;
+    /* The instruction whose value is used outside the loop. */
+    Instruction *LoopExitInstr;
+    /* The kind of the recurrence. */
+    ReductionKind Kind;
+    /* The scalar type. */
+    Type *Ty;
+};
+
+/*
+ * The InnerLoop class represents a single innermost loop. The InnerLoop has
+ * a special shape that is specific to the DBT decoded guest loop, and its loop
+ * definition is different from that of a natural loop, e.g., latch and exiting block.
+ */
+class InnerLoop {
+public:
+ typedef std::map<PHINode *, InductionDesc> InductionList;
+ typedef std::map<PHINode *, ReductionDesc> ReductionList;
+
+private:
+ Loop &TheLoop;
+
+ /* The list of blocks in this loop. First entry is the header node. */
+ std::vector<BasicBlock *> Blocks;
+ SmallPtrSet<const BasicBlock *, 8> DenseBlockSet;
+
+ std::vector<BasicBlock *> Latches;
+ std::map<BasicBlock *, BasicBlock *> SplitLatches;
+
+ bool UnknownPhi;
+ InductionList Inductions;
+ ReductionList Reductions;
+
+ void addInduction(PHINode *Phi, Value *Start, const SCEV *Step) {
+ Inductions[Phi] = InductionDesc(Start, Step);
+ }
+
+ void addReduction(PHINode *Phi, Value *Start, Instruction *Exit,
+ ReductionDesc::ReductionKind K, Type *Ty) {
+ Reductions[Phi] = ReductionDesc(Start, Exit, K, Ty);
+ }
+
+ InnerLoop(const InnerLoop &) = delete;
+ const InnerLoop& operator=(const InnerLoop &) = delete;
+
+ friend class InnerLoopAnalysis;
+
+public:
+ InnerLoop(Loop *loop);
+ ~InnerLoop() {}
+
+ Loop &getLoop() const { return TheLoop; }
+
+ BasicBlock *getHeader() const { return Blocks.front(); }
+
+ /* Return true if the specified basic block is in this loop. */
+ bool contains(const BasicBlock *BB) const {
+ return DenseBlockSet.count(BB);
+ }
+
+ /* Return true if the specified instruction is in this loop. */
+ bool contains(const Instruction *Inst) const {
+ return contains(Inst->getParent());
+ }
+
+ /* Get a list of the basic blocks which make up this loop. */
+ typedef typename std::vector<BasicBlock*>::const_iterator block_iterator;
+ const std::vector<BasicBlock*> &getBlocks() const { return Blocks; }
+ block_iterator block_begin() const { return Blocks.begin(); }
+ block_iterator block_end() const { return Blocks.end(); }
+ inline iterator_range<block_iterator> blocks() const {
+ return make_range(block_begin(), block_end());
+ }
+
+ /* Get the number of blocks in this loop in constant time. */
+ unsigned getNumBlocks() const { return Blocks.size(); }
+
+ /* True if terminator in the block can branch to another block that is
+ * outside of the current loop. */
+ bool isLoopExiting(BasicBlock *BB) const;
+
+ /* Calculate the number of back edges to the loop header. */
+ unsigned getNumBackEdges() const;
+
+ /* Return all blocks inside the loop that have successors outside of the
+ * loop. */
+ void getExitingBlocks(SmallVectorImpl<BasicBlock *> &ExitingBlocks) const;
+
+ /* If getExitingBlocks would return exactly one block, return that block.
+ * Otherwise return null. */
+ BasicBlock *getExitingBlock() const;
+
+ /* Return all of the successor blocks of this loop. */
+ void getExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const;
+
+ /* If getExitBlocks would return exactly one block, return that block.
+ * Otherwise return null. */
+ BasicBlock *getExitBlock() const;
+
+ /* If there is a preheader for this loop, return it. A loop has a preheader
+ * if there is only one edge to the header of the loop from outside of the
+ * loop. If this is the case, the block branching to the header of the loop
+ * is the preheader node.
+ *
+ * This method returns null if there is no preheader for the loop. */
+ BasicBlock *getLoopPreheader() const;
+
+ /* If the given loop's header has exactly one unique predecessor outside the
+ * loop, return it. Otherwise return null.
+ * This is less strict than the loop "preheader" concept, which requires
+ * the predecessor to have exactly one successor. */
+ BasicBlock *getLoopPredecessor() const;
+
+ unsigned getNumLoopLatches() const { return Latches.size(); }
+ unsigned getNumSplitLatches() const { return SplitLatches.size(); }
+
+ /* Return all loop latch blocks of this loop. A latch block is a block that
+ * contains a branch back to the header. */
+ void getLoopLatches(SmallVectorImpl<BasicBlock *> &LoopLatches) const {
+     /* Append every recorded latch block to the caller's vector; the
+      * vector is not cleared first. */
+     for (auto Latch : Latches) {
+         LoopLatches.push_back(Latch);
+     }
+ }
+
+ /* If there is a latch tail, return it. */
+ BasicBlock *getSingleLatchTail() const {
+     /* Only meaningful when exactly one split latch is recorded. */
+     if (SplitLatches.size() != 1)
+         return nullptr;
+     return SplitLatches.begin()->first;
+ }
+
+ /* If there is a latch head, return it. */
+ BasicBlock *getSingleLatchHead() const {
+     /* Only meaningful when exactly one split latch is recorded. */
+     if (SplitLatches.size() != 1)
+         return nullptr;
+     return SplitLatches.begin()->second;
+ }
+
+ /* Return all of the latch tails of this loop. */
+ void getLatchTails(SmallVectorImpl<BasicBlock *> &LatchTails) const {
+     /* The key of each SplitLatches entry is the latch tail; append them
+      * all to the caller's vector without clearing it. */
+     for (auto &TailAndHead : SplitLatches) {
+         LatchTails.push_back(TailAndHead.first);
+     }
+ }
+
+ /* Given a latch tail, return its latch head. */
+ BasicBlock *getLatchHead(BasicBlock *BB) {
+     /* Look the tail up once; an unknown tail yields null. */
+     auto Entry = SplitLatches.find(BB);
+     return (Entry != SplitLatches.end()) ? Entry->second : nullptr;
+ }
+
+ /* If the given phi is an induction of the loop, return the induction. */
+ InductionDesc *getInduction(PHINode *Phi) {
+     /* Single lookup: return a pointer to the stored descriptor, or null
+      * when the phi has no entry in Inductions. */
+     auto Entry = Inductions.find(Phi);
+     return (Entry != Inductions.end()) ? &Entry->second : nullptr;
+ }
+
+ /* If the given phi is a reduction of the loop, return the reduction. */
+ ReductionDesc *getReduction(PHINode *Phi) {
+     /* Single lookup: return a pointer to the stored descriptor, or null
+      * when the phi has no entry in Reductions. */
+     auto Entry = Reductions.find(Phi);
+     return (Entry != Reductions.end()) ? &Entry->second : nullptr;
+ }
+
+ /* Return true if the loop has unknown phi(s). A loop has unknown phi(s) if
+ * a phi node is not identified, or the loop has no preheader or latch tail.
+ *
+ * If the loop has unknown phi(s), the data structure of Inductions and
+ * Reductions can be undefined. */
+ bool hasUnknownPhi() { return UnknownPhi; }
+
+ /* Return true if the instruction `From' can flow to instruction `To' in
+ * the loop. */
+ bool isReachable(Instruction *From, Instruction *To);
+};
+
+/*
+ * InnerLoopAnalysis holds the InnerLoop objects produced by analyze() and
+ * exposes them through a read-only iterator interface. The stored pointers
+ * are owned by this object and freed by releaseMemory().
+ */
+class InnerLoopAnalysis {
+    std::vector<InnerLoop *> InnerLoops;   /* owned; deleted in releaseMemory() */
+
+    /* Not copyable: a copy would share the raw InnerLoop pointers and both
+     * destructors would delete them. */
+    InnerLoopAnalysis(const InnerLoopAnalysis &) = delete;
+    InnerLoopAnalysis &operator=(const InnerLoopAnalysis &) = delete;
+
+    void analyzePhi(InnerLoop &TheLoop, ScalarEvolution *SE);
+    bool analyzeInduction(InnerLoop &TheLoop, ScalarEvolution *SE, PHINode *Phi);
+    bool analyzeReduction(InnerLoop &TheLoop, PHINode *Phi);
+
+public:
+    InnerLoopAnalysis() {}
+    ~InnerLoopAnalysis() { releaseMemory(); }
+
+    /* Delete every stored InnerLoop and leave the container empty. Safe to
+     * call repeatedly. */
+    void releaseMemory() {
+        for (InnerLoop *L : InnerLoops)
+            delete L;
+        InnerLoops.clear();
+    }
+
+    /* Intentionally empty. */
+    void print(raw_ostream &OS, const Module * = nullptr) const {}
+    void verify() const {}
+
+    void analyze(LoopInfo *LI, ScalarEvolution *SE);
+
+    /* iterator/begin/end - The interface to the innermost loops. */
+    typedef typename std::vector<InnerLoop *>::const_iterator iterator;
+    typedef typename std::vector<InnerLoop *>::const_reverse_iterator
+        reverse_iterator;
+    iterator begin() const { return InnerLoops.begin(); }
+    iterator end() const { return InnerLoops.end(); }
+    reverse_iterator rbegin() const { return InnerLoops.rbegin(); }
+    reverse_iterator rend() const { return InnerLoops.rend(); }
+    bool empty() const { return InnerLoops.empty(); }
+
+    /* Number of stored loops; const so it is usable through the const
+     * accessor, like empty(). */
+    unsigned size() const { return InnerLoops.size(); }
+};
+
+/*
+ * InnerLoopAnalysisWrapperPass Pass
+ */
+/* FunctionPass wrapper that holds an InnerLoopAnalysis by value and exposes
+ * it through getLoopAnalysis(). The overridden pass hooks are defined
+ * out of line. */
+class InnerLoopAnalysisWrapperPass : public FunctionPass {
+ /* The wrapped analysis object returned by getLoopAnalysis(). */
+ InnerLoopAnalysis LA;
+
+public:
+ static char ID;
+ /* Construct the pass with ID and run the pass initializer against the
+ * global PassRegistry. */
+ InnerLoopAnalysisWrapperPass() : FunctionPass(ID) {
+ initializeInnerLoopAnalysisWrapperPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ /* Mutable and read-only access to the wrapped analysis. */
+ InnerLoopAnalysis &getLoopAnalysis() { return LA; }
+ const InnerLoopAnalysis &getLoopAnalysis() const { return LA; }
+
+ /* FunctionPass overrides; implementations are not part of this header. */
+ void releaseMemory() override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ void print(raw_ostream &OS, const Module * = nullptr) const override;
+ void verifyAnalysis() const override;
+ bool runOnFunction(Function &F) override;
+};
+
+#endif
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
diff --git a/llvm/include/JIT.h b/llvm/include/JIT.h
new file mode 100644
index 0000000..a1b3c8d
--- /dev/null
+++ b/llvm/include/JIT.h
@@ -0,0 +1,228 @@
+//===-- JIT.h - Class definition for the JIT --------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the top-level JIT data structure.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __JIT_H
+#define __JIT_H
+
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/PassManager.h"
+
+namespace llvm {
+
+class Function;
+struct JITEvent_EmittedFunctionDetails;
+class MachineCodeEmitter;
+class MachineCodeInfo;
+class TargetJITInfo;
+class TargetMachine;
+
+/// JITState - Bundles the per-module function pass manager with the list of
+/// functions still waiting to be code generated.
+class JITState {
+private:
+    FunctionPassManager PM;  // Passes to compile a function
+    Module *M;               // Module used to create the PM
+
+    /// PendingFunctions - Functions which have not been code generated yet,
+    /// but were called from a function being code generated.
+    std::vector<AssertingVH<Function> > PendingFunctions;
+
+public:
+    /// Build the pass manager for \p Mod and remember the module.
+    explicit JITState(Module *Mod) : PM(Mod), M(Mod) {}
+
+    FunctionPassManager &getPM() { return PM; }
+
+    Module *getModule() const { return M; }
+
+    std::vector<AssertingVH<Function> > &getPendingFunctions() {
+        return PendingFunctions;
+    }
+};
+
+
+class JIT : public ExecutionEngine {
+ /// types
+ typedef ValueMap<const BasicBlock *, void *>
+ BasicBlockAddressMapTy;
+ /// data
+ TargetMachine &TM; // The current target we are compiling to
+ TargetJITInfo &TJI; // The JITInfo for the target we are compiling to
+ JITCodeEmitter *JCE; // JCE object
+ JITMemoryManager *JMM;
+ std::vector<JITEventListener*> EventListeners;
+
+ /// AllocateGVsWithCode - Some applications require that global variables and
+ /// code be allocated into the same region of memory, in which case this flag
+ /// should be set to true. Doing so breaks freeMachineCodeForFunction.
+ bool AllocateGVsWithCode;
+
+ /// True while the JIT is generating code. Used to assert against recursive
+ /// entry.
+ bool isAlreadyCodeGenerating;
+
+ JITState *jitstate;
+
+ /// BasicBlockAddressMap - A mapping between LLVM basic blocks and their
+ /// actualized version, only filled for basic blocks that have their address
+ /// taken.
+ BasicBlockAddressMapTy BasicBlockAddressMap;
+
+
+ JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji,
+ JITMemoryManager *JMM, bool AllocateGVsWithCode);
+public:
+ ~JIT();
+
+ static void Register() {
+ JITCtor = createJIT;
+ }
+
+ /// getJITInfo - Return the target JIT information structure.
+ ///
+ TargetJITInfo &getJITInfo() const { return TJI; }
+
+ /// create - Create and return a new JIT compiler if there is one available
+ /// for the current target. Otherwise, return null.
+ ///
+ static ExecutionEngine *create(Module *M,
+ std::string *Err,
+ JITMemoryManager *JMM,
+ CodeGenOpt::Level OptLevel =
+ CodeGenOpt::Default,
+ bool GVsWithCode = true,
+ Reloc::Model RM = Reloc::Default,
+ CodeModel::Model CMM = CodeModel::JITDefault) {
+ return ExecutionEngine::createJIT(M, Err, JMM, OptLevel, GVsWithCode,
+ RM, CMM);
+ }
+
+ void addModule(Module *M) override;
+
+ /// removeModule - Remove a Module from the list of modules. Returns true if
+ /// M is found.
+ bool removeModule(Module *M) override;
+
+ /// runFunction - Start execution with the specified function and arguments.
+ ///
+ GenericValue runFunction(Function *F,
+ const std::vector<GenericValue> &ArgValues) override;
+
+ /// getPointerToNamedFunction - This method returns the address of the
+ /// specified function by using the MemoryManager. As such it is only
+ /// useful for resolving library symbols, not code generated symbols.
+ ///
+ /// If AbortOnFailure is false and no function with the given name is
+ /// found, this function silently returns a null pointer. Otherwise,
+ /// it prints a message to stderr and aborts.
+ ///
+ void *getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure = true) override;
+
+ // CompilationCallback - Invoked the first time that a call site is found,
+ // which causes lazy compilation of the target function.
+ //
+ static void CompilationCallback();
+
+ /// getPointerToFunction - This returns the address of the specified function,
+ /// compiling it if necessary.
+ ///
+ void *getPointerToFunction(Function *F) override;
+
+ /// addPointerToBasicBlock - Adds address of the specific basic block.
+ void addPointerToBasicBlock(const BasicBlock *BB, void *Addr);
+
+ /// clearPointerToBasicBlock - Removes address of specific basic block.
+ void clearPointerToBasicBlock(const BasicBlock *BB);
+
+ /// getPointerToBasicBlock - This returns the address of the specified basic
+ /// block, assuming function is compiled.
+ void *getPointerToBasicBlock(BasicBlock *BB) override;
+
+ /// getOrEmitGlobalVariable - Return the address of the specified global
+ /// variable, possibly emitting it to memory if needed. This is used by the
+ /// Emitter.
+ void *getOrEmitGlobalVariable(const GlobalVariable *GV) override;
+
+ /// getPointerToFunctionOrStub - If the specified function has been
+ /// code-gen'd, return a pointer to the function. If not, compile it, or use
+ /// a stub to implement lazy compilation if available.
+ ///
+ void *getPointerToFunctionOrStub(Function *F) override;
+
+ /// recompileAndRelinkFunction - This method is used to force a function
+ /// which has already been compiled, to be compiled again, possibly
+ /// after it has been modified. Then the entry to the old copy is overwritten
+ /// with a branch to the new copy. If there was no old copy, this acts
+ /// just like JIT::getPointerToFunction().
+ ///
+ void *recompileAndRelinkFunction(Function *F) override;
+
+ /// freeMachineCodeForFunction - deallocate memory used to code-generate this
+ /// Function.
+ ///
+ void freeMachineCodeForFunction(Function *F) override;
+
+ /// addPendingFunction - while jitting non-lazily, a called but non-codegen'd
+ /// function was encountered. Add it to a pending list to be processed after
+ /// the current function.
+ ///
+ void addPendingFunction(Function *F);
+
+ /// getCodeEmitter - Return the code emitter this JIT is emitting into.
+ ///
+ JITCodeEmitter *getCodeEmitter() const { return JCE; }
+
+ static ExecutionEngine *createJIT(Module *M,
+ std::string *ErrorStr,
+ JITMemoryManager *JMM,
+ bool GVsWithCode,
+ TargetMachine *TM);
+
+ // Run the JIT on F and return information about the generated code
+ void runJITOnFunction(Function *F, MachineCodeInfo *MCI = nullptr) override;
+
+ void RegisterJITEventListener(JITEventListener *L) override;
+ void UnregisterJITEventListener(JITEventListener *L) override;
+
+ TargetMachine *getTargetMachine() override { return &TM; }
+
+ /// These functions correspond to the methods on JITEventListener. They
+ /// iterate over the registered listeners and call the corresponding method on
+ /// each.
+ void NotifyFunctionEmitted(
+ const Function &F, void *Code, size_t Size,
+ const JITEvent_EmittedFunctionDetails &Details);
+ void NotifyFreeingMachineCode(void *OldPtr);
+
+ BasicBlockAddressMapTy &
+ getBasicBlockAddressMap() {
+ return BasicBlockAddressMap;
+ }
+
+
+private:
+ static JITCodeEmitter *createEmitter(JIT &J, JITMemoryManager *JMM,
+ TargetMachine &tm);
+ void runJITOnFunctionUnlocked(Function *F);
+ void updateFunctionStubUnlocked(Function *F);
+ void jitTheFunctionUnlocked(Function *F);
+
+protected:
+
+ /// getMemoryForGV - Allocate memory for a global variable.
+ char* getMemoryForGV(const GlobalVariable* GV) override;
+
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/llvm/include/JITMemoryManager.h b/llvm/include/JITMemoryManager.h
new file mode 100644
index 0000000..301d227
--- /dev/null
+++ b/llvm/include/JITMemoryManager.h
@@ -0,0 +1,318 @@
+//===-- JITMemoryManager.h - Memory Allocator for JIT'd code --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the DefaultJITMemoryManager class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __JITMEMORYMANAGER_H
+#define __JITMEMORYMANAGER_H
+
+#include <sys/mman.h>
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
+#include "llvm-debug.h"
+#include "utils.h"
+
+using namespace llvm;
+
+#define MIN_CODE_CACHE_SIZE (1 * 1024 * 1024)
+#define DEFAULT_GLOBAL_SIZE (64 * 1024)
+#define DEFAULT_THRESHOLD (32 * 1024)
+
+
+// AtExitHandlers - List of functions to call when the program exits,
+// registered with the atexit() library function.
+static std::vector<void (*)()> AtExitHandlers;
+
+/// runAtExitHandlers - Run any functions registered by the program's
+/// calls to atexit(3), which we intercept and store in
+/// AtExitHandlers.
+///
+static void runAtExitHandlers() {
+    // Run handlers last-registered-first. Each handler is removed from the
+    // list before it is invoked, so handlers that a running handler
+    // registers are picked up by the same loop.
+    for (;;) {
+        if (AtExitHandlers.empty())
+            break;
+        void (*Handler)() = AtExitHandlers.back();
+        AtExitHandlers.pop_back();
+        Handler();
+    }
+}
+
+//===----------------------------------------------------------------------===//
+// Function stubs that are invoked instead of certain library calls
+//
+// Force the following functions to be linked in to anything that uses the
+// JIT. This is a hack designed to work around the all-too-clever Glibc
+// strategy of making these functions work differently when inlined vs. when
+// not inlined, and hiding their real definitions in a separate archive file
+// that the dynamic linker can't see. For more info, search for
+// 'libc_nonshared.a' on Google, or read http://llvm.org/PR274.
+#if defined(__linux__) && defined(__GLIBC__)
+/* stat functions are redirecting to __xstat with a version number. On x86-64
+ * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat'
+ * available as an exported symbol, so we have to add it explicitly.
+ */
+namespace {
+/* Registers the listed libc entry points with sys::DynamicLibrary when the
+ * file-scope instance below is constructed. */
+class StatSymbols {
+public:
+    StatSymbols() {
+        /* Name/address pairs, registered in this order. */
+        const struct {
+            const char *Name;
+            void *Addr;
+        } Table[] = {
+            { "stat",        (void*)(intptr_t)stat },
+            { "fstat",       (void*)(intptr_t)fstat },
+            { "lstat",       (void*)(intptr_t)lstat },
+            { "stat64",      (void*)(intptr_t)stat64 },
+            { "\x1stat64",   (void*)(intptr_t)stat64 },
+            { "\x1open64",   (void*)(intptr_t)open64 },
+            { "\x1lseek64",  (void*)(intptr_t)lseek64 },
+            { "fstat64",     (void*)(intptr_t)fstat64 },
+            { "lstat64",     (void*)(intptr_t)lstat64 },
+            { "atexit",      (void*)(intptr_t)atexit },
+            { "mknod",       (void*)(intptr_t)mknod },
+        };
+        for (const auto &Entry : Table)
+            sys::DynamicLibrary::AddSymbol(Entry.Name, Entry.Addr);
+    }
+};
+}
+static StatSymbols initStatSymbols;
+#endif // __linux__
+
+// jit_exit - Used to intercept the "exit" library call.
+static void jit_exit(int Status) {
+ runAtExitHandlers(); // Run atexit handlers...
+ exit(Status);
+}
+
+// jit_atexit - Used to intercept the "atexit" library call.
+static int jit_atexit(void (*Fn)()) {
+ AtExitHandlers.push_back(Fn); // Take note of atexit handler...
+ return 0; // Always successful
+}
+
+static int jit_noop() {
+ return 0;
+}
+
+
+/// DefaultJITMemoryManager - Manage trace cache memory for the JIT code generation.
+class DefaultJITMemoryManager : public JITMemoryManager {
+ uint8_t *TraceCache;
+ size_t TraceCacheSize;
+
+ uint8_t *GlobalBase; /* section for global data used by QEMU helpers */
+ uint8_t *CodeBase; /* section for emitting trace code */
+ uint8_t *CodeGenPtr;
+
+ size_t GlobalRemain;
+ size_t CodeRemain;
+ size_t Threshold;
+
+ hqemu::Mutex lock;
+
+public:
+ DefaultJITMemoryManager(uint8_t *Cache, size_t Size)
+ : TraceCache(Cache), TraceCacheSize(Size), Threshold(DEFAULT_THRESHOLD)
+ {
+ GlobalBase = TraceCache;
+ GlobalRemain = DEFAULT_GLOBAL_SIZE;
+
+ CodeBase = GlobalBase + DEFAULT_GLOBAL_SIZE;
+ CodeBase = (uint8_t *)(((uintptr_t)CodeBase + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
+ CodeRemain = (uintptr_t)TraceCache + TraceCacheSize - (uintptr_t)CodeBase;
+ CodeGenPtr = CodeBase;
+ }
+
+ ~DefaultJITMemoryManager() {}
+
+ //===----------------------------------------------------------------------===//
+ //
+ /// getPointerToNamedFunction - This method returns the address of the specified
+ /// function by using the dynamic loader interface. As such it is only useful
+ /// for resolving library symbols, not code generated symbols.
+ ///
+ void *getPointerToNamedFunction(const std::string &Name,
+ bool AbortOnFailure = true) override {
+ // Check to see if this is one of the functions we want to intercept. Note,
+ // we cast to intptr_t here to silence a -pedantic warning that complains
+ // about casting a function pointer to a normal pointer.
+ if (Name == "exit") return (void*)(intptr_t)&jit_exit;
+ if (Name == "atexit") return (void*)(intptr_t)&jit_atexit;
+
+ // We should not invoke parent's ctors/dtors from generated main()!
+ // On Mingw and Cygwin, the symbol __main is resolved to
+ // callee's(eg. tools/lli) one, to invoke wrong duplicated ctors
+ // (and register wrong callee's dtors with atexit(3)).
+ // We expect ExecutionEngine::runStaticConstructorsDestructors()
+ // is called before ExecutionEngine::runFunctionAsMain() is called.
+ if (Name == "__main") return (void*)(intptr_t)&jit_noop;
+
+ const char *NameStr = Name.c_str();
+ // If this is an asm specifier, skip the sentinel.
+ if (NameStr[0] == 1) ++NameStr;
+
+ // If it's an external function, look it up in the process image...
+ void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr);
+ if (Ptr) return Ptr;
+
+ // If it wasn't found and if it starts with an underscore ('_') character,
+ // try again without the underscore.
+ if (NameStr[0] == '_') {
+ Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1);
+ if (Ptr) return Ptr;
+ }
+
+ // Darwin/PPC adds $LDBLStub suffixes to various symbols like printf. These
+ // are references to hidden visibility symbols that dlsym cannot resolve.
+ // If we have one of these, strip off $LDBLStub and try again.
+#if defined(__APPLE__) && defined(__ppc__)
+ if (Name.size() > 9 && Name[Name.size()-9] == '$' &&
+ memcmp(&Name[Name.size()-8], "LDBLStub", 8) == 0) {
+ // First try turning $LDBLStub into $LDBL128. If that fails, strip it off.
+ // This mirrors logic in libSystemStubs.a.
+ std::string Prefix = std::string(Name.begin(), Name.end()-9);
+ if (void *Ptr = getPointerToNamedFunction(Prefix+"$LDBL128", false))
+ return Ptr;
+ if (void *Ptr = getPointerToNamedFunction(Prefix, false))
+ return Ptr;
+ }
+#endif
+
+ if (AbortOnFailure) {
+ report_fatal_error("Program used external function '"+Name+
+ "' which could not be resolved!");
+ }
+ return nullptr;
+ }
+
+ void AllocateGOT() override { hqemu_error("fixme.\n"); }
+
+ // Testing methods.
+ bool CheckInvariants(std::string &ErrorStr) override { hqemu_error("fixme.\n"); return false; }
+ size_t GetDefaultCodeSlabSize() override { hqemu_error("fixme.\n"); return 0; }
+ size_t GetDefaultDataSlabSize() override { hqemu_error("fixme.\n"); return 0; }
+ size_t GetDefaultStubSlabSize() override { hqemu_error("fixme.\n"); return 0; }
+ unsigned GetNumCodeSlabs() override { hqemu_error("fixme.\n"); return 0; }
+ unsigned GetNumDataSlabs() override { hqemu_error("fixme.\n"); return 0; }
+ unsigned GetNumStubSlabs() override { hqemu_error("fixme.\n"); return 0; }
+
+ /// startFunctionBody - When a function starts, allocate a block of free
+ /// executable memory, returning a pointer to it and its actual size.
+ uint8_t *startFunctionBody(const Function *F,
+ uintptr_t &ActualSize) override {
+ lock.acquire();
+ if (unlikely(CodeRemain < Threshold))
+ hqemu_error("internal error (fixme).\n");
+
+ ActualSize = CodeRemain;
+ return CodeGenPtr;
+ }
+
+ /// endFunctionBody - The function F is now allocated, and takes the memory
+ /// in the range [FunctionStart,FunctionEnd).
+ void endFunctionBody(const Function *F, uint8_t *FunctionStart,
+                      uint8_t *FunctionEnd) override {
+     /// Commits the bytes in [FunctionStart, FunctionEnd): advances
+     /// CodeGenPtr past them (rounded up to CODE_GEN_ALIGN), recomputes
+     /// CodeRemain, and releases the lock taken by startFunctionBody.
+     assert(FunctionEnd > FunctionStart);
+
+     size_t GenSize = FunctionEnd - FunctionStart;
+     if (unlikely(GenSize > CodeRemain))
+         hqemu_error("exceeds available cache size.\n");
+
+     uintptr_t CacheEnd = (uintptr_t)TraceCache + TraceCacheSize;
+     CodeGenPtr = (uint8_t *)(((uintptr_t)CodeGenPtr + GenSize + CODE_GEN_ALIGN - 1)
+                              & ~(CODE_GEN_ALIGN - 1));
+
+     // Rounding up can move CodeGenPtr past the end of the cache when the
+     // cache end is not CODE_GEN_ALIGN-aligned. Clamp instead of letting
+     // the unsigned subtraction wrap to a huge "remaining" size.
+     CodeRemain = ((uintptr_t)CodeGenPtr < CacheEnd)
+                      ? CacheEnd - (uintptr_t)CodeGenPtr : 0;
+     lock.release();
+ }
+
+ /// allocateSpace - Allocate a memory block of the given size. This method
+ /// cannot be called between calls to startFunctionBody and endFunctionBody.
+ uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) override {
+ hqemu_error("fixme.\n");
+ return nullptr;
+ }
+
+ /// allocateStub - Allocate memory for a function stub.
+ uint8_t *allocateStub(const GlobalValue* F, unsigned StubSize,
+ unsigned Alignment) override {
+ return allocateGlobal(StubSize, Alignment);
+ }
+
+ /// allocateGlobal - Allocate memory for a global.
+ uint8_t *allocateGlobal(uintptr_t Size, unsigned Alignment) override {
+     /// Bump-allocates Size bytes from the global section under the lock.
+     /// An Alignment of 0 is treated as 16.
+     hqemu::MutexGuard locked(lock);
+
+     if (Alignment == 0)
+         Alignment = 16;
+     if ((Alignment & (Alignment - 1)) != 0)
+         hqemu_error("alignment must be a power of two.\n");
+
+     // Bytes to skip so the returned pointer is Alignment-aligned.
+     unsigned Padding = ((intptr_t)GlobalBase & (Alignment - 1));
+     if (Padding != 0)
+         Padding = Alignment - Padding;
+
+     if (GlobalRemain < Size + Padding)
+         hqemu_error("exceeds available global size.\n");
+
+     uint8_t *Result = GlobalBase + Padding;
+     GlobalBase = Result + Size;
+     GlobalRemain -= (Size + Padding);
+     return Result;
+ }
+
+ /// allocateCodeSection - Allocate memory for a code section.
+ uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID,
+ StringRef SectionName) override {
+ hqemu_error("fixme.\n"); return nullptr;
+ }
+
+ /// allocateDataSection - Allocate memory for a data section.
+ uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID, StringRef SectionName,
+ bool IsReadOnly) override {
+ hqemu_error("fixme.\n"); return nullptr;
+ }
+
+ bool finalizeMemory(std::string *ErrMsg) override { return false; }
+
+ uint8_t *getGOTBase() const override { return nullptr; }
+
+ void deallocateBlock(void *Block) {}
+
+ /// deallocateFunctionBody - Deallocate all memory for the specified
+ /// function body.
+ void deallocateFunctionBody(void *Body) override {}
+
+ /// setMemoryWritable - When code generation is in progress,
+ /// the code pages may need permissions changed.
+ void setMemoryWritable() override {}
+ /// setMemoryExecutable - When code generation is done and we're ready to
+ /// start execution, the code pages may need permissions changed.
+ void setMemoryExecutable() override {}
+
+ /// setPoisonMemory - Controls whether we write garbage over freed memory.
+ ///
+ void setPoisonMemory(bool poison) override {}
+
+ size_t getCodeSize() { return CodeGenPtr - CodeBase; }
+ bool isSizeAvailable() {
+     /// True while at least Threshold bytes of code space remain; read
+     /// under the lock.
+     hqemu::MutexGuard locked(lock);
+     return CodeRemain >= Threshold;
+ }
+ void Flush() {
+ CodeGenPtr = CodeBase;
+ CodeRemain = (uintptr_t)TraceCache + TraceCacheSize - (uintptr_t)CodeBase;
+ }
+
+ static DefaultJITMemoryManager *Create(uint8_t *Cache, size_t Size) {
+ if (Size < MIN_CODE_CACHE_SIZE)
+ hqemu_error("Trace cache size is too small.\n");
+ return new DefaultJITMemoryManager(Cache, Size);
+ }
+};
+
+#endif
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
diff --git a/llvm/include/MCJITMemoryManager.h b/llvm/include/MCJITMemoryManager.h
new file mode 100644
index 0000000..33059a5
--- /dev/null
+++ b/llvm/include/MCJITMemoryManager.h
@@ -0,0 +1,213 @@
+//===-- MCJITMemoryManager.h - Memory manager for MC-JIT -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface of the MCJIT memory manager base class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef __MCJITMEMORYMANAGER_H
+#define __MCJITMEMORYMANAGER_H
+
+#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
+#include "llvm-debug.h"
+#include "utils.h"
+
+using namespace llvm;
+
+#define MIN_CODE_CACHE_SIZE (1 * 1024 * 1024)
+#define DEFAULT_GLOBAL_SIZE (64 * 1024)
+#define DEFAULT_THRESHOLD (32 * 1024)
+
+// RuntimeDyld clients often want to handle the memory management of
+// what gets placed where. For JIT clients, this is the subset of
+// JITMemoryManager required for dynamic loading of binaries.
+//
+// FIXME: As the RuntimeDyld fills out, additional routines will be needed
+// for the varying types of objects to be allocated.
+class DefaultMCJITMemoryManager : public RTDyldMemoryManager {
+ uint8_t *TraceCache;
+ size_t TraceCacheSize;
+
+ uint8_t *GlobalBase; /* section for global data used by QEMU helpers */
+ uint8_t *CodeBase; /* section for emitting trace code */
+ uint8_t *CodeGenPtr;
+
+ size_t GlobalRemain;
+ size_t CodeRemain;
+ size_t Threshold;
+
+ hqemu::Mutex lock;
+
+ SymbolMap Symbols;
+
+public:
+ DefaultMCJITMemoryManager(uint8_t *Cache, size_t Size)
+ : TraceCache(Cache), TraceCacheSize(Size), Threshold(DEFAULT_THRESHOLD)
+ {
+ GlobalBase = TraceCache;
+ GlobalRemain = DEFAULT_GLOBAL_SIZE;
+
+ CodeBase = GlobalBase + DEFAULT_GLOBAL_SIZE;
+ CodeBase = (uint8_t *)(((uintptr_t)CodeBase + CODE_GEN_ALIGN - 1) & ~(CODE_GEN_ALIGN - 1));
+ CodeRemain = (uintptr_t)TraceCache + TraceCacheSize - (uintptr_t)CodeBase;
+ CodeGenPtr = CodeBase;
+ }
+ ~DefaultMCJITMemoryManager() {}
+
+ /// Allocate a memory block of (at least) the given size suitable for
+ /// executable code. The SectionID is a unique identifier assigned by the JIT
+ /// engine, and optionally recorded by the memory manager to access a loaded
+ /// section.
+ uint8_t *allocateCodeSection(
+     uintptr_t Size, unsigned Alignment, unsigned SectionID,
+     StringRef SectionName) override {
+     /// Bump-allocates Size bytes from the trace cache under the lock.
+     /// An Alignment of 0 is treated as 16. SectionID and SectionName are
+     /// not used. Reports an error (and returns null if hqemu_error
+     /// returns) when the request does not fit in the remaining cache.
+     hqemu::MutexGuard locked(lock);
+
+     if (!Alignment)
+         Alignment = 16;
+
+     if (Alignment & (Alignment - 1))
+         hqemu_error("Alignment must be a power of two.\n");
+
+     uintptr_t CacheEnd = (uintptr_t)TraceCache + TraceCacheSize;
+     uintptr_t CurGenPtr = (uintptr_t)CodeGenPtr;
+     CurGenPtr = (CurGenPtr + Alignment - 1) & ~(uintptr_t)(Alignment - 1);
+
+     // Refuse to hand out memory beyond the end of the trace cache. The
+     // comparison is written so that it cannot overflow.
+     if (CurGenPtr > CacheEnd || Size > CacheEnd - CurGenPtr) {
+         hqemu_error("exceeds available cache size.\n");
+         return nullptr;
+     }
+
+     CodeGenPtr = (uint8_t *)((CurGenPtr + Size + CODE_GEN_ALIGN - 1) &
+                              ~(uintptr_t)(CODE_GEN_ALIGN - 1));
+
+     // The CODE_GEN_ALIGN round-up may step past an unaligned cache end;
+     // clamp so the unsigned subtraction cannot wrap.
+     CodeRemain = ((uintptr_t)CodeGenPtr < CacheEnd)
+                      ? CacheEnd - (uintptr_t)CodeGenPtr : 0;
+     return (uint8_t *)CurGenPtr;
+ }
+
+ /// Allocate a memory block of (at least) the given size suitable for data.
+ /// The SectionID is a unique identifier assigned by the JIT engine, and
+ /// optionally recorded by the memory manager to access a loaded section.
+ uint8_t *allocateDataSection(
+ uintptr_t Size, unsigned Alignment, unsigned SectionID,
+ StringRef SectionName, bool IsReadOnly) override {
+ return allocateCodeSection(Size, Alignment, SectionID, SectionName);
+ }
+
+ /// Inform the memory manager about the total amount of memory required to
+ /// allocate all sections to be loaded:
+ /// \p CodeSize - the total size of all code sections
+ /// \p DataSizeRO - the total size of all read-only data sections
+ /// \p DataSizeRW - the total size of all read-write data sections
+ ///
+ /// Note that by default the callback is disabled. To enable it
+ /// redefine the method needsToReserveAllocationSpace to return true.
+ void reserveAllocationSpace(
+ uintptr_t CodeSize, uintptr_t DataSizeRO, uintptr_t DataSizeRW) {
+ hqemu_error("fixme.\n");
+ }
+
+ /// Override to return true to enable the reserveAllocationSpace callback.
+ bool needsToReserveAllocationSpace() { return false; }
+
+ /// Register the EH frames with the runtime so that c++ exceptions work.
+ ///
+ /// \p Addr parameter provides the local address of the EH frame section
+ /// data, while \p LoadAddr provides the address of the data in the target
+ /// address space. If the section has not been remapped (which will usually
+ /// be the case for local execution) these two values will be the same.
+ void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) {
+ hqemu_error("fixme.\n");
+ }
+
+ void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) {
+ hqemu_error("fixme.\n");
+ }
+
+ /// This method returns the address of the specified function or variable.
+ /// It is used to resolve symbols during module linking.
+ uint64_t getSymbolAddress(const std::string &Name) {
+     /// Looks Name up in the symbol table installed by AddSymbols, under
+     /// the lock. An unknown name is reported through hqemu_error; if that
+     /// returns, 0 is returned and the table is left unmodified.
+     hqemu::MutexGuard locked(lock);
+     auto It = Symbols.find(Name);
+     if (It == Symbols.end()) {
+         std::string ErrMsg = "Program used external symbol '" + Name +
+                              "' which could not be resolved!\n";
+         hqemu_error(ErrMsg.c_str());
+         return 0;
+     }
+     return It->second;
+ }
+
+ /// This method returns the address of the specified function. As such it is
+ /// only useful for resolving library symbols, not code generated symbols.
+ ///
+ /// If \p AbortOnFailure is false and no function with the given name is
+ /// found, this function returns a null pointer. Otherwise, it prints a
+ /// message to stderr and aborts.
+ ///
+ /// This function is deprecated for memory managers to be used with
+ /// MCJIT or RuntimeDyld. Use getSymbolAddress instead.
+ void *getPointerToNamedFunction(const std::string &Name,
+                                 bool AbortOnFailure = true) {
+     /// This manager never resolves names through this entry point: it
+     /// reports an error when AbortOnFailure is set and otherwise returns
+     /// null.
+     if (AbortOnFailure) {
+         std::string ErrMsg = "Program used external symbol '" + Name +
+                              "' which could not be resolved!\n";
+         hqemu_error(ErrMsg.c_str());
+     }
+     return nullptr;
+ }
+
+ /// This method is called after an object has been loaded into memory but
+ /// before relocations are applied to the loaded sections. The object load
+ /// may have been initiated by MCJIT to resolve an external symbol for another
+ /// object that is being finalized. In that case, the object about which
+ /// the memory manager is being notified will be finalized immediately after
+ /// the memory manager returns from this call.
+ ///
+ /// Memory managers which are preparing code for execution in an external
+ /// address space can use this call to remap the section addresses for the
+ /// newly loaded object.
+#if defined(LLVM_V35)
+ void notifyObjectLoaded(ExecutionEngine *EE,
+ const ObjectImage *Obj) {
+ }
+#else
+ void notifyObjectLoaded(RuntimeDyld &RTDyld,
+ const object::ObjectFile &Obj) {
+ }
+#endif
+
+ /// This method is called when object loading is complete and section page
+ /// permissions can be applied. It is up to the memory manager implementation
+ /// to decide whether or not to act on this method. The memory manager will
+ /// typically allocate all sections as read-write and then apply specific
+ /// permissions when this method is called. Code sections cannot be executed
+ /// until this function has been called. In addition, any cache coherency
+ /// operations needed to reliably use the memory are also performed.
+ ///
+ /// Returns true if an error occurred, false otherwise.
+ bool finalizeMemory(std::string *ErrMsg = nullptr) override {
+ return false;
+ }
+
+ void AddSymbols(SymbolMap &symbols) {
+ Symbols = symbols;
+ }
+
+ size_t getCodeSize() { return CodeGenPtr - CodeBase; }
+ bool isSizeAvailable() {
+     /// True while at least Threshold bytes of code space remain; read
+     /// under the lock.
+     hqemu::MutexGuard locked(lock);
+     return CodeRemain >= Threshold;
+ }
+ void Flush() {
+ CodeGenPtr = CodeBase;
+ CodeRemain = (uintptr_t)TraceCache + TraceCacheSize - (uintptr_t)CodeBase;
+ }
+
+ static DefaultMCJITMemoryManager *Create(uint8_t *Cache, size_t Size) {
+     /// Factory: reports an error when Size is below MIN_CODE_CACHE_SIZE,
+     /// then builds a manager over the caller-provided cache
+     /// [Cache, Cache + Size). The caller owns the returned object.
+     if (Size < MIN_CODE_CACHE_SIZE) {
+         std::string ErrMsg = "Trace cache size is too small (" +
+                              std::to_string(Size) + ").\n";
+         hqemu_error(ErrMsg.c_str());
+     }
+     return new DefaultMCJITMemoryManager(Cache, Size);
+ }
+};
+
+#endif
diff --git a/llvm/include/hqemu-config.h b/llvm/include/hqemu-config.h
new file mode 100644
index 0000000..2e2f42f
--- /dev/null
+++ b/llvm/include/hqemu-config.h
@@ -0,0 +1,142 @@
+/*
+ * (C) 2016 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#ifndef __HQEMU_CONFIG_H
+#define __HQEMU_CONFIG_H
+
+
+#define PACKAGE_NAME "HQEMU"
+#define PACKAGE_VERSION_MAJOR "2.5"
+#define PACKAGE_VERSION_MINOR "2"
+
+/* Compile-time feature switches; the commented-out ones are left undefined. */
+#define ENABLE_IBTC
+#define ENABLE_CPBL
+#define ENABLE_LPAGE
+#define ENABLE_PASSES
+#define ENABLE_MCJIT
+//#define ENABLE_HPM_THREAD
+//#define ENABLE_TLBVERSION
+/* NOTE(review): "ENALBE" looks like a misspelling of "ENABLE" - check which
+ * spelling the sources actually test before turning this switch on. */
+//#define ENALBE_CPU_PROFILE
+//#define USE_TRACETREE_ONLY
+
+
+/*
+ * With CONFIG_USER_ONLY, ENABLE_TCG_VECTOR is switched on and GUEST_BASE
+ * expands to the guest_base variable; otherwise GUEST_BASE is the constant 0.
+ */
+#if defined(CONFIG_USER_ONLY)
+# define ENABLE_TCG_VECTOR
+# define GUEST_BASE guest_base
+#else
+# define GUEST_BASE (0UL)
+#endif
+
+/*
+ * ENABLE_TLBVERSION is withdrawn when ALIGNED_ONLY is defined. Otherwise the
+ * extended layout (ENABLE_TLBVERSION_EXT) is additionally selected for a
+ * 64-bit x86-64 host running a 32-bit target.
+ */
+#if defined(ENABLE_TLBVERSION)
+# if defined(ALIGNED_ONLY)
+# undef ENABLE_TLBVERSION
+# elif HOST_LONG_BITS == 64 && TARGET_LONG_BITS == 32 && defined(HOST_X86_64)
+# define ENABLE_TLBVERSION_EXT
+# endif
+#endif
+
+/* No versioning: zero version bits/mask and tlb_version() is constant 0. */
+#ifndef ENABLE_TLBVERSION
+# define TLB_INVALID_SHIFT 3
+# define TLB_NOTDIRTY_SHIFT 4
+# define TLB_MMIO_SHIFT 5
+# define TLB_VERSION_BITS 0
+# define TLB_VERSION_MASK 0
+# define TLB_VERSION_SHIFT (0)
+# define tlb_version(__env) 0
+typedef target_ulong tlbaddr_t;
+/* Extended layout: the version is the upper 32 bits of an unsigned long. */
+#elif defined(ENABLE_TLBVERSION_EXT)
+# define TLB_INVALID_SHIFT 3
+# define TLB_NOTDIRTY_SHIFT 4
+# define TLB_MMIO_SHIFT 5
+# define TLB_VERSION_BITS 32
+# define TLB_VERSION_SIZE (1UL << TLB_VERSION_BITS)
+# define TLB_VERSION_MASK (0xFFFFFFFF00000000UL)
+# define TLB_VERSION_SHIFT (32)
+# define tlb_version(__env) (__env->tlb_version)
+typedef unsigned long tlbaddr_t;
+/*
+ * Default versioned layout: the version is the low TARGET_PAGE_BITS - 3 bits
+ * and the three flag bits sit directly above it.
+ */
+#else
+# define TLB_INVALID_SHIFT (TARGET_PAGE_BITS - 3)
+# define TLB_NOTDIRTY_SHIFT (TARGET_PAGE_BITS - 2)
+# define TLB_MMIO_SHIFT (TARGET_PAGE_BITS - 1)
+# define TLB_VERSION_BITS (TARGET_PAGE_BITS - 3)
+# define TLB_VERSION_SIZE (1 << TLB_VERSION_BITS)
+# define TLB_VERSION_MASK (TLB_VERSION_SIZE - 1)
+# define TLB_VERSION_SHIFT (0)
+# define tlb_version(__env) (__env->tlb_version)
+typedef target_ulong tlbaddr_t;
+#endif
+
+
+/* Integer identifiers used for blocks and traces. */
+typedef int BlockID;
+typedef int TraceID;
+/* 16-bit flags: BUILD_TCG and BUILD_LLVM are distinct bits, BUILD_NONE is 0. */
+#define BUILD_NONE ((uint16_t)0)
+#define BUILD_TCG ((uint16_t)1 << 0)
+#define BUILD_LLVM ((uint16_t)1 << 1)
+
+/*
+ * Field declarations that are spliced into a structure by expanding this
+ * macro. NOTE(review): presumably the per-CPU state structure - confirm at
+ * the expansion site.
+ *
+ * The last field deliberately has no trailing backslash: a continuation
+ * there would pull whatever line follows the macro into its body.
+ */
+#define CPU_OPTIMIZATION_COMMON                                         \
+    unsigned long sp;                                                   \
+    void *opt_link;                                                     \
+    uint16_t build_mode;                                                \
+    int start_trace_prediction;                                         \
+    int fallthrough;                                                    \
+    uintptr_t image_base;                                               \
+    uint32_t restore_val;                                               \
+    uint64_t num_trace_exits;
+
+
+/*
+ * Field declarations that are spliced into a structure by expanding this
+ * macro. NOTE(review): presumably TranslationBlock - confirm at the
+ * expansion site.
+ */
+#define TB_OPTIMIZATION_COMMON \
+ BlockID id; \
+ TraceID tid; /* trace id */ \
+ int mode; /* current state */ \
+ void *opt_ptr; /* pointer to the optimized code */ \
+ uint32_t exec_count; /* trace profile execution count */ \
+ uint16_t patch_jmp; /* offset of trace trampoline */ \
+ uint16_t patch_next; /* offset of trace prediction stub */ \
+ target_ulong jmp_pc[2]; /* pc of the succeeding blocks */ \
+ void *image; \
+ void *state; \
+ void *chain;
+
+
+/*
+ * Block states, numbered consecutively from 0.
+ * NOTE(review): presumably the values held by the "mode" field of
+ * TB_OPTIMIZATION_COMMON - confirm against the users of that field.
+ */
+enum {
+ BLOCK_NONE = 0,
+ BLOCK_ACTIVE,
+ BLOCK_TRACEHEAD,
+ BLOCK_OPTIMIZED,
+ BLOCK_INVALID,
+};
+
+/*
+ * Translation modes returned by getTransMode(). The LLVM_MODE strings are
+ * "none", "block", "hybrids" and "hybridm"; TRANS_MODE_INVALID marks any
+ * other value.
+ */
+enum {
+ TRANS_MODE_NONE = 0,
+ TRANS_MODE_BLOCK,
+ TRANS_MODE_HYBRIDS,
+ TRANS_MODE_HYBRIDM,
+ TRANS_MODE_INVALID,
+};
+
+/*
+ * Translate the LLVM_MODE environment variable into a TRANS_MODE_* value.
+ * An unset variable selects TRANS_MODE_HYBRIDM; a value that matches none of
+ * the known names yields TRANS_MODE_INVALID.
+ */
+static inline int getTransMode(void) {
+    const struct {
+        const char *name;
+        int mode;
+    } known[] = {
+        { "hybridm", TRANS_MODE_HYBRIDM },
+        { "hybrids", TRANS_MODE_HYBRIDS },
+        { "block",   TRANS_MODE_BLOCK },
+        { "none",    TRANS_MODE_NONE },
+    };
+    const char *value = getenv("LLVM_MODE");
+    unsigned i;
+
+    if (!value)
+        return TRANS_MODE_HYBRIDM;
+    for (i = 0; i < sizeof(known) / sizeof(known[0]); ++i) {
+        if (strcmp(value, known[i].name) == 0)
+            return known[i].mode;
+    }
+    return TRANS_MODE_INVALID;
+}
+
+/*
+ * Annotation/attribute for traces. A_SetCC and A_NoSIMDization are distinct
+ * bits, so they can be OR-ed together; A_None is the empty set.
+ */
+enum {
+ A_None = ((uint32_t)0),
+ A_SetCC = ((uint32_t)1 << 0),
+ A_NoSIMDization = ((uint32_t)1 << 1),
+};
+
+#endif
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
+
diff --git a/llvm/include/hqemu-helper.h b/llvm/include/hqemu-helper.h
new file mode 100644
index 0000000..dfcb396
--- /dev/null
+++ b/llvm/include/hqemu-helper.h
@@ -0,0 +1,8 @@
+DEF_HELPER_1(export_hqemu, void, env)
+DEF_HELPER_1(lookup_ibtc, ptr, env)
+DEF_HELPER_1(lookup_cpbl, ptr, env)
+DEF_HELPER_3(validate_cpbl, int, env, tl, int)
+DEF_HELPER_2(NET_profile, void, env, int)
+DEF_HELPER_2(NET_predict, void, env, int)
+DEF_HELPER_2(verify_tb, void, env, int)
+DEF_HELPER_3(profile_exec, void, env, ptr, int)
diff --git a/llvm/include/hqemu.h b/llvm/include/hqemu.h
new file mode 100644
index 0000000..f5e7180
--- /dev/null
+++ b/llvm/include/hqemu.h
@@ -0,0 +1,84 @@
+/*
+ * (C) 2015 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#ifndef __HQEMU_H
+#define __HQEMU_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#include "config-host.h"
+#include "config-target.h"
+#include "hqemu-config.h"
+
+#define build_tcg(_env) ((_env)->build_mode & BUILD_TCG)
+#define build_llvm(_env) ((_env)->build_mode & BUILD_LLVM)
+#define build_llvm_only(_env) ((_env)->build_mode == BUILD_LLVM)
+
+void hqemu_help(void);
+
+/* Optimizations */
+int optimization_init(CPUArchState *env);
+int optimization_finalize(CPUArchState *env);
+int optimization_reset(CPUArchState *env, int force_flush);
+int optimization_remove_entry(CPUArchState *env, TranslationBlock *tb);
+int optimization_flush_page(CPUArchState *env, target_ulong pc);
+int optimization_init_tb(TranslationBlock *tb, int id);
+
+void itlb_update_entry(CPUArchState *env, TranslationBlock *tb);
+void ibtc_update_entry(CPUArchState *env, TranslationBlock *tb);
+
+int lpt_reset(CPUArchState *env);
+int lpt_add_page(CPUArchState *env, target_ulong addr, target_ulong size);
+int lpt_search_page(CPUArchState *env, target_ulong addr, target_ulong *addrp, target_ulong *sizep);
+int lpt_flush_page(CPUArchState *env, target_ulong addr, target_ulong *addrp, target_ulong *sizep);
+
+
+/* Tracer */
+void tracer_exec_tb(CPUArchState *env, uintptr_t next_tb, TranslationBlock *tb);
+void tracer_reset(CPUArchState *env);
+
+
+/* LLVM */
+int llvm_init(void);
+int llvm_finalize(void);
+int llvm_alloc_cache(void);
+int llvm_check_cache(void);
+int llvm_tb_flush(void);
+int llvm_tb_remove(TranslationBlock *tb);
+void llvm_handle_chaining(uintptr_t next_tb, TranslationBlock *tb);
+int llvm_locate_trace(uintptr_t searched_pc);
+TranslationBlock *llvm_find_pc(CPUState *cpu, uintptr_t searched_pc);
+int llvm_restore_state(CPUState *cpu, TranslationBlock *tb, uintptr_t searched_pc);
+void llvm_fork_start(void);
+void llvm_fork_end(int child);
+
+
+/* Annotation */
+enum {
+ ANNOTATION_NONE = 0,
+ ANNOTATION_LOOP,
+};
+int llvm_has_annotation(target_ulong addr, int annotation);
+
+
+/* External variables */
+extern int tracer_mode;
+extern target_ulong pcid;
+extern unsigned long alignment_count[]; /* 0: misaligned, 1: aligned. */
+extern unsigned long aligned_boundary;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
+
diff --git a/llvm/include/llvm-annotate.h b/llvm/include/llvm-annotate.h
new file mode 100644
index 0000000..25454ed
--- /dev/null
+++ b/llvm/include/llvm-annotate.h
@@ -0,0 +1,51 @@
+/*
+ * (C) 2015 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#ifndef __LLVM_ANNOTATE_H
+#define __LLVM_ANNOTATE_H
+
+#include <map>
+#include <cstdint>
+#include "qemu-types.h"
+#include "llvm-types.h"
+#include "utils.h"
+
+/*
+ * Loop metadata. A freshly constructed object has every field at its
+ * initial marker value: -1 (converted to the field's type), or INT_MIN for
+ * Distance.
+ */
+struct LoopMetadata {
+    /* All fields are set by the in-class initializers below. */
+    LoopMetadata() {}
+
+    target_ulong Address = -1;
+    uint32_t Length = -1;
+    uint32_t VS = -1, VF = -1;
+    int Distance = INT_MIN;
+    int Start = -1, End = -1;
+    int Stride = -1;
+};
+
+/*
+ * The AnnotationFactory class manages the metadata information.
+ */
+class AnnotationFactory {
+ /* Map from an address key to its loop metadata record. */
+ typedef std::map<uintptr_t, LoopMetadata*> LoopList;
+
+ /* NOTE(review): presumably the path of the metadata file - confirm. */
+ std::string MetaFile;
+
+ /* NOTE(review): presumably fills Loops from the XML file "name" - confirm. */
+ int ParseXML(const char *name);
+
+public:
+ AnnotationFactory();
+ ~AnnotationFactory();
+
+ /*
+  * Public container of loop records.
+  * NOTE(review): keyed by uintptr_t while the accessors below take a
+  * target_ulong - confirm the two widths agree for every host/target pair.
+  */
+ LoopList Loops;
+ LoopMetadata *getLoopAnnotation(target_ulong addr);
+ bool hasLoopAnnotation(target_ulong addr);
+};
+
+#endif
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
diff --git a/llvm/include/llvm-debug.h b/llvm/include/llvm-debug.h
new file mode 100644
index 0000000..405b466
--- /dev/null
+++ b/llvm/include/llvm-debug.h
@@ -0,0 +1,247 @@
+/*
+ * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#ifndef __LLVM_DEBUG_H
+#define __LLVM_DEBUG_H
+
+#include <cstdint>
+#include <cstring>
+#include <iostream>
+#include <sstream>
+#include <cstdarg>
+#include <unistd.h>
+#include <sys/time.h>
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/Support/FileSystem.h"
+#include "utils.h"
+
+
+/*
+ * A set of debug categories stored as a 64-bit mask.
+ */
+struct DebugMode {
+    uint64_t Mode;  /* Raw bit mask. */
+
+    DebugMode(uint64_t M) : Mode(M) {}
+
+    /* True when both masks are identical. */
+    bool operator==(const DebugMode &RHS) const {
+        return Mode == RHS.Mode;
+    }
+    /* True when the two masks have at least one set bit in common. */
+    bool operator&(const DebugMode &RHS) const {
+        return (Mode & RHS.Mode) != 0;
+    }
+    /* Union of both masks. Const, so it also accepts a const left operand. */
+    DebugMode operator|(const DebugMode &RHS) const {
+        return DebugMode(Mode | RHS.Mode);
+    }
+    /* Add every bit of RHS to this mask. */
+    DebugMode &operator|=(const DebugMode &RHS) {
+        Mode |= RHS.Mode;
+        return *this;
+    }
+};
+
+/*
+ * LLVMDebug provides facilities to debug the LLVM translator, based on the
+ * debug levels. Regular output goes to stdout; debug and error output goes
+ * to stderr until setDebugMode() redirects it to a file.
+ */
+class LLVMDebug {
+public:
+    /* Debug categories. D_ASM, D_DEBUG and D_ALL are unions of single bits. */
+    enum LLVMDebugMode {
+        D_NONE     = ((uint64_t)0),
+        D_LLVM     = ((uint64_t)1 << 0),
+        D_INASM    = ((uint64_t)1 << 1),
+        D_OP       = ((uint64_t)1 << 2),
+        D_OUTASM   = ((uint64_t)1 << 3),
+        D_IR       = ((uint64_t)1 << 4),
+        D_IR_OPT   = ((uint64_t)1 << 5),
+        D_ENTRY    = ((uint64_t)1 << 6),
+        D_VERIFY   = ((uint64_t)1 << 7),
+        D_PASS     = ((uint64_t)1 << 8),
+        D_ANNOTATE = ((uint64_t)1 << 9),
+        D_HPM      = ((uint64_t)1 << 10),
+        D_ASM      = (D_INASM | D_OP | D_OUTASM),
+        D_DEBUG    = (D_LLVM | D_IR_OPT | D_OUTASM | D_PASS),
+        D_ALL      = (D_LLVM | D_INASM | D_OP | D_OUTASM | D_IR | D_IR_OPT |
+                      D_ENTRY | D_VERIFY | D_PASS | D_ANNOTATE | D_HPM),
+    };
+
+    /*
+     * Start with no category enabled, wrap stdout/stderr (neither
+     * descriptor is closed on destruction), record the start-up time and
+     * create the table of predefined DebugMode objects.
+     */
+    LLVMDebug() : Mode(D_NONE)
+    {
+        hqemu_out.reset(new llvm::raw_fd_ostream(STDOUT_FILENO, false, true));
+        hqemu_dbg.reset(new llvm::raw_fd_ostream(STDERR_FILENO, false, true));
+
+        std::string Str("");
+        gettimeofday(&uptime, nullptr);
+        ParseDebugMode(Str, false);
+        hqemu_null.SetUnbuffered();
+    }
+
+    /* The set of currently enabled categories. */
+    DebugMode &getDebugMode() {
+        return Mode;
+    }
+
+    /* The predefined DebugMode for M; unknown values map to D_NONE. */
+    DebugMode &getDebugMode(LLVMDebugMode M) {
+        if (Modes.find(M) == Modes.end())
+            M = D_NONE;
+        return *Modes[M];
+    }
+
+    /*
+     * Enable the comma-separated categories named in DebugLevel and, when
+     * DebugFile is non-empty, redirect debug output to that file. If the
+     * file cannot be opened, the failure is reported on the current debug
+     * stream and that stream stays in use; the unusable file stream is
+     * discarded instead of being installed.
+     */
+    void setDebugMode(std::string &DebugLevel, std::string &DebugFile) {
+        ParseDebugMode(DebugLevel);
+        if (DebugFile.empty())
+            return;
+
+        std::error_code EC;
+        std::unique_ptr<llvm::raw_fd_ostream> OS(
+            new llvm::raw_fd_ostream(DebugFile, EC, llvm::sys::fs::F_Text));
+        if (EC) {
+            *hqemu_dbg << "Error: failed to open debug file " << DebugFile
+                       << ". (" << EC.message().c_str() << ")\n";
+            return;
+        }
+        OS->SetUnbuffered();
+        hqemu_dbg.reset(OS.release());
+    }
+
+    /* Flush the debug stream. */
+    void Flush() {
+        hqemu_dbg->flush();
+    }
+
+    /*
+     * Print a timestamped error message and terminate the process.
+     * fname is the reporting function; fmt and the variadic arguments are
+     * printf-style. The formatted text is truncated to fit the local
+     * buffer rather than written past its end.
+     * NOTE(review): the process exits with status 0 even though this is the
+     * error path - kept as is; confirm whether a failure status is wanted.
+     */
+    void error(const char *fname, const char *fmt, ...) {
+        char str[256] = {'\0'};
+        va_list ap;
+        va_start(ap, fmt);
+        vsnprintf(str, sizeof(str), fmt, ap);
+        va_end(ap);
+        *hqemu_dbg << timestamp() << " Error: " << fname << " - " << str;
+        exit(0);
+    }
+
+    /* Stream for regular output. */
+    llvm::raw_ostream &output() {
+        return *hqemu_out;
+    }
+
+    /* Stream for debug output, not filtered by the enabled categories. */
+    llvm::raw_ostream &debug() {
+        return *hqemu_dbg;
+    }
+
+    /*
+     * Select the output stream for a message of category M: the debug
+     * stream (after writing a timestamp prefix) when M overlaps the enabled
+     * categories, otherwise a null stream that discards everything.
+     */
+    llvm::raw_ostream &operator<<(DebugMode &M) {
+        if (M & Mode) {
+            *hqemu_dbg << timestamp() << " ";
+            return *hqemu_dbg;
+        }
+        return hqemu_null;
+    }
+
+private:
+    llvm::raw_null_ostream hqemu_null;
+    std::unique_ptr<llvm::raw_fd_ostream> hqemu_out;
+    std::unique_ptr<llvm::raw_fd_ostream> hqemu_dbg;
+    struct timeval uptime;  /* The startup time of the DBT */
+    DebugMode Mode;         /* The debug level */
+    /* Predefined modes, created once in the constructor and never freed. */
+    std::map<LLVMDebugMode, DebugMode*> Modes;
+
+    /* Time elapsed since construction, formatted as "[HH:MM:SS.uuuuuu]". */
+    std::string timestamp() {
+        struct timeval tv;
+        char buf[32];
+        gettimeofday(&tv, 0);
+        timersub(&tv, &uptime, &tv);
+        size_t len = strftime(buf, sizeof(buf), "[%H:%M:%S",
+                              gmtime(&tv.tv_sec));
+        /* tv_usec is not a long on every platform, so cast it for "%ld". */
+        snprintf(buf + len, sizeof(buf) - len, ".%06ld]", (long)tv.tv_usec);
+        return buf;
+    }
+
+    /*
+     * With Update == false, only populate Modes with one DebugMode per
+     * known category. Otherwise split DebugLevel at commas and enable every
+     * token that names a category; unknown tokens are ignored.
+     */
+    void ParseDebugMode(std::string &DebugLevel, bool Update=true) {
+        static std::string debug_str[] = {
+            "none", "llvm", "in_asm", "op", "out_asm", "ir", "ir_opt",
+            "entry", "verify", "pass", "annotate", "hpm", "asm", "debug",
+            "all"
+        };
+        static LLVMDebugMode debug_enum[] = {
+            D_NONE, D_LLVM, D_INASM, D_OP, D_OUTASM, D_IR, D_IR_OPT,
+            D_ENTRY, D_VERIFY, D_PASS, D_ANNOTATE, D_HPM, D_ASM, D_DEBUG,
+            D_ALL
+        };
+
+        if (!Update) {
+            for (auto M : debug_enum)
+                Modes[M] = new DebugMode(M);
+            return;
+        }
+
+        if (DebugLevel.empty())
+            return;
+
+        std::istringstream ss(DebugLevel);
+        std::string token;
+        while(std::getline(ss, token, ',')) {
+            for (unsigned i = 0, e = ARRAY_SIZE(debug_enum); i != e; ++i) {
+                if (token == debug_str[i]) {
+                    Mode |= getDebugMode(debug_enum[i]);
+                    break;
+                }
+            }
+        }
+    }
+};
+
+extern LLVMDebug DM;
+
+/* Print messages to stdout. Should not use this function in release mode. */
+static inline llvm::raw_ostream &out() {
+ return DM.output();
+}
+/*
+ * Return the global debug manager. Streaming a DebugMode into it
+ * (dbg() << DEBUG_LLVM << ...) yields the debug stream when that mode is
+ * enabled and a discarding stream otherwise.
+ */
+static inline LLVMDebug &dbg() {
+ return DM;
+}
+/*
+ * Print an error message to the debug stream and terminate the process.
+ * msg is a printf-style format string consumed together with args; the name
+ * of the calling function is passed along automatically.
+ */
+#define hqemu_error(msg,args...) do { DM.error(__func__,msg,##args); } while(0)
+
+/* Macros to get defined DebugMode. */
+#define DEBUG_NONE DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_NONE)
+#define DEBUG_LLVM DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_LLVM)
+#define DEBUG_INASM DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_INASM)
+#define DEBUG_OP DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_OP)
+#define DEBUG_OUTASM DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_OUTASM)
+#define DEBUG_IR DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_IR)
+#define DEBUG_IR_OPT DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_IR_OPT)
+#define DEBUG_ENTRY DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_ENTRY)
+#define DEBUG_VERIFY DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_VERIFY)
+#define DEBUG_PASS DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_PASS)
+#define DEBUG_ANNOTATE DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_ANNOTATE)
+#define DEBUG_HPM DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_HPM)
+#define DEBUG_ASM DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_ASM)
+#define DEBUG_DEBUG DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_DEBUG)
+#define DEBUG_ALL DM.getDebugMode(LLVMDebug::LLVMDebugMode::D_ALL)
+
+
+
+/*
+ * Binary disassembler using MCDisassembler.
+ * The constructor is private; instances are obtained from CreateMCDisasm().
+ */
+class MCDisasm {
+ const llvm::MCDisassembler *DisAsm;
+ const llvm::MCSubtargetInfo *STI;
+ llvm::MCInstPrinter *IP;
+ const llvm::MCInstrAnalysis *MIA;
+ bool HostDisAsm; /* NOTE(review): presumably set from isHost - confirm. */
+ bool NoShowRawInsn;
+
+ MCDisasm(const llvm::Target *TheTarget, std::string TripleName,
+ bool isHost);
+
+ void DumpBytes(llvm::ArrayRef<uint8_t> bytes, llvm::raw_ostream &OS);
+
+public:
+ ~MCDisasm();
+ /* NOTE(review): presumably print Size bytes of code at Addr - confirm. */
+ void PrintInAsm(uint64_t Addr, uint64_t Size, uint64_t GuestAddr);
+ void PrintOutAsm(uint64_t Addr, uint64_t Size);
+
+ /* Factory taking a target triple name and a host/guest selector. */
+ static MCDisasm *CreateMCDisasm(std::string TripleName, bool isHost);
+};
+
+#endif
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
diff --git a/llvm/include/llvm-hard-perfmon.h b/llvm/include/llvm-hard-perfmon.h
new file mode 100644
index 0000000..ac03b23
--- /dev/null
+++ b/llvm/include/llvm-hard-perfmon.h
@@ -0,0 +1,87 @@
+/*
+ * (C) 2018 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#ifndef __LLVM_HARD_PERFMON_H
+#define __LLVM_HARD_PERFMON_H
+
+#include <map>
+#include <thread>
+#include "pmu/pmu.h"
+#include "utils.h"
+
+class PerfmonData;
+class BaseTracer;
+
+/* Control requests accepted by PerfmonData::MonitorBasic()/MonitorCoverSet(). */
+enum HPMControl {
+ HPM_INIT = 0,
+ HPM_FINALIZE,
+ HPM_START,
+ HPM_STOP,
+};
+
+/*
+ * Hardware Performance Monitor (HPM)
+ */
+class HardwarePerfmon {
+ std::thread MonThread; /* Monitor thread */
+ int MonThreadID; /* Monitor thread id */
+ bool MonThreadStop; /* Monitor thread is stopped or not */
+ hqemu::Mutex Lock;
+
+ /* Start monitor thread. */
+ void StartMonThread();
+
+ /* Monitor thread routine. */
+ void MonitorFunc();
+
+public:
+ HardwarePerfmon();
+ ~HardwarePerfmon();
+
+ /* Set up HPM with the monitor thread id */
+ void Init(int monitor_thread_tid);
+
+ /* Register a thread to be monitored. */
+ void RegisterThread(BaseTracer *Tracer);
+
+ /* Unregister a thread from being monitored. */
+ void UnregisterThread(BaseTracer *Tracer);
+
+ /* Notify that the execution enters/leaves the code cache. */
+ void NotifyCacheEnter(BaseTracer *Tracer);
+ void NotifyCacheLeave(BaseTracer *Tracer);
+
+ /* Stop the monitor. */
+ void Pause();
+
+ /* Restart the monitor. */
+ void Resume();
+};
+
+
+/*
+ * Performance-monitoring handles and counters associated with one thread id.
+ * NOTE(review): presumably one instance per monitored thread - confirm.
+ */
+class PerfmonData {
+public:
+ PerfmonData(int tid);
+ ~PerfmonData();
+
+ int TID;
+ /* One PMU handle per monitored event kind. */
+ pmu::Handle ICountHndl;
+ pmu::Handle BranchHndl;
+ pmu::Handle MemLoadHndl;
+ pmu::Handle MemStoreHndl;
+ pmu::Handle CoverSetHndl;
+ uint64_t LastNumBranches, LastNumLoads, LastNumStores;
+
+ /* Both take an HPMControl request (HPM_INIT/FINALIZE/START/STOP). */
+ void MonitorBasic(HPMControl Ctl);
+ void MonitorCoverSet(HPMControl Ctl);
+};
+
+extern HardwarePerfmon *HP;
+
+#endif /* __LLVM_HARD_PERFMON_H */
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
diff --git a/llvm/include/llvm-helper.h b/llvm/include/llvm-helper.h
new file mode 100644
index 0000000..2d24f81
--- /dev/null
+++ b/llvm/include/llvm-helper.h
@@ -0,0 +1,755 @@
+/*
+ * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ *
+ * This file defines the QEMU helper functions that could be inlined by
+ * the LLVM translators.
+ */
+
+#ifndef __LLVM_HELPER_H
+#define __LLVM_HELPER_H
+
+/* Special TCG runtime helper */
+ "tcg_helper_div_i32",
+ "tcg_helper_rem_i32",
+ "tcg_helper_divu_i32",
+ "tcg_helper_remu_i32",
+ "tcg_helper_shl_i64",
+ "tcg_helper_shr_i64",
+ "tcg_helper_sar_i64",
+ "tcg_helper_div_i64",
+ "tcg_helper_rem_i64",
+ "tcg_helper_divu_i64",
+ "tcg_helper_remu_i64",
+
+#if defined(TARGET_I386)
+ /* General */
+ "helper_cc_compute_c",
+ "helper_cc_compute_all",
+ "helper_load_seg",
+ "helper_write_eflags",
+ "helper_read_eflags",
+ "helper_cli",
+ "helper_sti",
+ "helper_set_inhibit_irq",
+ "helper_reset_inhibit_irq",
+ /* FPU */
+ "helper_divb_AL",
+ "helper_idivb_AL",
+ "helper_divw_AX",
+ "helper_idivw_AX",
+ "helper_divl_EAX",
+ "helper_idivl_EAX",
+ "helper_flds_FT0",
+ "helper_fldl_FT0",
+ "helper_fildl_FT0",
+ "helper_flds_ST0",
+ "helper_fldl_ST0",
+ "helper_fildl_ST0",
+ "helper_fildll_ST0",
+ "helper_fsts_ST0",
+ "helper_fstl_ST0",
+ "helper_fist_ST0",
+ "helper_fistl_ST0",
+ "helper_fistll_ST0",
+ "helper_fistt_ST0",
+ "helper_fisttl_ST0",
+ "helper_fisttll_ST0",
+ "helper_fldt_ST0",
+ "helper_fstt_ST0",
+ "helper_fpush",
+ "helper_fpop",
+ "helper_fdecstp",
+ "helper_fincstp",
+ "helper_ffree_STN",
+ "helper_fmov_ST0_FT0",
+ "helper_fmov_FT0_STN",
+ "helper_fmov_ST0_STN",
+ "helper_fmov_STN_ST0",
+ "helper_fxchg_ST0_STN",
+ "helper_fcom_ST0_FT0",
+ "helper_fucom_ST0_FT0",
+ "helper_fcomi_ST0_FT0",
+ "helper_fucomi_ST0_FT0",
+ "helper_fadd_ST0_FT0",
+ "helper_fmul_ST0_FT0",
+ "helper_fsub_ST0_FT0",
+ "helper_fsubr_ST0_FT0",
+ "helper_fdiv_ST0_FT0",
+ "helper_fdivr_ST0_FT0",
+ "helper_fadd_STN_ST0",
+ "helper_fmul_STN_ST0",
+ "helper_fsub_STN_ST0",
+ "helper_fsubr_STN_ST0",
+ "helper_fdiv_STN_ST0",
+ "helper_fdivr_STN_ST0",
+ "helper_fchs_ST0",
+ "helper_fabs_ST0",
+#if defined(TCG_TARGET_I386) && TCG_TARGET_REG_BITS == 64
+ "helper_fxam_ST0",
+#endif
+ "helper_fld1_ST0",
+ "helper_fldl2t_ST0",
+ "helper_fldl2e_ST0",
+ "helper_fldpi_ST0",
+ "helper_fldlg2_ST0",
+ "helper_fldln2_ST0",
+ "helper_fldz_ST0",
+ "helper_fldz_FT0",
+ "helper_fnstsw",
+ "helper_fnstcw",
+ "helper_fldcw",
+ "helper_fclex",
+ "helper_fwait",
+ "helper_fninit",
+ "helper_fbld_ST0",
+ "helper_fbst_ST0",
+ "helper_f2xm1",
+ "helper_fyl2x",
+ "helper_fptan",
+ "helper_fpatan",
+ "helper_fxtract",
+ "helper_fprem1",
+ "helper_fprem",
+ "helper_fyl2xp1",
+ "helper_fsqrt",
+ "helper_fsincos",
+ "helper_frndint",
+ "helper_fscale",
+ "helper_fsin",
+ "helper_fcos",
+ "helper_fstenv",
+ "helper_fldenv",
+ "helper_fsave",
+ "helper_frstor",
+ "helper_fxsave",
+ "helper_fxrstor",
+ "helper_bsf",
+ "helper_bsr",
+ "helper_lzcnt",
+
+ /* MMX/SSE */
+ "helper_psrlw_xmm",
+ "helper_psraw_xmm",
+ "helper_psllw_xmm",
+ "helper_psrld_xmm",
+ "helper_psrad_xmm",
+ "helper_pslld_xmm",
+ "helper_psrlq_xmm",
+ "helper_psllq_xmm",
+ "helper_psrldq_xmm",
+ "helper_pslldq_xmm",
+ "helper_paddb_xmm",
+ "helper_paddw_xmm",
+ "helper_paddl_xmm",
+ "helper_paddq_xmm",
+ "helper_psubb_xmm",
+ "helper_psubw_xmm",
+ "helper_psubl_xmm",
+ "helper_psubq_xmm",
+ "helper_paddusb_xmm",
+ "helper_paddsb_xmm",
+ "helper_psubusb_xmm",
+ "helper_psubsb_xmm",
+ "helper_paddusw_xmm",
+ "helper_paddsw_xmm",
+ "helper_psubusw_xmm",
+ "helper_psubsw_xmm",
+ "helper_pminub_xmm",
+ "helper_pmaxub_xmm",
+ "helper_pminsw_xmm",
+ "helper_pmaxsw_xmm",
+ "helper_pand_xmm",
+ "helper_pandn_xmm",
+ "helper_por_xmm",
+ "helper_pxor_xmm",
+ "helper_pcmpgtb_xmm",
+ "helper_pcmpgtw_xmm",
+ "helper_pcmpgtl_xmm",
+ "helper_pcmpeqb_xmm",
+ "helper_pcmpeqw_xmm",
+ "helper_pcmpeql_xmm",
+ "helper_pmullw_xmm",
+ "helper_pmulhuw_xmm",
+ "helper_pmulhw_xmm",
+ "helper_pavgb_xmm",
+ "helper_pavgw_xmm",
+ "helper_pmuludq_xmm",
+ "helper_pmaddwd_xmm",
+ "helper_psadbw_xmm",
+ "helper_maskmov_xmm",
+ "helper_movl_mm_T0_xmm",
+ "helper_shufps_xmm",
+ "helper_shufpd_xmm",
+#if !defined(TCG_TARGET_ARM)
+ "helper_pshufd_xmm",
+ "helper_pshuflw_xmm",
+ "helper_pshufhw_xmm",
+ "helper_punpcklbw_xmm",
+ "helper_punpcklwd_xmm",
+ "helper_punpckldq_xmm",
+ "helper_punpckhbw_xmm",
+ "helper_punpckhwd_xmm",
+ "helper_punpckhdq_xmm",
+#endif
+ "helper_punpcklqdq_xmm",
+ "helper_punpckhqdq_xmm",
+
+ "helper_enter_mmx",
+ "helper_psrlw_mmx",
+ "helper_psraw_mmx",
+ "helper_psllw_mmx",
+ "helper_psrld_mmx",
+ "helper_psrad_mmx",
+ "helper_pslld_mmx",
+ "helper_psrlq_mmx",
+ "helper_psllq_mmx",
+ "helper_psrldq_mmx",
+ "helper_pslldq_mmx",
+ "helper_paddb_mmx",
+ "helper_paddw_mmx",
+ "helper_paddl_mmx",
+ "helper_paddq_mmx",
+ "helper_psubb_mmx",
+ "helper_psubw_mmx",
+ "helper_psubl_mmx",
+ "helper_psubq_mmx",
+ "helper_paddusb_mmx",
+ "helper_paddsb_mmx",
+ "helper_psubusb_mmx",
+ "helper_psubsb_mmx",
+ "helper_paddusw_mmx",
+ "helper_paddsw_mmx",
+ "helper_psubusw_mmx",
+ "helper_psubsw_mmx",
+ "helper_pminub_mmx",
+ "helper_pmaxub_mmx",
+ "helper_pminsw_mmx",
+ "helper_pmaxsw_mmx",
+ "helper_pand_mmx",
+ "helper_pandn_mmx",
+ "helper_por_mmx",
+ "helper_pxor_mmx",
+ "helper_pcmpgtb_mmx",
+ "helper_pcmpgtw_mmx",
+ "helper_pcmpgtl_mmx",
+ "helper_pcmpeqb_mmx",
+ "helper_pcmpeqw_mmx",
+ "helper_pcmpeql_mmx",
+ "helper_pmullw_mmx",
+ "helper_pmulhuw_mmx",
+ "helper_pmulhw_mmx",
+ "helper_pavgb_mmx",
+ "helper_pavgw_mmx",
+ "helper_pmuludq_mmx",
+ "helper_pmaddwd_mmx",
+ "helper_psadbw_mmx",
+ "helper_maskmov_mmx",
+ "helper_movl_mm_T0_mmx",
+ "helper_shufps_mmx",
+ "helper_shufpd_mmx",
+#if !defined(TCG_TARGET_ARM)
+ "helper_pshufd_mmx",
+ "helper_pshuflw_mmx",
+ "helper_pshufhw_mmx",
+ "helper_punpcklbw_mmx",
+ "helper_punpcklwd_mmx",
+ "helper_punpckldq_mmx",
+ "helper_punpckhbw_mmx",
+ "helper_punpckhwd_mmx",
+ "helper_punpckhdq_mmx",
+#endif
+ "helper_punpcklqdq_mmx",
+ "helper_punpckhqdq_mmx",
+
+ "helper_addps",
+ "helper_addss",
+ "helper_addpd",
+ "helper_addsd",
+ "helper_subps",
+ "helper_subss",
+ "helper_subpd",
+ "helper_subsd",
+ "helper_mulps",
+ "helper_mulss",
+ "helper_mulpd",
+ "helper_mulsd",
+ "helper_divps",
+ "helper_divss",
+ "helper_divpd",
+ "helper_divsd",
+ "helper_minps",
+ "helper_minss",
+ "helper_minpd",
+ "helper_minsd",
+ "helper_maxps",
+ "helper_maxss",
+ "helper_maxpd",
+ "helper_maxsd",
+ "helper_sqrtps",
+ "helper_sqrtss",
+ "helper_sqrtpd",
+ "helper_sqrtsd",
+ "helper_shufps",
+ "helper_shufpd",
+
+ "helper_cmpeqps",
+ "helper_cmpeqss",
+ "helper_cmpeqpd",
+ "helper_cmpeqsd",
+ "helper_cmpltps",
+ "helper_cmpltss",
+ "helper_cmpltpd",
+ "helper_cmpltsd",
+ "helper_cmpleps",
+ "helper_cmpless",
+ "helper_cmplepd",
+ "helper_cmplesd",
+ "helper_cmpunordps",
+ "helper_cmpunordss",
+ "helper_cmpunordpd",
+ "helper_cmpunordsd",
+ "helper_cmpneqps",
+ "helper_cmpneqss",
+ "helper_cmpneqpd",
+ "helper_cmpneqsd",
+ "helper_cmpnltps",
+ "helper_cmpnltss",
+ "helper_cmpnltpd",
+ "helper_cmpnltsd",
+ "helper_cmpnleps",
+ "helper_cmpnless",
+ "helper_cmpnlepd",
+ "helper_cmpnlesd",
+ "helper_cmpordps",
+ "helper_cmpordss",
+ "helper_cmpordpd",
+ "helper_cmpordsd",
+
+ "helper_cvtps2pd",
+ "helper_cvtpd2ps",
+ "helper_cvtss2sd",
+ "helper_cvtsd2ss",
+ "helper_cvtdq2ps",
+ "helper_cvtdq2pd",
+ "helper_cvtpi2ps",
+ "helper_cvtpi2pd",
+ "helper_cvtsi2ss",
+ "helper_cvtsi2sd",
+ "helper_cvtps2dq",
+ "helper_cvtpd2dq",
+ "helper_cvtps2pi",
+ "helper_cvtpd2pi",
+ "helper_cvtss2si",
+ "helper_cvtsd2si",
+ "helper_cvttps2dq",
+ "helper_cvttpd2dq",
+ "helper_cvttps2pi",
+ "helper_cvttpd2pi",
+ "helper_cvttss2si",
+ "helper_cvttsd2si",
+
+ "helper_cmpeqps",
+ "helper_cmpeqss",
+ "helper_cmpeqpd",
+ "helper_cmpeqsd",
+ "helper_cmpltps",
+ "helper_cmpltss",
+ "helper_cmpltpd",
+ "helper_cmpltsd",
+ "helper_cmpleps",
+ "helper_cmpless",
+ "helper_cmplepd",
+ "helper_cmplesd",
+ "helper_cmpunordps",
+ "helper_cmpunordss",
+ "helper_cmpunordpd",
+ "helper_cmpunordsd",
+ "helper_cmpneqps",
+ "helper_cmpneqss",
+ "helper_cmpneqpd",
+ "helper_cmpneqsd",
+ "helper_cmpnltps",
+ "helper_cmpnltss",
+ "helper_cmpnltpd",
+ "helper_cmpnltsd",
+ "helper_cmpnleps",
+ "helper_cmpnless",
+ "helper_cmpnlepd",
+ "helper_cmpnlesd",
+ "helper_cmpordps",
+ "helper_cmpordss",
+ "helper_cmpordpd",
+ "helper_cmpordsd",
+
+ "helper_ucomisd",
+ "helper_comisd",
+ "helper_ucomiss",
+ "helper_comiss",
+
+ "helper_packuswb_xmm",
+ "helper_packsswb_xmm",
+ "helper_pmovmskb_xmm",
+ "helper_pshufw_mmx",
+
+#elif defined(TARGET_ARM)
+ "helper_add_cc",
+ "helper_sub_cc",
+ "helper_shl_cc",
+ "helper_shr_cc",
+ "helper_sar_cc",
+ "helper_adc_cc",
+ "helper_sbc_cc",
+ "helper_shl",
+ "helper_shr",
+ "helper_sar",
+ "helper_clz",
+
+ "helper_sadd8",
+ "helper_sadd16",
+ "helper_ssub8",
+ "helper_ssub16",
+ "helper_ssubaddx",
+ "helper_saddsubx",
+ "helper_uadd8",
+ "helper_uadd16",
+ "helper_usub8",
+ "helper_usub16",
+ "helper_usubaddx",
+ "helper_uaddsubx",
+
+ "helper_qadd8",
+ "helper_qadd16",
+ "helper_qsub8",
+ "helper_qsub16",
+ "helper_qsubaddx",
+ "helper_qaddsubx",
+ "helper_uqadd8",
+ "helper_uqadd16",
+ "helper_uqsub8",
+ "helper_uqsub16",
+ "helper_uqsubaddx",
+ "helper_uqaddsubx",
+
+ "helper_set_rmode",
+ "helper_cpsr_write_nzcv",
+ "helper_cpsr_write",
+ "helper_cpsr_read",
+ "helper_vfp_get_fpscr",
+ "helper_vfp_set_fpscr",
+ "helper_vfp_adds",
+ "helper_vfp_addd",
+ "helper_vfp_subs",
+ "helper_vfp_subd",
+ "helper_vfp_muls",
+ "helper_vfp_muld",
+ "helper_vfp_divs",
+ "helper_vfp_divd",
+ "helper_vfp_negs",
+ "helper_vfp_negd",
+ "helper_vfp_abss",
+ "helper_vfp_absd",
+ "helper_vfp_sqrts",
+ "helper_vfp_sqrtd",
+ "helper_vfp_cmps",
+ "helper_vfp_cmpd",
+ "helper_vfp_cmpes",
+ "helper_vfp_cmped",
+
+ "helper_vfp_muladds",
+ "helper_vfp_muladdd",
+
+#if defined(TARGET_AARCH64)
+ "helper_vfp_cmps_a64",
+ "helper_vfp_cmpd_a64",
+ "helper_vfp_cmpes_a64",
+ "helper_vfp_cmped_a64",
+ "helper_vfp_minnums",
+ "helper_vfp_maxnums",
+ "helper_vfp_minnumd",
+ "helper_vfp_maxnumd",
+#endif
+#if !defined(TCG_TARGET_PPC64)
+ "helper_vfp_fcvtds",
+ "helper_vfp_fcvtsd",
+ "helper_vfp_uitos",
+ "helper_vfp_uitod",
+ "helper_vfp_sitos",
+ "helper_vfp_sitod",
+ "helper_vfp_touis",
+ "helper_vfp_touid",
+ "helper_vfp_touizs",
+ "helper_vfp_touizd",
+ "helper_vfp_tosis",
+ "helper_vfp_tosid",
+ "helper_vfp_tosizs",
+ "helper_vfp_tosizd",
+ "helper_vfp_toshs",
+ "helper_vfp_tosls",
+ "helper_vfp_touhs",
+ "helper_vfp_touls",
+ "helper_vfp_toshd",
+ "helper_vfp_tosld",
+ "helper_vfp_touhd",
+ "helper_vfp_tould",
+ "helper_vfp_shtos",
+ "helper_vfp_sltos",
+ "helper_vfp_uhtos",
+ "helper_vfp_ultos",
+ "helper_vfp_shtod",
+ "helper_vfp_sltod",
+ "helper_vfp_uhtod",
+ "helper_vfp_ultod",
+#endif
+
+ /* neon helper */
+ "helper_neon_qadd_u8",
+ "helper_neon_qadd_s8",
+ "helper_neon_qadd_u16",
+ "helper_neon_qadd_s16",
+ "helper_neon_qsub_u8",
+ "helper_neon_qsub_s8",
+ "helper_neon_qsub_u16",
+ "helper_neon_qsub_s16",
+
+ "helper_neon_hadd_s8",
+ "helper_neon_hadd_u8",
+ "helper_neon_hadd_s16",
+ "helper_neon_hadd_u16",
+ "helper_neon_hadd_s32",
+ "helper_neon_hadd_u32",
+ "helper_neon_rhadd_s8",
+ "helper_neon_rhadd_u8",
+ "helper_neon_rhadd_s16",
+ "helper_neon_rhadd_u16",
+ "helper_neon_rhadd_s32",
+ "helper_neon_rhadd_u32",
+ "helper_neon_hsub_s8",
+ "helper_neon_hsub_u8",
+ "helper_neon_hsub_s16",
+ "helper_neon_hsub_u16",
+ "helper_neon_hsub_s32",
+ "helper_neon_hsub_u32",
+
+ "helper_neon_cgt_u8",
+ "helper_neon_cgt_s8",
+ "helper_neon_cgt_u16",
+ "helper_neon_cgt_s16",
+ "helper_neon_cgt_u32",
+ "helper_neon_cgt_s32",
+ "helper_neon_cge_u8",
+ "helper_neon_cge_s8",
+ "helper_neon_cge_u16",
+ "helper_neon_cge_s16",
+ "helper_neon_cge_u32",
+ "helper_neon_cge_s32",
+
+ "helper_neon_min_u8",
+ "helper_neon_min_s8",
+ "helper_neon_min_u16",
+ "helper_neon_min_s16",
+ "helper_neon_min_u32",
+ "helper_neon_min_s32",
+ "helper_neon_max_u8",
+ "helper_neon_max_s8",
+ "helper_neon_max_u16",
+ "helper_neon_max_s16",
+ "helper_neon_max_u32",
+ "helper_neon_max_s32",
+ "helper_neon_pmin_u8",
+ "helper_neon_pmin_s8",
+ "helper_neon_pmin_u16",
+ "helper_neon_pmin_s16",
+ "helper_neon_pmax_u8",
+ "helper_neon_pmax_s8",
+ "helper_neon_pmax_u16",
+ "helper_neon_pmax_s16",
+
+ "helper_neon_abd_u8",
+ "helper_neon_abd_s8",
+ "helper_neon_abd_u16",
+ "helper_neon_abd_s16",
+ "helper_neon_abd_u32",
+ "helper_neon_abd_s32",
+
+ "helper_neon_shl_u8",
+ "helper_neon_shl_s8",
+ "helper_neon_shl_u16",
+ "helper_neon_shl_s16",
+ "helper_neon_shl_u32",
+ "helper_neon_shl_s32",
+ "helper_neon_shl_u64",
+ "helper_neon_shl_s64",
+ "helper_neon_rshl_u8",
+ "helper_neon_rshl_s8",
+ "helper_neon_rshl_u16",
+ "helper_neon_rshl_s16",
+ "helper_neon_rshl_u32",
+ "helper_neon_rshl_s32",
+ "helper_neon_rshl_u64",
+ "helper_neon_rshl_s64",
+ "helper_neon_qshl_u8",
+ "helper_neon_qshl_s8",
+ "helper_neon_qshl_u16",
+ "helper_neon_qshl_s16",
+ "helper_neon_qshl_u32",
+ "helper_neon_qshl_s32",
+ "helper_neon_qshl_u64",
+ "helper_neon_qshl_s64",
+ "helper_neon_qrshl_u8",
+ "helper_neon_qrshl_s8",
+ "helper_neon_qrshl_u16",
+ "helper_neon_qrshl_s16",
+ "helper_neon_qrshl_u32",
+ "helper_neon_qrshl_s32",
+ "helper_neon_qrshl_u64",
+ "helper_neon_qrshl_s64",
+
+ "helper_neon_add_u8",
+ "helper_neon_add_u16",
+ "helper_neon_padd_u8",
+ "helper_neon_padd_u16",
+ "helper_neon_sub_u8",
+ "helper_neon_sub_u16",
+ "helper_neon_mul_u8",
+ "helper_neon_mul_u16",
+ "helper_neon_mul_p8",
+
+ "helper_neon_tst_u8",
+ "helper_neon_tst_u16",
+ "helper_neon_tst_u32",
+ "helper_neon_ceq_u8",
+ "helper_neon_ceq_u16",
+ "helper_neon_ceq_u32",
+
+ "helper_neon_abs_s8",
+ "helper_neon_abs_s16",
+ "helper_neon_clz_u8",
+ "helper_neon_clz_u16",
+ "helper_neon_cls_s8",
+ "helper_neon_cls_s16",
+ "helper_neon_cls_s32",
+ "helper_neon_cnt_u8",
+
+ "helper_neon_qdmulh_s16",
+ "helper_neon_qrdmulh_s16",
+ "helper_neon_qdmulh_s32",
+ "helper_neon_qrdmulh_s32",
+
+ "helper_neon_narrow_u8",
+ "helper_neon_narrow_u16",
+ "helper_neon_narrow_sat_u8",
+ "helper_neon_narrow_sat_s8",
+ "helper_neon_narrow_sat_u16",
+ "helper_neon_narrow_sat_s16",
+ "helper_neon_narrow_sat_u32",
+ "helper_neon_narrow_sat_s32",
+ "helper_neon_narrow_high_u8",
+ "helper_neon_narrow_high_u16",
+ "helper_neon_narrow_round_high_u8",
+ "helper_neon_narrow_round_high_u16",
+ "helper_neon_widen_u8",
+ "helper_neon_widen_s8",
+ "helper_neon_widen_u16",
+ "helper_neon_widen_s16",
+
+ "helper_neon_addl_u16",
+ "helper_neon_addl_u32",
+ "helper_neon_paddl_u16",
+ "helper_neon_paddl_u32",
+ "helper_neon_subl_u16",
+ "helper_neon_subl_u32",
+ "helper_neon_addl_saturate_s32",
+ "helper_neon_addl_saturate_s64",
+ "helper_neon_abdl_u16",
+ "helper_neon_abdl_s16",
+ "helper_neon_abdl_u32",
+ "helper_neon_abdl_s32",
+ "helper_neon_abdl_u64",
+ "helper_neon_abdl_s64",
+ "helper_neon_mull_u8",
+ "helper_neon_mull_s8",
+ "helper_neon_mull_u16",
+ "helper_neon_mull_s16",
+
+ "helper_neon_negl_u16",
+ "helper_neon_negl_u32",
+ "helper_neon_negl_u64",
+
+ "helper_neon_qabs_s8",
+ "helper_neon_qabs_s16",
+ "helper_neon_qabs_s32",
+ "helper_neon_qneg_s8",
+ "helper_neon_qneg_s16",
+ "helper_neon_qneg_s32",
+
+ "helper_neon_min_f32",
+ "helper_neon_max_f32",
+ "helper_neon_abd_f32",
+ "helper_neon_add_f32",
+ "helper_neon_sub_f32",
+ "helper_neon_mul_f32",
+ "helper_neon_ceq_f32",
+ "helper_neon_cge_f32",
+ "helper_neon_cgt_f32",
+ "helper_neon_acge_f32",
+ "helper_neon_acgt_f32",
+
+#elif defined(TARGET_PPC)
+ "helper_popcntb",
+ "helper_cntlzw",
+ "helper_cntlsw32",
+ "helper_cntlzw32",
+
+ "helper_compute_fprf",
+ "helper_store_fpscr",
+ "helper_fpscr_clrbit",
+ "helper_fpscr_setbit",
+ "helper_fcmpo",
+ "helper_fcmpu",
+
+ "helper_fctiw",
+ "helper_fctiwz",
+ "helper_frsp",
+ "helper_frin",
+ "helper_friz",
+ "helper_frip",
+ "helper_frim",
+
+ "helper_fadd",
+ "helper_fsub",
+ "helper_fmul",
+ "helper_fdiv",
+ "helper_fmadd",
+ "helper_fmsub",
+ "helper_fnmadd",
+ "helper_fnmsub",
+ "helper_fabs",
+ "helper_fnabs",
+ "helper_fneg",
+ "helper_fsqrt",
+ "helper_fre",
+ "helper_fres",
+ "helper_frsqrte",
+ "helper_fsel",
+
+#elif defined(TARGET_MICROBLAZE)
+ "helper_addkc",
+ "helper_subkc",
+ "helper_cmp",
+ "helper_cmpu",
+ "helper_divs",
+ "helper_divu",
+#elif defined(TARGET_MIPS)
+ "helper_lwl",
+ "helper_lwr",
+ "helper_swl",
+ "helper_swr",
+#endif
+
+#endif
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
+
diff --git a/llvm/include/llvm-macro.h b/llvm/include/llvm-macro.h
new file mode 100644
index 0000000..7b0e613
--- /dev/null
+++ b/llvm/include/llvm-macro.h
@@ -0,0 +1,88 @@
+/*
+ * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#ifndef __LLVM_MACRO_H
+#define __LLVM_MACRO_H
+
+/*
+ * Shorthand macros for building LLVM IR. They expand in place, so every name
+ * they mention (LastInst, Int8Ty, Int8PtrTy, SaveGlobals, CreateBSwap, ...)
+ * must be visible at the point where the macro is used.
+ */
+
+/* In softmmu builds, call SaveGlobals(COHERENCE_GLOBAL, LastInst); in other
+ * builds the macro expands to nothing. */
+#if defined(CONFIG_SOFTMMU)
+#define SaveStates() SaveGlobals(COHERENCE_GLOBAL, LastInst)
+#else
+#define SaveStates()
+#endif
+
+/* Integer constants of type Int8Ty ... Int128Ty / IntPtrTy. */
+#define CONST8(a) ConstantInt::get(Int8Ty, a)
+#define CONST16(a) ConstantInt::get(Int16Ty, a)
+#define CONST32(a) ConstantInt::get(Int32Ty, a)
+#define CONST64(a) ConstantInt::get(Int64Ty, a)
+#define CONST128(a) ConstantInt::get(Int128Ty, a)
+#define CONSTPtr(a) ConstantInt::get(IntPtrTy, a)
+
+/* Floating-point constants of type FloatTy, DoubleTy, FP80Ty and FP128Ty. */
+#define FPCONST32(a) ConstantFP::get(FloatTy, a)
+#define FPCONST64(a) ConstantFP::get(DoubleTy, a)
+#define FPCONST80(a) ConstantFP::get(FP80Ty, a)
+#define FPCONST128(a) ConstantFP::get(FP128Ty, a)
+
+/* Integer comparison of a and b with predicate pred; LastInst is passed as
+ * the insertion position. */
+#define ICMP(a,b,pred) new ICmpInst(LastInst, pred, a, b, "")
+
+/* Integer binary operators; each one creates an unnamed instruction and
+ * passes LastInst as the insertion position. */
+#define AND(a,b) BinaryOperator::Create(Instruction::And, a, b, "", LastInst)
+#define OR(a,b) BinaryOperator::Create(Instruction::Or, a, b, "", LastInst)
+#define XOR(a,b) BinaryOperator::Create(Instruction::Xor, a, b, "", LastInst)
+#define SHL(a,b) BinaryOperator::Create(Instruction::Shl, a, b, "", LastInst)
+#define LSHR(a,b) BinaryOperator::Create(Instruction::LShr, a, b, "", LastInst)
+#define ASHR(a,b) BinaryOperator::Create(Instruction::AShr, a, b, "", LastInst)
+#define ADD(a,b) BinaryOperator::Create(Instruction::Add, a, b, "", LastInst)
+#define SUB(a,b) BinaryOperator::Create(Instruction::Sub, a, b, "", LastInst)
+#define MUL(a,b) BinaryOperator::Create(Instruction::Mul, a, b, "", LastInst)
+#define SDIV(a,b) BinaryOperator::Create(Instruction::SDiv, a, b, "", LastInst)
+#define UDIV(a,b) BinaryOperator::Create(Instruction::UDiv, a, b, "", LastInst)
+#define SREM(a,b) BinaryOperator::Create(Instruction::SRem, a, b, "", LastInst)
+#define UREM(a,b) BinaryOperator::Create(Instruction::URem, a, b, "", LastInst)
+
+/* Floating-point binary operators, same insertion convention as above. */
+#define FADD(a,b) BinaryOperator::Create(Instruction::FAdd, a, b, "", LastInst)
+#define FSUB(a,b) BinaryOperator::Create(Instruction::FSub, a, b, "", LastInst)
+#define FMUL(a,b) BinaryOperator::Create(Instruction::FMul, a, b, "", LastInst)
+#define FDIV(a,b) BinaryOperator::Create(Instruction::FDiv, a, b, "", LastInst)
+
+/* Bitcast a to type t; the CASTPTRn forms cast to the IntnPtrTy types. */
+#define CAST(a,t) new BitCastInst(a, t, "", LastInst)
+#define CASTPTR8(a) CAST(a,Int8PtrTy)
+#define CASTPTR16(a) CAST(a,Int16PtrTy)
+#define CASTPTR32(a) CAST(a,Int32PtrTy)
+#define CASTPTR64(a) CAST(a,Int64PtrTy)
+
+/* Integer-to-pointer conversion of a to pointer type t. */
+#define ITP(a,t) new IntToPtrInst(a, t, "", LastInst)
+#define ITP8(a) ITP(a,Int8PtrTy)
+#define ITP16(a) ITP(a,Int16PtrTy)
+#define ITP32(a) ITP(a,Int32PtrTy)
+#define ITP64(a) ITP(a,Int64PtrTy)
+
+/* Truncate a to integer type t. */
+#define TRUNC(a,t) new TruncInst(a, t, "", LastInst)
+#define TRUNC8(a) TRUNC(a, Int8Ty)
+#define TRUNC16(a) TRUNC(a, Int16Ty)
+#define TRUNC32(a) TRUNC(a, Int32Ty)
+#define TRUNC64(a) TRUNC(a, Int64Ty)
+
+/* Zero-extend (ZEXT) or sign-extend (SEXT) a to integer type t. */
+#define ZEXT(a,t) new ZExtInst(a, t, "", LastInst)
+#define ZEXT8(a) ZEXT(a, Int8Ty)
+#define ZEXT16(a) ZEXT(a, Int16Ty)
+#define ZEXT32(a) ZEXT(a, Int32Ty)
+#define ZEXT64(a) ZEXT(a, Int64Ty)
+#define ZEXT128(a) ZEXT(a, Int128Ty)
+#define SEXT(a,t) new SExtInst(a, t, "", LastInst)
+#define SEXT8(a) SEXT(a, Int8Ty)
+#define SEXT16(a) SEXT(a, Int16Ty)
+#define SEXT32(a) SEXT(a, Int32Ty)
+#define SEXT64(a) SEXT(a, Int64Ty)
+#define SEXT128(a) SEXT(a, Int128Ty)
+
+/* Byte swap of a, emitted through CreateBSwap() with the matching type. */
+#define BSWAP16(a) CreateBSwap(Int16Ty, a, LastInst)
+#define BSWAP32(a) CreateBSwap(Int32Ty, a, LastInst)
+#define BSWAP64(a) CreateBSwap(Int64Ty, a, LastInst)
+
+
+#endif
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
diff --git a/llvm/include/llvm-opc.h b/llvm/include/llvm-opc.h
new file mode 100644
index 0000000..9454dac
--- /dev/null
+++ b/llvm/include/llvm-opc.h
@@ -0,0 +1,494 @@
+/*
+ * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#ifndef __LLVM_OPC_H
+#define __LLVM_OPC_H
+
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "qemu-types.h"
+#include "llvm-types.h"
+#include "llvm-translator.h"
+#include "llvm.h"
+
+//#define ASSERT
+//#define VERIFY_TB
+
+
+/* Print the name and the oargs/iargs/cargs counts of llvm_op_defs[idx] to
+ * the dbg() stream under DEBUG_ENTRY. */
+#define IRDebug(idx) \
+ do { \
+ dbg() << DEBUG_ENTRY << "op_" << llvm_op_defs[idx].name << ": " \
+ << llvm_op_defs[idx].nb_oargs << " " \
+ << llvm_op_defs[idx].nb_iargs << " " \
+ << llvm_op_defs[idx].nb_cargs << "\n"; \
+ } while (0)
+/* Forward a printf-style message to hqemu_error(). Uses the GNU named
+ * variadic macro syntax (args...). */
+#define IRError(fmt,args...) hqemu_error(fmt,##args)
+
+/* AssertType(t) reports "invalid type." through hqemu_error() when t is
+ * false, but only if ASSERT is defined; otherwise it expands to nothing. */
+#ifdef ASSERT
+#define AssertType(t) \
+ do { \
+ if (!(t)) \
+ hqemu_error("invalid type.\n"); \
+ } while(0)
+#else
+#define AssertType(t)
+#endif
+
+/* Give up on the current translation. Outside trace mode the function under
+ * construction (Func) is dumped and hqemu_error() is called first; then
+ * Builder->Abort() is invoked. Expects LLEnv, Func and Builder in scope. */
+#define IRAbort() \
+ do { \
+ if (!LLEnv->isTraceMode()) { \
+ Func->dump(); \
+ hqemu_error("fixme.\n"); \
+ } \
+ Builder->Abort(); \
+ } while (0)
+
+
+class LLVMTranslator;
+class NotifyInfo;
+class OptimizationInfo;
+
+
+/* Patch flags.
+ * NOTE: patch flags must be synchronized with those in the LLVM backend.
+ * PATCH_HQEMU is pinned to 0x4182; the enumerators after it take the
+ * consecutive values 0x4183, 0x4184, ... so their order must not change. */
+enum {
+ PATCH_HQEMU = 0x4182U,
+ PATCH_DUMMY,
+ PATCH_EXIT_TB,
+ PATCH_DIRECT_JUMP,
+ PATCH_TRACE_BLOCK_CHAINING,
+ PATCH_QMMU,
+};
+
+/*
+ * Register is used to describe the pseudo registers used by QEMU TCG op.
+ * It records where the register lives (offset/size inside CPUArchState), the
+ * LLVM value currently holding it, and whether that value is dirty.
+ */
+struct Register {
+    /* Status of the register. The values are bit flags and may be combined. */
+    enum {
+        STATE_NONE = 0x0,
+        STATE_REV  = 0x1,   /* Register is reserved */
+        STATE_REG  = 0x2,   /* Register is promoted */
+        STATE_MEM  = 0x4,   /* Register is in CPUArchState memory */
+        STATE_LOC  = 0x8,   /* Register is a local register */
+        STATE_TMP  = 0x10,  /* Register is a tmp register */
+    };
+
+    int State;          /* State of the register (STATE_* bits) */
+    int Base;           /* Base given to set(); presumably the index of the
+                         * TCG base register -- TODO confirm */
+    intptr_t Off;       /* Register offset of CPUArchState */
+    int Size;           /* Register size */
+    std::string Name;   /* Name string of this register */
+    bool Dirty;         /* This register is updated or not */
+    Type *Ty;           /* Register type in LLVM */
+    Value *Data;        /* Data value if this register is promoted */
+    Value *AI;          /* Register as Alloca */
+    Register *Alias;    /* Register that setData() forwards to, if any */
+
+    /* Every scalar member gets a defined value. Base and Size used to be
+     * left indeterminate until set()/reset() ran, so reading them earlier
+     * was undefined behavior. Initializers follow declaration order. */
+    Register() : State(STATE_NONE), Base(-1), Off(-1), Size(0), Dirty(false),
+                 Ty(nullptr), Data(nullptr), AI(nullptr), Alias(nullptr) {}
+
+    /* Bind the register to its location and name. */
+    void set(int base, intptr_t off, std::string name) {
+        Base = base;
+        Off = off;
+        Name = name;
+    }
+    /* Start over with a new state/size/type: the register becomes clean and
+     * loses its promoted value and alloca. Alias is left untouched. */
+    void reset(int state, int size, Type *ty) {
+        State = state;
+        Size = size;
+        Ty = ty;
+        Dirty = false;
+        Data = AI = nullptr;
+    }
+
+    void Promote() { State |= STATE_REG; }
+    void Demote()  { State &= ~STATE_REG; }
+
+    Value *getData() { return Data; }
+    /* Only valid when isAlias() is true; Alias is dereferenced unchecked. */
+    Register &getAlias() { return *Alias; }
+
+    void setState(int state) { State = state; }
+    /* Record a new value for the register and mark it promoted. When the
+     * register has an alias, the update is forwarded to the alias and this
+     * object is left unchanged. */
+    void setData(Value *data, bool dirty = false) {
+        if (Alias) {
+            Alias->setData(data, dirty);
+            return;
+        }
+        Data = data;
+        Dirty = dirty;
+        Promote();
+    }
+    bool isRev()   { return State & STATE_REV; }
+    bool isReg()   { return State & STATE_REG; }
+    bool isMem()   { return State & STATE_MEM; }
+    bool isLocal() { return State & STATE_LOC; }
+    bool isDirty() { return Dirty; }
+    bool isAlias() { return Alias != nullptr; }
+};
+
+/*
+ * TraceBuilder drives the construction of one trace for IRFactory. It owns
+ * the work list of CFG nodes still to be translated and the table that maps
+ * a guest PC to its (CFG node, LLVM basic block) pair.
+ */
+class TraceBuilder {
+    typedef std::map<target_ulong,
+                     std::pair<GraphNode*, BasicBlock*> > NodeBuildMap;
+    typedef std::vector<std::pair<BranchInst*, GraphNode*> > BranchList;
+
+    IRFactory *IF;
+    OptimizationInfo *Opt;
+    GraphNode *CurrNode;    /* The current CFG node to process */
+    NodeBuildMap Nodes;
+    BranchList Branches;
+    NodeVec NodeQueue;      /* CFG nodes to be translated */
+    NodeSet NodeVisisted;
+    NodeVec NodeUsed;
+    bool Aborted;
+    uint32_t Attribute;
+
+    TraceInfo *Trace;
+
+public:
+    TraceBuilder(IRFactory *IRF, OptimizationInfo *Opt);
+    ~TraceBuilder() {}
+
+    void ConvertToTCGIR(CPUArchState *env);
+    void ConvertToLLVMIR();
+    void Abort();
+    void Finalize();
+    bool isAborted() { return Aborted; }
+
+    OptimizationInfo *getOpt() { return Opt; }
+    TraceInfo *getTrace() { return Trace; }
+    GraphNode *getEntryNode() { return Opt->getCFG(); }
+    GraphNode *getCurrNode() { return CurrNode; }
+    unsigned getNumNodes() { return Nodes.size(); }
+
+    /* Format the node's guest PC as a hexadecimal string (no prefix). */
+    std::string getPCString(GraphNode *Node) {
+        std::stringstream HexPC;
+        HexPC << std::hex << Node->getGuestPC();
+        return HexPC.str();
+    }
+
+    /* Pop nodes off the back of the queue until one is found that has not
+     * been visited yet; mark it visited and used, and return it. Returns
+     * nullptr once the queue runs dry. CurrNode tracks the last popped
+     * node, visited or not. */
+    GraphNode *getNextNode() {
+        while (!NodeQueue.empty()) {
+            CurrNode = NodeQueue.back();
+            NodeQueue.pop_back();
+
+            if (NodeVisisted.find(CurrNode) != NodeVisisted.end())
+                continue;
+
+            NodeVisisted.insert(CurrNode);
+            NodeUsed.push_back(CurrNode);
+            return CurrNode;
+        }
+        return nullptr;
+    }
+
+    /* Key used by the node table: the TB's pc, made relative to cs_base on
+     * TARGET_I386. */
+    target_ulong getGuestPC(GraphNode *Node) {
+#if defined(TARGET_I386)
+        return Node->getTB()->pc - Node->getTB()->cs_base;
+#else
+        return Node->getTB()->pc;
+#endif
+    }
+
+    /* Register Node under its guest PC unless that PC is already known. */
+    void setUniqueNode(GraphNode *Node) {
+        const target_ulong Key = getGuestPC(Node);
+        if (Nodes.find(Key) != Nodes.end())
+            return;
+        Nodes[Key] = std::make_pair(Node, nullptr);
+    }
+
+    /* Attach BB to the already registered entry of Node. */
+    void setBasicBlock(GraphNode *Node, BasicBlock *BB) {
+        const target_ulong Key = getGuestPC(Node);
+        NodeBuildMap::iterator Entry = Nodes.find(Key);
+        if (Entry == Nodes.end())
+            hqemu_error("internal error.\n");
+        Nodes[Key].second = BB;
+    }
+
+    /* Remember that BI targets Node, and queue Node for translation when no
+     * basic block has been attached to its guest PC yet. */
+    void setBranch(BranchInst *BI, GraphNode *Node) {
+        Branches.push_back(std::make_pair(BI, Node));
+        const target_ulong Key = getGuestPC(Node);
+        BasicBlock *Known = Nodes[Key].second;
+        if (Known == nullptr)
+            NodeQueue.push_back(Node);
+    }
+
+    /* Look up the node registered for gpc; nullptr if there is none. */
+    GraphNode *getNode(target_ulong gpc) {
+        NodeBuildMap::iterator Entry = Nodes.find(gpc);
+        if (Entry == Nodes.end())
+            return nullptr;
+        return Entry->second.first;
+    }
+
+    /* Return the basic block attached to Node's guest PC. */
+    BasicBlock *getBasicBlock(GraphNode *Node) {
+        const target_ulong Key = getGuestPC(Node);
+        NodeBuildMap::iterator Entry = Nodes.find(Key);
+        if (Entry == Nodes.end())
+            hqemu_error("internal error.\n");
+        return Nodes[Key].second;
+    }
+
+    void addAttribute(uint32_t Attr) {
+        Attribute = Attribute | Attr;
+    }
+};
+
+
+#define META_CONST "const"
+#define META_GVA "gva"
+#define META_LOOP "loop"
+#define META_EXIT "exit"
+#define META_CC "cc"
+
+/*
+ * MDFactory tags instructions with the META_* metadata kinds and answers
+ * queries about which tags an instruction carries.
+ */
+class MDFactory {
+    uint32_t UID;
+    LLVMContext &Context;
+    MDNode *Dummy;
+
+    /* Hand out the current UID as a 32-bit constant, then advance it. */
+    ConstantInt *getUID() {
+        IntegerType *I32 = IntegerType::get(Context, 32);
+        return ConstantInt::get(I32, UID++);
+    }
+
+public:
+    MDFactory(Module *M);
+    ~MDFactory();
+
+    MDNode *getMDNode(ArrayRef<ConstantInt*> V);
+    DebugLoc getDebugLoc(unsigned Line, unsigned Col, Function *F,
+                         ArrayRef<ConstantInt*> Meta);
+
+    /* Setters: attach the shared Dummy node under the given metadata kind. */
+    void setConst(Instruction *I)       { I->setMetadata(META_CONST, Dummy); }
+    void setGuestMemory(Instruction *I) { I->setMetadata(META_GVA, Dummy); }
+    void setLoop(Instruction *I)        { I->setMetadata(META_LOOP, Dummy); }
+    void setExit(Instruction *I)        { I->setMetadata(META_EXIT, Dummy); }
+    void setCondition(Instruction *I)   { I->setMetadata(META_CC, Dummy); }
+
+    /* Queries: true when the instruction carries metadata of that kind. */
+    static bool isConst(Instruction *I) {
+        return I->getMetadata(META_CONST) != nullptr;
+    }
+    static bool isGuestMemory(Instruction *I) {
+        return I->getMetadata(META_GVA) != nullptr;
+    }
+    static bool isLoop(Instruction *I) {
+        return I->getMetadata(META_LOOP) != nullptr;
+    }
+    static bool isExit(Instruction *I) {
+        return I->getMetadata(META_EXIT) != nullptr;
+    }
+    static bool isCondition(Instruction *I) {
+        return I->getMetadata(META_CC) != nullptr;
+    }
+
+    static void setConstStatic(LLVMContext &Context, Instruction *I,
+                               ArrayRef<ConstantInt*> V);
+};
+
+/*
+ * IRFactory conducts QEMU TCG opcodes to LLVM IR conversion.
+ * Only declarations and a few small inline helpers live here; the op_*
+ * conversion routines and the remaining members are defined out of line.
+ */
+class IRFactory {
+ typedef std::map<std::pair<intptr_t, Type *>, Value *> StatePtrMap;
+ typedef std::map<TCGArg, BasicBlock *> LabelMap;
+
+ /* Levels accepted by SaveGlobals(). */
+ enum {
+ COHERENCE_NONE = 0,
+ COHERENCE_GLOBAL,
+ COHERENCE_ALL,
+ };
+
+ bool InitOnce;
+
+ /* Basic types */
+ Type *VoidTy;
+ IntegerType *Int8Ty;
+ IntegerType *Int16Ty;
+ IntegerType *Int32Ty;
+ IntegerType *Int64Ty;
+ IntegerType *Int128Ty;
+ IntegerType *IntPtrTy;
+ PointerType *Int8PtrTy;
+ PointerType *Int16PtrTy;
+ PointerType *Int32PtrTy;
+ PointerType *Int64PtrTy;
+ Type *FloatTy;
+ Type *DoubleTy;
+ Type *FP80Ty;
+ Type *FP128Ty;
+
+ ConstantInt *ExitAddr;
+
+ LLVMTranslator &Translator; /* Uplink to the LLVMTranslator instance */
+ LLVMContext *Context; /* Translator local context */
+ Module *Mod; /* The LLVM module */
+ ExecutionEngine *EE; /* The JIT compiler */
+ EventListener *Listener; /* The JIT listener */
+ JITEventListener *IntelJIT; /* The Intel JIT listener */
+ const DataLayout *DL; /* Data layout */
+ TraceBuilder *Builder;
+ MDFactory *MF;
+ MCDisasm *HostDisAsm;
+
+ HelperMap &Helpers;
+ std::vector<BaseRegister> &BaseReg; /* TCG base register */
+ std::vector<Register> Reg; /* TCG virtual registers */
+ LabelMap Labels; /* TCG labels */
+ int Segment;
+ GuestBaseRegister &GuestBaseReg; /* Reserved guest base register */
+
+ Function *Func; /* The container of LLVM IR to be translated */
+ BasicBlock *InitBB; /* BasicBlock for variable declaration */
+ BasicBlock *CurrBB; /* Current BasicBlock to insert LLVM IR */
+ BasicBlock *ExitBB; /* Temp BasicBlock as the exit-function stub */
+ BranchInst *LastInst; /* Position to insert LLVM IR */
+
+ Instruction *CPU; /* Base register with (char*) type */
+ Instruction *CPUStruct; /* Base register with (struct CPUArchState*) type */
+ Instruction *GEPInsertPos; /* Position to insert GEP instruction */
+
+ StatePtrMap StatePtr;
+ IVec InlineCalls; /* Helpers to be inlined */
+ std::map<std::string, BasicBlock*> CommonBB;
+ IVec IndirectBrs;
+ IVec toErase;
+ BBVec toSink;
+ std::set<Function *> ClonedFuncs;
+ bool runPasses;
+
+ void CreateJIT();
+ void DeleteJIT();
+
+ /* Initialize basic types used during IR conversion. */
+ void InitializeTypes();
+
+ /* Store dirty states back to CPU state in the memory. */
+ void SaveGlobals(int level, Instruction *InsertPos);
+
+ /* Sync PC to CPU state in the memory. */
+ void CreateStorePC(Instruction *InsertPos);
+
+ /* Get or insert the pointer to the CPU state. */
+ Value *StatePointer(Register &reg);
+ Value *StatePointer(Register &reg, intptr_t Off, Type *PTy);
+
+ /* Load value from the CPU state in the memory. */
+ Value *LoadState(Register &reg);
+ void StoreState(Register &reg, Instruction *InsertPos);
+
+ /* Load/Store data from/to the guest memory. */
+ Value *QEMULoad(Value *AddrL, Value *AddrH, TCGMemOpIdx oi);
+ void QEMUStore(Value *Data, Value *AddrL, Value *AddrH, TCGMemOpIdx oi);
+
+ Value *ConvertCPUType(Function *F, int Idx, Instruction *InsertPos);
+ Value *ConvertCPUType(Function *F, int Idx, BasicBlock *InsertPos);
+
+ Value *ConvertEndian(Value *V, int opc);
+ Value *getExtendValue(Value *V, Type *Ty, int opc);
+ Value *getTruncValue(Value *V, int opc);
+ /* Access size in bits for a memory opcode: (opc & MO_SIZE) is used as the
+ * log2 of the size in bytes. */
+ int getSizeInBits(int opc) {
+ return 8 * (1 << (opc & MO_SIZE));
+ }
+
+ Value *ConcatTLBVersion(Value *GVA);
+
+ /* Return the LLVM instruction that stores PC. For the guest's register
+ * size larger than the host, replace the multiple store-PC instructions
+ * to one single store-PC instruction. */
+ StoreInst *getStorePC();
+
+ /* Create both chaining and exiting stubs. */
+ void InsertLinkAndExit(Instruction *InsertPos);
+
+ /* Create exit stub */
+ void InsertExit(uintptr_t RetVal, bool setExit = false);
+
+ /* Find the next node of a trace according to the branch pc.
+ * Return null if we cannot find one. */
+ GraphNode *findNextNode(target_ulong pc);
+
+ /* Perform internal linking of basic blocks to form a region. */
+ void TraceLink(StoreInst *SI);
+
+ /* Link basic blocks of direct branch. */
+ void TraceLinkDirectJump(GraphNode *NextNode, StoreInst *SI);
+ void TraceLinkDirectJump(StoreInst *SI);
+
+ /* Link basic blocks of indirect branch. */
+ void TraceLinkIndirectJump(GraphNode *NextNode, StoreInst *SI);
+
+ /* Insert code for IBTC hash table lookup. */
+ void InsertLookupIBTC(GraphNode *CurrNode);
+
+ /* Insert code for CPBL hash table lookup. */
+ void InsertLookupCPBL(GraphNode *CurrNode);
+
+ void TraceValidateCPBL(GraphNode *NextNode, StoreInst *StorePC);
+
+ /* Insert bswap intrinsic instruction. */
+ Value *CreateBSwap(Type *Ty, Value *V, Instruction *InsertPos);
+
+ /* Given the size, return its PointerType. */
+ PointerType *getPointerTy(int Size, unsigned AS = 0);
+
+ /* Analyze a helper function to determine if it will be inlined or not. */
+ int AnalyzeInlineCost(CallSite CS);
+
+ /* Perform helper function inlining. */
+ void ProcessInline();
+
+ void VerifyFunction(Function &F);
+
+ /* Legalize LLVM IR before running the pre-defined passes. */
+ void PreProcess();
+
+ void Optimize();
+
+ /* Legalize LLVM IR after running the pre-defined passes. */
+ void PostProcess();
+
+ void FinalizeObject();
+
+ void InitializeLLVMPasses(legacy::FunctionPassManager *FPM);
+
+ /* Build a 32-bit key: the value returned by NI.setRestorePoint() shifted
+ * into the upper half, oi in the lower 16 bits. Reports an error through
+ * hqemu_error() when oi does not fit in 16 bits. */
+ uint32_t setRestorePoint(TCGMemOpIdx oi) {
+ if (oi != (uint16_t)oi)
+ hqemu_error("key value too large.\n");
+ return (NI.setRestorePoint() << 16) | oi;
+ }
+
+public:
+ /* Signature shared by all op_* conversion routines. */
+ typedef void (IRFactory::*FuncPtr)(const TCGArg *);
+
+ NotifyInfo &NI; /* Info to pass among translator and JIT */
+
+ /* QEMU TCG IR to LLVM IR conversion routines.
+ * One op_<name>() member is declared per DEF() entry in tcg-opc.h. */
+#define DEF(name, oargs, iargs, cargs, flags) void op_ ## name(const TCGArg *);
+#include "tcg-opc.h"
+#undef DEF
+
+ IRFactory(LLVMTranslator *Trans);
+ ~IRFactory();
+
+ void CreateSession(TraceBuilder *builder);
+ void DeleteSession();
+
+ /* Prepare the initial LLVM Function, BasicBlocks and variables. */
+ void CreateFunction();
+ void CreateBlock();
+
+ /* Start LLVM JIT compilation. */
+ void Compile();
+
+ /* Set instruction BI to jump to the basic block BB. */
+ void setSuccessor(BranchInst *BI, BasicBlock *BB);
+
+ /* Get function pointer of the IR conversion routines. */
+ void *getOpcFunc();
+
+ Function *ResolveFunction(std::string Name);
+
+ LLVMTranslator &getTranslator() { return Translator; }
+ LLVMContext &getContext() { return *Context; }
+ const DataLayout *getDL() { return DL; }
+ MDFactory *getMDFactory() { return MF; }
+ HelperMap &getHelpers() { return Helpers; }
+ TraceInfo *getTrace() { return Builder->getTrace(); }
+ Value *getGuestBase() { return GuestBaseReg.Base; }
+ Instruction *getDefaultCPU(Function &F);
+
+public:
+ static bool isStateOfPC(intptr_t Off);
+};
+
+#endif
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
diff --git a/llvm/include/llvm-pass.h b/llvm/include/llvm-pass.h
new file mode 100644
index 0000000..75bcf4a
--- /dev/null
+++ b/llvm/include/llvm-pass.h
@@ -0,0 +1,205 @@
+/*
+ * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#ifndef __LLVM_PASS_H
+#define __LLVM_PASS_H
+
+#include <map>
+#include <vector>
+#include "llvm-types.h"
+
+class IRFactory;
+
+
+/* Return the address operand of a load or store; nullptr for anything else. */
+static inline Value *getPointerOperand(Value *I) {
+    LoadInst *Load = dyn_cast<LoadInst>(I);
+    if (Load)
+        return Load->getPointerOperand();
+
+    StoreInst *Store = dyn_cast<StoreInst>(I);
+    return Store ? Store->getPointerOperand() : nullptr;
+}
+
+/* Return the value moved by a memory access: the load itself for a load,
+ * the stored value for a store, nullptr for anything else. */
+static inline Value *getValueOperand(Value *I) {
+    LoadInst *Load = dyn_cast<LoadInst>(I);
+    if (Load)
+        return Load;
+
+    StoreInst *Store = dyn_cast<StoreInst>(I);
+    return Store ? Store->getValueOperand() : nullptr;
+}
+
+/* Return the address space of the pointer accessed by a load or store.
+ * Anything else yields (unsigned)-1, i.e. the largest unsigned value. */
+static inline unsigned getAddressSpaceOperand(Value *I) {
+    LoadInst *Load = dyn_cast<LoadInst>(I);
+    if (Load)
+        return Load->getPointerAddressSpace();
+
+    StoreInst *Store = dyn_cast<StoreInst>(I);
+    if (Store)
+        return Store->getPointerAddressSpace();
+
+    return static_cast<unsigned>(-1);
+}
+
+/* One reference to the CPU state: the byte range [Start, End) it covers and
+ * the instruction performing the access. */
+struct StateRef {
+    intptr_t Start;
+    intptr_t End;
+    Instruction *I;
+
+    StateRef(intptr_t Start, intptr_t End, Instruction *I)
+        : Start(Start), End(End), I(I) {}
+
+    /* Number of bytes covered by this reference. */
+    intptr_t getSize() {
+        return End - Start;
+    }
+    /* Type of the value loaded or stored by I. */
+    Type *getType() {
+        Value *Accessed = getValueOperand(I);
+        return Accessed->getType();
+    }
+};
+
+/* A bucket of references that touch the same (overlapping) CPU state; the
+ * bucket spans the byte range [Start, End). */
+struct StateData {
+    intptr_t Start;
+    intptr_t End;
+    std::vector<StateRef*> Refs;
+
+    /* Restart the bucket so that it holds Ref only. */
+    void reset(StateRef &Ref) {
+        Refs.clear();
+        Refs.push_back(&Ref);
+        Start = Ref.Start;
+        End = Ref.End;
+    }
+    /* Add Ref to the bucket, growing End when Ref reaches further. */
+    void insert(StateRef &Ref) {
+        if (Ref.End > End)
+            End = Ref.End;
+        Refs.push_back(&Ref);
+    }
+};
+
+typedef std::map<intptr_t, intptr_t> StateRange;
+typedef std::vector<StateData> StateList;
+typedef std::vector<CallInst*> CallList;
+
+/*
+ * The purpose of StateAnalyzer is to analyze loads/stores of CPU states and
+ * group loads/stores of the same CPU state into the same bucket (StateData).
+ *
+ * Usage: feed references with addStateRef()/addCall(), then call
+ * computeState() or computeStateRange(). The buckets hold pointers into the
+ * internal reference vector, so they must be recomputed after any further
+ * addStateRef() call.
+ */
+class StateAnalyzer {
+    const DataLayout *DL;
+    std::vector<StateRef> StateRefs;
+    CallList Calls;
+    StateList States;
+
+    /* Sort state references by the state offset. */
+    void sortStateRefs() {
+        std::sort(StateRefs.begin(), StateRefs.end(),
+                  [](const StateRef &lhs, const StateRef &rhs) -> bool {
+                      return lhs.Start < rhs.Start;
+                  });
+    }
+
+public:
+    StateAnalyzer(const DataLayout *DL) : DL(DL) {}
+
+    /* Forget all collected references, calls and computed buckets. */
+    void clear() {
+        StateRefs.clear();
+        Calls.clear();
+        States.clear();
+    }
+
+    /* Add a CPU state reference made by load/store I at state offset Off.
+     * The covered range is [Off, Off + size of the accessed type in bytes). */
+    void addStateRef(Instruction *I, intptr_t Off) {
+        Type *Ty = getValueOperand(I)->getType();
+        intptr_t Start = Off;
+        intptr_t End = Off + DL->getTypeSizeInBits(Ty) / 8;
+        StateRefs.push_back(StateRef(Start, End, I));
+    }
+
+    /* Add a helper function call. */
+    void addCall(CallInst *CI) {
+        Calls.push_back(CI);
+    }
+
+    /* Return non-overlapped ranges of states: for every bucket, record
+     * Start -> End in Reads if it contains a load and in Writes if it
+     * contains any other reference. Recomputes the buckets first. */
+    void computeStateRange(StateRange &Reads, StateRange &Writes) {
+        computeState();
+        if (StateRefs.empty())
+            return;
+
+        const uint8_t READ = 0x1;
+        const uint8_t WRITE = 0x2;
+        for (auto &State : States) {
+            uint8_t RW = 0;
+            for (auto &Ref : State.Refs)
+                RW |= isa<LoadInst>(Ref->I) ? READ : WRITE;
+            if (RW & READ)
+                Reads[State.Start] = State.End;
+            if (RW & WRITE)
+                Writes[State.Start] = State.End;
+        }
+    }
+
+    /* Compute referenced states and group instructions. */
+    void computeState() {
+        /* Start from scratch on every call. Buckets used to be appended to
+         * the previous result, so a second call (directly or through
+         * computeStateRange()) duplicated every bucket and kept pointers
+         * that no longer matched the re-sorted references. */
+        States.clear();
+
+        if (StateRefs.empty())
+            return;
+
+        /* Sort state refs by the offset. */
+        sortStateRefs();
+
+        StateData State;
+        State.reset(StateRefs.front());
+        for (unsigned i = 1, e = StateRefs.size(); i != e; ++i) {
+            StateRef &Next = StateRefs[i];
+            if (State.End <= Next.Start) {
+                /* The next reference is not overlapped with the previous
+                 * reference. A new state is found. */
+                States.push_back(State);
+                /* Restart the bucket at the next state. */
+                State.reset(Next);
+            } else {
+                /* Overlap and merge. */
+                State.insert(Next);
+            }
+        }
+        /* The last state. */
+        States.push_back(State);
+    }
+
+    /* Buckets produced by the last computeState()/computeStateRange(). */
+    StateList &getStateList() {
+        return States;
+    }
+
+    CallList &getCalls() {
+        return Calls;
+    }
+};
+
+
+namespace llvm {
+/* Passes */
+FunctionPass *createReplaceIntrinsic();
+FunctionPass *createFastMathPass();
+FunctionPass *createProfileExec(IRFactory *IF);
+FunctionPass *createStateMappingPass(IRFactory *IF);
+FunctionPass *createRedundantStateElimination(IRFactory *IF);
+FunctionPass *createCombineGuestMemory(IRFactory *IF);
+FunctionPass *createCombineCasts(IRFactory *IF);
+FunctionPass *createCombineZExtTrunc();
+FunctionPass *createSimplifyPointer(IRFactory *IF);
+
+void initializeReplaceIntrinsicPass(llvm::PassRegistry&);
+void initializeFastMathPassPass(llvm::PassRegistry&);
+void initializeProfileExecPass(llvm::PassRegistry&);
+void initializeStateMappingPassPass(llvm::PassRegistry&);
+void initializeRedundantStateEliminationPass(llvm::PassRegistry&);
+void initializeCombineGuestMemoryPass(llvm::PassRegistry&);
+void initializeCombineCastsPass(llvm::PassRegistry&);
+void initializeCombineZExtTruncPass(llvm::PassRegistry&);
+void initializeSimplifyPointerPass(llvm::PassRegistry&);
+
+/* Analysis */
+void initializeInnerLoopAnalysisWrapperPassPass(llvm::PassRegistry&);
+}
+
+#endif
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
diff --git a/llvm/include/llvm-soft-perfmon.h b/llvm/include/llvm-soft-perfmon.h
new file mode 100644
index 0000000..c55201e
--- /dev/null
+++ b/llvm/include/llvm-soft-perfmon.h
@@ -0,0 +1,74 @@
+/*
+ * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#ifndef __LLVM_SOFT_PERFMON_H
+#define __LLVM_SOFT_PERFMON_H
+
+#include "utils.h"
+
+#define MAX_SPM_THREADS 256
+
+/*
+ * Profiling mode bits for SoftwarePerfmon::Mode. Every macro is fully
+ * parenthesized so that it can be combined with any operator: without the
+ * outer parentheses `Mode & SPM_ALL` parsed as `(Mode & SPM_BASIC) | ...`.
+ */
+#define SPM_NONE    ((uint64_t)0)
+#define SPM_BASIC   ((uint64_t)1 << 0)
+#define SPM_TRACE   ((uint64_t)1 << 1)
+#define SPM_CACHE   ((uint64_t)1 << 2)
+#define SPM_PASS    ((uint64_t)1 << 3)
+#define SPM_HPM     ((uint64_t)1 << 4)
+#define SPM_EXIT    ((uint64_t)1 << 5)
+#define SPM_HOTSPOT ((uint64_t)1 << 6)
+#define SPM_ALL     (SPM_BASIC | SPM_TRACE | SPM_CACHE | SPM_PASS | SPM_HPM | \
+                     SPM_EXIT | SPM_HOTSPOT)
+/* NOTE(review): SPM_NUM is 9 although seven mode bits are defined above;
+ * confirm what it is meant to count before relying on it. */
+#define SPM_NUM 9
+
+
+/*
+ * Software Performance Monitor (SPM)
+ * Holds the selected profiling mode (a combination of SPM_* bits) and the
+ * counters gathered while running. All counters are public and start at 0.
+ */
+class SoftwarePerfmon {
+public:
+ /* Callback type accepted by registerExitFn(). */
+ typedef void (*ExitFuncPtr)(void);
+
+ uint64_t Mode; /* Profile level */
+ uint64_t NumInsns; /* Number of instructions */
+ uint64_t NumBranches; /* Number of branches */
+ uint64_t NumLoads; /* Number of memory loads */
+ uint64_t NumStores; /* Number of memory stores */
+ uint64_t NumTraceExits; /* Count of trace exits */
+ uint64_t SampleTime; /* Process time of the sampling handler. */
+ unsigned CoverSet; /* Defaults to 90; presumably a percentage -- TODO confirm */
+ std::vector<std::vector<uint64_t> *> SampleListVec;
+
+ /* Profiling disabled (SPM_NONE), all counters zero. */
+ SoftwarePerfmon()
+ : Mode(SPM_NONE), NumInsns(0), NumBranches(0), NumLoads(0), NumStores(0),
+ NumTraceExits(0), SampleTime(0), CoverSet(90) {}
+ /* Same defaults, then let ParseProfileMode() process ProfileLevel. */
+ SoftwarePerfmon(std::string &ProfileLevel) : SoftwarePerfmon() {
+ ParseProfileMode(ProfileLevel);
+ }
+
+ /* True when any profiling mode is selected. */
+ bool isEnabled() {
+ return Mode != SPM_NONE;
+ }
+
+ /* Append F to the list of registered exit callbacks. */
+ void registerExitFn(ExitFuncPtr F) {
+ ExitFunc.push_back(F);
+ }
+
+ void printProfile();
+
+private:
+ std::vector<ExitFuncPtr> ExitFunc;
+
+ void ParseProfileMode(std::string &ProfileLevel);
+ void printBlockProfile();
+ void printTraceProfile();
+};
+
+extern SoftwarePerfmon *SP;
+
+#endif /* __LLVM_SOFT_PERFMON_H */
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
diff --git a/llvm/include/llvm-state.h b/llvm/include/llvm-state.h
new file mode 100644
index 0000000..e573073
--- /dev/null
+++ b/llvm/include/llvm-state.h
@@ -0,0 +1,194 @@
+/*
+ * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ *
+ * This file implements the basic optimization schemes including indirect
+ * branch target cache (IBTC), indirect branch chain (IB chain), and trace
+ * profiling and prediction routines.
+ */
+
+#ifndef __LLVM_STATE_H
+#define __LLVM_STATE_H
+
+#define COPY_STATE(_dst, _src, _e) do { _dst->_e = _src->_e; } while(0)
+
+/*
+ * The following data structure and routine are used to save/restore the states
+ * of CPUArchState. Only the states that could affect decoding the guest binary by
+ * the TCG front-end are saved/restored. Such states are saved when translating
+ * the block at the first time because the states could change later and are
+ * restored to the saved values when the block is decoded again during the
+ * trace formation.
+ */
+/*
+ * Per-target snapshot layout. Exactly one struct is selected at compile time
+ * and aliased to `cpustate`, which is the type delete_state() frees. Field
+ * names match the members copied by tcg_save_state()/tcg_copy_state().
+ */
+#if defined(TARGET_I386) || defined(TARGET_X86_64)
+typedef struct i386_env {
+ int singlestep_enabled;
+ uint32_t hflags;
+ target_ulong eflags;
+} cpustate;
+#elif defined(TARGET_ARM)
+typedef struct arm_env {
+ int singlestep_enabled;
+ uint32_t pstate;
+ uint32_t aarch64;
+ struct {
+ uint32_t c15_cpar;
+ uint64_t scr_el3;
+ } cp15;
+ uint32_t uncached_cpsr;
+ uint64_t features;
+} cpustate;
+#elif defined(TARGET_PPC) || defined(TARGET_PPC64)
+typedef struct ppc_env {
+ int singlestep_enabled;
+ target_ulong msr;
+ int mmu_idx;
+ uint32_t flags;
+ uint64_t insns_flags;
+ uint64_t insns_flags2;
+ target_ulong hflags;
+} cpustate;
+#elif defined(TARGET_SH4)
+typedef struct sh4_env {
+ int singlestep_enabled;
+ uint32_t sr; /* status register */
+ uint32_t fpscr; /* floating point status/control register */
+ uint32_t features;
+} cpustate;
+#elif defined(TARGET_M68K)
+typedef struct m68k_env {
+ int singlestep_enabled;
+ uint32_t sr; /* status register */
+ uint32_t fpcr; /* floating point status/control register */
+} cpustate;
+#elif defined(TARGET_MIPS)
+typedef struct mips_env {
+ int singlestep_enabled;
+ target_ulong btarget;
+} cpustate;
+#else
+/* Fallback for targets without a snapshot; tcg_save_state() stores nullptr
+ * for them, so this struct is never allocated by the code in this file. */
+typedef struct dummy_env {
+ int dummy;
+} cpustate;
+#endif
+
+/*
+ * tcg_save_state()
+ * Allocate a snapshot for the current target with `new`, copy the listed
+ * fields from env (and singlestep_enabled from the CPUState) into it, and
+ * store it in tb->state. On targets without a snapshot struct tb->state is
+ * set to nullptr. A previous value of tb->state is overwritten, not freed;
+ * the snapshot is released by delete_state().
+ */
+static inline void tcg_save_state(CPUArchState *env, TranslationBlock *tb)
+{
+#if defined(TARGET_I386) || defined(TARGET_X86_64)
+ CPUState *cpu = ENV_GET_CPU(env);
+ struct i386_env *s = new struct i386_env;
+ COPY_STATE(s, cpu, singlestep_enabled);
+ COPY_STATE(s, env, hflags);
+ COPY_STATE(s, env, eflags);
+#elif defined(TARGET_ARM)
+ CPUState *cpu = ENV_GET_CPU(env);
+ struct arm_env *s = new struct arm_env;
+ COPY_STATE(s, cpu, singlestep_enabled);
+ COPY_STATE(s, env, cp15.c15_cpar);
+ COPY_STATE(s, env, cp15.scr_el3);
+ COPY_STATE(s, env, uncached_cpsr);
+ COPY_STATE(s, env, features);
+ COPY_STATE(s, env, pstate);
+ COPY_STATE(s, env, aarch64);
+#elif defined(TARGET_PPC) || defined(TARGET_PPC64)
+ CPUState *cpu = ENV_GET_CPU(env);
+ struct ppc_env *s = new struct ppc_env;
+ COPY_STATE(s, cpu, singlestep_enabled);
+ COPY_STATE(s, env, msr);
+ COPY_STATE(s, env, mmu_idx);
+ COPY_STATE(s, env, flags);
+ COPY_STATE(s, env, insns_flags);
+ COPY_STATE(s, env, insns_flags2);
+ COPY_STATE(s, env, hflags);
+#elif defined(TARGET_SH4)
+ CPUState *cpu = ENV_GET_CPU(env);
+ struct sh4_env *s = new struct sh4_env;
+ COPY_STATE(s, cpu, singlestep_enabled);
+ COPY_STATE(s, env, sr);
+ COPY_STATE(s, env, fpscr);
+ COPY_STATE(s, env, features);
+#elif defined(TARGET_M68K)
+ CPUState *cpu = ENV_GET_CPU(env);
+ struct m68k_env *s = new struct m68k_env;
+ COPY_STATE(s, cpu, singlestep_enabled);
+ COPY_STATE(s, env, sr);
+ COPY_STATE(s, env, fpcr);
+#elif defined(TARGET_MIPS)
+ CPUState *cpu = ENV_GET_CPU(env);
+ struct mips_env *s = new struct mips_env;
+ COPY_STATE(s, cpu, singlestep_enabled);
+ COPY_STATE(s, env, btarget);
+#else
+ /* No snapshot is kept for other targets. */
+ void *s = nullptr;
+#endif
+
+ tb->state = (void *)s;
+}
+
+/*
+ * tcg_copy_state()
+ * Reset states to those when the block is first translated, i.e. copy the
+ * fields saved by tcg_save_state() from tb->state back into env and the
+ * CPUState. tb->state is dereferenced without a null check, so it must hold
+ * a snapshot for the current target. Does nothing on targets that have no
+ * snapshot struct.
+ */
+static inline void tcg_copy_state(CPUArchState *env, TranslationBlock *tb)
+{
+#if defined(TARGET_I386) || defined(TARGET_X86_64)
+ CPUState *cpu = ENV_GET_CPU(env);
+ struct i386_env *i386e = (struct i386_env *)tb->state;
+ COPY_STATE(cpu, i386e, singlestep_enabled);
+ COPY_STATE(env, i386e, hflags);
+ COPY_STATE(env, i386e, eflags);
+#elif defined(TARGET_ARM)
+ CPUState *cpu = ENV_GET_CPU(env);
+ struct arm_env *arme = (struct arm_env *)tb->state;
+ COPY_STATE(cpu, arme, singlestep_enabled);
+ COPY_STATE(env, arme, cp15.c15_cpar);
+ COPY_STATE(env, arme, cp15.scr_el3);
+ COPY_STATE(env, arme, uncached_cpsr);
+ COPY_STATE(env, arme, features);
+ COPY_STATE(env, arme, pstate);
+ COPY_STATE(env, arme, aarch64);
+#elif defined(TARGET_PPC) || defined(TARGET_PPC64)
+ CPUState *cpu = ENV_GET_CPU(env);
+ struct ppc_env *ppce = (struct ppc_env *)tb->state;
+ COPY_STATE(cpu, ppce, singlestep_enabled);
+ COPY_STATE(env, ppce, msr);
+ COPY_STATE(env, ppce, mmu_idx);
+ COPY_STATE(env, ppce, flags);
+ COPY_STATE(env, ppce, insns_flags);
+ COPY_STATE(env, ppce, insns_flags2);
+ COPY_STATE(env, ppce, hflags);
+#elif defined(TARGET_SH4)
+ CPUState *cpu = ENV_GET_CPU(env);
+ struct sh4_env *sh4e = (struct sh4_env *)tb->state;
+ COPY_STATE(cpu, sh4e, singlestep_enabled);
+ COPY_STATE(env, sh4e, sr);
+ COPY_STATE(env, sh4e, fpscr);
+ COPY_STATE(env, sh4e, features);
+#elif defined(TARGET_M68K)
+ CPUState *cpu = ENV_GET_CPU(env);
+ struct m68k_env *m68ke = (struct m68k_env *)tb->state;
+ COPY_STATE(cpu, m68ke, singlestep_enabled);
+ COPY_STATE(env, m68ke, sr);
+ COPY_STATE(env, m68ke, fpcr);
+#elif defined(TARGET_MIPS)
+ CPUState *cpu = ENV_GET_CPU(env);
+ struct mips_env *mipse = (struct mips_env *)tb->state;
+ COPY_STATE(cpu, mipse, singlestep_enabled);
+ COPY_STATE(env, mipse, btarget);
+#endif
+}
+
+/*
+ * delete_state()
+ * Free the snapshot attached to the block and leave tb->state null.
+ */
+static inline void delete_state(TranslationBlock *tb)
+{
+    cpustate *Saved = (cpustate *)tb->state;
+
+    tb->state = nullptr;
+    delete Saved;
+}
+
+#undef COPY_STATE
+#endif /* __LLVM_STATE_H */
+
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
+
diff --git a/llvm/include/llvm-target.h b/llvm/include/llvm-target.h
new file mode 100644
index 0000000..1784942
--- /dev/null
+++ b/llvm/include/llvm-target.h
@@ -0,0 +1,116 @@
+/*
+ * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#ifndef __LLVM_TARGET_H
+#define __LLVM_TARGET_H
+
+#include <cstring>
+#include "llvm/ExecutionEngine/JITEventListener.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm-types.h"
+#include "llvm-translator.h"
+
+#ifndef __PRI64_PREFIX
+# if __WORDSIZE == 64
+# define __PRI64_PREFIX "l"
+# else
+# define __PRI64_PREFIX "ll"
+# endif
+#endif
+
+#if TARGET_LONG_BITS == 32
+# define PRId "d"
+# define PRIx "x"
+#else
+# define PRId __PRI64_PREFIX "d"
+# define PRIx __PRI64_PREFIX "x"
+#endif
+
+#define PRId64 __PRI64_PREFIX "d"
+#define PRIu64 __PRI64_PREFIX "u"
+
+/*
+ * code_ostream is a minimal raw byte stream over a caller-supplied buffer.
+ * Each insertion writes the value in host byte order at the current position
+ * and advances the position by the size of the value. No bounds checking is
+ * performed; the caller must guarantee that the buffer is large enough.
+ */
+class code_ostream {
+    char *OutBufStart;
+    char *OutBufCur;
+
+    /*
+     * Copy `Size' raw bytes to the current position and advance past them.
+     * memcpy is used instead of a store through a cast pointer because the
+     * current position is generally not aligned for multi-byte types.
+     */
+    void WriteRaw(const void *Src, size_t Size) {
+        std::memcpy(OutBufCur, Src, Size);
+        OutBufCur += Size;
+    }
+
+public:
+    /* Advance the current position by `Size' bytes without writing. */
+    void Skip(unsigned Size) {
+        OutBufCur += Size;
+    }
+
+    /* Start writing at address `Ptr'. */
+    code_ostream(uintptr_t Ptr)
+        : OutBufStart((char *)Ptr), OutBufCur((char *)Ptr) {}
+
+    code_ostream &operator<<(char C) {
+        WriteRaw(&C, sizeof(C));
+        return *this;
+    }
+    code_ostream &operator<<(unsigned char C) {
+        WriteRaw(&C, sizeof(C));
+        return *this;
+    }
+    code_ostream &operator<<(unsigned int C) {
+        WriteRaw(&C, sizeof(C));
+        return *this;
+    }
+    code_ostream &operator<<(unsigned long C) {
+        WriteRaw(&C, sizeof(C));
+        return *this;
+    }
+};
+
+/* Append the single byte `C' to the code stream. */
+static inline void EmitByte(code_ostream &OS, unsigned char C)
+{
+    const char Byte = (char)C;
+    OS << Byte;
+}
+/*
+ * Append the low `Size' bytes of `Val' to the code stream, least-significant
+ * byte first.
+ */
+static inline void EmitConstant(code_ostream &OS, uint64_t Val, unsigned Size)
+{
+    for (unsigned Remaining = Size; Remaining != 0; --Remaining) {
+        const unsigned char LowByte = Val & 255;
+        EmitByte(OS, LowByte);
+        Val = Val >> 8;
+    }
+}
+
+/*
+ * EventListener is used by the JIT to notify clients about significant events
+ * during compilation.
+ */
+class EventListener : public JITEventListener {
+ /* Notification record shared with the translator; held by reference and
+ * not owned by the listener. */
+ NotifyInfo &NI;
+
+public:
+ EventListener(NotifyInfo &NI) : NI(NI) {}
+ ~EventListener() {}
+ /* Callback for an emitted function; defined out of line. */
+ virtual void NotifyFunctionEmitted(const Function &F, void *Code, size_t Size,
+ const EmittedFunctionDetails &Details);
+ /* Callback for an emitted object. The LLVM_V35 build receives an
+ * ObjectImage; every other build receives the object file together with
+ * its LoadedObjectInfo. */
+#if defined(LLVM_V35)
+ virtual void NotifyObjectEmitted(const ObjectImage &Obj);
+#else
+ virtual void NotifyObjectEmitted(const object::ObjectFile &Obj,
+ const RuntimeDyld::LoadedObjectInfo &L);
+#endif
+};
+
+
+const char *getMMUFName(const void *func);
+bool isMMUFunction(std::string &Name);
+bool isLMTFunction(std::string &Name);
+bool isIllegalHelper(const void *func);
+bool isLibcall(std::string &Name);
+bool isSoftFPcall(std::string &Name);
+void AddDependentSymbols(LLVMTranslator *Translator);
+Value *StripPointer(Value *Ptr);
+Value *StripPointerWithConstantOffset(const DataLayout *DL, Value *Ptr,
+ APInt &Offset, Value *GuestBase);
+Value *getBaseWithConstantOffset(const DataLayout *DL, Value *Ptr, intptr_t &Offset);
+void ProcessErase(IVec &toErase);
+
+#endif
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
+
diff --git a/llvm/include/llvm-translator.h b/llvm/include/llvm-translator.h
new file mode 100644
index 0000000..d1d92c5
--- /dev/null
+++ b/llvm/include/llvm-translator.h
@@ -0,0 +1,270 @@
+/*
+ * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#ifndef __LLVM_TRANSLATOR_H
+#define __LLVM_TRANSLATOR_H
+
+#include <map>
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm-types.h"
+#include "llvm-pass.h"
+#include "llvm.h"
+
+
+class OptimizationInfo;
+class EventListener;
+class NotifyInfo;
+class IRFactory;
+class TraceBuilder;
+
+
+/*
+ * BaseRegister describes a host register that is reserved by QEMU TCG.
+ * Ex: R14 for the x86 host or R7 for the ARM host.
+ */
+struct BaseRegister {
+ /* Only Base is initialized here; RegNo and Ty hold indeterminate values
+ * until the owner assigns them. */
+ BaseRegister() : Base(nullptr) {}
+ int RegNo; /* Register number */
+ std::string Name; /* Register name string */
+ Type *Ty; /* Type (struct CPUArchState) */
+ Instruction *Base; /* CallInst to retrieve basereg */
+};
+
+/*
+ * Description of the register reserved for the guest base address.
+ */
+struct GuestBaseRegister {
+ /* Starts with an empty name and no Base value. */
+ GuestBaseRegister() : Name(""), Base(nullptr) {}
+ std::string Name; /* Register name string */
+ Value *Base; /* CallInst to retrieve basereg */
+};
+
+/*
+ * Information of helper functions defined in llvm-helper.h.
+ */
+struct HelperInfo {
+    /*
+     * Start with no function symbols attached. Both function pointers are
+     * explicitly nulled so that a HelperInfo that has not been populated yet
+     * can be detected instead of carrying indeterminate pointers.
+     */
+    HelperInfo()
+        : Func(nullptr), FuncNoInline(nullptr), ConflictSize(0),
+          mayConflictArg(false), hasNestedCall(false) {}
+
+    struct ArgInfo {
+        unsigned ConstantWeight; /* Weight if the argument is a constant */
+        unsigned AllocaWeight;   /* Weight if the argument is a alloca */
+        ArgInfo(unsigned CWeight, unsigned AWeight)
+            : ConstantWeight(CWeight), AllocaWeight(AWeight) {}
+    };
+
+    Function *Func;          /* Function symbol to be inlined */
+    Function *FuncNoInline;  /* Function symbol not to be inlined */
+    std::vector<std::pair<Instruction*, intptr_t> > States;
+    std::vector<CallInst*> NestedCalls;
+    StateRange StateUse;     /* State ranges recorded as read */
+    StateRange StateDef;     /* State ranges recorded as written */
+    CodeMetrics Metrics;     /* Inlining metrics */
+    std::vector<ArgInfo> ArgumentWeights; /* Weight of the function arguments */
+    intptr_t ConflictSize;
+
+    bool mayConflictArg;     /* Arguments conflict with state mapping or not */
+    bool hasNestedCall;      /* This function has nested function or not */
+
+    void CalculateMetrics(Function *F);
+
+    /* Merge `Range' into StateDef when isWrite is set, else into StateUse. */
+    void insertState(StateRange &Range, bool isWrite) {
+        StateRange &Target = isWrite ? StateDef : StateUse;
+        Target.insert(Range.begin(), Range.end());
+    }
+};
+
+/*
+ * NotifyInfo is used to pass information between LLVMTranslator, IRFactory and
+ * the JIT listener.
+ */
+class NotifyInfo {
+#define MAX_CHAINSLOT 256
+public:
+    /* One chain slot: a key and the address patched in for that key. */
+    struct SlotInfo {
+        size_t Key;
+        uintptr_t Addr;
+    };
+
+    struct PatchInfo {
+        PatchInfo(unsigned ty, unsigned idx, uintptr_t addr)
+            : Type(ty), Idx(idx), Addr(addr) {}
+        unsigned Type;
+        unsigned Idx;
+        uintptr_t Addr;
+    };
+
+    /*
+     * Every scalar member is given a defined value so that the object is
+     * usable even if reset()/setTB() have not been called yet.
+     */
+    NotifyInfo()
+        : Func(nullptr), Op(nullptr), TB(nullptr), NumInsts(0),
+          NumChainSlot(0), ChainSlot(new SlotInfo[MAX_CHAINSLOT]),
+          Size(0), Code(nullptr) {}
+    ~NotifyInfo() {
+        /* ChainSlot is allocated with new[], so it must be freed with
+         * delete[]. */
+        delete[] ChainSlot;
+    }
+
+    Function *Func;     /* LLVM Function of this translation unit */
+    TCGOp *Op;
+    TranslationBlock *TB;
+    uint16_t NumInsts;
+    RestoreVec Restore;
+    unsigned NumChainSlot;  /* Number of chain slots handed out so far */
+    SlotInfo *ChainSlot;    /* Array of MAX_CHAINSLOT slots */
+
+    uint32_t Size;      /* Size of the translated host code */
+    uint8_t *Code;      /* Start PC of the translated host code */
+    std::vector<PatchInfo> Patches;
+
+    /* Forget all restore points, patches and chain slots. */
+    void reset() {
+        Restore.clear();
+        Patches.clear();
+        NumInsts = 0;
+        NumChainSlot = 0;
+    }
+    /* Reserve the next chain slot for `Key' and return its index. */
+    unsigned setChainSlot(size_t Key) {
+        if (NumChainSlot >= MAX_CHAINSLOT)
+            hqemu_error("run out of chain slot.\n");
+        unsigned Curr = NumChainSlot;
+        ChainSlot[NumChainSlot++].Key = Key;
+        return Curr;
+    }
+    /*
+     * Return the address of the Addr field of slot `Idx'. The index itself
+     * is validated against the slots handed out so far; a completely full
+     * table is still a valid state.
+     */
+    uintptr_t getChainSlotAddr(unsigned Idx) {
+        if (Idx >= NumChainSlot)
+            hqemu_error("invalid chain slot index.\n");
+        return (uintptr_t)&ChainSlot[Idx].Addr;
+    }
+    void addPatch(unsigned Type, unsigned Idx, uintptr_t Addr) {
+        Patches.push_back(PatchInfo(Type, Idx, Addr));
+    }
+    void setOp(TCGOp *op) { Op = op; }
+    /* Switch to a new TB and restart its instruction count. */
+    void setTB(TranslationBlock *tb) {
+        TB = tb;
+        NumInsts = 0;
+    }
+    /*
+     * Record <TB id, NumInsts> as a restore point and return its index.
+     * The index must fit in 16 bits.
+     */
+    uint32_t setRestorePoint() {
+        uint32_t Idx = Restore.size();
+        if (Idx != (uint16_t)Idx)
+            hqemu_error("key value too large.\n");
+        Restore.push_back(std::make_pair(TB->id, NumInsts));
+        return Idx;
+    }
+};
+
+/*
+ * LLVM Translator
+ */
+class LLVMTranslator {
+ unsigned MyID; /* Translator ID */
+ CPUArchState *Env;
+
+ /* Basic types */
+ Type *VoidTy;
+ IntegerType *Int8Ty;
+ IntegerType *Int16Ty;
+ IntegerType *Int32Ty;
+ IntegerType *Int64Ty;
+ IntegerType *Int128Ty;
+ IntegerType *IntPtrTy;
+ PointerType *Int8PtrTy;
+ PointerType *Int16PtrTy;
+ PointerType *Int32PtrTy;
+ PointerType *Int64PtrTy;
+ Type *FloatTy;
+ Type *DoubleTy;
+ PointerType *FloatPtrTy;
+ PointerType *DoublePtrTy;
+
+ LLVMContext Context; /* Translator local context */
+ Module *Mod; /* The LLVM module */
+ const DataLayout *DL; /* Data layout */
+ NotifyInfo NI; /* Info to set/use by the JIT listener */
+
+ std::vector<BaseRegister> BaseReg; /* Reserved base registers */
+ GuestBaseRegister GuestBaseReg; /* Reserved guest base register */
+ FlatType StateType; /* Offset and type of guest registers */
+ TCGHelperMap TCGHelpers;
+ HelperMap Helpers;
+ std::set<std::string> ConstHelpers;
+ SymbolMap Symbols;
+
+ MCDisasm *GuestDisAsm;
+ MCDisasm *HostDisAsm;
+
+ IRFactory *IF; /* TCG-to-LLVM IR converter */
+
+ /* Initialize the LLVM module. */
+ void InitializeModule();
+
+ /* Create the JIT compiler. */
+ void InitializeJIT();
+
+ /* Initialize required LLVM types. */
+ void InitializeType();
+
+ /* Setup guest and host dependent structures. */
+ void InitializeTarget();
+
+ /* Setup special registers. */
+ void DefineSpecialReg(std::map<Type*, Type*> &SpecialReg);
+
+ /* Convert the CPUArchState structure type to a list of primitive types. */
+ void FlattenCPUState(Type *Ty, intptr_t &Off, std::map<Type*, Type*> &SpecialReg);
+
+ /* Initialize helper functions. */
+ void InitializeHelpers();
+
+ /* Analyze and optimize a helper function. */
+ bool OptimizeHelper(HelperInfo &Helper);
+
+ void InitializeDisasm();
+
+ void InitializeConstHelpers();
+
+ void Commit(TraceBuilder &Builder);
+
+ void Abort(TraceBuilder &Builder);
+
+ void dump(CPUArchState *env, TranslationBlock *tb);
+
+ /* The constructor is private: instances are obtained through
+ * CreateLLVMTranslator(). */
+ LLVMTranslator(unsigned id, CPUArchState *env);
+
+public:
+ ~LLVMTranslator();
+
+ void GenBlock(CPUArchState *env, OptimizationInfo *Opt);
+ void GenTrace(CPUArchState *env, OptimizationInfo *Opt);
+
+ /* Accessors. Containers are returned by reference, so callers operate on
+ * the translator's own state. */
+ unsigned getID() { return MyID; }
+ LLVMContext *getContext() { return &Context; }
+ Module *getModule() { return Mod; }
+ NotifyInfo &getNotifyInfo() { return NI; }
+ std::vector<BaseRegister> &getBaseReg() { return BaseReg; }
+ GuestBaseRegister &getGuestBaseReg() { return GuestBaseReg; }
+ TCGHelperMap &getTCGHelpers() { return TCGHelpers; }
+ HelperMap &getHelpers() { return Helpers; }
+ std::set<std::string> &getConstHelpers() { return ConstHelpers; }
+ FlatType &getStateType() { return StateType; }
+ SymbolMap &getSymbols() { return Symbols; }
+ MCDisasm *getHostDisAsm() { return HostDisAsm;}
+
+ /* Map `Name' to the address `FP' in the symbol table, replacing any
+ * existing entry with the same name. */
+ void AddSymbol(std::string Name, void *FP) {
+ Symbols[Name] = (uintptr_t)FP;
+ }
+
+ /* Create the LLVMTranslator instance. The caller receives a pointer
+ * obtained from new. Note that `id' is an int here but unsigned in the
+ * constructor. */
+ static LLVMTranslator *CreateLLVMTranslator(int id, CPUArchState *env) {
+ return new LLVMTranslator(id, env);
+ }
+
+ /* Show guest assembly code for each compiled TB. */
+ void printAsm(CPUArchState *env, TranslationBlock *tb);
+
+ /* Show TCG micro ops for each compiled TB. */
+ void printOp(CPUArchState *env, TranslationBlock *tb);
+};
+
+#endif
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
diff --git a/llvm/include/llvm-types.h b/llvm/include/llvm-types.h
new file mode 100644
index 0000000..1b8d09c
--- /dev/null
+++ b/llvm/include/llvm-types.h
@@ -0,0 +1,127 @@
+/*
+ * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#ifndef __LLVM_TYPES_H
+#define __LLVM_TYPES_H
+
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/IRReader/IRReader.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+
+#if defined(LLVM_V35)
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/ExecutionEngine/ObjectImage.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/MemoryObject.h"
+#elif defined(LLVM_V38)
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/Object/SymbolSize.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/MemoryObject.h"
+#elif defined(LLVM_V39)
+#include "llvm/MC/MCDisassembler/MCDisassembler.h"
+#include "llvm/Object/SymbolSize.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/Support/MemoryObject.h"
+#else
+#include "llvm/MC/MCDisassembler/MCDisassembler.h"
+#include "llvm/Object/SymbolSize.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#endif
+
+#include <vector>
+#include <set>
+#include <map>
+#include "llvm-macro.h"
+#include "qemu-types.h"
+
+using namespace llvm;
+
+class HelperInfo;
+
+typedef std::vector<TranslationBlock *> TBVec;
+typedef std::vector<std::pair<BlockID, uint16_t> > RestoreVec;
+typedef std::map<uintptr_t, std::string> TCGHelperMap; /* <func_ptr, func_name> */
+typedef std::map<std::string, HelperInfo*> HelperMap;
+typedef std::map<std::string, uintptr_t> SymbolMap;
+typedef std::map<intptr_t, Type *> FlatType; /* <state_off, state_ty> */
+typedef std::vector<Instruction *> IVec;
+typedef std::vector<BasicBlock *> BBVec;
+
+
+/*
+ * Return a pointer to the data layout of `Mod'. In the LLVM_V35 build
+ * Module::getDataLayout() is returned as is; in other builds its address is
+ * taken, so the pointer refers to the layout stored in the module.
+ */
+static inline const DataLayout *getDataLayout(Module *Mod) {
+#if defined(LLVM_V35)
+ return Mod->getDataLayout();
+#else
+ return &Mod->getDataLayout();
+#endif
+}
+
+/*
+ * Create a named AllocaInst of type `Ty', optionally inserted before
+ * `InsertBefore'. `AddrSpace' is ignored in the LLVM_V35/V38/V39 builds,
+ * where it is not passed to the AllocaInst constructor.
+ */
+static inline AllocaInst *CreateAlloca(Type *Ty, unsigned AddrSpace,
+ const Twine &Name,
+ Instruction *InsertBefore = nullptr) {
+#if defined(LLVM_V35) || defined(LLVM_V38) || defined(LLVM_V39)
+ return new AllocaInst(Ty, Name, InsertBefore);
+#else
+ return new AllocaInst(Ty, AddrSpace, Name, InsertBefore);
+#endif
+}
+
+/*
+ * Overload of CreateAlloca that also forwards an array size. `AddrSpace' is
+ * ignored in the LLVM_V35/V38/V39 builds, where it is not passed to the
+ * AllocaInst constructor.
+ */
+static inline AllocaInst *CreateAlloca(Type *Ty, unsigned AddrSpace,
+ Value *ArraySize = nullptr,
+ const Twine &Name = "",
+ Instruction *InsertBefore = nullptr) {
+#if defined(LLVM_V35) || defined(LLVM_V38) || defined(LLVM_V39)
+ return new AllocaInst(Ty, ArraySize, Name, InsertBefore);
+#else
+ return new AllocaInst(Ty, AddrSpace, ArraySize, Name, InsertBefore);
+#endif
+}
+
+/*
+ * Inline the callee of `CI' at the call site via llvm::InlineFunction. The
+ * result of InlineFunction is not checked. The LLVM_V38/V39 builds construct
+ * the InlineFunctionInfo with a local AssumptionCacheTracker; other builds
+ * use a default-constructed InlineFunctionInfo.
+ */
+static inline void InlineFunc(CallInst *CI) {
+#if defined(LLVM_V38) || defined(LLVM_V39)
+ AssumptionCacheTracker ACT;
+ InlineFunctionInfo IFI(nullptr, &ACT);
+#else
+ InlineFunctionInfo IFI;
+#endif
+ InlineFunction(CI, IFI);
+}
+
+#endif
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
diff --git a/llvm/include/llvm.h b/llvm/include/llvm.h
new file mode 100644
index 0000000..67bff2f
--- /dev/null
+++ b/llvm/include/llvm.h
@@ -0,0 +1,278 @@
+/*
+ * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#ifndef __LLVM_H
+#define __LLVM_H
+
+#include <memory>
+#include <vector>
+#include "llvm/ADT/STLExtras.h"
+#include "llvm-types.h"
+#include "llvm-debug.h"
+#include "utils.h"
+
+#if defined(ENABLE_MCJIT)
+#include "llvm/ExecutionEngine/MCJIT.h"
+#include "MCJITMemoryManager.h"
+typedef class DefaultMCJITMemoryManager MemoryManager;
+#else
+#if defined(LLVM_V35)
+#include "JIT.h"
+#include "JITMemoryManager.h"
+#else
+# error "LLVM version >3.5 supports MCJIT only. ENABLE_MCJIT must be enabled."
+#endif
+typedef class DefaultJITMemoryManager MemoryManager;
+#endif
+
+
+extern cl::OptionCategory CategoryHQEMU;
+
+class LLVMTranslator;
+class OptimizationInfo;
+class TranslatedCode;
+
+typedef std::unique_ptr<OptimizationInfo> OptRequest;
+
+
+/*
+ * LLVMEnv is the top-level container of the whole LLVM translation
+ * environment. It manages the LLVM translator(s) and globally shared
+ * resources. The LLVMEnv instance must be initialized before the underlying
+ * translation service is used, and it can only be initialized ONCE.
+ */
+class LLVMEnv {
+public:
+ typedef std::vector<TranslatedCode *> TransCodeList;
+ typedef std::map<uintptr_t, TranslatedCode *> TransCodeMap;
+ typedef std::vector<uintptr_t> ChainSlot;
+ typedef std::pair<size_t, uintptr_t> SlotInfo;
+
+private:
+ std::shared_ptr<MemoryManager> MM; /* Trace cache manager */
+ unsigned NumTranslator; /* The amount of LLVM translators */
+ std::vector<LLVMTranslator *> Translator; /* LLVM translators */
+ std::vector<pthread_t> HelperThread; /* LLVM translation threads */
+ std::vector<CPUState *> ThreadEnv;
+
+ TransCodeList TransCode; /* Translated traces. */
+ TransCodeMap SortedCode; /* Sorted traces in code cache address order. */
+ ChainSlot ChainPoint; /* Address of stubs for trace-to-block linking */
+
+ bool UseThreading; /* Whether multithreaded translators are used or not. */
+ unsigned NumFlush;
+
+ /* The constructor is private; see the static CreateLLVMEnv() below. */
+ LLVMEnv();
+
+ /* Parse the command line options. */
+ void ParseCommandLineOptions();
+
+ /* Test whether HQEMU is running in Intel VTune. */
+ void ProbeIntelVTune();
+
+public:
+ QemuMutex mutex;
+
+ ~LLVMEnv();
+
+ /* Start/stop/restart LLVM translators and worker threads. */
+ void CreateTranslator();
+ void DeleteTranslator();
+ void RestartTranslator();
+ void StartThread();
+ void StopThread();
+
+ /* Get the LLVM translator with index. An out-of-range ID is reported
+ * through hqemu_error(). */
+ LLVMTranslator *getTranslator(unsigned ID) {
+ if (ID >= Translator.size())
+ hqemu_error("invalid translator ID.\n");
+ return Translator[ID];
+ }
+
+ /* Acquire and lock the first LLVM translator. */
+ LLVMTranslator *AcquireSingleTranslator();
+
+ /* Release the first LLVM translator. */
+ void ReleaseSingleTranslator();
+
+ /* Get CPUState of the LLVM translator with index. Unlike getTranslator(),
+ * the index is not range-checked here. */
+ CPUState *getThreadEnv(int ID) { return ThreadEnv[ID]; }
+
+ std::vector<pthread_t> &getHelperThread() { return HelperThread; }
+ std::shared_ptr<MemoryManager> getMemoryManager() { return MM; }
+ TransCodeList &getTransCode() { return TransCode; }
+ TransCodeMap &getSortedCode() { return SortedCode; }
+ ChainSlot &getChainPoint() { return ChainPoint; }
+ TraceID insertTransCode(TranslatedCode *TC);
+ SlotInfo getChainSlot();
+
+ bool isThreading() { return UseThreading; }
+ void incNumFlush() { NumFlush++; }
+ unsigned getNumFlush() { return NumFlush; }
+
+ /*
+ * static public members
+ */
+ static bool InitOnce; /* LLVMEnv is initialized or not? */
+ static int TransMode;
+ static uint8_t *TraceCache;
+ static size_t TraceCacheSize;
+ static bool RunWithVTune;
+
+ static void CreateLLVMEnv();
+ static void DeleteLLVMEnv();
+ static int OptimizeBlock(CPUArchState *env, OptRequest Request);
+ static int OptimizeTrace(CPUArchState *env, OptRequest Request);
+ static void setTransMode(int Mode) { TransMode = Mode; }
+ /* Non-zero when the current mode is one of the two hybrid modes. */
+ static int isTraceMode() {
+ return (TransMode == TRANS_MODE_HYBRIDS ||
+ TransMode == TRANS_MODE_HYBRIDM);
+ }
+};
+
+/*
+ * QueueManager holds the queues through which OptimizationInfo requests are
+ * enqueued and dequeued. All member functions are defined out of line.
+ */
+class QueueManager {
+ std::vector<Queue *> ActiveQueue;
+ Queue *CurrentQueue;
+
+public:
+ QueueManager();
+ ~QueueManager();
+ void Enqueue(OptimizationInfo *Opt);
+ /* Returns an untyped pointer; presumably an OptimizationInfo previously
+ * passed to Enqueue() -- confirm against the definition. */
+ void *Dequeue();
+ void Flush();
+};
+
+/*
+ * OptimizationInfo is the description to an optimization request. It consists
+ * of the optimization mode and the control-flow-graph of the trace.
+ */
+class OptimizationInfo {
+public:
+    typedef std::set<TranslationBlock *> TraceNode;
+    typedef std::map<TranslationBlock *, TraceNode> TraceEdge;
+
+    /* Release the CFG if one has been built for this request. */
+    ~OptimizationInfo() {
+        if (CFG != nullptr)
+            GraphNode::DeleteCFG(CFG);
+    }
+
+    void ComposeCFG();
+    GraphNode *getCFG() { return CFG; }
+    bool isTrace() { return isBlock == false; }
+
+    /*
+     * Factory functions. The constructors are private, so a request can only
+     * be obtained wrapped in an OptRequest.
+     */
+    static OptRequest CreateRequest(TranslationBlock *tb) {
+        OptimizationInfo *Info = new OptimizationInfo(tb);
+        return OptRequest(Info);
+    }
+    static OptRequest CreateRequest(TBVec &trace, int idx) {
+        OptimizationInfo *Info = new OptimizationInfo(trace, idx);
+        return OptRequest(Info);
+    }
+    static OptRequest CreateRequest(TranslationBlock *head, TraceEdge &edges) {
+        OptimizationInfo *Info = new OptimizationInfo(head, edges);
+        return OptRequest(Info);
+    }
+
+private:
+    TBVec Trace;        /* Trace of a list of TBs */
+    int LoopHeadIdx;    /* Index to the loopback block */
+    bool isUserTrace;   /* Trace of all user-mode blocks */
+    bool isBlock;       /* Trace of a single block */
+    GraphNode *CFG;     /* CFG of the trace */
+
+    /* Single-block request: the trace is just `tb' and the CFG is one node. */
+    OptimizationInfo(TranslationBlock *tb)
+        : LoopHeadIdx(-1), isUserTrace(true), isBlock(true),
+          CFG(new GraphNode(tb)) {
+        Trace.push_back(tb);
+    }
+    /* Trace request from a non-empty TB list; the CFG is left unset. */
+    OptimizationInfo(TBVec &trace, int idx)
+        : LoopHeadIdx(idx), isUserTrace(true), isBlock(false), CFG(nullptr) {
+        if (trace.empty())
+            hqemu_error("trace length cannot be zero.\n");
+        Trace = trace;
+    }
+    OptimizationInfo(TranslationBlock *HeadTB, TraceEdge &Edges);
+
+    void SearchCycle(TraceNode &SearchNodes, TraceNode &Nodes,
+                     TraceEdge &Edges, TBVec &Visited, int Depth);
+    void ExpandTrace(TranslationBlock *HeadTB, TraceEdge &Edges);
+};
+
+/*
+ * TraceInfo records the blocks that make up a translated trace together with
+ * its counters, translation time and attribute bits.
+ */
+class TraceInfo {
+public:
+    TBVec TBs;
+    unsigned NumLoop;
+    unsigned NumExit;
+    unsigned NumIndirectBr;
+    uint64_t **ExecCount;
+    uint64_t TransTime;
+    uint32_t Attribute;
+
+    /* Collect the TB of every node in `Nodes', which must not be empty. */
+    TraceInfo(NodeVec &Nodes, uint32_t Attr = A_None)
+        : NumLoop(0), NumExit(0), NumIndirectBr(0), ExecCount(nullptr),
+          TransTime(0), Attribute(Attr)
+    {
+        if (Nodes.empty())
+            hqemu_error("number of nodes cannot be zero.\n");
+        const unsigned NumNodes = Nodes.size();
+        for (unsigned Idx = 0; Idx < NumNodes; ++Idx) {
+            TranslationBlock *Block = Nodes[Idx]->getTB();
+            TBs.push_back(Block);
+        }
+    }
+
+    TranslationBlock *getEntryTB() { return TBs.front(); }
+    target_ulong getEntryPC() { return TBs.front()->pc; }
+    unsigned getNumBlock() { return TBs.size(); }
+    /* Store the elapsed time between `start' and `end' in microseconds. */
+    void setTransTime(struct timeval *start, struct timeval *end) {
+        struct timeval Elapsed;
+        timersub(end, start, &Elapsed);
+        TransTime = Elapsed.tv_sec * 1e6 + Elapsed.tv_usec;
+    }
+    /* True if any bit of `Attr' is set in Attribute. */
+    bool hasAttribute(uint32_t Attr) {
+        return (Attribute & Attr) != 0;
+    }
+};
+
+/*
+ * ChainInfo is attached to a TranslationBlock through tb->chain and records
+ * chain addresses and the IDs of dependent traces.
+ */
+struct ChainInfo {
+    std::vector<uintptr_t> Chains;
+    std::vector<BlockID> DepTraces;
+
+    void insertChain(uintptr_t addr) {
+        Chains.push_back(addr);
+    }
+    void insertDepTrace(BlockID id) {
+        DepTraces.push_back(id);
+    }
+    /* Return the ChainInfo of `tb', creating it on first use. */
+    static ChainInfo *get(TranslationBlock *tb) {
+        if (tb->chain)
+            return (ChainInfo *)tb->chain;
+        tb->chain = (ChainInfo *)new ChainInfo;
+        return (ChainInfo *)tb->chain;
+    }
+    /* Destroy the ChainInfo of `tb' (if any) and clear the link. */
+    static void free(TranslationBlock *tb) {
+        ChainInfo *Info = (ChainInfo *)tb->chain;
+        delete Info;
+        tb->chain = nullptr;
+    }
+};
+
+/*
+ * TranslatedCode describes one region of translated host code. It owns the
+ * TraceInfo pointed to by Trace and deletes it on destruction.
+ */
+class TranslatedCode {
+public:
+    /* Every member starts with a defined value; a fresh object is inactive
+     * and refers to no code. */
+    TranslatedCode()
+        : Active(false), Size(0), Code(nullptr), EntryTB(nullptr),
+          Trace(nullptr), SampleCount(0) {}
+    ~TranslatedCode() {
+        delete Trace;   /* deleting a null pointer is a no-op */
+    }
+
+    bool Active;
+    uint32_t Size;              /* Size of the translated host code */
+    uint8_t *Code;              /* Start PC of the translated host code */
+    TranslationBlock *EntryTB;  /* The entry block of the region */
+    RestoreVec Restore;
+    TraceInfo *Trace;
+    uint64_t SampleCount;
+};
+
+
+#endif
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
diff --git a/llvm/include/optimization.h b/llvm/include/optimization.h
new file mode 100644
index 0000000..bdafb3a
--- /dev/null
+++ b/llvm/include/optimization.h
@@ -0,0 +1,261 @@
+/*
+ * (C) 2015 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#ifndef __OPTIMIZATION_H
+#define __OPTIMIZATION_H
+
+#include <iostream>
+#include <list>
+#include "qemu-types.h"
+
+
+extern "C" TranslationBlock *tbs;
+
+/*
+ * Instruction TLB (iTLB)
+ */
+#define ITLB_CACHE_BITS (10)
+#define ITLB_CACHE_SIZE (1U << ITLB_CACHE_BITS)
+#define ITLB_CACHE_MASK (ITLB_CACHE_SIZE - 1)
+
+/*
+ * Direct-mapped cache from a guest virtual page to a tb_page_addr_t. An entry
+ * holding (tb_page_addr_t)-1 is empty.
+ */
+class ITLB {
+    struct itlb_t { tb_page_addr_t paddr; };
+    itlb_t Cache[ITLB_CACHE_SIZE];
+
+public:
+    ITLB() { reset(); }
+    ~ITLB() {}
+
+    /* Select the entry indexed by the page number of `vaddr'. */
+    inline itlb_t &cache(target_ulong vaddr) {
+        const target_ulong PageNo = vaddr >> TARGET_PAGE_BITS;
+        return Cache[PageNo & ITLB_CACHE_MASK];
+    }
+    /* Empty every entry. */
+    void reset() {
+        for (itlb_t &Entry : Cache)
+            Entry.paddr = (tb_page_addr_t)-1;
+    }
+    /* Empty the entry that `vaddr' maps to. */
+    void flush(target_ulong vaddr) {
+        itlb_t &Entry = cache(vaddr);
+        Entry.paddr = (tb_page_addr_t)-1;
+    }
+    void insert(target_ulong vaddr, tb_page_addr_t paddr) {
+        itlb_t &Entry = cache(vaddr);
+        Entry.paddr = paddr;
+    }
+    /* Return the stored value without checking that it belongs to `vaddr'. */
+    tb_page_addr_t get(target_ulong vaddr) {
+        const itlb_t &Entry = cache(vaddr);
+        return Entry.paddr;
+    }
+};
+
+
+/*
+ * Indirect Branch Target Cache (IBTC)
+ */
+#define IBTC_CACHE_BITS (16)
+#define IBTC_CACHE_SIZE (1U << IBTC_CACHE_BITS)
+#define IBTC_CACHE_MASK (IBTC_CACHE_SIZE - 1)
+
+/*
+ * Direct-mapped cache from a guest PC to its TranslationBlock, indexed by
+ * (pc >> 2). An entry whose tag is (target_ulong)-1 is empty.
+ */
+class IBTC {
+    typedef std::pair<target_ulong, TranslationBlock *> ibtc_t;
+    ibtc_t Cache[IBTC_CACHE_SIZE];
+    bool NeedUpdate;
+    uint64_t Total;     /* Total access count */
+    uint64_t Miss;      /* Miss count */
+
+public:
+    IBTC() : NeedUpdate(false), Total(0), Miss(0) { reset(); }
+    ~IBTC() {}
+
+    /* Select the entry that `pc' maps to. */
+    inline ibtc_t &cache(target_ulong pc) {
+        return Cache[(pc >> 2) & IBTC_CACHE_MASK];
+    }
+    /* Invalidate the tag of every entry. */
+    void reset() {
+        for (unsigned i = 0; i < IBTC_CACHE_SIZE; ++i)
+            Cache[i].first = (target_ulong)-1;
+    }
+    /* Invalidate the entry of `tb' if its tag matches tb->pc. */
+    void remove(TranslationBlock *tb) {
+        ibtc_t &c = cache(tb->pc);
+        if (c.first == tb->pc)
+            c.first = (target_ulong)-1;
+    }
+    void insert(target_ulong pc, TranslationBlock *tb) {
+        cache(pc) = std::make_pair(pc, tb);
+    }
+    /* Return the cached block for `pc', or nullptr on a tag mismatch. */
+    TranslationBlock *get(target_ulong pc) {
+        ibtc_t &c = cache(pc);
+        return (c.first == pc) ? c.second : nullptr;
+    }
+    void setUpdate() { NeedUpdate = true; }
+    void resetUpdate() { NeedUpdate = false; }
+    bool needUpdate() { return NeedUpdate; }
+    inline void incTotal() { Total++; }
+    inline void incMiss() { Miss++; }
+    /* Print the miss statistics to stderr. */
+    void dump() {
+        /* Report 0% instead of NaN when no access has been counted. */
+        double HitRate = Total ? (double)(Total - Miss) * 100 / Total : 0;
+        std::cerr << "\nibtc.miss = " << Miss << "/" << Total <<
+                     " (hit rate=" << HitRate << "%)\n";
+    }
+};
+
+/*
+ * Cross-Page Block Linking (CPBL)
+ */
+class CPBL {
+    uint64_t Total;         /* Total access count */
+    uint64_t Miss;          /* Miss count */
+    uint64_t ValidateTotal; /* Total validation count */
+    uint64_t ValidateMiss;  /* Miss validation count */
+
+    /* Hit rate in percent for `Count' events of which `Missed' missed;
+     * 0 when nothing has been counted (avoids printing NaN). */
+    static double hitRate(uint64_t Count, uint64_t Missed) {
+        return Count ? (double)(Count - Missed) * 100 / Count : 0;
+    }
+public:
+    CPBL() : Total(0), Miss(0), ValidateTotal(0), ValidateMiss(0) {}
+
+    inline void incTotal() { Total++; }
+    inline void incMiss() { Miss++; }
+    inline void incValidateTotal() { ValidateTotal++; }
+    inline void incValidateMiss() { ValidateMiss++; }
+    /* Print the access and validation statistics to stderr. */
+    void dump() {
+        double HitRate = hitRate(Total, Miss);
+        /* The validation hit rate is relative to the number of validations,
+         * not to the number of accesses. */
+        double HitRate2 = hitRate(ValidateTotal, ValidateMiss);
+        std::cerr << "cpbl.miss = " << Miss << "/" << Total <<
+                     " (hit rate=" << HitRate << "%)\n" <<
+                     "validate.miss = " << ValidateMiss << "/" << ValidateTotal <<
+                     " (hit rate=" << HitRate2 << "%)\n";
+    }
+};
+
+/*
+ * Large Page Table
+ *
+ * This table tracks every large page created by the guest system. When a
+ * `possibly' large page is invalidated, the tracked pages are searched to
+ * determine whether it really is a large page invalidation. If the page is
+ * not found, it is a false alert and we can fall back to default-size page
+ * flushing. Otherwise it is a true large page flush, and the SoftTLB, the
+ * IBTC/CPBL optimizations, etc. are partially or fully cleaned up.
+ */
+#define MAX_NUM_LARGEPAGE (1024)
+
+class LargePageTable {
+    typedef std::pair<target_ulong, target_ulong> PTE; /* <base, mask> */
+    typedef std::list<PTE> PTEList;
+    PTEList Used;       /* Tracked pages, most recently used first */
+    PTEList Free;       /* Unused entries */
+    CPUState *CS;
+    uint64_t Total;
+    uint64_t Miss;
+
+public:
+    /* All MAX_NUM_LARGEPAGE entries start on the free list. */
+    LargePageTable(CPUState *cpu) : Total(0), Miss(0) {
+        CS = cpu;
+        Used.clear();
+        Free.resize(MAX_NUM_LARGEPAGE);
+    }
+    ~LargePageTable() {}
+
+    /* Modes accepted by search(). */
+    enum {
+        SEARCH = 0,
+        FLUSH,
+    };
+
+    /* Return every tracked entry to the free list. */
+    void reset() {
+        Free.splice(Free.end(), Used);
+    }
+    /* Move the tracked entry `I' back to the free list. */
+    void remove(PTEList::iterator I) {
+        Free.splice(Free.begin(), Used, I);
+    }
+    /* Take an entry from the free list, fill it with `pte' and put it at the
+     * front of the used list. */
+    void allocate(PTE pte) {
+        /* If the free list is empty, we need to clear softtlb by calling
+         * tlb_flush() which will then invoke LPT::reset() to clear LPT. */
+        if (Free.empty()) {
+            tlb_flush(CS, 0);
+            /* Free.front() on an empty list is undefined, so do not depend
+             * on the flush having reset this table: reset it here if the
+             * free list is still empty. */
+            if (Free.empty())
+                reset();
+        }
+        Free.front() = pte;
+        Used.splice(Used.begin(), Free, Free.begin());
+    }
+    /* Track the large page of `size' bytes containing `addr'. A page that is
+     * already tracked is only moved to the front of the used list. */
+    void insert(target_ulong addr, target_ulong size) {
+        for (PTEList::iterator I = Used.begin(), E = Used.end(); I != E; ++I) {
+            if (I->first == (addr & I->second)) {
+                Used.splice(Used.begin(), Used, I);
+                return;
+            }
+        }
+        target_ulong mask = ~(size - 1);
+        allocate(PTE(addr & mask, mask));
+    }
+    /*
+     * Look up the tracked page containing `addr'. On a hit, store its base
+     * and size through `addrp'/`sizep', drop the entry when `mode' is FLUSH,
+     * and return true. Return false, leaving the outputs untouched, on a
+     * miss.
+     */
+    bool search(target_ulong addr, bool mode, target_ulong *addrp,
+                target_ulong *sizep) {
+        for (PTEList::iterator I = Used.begin(), E = Used.end(); I != E; ++I) {
+            if (I->first != (addr & I->second))
+                continue;
+            *addrp = I->first;
+            *sizep = ~I->second + 1;
+            if (mode == FLUSH)
+                remove(I);
+            return true;
+        }
+        return false;
+    }
+    void incTotal() { Total++; }
+    void incMiss() { Miss++; }
+    /* Print the statistics to stderr. */
+    void dump() {
+        /* Report 0% instead of NaN when nothing has been counted. */
+        double Rate = Total ? (double)(Total - Miss) * 100 / Total : 0;
+        std::cerr << "lpt.miss = " << Miss << "/" << Total <<
+                     " (false flushing=" << Rate << "% #pages=" <<
+                     Used.size() << ")\n";
+    }
+};
+
+
+class BaseTracer;
+
+/*
+ * Per-CPU bundle of optimization state. It is reached from a CPUArchState
+ * through env->opt_link (see the cpu_get_*() accessors).
+ */
+struct CPUOptimization {
+    /* The caches are default-constructed; the large page table is bound to
+     * `cpu' and the tracer pointer is stored as given. */
+    CPUOptimization(CPUState *cpu, BaseTracer *tracer)
+        : lpt(cpu), pt(tracer) {}
+
+    ITLB itlb;          /* instruction TLB */
+    IBTC ibtc;          /* indirect branch target cache */
+    CPBL cpbl;          /* cross-page block linking */
+    LargePageTable lpt; /* large page handling */
+    BaseTracer *pt;     /* processor tracer */
+};
+
+
+/*
+ * Return non-zero if `tb' is a user-mode block. Without CONFIG_SOFTMMU every
+ * block is reported as user mode (1). With CONFIG_SOFTMMU the answer is
+ * derived from tb->flags in a target-specific way; for some targets the
+ * result is the raw masked flag rather than exactly 1, so callers should
+ * only test it for zero/non-zero. Targets not listed below fail to compile.
+ */
+static inline int isUserTB(TranslationBlock *tb) {
+ int is_user = 1;
+#if defined(CONFIG_SOFTMMU)
+#if defined(TARGET_ALPHA)
+ is_user = (tb->flags & TB_FLAGS_USER_MODE);
+#elif defined(TARGET_ARM)
+ is_user = ((ARM_TBFLAG_MMUIDX(tb->flags) & 3) == 0);
+#elif defined(TARGET_I386)
+ /* user mode when the 2-bit field at HF_CPL_SHIFT equals 3 */
+ is_user = ((tb->flags >> HF_CPL_SHIFT) & 3) == 3;
+#elif defined(TARGET_MIPS)
+ is_user = (tb->flags & MIPS_HFLAG_UM);
+#elif defined(TARGET_PPC)
+ is_user = ((tb->flags >> MSR_PR) & 1);
+#else
+#error "unsupported processor type"
+#endif
+#endif
+ return is_user;
+}
+
+/* Return the instruction TLB of the CPUOptimization linked to `env'. */
+static inline ITLB &cpu_get_itlb(CPUArchState *env) {
+    CPUOptimization *Opt = (CPUOptimization *)env->opt_link;
+    return Opt->itlb;
+}
+/* Return the indirect branch target cache of the CPUOptimization linked to
+ * `env'. */
+static inline IBTC &cpu_get_ibtc(CPUArchState *env) {
+    CPUOptimization *Opt = (CPUOptimization *)env->opt_link;
+    return Opt->ibtc;
+}
+/* Return the cross-page block linking state of the CPUOptimization linked to
+ * `env'. */
+static inline CPBL &cpu_get_cpbl(CPUArchState *env) {
+    CPUOptimization *Opt = (CPUOptimization *)env->opt_link;
+    return Opt->cpbl;
+}
+/* Return the large page table of the CPUOptimization linked to `env'. */
+static inline LargePageTable &cpu_get_lpt(CPUArchState *env) {
+    CPUOptimization *Opt = (CPUOptimization *)env->opt_link;
+    return Opt->lpt;
+}
+/* Return the tracer pointer stored in the CPUOptimization linked to `env'. */
+static inline BaseTracer *cpu_get_tracer(CPUArchState *env) {
+    CPUOptimization *Opt = (CPUOptimization *)env->opt_link;
+    return Opt->pt;
+}
+
+#endif
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
+
diff --git a/llvm/include/pmu/arm/arm-events.h b/llvm/include/pmu/arm/arm-events.h
new file mode 100644
index 0000000..b3bb1d7
--- /dev/null
+++ b/llvm/include/pmu/arm/arm-events.h
@@ -0,0 +1,35 @@
+/*
+ * (C) 2018 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#ifndef __ARM_EVENTS_H
+#define __ARM_EVENTS_H
+
+#include <vector>
+#include "pmu/pmu.h"
+
+namespace pmu {
+
+class PMUEvent;
+
+#if defined(__arm__)
+#define pmu_mb() ((void(*)(void))0xffff0fa0)()
+#define pmu_rmb() ((void(*)(void))0xffff0fa0)()
+#define pmu_wmb() ((void(*)(void))0xffff0fa0)()
+#elif defined(__aarch64__)
+#define pmu_mb() asm volatile("dmb ish" ::: "memory")
+#define pmu_rmb() asm volatile("dmb ishld" ::: "memory")
+#define pmu_wmb() asm volatile("dmb ishst" ::: "memory")
+#endif
+
+
+int ARMInit(void);
+
+} /* namespace pmu */
+
+#endif /* __ARM_EVENTS_H */
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
diff --git a/llvm/include/pmu/perf_event.h b/llvm/include/pmu/perf_event.h
new file mode 100644
index 0000000..81fed4a
--- /dev/null
+++ b/llvm/include/pmu/perf_event.h
@@ -0,0 +1,992 @@
+/*
+ * This file is copied from linux-4.11/include/uapi/linux/perf_event.h.
+ *
+ * Performance events:
+ *
+ * Copyright (C) 2008-2009, Thomas Gleixner <tglx@linutronix.de>
+ * Copyright (C) 2008-2011, Red Hat, Inc., Ingo Molnar
+ * Copyright (C) 2008-2011, Red Hat, Inc., Peter Zijlstra
+ *
+ * Data type definitions, declarations, prototypes.
+ *
+ * Started by: Thomas Gleixner and Ingo Molnar
+ *
+ * For licencing details see kernel-base/COPYING
+ */
+#ifndef _UAPI_LINUX_PERF_EVENT_H
+#define _UAPI_LINUX_PERF_EVENT_H
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/*
+ * User-space ABI bits:
+ */
+
+/*
+ * attr.type
+ */
+enum perf_type_id {
+ PERF_TYPE_HARDWARE = 0,
+ PERF_TYPE_SOFTWARE = 1,
+ PERF_TYPE_TRACEPOINT = 2,
+ PERF_TYPE_HW_CACHE = 3,
+ PERF_TYPE_RAW = 4,
+ PERF_TYPE_BREAKPOINT = 5,
+
+ PERF_TYPE_MAX, /* non-ABI */
+};
+
+/*
+ * Generalized hardware event ids; they are passed in the
+ * attr.config field of the sys_perf_event_open()
+ * syscall:
+ */
+enum perf_hw_id {
+ /*
+ * Common hardware events, generalized by the kernel:
+ */
+ PERF_COUNT_HW_CPU_CYCLES = 0,
+ PERF_COUNT_HW_INSTRUCTIONS = 1,
+ PERF_COUNT_HW_CACHE_REFERENCES = 2,
+ PERF_COUNT_HW_CACHE_MISSES = 3,
+ PERF_COUNT_HW_BRANCH_INSTRUCTIONS = 4,
+ PERF_COUNT_HW_BRANCH_MISSES = 5,
+ PERF_COUNT_HW_BUS_CYCLES = 6,
+ PERF_COUNT_HW_STALLED_CYCLES_FRONTEND = 7,
+ PERF_COUNT_HW_STALLED_CYCLES_BACKEND = 8,
+ PERF_COUNT_HW_REF_CPU_CYCLES = 9,
+
+ PERF_COUNT_HW_MAX, /* non-ABI */
+};
+
+/*
+ * Generalized hardware cache events:
+ *
+ * { L1-D, L1-I, LLC, ITLB, DTLB, BPU, NODE } x
+ * { read, write, prefetch } x
+ * { accesses, misses }
+ */
+enum perf_hw_cache_id {
+ PERF_COUNT_HW_CACHE_L1D = 0,
+ PERF_COUNT_HW_CACHE_L1I = 1,
+ PERF_COUNT_HW_CACHE_LL = 2,
+ PERF_COUNT_HW_CACHE_DTLB = 3,
+ PERF_COUNT_HW_CACHE_ITLB = 4,
+ PERF_COUNT_HW_CACHE_BPU = 5,
+ PERF_COUNT_HW_CACHE_NODE = 6,
+
+ PERF_COUNT_HW_CACHE_MAX, /* non-ABI */
+};
+
+enum perf_hw_cache_op_id {
+ PERF_COUNT_HW_CACHE_OP_READ = 0,
+ PERF_COUNT_HW_CACHE_OP_WRITE = 1,
+ PERF_COUNT_HW_CACHE_OP_PREFETCH = 2,
+
+ PERF_COUNT_HW_CACHE_OP_MAX, /* non-ABI */
+};
+
+enum perf_hw_cache_op_result_id {
+ PERF_COUNT_HW_CACHE_RESULT_ACCESS = 0,
+ PERF_COUNT_HW_CACHE_RESULT_MISS = 1,
+
+ PERF_COUNT_HW_CACHE_RESULT_MAX, /* non-ABI */
+};
+
+/*
+ * Special "software" events provided by the kernel, even if the hardware
+ * does not support performance events. These events measure various
+ * physical and sw events of the kernel (and allow the profiling of them as
+ * well):
+ */
+enum perf_sw_ids {
+ PERF_COUNT_SW_CPU_CLOCK = 0,
+ PERF_COUNT_SW_TASK_CLOCK = 1,
+ PERF_COUNT_SW_PAGE_FAULTS = 2,
+ PERF_COUNT_SW_CONTEXT_SWITCHES = 3,
+ PERF_COUNT_SW_CPU_MIGRATIONS = 4,
+ PERF_COUNT_SW_PAGE_FAULTS_MIN = 5,
+ PERF_COUNT_SW_PAGE_FAULTS_MAJ = 6,
+ PERF_COUNT_SW_ALIGNMENT_FAULTS = 7,
+ PERF_COUNT_SW_EMULATION_FAULTS = 8,
+ PERF_COUNT_SW_DUMMY = 9,
+ PERF_COUNT_SW_BPF_OUTPUT = 10,
+
+ PERF_COUNT_SW_MAX, /* non-ABI */
+};
+
+/*
+ * Bits that can be set in attr.sample_type to request information
+ * in the overflow packets.
+ */
+enum perf_event_sample_format {
+ PERF_SAMPLE_IP = 1U << 0,
+ PERF_SAMPLE_TID = 1U << 1,
+ PERF_SAMPLE_TIME = 1U << 2,
+ PERF_SAMPLE_ADDR = 1U << 3,
+ PERF_SAMPLE_READ = 1U << 4,
+ PERF_SAMPLE_CALLCHAIN = 1U << 5,
+ PERF_SAMPLE_ID = 1U << 6,
+ PERF_SAMPLE_CPU = 1U << 7,
+ PERF_SAMPLE_PERIOD = 1U << 8,
+ PERF_SAMPLE_STREAM_ID = 1U << 9,
+ PERF_SAMPLE_RAW = 1U << 10,
+ PERF_SAMPLE_BRANCH_STACK = 1U << 11,
+ PERF_SAMPLE_REGS_USER = 1U << 12,
+ PERF_SAMPLE_STACK_USER = 1U << 13,
+ PERF_SAMPLE_WEIGHT = 1U << 14,
+ PERF_SAMPLE_DATA_SRC = 1U << 15,
+ PERF_SAMPLE_IDENTIFIER = 1U << 16,
+ PERF_SAMPLE_TRANSACTION = 1U << 17,
+ PERF_SAMPLE_REGS_INTR = 1U << 18,
+
+ PERF_SAMPLE_MAX = 1U << 19, /* non-ABI */
+};
+
+/*
+ * values to program into branch_sample_type when PERF_SAMPLE_BRANCH_STACK is set
+ *
+ * If the user does not pass priv level information via branch_sample_type,
+ * the kernel uses the event's priv level. Branch and event priv levels do
+ * not have to match. Branch priv level is checked for permissions.
+ *
+ * The branch types can be combined, however BRANCH_ANY covers all types
+ * of branches and therefore it supersedes all the other types.
+ */
+enum perf_branch_sample_type_shift {
+ PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */
+ PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */
+ PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */
+
+ PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */
+ PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */
+ PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */
+ PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */
+ PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */
+ PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */
+ PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */
+ PERF_SAMPLE_BRANCH_COND_SHIFT = 10, /* conditional branches */
+
+ PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* call/ret stack */
+ PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT = 12, /* indirect jumps */
+ PERF_SAMPLE_BRANCH_CALL_SHIFT = 13, /* direct call */
+
+ PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT = 14, /* no flags */
+ PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT = 15, /* no cycles */
+
+ PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */
+};
+
+enum perf_branch_sample_type {
+ PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT,
+ PERF_SAMPLE_BRANCH_KERNEL = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT,
+ PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT,
+
+ PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT,
+ PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT,
+ PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT,
+ PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT,
+ PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT,
+ PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT,
+ PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT,
+ PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT,
+
+ PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT,
+ PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT,
+ PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT,
+
+ PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT,
+ PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT,
+
+ PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
+};
+
+#define PERF_SAMPLE_BRANCH_PLM_ALL \
+ (PERF_SAMPLE_BRANCH_USER|\
+ PERF_SAMPLE_BRANCH_KERNEL|\
+ PERF_SAMPLE_BRANCH_HV)
+
+/*
+ * Values to determine ABI of the registers dump.
+ */
+enum perf_sample_regs_abi {
+ PERF_SAMPLE_REGS_ABI_NONE = 0,
+ PERF_SAMPLE_REGS_ABI_32 = 1,
+ PERF_SAMPLE_REGS_ABI_64 = 2,
+};
+
+/*
+ * Values for the memory transaction event qualifier, mostly for
+ * abort events. Multiple bits can be set.
+ */
+enum {
+ PERF_TXN_ELISION = (1 << 0), /* From elision */
+ PERF_TXN_TRANSACTION = (1 << 1), /* From transaction */
+ PERF_TXN_SYNC = (1 << 2), /* Instruction is related */
+ PERF_TXN_ASYNC = (1 << 3), /* Instruction not related */
+ PERF_TXN_RETRY = (1 << 4), /* Retry possible */
+ PERF_TXN_CONFLICT = (1 << 5), /* Conflict abort */
+ PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */
+ PERF_TXN_CAPACITY_READ = (1 << 7), /* Capacity read abort */
+
+ PERF_TXN_MAX = (1 << 8), /* non-ABI */
+
+ /* bits 32..63 are reserved for the abort code */
+
+ PERF_TXN_ABORT_MASK = (0xffffffffULL << 32),
+ PERF_TXN_ABORT_SHIFT = 32,
+};
+
+/*
+ * The format of the data returned by read() on a perf event fd,
+ * as specified by attr.read_format:
+ *
+ * struct read_format {
+ * { u64 value;
+ * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED
+ * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
+ * { u64 id; } && PERF_FORMAT_ID
+ * } && !PERF_FORMAT_GROUP
+ *
+ * { u64 nr;
+ * { u64 time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED
+ * { u64 time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
+ * { u64 value;
+ * { u64 id; } && PERF_FORMAT_ID
+ * } cntr[nr];
+ * } && PERF_FORMAT_GROUP
+ * };
+ */
+enum perf_event_read_format {
+ PERF_FORMAT_TOTAL_TIME_ENABLED = 1U << 0,
+ PERF_FORMAT_TOTAL_TIME_RUNNING = 1U << 1,
+ PERF_FORMAT_ID = 1U << 2,
+ PERF_FORMAT_GROUP = 1U << 3,
+
+ PERF_FORMAT_MAX = 1U << 4, /* non-ABI */
+};
+
+#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */
+#define PERF_ATTR_SIZE_VER1 72 /* add: config2 */
+#define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */
+#define PERF_ATTR_SIZE_VER3 96 /* add: sample_regs_user */
+ /* add: sample_stack_user */
+#define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */
+#define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */
+
+/*
+ * Hardware event_id to monitor via a performance monitoring event:
+ *
+ * @sample_max_stack: Max number of frame pointers in a callchain,
+ * should be < /proc/sys/kernel/perf_event_max_stack
+ */
+struct perf_event_attr {
+
+ /*
+ * Major type: hardware/software/tracepoint/etc.
+ */
+ uint32_t type;
+
+ /*
+ * Size of the attr structure, for fwd/bwd compat.
+ */
+ uint32_t size;
+
+ /*
+ * Type specific configuration information.
+ */
+ uint64_t config;
+
+ union {
+ uint64_t sample_period;
+ uint64_t sample_freq;
+ };
+
+ uint64_t sample_type;
+ uint64_t read_format;
+
+ uint64_t disabled : 1, /* off by default */
+ inherit : 1, /* children inherit it */
+ pinned : 1, /* must always be on PMU */
+ exclusive : 1, /* only group on PMU */
+ exclude_user : 1, /* don't count user */
+ exclude_kernel : 1, /* ditto kernel */
+ exclude_hv : 1, /* ditto hypervisor */
+ exclude_idle : 1, /* don't count when idle */
+ mmap : 1, /* include mmap data */
+ comm : 1, /* include comm data */
+ freq : 1, /* use freq, not period */
+ inherit_stat : 1, /* per task counts */
+ enable_on_exec : 1, /* next exec enables */
+ task : 1, /* trace fork/exit */
+ watermark : 1, /* wakeup_watermark */
+ /*
+ * precise_ip:
+ *
+ * 0 - SAMPLE_IP can have arbitrary skid
+ * 1 - SAMPLE_IP must have constant skid
+ * 2 - SAMPLE_IP requested to have 0 skid
+ * 3 - SAMPLE_IP must have 0 skid
+ *
+ * See also PERF_RECORD_MISC_EXACT_IP
+ */
+ precise_ip : 2, /* skid constraint */
+ mmap_data : 1, /* non-exec mmap data */
+ sample_id_all : 1, /* sample_type all events */
+
+ exclude_host : 1, /* don't count in host */
+ exclude_guest : 1, /* don't count in guest */
+
+ exclude_callchain_kernel : 1, /* exclude kernel callchains */
+ exclude_callchain_user : 1, /* exclude user callchains */
+ mmap2 : 1, /* include mmap with inode data */
+ comm_exec : 1, /* flag comm events that are due to an exec */
+ use_clockid : 1, /* use @clockid for time fields */
+ context_switch : 1, /* context switch data */
+ write_backward : 1, /* Write ring buffer from end to beginning */
+ __reserved_1 : 36;
+
+ union {
+ uint32_t wakeup_events; /* wakeup every n events */
+ uint32_t wakeup_watermark; /* bytes before wakeup */
+ };
+
+ uint32_t bp_type;
+ union {
+ uint64_t bp_addr;
+ uint64_t config1; /* extension of config */
+ };
+ union {
+ uint64_t bp_len;
+ uint64_t config2; /* extension of config1 */
+ };
+ uint64_t branch_sample_type; /* enum perf_branch_sample_type */
+
+ /*
+ * Defines set of user regs to dump on samples.
+ * See asm/perf_regs.h for details.
+ */
+ uint64_t sample_regs_user;
+
+ /*
+ * Defines size of the user stack to dump on samples.
+ */
+ uint32_t sample_stack_user;
+
+ int32_t clockid;
+ /*
+ * Defines set of regs to dump for each sample
+ * state captured on:
+ * - precise = 0: PMU interrupt
+ * - precise > 0: sampled instruction
+ *
+ * See asm/perf_regs.h for details.
+ */
+ uint64_t sample_regs_intr;
+
+ /*
+ * Wakeup watermark for AUX area
+ */
+ uint32_t aux_watermark;
+ uint16_t sample_max_stack;
+ uint16_t __reserved_2; /* align to uint64_t */
+};
+
+#define perf_flags(attr) (*(&(attr)->read_format + 1))
+
+/*
+ * Ioctls that can be done on a perf event fd:
+ */
+#define PERF_EVENT_IOC_ENABLE _IO ('$', 0)
+#define PERF_EVENT_IOC_DISABLE _IO ('$', 1)
+#define PERF_EVENT_IOC_REFRESH _IO ('$', 2)
+#define PERF_EVENT_IOC_RESET _IO ('$', 3)
+#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, uint64_t)
+#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5)
+#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *)
+#define PERF_EVENT_IOC_ID _IOR('$', 7, uint64_t *)
+#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, uint32_t)
+#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, uint32_t)
+
+enum perf_event_ioc_flags {
+ PERF_IOC_FLAG_GROUP = 1U << 0,
+};
+
+/*
+ * Structure of the page that can be mapped via mmap
+ */
+struct perf_event_mmap_page {
+ uint32_t version; /* version number of this structure */
+ uint32_t compat_version; /* lowest version this is compat with */
+
+ /*
+ * Bits needed to read the hw events in user-space.
+ *
+ * u32 seq, time_mult, time_shift, index, width;
+ * u64 count, enabled, running;
+ * u64 cyc, time_offset;
+ * s64 pmc = 0;
+ *
+ * do {
+ * seq = pc->lock;
+ * barrier()
+ *
+ * enabled = pc->time_enabled;
+ * running = pc->time_running;
+ *
+ * if (pc->cap_user_time && enabled != running) {
+ * cyc = rdtsc();
+ * time_offset = pc->time_offset;
+ * time_mult = pc->time_mult;
+ * time_shift = pc->time_shift;
+ * }
+ *
+ * index = pc->index;
+ * count = pc->offset;
+ * if (pc->cap_user_rdpmc && index) {
+ * width = pc->pmc_width;
+ * pmc = rdpmc(index - 1);
+ * }
+ *
+ * barrier();
+ * } while (pc->lock != seq);
+ *
+ * NOTE: for obvious reason this only works on self-monitoring
+ * processes.
+ */
+ uint32_t lock; /* seqlock for synchronization */
+ uint32_t index; /* hardware event identifier */
+ int64_t offset; /* add to hardware event value */
+ uint64_t time_enabled; /* time event active */
+ uint64_t time_running; /* time event on cpu */
+ union {
+ uint64_t capabilities;
+ struct {
+ uint64_t cap_bit0 : 1, /* Always 0, deprecated, see commit 860f085b74e9 */
+ cap_bit0_is_deprecated : 1, /* Always 1, signals that bit 0 is zero */
+
+ cap_user_rdpmc : 1, /* The RDPMC instruction can be used to read counts */
+ cap_user_time : 1, /* The time_* fields are used */
+ cap_user_time_zero : 1, /* The time_zero field is used */
+ cap_____res : 59;
+ };
+ };
+
+ /*
+ * If cap_user_rdpmc this field provides the bit-width of the value
+ * read using the rdpmc() or equivalent instruction. This can be used
+ * to sign extend the result like:
+ *
+ * pmc <<= 64 - width;
+ * pmc >>= 64 - width; // signed shift right
+ * count += pmc;
+ */
+ uint16_t pmc_width;
+
+ /*
+ * If cap_user_time the below fields can be used to compute the time
+ * delta since time_enabled (in ns) using rdtsc or similar.
+ *
+ * u64 quot, rem;
+ * u64 delta;
+ *
+ * quot = (cyc >> time_shift);
+ * rem = cyc & (((u64)1 << time_shift) - 1);
+ * delta = time_offset + quot * time_mult +
+ * ((rem * time_mult) >> time_shift);
+ *
+ * Where time_offset,time_mult,time_shift and cyc are read in the
+ * seqcount loop described above. This delta can then be added to
+ * enabled and possible running (if index), improving the scaling:
+ *
+ * enabled += delta;
+ * if (index)
+ * running += delta;
+ *
+ * quot = count / running;
+ * rem = count % running;
+ * count = quot * enabled + (rem * enabled) / running;
+ */
+ uint16_t time_shift;
+ uint32_t time_mult;
+ uint64_t time_offset;
+ /*
+ * If cap_user_time_zero, the hardware clock (e.g. TSC) can be calculated
+ * from sample timestamps.
+ *
+ * time = timestamp - time_zero;
+ * quot = time / time_mult;
+ * rem = time % time_mult;
+ * cyc = (quot << time_shift) + (rem << time_shift) / time_mult;
+ *
+ * And vice versa:
+ *
+ * quot = cyc >> time_shift;
+ * rem = cyc & (((u64)1 << time_shift) - 1);
+ * timestamp = time_zero + quot * time_mult +
+ * ((rem * time_mult) >> time_shift);
+ */
+ uint64_t time_zero;
+ uint32_t size; /* Header size up to __reserved[] fields. */
+
+ /*
+ * Hole for extension of the self monitor capabilities
+ */
+
+ uint8_t __reserved[118*8+4]; /* align to 1k. */
+
+ /*
+ * Control data for the mmap() data buffer.
+ *
+ * User-space reading the @data_head value should issue an smp_rmb(),
+ * after reading this value.
+ *
+ * When the mapping is PROT_WRITE the @data_tail value should be
+ * written by userspace to reflect the last read data, after issuing
+ * an smp_mb() to separate the data read from the ->data_tail store.
+ * In this case the kernel will not over-write unread data.
+ *
+ * See perf_output_put_handle() for the data ordering.
+ *
+ * data_{offset,size} indicate the location and size of the perf record
+ * buffer within the mmapped area.
+ */
+ uint64_t data_head; /* head in the data section */
+ uint64_t data_tail; /* user-space written tail */
+ uint64_t data_offset; /* where the buffer starts */
+ uint64_t data_size; /* data buffer size */
+
+ /*
+ * AUX area is defined by aux_{offset,size} fields that should be set
+ * by the userspace, so that
+ *
+ * aux_offset >= data_offset + data_size
+ *
+ * prior to mmap()ing it. Size of the mmap()ed area should be aux_size.
+ *
+ * Ring buffer pointers aux_{head,tail} have the same semantics as
+ * data_{head,tail} and same ordering rules apply.
+ */
+ uint64_t aux_head;
+ uint64_t aux_tail;
+ uint64_t aux_offset;
+ uint64_t aux_size;
+};
+
+#define PERF_RECORD_MISC_CPUMODE_MASK (7 << 0)
+#define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0)
+#define PERF_RECORD_MISC_KERNEL (1 << 0)
+#define PERF_RECORD_MISC_USER (2 << 0)
+#define PERF_RECORD_MISC_HYPERVISOR (3 << 0)
+#define PERF_RECORD_MISC_GUEST_KERNEL (4 << 0)
+#define PERF_RECORD_MISC_GUEST_USER (5 << 0)
+
+/*
+ * Indicates that /proc/PID/maps parsing was truncated by a timeout.
+ */
+#define PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT (1 << 12)
+/*
+ * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on
+ * different events so can reuse the same bit position.
+ * Ditto PERF_RECORD_MISC_SWITCH_OUT.
+ */
+#define PERF_RECORD_MISC_MMAP_DATA (1 << 13)
+#define PERF_RECORD_MISC_COMM_EXEC (1 << 13)
+#define PERF_RECORD_MISC_SWITCH_OUT (1 << 13)
+/*
+ * Indicates that the content of PERF_SAMPLE_IP points to
+ * the actual instruction that triggered the event. See also
+ * perf_event_attr::precise_ip.
+ */
+#define PERF_RECORD_MISC_EXACT_IP (1 << 14)
+/*
+ * Reserve the last bit to indicate some extended misc field
+ */
+#define PERF_RECORD_MISC_EXT_RESERVED (1 << 15)
+
+struct perf_event_header {
+ uint32_t type;
+ uint16_t misc;
+ uint16_t size;
+};
+
+enum perf_event_type {
+
+ /*
+ * If perf_event_attr.sample_id_all is set then all event types will
+ * have the sample_type selected fields related to where/when
+ * (identity) an event took place (TID, TIME, ID, STREAM_ID, CPU,
+ * IDENTIFIER) described in PERF_RECORD_SAMPLE below, it will be stashed
+ * just after the perf_event_header and the fields already present for
+ * the existing fields, i.e. at the end of the payload. That way a newer
+ * perf.data file will be supported by older perf tools, with these new
+ * optional fields being ignored.
+ *
+ * struct sample_id {
+ * { u32 pid, tid; } && PERF_SAMPLE_TID
+ * { u64 time; } && PERF_SAMPLE_TIME
+ * { u64 id; } && PERF_SAMPLE_ID
+ * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID
+ * { u32 cpu, res; } && PERF_SAMPLE_CPU
+ * { u64 id; } && PERF_SAMPLE_IDENTIFIER
+ * } && perf_event_attr::sample_id_all
+ *
+ * Note that PERF_SAMPLE_IDENTIFIER duplicates PERF_SAMPLE_ID. The
+ * advantage of PERF_SAMPLE_IDENTIFIER is that its position is fixed
+ * relative to header.size.
+ */
+
+ /*
+ * The MMAP events record the PROT_EXEC mappings so that we can
+ * correlate userspace IPs to code. They have the following structure:
+ *
+ * struct {
+ * struct perf_event_header header;
+ *
+ * u32 pid, tid;
+ * u64 addr;
+ * u64 len;
+ * u64 pgoff;
+ * char filename[];
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_MMAP = 1,
+
+ /*
+ * struct {
+ * struct perf_event_header header;
+ * u64 id;
+ * u64 lost;
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_LOST = 2,
+
+ /*
+ * struct {
+ * struct perf_event_header header;
+ *
+ * u32 pid, tid;
+ * char comm[];
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_COMM = 3,
+
+ /*
+ * struct {
+ * struct perf_event_header header;
+ * u32 pid, ppid;
+ * u32 tid, ptid;
+ * u64 time;
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_EXIT = 4,
+
+ /*
+ * struct {
+ * struct perf_event_header header;
+ * u64 time;
+ * u64 id;
+ * u64 stream_id;
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_THROTTLE = 5,
+ PERF_RECORD_UNTHROTTLE = 6,
+
+ /*
+ * struct {
+ * struct perf_event_header header;
+ * u32 pid, ppid;
+ * u32 tid, ptid;
+ * u64 time;
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_FORK = 7,
+
+ /*
+ * struct {
+ * struct perf_event_header header;
+ * u32 pid, tid;
+ *
+ * struct read_format values;
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_READ = 8,
+
+ /*
+ * struct {
+ * struct perf_event_header header;
+ *
+ * #
+ * # Note that PERF_SAMPLE_IDENTIFIER duplicates PERF_SAMPLE_ID.
+ * # The advantage of PERF_SAMPLE_IDENTIFIER is that its position
+ * # is fixed relative to header.
+ * #
+ *
+ * { u64 id; } && PERF_SAMPLE_IDENTIFIER
+ * { u64 ip; } && PERF_SAMPLE_IP
+ * { u32 pid, tid; } && PERF_SAMPLE_TID
+ * { u64 time; } && PERF_SAMPLE_TIME
+ * { u64 addr; } && PERF_SAMPLE_ADDR
+ * { u64 id; } && PERF_SAMPLE_ID
+ * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID
+ * { u32 cpu, res; } && PERF_SAMPLE_CPU
+ * { u64 period; } && PERF_SAMPLE_PERIOD
+ *
+ * { struct read_format values; } && PERF_SAMPLE_READ
+ *
+ * { u64 nr,
+ * u64 ips[nr]; } && PERF_SAMPLE_CALLCHAIN
+ *
+ * #
+ * # The RAW record below is opaque data wrt the ABI
+ * #
+ * # That is, the ABI doesn't make any promises about
+ * # the stability of its content, it may vary depending
+ * # on event, hardware, kernel version and phase of
+ * # the moon.
+ * #
+ * # In other words, PERF_SAMPLE_RAW contents are not an ABI.
+ * #
+ *
+ * { u32 size;
+ * char data[size];}&& PERF_SAMPLE_RAW
+ *
+ * { u64 nr;
+ * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK
+ *
+ * { u64 abi; # enum perf_sample_regs_abi
+ * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER
+ *
+ * { u64 size;
+ * char data[size];
+ * u64 dyn_size; } && PERF_SAMPLE_STACK_USER
+ *
+ * { u64 weight; } && PERF_SAMPLE_WEIGHT
+ * { u64 data_src; } && PERF_SAMPLE_DATA_SRC
+ * { u64 transaction; } && PERF_SAMPLE_TRANSACTION
+ * { u64 abi; # enum perf_sample_regs_abi
+ * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR
+ * };
+ */
+ PERF_RECORD_SAMPLE = 9,
+
+ /*
+ * The MMAP2 records are an augmented version of MMAP, they add
+ * maj, min, ino numbers to be used to uniquely identify each mapping
+ *
+ * struct {
+ * struct perf_event_header header;
+ *
+ * u32 pid, tid;
+ * u64 addr;
+ * u64 len;
+ * u64 pgoff;
+ * u32 maj;
+ * u32 min;
+ * u64 ino;
+ * u64 ino_generation;
+ * u32 prot, flags;
+ * char filename[];
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_MMAP2 = 10,
+
+ /*
+ * Records that new data landed in the AUX buffer part.
+ *
+ * struct {
+ * struct perf_event_header header;
+ *
+ * u64 aux_offset;
+ * u64 aux_size;
+ * u64 flags;
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_AUX = 11,
+
+ /*
+ * Indicates that instruction trace has started
+ *
+ * struct {
+ * struct perf_event_header header;
+ * u32 pid;
+ * u32 tid;
+ * };
+ */
+ PERF_RECORD_ITRACE_START = 12,
+
+ /*
+ * Records the dropped/lost sample number.
+ *
+ * struct {
+ * struct perf_event_header header;
+ *
+ * u64 lost;
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_LOST_SAMPLES = 13,
+
+ /*
+ * Records a context switch in or out (flagged by
+ * PERF_RECORD_MISC_SWITCH_OUT). See also
+ * PERF_RECORD_SWITCH_CPU_WIDE.
+ *
+ * struct {
+ * struct perf_event_header header;
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_SWITCH = 14,
+
+ /*
+ * CPU-wide version of PERF_RECORD_SWITCH with next_prev_pid and
+ * next_prev_tid that are the next (switching out) or previous
+ * (switching in) pid/tid.
+ *
+ * struct {
+ * struct perf_event_header header;
+ * u32 next_prev_pid;
+ * u32 next_prev_tid;
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_SWITCH_CPU_WIDE = 15,
+
+ PERF_RECORD_MAX, /* non-ABI */
+};
+
+#define PERF_MAX_STACK_DEPTH 127
+#define PERF_MAX_CONTEXTS_PER_STACK 8
+
+enum perf_callchain_context {
+ PERF_CONTEXT_HV = (uint64_t)-32,
+ PERF_CONTEXT_KERNEL = (uint64_t)-128,
+ PERF_CONTEXT_USER = (uint64_t)-512,
+
+ PERF_CONTEXT_GUEST = (uint64_t)-2048,
+ PERF_CONTEXT_GUEST_KERNEL = (uint64_t)-2176,
+ PERF_CONTEXT_GUEST_USER = (uint64_t)-2560,
+
+ PERF_CONTEXT_MAX = (uint64_t)-4095,
+};
+
+/**
+ * PERF_RECORD_AUX::flags bits
+ */
+#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */
+#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */
+
+#define PERF_FLAG_FD_NO_GROUP (1UL << 0)
+#define PERF_FLAG_FD_OUTPUT (1UL << 1)
+#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */
+#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */
+
+union perf_mem_data_src {
+ uint64_t val;
+ struct {
+ uint64_t mem_op:5, /* type of opcode */
+ mem_lvl:14, /* memory hierarchy level */
+ mem_snoop:5, /* snoop mode */
+ mem_lock:2, /* lock instr */
+ mem_dtlb:7, /* tlb access */
+ mem_rsvd:31;
+ };
+};
+
+/* type of opcode (load/store/prefetch,code) */
+#define PERF_MEM_OP_NA 0x01 /* not available */
+#define PERF_MEM_OP_LOAD 0x02 /* load instruction */
+#define PERF_MEM_OP_STORE 0x04 /* store instruction */
+#define PERF_MEM_OP_PFETCH 0x08 /* prefetch */
+#define PERF_MEM_OP_EXEC 0x10 /* code (execution) */
+#define PERF_MEM_OP_SHIFT 0
+
+/* memory hierarchy (memory level, hit or miss) */
+#define PERF_MEM_LVL_NA 0x01 /* not available */
+#define PERF_MEM_LVL_HIT 0x02 /* hit level */
+#define PERF_MEM_LVL_MISS 0x04 /* miss level */
+#define PERF_MEM_LVL_L1 0x08 /* L1 */
+#define PERF_MEM_LVL_LFB 0x10 /* Line Fill Buffer */
+#define PERF_MEM_LVL_L2 0x20 /* L2 */
+#define PERF_MEM_LVL_L3 0x40 /* L3 */
+#define PERF_MEM_LVL_LOC_RAM 0x80 /* Local DRAM */
+#define PERF_MEM_LVL_REM_RAM1 0x100 /* Remote DRAM (1 hop) */
+#define PERF_MEM_LVL_REM_RAM2 0x200 /* Remote DRAM (2 hops) */
+#define PERF_MEM_LVL_REM_CCE1 0x400 /* Remote Cache (1 hop) */
+#define PERF_MEM_LVL_REM_CCE2 0x800 /* Remote Cache (2 hops) */
+#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */
+#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */
+#define PERF_MEM_LVL_SHIFT 5
+
+/* snoop mode */
+#define PERF_MEM_SNOOP_NA 0x01 /* not available */
+#define PERF_MEM_SNOOP_NONE 0x02 /* no snoop */
+#define PERF_MEM_SNOOP_HIT 0x04 /* snoop hit */
+#define PERF_MEM_SNOOP_MISS 0x08 /* snoop miss */
+#define PERF_MEM_SNOOP_HITM 0x10 /* snoop hit modified */
+#define PERF_MEM_SNOOP_SHIFT 19
+
+/* locked instruction */
+#define PERF_MEM_LOCK_NA 0x01 /* not available */
+#define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */
+#define PERF_MEM_LOCK_SHIFT 24
+
+/* TLB access */
+#define PERF_MEM_TLB_NA 0x01 /* not available */
+#define PERF_MEM_TLB_HIT 0x02 /* hit level */
+#define PERF_MEM_TLB_MISS 0x04 /* miss level */
+#define PERF_MEM_TLB_L1 0x08 /* L1 */
+#define PERF_MEM_TLB_L2 0x10 /* L2 */
+#define PERF_MEM_TLB_WK 0x20 /* Hardware Walker*/
+#define PERF_MEM_TLB_OS 0x40 /* OS fault handler */
+#define PERF_MEM_TLB_SHIFT 26
+
+#define PERF_MEM_S(a, s) \
+ (((uint64_t)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
+
+/*
+ * single taken branch record layout:
+ *
+ * from: source instruction (may not always be a branch insn)
+ * to: branch target
+ * mispred: branch target was mispredicted
+ * predicted: branch target was predicted
+ *
+ * support for mispred, predicted is optional. In case it
+ * is not supported mispred = predicted = 0.
+ *
+ * in_tx: running in a hardware transaction
+ * abort: aborting a hardware transaction
+ * cycles: cycles from last branch (or 0 if not supported)
+ */
+struct perf_branch_entry {
+ uint64_t from;
+ uint64_t to;
+ uint64_t mispred:1, /* target mispredicted */
+ predicted:1,/* target predicted */
+ in_tx:1, /* in transaction */
+ abort:1, /* transaction abort */
+ cycles:16, /* cycle count to last branch */
+ reserved:44;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _UAPI_LINUX_PERF_EVENT_H */
diff --git a/llvm/include/pmu/pmu-events.h b/llvm/include/pmu/pmu-events.h
new file mode 100644
index 0000000..2c31ae9
--- /dev/null
+++ b/llvm/include/pmu/pmu-events.h
@@ -0,0 +1,131 @@
+/*
+ * (C) 2018 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#ifndef __PMU_EVENTS_H
+#define __PMU_EVENTS_H
+
+#include <list>
+#include <vector>
+#include <signal.h>
+#include "pmu-global.h"
+#include "pmu.h"
+
+namespace pmu {
+
+#define PMU_MAX_EVENTS (1024)
+
+class Timer;
+
+/* Event modes: each non-zero mode is a distinct single-bit value (bits 1..4; bit 0 is unused). */
+enum {
+ MODE_NONE = 0,
+ MODE_COUNTER = (uint32_t)1U << 1,
+ MODE_SAMPLE = (uint32_t)1U << 2,
+ MODE_SAMPLE_IP = (uint32_t)1U << 3,
+ MODE_SAMPLE_READ = (uint32_t)1U << 4,
+};
+
+/* Event states: STATE_STOP is zero; the others are distinct single-bit values (bits 1..3). */
+enum {
+ STATE_STOP = 0,
+ STATE_START = (uint32_t)1U << 1,
+ STATE_GOTO_STOP = (uint32_t)1U << 2,
+ STATE_GOTO_START = (uint32_t)1U << 3,
+};
+
+/* Sampling mmap buffer information. */
+struct MMap {
+ void *Base; /* NOTE(review): presumably the address of the mapping - confirm against users */
+ uint64_t Size; /* NOTE(review): presumably the mapping length in bytes - confirm */
+ uint64_t Prev; /* NOTE(review): presumably the previously consumed buffer position - confirm */
+};
+
+/* Event. Every field gets a defined initial value; FD starts out empty. */
+struct PMUEvent {
+ PMUEvent() : Hndl(0), Mode(MODE_NONE), State(STATE_STOP), Data(), Aux(),
+ Watermark(0), OverflowHandler(0), Opaque(0) {} /* Data()/Aux() zero the MMap fields */
+ Handle Hndl; /* Unique handle value */
+ int Mode; /* Event mode */
+ int State; /* Current event state */
+ std::vector<int> FD; /* Opened fd(s) of this event */
+ MMap Data; /* mmap data info */
+ MMap Aux; /* mmap aux info */
+ uint64_t Watermark; /* The bytes before wakeup */
+ /* Overflow handling function pointer (two views of the same storage) */
+ union {
+ void *OverflowHandler;
+ SampleHandlerTy SampleHandler;
+ };
+ void *Opaque; /* Opaque pointer passed to the overflow handler. */
+
+ int getFD() { return FD.empty() ? -1 : FD[0]; } /* Group leader fd, or -1 when no fd is stored */
+};
+
+/*
+ * Event Manager: hands out PMUEvent objects from a fixed, pre-allocated array.
+ */
+class EventManager {
+ typedef std::list<PMUEvent *> EventList;
+
+ PMUEvent Events[PMU_MAX_EVENTS]; /* Pre-allocated events (capacity is fixed at PMU_MAX_EVENTS) */
+ EventList FreeEvents; /* Free events */
+ EventList SampleEvents; /* Sampling events */
+ Timer *EventTimer; /* Timer for sampling events. */
+ std::vector<PMUEvent *> ChangedEvents; /* NOTE(review): undocumented; presumably events with a pending STATE_GOTO_* change - confirm */
+
+public:
+ EventManager();
+ ~EventManager();
+
+ /* Return the event of the input handle. */
+ PMUEvent *GetEvent(Handle Hndl);
+
+ /* Add a counting event and return its handle. */
+ Handle AddEvent(int fd);
+
+ /* Add a sampling event and return its handle. NOTE(review): FD presumably points to NumFDs descriptors - confirm. */
+ Handle AddSampleEvent(unsigned NumFDs, int *FD, uint64_t DataSize, void *Data,
+ uint32_t Mode, SampleConfig &Config);
+
+ /* Notify that an event is started (ShouldLock defaults to true). */
+ void StartEvent(PMUEvent *Event, bool ShouldLock = true);
+
+ /* Notify that an event is stopped (ShouldLock defaults to true). */
+ void StopEvent(PMUEvent *Event, bool ShouldLock = true);
+
+ /* Notify that an event is deleted. */
+ void DeleteEvent(PMUEvent *Event);
+
+ /* Stop the event manager. */
+ void Pause();
+
+ /* Restart the event manager. */
+ void Resume();
+
+ friend void DefaultHandler(int signum, siginfo_t *info, void *data); /* three-argument (sa_sigaction-style) handler with access to the private members */
+};
+
+/* Interval timer. */
+class Timer {
+ timer_t T; /* underlying timer id */
+
+public:
+ Timer(int Signum, int TID); /* NOTE(review): presumably delivers Signum to thread TID on expiry - confirm */
+ ~Timer();
+
+ /* Start a timer that expires just once. */
+ void Start();
+
+ /* Stop the timer. */
+ void Stop();
+};
+
+} /* namespace pmu */
+
+#endif /* __PMU_EVENTS_H */
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
diff --git a/llvm/include/pmu/pmu-global.h b/llvm/include/pmu/pmu-global.h
new file mode 100644
index 0000000..ed059a4
--- /dev/null
+++ b/llvm/include/pmu/pmu-global.h
@@ -0,0 +1,52 @@
+/*
+ * (C) 2018 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#ifndef __PMU_GLOBAL_H
+#define __PMU_GLOBAL_H
+
+#if defined(__i386__) || defined(__x86_64__)
+#include "pmu/x86/x86-events.h"
+#elif defined(__arm__) || defined(__aarch64__)
+#include "pmu/arm/arm-events.h"
+#elif defined(_ARCH_PPC) || defined(_ARCH_PPC64)
+#include "pmu/ppc/ppc-events.h"
+#endif
+
+#include "pmu/pmu-utils.h"
+#include "pmu/pmu.h"
+
+namespace pmu {
+
+#define PMU_SIGNAL_NUM SIGIO
+#define PMU_SAMPLE_PERIOD 1e6
+#define PMU_SAMPLE_PAGES 4
+
+class EventManager;
+
+/* Pre-defined event identity.
+ * NOTE(review): the two fields presumably feed the `type` and `config`
+ * arguments of perf_attr_init() -- confirm at the call sites. */
+struct EventID {
+ int Type; /* Perf major type: hardware/software/etc */
+ int Config; /* Perf type specific configuration information */
+};
+
+/* System-wide configuration. */
+struct GlobalConfig {
+ int PageSize; /* Host page size */
+ int SignalReceiver; /* TID of the signal receiver */
+ /* NOTE(review): documented here in nanoseconds, whereas PMUConfig::Timeout
+ * is documented in micro-seconds -- confirm where the conversion happens. */
+ uint32_t Timeout; /* Timer period in nanosecond */
+ int PerfVersion; /* Perf version used in this PMU tool */
+ int OSPerfVersion; /* Perf version used in the OS kernel */
+};
+
+extern EventManager *EventMgr;
+extern GlobalConfig SysConfig;
+
+} /* namespace pmu */
+
+#endif /* __PMU_GLOBAL_H */
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
diff --git a/llvm/include/pmu/pmu-utils.h b/llvm/include/pmu/pmu-utils.h
new file mode 100644
index 0000000..5e3e014
--- /dev/null
+++ b/llvm/include/pmu/pmu-utils.h
@@ -0,0 +1,106 @@
+/*
+ * (C) 2018 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#ifndef __PMU_UTILS_H
+#define __PMU_UTILS_H
+
+#include <unistd.h>
+#include <string.h>
+#include <pthread.h>
+#include <sys/types.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+#include "perf_event.h"
+
+#ifndef ACCESS_ONCE
+#define ACCESS_ONCE(x) (*(volatile decltype(x) *)&(x))
+#endif
+
+namespace pmu {
+
+/* Invoke the perf_event_open system call through syscall(); the arguments are
+ * forwarded unchanged and the syscall() result is returned as an int. */
+static inline int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid,
+ int cpu, int group_fd,
+ unsigned long flags) {
+ return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+}
+
+/* Clear `attr` and fill in the settings shared by every event: the given
+ * type/config pair, the structure size, and the disabled and exclude_*
+ * bits set to 1. */
+static inline void perf_attr_init(struct perf_event_attr *attr, int type,
+                                  int config) {
+    const size_t AttrSize = sizeof(struct perf_event_attr);
+
+    memset(attr, 0, AttrSize);
+    attr->size = AttrSize;
+    attr->type = type;
+    attr->config = config;
+
+    attr->disabled = 1;
+    attr->exclude_hv = 1;
+    attr->exclude_guest = 1;
+    attr->exclude_kernel = 1;
+}
+
+/* Issue PERF_EVENT_IOC_ENABLE on `fd` and return the ioctl() result. */
+static inline int perf_event_start(int fd) {
+ return ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
+}
+
+/* Issue PERF_EVENT_IOC_DISABLE on `fd` and return the ioctl() result. */
+static inline int perf_event_stop(int fd) {
+ return ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
+}
+
+/* Issue PERF_EVENT_IOC_RESET on `fd` and return the ioctl() result. */
+static inline int perf_event_reset(int fd) {
+ return ioctl(fd, PERF_EVENT_IOC_RESET, 0);
+}
+
+/* Issue PERF_EVENT_IOC_SET_FILTER on `fd` with the filter string `arg` and
+ * return the ioctl() result. The cast only drops the const qualifier. */
+static inline int perf_event_set_filter(int fd, const char *arg) {
+ return ioctl(fd, PERF_EVENT_IOC_SET_FILTER, (void *)arg);
+}
+
+/* Read data_head from the perf_event_mmap_page at `header`.
+ * The field is loaded through ACCESS_ONCE (a volatile access) and the load is
+ * followed by pmu_rmb() before the value is returned. */
+static inline uint64_t perf_read_data_head(void *header) {
+ struct perf_event_mmap_page *pc = (struct perf_event_mmap_page *)header;
+ uint64_t head = ACCESS_ONCE(pc->data_head);
+ pmu_rmb();
+ return head;
+}
+
+/* Store `val` into data_tail of the perf_event_mmap_page at `header`.
+ * pmu_mb() is issued before the store, i.e. after whatever accesses the
+ * caller made prior to this call. */
+static inline void perf_write_data_tail(void *header, uint64_t val) {
+ struct perf_event_mmap_page *pc = (struct perf_event_mmap_page *)header;
+ pmu_mb();
+ pc->data_tail = val;
+}
+
+/* Read aux_head from the perf_event_mmap_page at `header`.
+ * Same pattern as the data_head reader: one ACCESS_ONCE load followed by
+ * pmu_rmb(). */
+static inline uint64_t perf_read_aux_head(void *header) {
+ struct perf_event_mmap_page *pc = (struct perf_event_mmap_page *)header;
+ uint64_t head = ACCESS_ONCE(pc->aux_head);
+ pmu_rmb();
+ return head;
+}
+
+/* Store `val` into aux_tail of the perf_event_mmap_page at `header`, with
+ * pmu_mb() issued before the store. */
+static inline void perf_write_aux_tail(void *header, uint64_t val) {
+ struct perf_event_mmap_page *pc = (struct perf_event_mmap_page *)header;
+ pmu_mb();
+ pc->aux_tail = val;
+}
+
+/* Return 1 when exactly one bit of `value` is set (a power of two) and 0
+ * otherwise; zero is not a power of two. */
+static inline int isPowerOf2(uint64_t value) {
+    return value != 0 && (value & (value - 1)) == 0;
+}
+
+/* Convert system errno to PMU error code. */
+static inline int ErrorCode(int err)
+{
+ switch (err) {
+ case EPERM:
+ case EACCES: return PMU_EPERM;
+ case ENOMEM: return PMU_ENOMEM;
+ default: return PMU_EEVENT;
+ }
+}
+
+} /* namespace pmu */
+
+#endif /* __PMU_UTILS_H */
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
diff --git a/llvm/include/pmu/pmu.h b/llvm/include/pmu/pmu.h
new file mode 100644
index 0000000..89a7c98
--- /dev/null
+++ b/llvm/include/pmu/pmu.h
@@ -0,0 +1,170 @@
+/*
+ * (C) 2018 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ *
+ * Hardware Performance Monitoring Unit (PMU), C++ interfaces.
+ */
+
+#ifndef __PMU_H
+#define __PMU_H
+
+#include <vector>
+#include <memory>
+#include <stdint.h>
+
+namespace pmu {
+
+#define PMU_GROUP_EVENTS (8)
+#define PMU_TIMER_PERIOD (400) /* micro-second */
+#define PMU_INVALID_HNDL ((Handle)-1)
+
+typedef unsigned Handle;
+/* Sampling event overflow handling. */
+typedef std::vector<uint64_t> SampleList;
+typedef std::unique_ptr<SampleList> SampleDataPtr;
+typedef void (*SampleHandlerTy)(Handle Hndl, SampleDataPtr Data, void *Opaque);
+
+/* Error code. */
+enum {
+ PMU_OK = 0, /* No error */
+ PMU_EINVAL = -1, /* Invalid argument */
+ PMU_ENOMEM = -2, /* Insufficient memory */
+ PMU_ENOEVENT = -3, /* Pre-defined event not available */
+ PMU_EEVENT = -4, /* Hardware event error */
+ PMU_EPERM = -5, /* Permission denied */
+ PMU_EINTER = -6, /* Internal error */
+ PMU_EDECODER = -7, /* Instruction trace decoder error */
+};
+
+/* Pre-defined event code. */
+enum {
+ /* Basic events */
+ PMU_CPU_CYCLES = 0,
+ PMU_REF_CPU_CYCLES,
+ PMU_INSTRUCTIONS,
+ PMU_LLC_REFERENCES,
+ PMU_LLC_MISSES,
+ PMU_BRANCH_INSTRUCTIONS,
+ PMU_BRANCH_MISSES,
+ /* Instruction cache events */
+ PMU_ICACHE_HITS,
+ PMU_ICACHE_MISSES,
+ /* Memory instruction events */
+ PMU_MEM_LOADS,
+ PMU_MEM_STORES,
+
+ PMU_EVENT_MAX,
+};
+
+/* PMU initial configuration, passed by reference to PMU::Init().
+ * The first two fields are inputs supplied by the caller; the last two are
+ * marked as outputs. */
+struct PMUConfig {
+ /* Input */
+ int SignalReceiver; /* TID of the signal receiver. 0 for auto-select. */
+ uint32_t Timeout; /* Timer period in micro-second. 0 for auto-select. */
+
+ /* Output */
+ int PerfVersion; /* Perf version used in this PMU tool */
+ int OSPerfVersion; /* Perf version used in the OS kernel */
+};
+
+/* Config for sampling with one or multiple event(s).
+ * NOTE(review): EventCode holds PMU_GROUP_EVENTS entries, so NumEvents is
+ * presumably expected not to exceed that -- confirm where it is validated. */
+struct SampleConfig {
+ unsigned NumEvents; /* Number of events in the event group */
+ unsigned EventCode[PMU_GROUP_EVENTS]; /* Event group. The 1st event is the leader. */
+ unsigned NumPages; /* Number of pages as the sample buffer size. (must be 2^n) */
+ uint64_t Period; /* Sampling period of the group leader. */
+ uint64_t Watermark; /* Bytes before wakeup. 0 for every timer period. */
+ SampleHandlerTy SampleHandler; /* User handler routine */
+ void *Opaque; /* An opaque pointer passed to the overflow handler. */
+};
+
+/* Config for sampling with only one event; taken by PMU::CreateSampleIP().
+ * It carries the same fields as SampleConfig except that a single event code
+ * replaces the event group. */
+struct Sample1Config {
+ unsigned EventCode; /* Pre-defined event to trigger counter overflow */
+ unsigned NumPages; /* Number of pages as the sample buffer size. (must be 2^n) */
+ uint64_t Period; /* Sampling period */
+ uint64_t Watermark; /* Bytes before wakeup. 0 for every timer period. */
+ SampleHandlerTy SampleHandler; /* User handler routine */
+ void *Opaque; /* An opaque pointer passed to the overflow handler. */
+};
+
+/*
+ * PMU main tools.
+ */
+class PMU {
+ /* Construction and destruction are deleted: the class is only a holder
+ * for the static entry points below. */
+ PMU() = delete;
+ ~PMU() = delete;
+
+public:
+ /* Initialize the PMU module. */
+ static int Init(PMUConfig &Config);
+
+ /* Finalize the PMU module. */
+ static int Finalize(void);
+
+ /* Stop the PMU module. When the PMU module is paused, the user can continue
+ * to use counting events, but the overflow handler will not be invoked. */
+ static int Pause(void);
+
+ /* Restart the PMU module. After the PMU module is resumed, the overflow
+ * handler will be invoked. */
+ static int Resume(void);
+
+ /* Start a counting/sampling/tracing event. */
+ static int Start(Handle Hndl);
+
+ /* Stop a counting/sampling/tracing event. */
+ static int Stop(Handle Hndl);
+
+ /* Reset the hardware counter. */
+ static int Reset(Handle Hndl);
+
+ /* Remove an event. */
+ static int Cleanup(Handle Hndl);
+
+ /* Start/stop a sampling/tracing event without acquiring a lock.
+ * Note that these two functions should only be used within the overflow
+ * handler. Since the overflow handling is already in a locked section,
+ * acquiring a lock is not required. */
+ static int StartUnlocked(Handle Hndl);
+ static int StopUnlocked(Handle Hndl);
+
+ /* Open an event using the pre-defined event code. */
+ static int CreateEvent(unsigned EventCode, Handle &Hndl);
+
+ /* Open an event using the raw event number and umask value.
+ * The raw event code is computed as (RawEvent | (Umask << 8)). */
+ static int CreateRawEvent(unsigned RawEvent, unsigned Umask, Handle &Hndl);
+
+ /* Open a sampling event, with the 1st EventCode as the interrupt event.
+ * The sample data will be recorded in a vector of type 'uint64_t'.
+ * The following vector shows the data format of sampling with N events:
+ * { pc, val1, val2, ..., valN, # 1st sample
+ * ...
+ * pc, val1, val2, ..., valN }; # nth sample
+ *
+ * Note that ownership of the output vector is transferred to the user.
+ * It is the user's responsibility to free the resource of the vector. */
+ static int CreateSampleEvent(SampleConfig &Config, Handle &Hndl);
+
+ /* Generate an IP histogram, using EventCode as the interrupt event.
+ * The IP histogram will be recorded in a vector of type 'uint64_t' with
+ * the format: { pc1, pc2, pc3, ..., pcN }.
+ * Note that ownership of the output vector is transferred to the user.
+ * It is the user's responsibility to free the resource of the vector. */
+ static int CreateSampleIP(Sample1Config &Config, Handle &Hndl);
+
+ /* Read value from the hardware counter. */
+ static int ReadEvent(Handle Hndl, uint64_t &Value);
+
+ /* Convert error code to string. */
+ static const char *strerror(int ErrCode);
+};
+
+} /* namespace pmu */
+
+#endif /* __PMU_H */
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
diff --git a/llvm/include/pmu/ppc/ppc-events.h b/llvm/include/pmu/ppc/ppc-events.h
new file mode 100644
index 0000000..f48e10d
--- /dev/null
+++ b/llvm/include/pmu/ppc/ppc-events.h
@@ -0,0 +1,30 @@
+/*
+ * (C) 2018 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#ifndef __PPC_EVENTS_H
+#define __PPC_EVENTS_H
+
+#include <vector>
+#include "pmu/pmu.h"
+
+namespace pmu {
+
+class PMUEvent;
+
+#if defined(_ARCH_PPC) || defined(_ARCH_PPC64)
+#define pmu_mb() __asm__ __volatile__ ("sync" : : : "memory")
+#define pmu_rmb() __asm__ __volatile__ ("sync" : : : "memory")
+#define pmu_wmb() __asm__ __volatile__ ("sync" : : : "memory")
+#endif
+
+int PPCInit(void);
+
+} /* namespace pmu */
+
+#endif /* __PPC_EVENTS_H */
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
diff --git a/llvm/include/pmu/x86/x86-events.h b/llvm/include/pmu/x86/x86-events.h
new file mode 100644
index 0000000..c6fdb95
--- /dev/null
+++ b/llvm/include/pmu/x86/x86-events.h
@@ -0,0 +1,38 @@
+/*
+ * (C) 2018 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#ifndef __X86_EVENTS_H
+#define __X86_EVENTS_H
+
+#include <vector>
+#include "pmu/pmu.h"
+
+namespace pmu {
+
+class PMUEvent;
+
+#if defined(__i386__)
+/*
+ * Some non-Intel clones support out of order store. wmb() ceases to be a
+ * nop for these.
+ */
+#define pmu_mb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+#define pmu_rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+#define pmu_wmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+#elif defined(__x86_64__)
+#define pmu_mb() asm volatile("mfence" ::: "memory")
+#define pmu_rmb() asm volatile("lfence" ::: "memory")
+#define pmu_wmb() asm volatile("sfence" ::: "memory")
+#endif
+
+int X86Init(void);
+
+} /* namespace pmu */
+
+#endif /* __X86_EVENTS_H */
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
diff --git a/llvm/include/qemu-types.h b/llvm/include/qemu-types.h
new file mode 100644
index 0000000..f2430e0
--- /dev/null
+++ b/llvm/include/qemu-types.h
@@ -0,0 +1,33 @@
+/*
+ * (C) 2016 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#ifndef __QEMU_TYPES_H
+#define __QEMU_TYPES_H
+
+extern "C" {
+#include "cpu.h"
+#include "exec/tb-hash.h"
+#include "exec/exec-all.h"
+#include "exec/helper-proto.h"
+#include "exec/cpu_ldst.h"
+#include "tcg/tcg.h"
+#include "qemu/atomic.h"
+#include "hqemu.h"
+
+extern uint8_t *tb_ret_addr;
+extern uint8_t *ibtc_ret_addr;
+
+}
+
+#ifdef inline
+#undef inline
+#endif
+
+#endif
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
+
diff --git a/llvm/include/tcg-opc-vector.h b/llvm/include/tcg-opc-vector.h
new file mode 100644
index 0000000..bc03ea1
--- /dev/null
+++ b/llvm/include/tcg-opc-vector.h
@@ -0,0 +1,80 @@
+DEF(vector_start, 0, 0, 0, 0)
+
+DEF(vmov_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vload_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vstore_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+
+DEF(vsitofp_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vuitofp_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vfptosi_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vfptoui_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+
+DEF(vadd_i8_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vadd_i16_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vadd_i32_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vadd_i64_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vadd_i8_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vadd_i16_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vadd_i32_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+
+DEF(vsub_i8_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vsub_i16_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vsub_i32_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vsub_i64_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vsub_i8_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vsub_i16_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vsub_i32_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+
+DEF(vadd_f32_128, 0, 0, 0, 0)
+DEF(vadd_f64_128, 0, 0, 0, 0)
+DEF(vadd_f32_64, 0, 0, 0, 0)
+DEF(vpadd_f32_128, 0, 0, 0, 0)
+DEF(vpadd_f64_128, 0, 0, 0, 0)
+DEF(vpadd_f32_64, 0, 0, 0, 0)
+DEF(vsub_f32_128, 0, 0, 0, 0)
+DEF(vsub_f64_128, 0, 0, 0,0)
+DEF(vsub_f32_64, 0, 0, 0, 0)
+DEF(vabd_f32_128, 0, 0, 0 ,0)
+DEF(vabd_f64_128, 0, 0, 0 ,0)
+DEF(vabd_f32_64, 0, 0, 0, 0)
+
+DEF(vfma_f32_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vfma_f64_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vfma_f32_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vfms_f32_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vfms_f64_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vfms_f32_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+
+DEF(vmul_f32_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vmul_f64_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vmul_f32_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vmla_f32_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vmla_f64_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vmla_f32_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vmls_f32_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vmls_f64_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vmls_f32_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+
+DEF(vdiv_f32_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vdiv_f64_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vdiv_f32_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+
+DEF(vand_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vand_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vbic_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vbic_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vorr_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vorr_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vorn_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vorn_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(veor_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(veor_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+
+DEF(vbif_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vbif_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vbit_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vbit_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vbsl_128, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+DEF(vbsl_64, 0, 0, 0, TCG_OPF_SIDE_EFFECTS)
+
+DEF(vector_end, 0, 0, 0, 0)
diff --git a/llvm/include/tracer.h b/llvm/include/tracer.h
new file mode 100644
index 0000000..2813e0e
--- /dev/null
+++ b/llvm/include/tracer.h
@@ -0,0 +1,109 @@
+/*
+ * (C) 2016 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#ifndef __TRACE_H
+#define __TRACE_H
+
+#include <vector>
+#include <iostream>
+#include "qemu-types.h"
+#include "optimization.h"
+#include "utils.h"
+
+
+/*
+ * Base processor tracer
+ */
+class BaseTracer {
+public:
+ CPUArchState *Env; /* CPU state this tracer was created for. */
+ void *Perf; /* Opaque pointer; initialised to nullptr by the constructor. */
+
+ BaseTracer(CPUArchState *env) : Env(env), Perf(nullptr) {}
+ virtual ~BaseTracer() {}
+ /* Both hooks are no-ops in the base class; subclasses override them. */
+ virtual void Reset() {}
+ virtual void Record(uintptr_t next_tb, TranslationBlock *tb) {}
+
+ /* Create and return the tracer object based on LLVM_MODE. */
+ static BaseTracer *CreateTracer(CPUArchState *env);
+
+ /* Release the trace resources. */
+ static void DeleteTracer(CPUArchState *env);
+};
+
+
+/*
+ * Trace of a single basic block
+ */
+class SingleBlockTracer : public BaseTracer {
+ /* NOTE(review): presumably the block captured by Record() -- confirm
+ * against the implementation. */
+ TranslationBlock *TB;
+
+public:
+ SingleBlockTracer(CPUArchState *env);
+
+ /* Overrides the no-op BaseTracer::Record(). */
+ void Record(uintptr_t next_tb, TranslationBlock *tb) override;
+};
+
+
+/*
+ * Trace with NET trace formation algorithm
+ */
+#define NET_PROFILE_THRESHOLD 50
+#if defined(CONFIG_SOFTMMU)
+# define NET_PREDICT_THRESHOLD 16
+#else
+# define NET_PREDICT_THRESHOLD 64
+#endif
+class NETTracer : public BaseTracer {
+ /* NOTE(review): presumably decides whether `tb` starts a new trace --
+ * confirm against the implementation. */
+ bool isTraceHead(uintptr_t next_tb, TranslationBlock *tb, bool NewTB);
+
+public:
+ typedef std::vector<TranslationBlock *> TBVec;
+ TBVec TBs; /* Translation blocks collected by this tracer. */
+
+ NETTracer(CPUArchState *env, int Mode);
+ ~NETTracer();
+
+ /* Overrides of the no-op BaseTracer hooks. */
+ void Reset() override;
+ void Record(uintptr_t next_tb, TranslationBlock *tb) override;
+ /* Declared inline but not defined in this header; the definitions must be
+ * visible in every translation unit that calls them. */
+ inline void Profile(TranslationBlock *tb);
+ inline void Predict(TranslationBlock *tb);
+};
+
+/* Return the address of the patch point to the trace code, i.e. the block's
+ * code pointer plus its patch_jmp offset. */
+static inline uintptr_t tb_get_jmp_entry(TranslationBlock *tb) {
+    const uintptr_t CodeBase = (uintptr_t)tb->tc_ptr;
+    return CodeBase + tb->patch_jmp;
+}
+/* Return the initial jump target address of the patch point, i.e. the
+ * block's code pointer plus its patch_next offset. */
+static inline uintptr_t tb_get_jmp_next(TranslationBlock *tb) {
+    const uintptr_t CodeBase = (uintptr_t)tb->tc_ptr;
+    return CodeBase + tb->patch_next;
+}
+/* Return env's tracer as a SingleBlockTracer. The static_cast is unchecked,
+ * so the caller must know the tracer really has that type. */
+static inline SingleBlockTracer &getSingleBlockTracer(CPUArchState *env) {
+ return *static_cast<SingleBlockTracer *>(cpu_get_tracer(env));
+}
+/* Return env's tracer as a NETTracer. The static_cast is unchecked, so the
+ * caller must know the tracer really has that type. */
+static inline NETTracer &getNETTracer(CPUArchState *env) {
+ return *static_cast<NETTracer *>(cpu_get_tracer(env));
+}
+
+/* Free tb->image and reset the pointer to nullptr. The body is compiled only
+ * when both CONFIG_LLVM and CONFIG_SOFTMMU are defined; otherwise this is a
+ * no-op. */
+static inline void delete_image(TranslationBlock *tb)
+{
+#if defined(CONFIG_LLVM) && defined(CONFIG_SOFTMMU)
+ /* NOTE(review): scalar delete through a char pointer -- if the image is
+ * allocated with new[], this should be delete[]; confirm at the
+ * allocation site. */
+ delete (char *)tb->image;
+ tb->image = nullptr;
+#endif
+}
+
+/* Switch tb->mode from `from` to `to` with a compare-and-swap.
+ * Returns false without attempting the swap when the mode already differs
+ * from `from`; otherwise returns the outcome of the compare-and-swap. */
+static inline bool update_tb_mode(TranslationBlock *tb, int from, int to) {
+    return tb->mode == from && Atomic<int>::testandset(&tb->mode, from, to);
+}
+
+#endif
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
+
diff --git a/llvm/include/utils.h b/llvm/include/utils.h
new file mode 100644
index 0000000..90b36d9
--- /dev/null
+++ b/llvm/include/utils.h
@@ -0,0 +1,260 @@
+/*
+ * (C) 2016 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#ifndef __UTILS_H
+#define __UTILS_H
+
+#include <cstdint>
+#include <cstdlib>
+#include <sstream>
+#include <iomanip>
+#include <set>
+#include <map>
+#include <vector>
+#include "qemu-types.h"
+
+
+#ifndef timersub
+# define timersub(a, b, result) \
+ do { \
+ (result)->tv_sec = (a)->tv_sec - (b)->tv_sec; \
+ (result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \
+ if ((result)->tv_usec < 0) { \
+ --(result)->tv_sec; \
+ (result)->tv_usec += 1000000; \
+ } \
+ } while (0)
+#endif
+
+#if !defined(__i386__) && !defined(__x86_64__)
+#define USE_PTHREAD_MUTEX
+#endif
+
+/*
+ * Lock primitives behind hqemu::Mutex. Every macro takes a POINTER to the
+ * lock object. Arguments are parenthesized so that an expression argument
+ * (e.g. `base + i`) expands correctly.
+ */
+#if defined(USE_PTHREAD_MUTEX)
+# define hqemu_lock_t pthread_mutex_t
+# define hqemu_lock_init(lock) pthread_mutex_init((lock), nullptr)
+# define hqemu_lock(lock) pthread_mutex_lock((lock))
+# define hqemu_unlock(lock) pthread_mutex_unlock((lock))
+#else
+/* Spin lock: try a compare-and-swap 0 -> 1; on failure spin on a plain read
+ * with _mm_pause() until the lock looks free, then retry. */
+# define hqemu_lock_t volatile int
+# define hqemu_lock_init(lock) do { *(lock) = 0; } while(0)
+# define hqemu_lock(lock) \
+    do { \
+        while (!Atomic<int>::testandset((lock), 0, 1)) { \
+            while (*(lock)) _mm_pause(); \
+        } \
+    } while(0)
+/* barrier() is issued before the releasing store. */
+# define hqemu_unlock(lock) \
+    do { \
+        barrier(); \
+        *(lock) = 0; \
+    } while(0)
+#endif /* USE_PTHREAD_MUTEX */
+
+
+/*
+ * Atomic Utilities
+ */
+template<class T>
+class Atomic {
+public:
+    /* Atomically add one to *p and return the incremented value. */
+    static T inc_return(volatile T *p) {
+        return __sync_add_and_fetch(p, 1);
+    }
+    /* Compare-and-swap: store _new into *p only if *p equals _old.
+     * Returns true when the store happened. */
+    static bool testandset(volatile T *p, T _old, T _new) {
+        const bool Swapped = __sync_bool_compare_and_swap(p, _old, _new);
+        return Swapped;
+    }
+};
+
+
+/*
+ * Mutex
+ */
+namespace hqemu {
+/* Thin wrapper over hqemu_lock_t (a pthread mutex or a spin lock, depending
+ * on USE_PTHREAD_MUTEX). The lock is initialised by the constructor. */
+class Mutex {
+    hqemu_lock_t M;
+public:
+    Mutex() { hqemu_lock_init(&M); }
+    inline void acquire() { hqemu_lock(&M); }
+    inline void release() { hqemu_unlock(&M); }
+};
+
+/* Scoped lock: acquires the mutex on construction and releases it on
+ * destruction. */
+class MutexGuard {
+    Mutex &M;
+public:
+    explicit MutexGuard(Mutex &M) : M(M) { M.acquire(); }
+    ~MutexGuard() { M.release(); }
+
+    /* Non-copyable: a copied guard would release the same mutex twice. */
+    MutexGuard(const MutexGuard &) = delete;
+    MutexGuard &operator=(const MutexGuard &) = delete;
+};
+} /* namespace hqemu */
+
+
+/*
+ * GraphNode is used to describe the information of one node in a CFG.
+ */
+class GraphNode;
+typedef std::vector<GraphNode *> NodeVec;
+typedef std::set<GraphNode *> NodeSet;
+
+class GraphNode {
+ TranslationBlock *TB; /* Block represented by this node. */
+ NodeVec Children; /* Child nodes, in insertion order. */
+
+public:
+ GraphNode(TranslationBlock *tb) : TB(tb) {}
+
+ TranslationBlock *getTB() { return TB; }
+ target_ulong getGuestPC() { return TB->pc; }
+ /* Returns the child list by reference, so callers can modify it. */
+ NodeVec &getChildren() { return Children; }
+ /* Appends Node; no duplicate check is made. */
+ void insertChild(GraphNode *Node) {
+ Children.push_back(Node);
+ }
+
+ /* NOTE(review): presumably frees every node reachable from Root -- confirm
+ * against the implementation. */
+ static void DeleteCFG(GraphNode *Root);
+};
+
+/*
+ * ControlFlowGraph is used to build the whole program control flow graph (CFG).
+ * GlobalCFG uses this structure to maintain a whole program CFG connected by
+ * direct branches.
+ */
+class ControlFlowGraph {
+ hqemu::Mutex lock; /* Taken by reset() and insertLink(). */
+
+public:
+ typedef std::vector<TranslationBlock *> TBVec;
+ typedef std::map<TranslationBlock*, TBVec> SuccMap;
+ SuccMap SuccCFG; /* Successor list of each block. */
+
+ ControlFlowGraph() {}
+
+ hqemu::Mutex &getLock() { return lock; }
+ /* Returns the successor list of tb by reference WITHOUT taking the lock.
+ * Because it uses operator[], an empty entry is created when tb has none.
+ * NOTE(review): callers presumably hold getLock() themselves -- confirm. */
+ TBVec &getSuccessor(TranslationBlock *tb) {
+ return SuccCFG[tb];
+ }
+
+ /* Drop every recorded edge (under the lock). */
+ void reset() {
+ hqemu::MutexGuard locked(lock);
+ SuccCFG.clear();
+ }
+ /* Record the edge src -> dst (under the lock); duplicates are kept. */
+ void insertLink(TranslationBlock *src, TranslationBlock *dst) {
+ hqemu::MutexGuard locked(lock);
+ SuccCFG[src].push_back(dst);
+ }
+};
+
+
+/*
+ * Queue
+ */
+#if defined(__x86_64__)
+#define LOCK_FREE
+#endif
+
+#ifdef LOCK_FREE
+/* Node pointer paired with a counter.
+ * NOTE(review): the counter presumably serves as the modification tag of the
+ * lock-free queue -- confirm against the implementation. */
+struct pointer_t {
+ struct node_t *ptr;
+ unsigned long int count;
+};
+
+/* One queue node: the tagged link to the next node and the stored value. */
+struct node_t {
+ struct pointer_t next;
+ void *value;
+};
+
+/* Lock-free MS-queue */
+class Queue {
+ struct queue_t {
+ struct pointer_t head;
+ struct pointer_t tail;
+ };
+
+ /* Allocate a node holding `value` with a null next pointer.
+ * next.count is not written here.
+ * NOTE(review): presumably intentional (the counter is only a tag) --
+ * confirm against enqueue()/dequeue() before initialising it. */
+ node_t *new_node(void *value) {
+ node_t *node = new node_t;
+ node->next.ptr = nullptr;
+ node->value = value;
+ return node;
+ }
+ void delete_node(node_t *node) {
+ delete node;
+ }
+
+ queue_t Q;
+
+public:
+ Queue();
+ void enqueue(void *data);
+ void *dequeue();
+};
+#else
+/* Fallback queue used when LOCK_FREE is not defined: a singly linked list
+ * with head/tail pointers and a pthread mutex member. */
+class Queue {
+ struct node_t {
+ struct node_t *next;
+ void *value;
+ node_t(void *v) : next(nullptr), value(v) {}
+ };
+ struct queue_t {
+ struct node_t *head;
+ struct node_t *tail;
+ };
+
+ /* NOTE(review): presumably guards Q in enqueue()/dequeue() -- confirm
+ * against the implementation. */
+ pthread_mutex_t lock;
+ queue_t Q;
+
+public:
+ Queue();
+ void enqueue(void *data);
+ void *dequeue();
+};
+#endif
+
+
+/* Generator of unique 64-bit identifiers backed by one shared counter. */
+class UUID {
+    static uint64_t uuid;
+
+public:
+    /* Atomically advance the counter and return its NEW value. Both
+     * implementations return the post-increment value, so the sequence of
+     * identifiers does not depend on the host architecture. */
+#if defined(__x86_64__)
+    static uint64_t gen() {
+        uint64_t i = 1;
+        /* lock xadd adds i to uuid and leaves the previous uuid in i. */
+        asm volatile("lock; xaddq %0, %1"
+                     : "+r" (i), "+m" (uuid) :: "memory");
+        return i + 1;
+    }
+#else
+    static uint64_t gen() {
+        static pthread_mutex_t uuid_lock = PTHREAD_MUTEX_INITIALIZER;
+        pthread_mutex_lock(&uuid_lock);
+        /* Pre-increment, to match the value the x86-64 path returns. */
+        uint64_t id = ++uuid;
+        pthread_mutex_unlock(&uuid_lock);
+        return id;
+    }
+#endif
+};
+
+/* Return Num formatted in hexadecimal with a leading "0x". */
+template <class T>
+static inline std::string toHexString(T Num) {
+    std::ostringstream Out;
+    Out << "0x";
+    Out << std::hex << Num;
+    return Out.str();
+}
+
+/* Return Num in decimal, left-padded with '0' to at least 16 characters. */
+template <class T>
+static inline std::string toZextStr(T Num) {
+    std::ostringstream Out;
+    Out << std::setw(16) << std::setfill('0') << Num;
+    return Out.str();
+}
+
+/* Misc utilities */
+pid_t gettid();
+void patch_jmp(volatile uintptr_t patch_addr, volatile uintptr_t addr);
+void patch_jmp(volatile uintptr_t patch_addr, volatile void *addr);
+
+#endif
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
+
diff --git a/llvm/llvm-annotate.cpp b/llvm/llvm-annotate.cpp
new file mode 100644
index 0000000..040c771
--- /dev/null
+++ b/llvm/llvm-annotate.cpp
@@ -0,0 +1,136 @@
+/*
+ * (C) 2015 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#include "xml/tinyxml2.h"
+#include "optimization.h"
+#include "llvm-debug.h"
+#include "llvm-annotate.h"
+
+
+using namespace tinyxml2;
+static hqemu::Mutex Lock;
+
+#if defined(CONFIG_USER_ONLY)
+extern "C" const char *filename;
+#endif
+
+/* In user-mode builds, look for "<filename>.xml" and load the annotations it
+ * contains. Nothing is loaded in other builds. */
+AnnotationFactory::AnnotationFactory()
+{
+#if defined(CONFIG_USER_ONLY)
+    MetaFile = std::string(filename) + ".xml";
+    /* A parse failure just leaves the annotation table empty. */
+    (void)ParseXML(MetaFile.c_str());
+#endif
+}
+
+/* Free every LoopMetadata object held in the Loops table. */
+AnnotationFactory::~AnnotationFactory()
+{
+    for (auto It = Loops.begin(), End = Loops.end(); It != End; ++It)
+        delete It->second;
+}
+
+/* Return the tag name of the element (e.g. "address" for <address>). */
+static inline const char *getAttrName(XMLElement *Attr)
+{
+ return Attr->Name();
+}
+
+/* Return the text content of the element, or "" when it has none.
+ * XMLElement::GetText() yields null both for an empty element and for one
+ * whose first child is not a text node (e.g. a nested element), so neither
+ * case dereferences a null pointer. */
+static inline const char *getAttrValue(XMLElement *Attr)
+{
+    const char *Text = Attr->GetText();
+    return Text ? Text : "";
+}
+
+/* Build a LoopMetadata from the child elements of a loop node.
+ * Children with empty text are ignored, as are children with unknown names.
+ * Returns nullptr for a null node, or when Address still equals
+ * (target_ulong)-1 after parsing; the caller owns the returned object. */
+static LoopMetadata *ParseXMLLoop(XMLElement *LoopNode)
+{
+    if (!LoopNode)
+        return nullptr;
+
+    LoopMetadata *MD = new LoopMetadata();
+    for (XMLElement *Field = LoopNode->FirstChildElement(); Field;
+         Field = Field->NextSiblingElement()) {
+        const std::string Key = getAttrName(Field);
+        const char *Text = getAttrValue(Field);
+        if (Text[0] == '\0')
+            continue;
+
+        if (Key == "address") {
+            /* The address is given in hexadecimal. */
+            MD->Address = (target_ulong)strtoull(Text, nullptr, 16);
+        } else if (Key == "length") {
+            MD->Length = (uint32_t)strtoul(Text, nullptr, 10);
+        } else if (Key == "vs") {
+            MD->VS = (uint32_t)strtoul(Text, nullptr, 10);
+        } else if (Key == "vf") {
+            MD->VF = (uint32_t)strtoul(Text, nullptr, 10);
+        } else if (Key == "distance") {
+            /* A distance of zero is stored as INT_MAX. */
+            MD->Distance = atoi(Text);
+            if (MD->Distance == 0)
+                MD->Distance = INT_MAX;
+        } else if (Key == "start") {
+            MD->Start = atoi(Text);
+        } else if (Key == "end") {
+            MD->End = atoi(Text);
+        } else if (Key == "stride") {
+            MD->Stride = atoi(Text);
+        }
+    }
+
+    if (MD->Address == (target_ulong)-1) {
+        delete MD;
+        return nullptr;
+    }
+
+    return MD;
+}
+
+/* Load the annotation file `name` and record every <loop> entry in Loops,
+ * keyed by loop address.
+ * Returns 0 on success and 1 when the file cannot be loaded or has no
+ * <hqemu> root element. */
+int AnnotationFactory::ParseXML(const char *name)
+{
+    XMLDocument Doc;
+
+    if (Doc.LoadFile(name) != 0) {
+        dbg() << DEBUG_ANNOTATE << "Disable annotation support."
+              << " (cannot find " << name << ")\n";
+        return 1;
+    }
+
+    dbg() << DEBUG_ANNOTATE << "Found an annotation file " << name << "\n";
+
+    /* A legal annotation should be embedded within the <hqemu> tag. For example:
+     * <hqemu><loop><address>...</address></loop></hqemu> */
+    XMLElement *RootNode = Doc.FirstChildElement("hqemu");
+    if (RootNode == nullptr)
+        return 1;
+
+    /* Visit only <loop> children; other siblings are not loop descriptions. */
+    for (XMLElement *LoopNode = RootNode->FirstChildElement("loop"); LoopNode;
+         LoopNode = LoopNode->NextSiblingElement("loop")) {
+        LoopMetadata *LoopMD = ParseXMLLoop(LoopNode);
+        if (!LoopMD)
+            continue;
+
+        /* A later entry for the same address replaces the earlier one; free
+         * the old object instead of leaking it (deleting null is a no-op). */
+        auto &Slot = Loops[LoopMD->Address];
+        delete Slot;
+        Slot = LoopMD;
+    }
+
+    dbg() << DEBUG_ANNOTATE
+          << "Found " << Loops.size() << " loop annotation(s).\n";
+    return 0;
+}
+
+/* Return the annotation recorded for `addr`, or nullptr when there is none.
+ * The lookup runs under the file-level Lock. */
+LoopMetadata *AnnotationFactory::getLoopAnnotation(target_ulong addr)
+{
+    hqemu::MutexGuard locked(Lock);
+
+    auto It = Loops.find(addr);
+    return It == Loops.end() ? nullptr : It->second;
+}
+
+/* Report whether an annotation is recorded for `addr` (checked under the
+ * file-level Lock). */
+bool AnnotationFactory::hasLoopAnnotation(target_ulong addr)
+{
+    hqemu::MutexGuard locked(Lock);
+    return Loops.count(addr) != 0;
+}
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
diff --git a/llvm/llvm-debug.cpp b/llvm/llvm-debug.cpp
new file mode 100644
index 0000000..e5d715a
--- /dev/null
+++ b/llvm/llvm-debug.cpp
@@ -0,0 +1,229 @@
+/*
+ * (C) 2016 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm-debug.h"
+#include "llvm.h"
+
+
+/* Look up the registered target for TripleName. Returns nullptr when the
+ * lookup fails; the error text is discarded. */
+static const Target *getTarget(std::string TripleName)
+{
+    std::string ErrMsg;
+    return TargetRegistry::lookupTarget(TripleName.c_str(), ErrMsg);
+}
+
+/* Create a disassembler for TripleName. Returns nullptr when the triple is
+ * empty, is "UnknownArch", or has no registered target; otherwise the caller
+ * owns the returned object. */
+MCDisasm *MCDisasm::CreateMCDisasm(std::string TripleName, bool isHost)
+{
+    const bool Usable = !TripleName.empty() && TripleName != "UnknownArch";
+    const Target *TheTarget = Usable ? getTarget(TripleName) : nullptr;
+
+    return TheTarget ? new MCDisasm(TheTarget, TripleName, isHost) : nullptr;
+}
+
+/*
+ * Build the MC-layer objects needed to disassemble code for TripleName:
+ * register, assembly, subtarget and instruction info, the disassembler, the
+ * optional instruction analysis and the instruction printer. A missing
+ * mandatory component is reported through hqemu_error().
+ * isHost records whether this instance disassembles host code.
+ */
+MCDisasm::MCDisasm(const llvm::Target *TheTarget, std::string TripleName,
+                   bool isHost)
+    : HostDisAsm(isHost), NoShowRawInsn(false)
+{
+    const char *triple = TripleName.c_str();
+    Triple TheTriple(Triple::normalize(TripleName));
+
+    /* Raw instruction bytes are not dumped for x86 targets. */
+    switch (TheTriple.getArch()) {
+    case Triple::x86:
+    case Triple::x86_64:
+        NoShowRawInsn = true;
+        break;
+    default:
+        NoShowRawInsn = false;
+        break;
+    }
+
+    const MCRegisterInfo *MRI = TheTarget->createMCRegInfo(TripleName);
+    if (!MRI)
+        hqemu_error("no register info for target %s.\n", triple);
+    const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(*MRI, TripleName);
+    if (!MAI)
+        hqemu_error("no assembly info for target %s\n", triple);
+    const MCSubtargetInfo *STI = TheTarget->createMCSubtargetInfo(TripleName, "", "");
+    if (!STI)
+        hqemu_error("no subtarget info for target %s\n", triple);
+    const MCInstrInfo *MII = TheTarget->createMCInstrInfo();
+    if (!MII)
+        hqemu_error("no instruction info for target %s\n", triple);
+
+    /* The disassembler is created from a reference to the context and is
+     * kept in this->DisAsm after the constructor returns, so the context
+     * must not be a constructor-local object. It is heap-allocated and, like
+     * the other MC objects created here, never freed by this class. */
+    MCContext *Ctx = new MCContext(MAI, MRI, nullptr);
+    const MCDisassembler *DisAsm = TheTarget->createMCDisassembler(*STI, *Ctx);
+
+    if (!DisAsm)
+        hqemu_error("no disassembler for target %s\n", TripleName.c_str());
+
+    /* The instruction analysis is optional; its users test MIA for null. */
+    const MCInstrAnalysis *MIA = TheTarget->createMCInstrAnalysis(MII);
+
+    int AsmPrinterVariant = MAI->getAssemblerDialect();
+#if defined(LLVM_V35)
+    MCInstPrinter *IP = TheTarget->createMCInstPrinter(
+        AsmPrinterVariant, *MAI, *MII, *MRI, *STI);
+#else
+    MCInstPrinter *IP = TheTarget->createMCInstPrinter(Triple(TripleName),
+        AsmPrinterVariant, *MAI, *MII, *MRI);
+#endif
+    if (!IP)
+        hqemu_error("no instruction printer for target %s\n", TripleName.c_str());
+
+    IP->setPrintImmHex(true);
+
+    this->DisAsm = DisAsm;
+    this->STI = STI;
+    this->IP = IP;
+    this->MIA = MIA;
+}
+
+/* NOTE(review): the body is empty, so the objects the constructor stores in
+ * DisAsm, STI, IP and MIA are not released here -- confirm that this is
+ * intentional. */
+MCDisasm::~MCDisasm()
+{
+}
+
+
+/* Write the raw instruction bytes to OS as two lower-case hex digits each,
+ * every byte followed by a space. The bytes are emitted from last to first.
+ * Nothing is written when NoShowRawInsn is set. */
+void MCDisasm::DumpBytes(ArrayRef<uint8_t> bytes, raw_ostream &OS)
+{
+    static const char HexDigits[] = "0123456789abcdef";
+
+    if (NoShowRawInsn)
+        return;
+
+    OS << " ";
+    for (auto It = bytes.rbegin(); It != bytes.rend(); ++It) {
+        const uint8_t Byte = *It;
+        OS << HexDigits[Byte >> 4];
+        OS << HexDigits[Byte & 0xF];
+        OS << ' ';
+    }
+}
+
+#if defined(LLVM_V35)
+/* MemoryObject over a caller-owned byte buffer, used by the LLVM_V35 variant
+ * of PrintInAsm. Addresses are translated by subtracting BasePC. */
+class DisasmMemoryObject : public MemoryObject {
+ uint8_t *Bytes; /* Buffer start; not owned. */
+ uint64_t Size; /* Buffer length in bytes. */
+ uint64_t BasePC; /* Address that maps to Bytes[0]. */
+public:
+ DisasmMemoryObject(uint8_t *bytes, uint64_t size, uint64_t basePC) :
+ Bytes(bytes), Size(size), BasePC(basePC) {}
+
+ uint64_t getBase() const override { return BasePC; }
+ uint64_t getExtent() const override { return Size; }
+
+ /* Store the byte at Addr into *Byte and return 0, or return -1 when Addr
+ * is outside [BasePC, BasePC + Size). The unsigned subtraction also
+ * rejects addresses below BasePC. */
+ int readByte(uint64_t Addr, uint8_t *Byte) const override {
+ if (Addr - BasePC >= Size)
+ return -1;
+ *Byte = Bytes[Addr - BasePC];
+ return 0;
+ }
+ /* Return M bytes starting at offset N of the buffer; no bounds check. */
+ ArrayRef<uint8_t> slice(size_t N, size_t M) const {
+ return makeArrayRef<uint8_t>(Bytes+N, M);
+ }
+};
+
+/* LLVM_V35 variant. Disassemble the Size bytes stored at memory address Addr
+ * and write one line per instruction to DM.debug(). Lines are labelled with
+ * GuestAddr, which advances by the length of each decoded instruction. */
+void MCDisasm::PrintInAsm(uint64_t Addr, uint64_t Size, uint64_t GuestAddr)
+{
+ uint64_t Len;
+ DisasmMemoryObject MemoryObject((uint8_t *)Addr, Size, Addr);
+
+ for (uint64_t Start = 0; Start < Size; Start += Len) {
+ MCInst Inst;
+ std::string Str;
+ raw_string_ostream OS(Str);
+ if (DisAsm->getInstruction(Inst, Len, MemoryObject,
+ Addr + Start, nulls(), nulls())) {
+ OS << format("0x%08" PRIx64 ":", GuestAddr);
+
+ DumpBytes(MemoryObject.slice(Start, Len), OS);
+ IP->printInst(&Inst, OS, "");
+
+ /* For calls and branches, append the evaluated target address. */
+ if (MIA && (MIA->isCall(Inst) || MIA->isUnconditionalBranch(Inst) ||
+ MIA->isConditionalBranch(Inst))) {
+ uint64_t Target;
+ if (MIA->evaluateBranch(Inst, GuestAddr, Len, Target)) {
+ OS << " <" << format("0x%08" PRIx64, Target) << ">";
+ /* Tag host-code jumps that go to tb_ret_addr. */
+ if (HostDisAsm) {
+ if (Target == (uint64_t)tb_ret_addr)
+ OS << " !tb_ret_addr";
+ }
+ }
+ }
+ } else {
+ OS << "\t<internal disassembler error>";
+ /* Skip at least one byte so the loop always makes progress. */
+ if (Len == 0)
+ Len = 1;
+ }
+
+ DM.debug() << OS.str() << "\n";
+ GuestAddr += Len;
+ }
+}
+#else
+/* Non-LLVM_V35 variant. Disassemble the Size bytes stored at memory address
+ * Addr and write one line per instruction to DM.debug(). Lines are labelled
+ * with GuestAddr, which advances by the length of each decoded instruction. */
+void MCDisasm::PrintInAsm(uint64_t Addr, uint64_t Size, uint64_t GuestAddr)
+{
+ uint64_t Len;
+ ArrayRef<uint8_t> Bytes(reinterpret_cast<const uint8_t *>(Addr), Size);
+
+ for (uint64_t Start = 0; Start < Size; Start += Len) {
+ MCInst Inst;
+ std::string Str;
+ raw_string_ostream OS(Str);
+ if (DisAsm->getInstruction(Inst, Len, Bytes.slice(Start),
+ Addr + Start, nulls(), nulls())) {
+ OS << format("0x%08" PRIx64 ":", GuestAddr);
+
+ DumpBytes(Bytes.slice(Start, Len), OS);
+ IP->printInst(&Inst, OS, "", *STI);
+
+ /* For calls and branches, append the evaluated target address. */
+ if (MIA && (MIA->isCall(Inst) || MIA->isUnconditionalBranch(Inst) ||
+ MIA->isConditionalBranch(Inst))) {
+ uint64_t Target;
+ if (MIA->evaluateBranch(Inst, GuestAddr, Len, Target)) {
+ OS << " <" << format("0x%08" PRIx64, Target) << ">";
+ /* Tag host-code jumps that go to tb_ret_addr. */
+ if (HostDisAsm) {
+ if (Target == (uint64_t)tb_ret_addr)
+ OS << " !tb_ret_addr";
+ }
+ }
+ }
+ } else {
+ OS << "\t<internal disassembler error>";
+ /* Skip at least one byte so the loop always makes progress. */
+ if (Len == 0)
+ Len = 1;
+ }
+
+ DM.debug() << OS.str() << "\n";
+ GuestAddr += Len;
+ }
+}
+#endif
+
+/* Print an "OUT:" header with the code size, then disassemble the Size bytes
+ * at Addr using Addr itself as the displayed address. */
+void MCDisasm::PrintOutAsm(uint64_t Addr, uint64_t Size)
+{
+ auto &OS = DM.debug();
+ OS << "\nOUT: [size=" << Size << "]\n";
+ PrintInAsm(Addr, Size, Addr);
+ OS << "\n";
+}
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
diff --git a/llvm/llvm-hard-perfmon.cpp b/llvm/llvm-hard-perfmon.cpp
new file mode 100644
index 0000000..051ee02
--- /dev/null
+++ b/llvm/llvm-hard-perfmon.cpp
@@ -0,0 +1,289 @@
+/*
+ * (C) 2018 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#include <string.h>
+#include "config-target.h"
+#include "tracer.h"
+#include "llvm.h"
+#include "llvm-soft-perfmon.h"
+#include "llvm-hard-perfmon.h"
+
+using namespace pmu;
+
+
+ /* Construct an idle monitor: no monitor thread id yet (-1) and the stop flag set. */
+ HardwarePerfmon::HardwarePerfmon() : MonThreadID(-1), MonThreadStop(true)
+ {
+ }
+
+ /*
+  * Shut the PMU layer down and raise the stop flag that the monitor thread
+  * loop polls. Nothing is done when running under VTune.
+  */
+ HardwarePerfmon::~HardwarePerfmon()
+ {
+     if (!LLVMEnv::RunWithVTune) {
+         PMU::Finalize();
+
+         if (!MonThreadStop)
+             MonThreadStop = true;
+     }
+ }
+
+ /*
+  * Set up HPM with the monitor thread id.
+  *
+  * monitor_thread_tid - thread id stored as the PMU signal receiver unless a
+  *                      dedicated monitor thread is started, in which case
+  *                      StartMonThread() replaces it with that thread's id.
+  *
+  * Does nothing when running under VTune. A PMU initialization failure is
+  * only reported on the debug stream.
+  */
+ void HardwarePerfmon::Init(int monitor_thread_tid)
+ {
+     if (LLVMEnv::RunWithVTune)
+         return;
+
+     MonThreadID = monitor_thread_tid;
+
+ #if defined(ENABLE_HPM_THREAD)
+     /* Start HPM thread. */
+     StartMonThread();
+ #else
+     /* If we attempt to profile hotspot but do not run the HPM translation
+      * mode, we enable the HPM monitor thread for the hotspot profiling in
+      * order to avoid deadlock. */
+     if (SP->Mode & SPM_HOTSPOT)
+         StartMonThread();
+ #endif
+
+     /* Initialize the PMU tools. */
+     PMUConfig Cfg;
+     memset(&Cfg, 0, sizeof(Cfg));
+     Cfg.SignalReceiver = MonThreadID;
+     Cfg.Timeout = 400;
+
+     int Status = PMU::Init(Cfg);
+     if (Status == PMU_OK)
+         return;
+
+     dbg() << DEBUG_HPM << "Failed to initialize PMU (" << PMU::strerror(Status)
+           << ").\n";
+ }
+
+ /* Stop the monitor: forwards to PMU::Pause() unless running under VTune. */
+ void HardwarePerfmon::Pause()
+ {
+     if (!LLVMEnv::RunWithVTune)
+         PMU::Pause();
+ }
+
+ /* Restart the monitor: forwards to PMU::Resume() unless running under VTune. */
+ void HardwarePerfmon::Resume()
+ {
+     if (!LLVMEnv::RunWithVTune)
+         PMU::Resume();
+ }
+
+ /*
+  * Start monitor thread.
+  *
+  * Launches a detached thread running MonitorFunc() and then polls every
+  * 200 microseconds until that thread has stored its id in MonThreadID.
+  */
+ void HardwarePerfmon::StartMonThread()
+ {
+     /* Reset the handshake state before the thread can observe it. */
+     MonThreadID = -1;
+     MonThreadStop = false;
+
+     MonThread = std::thread([this] { MonitorFunc(); });
+     MonThread.detach();
+
+     /* MonitorFunc() publishes its thread id as its first action. */
+     while (MonThreadID == -1) {
+         usleep(200);
+     }
+ }
+
+ /*
+  * Monitor thread routine: record this thread's id in MonThreadID, call
+  * copy_tcg_context(), then sleep in 10 ms steps until MonThreadStop is set.
+  */
+ void HardwarePerfmon::MonitorFunc()
+ {
+     MonThreadID = gettid();
+     copy_tcg_context();
+
+     for (;;) {
+         if (MonThreadStop)
+             break;
+         usleep(10000);
+     }
+ }
+
+ /*
+  * Sample callback for cover-set profiling. Ownership of the sample list is
+  * taken out of the unique_ptr and the raw pointer is appended to
+  * SP->SampleListVec; the soft-perfmon will release the resource later.
+  * Hndl and Opaque are not used.
+  */
+ static void CoverSetHandler(Handle Hndl, std::unique_ptr<SampleList> DataPtr,
+                             void *Opaque)
+ {
+     SP->SampleListVec.push_back(DataPtr.release());
+ }
+
+ /*
+  * Create per-thread perfmon state for the calling thread and attach it to
+  * Tracer->Perf. Runs under Lock. The registration is always logged; under
+  * VTune no state is created.
+  */
+ void HardwarePerfmon::RegisterThread(BaseTracer *Tracer)
+ {
+     hqemu::MutexGuard Locked(Lock);
+
+     dbg() << DEBUG_HPM << "Register thread " << gettid() << ".\n";
+
+     if (LLVMEnv::RunWithVTune)
+         return;
+
+     PerfmonData *ThreadData = new PerfmonData(gettid());
+
+     /* Set up the basic counters first, then cover-set sampling. */
+     ThreadData->MonitorBasic(HPM_INIT);
+     ThreadData->MonitorCoverSet(HPM_INIT);
+
+     Tracer->Perf = static_cast<void *>(ThreadData);
+ }
+
+ /*
+  * Finalize and free the perfmon state attached to Tracer->Perf, then clear
+  * the pointer. Runs under Lock. The call is always logged; nothing else
+  * happens under VTune or when no state is attached.
+  */
+ void HardwarePerfmon::UnregisterThread(BaseTracer *Tracer)
+ {
+     hqemu::MutexGuard Locked(Lock);
+
+     dbg() << DEBUG_HPM << "Unregister thread " << gettid() << ".\n";
+
+     if (LLVMEnv::RunWithVTune || !Tracer->Perf)
+         return;
+
+     PerfmonData *ThreadData = static_cast<PerfmonData *>(Tracer->Perf);
+     ThreadData->MonitorBasic(HPM_FINALIZE);
+     ThreadData->MonitorCoverSet(HPM_FINALIZE);
+     delete ThreadData;
+
+     Tracer->Perf = nullptr;
+ }
+
+ /*
+  * Signal HPM_START to the tracer's perfmon state, if any is attached.
+  * Runs under Lock.
+  */
+ void HardwarePerfmon::NotifyCacheEnter(BaseTracer *Tracer)
+ {
+     hqemu::MutexGuard Locked(Lock);
+
+     if (PerfmonData *ThreadData = static_cast<PerfmonData *>(Tracer->Perf))
+         ThreadData->MonitorBasic(HPM_START);
+ }
+
+ /*
+  * Signal HPM_STOP to the tracer's perfmon state, if any is attached.
+  * Runs under Lock.
+  */
+ void HardwarePerfmon::NotifyCacheLeave(BaseTracer *Tracer)
+ {
+     hqemu::MutexGuard Locked(Lock);
+
+     if (PerfmonData *ThreadData = static_cast<PerfmonData *>(Tracer->Perf))
+         ThreadData->MonitorBasic(HPM_STOP);
+ }
+
+/*
+ * PerfmonData
+ */
+ /* Remember the owning thread id; no other member is initialized here. */
+ PerfmonData::PerfmonData(int tid) : TID(tid)
+ {
+ }
+
+ /* Nothing to release here; event handles are cleaned up via HPM_FINALIZE. */
+ PerfmonData::~PerfmonData()
+ {
+ }
+
+ /*
+  * Drive the basic hardware counters (instructions, branches, loads, stores)
+  * for this thread. Does nothing unless SP->Mode has SPM_HPM set.
+  *
+  * Ctl selects the action:
+  *   HPM_INIT     - create each counter event and start it if creation succeeded.
+  *   HPM_FINALIZE - read and clean up every valid handle, then add the values
+  *                  to the totals kept in SP.
+  *   HPM_START    - snapshot the branch/load/store counters into LastNum*.
+  *   HPM_STOP     - read the branch/load/store counters into locals.
+  * Any other value is ignored.
+  */
+ void PerfmonData::MonitorBasic(HPMControl Ctl)
+ {
+ if (!(SP->Mode & SPM_HPM))
+ return;
+
+ switch (Ctl) {
+ case HPM_INIT:
+ /* Each event is independent: a failed CreateEvent just skips that counter. */
+ if (PMU::CreateEvent(PMU_INSTRUCTIONS, ICountHndl) == PMU_OK) {
+ dbg() << DEBUG_HPM << "Register event: # instructions.\n";
+ PMU::Start(ICountHndl);
+ }
+ if (PMU::CreateEvent(PMU_BRANCH_INSTRUCTIONS, BranchHndl) == PMU_OK) {
+ dbg() << DEBUG_HPM << "Register event: # branch instructions.\n";
+ PMU::Start(BranchHndl);
+ }
+ if (PMU::CreateEvent(PMU_MEM_LOADS, MemLoadHndl) == PMU_OK) {
+ dbg() << DEBUG_HPM << "Register event: # load instructions.\n";
+ PMU::Start(MemLoadHndl);
+ }
+ if (PMU::CreateEvent(PMU_MEM_STORES, MemStoreHndl) == PMU_OK) {
+ dbg() << DEBUG_HPM << "Register event: # store instructions.\n";
+ PMU::Start(MemStoreHndl);
+ }
+ break;
+ case HPM_FINALIZE:
+ {
+ /* Counters whose handle is invalid contribute 0 to the totals. */
+ uint64_t NumInsns = 0, NumBranches = 0, NumLoads = 0, NumStores = 0;
+ if (ICountHndl != PMU_INVALID_HNDL) {
+ PMU::ReadEvent(ICountHndl, NumInsns);
+ PMU::Cleanup(ICountHndl);
+ }
+ if (BranchHndl != PMU_INVALID_HNDL) {
+ PMU::ReadEvent(BranchHndl, NumBranches);
+ PMU::Cleanup(BranchHndl);
+ }
+ if (MemLoadHndl != PMU_INVALID_HNDL) {
+ PMU::ReadEvent(MemLoadHndl, NumLoads);
+ PMU::Cleanup(MemLoadHndl);
+ }
+ if (MemStoreHndl != PMU_INVALID_HNDL) {
+ PMU::ReadEvent(MemStoreHndl, NumStores);
+ PMU::Cleanup(MemStoreHndl);
+ }
+
+ SP->NumInsns += NumInsns;
+ SP->NumBranches += NumBranches;
+ SP->NumLoads += NumLoads;
+ SP->NumStores += NumStores;
+ break;
+ }
+ case HPM_START:
+ /* The instruction counter is not snapshotted here. */
+ if (BranchHndl != PMU_INVALID_HNDL)
+ PMU::ReadEvent(BranchHndl, LastNumBranches);
+ if (MemLoadHndl != PMU_INVALID_HNDL)
+ PMU::ReadEvent(MemLoadHndl, LastNumLoads);
+ if (MemStoreHndl != PMU_INVALID_HNDL)
+ PMU::ReadEvent(MemStoreHndl, LastNumStores);
+ break;
+ case HPM_STOP:
+ {
+ /* NOTE(review): the values read below are never used, and no delta
+ * against LastNum* is computed in this function - confirm whether
+ * that is intentional. */
+ uint64_t NumBranches = 0, NumLoads = 0, NumStores = 0;
+ if (BranchHndl != PMU_INVALID_HNDL)
+ PMU::ReadEvent(BranchHndl, NumBranches);
+ if (MemLoadHndl != PMU_INVALID_HNDL)
+ PMU::ReadEvent(MemLoadHndl, NumLoads);
+ if (MemStoreHndl != PMU_INVALID_HNDL)
+ PMU::ReadEvent(MemStoreHndl, NumStores);
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
+ /*
+  * Drive instruction-pointer sampling for cover-set profiling. Does nothing
+  * unless SP->Mode has SPM_HOTSPOT set.
+  *
+  * Ctl selects the action:
+  *   HPM_INIT     - configure sampling on PMU_INSTRUCTIONS (4 pages, period
+  *                  1e5, watermark at half of the buffer) with
+  *                  CoverSetHandler as callback, and start it on success.
+  *   HPM_FINALIZE - clean up the sampling handle when it is valid.
+  * HPM_START, HPM_STOP and any other value are ignored.
+  */
+ void PerfmonData::MonitorCoverSet(HPMControl Ctl)
+ {
+     if (!(SP->Mode & SPM_HOTSPOT))
+         return;
+
+     if (Ctl == HPM_INIT) {
+         Sample1Config Cfg;
+         memset(&Cfg, 0, sizeof(Cfg));
+         Cfg.EventCode = PMU_INSTRUCTIONS;
+         Cfg.NumPages = 4;
+         Cfg.Period = 1e5;
+         Cfg.Watermark = Cfg.NumPages * getpagesize() / 2;
+         Cfg.SampleHandler = CoverSetHandler;
+         Cfg.Opaque = static_cast<void *>(this);
+
+         if (PMU::CreateSampleIP(Cfg, CoverSetHndl) != PMU_OK)
+             return;
+
+         dbg() << DEBUG_HPM << "Register event: cover set sampling.\n";
+         PMU::Start(CoverSetHndl);
+         return;
+     }
+
+     if (Ctl == HPM_FINALIZE && CoverSetHndl != PMU_INVALID_HNDL)
+         PMU::Cleanup(CoverSetHndl);
+ }
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
diff --git a/llvm/llvm-opc-mmu.cpp b/llvm/llvm-opc-mmu.cpp
new file mode 100644
index 0000000..9d2e60f
--- /dev/null
+++ b/llvm/llvm-opc-mmu.cpp
@@ -0,0 +1,344 @@
+/*
+ * (C) 2015 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ *
+ * This file provides the LLVM IR generators for guest memory loads and stores
+ * (QEMULoad/QEMUStore) in user mode and in system mode with a software TLB.
+ */
+
+#include "llvm-debug.h"
+#include "llvm.h"
+#include "llvm-opc.h"
+#include "llvm-target.h"
+#include "utils.h"
+
+#if defined(CONFIG_SOFTMMU)
+extern "C" {
+extern const void * const llvm_ld_helpers[16];
+extern const void * const llvm_st_helpers[16];
+};
+#endif
+
+
+#if defined(CONFIG_USER_ONLY)
+ /*
+  * User-mode guest load.
+  *
+  * AddrL - guest address value; AddrH is not used in this variant.
+  * oi    - memop/index pair; only the memop part is consulted.
+  *
+  * The address is turned into a pointer of the access width in address space
+  * Segment. When GUEST_BASE is non-zero and no segment is in use, the guest
+  * base register is added first through a byte-pointer GEP. The load is
+  * created with the volatile flag set, tagged as guest memory and passed
+  * through ConvertEndian() before being returned.
+  */
+ Value *IRFactory::QEMULoad(Value *AddrL, Value *AddrH, TCGMemOpIdx oi)
+ {
+     TCGMemOp opc = get_memop(oi);
+     PointerType *AccessPtrTy = getPointerTy(getSizeInBits(opc), Segment);
+     Value *HostAddr = AddrL;
+
+     if (GUEST_BASE == 0 || Segment != 0) {
+         HostAddr = ITP(HostAddr, AccessPtrTy);
+     } else {
+         HostAddr = ITP(HostAddr, Int8PtrTy);
+         HostAddr = GetElementPtrInst::CreateInBounds(HostAddr, GuestBaseReg.Base, "", LastInst);
+         if (HostAddr->getType() != AccessPtrTy)
+             HostAddr = CAST(HostAddr, AccessPtrTy);
+     }
+
+     LoadInst *Ld = new LoadInst(HostAddr, "", true, LastInst);
+     MF->setGuestMemory(Ld);
+
+     return ConvertEndian(Ld, opc);
+ }
+
+ /*
+  * User-mode guest store.
+  *
+  * Data  - value to store; passed through ConvertEndian() first.
+  * AddrL - guest address value; AddrH is not used in this variant.
+  * oi    - memop/index pair; only the memop part is consulted.
+  *
+  * Address formation mirrors the user-mode load: a direct int-to-pointer
+  * cast, or a byte-pointer GEP by the guest base register when GUEST_BASE is
+  * non-zero and no segment is in use. The store is created with the volatile
+  * flag set and tagged as guest memory.
+  */
+ void IRFactory::QEMUStore(Value *Data, Value *AddrL, Value *AddrH, TCGMemOpIdx oi)
+ {
+     TCGMemOp opc = get_memop(oi);
+     PointerType *AccessPtrTy = getPointerTy(getSizeInBits(opc), Segment);
+     Value *HostAddr = AddrL;
+
+     Data = ConvertEndian(Data, opc);
+
+     if (GUEST_BASE == 0 || Segment != 0) {
+         HostAddr = ITP(HostAddr, AccessPtrTy);
+     } else {
+         HostAddr = ITP(HostAddr, Int8PtrTy);
+         HostAddr = GetElementPtrInst::CreateInBounds(HostAddr, GuestBaseReg.Base, "", LastInst);
+         if (HostAddr->getType() != AccessPtrTy)
+             HostAddr = CAST(HostAddr, AccessPtrTy);
+     }
+
+     StoreInst *St = new StoreInst(Data, HostAddr, true, LastInst);
+     MF->setGuestMemory(St);
+ }
+
+#else /* !CONFIG_USER_ONLY */
+
+ /*
+  * Return the byte offset of tlb_table[mem_index][0] inside CPUArchState.
+  *
+  * mem_index - MMU mode index; must be below NB_MMU_MODES (at most 6 modes
+  *             are handled here).
+  *
+  * An unsupported index is reported through IRError(); should that call
+  * return, the result is 0.
+  */
+ inline long getTLBOffset(int mem_index)
+ {
+     long Offset = 0;
+
+     switch (mem_index) {
+ #if NB_MMU_MODES > 0
+     case 0: Offset = offsetof(CPUArchState, tlb_table[0][0]); break;
+ #endif
+ #if NB_MMU_MODES > 1
+     case 1: Offset = offsetof(CPUArchState, tlb_table[1][0]); break;
+ #endif
+ #if NB_MMU_MODES > 2
+     case 2: Offset = offsetof(CPUArchState, tlb_table[2][0]); break;
+ #endif
+ #if NB_MMU_MODES > 3
+     case 3: Offset = offsetof(CPUArchState, tlb_table[3][0]); break;
+ #endif
+ #if NB_MMU_MODES > 4
+     case 4: Offset = offsetof(CPUArchState, tlb_table[4][0]); break;
+ #endif
+ #if NB_MMU_MODES > 5
+     /* This case used to fall through into the error path below because
+      * its break was missing. */
+     case 5: Offset = offsetof(CPUArchState, tlb_table[5][0]); break;
+ #endif
+     default:
+         IRError("%s: internal error. mem_index=%d\n", __func__, mem_index);
+     }
+
+     return Offset;
+ }
+
+ /*
+  * OR the current TLB version into a guest virtual address.
+  *
+  * GVA - address value; widened with ZEXT64 first when
+  *       ENABLE_TLBVERSION_EXT is defined.
+  *
+  * The tlb_version field of CPUArchState is loaded (load created with the
+  * volatile flag set) at the same bit width as GVA and combined with it.
+  */
+ Value *IRFactory::ConcatTLBVersion(Value *GVA)
+ {
+ #if defined(ENABLE_TLBVERSION_EXT)
+     GVA = ZEXT64(GVA);
+ #endif
+     Type *VersionPtrTy = getPointerTy(DL->getTypeSizeInBits(GVA->getType()));
+
+     Value *FieldAddr = GetElementPtrInst::CreateInBounds(CPU,
+                            CONSTPtr(offsetof(CPUArchState, tlb_version)), "", LastInst);
+     Value *TypedAddr = new BitCastInst(FieldAddr, VersionPtrTy, "", LastInst);
+     Value *Version = new LoadInst(TypedAddr, "version", true, LastInst);
+
+     return OR(GVA, Version);
+ }
+
+ /*
+  * System-mode guest load with an inline software-TLB lookup.
+  *
+  * AddrL - low part of the guest address (also the only part used to index
+  *         the TLB).
+  * AddrH - high part of the guest address, or null when the address fits in
+  *         AddrL.
+  * oi    - memop/index pair: supplies the access size/endianness and the
+  *         MMU index.
+  *
+  * Emits three new blocks: tlb_hit (direct load through the TLB addend),
+  * tlb_miss (call to the matching llvm_ld_helpers entry) and tlb_exit, where
+  * a PHI merges both results. On return CurrBB/LastInst point into tlb_exit.
+  * Returns the PHI node.
+  */
+ Value *IRFactory::QEMULoad(Value *AddrL, Value *AddrH, TCGMemOpIdx oi)
+ {
+ TCGMemOp opc = get_memop(oi);
+ int mem_index = get_mmuidx(oi);
+ IntegerType *AccessTy;
+ PointerType *GuestPtrTy, *HostPtrTy;
+ int Size, s_bits = opc & MO_SIZE;
+
+ Size = 8 * 1 << s_bits; /* data size (bits) for this load */
+
+ const void *helper = llvm_ld_helpers[opc & (MO_BSWAP | MO_SIZE)];
+ Function *MissFunc = ResolveFunction(getMMUFName(helper));
+ if (!MissFunc)
+ IRError("%s: internal error.\n", __func__);
+
+ GuestPtrTy = (TARGET_LONG_BITS == 32) ? Int32PtrTy : Int64PtrTy;
+ HostPtrTy = (TCG_TARGET_REG_BITS == 32) ? Int32PtrTy : Int64PtrTy;
+
+ #if defined(ENABLE_TLBVERSION_EXT)
+ GuestPtrTy = Int64PtrTy;
+ #endif
+
+ /* Create TLB basic blocks. */
+ BasicBlock *tlb_hit = BasicBlock::Create(*Context, "tlb_hit", Func);
+ BasicBlock *tlb_miss = BasicBlock::Create(*Context, "tlb_miss", Func);
+ BasicBlock *tlb_exit = BasicBlock::Create(*Context, "tlb_exit", Func);
+ /* The miss block is recorded in toSink (presumably so the slow path can be
+ * moved out of the hot path later - confirm against the consumer). */
+ toSink.push_back(tlb_miss);
+
+ /* Load compared value in TLB. QEMU uses only addrlo to index the TLB entry. */
+ Value *TLBEntry, *TLBValue, *CPUAddr;
+ AccessTy = (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 64) ? Int64Ty : Int32Ty;
+ size_t Offset = getTLBOffset(mem_index) + offsetof(CPUTLBEntry, addr_read);
+ /* Entry offset = ((addr >> page bits) & (CPU_TLB_SIZE - 1)) scaled by the
+ * entry size, done as one shift and one mask, plus the addr_read offset. */
+ TLBEntry = LSHR(AddrL, ConstantInt::get(AccessTy, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));
+ TLBEntry = AND(TLBEntry, ConstantInt::get(AccessTy, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS));
+ TLBEntry = ADD(TLBEntry, ConstantInt::get(AccessTy, Offset));
+
+ if (TLBEntry->getType() != IntPtrTy)
+ TLBEntry = new ZExtInst(TLBEntry, IntPtrTy, "", LastInst);
+
+ CPUAddr = new PtrToIntInst(CPU, IntPtrTy, "", LastInst);
+ TLBEntry = ADD(CPUAddr, TLBEntry);
+ TLBValue = new IntToPtrInst(TLBEntry, GuestPtrTy, "", LastInst);
+ TLBValue = new LoadInst(TLBValue, "tlb.read", false, LastInst);
+
+ /* Compare GVA and TLB value. */
+ /* Despite its name, GuestPC holds the full guest address being accessed. */
+ Value *GVA, *Cond, *GuestPC = AddrL;
+ AccessTy = (TARGET_LONG_BITS == 32) ? Int32Ty : Int64Ty;
+ if (AddrH) { /* guest is 64-bit and host is 32-bit. */
+ GuestPC = SHL(ZEXT64(AddrH), CONST64(32));
+ GuestPC = OR(GuestPC, ZEXT64(AddrL));
+ }
+ #if defined(ALIGNED_ONLY)
+ GVA = AND(GuestPC, ConstantInt::get(AccessTy,
+ TARGET_PAGE_MASK | ((1 << s_bits) - 1)));
+ #elif defined(ENABLE_TLBVERSION)
+ GVA = ADD(GuestPC, ConstantInt::get(AccessTy, (1 << s_bits) - 1));
+ GVA = AND(GVA, ConstantInt::get(AccessTy, TARGET_PAGE_MASK));
+ GVA = ConcatTLBVersion(GVA);
+ #else
+ /* Adding size-1 before masking makes an access that crosses a page
+ * boundary compare against the following page. */
+ GVA = ADD(GuestPC, ConstantInt::get(AccessTy, (1 << s_bits) - 1));
+ GVA = AND(GVA, ConstantInt::get(AccessTy, TARGET_PAGE_MASK));
+ #endif
+ Cond = ICMP(GVA, TLBValue, ICmpInst::ICMP_EQ);
+ /* Replace the current terminator with the hit/miss conditional branch. */
+ BranchInst::Create(tlb_hit, tlb_miss, Cond, LastInst);
+ LastInst->eraseFromParent();
+
+ /* TLB hit. */
+ Value *PhyAddr, *Addend, *HitData, *Addr=AddrL;
+
+ LastInst = BranchInst::Create(tlb_exit, tlb_hit);
+ if (Addr->getType() != IntPtrTy)
+ Addr = new ZExtInst(Addr, IntPtrTy, "", LastInst);
+
+ /* TLBEntry points at addr_read; step from there to the addend field. */
+ Offset = offsetof(CPUTLBEntry, addend) - offsetof(CPUTLBEntry, addr_read);
+ Addend = ADD(TLBEntry, ConstantInt::get(IntPtrTy, Offset));
+ Addend = new IntToPtrInst(Addend, HostPtrTy, "", LastInst);
+ Addend = new LoadInst(Addend, "tlb.addend", false, LastInst);
+ PhyAddr = ADD(Addr, Addend);
+ PhyAddr = ITP(PhyAddr, getPointerTy(Size));
+ HitData = new LoadInst(PhyAddr, "hit", true, LastInst);
+
+ HitData = ConvertEndian(HitData, opc);
+
+ /* TLB miss. */
+ LastInst = BranchInst::Create(tlb_exit, tlb_miss);
+ SmallVector<Value *, 4> Params;
+ uint32_t restore_val = setRestorePoint(oi);
+ Params.push_back(CPUStruct);
+ Params.push_back(GuestPC);
+ Params.push_back(CONST32(restore_val));
+
+ CallInst *MissCall = CallInst::Create(MissFunc, Params, "", LastInst);
+ /* Bring the helper's return value to the access width so that both PHI
+ * inputs end up with the same bit width. */
+ Value *MissData = MissCall;
+ switch (opc & MO_SSIZE) {
+ case MO_UB:
+ case MO_SB:
+ if (DL->getTypeSizeInBits(MissData->getType()) != 8)
+ MissData = TRUNC8(MissCall);
+ break;
+ case MO_UW:
+ case MO_SW:
+ if (DL->getTypeSizeInBits(MissData->getType()) != 16)
+ MissData = TRUNC16(MissCall);
+ break;
+ case MO_UL:
+ case MO_SL:
+ if (DL->getTypeSizeInBits(MissData->getType()) != 32)
+ MissData = TRUNC32(MissCall);
+ break;
+ case MO_Q:
+ if (DL->getTypeSizeInBits(MissData->getType()) != 64)
+ MissData = ZEXT64(MissCall);
+ break;
+ default:
+ IRError("%s: invalid size (opc=%d)\n", __func__, opc);
+ break;
+ }
+
+ /* TLB exit. */
+ CurrBB = tlb_exit;
+ LastInst = BranchInst::Create(ExitBB, CurrBB);
+ PHINode *PH = PHINode::Create(HitData->getType(), 2, "", LastInst);
+ PH->addIncoming(HitData, tlb_hit);
+ PH->addIncoming(MissData, tlb_miss);
+
+ return PH;
+ }
+
+ /*
+  * System-mode guest store with an inline software-TLB lookup.
+  *
+  * Data  - value to store.
+  * AddrL - low part of the guest address (also the only part used to index
+  *         the TLB).
+  * AddrH - high part of the guest address, or null when the address fits in
+  *         AddrL.
+  * oi    - memop/index pair: supplies the access size/endianness and the
+  *         MMU index.
+  *
+  * Emits three new blocks: tlb_hit (direct store through the TLB addend),
+  * tlb_miss (call to the matching llvm_st_helpers entry) and tlb_exit.
+  * On return CurrBB/LastInst point into tlb_exit.
+  */
+ void IRFactory::QEMUStore(Value *Data, Value *AddrL, Value *AddrH, TCGMemOpIdx oi)
+ {
+ TCGMemOp opc = get_memop(oi);
+ int mem_index = get_mmuidx(oi);
+ IntegerType *AccessTy;
+ PointerType *GuestPtrTy, *HostPtrTy;
+ int Size, s_bits = opc & MO_SIZE;
+
+ Size = 8 * 1 << s_bits; /* data size (bits) for this store */
+
+ const void *helper = llvm_st_helpers[opc & (MO_BSWAP | MO_SIZE)];
+ Function *MissFunc = ResolveFunction(getMMUFName(helper));
+ if (!MissFunc)
+ IRError("%s: internal error.\n", __func__);
+
+ GuestPtrTy = (TARGET_LONG_BITS == 32) ? Int32PtrTy : Int64PtrTy;
+ HostPtrTy = (TCG_TARGET_REG_BITS == 32) ? Int32PtrTy : Int64PtrTy;
+
+ #if defined(ENABLE_TLBVERSION_EXT)
+ GuestPtrTy = Int64PtrTy;
+ #endif
+
+ /* Create TLB basic blocks. */
+ BasicBlock *tlb_hit = BasicBlock::Create(*Context, "tlb_hit", Func);
+ BasicBlock *tlb_miss = BasicBlock::Create(*Context, "tlb_miss", Func);
+ BasicBlock *tlb_exit = BasicBlock::Create(*Context, "tlb_exit", Func);
+ /* The miss block is recorded in toSink (presumably so the slow path can be
+ * moved out of the hot path later - confirm against the consumer). */
+ toSink.push_back(tlb_miss);
+
+ /* Load compared value in TLB. QEMU uses only addrlo to index the TLB entry. */
+ Value *TLBEntry, *TLBValue, *CPUAddr;
+ AccessTy = (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 64) ? Int64Ty : Int32Ty;
+ size_t Offset = getTLBOffset(mem_index) + offsetof(CPUTLBEntry, addr_write);
+ /* Entry offset = ((addr >> page bits) & (CPU_TLB_SIZE - 1)) scaled by the
+ * entry size, done as one shift and one mask, plus the addr_write offset. */
+ TLBEntry = LSHR(AddrL, ConstantInt::get(AccessTy, TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));
+ TLBEntry = AND(TLBEntry, ConstantInt::get(AccessTy, (CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS));
+ TLBEntry = ADD(TLBEntry, ConstantInt::get(AccessTy, Offset));
+
+ if (TLBEntry->getType() != IntPtrTy)
+ TLBEntry = new ZExtInst(TLBEntry, IntPtrTy, "", LastInst);
+
+ CPUAddr = new PtrToIntInst(CPU, IntPtrTy, "", LastInst);
+ TLBEntry = ADD(CPUAddr, TLBEntry);
+ TLBValue = new IntToPtrInst(TLBEntry, GuestPtrTy, "", LastInst);
+ TLBValue = new LoadInst(TLBValue, "tlb.write", false, LastInst);
+
+ /* Compare GVA and TLB value. */
+ /* Despite its name, GuestPC holds the full guest address being accessed. */
+ Value *GVA, *Cond, *GuestPC = AddrL;
+ AccessTy = (TARGET_LONG_BITS == 32) ? Int32Ty : Int64Ty;
+ if (AddrH != nullptr) { /* guest is 64-bit and host is 32-bit. */
+ GuestPC = SHL(ZEXT64(AddrH), CONST64(32));
+ GuestPC = OR(GuestPC, ZEXT64(AddrL));
+ }
+ #if defined(ALIGNED_ONLY)
+ GVA = AND(GuestPC, ConstantInt::get(AccessTy,
+ TARGET_PAGE_MASK | ((1 << s_bits) - 1)));
+ #elif defined(ENABLE_TLBVERSION)
+ GVA = ADD(GuestPC, ConstantInt::get(AccessTy, (1 << s_bits) - 1));
+ GVA = AND(GVA, ConstantInt::get(AccessTy, TARGET_PAGE_MASK));
+ GVA = ConcatTLBVersion(GVA);
+ #else
+ /* Adding size-1 before masking makes an access that crosses a page
+ * boundary compare against the following page. */
+ GVA = ADD(GuestPC, ConstantInt::get(AccessTy, (1 << s_bits) - 1));
+ GVA = AND(GVA, ConstantInt::get(AccessTy, TARGET_PAGE_MASK));
+ #endif
+ Cond = ICMP(GVA, TLBValue, ICmpInst::ICMP_EQ);
+ /* Replace the current terminator with the hit/miss conditional branch. */
+ BranchInst::Create(tlb_hit, tlb_miss, Cond, LastInst);
+ LastInst->eraseFromParent();
+
+ /* TLB hit. */
+ Value *PhyAddr, *Addend, *Addr=AddrL;
+
+ LastInst = BranchInst::Create(tlb_exit, tlb_hit);
+ if (Addr->getType() != IntPtrTy)
+ Addr = new ZExtInst(Addr, IntPtrTy, "", LastInst);
+
+ /* TLBEntry points at addr_write; step from there to the addend field. */
+ Offset = offsetof(CPUTLBEntry, addend) - offsetof(CPUTLBEntry, addr_write);
+ Addend = ADD(TLBEntry, ConstantInt::get(IntPtrTy, Offset));
+ Addend = new IntToPtrInst(Addend, HostPtrTy, "", LastInst);
+ Addend = new LoadInst(Addend, "tlb.addend", false, LastInst);
+ PhyAddr = ADD(Addr, Addend);
+ PhyAddr = ITP(PhyAddr, getPointerTy(Size));
+
+ /* Only the direct store gets the endian-converted value; the helper on
+ * the miss path below receives the original Data. */
+ Value *HitData = ConvertEndian(Data, opc);
+
+ new StoreInst(HitData, PhyAddr, true, LastInst);
+
+ /* TLB miss. */
+ LastInst = BranchInst::Create(tlb_exit, tlb_miss);
+ SmallVector<Value *, 4> Params;
+ uint32_t restore_val = setRestorePoint(oi);
+ Params.push_back(CPUStruct);
+ Params.push_back(GuestPC);
+ Params.push_back(Data);
+ Params.push_back(CONST32(restore_val));
+
+ CallInst::Create(MissFunc, Params, "", LastInst);
+
+ /* TLB exit. */
+ CurrBB = tlb_exit;
+ LastInst = BranchInst::Create(ExitBB, CurrBB);
+ }
+
+#endif /* CONFIG_USER_ONLY */
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
diff --git a/llvm/llvm-opc-vector.cpp b/llvm/llvm-opc-vector.cpp
new file mode 100644
index 0000000..3ce5f68
--- /dev/null
+++ b/llvm/llvm-opc-vector.cpp
@@ -0,0 +1,943 @@
+/*
+ * (C) 2015 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ *
+ * This file provides TCG vector IR to LLVM IR conversions.
+ */
+
+#include "llvm.h"
+#include "llvm-debug.h"
+#include "llvm-opc.h"
+#include "utils.h"
+
+
+extern TCGOpDef llvm_op_defs[];
+
+
+ /* Placeholder for the vector_start opcode: reaching it is reported as an error. */
+ void IRFactory::op_vector_start(const TCGArg *args)
+ {
+ IRError("%s: this function should never be called.\n", __func__);
+ }
+
+ /* Placeholder for the vector_end opcode: reaching it is reported as an error. */
+ void IRFactory::op_vector_end(const TCGArg *args)
+ {
+ IRError("%s: this function should never be called.\n", __func__);
+ }
+
+ /*
+  * Copy a 128-bit vector between two CPU-relative offsets.
+  *
+  * args[0] - destination offset from CPU.
+  * args[1] - source offset from CPU.
+  *
+  * The value is moved as a <16 x i8> vector with a plain load and store.
+  */
+ void IRFactory::op_vmov_128(const TCGArg *args)
+ {
+     IRDebug(INDEX_op_vmov_128);
+
+     TCGArg ToOff = args[0];
+     TCGArg FromOff = args[1];
+     PointerType *VecPtrTy = PointerType::getUnqual(VectorType::get(Int8Ty, 16));
+
+     Value *FromPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(FromOff), "", LastInst);
+     FromPtr = new BitCastInst(FromPtr, VecPtrTy, "", LastInst);
+     Value *ToPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(ToOff), "", LastInst);
+     ToPtr = new BitCastInst(ToPtr, VecPtrTy, "", LastInst);
+
+     Value *Payload = new LoadInst(FromPtr, "", false, LastInst);
+     new StoreInst(Payload, ToPtr, false, LastInst);
+ }
+
+ /*
+  * Load a 128-bit vector from guest memory into the CPU state.
+  *
+  * args[0] - destination offset from CPU.
+  * args[1] - index of the register holding the guest address (32 or 64 bit).
+  * args[2] - alignment value divided by 8 before use; (TCGArg)-1 selects
+  *           an alignment of 4.
+  *
+  * Guest address formation follows the user-mode load path: a direct
+  * int-to-pointer cast, or a byte-pointer GEP by the guest base register when
+  * GUEST_BASE is non-zero and no segment is in use. The guest load is created
+  * with the volatile flag set and tagged as guest memory.
+  */
+ void IRFactory::op_vload_128(const TCGArg *args)
+ {
+     IRDebug(INDEX_op_vload_128);
+
+     TCGArg StateOff = args[0];
+     Register &In = Reg[args[1]];
+     TCGArg Align = (args[2] == (TCGArg)-1) ? 4 : args[2] / 8;
+     Value *GuestAddr = LoadState(In);
+
+     AssertType(In.Size == 32 || In.Size == 64);
+
+     VectorType *VecTy = VectorType::get(Int8Ty, 16);
+     PointerType *GuestPtrTy = PointerType::get(VecTy, Segment);
+
+     if (GUEST_BASE == 0 || Segment != 0) {
+         GuestAddr = ITP(GuestAddr, GuestPtrTy);
+     } else {
+         GuestAddr = ITP(GuestAddr, Int8PtrTy);
+         GuestAddr = GetElementPtrInst::CreateInBounds(GuestAddr, GuestBaseReg.Base, "", LastInst);
+         if (GuestAddr->getType() != GuestPtrTy)
+             GuestAddr = CAST(GuestAddr, GuestPtrTy);
+     }
+
+     LoadInst *Ld = new LoadInst(GuestAddr, "", true, LastInst);
+     Ld->setAlignment(Align);
+     MF->setGuestMemory(Ld);
+
+     /* Write the loaded vector into its slot in the CPU state. */
+     PointerType *StatePtrTy = PointerType::getUnqual(VecTy);
+     Value *Slot = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(StateOff), "", LastInst);
+     Slot = new BitCastInst(Slot, StatePtrTy, "", LastInst);
+     new StoreInst(Ld, Slot, false, LastInst);
+ }
+
+ /*
+  * Store a 128-bit vector from the CPU state to guest memory.
+  *
+  * args[0] - source offset from CPU.
+  * args[1] - index of the register holding the guest address (32 or 64 bit).
+  * args[2] - alignment value divided by 8 before use; (TCGArg)-1 selects
+  *           an alignment of 4.
+  *
+  * The vector is read from the CPU state first, then written to the guest
+  * address, which is formed as in the user-mode store path. The guest store
+  * is created with the volatile flag set and tagged as guest memory.
+  */
+ void IRFactory::op_vstore_128(const TCGArg *args)
+ {
+     IRDebug(INDEX_op_vstore_128);
+
+     TCGArg StateOff = args[0];
+     Register &In = Reg[args[1]];
+     TCGArg Align = (args[2] == (TCGArg)-1) ? 4 : args[2] / 8;
+     Value *GuestAddr = LoadState(In);
+
+     AssertType(In.Size == 32 || In.Size == 64);
+
+     VectorType *VecTy = VectorType::get(Int8Ty, 16);
+
+     /* Fetch the vector from its slot in the CPU state. */
+     PointerType *StatePtrTy = PointerType::getUnqual(VecTy);
+     Value *Payload = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(StateOff), "", LastInst);
+     Payload = new BitCastInst(Payload, StatePtrTy, "", LastInst);
+     Payload = new LoadInst(Payload, "", false, LastInst);
+
+     PointerType *GuestPtrTy = PointerType::get(VecTy, Segment);
+     if (GUEST_BASE == 0 || Segment != 0) {
+         GuestAddr = ITP(GuestAddr, GuestPtrTy);
+     } else {
+         GuestAddr = ITP(GuestAddr, Int8PtrTy);
+         GuestAddr = GetElementPtrInst::CreateInBounds(GuestAddr, GuestBaseReg.Base, "", LastInst);
+         if (GuestAddr->getType() != GuestPtrTy)
+             GuestAddr = CAST(GuestAddr, GuestPtrTy);
+     }
+
+     StoreInst *St = new StoreInst(Payload, GuestAddr, true, LastInst);
+     St->setAlignment(Align);
+     MF->setGuestMemory(St);
+ }
+
+ /*
+  * llvm_gen_vop(_Fn, _Num, _Ty): emit a two-operand vector operation.
+  *
+  * Expects `args`, `CPU` and `LastInst` to be in scope at the expansion site.
+  * Loads two <_Num x _Ty> vectors from the CPU-relative offsets args[1] and
+  * args[2], combines them with _Fn(in1, in2) and stores the result at the
+  * CPU-relative offset args[0].
+  */
+ #define llvm_gen_vop(_Fn,_Num,_Ty) \
+ do { \
+ TCGArg Out = args[0]; \
+ TCGArg In1 = args[1]; \
+ TCGArg In2 = args[2]; \
+ Value *OutPtr, *InPtr1, *InPtr2; \
+ VectorType *VectorTy = VectorType::get(_Ty, _Num); \
+ PointerType *PtrTy = PointerType::getUnqual(VectorTy); \
+ \
+ InPtr1 = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(In1), "", LastInst); \
+ InPtr1 = new BitCastInst(InPtr1, PtrTy, "", LastInst); \
+ InPtr2 = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(In2), "", LastInst); \
+ InPtr2 = new BitCastInst(InPtr2, PtrTy, "", LastInst); \
+ OutPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(Out), "", LastInst); \
+ OutPtr = new BitCastInst(OutPtr, PtrTy, "", LastInst); \
+ \
+ Value *InData1 = new LoadInst(InPtr1, "", false, LastInst); \
+ Value *InData2 = new LoadInst(InPtr2, "", false, LastInst); \
+ InData1 = _Fn(InData1, InData2); \
+ new StoreInst(InData1, OutPtr, false, LastInst); \
+ } while (0)
+
+ /*
+  * llvm_gen_vop2(_Fn1, _Fn2, _Num, _Ty): emit an accumulating vector operation.
+  *
+  * Expects `args`, `CPU` and `LastInst` to be in scope at the expansion site.
+  * Loads <_Num x _Ty> vectors from the CPU-relative offsets args[1], args[2]
+  * and from the destination args[0], then stores
+  * _Fn2(old_out, _Fn1(in1, in2)) back to args[0].
+  */
+ #define llvm_gen_vop2(_Fn1,_Fn2,_Num,_Ty) \
+ do { \
+ TCGArg Out = args[0]; \
+ TCGArg In1 = args[1]; \
+ TCGArg In2 = args[2]; \
+ Value *OutPtr, *InPtr1, *InPtr2; \
+ VectorType *VectorTy = VectorType::get(_Ty, _Num); \
+ PointerType *PtrTy = PointerType::getUnqual(VectorTy); \
+ \
+ InPtr1 = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(In1), "", LastInst); \
+ InPtr1 = new BitCastInst(InPtr1, PtrTy, "", LastInst); \
+ InPtr2 = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(In2), "", LastInst); \
+ InPtr2 = new BitCastInst(InPtr2, PtrTy, "", LastInst); \
+ OutPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(Out), "", LastInst); \
+ OutPtr = new BitCastInst(OutPtr, PtrTy, "", LastInst); \
+ \
+ Value *InData1 = new LoadInst(InPtr1, "", false, LastInst); \
+ Value *InData2 = new LoadInst(InPtr2, "", false, LastInst); \
+ Value *InData3 = new LoadInst(OutPtr, "", false, LastInst); \
+ InData1 = _Fn2(InData3, _Fn1(InData1, InData2)); \
+ new StoreInst(InData1, OutPtr, false, LastInst); \
+ } while (0)
+
+
+ /* 128-bit integer add: ADD on <16 x i8> vectors at CPU offsets args[1], args[2]; result stored at args[0]. */
+ void IRFactory::op_vadd_i8_128(const TCGArg *args)
+ {
+ IRDebug(INDEX_op_vadd_i8_128);
+ llvm_gen_vop(ADD, 16, Int8Ty);
+ }
+
+ /* 128-bit integer add: ADD on <8 x i16> vectors at CPU offsets args[1], args[2]; result stored at args[0]. */
+ void IRFactory::op_vadd_i16_128(const TCGArg *args)
+ {
+ IRDebug(INDEX_op_vadd_i16_128);
+ llvm_gen_vop(ADD, 8, Int16Ty);
+ }
+
+ /* 128-bit integer add: ADD on <4 x i32> vectors at CPU offsets args[1], args[2]; result stored at args[0]. */
+ void IRFactory::op_vadd_i32_128(const TCGArg *args)
+ {
+ IRDebug(INDEX_op_vadd_i32_128);
+ llvm_gen_vop(ADD, 4, Int32Ty);
+ }
+
+ /* 128-bit integer add: ADD on <2 x i64> vectors at CPU offsets args[1], args[2]; result stored at args[0]. */
+ void IRFactory::op_vadd_i64_128(const TCGArg *args)
+ {
+ IRDebug(INDEX_op_vadd_i64_128);
+ llvm_gen_vop(ADD, 2, Int64Ty);
+ }
+
+ /* 64-bit integer add: ADD on <8 x i8> vectors at CPU offsets args[1], args[2]; result stored at args[0]. */
+ void IRFactory::op_vadd_i8_64(const TCGArg *args)
+ {
+ IRDebug(INDEX_op_vadd_i8_64);
+ llvm_gen_vop(ADD, 8, Int8Ty);
+ }
+
+ /* 64-bit integer add: ADD on <4 x i16> vectors at CPU offsets args[1], args[2]; result stored at args[0]. */
+ void IRFactory::op_vadd_i16_64(const TCGArg *args)
+ {
+ IRDebug(INDEX_op_vadd_i16_64);
+ llvm_gen_vop(ADD, 4, Int16Ty);
+ }
+
+ /* 64-bit integer add: ADD on <2 x i32> vectors at CPU offsets args[1], args[2]; result stored at args[0]. */
+ void IRFactory::op_vadd_i32_64(const TCGArg *args)
+ {
+ IRDebug(INDEX_op_vadd_i32_64);
+ llvm_gen_vop(ADD, 2, Int32Ty);
+ }
+
+ /* 128-bit integer subtract: SUB(args[1], args[2]) on <16 x i8> vectors at CPU offsets; result stored at args[0]. */
+ void IRFactory::op_vsub_i8_128(const TCGArg *args)
+ {
+ IRDebug(INDEX_op_vsub_i8_128);
+ llvm_gen_vop(SUB, 16, Int8Ty);
+ }
+
+ /* 128-bit integer subtract: SUB(args[1], args[2]) on <8 x i16> vectors at CPU offsets; result stored at args[0]. */
+ void IRFactory::op_vsub_i16_128(const TCGArg *args)
+ {
+ IRDebug(INDEX_op_vsub_i16_128);
+ llvm_gen_vop(SUB, 8, Int16Ty);
+ }
+
+ /* 128-bit integer subtract: SUB(args[1], args[2]) on <4 x i32> vectors at CPU offsets; result stored at args[0]. */
+ void IRFactory::op_vsub_i32_128(const TCGArg *args)
+ {
+ IRDebug(INDEX_op_vsub_i32_128);
+ llvm_gen_vop(SUB, 4, Int32Ty);
+ }
+
+ /* 128-bit integer subtract: SUB(args[1], args[2]) on <2 x i64> vectors at CPU offsets; result stored at args[0]. */
+ void IRFactory::op_vsub_i64_128(const TCGArg *args)
+ {
+ IRDebug(INDEX_op_vsub_i64_128);
+ llvm_gen_vop(SUB, 2, Int64Ty);
+ }
+
+ /* 64-bit integer subtract: SUB(args[1], args[2]) on <8 x i8> vectors at CPU offsets; result stored at args[0]. */
+ void IRFactory::op_vsub_i8_64(const TCGArg *args)
+ {
+ IRDebug(INDEX_op_vsub_i8_64);
+ llvm_gen_vop(SUB, 8, Int8Ty);
+ }
+
+ /* 64-bit integer subtract: SUB(args[1], args[2]) on <4 x i16> vectors at CPU offsets; result stored at args[0]. */
+ void IRFactory::op_vsub_i16_64(const TCGArg *args)
+ {
+ IRDebug(INDEX_op_vsub_i16_64);
+ llvm_gen_vop(SUB, 4, Int16Ty);
+ }
+
+ /* 64-bit integer subtract: SUB(args[1], args[2]) on <2 x i32> vectors at CPU offsets; result stored at args[0]. */
+ void IRFactory::op_vsub_i32_64(const TCGArg *args)
+ {
+ IRDebug(INDEX_op_vsub_i32_64);
+ llvm_gen_vop(SUB, 2, Int32Ty);
+ }
+
+ /* 128-bit float add: FADD on <4 x FloatTy> vectors at CPU offsets args[1], args[2]; result stored at args[0]. */
+ void IRFactory::op_vadd_f32_128(const TCGArg *args)
+ {
+ IRDebug(INDEX_op_vadd_f32_128);
+ llvm_gen_vop(FADD, 4, FloatTy);
+ }
+
+ /* 128-bit double add: FADD on <2 x DoubleTy> vectors at CPU offsets args[1], args[2]; result stored at args[0]. */
+ void IRFactory::op_vadd_f64_128(const TCGArg *args)
+ {
+ IRDebug(INDEX_op_vadd_f64_128);
+ llvm_gen_vop(FADD, 2, DoubleTy);
+ }
+
+ /* 64-bit float add: FADD on <2 x FloatTy> vectors at CPU offsets args[1], args[2]; result stored at args[0]. */
+ void IRFactory::op_vadd_f32_64(const TCGArg *args)
+ {
+ IRDebug(INDEX_op_vadd_f32_64);
+ llvm_gen_vop(FADD, 2, FloatTy);
+ }
+
+ /* Not implemented: translating this opcode is reported through IRError. */
+ void IRFactory::op_vpadd_f32_128(const TCGArg *args)
+ {
+ IRError("%s not implemented.\n", __func__);
+ }
+
+ /* Not implemented: translating this opcode is reported through IRError. */
+ void IRFactory::op_vpadd_f64_128(const TCGArg *args)
+ {
+ IRError("%s not implemented.\n", __func__);
+ }
+
+ /* Not implemented: translating this opcode is reported through IRError. */
+ void IRFactory::op_vpadd_f32_64(const TCGArg *args)
+ {
+ IRError("%s not implemented.\n", __func__);
+ }
+
+ /* 128-bit float subtract: FSUB(args[1], args[2]) on <4 x FloatTy> vectors at CPU offsets; result stored at args[0]. */
+ void IRFactory::op_vsub_f32_128(const TCGArg *args)
+ {
+ IRDebug(INDEX_op_vsub_f32_128);
+ llvm_gen_vop(FSUB, 4, FloatTy);
+ }
+
+ /* 128-bit double subtract: FSUB(args[1], args[2]) on <2 x DoubleTy> vectors at CPU offsets; result stored at args[0]. */
+ void IRFactory::op_vsub_f64_128(const TCGArg *args)
+ {
+ IRDebug(INDEX_op_vsub_f64_128);
+ llvm_gen_vop(FSUB, 2, DoubleTy);
+ }
+
+ /* 64-bit float subtract: FSUB(args[1], args[2]) on <2 x FloatTy> vectors at CPU offsets; result stored at args[0]. */
+ void IRFactory::op_vsub_f32_64(const TCGArg *args)
+ {
+ IRDebug(INDEX_op_vsub_f32_64);
+ llvm_gen_vop(FSUB, 2, FloatTy);
+ }
+
+ /* Not implemented: translating this opcode is reported through IRError. */
+ void IRFactory::op_vabd_f32_128(const TCGArg *args)
+ {
+ IRError("%s not implemented.\n", __func__);
+ }
+
+ /* Not implemented: translating this opcode is reported through IRError. */
+ void IRFactory::op_vabd_f64_128(const TCGArg *args)
+ {
+ IRError("%s not implemented.\n", __func__);
+ }
+
+ /* Not implemented: translating this opcode is reported through IRError. */
+ void IRFactory::op_vabd_f32_64(const TCGArg *args)
+ {
+ IRError("%s not implemented.\n", __func__);
+ }
+
+ /* 128-bit float multiply-accumulate on <4 x FloatTy>: out = FADD(out, FMUL(in1, in2)), emitted as two
+ * separate operations (same expansion as op_vmla_f32_128). */
+ void IRFactory::op_vfma_f32_128(const TCGArg *args)
+ {
+ IRDebug(INDEX_op_vfma_f32_128);
+ llvm_gen_vop2(FMUL, FADD, 4, FloatTy);
+ }
+
+ /* 128-bit double multiply-accumulate on <2 x DoubleTy>: out = FADD(out, FMUL(in1, in2)), emitted as two
+ * separate operations (same expansion as op_vmla_f64_128). */
+ void IRFactory::op_vfma_f64_128(const TCGArg *args)
+ {
+ IRDebug(INDEX_op_vfma_f64_128);
+ llvm_gen_vop2(FMUL, FADD, 2, DoubleTy);
+ }
+
+ /* 64-bit float multiply-accumulate on <2 x FloatTy>: out = FADD(out, FMUL(in1, in2)), emitted as two
+ * separate operations (same expansion as op_vmla_f32_64). */
+ void IRFactory::op_vfma_f32_64(const TCGArg *args)
+ {
+ IRDebug(INDEX_op_vfma_f32_64);
+ llvm_gen_vop2(FMUL, FADD, 2, FloatTy);
+ }
+
+ /* Not implemented: translating this opcode is reported through IRError. */
+ void IRFactory::op_vfms_f32_128(const TCGArg *args)
+ {
+ IRError("%s not implemented.\n", __func__);
+ }
+
+ /* Not implemented: translating this opcode is reported through IRError. */
+ void IRFactory::op_vfms_f64_128(const TCGArg *args)
+ {
+ IRError("%s not implemented.\n", __func__);
+ }
+
+ /* Not implemented: translating this opcode is reported through IRError. */
+ void IRFactory::op_vfms_f32_64(const TCGArg *args)
+ {
+ IRError("%s not implemented.\n", __func__);
+ }
+
+ /* 128-bit float multiply: FMUL on <4 x FloatTy> vectors at CPU offsets args[1], args[2]; result stored at args[0]. */
+ void IRFactory::op_vmul_f32_128(const TCGArg *args)
+ {
+ IRDebug(INDEX_op_vmul_f32_128);
+ llvm_gen_vop(FMUL, 4, FloatTy);
+ }
+
+ /* 128-bit double multiply: FMUL on <2 x DoubleTy> vectors at CPU offsets args[1], args[2]; result stored at args[0]. */
+ void IRFactory::op_vmul_f64_128(const TCGArg *args)
+ {
+ IRDebug(INDEX_op_vmul_f64_128);
+ llvm_gen_vop(FMUL, 2, DoubleTy);
+ }
+
+ /* 64-bit float multiply: FMUL on <2 x FloatTy> vectors at CPU offsets args[1], args[2]; result stored at args[0]. */
+ void IRFactory::op_vmul_f32_64(const TCGArg *args)
+ {
+ IRDebug(INDEX_op_vmul_f32_64);
+ llvm_gen_vop(FMUL, 2, FloatTy);
+ }
+
+ /* 128-bit float multiply-accumulate on <4 x FloatTy>: out = FADD(out, FMUL(in1, in2)). */
+ void IRFactory::op_vmla_f32_128(const TCGArg *args)
+ {
+ IRDebug(INDEX_op_vmla_f32_128);
+ llvm_gen_vop2(FMUL, FADD, 4, FloatTy);
+ }
+
+ /* 128-bit double multiply-accumulate on <2 x DoubleTy>: out = FADD(out, FMUL(in1, in2)). */
+ void IRFactory::op_vmla_f64_128(const TCGArg *args)
+ {
+ IRDebug(INDEX_op_vmla_f64_128);
+ llvm_gen_vop2(FMUL, FADD, 2, DoubleTy);
+ }
+
+ /* 64-bit float multiply-accumulate on <2 x FloatTy>: out = FADD(out, FMUL(in1, in2)). */
+ void IRFactory::op_vmla_f32_64(const TCGArg *args)
+ {
+ IRDebug(INDEX_op_vmla_f32_64);
+ llvm_gen_vop2(FMUL, FADD, 2, FloatTy);
+ }
+
+ /* 128-bit float multiply-subtract on <4 x FloatTy>: out = FSUB(out, FMUL(in1, in2)). */
+ void IRFactory::op_vmls_f32_128(const TCGArg *args)
+ {
+ IRDebug(INDEX_op_vmls_f32_128);
+ llvm_gen_vop2(FMUL, FSUB, 4, FloatTy);
+ }
+
+ /* 128-bit double multiply-subtract on <2 x DoubleTy>: out = FSUB(out, FMUL(in1, in2)). */
+ void IRFactory::op_vmls_f64_128(const TCGArg *args)
+ {
+ IRDebug(INDEX_op_vmls_f64_128);
+ llvm_gen_vop2(FMUL, FSUB, 2, DoubleTy);
+ }
+
+ /* 64-bit float multiply-subtract on <2 x FloatTy>: out = FSUB(out, FMUL(in1, in2)). */
+ void IRFactory::op_vmls_f32_64(const TCGArg *args)
+ {
+ IRDebug(INDEX_op_vmls_f32_64);
+ llvm_gen_vop2(FMUL, FSUB, 2, FloatTy);
+ }
+
+ /* 128-bit float divide: FDIV(args[1], args[2]) on <4 x FloatTy> vectors at CPU offsets; result stored at args[0]. */
+ void IRFactory::op_vdiv_f32_128(const TCGArg *args)
+ {
+ IRDebug(INDEX_op_vdiv_f32_128);
+ llvm_gen_vop(FDIV, 4, FloatTy);
+ }
+
+ /* 128-bit double divide: FDIV(args[1], args[2]) on <2 x DoubleTy> vectors at CPU offsets; result stored at args[0]. */
+ void IRFactory::op_vdiv_f64_128(const TCGArg *args)
+ {
+ IRDebug(INDEX_op_vdiv_f64_128);
+ llvm_gen_vop(FDIV, 2, DoubleTy);
+ }
+
+ /* 64-bit float divide: FDIV(args[1], args[2]) on <2 x FloatTy> vectors at CPU offsets; result stored at args[0]. */
+ void IRFactory::op_vdiv_f32_64(const TCGArg *args)
+ {
+ IRDebug(INDEX_op_vdiv_f32_64);
+ llvm_gen_vop(FDIV, 2, FloatTy);
+ }
+
+ /*
+  * 128-bit bitwise AND of the vectors at CPU offsets args[1] and args[2],
+  * stored at args[0]. When both sources are the same offset the result
+  * equals the source, so a plain 128-bit move is emitted instead.
+  */
+ void IRFactory::op_vand_128(const TCGArg *args)
+ {
+     IRDebug(INDEX_op_vand_128);
+
+     if (args[1] != args[2]) {
+         llvm_gen_vop(AND, 4, Int32Ty);
+     } else {
+         /* x & x == x */
+         op_vmov_128(args);
+     }
+ }
+
+ /* 64-bit bitwise AND on <2 x i32> vectors at CPU offsets args[1], args[2]; result stored at args[0]. */
+ void IRFactory::op_vand_64(const TCGArg *args)
+ {
+ IRDebug(INDEX_op_vand_64);
+ llvm_gen_vop(AND, 2, Int32Ty);
+ }
+
+ /*
+  * 128-bit bit clear: out = in1 & ~in2 on <4 x i32> vectors.
+  *
+  * args[0] - destination offset from CPU.
+  * args[1] - offset of the first operand.
+  * args[2] - offset of the operand whose bits are cleared from the first.
+  *
+  * The complement is produced by XOR-ing with an all-ones vector.
+  */
+ void IRFactory::op_vbic_128(const TCGArg *args)
+ {
+     IRDebug(INDEX_op_vbic_128);
+
+     TCGArg DstOff = args[0];
+     TCGArg LhsOff = args[1];
+     TCGArg RhsOff = args[2];
+     PointerType *VecPtrTy = PointerType::getUnqual(VectorType::get(Int32Ty, 4));
+
+     Value *LhsPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(LhsOff), "", LastInst);
+     LhsPtr = new BitCastInst(LhsPtr, VecPtrTy, "", LastInst);
+     Value *RhsPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(RhsOff), "", LastInst);
+     RhsPtr = new BitCastInst(RhsPtr, VecPtrTy, "", LastInst);
+     Value *DstPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(DstOff), "", LastInst);
+     DstPtr = new BitCastInst(DstPtr, VecPtrTy, "", LastInst);
+
+     /* Constant vector with every bit set, used to invert the second operand. */
+     std::vector<Constant *> Lanes;
+     for (int Lane = 0; Lane < 4; Lane++)
+         Lanes.push_back(CONST32(-1U));
+     Value *AllOnes = ConstantVector::get(Lanes);
+
+     Value *Lhs = new LoadInst(LhsPtr, "", false, LastInst);
+     Value *Rhs = new LoadInst(RhsPtr, "", false, LastInst);
+     Value *NotRhs = XOR(Rhs, AllOnes);
+     Value *Result = AND(Lhs, NotRhs);
+     new StoreInst(Result, DstPtr, false, LastInst);
+ }
+
+ /*
+  * 64-bit bit clear: out = in1 & ~in2 on <2 x i32> vectors.
+  *
+  * args[0] - destination offset from CPU.
+  * args[1] - offset of the first operand.
+  * args[2] - offset of the operand whose bits are cleared from the first.
+  *
+  * The complement is produced by XOR-ing with an all-ones vector.
+  */
+ void IRFactory::op_vbic_64(const TCGArg *args)
+ {
+     IRDebug(INDEX_op_vbic_64);
+
+     TCGArg DstOff = args[0];
+     TCGArg LhsOff = args[1];
+     TCGArg RhsOff = args[2];
+     PointerType *VecPtrTy = PointerType::getUnqual(VectorType::get(Int32Ty, 2));
+
+     Value *LhsPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(LhsOff), "", LastInst);
+     LhsPtr = new BitCastInst(LhsPtr, VecPtrTy, "", LastInst);
+     Value *RhsPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(RhsOff), "", LastInst);
+     RhsPtr = new BitCastInst(RhsPtr, VecPtrTy, "", LastInst);
+     Value *DstPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(DstOff), "", LastInst);
+     DstPtr = new BitCastInst(DstPtr, VecPtrTy, "", LastInst);
+
+     /* Constant vector with every bit set, used to invert the second operand. */
+     std::vector<Constant *> Lanes;
+     for (int Lane = 0; Lane < 2; Lane++)
+         Lanes.push_back(CONST32(-1U));
+     Value *AllOnes = ConstantVector::get(Lanes);
+
+     Value *Lhs = new LoadInst(LhsPtr, "", false, LastInst);
+     Value *Rhs = new LoadInst(RhsPtr, "", false, LastInst);
+     Value *NotRhs = XOR(Rhs, AllOnes);
+     Value *Result = AND(Lhs, NotRhs);
+     new StoreInst(Result, DstPtr, false, LastInst);
+ }
+
+ /*
+  * 128-bit bitwise OR of the vectors at CPU offsets args[1] and args[2],
+  * stored at args[0]. When both sources are the same offset the result
+  * equals the source, so a plain 128-bit move is emitted instead.
+  */
+ void IRFactory::op_vorr_128(const TCGArg *args)
+ {
+     IRDebug(INDEX_op_vorr_128);
+
+     if (args[1] != args[2]) {
+         llvm_gen_vop(OR, 4, Int32Ty);
+     } else {
+         /* x | x == x */
+         op_vmov_128(args);
+     }
+ }
+
+void IRFactory::op_vorr_64(const TCGArg *args)
+{
+ IRDebug(INDEX_op_vorr_64);
+ llvm_gen_vop(OR, 2, Int32Ty); /* 2 x Int32Ty OR via llvm_gen_vop (defined outside this view; presumably consumes args) */
+}
+
+void IRFactory::op_vorn_128(const TCGArg *args)
+{
+    IRDebug(INDEX_op_vorn_128);
+
+    /* OR-NOT over 4 x i32 lanes: Out <- In1 | ~In2.
+     * args[0..2] are the Out/In1/In2 offsets applied to the CPU pointer. */
+    const TCGArg DstOff = args[0];
+    const TCGArg LhsOff = args[1];
+    const TCGArg RhsOff = args[2];
+    VectorType *LaneTy = VectorType::get(Int32Ty, 4);
+    PointerType *LanePtrTy = PointerType::getUnqual(LaneTy);
+
+    Value *LhsPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(LhsOff), "", LastInst);
+    LhsPtr = new BitCastInst(LhsPtr, LanePtrTy, "", LastInst);
+    Value *RhsPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(RhsOff), "", LastInst);
+    RhsPtr = new BitCastInst(RhsPtr, LanePtrTy, "", LastInst);
+    Value *DstPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(DstOff), "", LastInst);
+    DstPtr = new BitCastInst(DstPtr, LanePtrTy, "", LastInst);
+
+    /* XOR against an all-ones vector serves as bitwise NOT. */
+    std::vector<Constant *> Ones(4, CONST32(-1U));
+    Value *AllOnes = ConstantVector::get(Ones);
+
+    Value *Lhs = new LoadInst(LhsPtr, "", false, LastInst);
+    Value *Rhs = new LoadInst(RhsPtr, "", false, LastInst);
+    Value *NotRhs = XOR(Rhs, AllOnes);
+    Value *Combined = OR(Lhs, NotRhs);
+    new StoreInst(Combined, DstPtr, false, LastInst);
+}
+
+void IRFactory::op_vorn_64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_vorn_64);
+
+    /* OR-NOT over 2 x i32 lanes: Out <- In1 | ~In2.
+     * args[0..2] are the Out/In1/In2 offsets applied to the CPU pointer. */
+    const TCGArg DstOff = args[0];
+    const TCGArg LhsOff = args[1];
+    const TCGArg RhsOff = args[2];
+    VectorType *LaneTy = VectorType::get(Int32Ty, 2);
+    PointerType *LanePtrTy = PointerType::getUnqual(LaneTy);
+
+    Value *LhsPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(LhsOff), "", LastInst);
+    LhsPtr = new BitCastInst(LhsPtr, LanePtrTy, "", LastInst);
+    Value *RhsPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(RhsOff), "", LastInst);
+    RhsPtr = new BitCastInst(RhsPtr, LanePtrTy, "", LastInst);
+    Value *DstPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(DstOff), "", LastInst);
+    DstPtr = new BitCastInst(DstPtr, LanePtrTy, "", LastInst);
+
+    /* XOR against an all-ones vector serves as bitwise NOT. */
+    std::vector<Constant *> Ones(2, CONST32(-1U));
+    Value *AllOnes = ConstantVector::get(Ones);
+
+    Value *Lhs = new LoadInst(LhsPtr, "", false, LastInst);
+    Value *Rhs = new LoadInst(RhsPtr, "", false, LastInst);
+    Value *NotRhs = XOR(Rhs, AllOnes);
+    Value *Combined = OR(Lhs, NotRhs);
+    new StoreInst(Combined, DstPtr, false, LastInst);
+}
+
+void IRFactory::op_veor_128(const TCGArg *args)
+{
+ IRDebug(INDEX_op_veor_128);
+ llvm_gen_vop(XOR, 4, Int32Ty); /* 4 x Int32Ty XOR via llvm_gen_vop (defined outside this view; presumably consumes args) */
+}
+
+void IRFactory::op_veor_64(const TCGArg *args)
+{
+ IRDebug(INDEX_op_veor_64);
+ llvm_gen_vop(XOR, 2, Int32Ty); /* 2 x Int32Ty XOR via llvm_gen_vop (defined outside this view; presumably consumes args) */
+}
+
+void IRFactory::op_vbif_128(const TCGArg *args)
+{
+    IRDebug(INDEX_op_vbif_128);
+
+    /* vbif rd, rn, rm   (4 x i32 lanes)
+     * operation: rd <- (rd & rm) | (rn & ~rm) */
+    const TCGArg RdOff = args[0];
+    const TCGArg RnOff = args[1];
+    const TCGArg RmOff = args[2];
+    VectorType *LaneTy = VectorType::get(Int32Ty, 4);
+    PointerType *LanePtrTy = PointerType::getUnqual(LaneTy);
+
+    Value *RnPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(RnOff), "", LastInst);
+    RnPtr = new BitCastInst(RnPtr, LanePtrTy, "", LastInst);
+    Value *RmPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(RmOff), "", LastInst);
+    RmPtr = new BitCastInst(RmPtr, LanePtrTy, "", LastInst);
+    Value *RdPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(RdOff), "", LastInst);
+    RdPtr = new BitCastInst(RdPtr, LanePtrTy, "", LastInst);
+
+    /* All-ones vector: XOR with it complements every bit of rm. */
+    std::vector<Constant *> Ones(4, CONST32(-1U));
+    Value *AllOnes = ConstantVector::get(Ones);
+
+    Value *Rn = new LoadInst(RnPtr, "", false, LastInst);
+    Value *Rm = new LoadInst(RmPtr, "", false, LastInst);
+    Value *Rd = new LoadInst(RdPtr, "", false, LastInst);
+
+    /* Keep rd where rm is set, take rn where rm is clear. */
+    Value *Kept = AND(Rd, Rm);
+    Value *NotRm = XOR(Rm, AllOnes);
+    Value *Inserted = AND(Rn, NotRm);
+    Value *Merged = OR(Inserted, Kept);
+    new StoreInst(Merged, RdPtr, false, LastInst);
+}
+
+void IRFactory::op_vbif_64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_vbif_64);
+
+    /* vbif rd, rn, rm   (2 x i32 lanes)
+     * operation: rd <- (rd & rm) | (rn & ~rm) */
+    const TCGArg RdOff = args[0];
+    const TCGArg RnOff = args[1];
+    const TCGArg RmOff = args[2];
+    VectorType *LaneTy = VectorType::get(Int32Ty, 2);
+    PointerType *LanePtrTy = PointerType::getUnqual(LaneTy);
+
+    Value *RnPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(RnOff), "", LastInst);
+    RnPtr = new BitCastInst(RnPtr, LanePtrTy, "", LastInst);
+    Value *RmPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(RmOff), "", LastInst);
+    RmPtr = new BitCastInst(RmPtr, LanePtrTy, "", LastInst);
+    Value *RdPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(RdOff), "", LastInst);
+    RdPtr = new BitCastInst(RdPtr, LanePtrTy, "", LastInst);
+
+    /* All-ones vector: XOR with it complements every bit of rm. */
+    std::vector<Constant *> Ones(2, CONST32(-1U));
+    Value *AllOnes = ConstantVector::get(Ones);
+
+    Value *Rn = new LoadInst(RnPtr, "", false, LastInst);
+    Value *Rm = new LoadInst(RmPtr, "", false, LastInst);
+    Value *Rd = new LoadInst(RdPtr, "", false, LastInst);
+
+    /* Keep rd where rm is set, take rn where rm is clear. */
+    Value *Kept = AND(Rd, Rm);
+    Value *NotRm = XOR(Rm, AllOnes);
+    Value *Inserted = AND(Rn, NotRm);
+    Value *Merged = OR(Inserted, Kept);
+    new StoreInst(Merged, RdPtr, false, LastInst);
+}
+
+void IRFactory::op_vbit_128(const TCGArg *args)
+{
+    IRDebug(INDEX_op_vbit_128);
+
+    /* vbit rd, rn, rm   (4 x i32 lanes)
+     * operation: rd <- (rn & rm) | (rd & ~rm) */
+    const TCGArg RdOff = args[0];
+    const TCGArg RnOff = args[1];
+    const TCGArg RmOff = args[2];
+    VectorType *LaneTy = VectorType::get(Int32Ty, 4);
+    PointerType *LanePtrTy = PointerType::getUnqual(LaneTy);
+
+    Value *RnPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(RnOff), "", LastInst);
+    RnPtr = new BitCastInst(RnPtr, LanePtrTy, "", LastInst);
+    Value *RmPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(RmOff), "", LastInst);
+    RmPtr = new BitCastInst(RmPtr, LanePtrTy, "", LastInst);
+    Value *RdPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(RdOff), "", LastInst);
+    RdPtr = new BitCastInst(RdPtr, LanePtrTy, "", LastInst);
+
+    /* All-ones vector: XOR with it complements every bit of rm. */
+    std::vector<Constant *> Ones(4, CONST32(-1U));
+    Value *AllOnes = ConstantVector::get(Ones);
+
+    Value *Rn = new LoadInst(RnPtr, "", false, LastInst);
+    Value *Rm = new LoadInst(RmPtr, "", false, LastInst);
+    Value *Rd = new LoadInst(RdPtr, "", false, LastInst);
+
+    /* Take rn where rm is set, keep rd where rm is clear. */
+    Value *Inserted = AND(Rn, Rm);
+    Value *NotRm = XOR(Rm, AllOnes);
+    Value *Kept = AND(Rd, NotRm);
+    Value *Merged = OR(Inserted, Kept);
+    new StoreInst(Merged, RdPtr, false, LastInst);
+}
+
+void IRFactory::op_vbit_64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_vbit_64);
+
+    /* vbit rd, rn, rm   (2 x i32 lanes)
+     * operation: rd <- (rn & rm) | (rd & ~rm) */
+    const TCGArg RdOff = args[0];
+    const TCGArg RnOff = args[1];
+    const TCGArg RmOff = args[2];
+    VectorType *LaneTy = VectorType::get(Int32Ty, 2);
+    PointerType *LanePtrTy = PointerType::getUnqual(LaneTy);
+
+    Value *RnPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(RnOff), "", LastInst);
+    RnPtr = new BitCastInst(RnPtr, LanePtrTy, "", LastInst);
+    Value *RmPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(RmOff), "", LastInst);
+    RmPtr = new BitCastInst(RmPtr, LanePtrTy, "", LastInst);
+    Value *RdPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(RdOff), "", LastInst);
+    RdPtr = new BitCastInst(RdPtr, LanePtrTy, "", LastInst);
+
+    /* All-ones vector: XOR with it complements every bit of rm. */
+    std::vector<Constant *> Ones(2, CONST32(-1U));
+    Value *AllOnes = ConstantVector::get(Ones);
+
+    Value *Rn = new LoadInst(RnPtr, "", false, LastInst);
+    Value *Rm = new LoadInst(RmPtr, "", false, LastInst);
+    Value *Rd = new LoadInst(RdPtr, "", false, LastInst);
+
+    /* Take rn where rm is set, keep rd where rm is clear. */
+    Value *Inserted = AND(Rn, Rm);
+    Value *NotRm = XOR(Rm, AllOnes);
+    Value *Kept = AND(Rd, NotRm);
+    Value *Merged = OR(Inserted, Kept);
+    new StoreInst(Merged, RdPtr, false, LastInst);
+}
+
+void IRFactory::op_vbsl_128(const TCGArg *args)
+{
+    IRDebug(INDEX_op_vbsl_128);
+
+    /* vbsl rd, rn, rm   (4 x i32 lanes)
+     * operation: rd <- (rn & rd) | (rm & ~rd) */
+    const TCGArg RdOff = args[0];
+    const TCGArg RnOff = args[1];
+    const TCGArg RmOff = args[2];
+    VectorType *LaneTy = VectorType::get(Int32Ty, 4);
+    PointerType *LanePtrTy = PointerType::getUnqual(LaneTy);
+
+    Value *RnPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(RnOff), "", LastInst);
+    RnPtr = new BitCastInst(RnPtr, LanePtrTy, "", LastInst);
+    Value *RmPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(RmOff), "", LastInst);
+    RmPtr = new BitCastInst(RmPtr, LanePtrTy, "", LastInst);
+    Value *RdPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(RdOff), "", LastInst);
+    RdPtr = new BitCastInst(RdPtr, LanePtrTy, "", LastInst);
+
+    /* All-ones vector: XOR with it complements every bit of rd. */
+    std::vector<Constant *> Ones(4, CONST32(-1U));
+    Value *AllOnes = ConstantVector::get(Ones);
+
+    Value *Rn = new LoadInst(RnPtr, "", false, LastInst);
+    Value *Rm = new LoadInst(RmPtr, "", false, LastInst);
+    Value *Rd = new LoadInst(RdPtr, "", false, LastInst);
+
+    /* The old rd is the selector: rn where it is set, rm where it is clear. */
+    Value *FromRn = AND(Rn, Rd);
+    Value *NotRd = XOR(Rd, AllOnes);
+    Value *FromRm = AND(Rm, NotRd);
+    Value *Selected = OR(FromRn, FromRm);
+    new StoreInst(Selected, RdPtr, false, LastInst);
+}
+
+void IRFactory::op_vbsl_64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_vbsl_64);
+
+    /* vbsl rd, rn, rm   (2 x i32 lanes)
+     * operation: rd <- (rn & rd) | (rm & ~rd) */
+    const TCGArg RdOff = args[0];
+    const TCGArg RnOff = args[1];
+    const TCGArg RmOff = args[2];
+    VectorType *LaneTy = VectorType::get(Int32Ty, 2);
+    PointerType *LanePtrTy = PointerType::getUnqual(LaneTy);
+
+    Value *RnPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(RnOff), "", LastInst);
+    RnPtr = new BitCastInst(RnPtr, LanePtrTy, "", LastInst);
+    Value *RmPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(RmOff), "", LastInst);
+    RmPtr = new BitCastInst(RmPtr, LanePtrTy, "", LastInst);
+    Value *RdPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(RdOff), "", LastInst);
+    RdPtr = new BitCastInst(RdPtr, LanePtrTy, "", LastInst);
+
+    /* All-ones vector: XOR with it complements every bit of rd. */
+    std::vector<Constant *> Ones(2, CONST32(-1U));
+    Value *AllOnes = ConstantVector::get(Ones);
+
+    Value *Rn = new LoadInst(RnPtr, "", false, LastInst);
+    Value *Rm = new LoadInst(RmPtr, "", false, LastInst);
+    Value *Rd = new LoadInst(RdPtr, "", false, LastInst);
+
+    /* The old rd is the selector: rn where it is set, rm where it is clear. */
+    Value *FromRn = AND(Rn, Rd);
+    Value *NotRd = XOR(Rd, AllOnes);
+    Value *FromRm = AND(Rm, NotRd);
+    Value *Selected = OR(FromRn, FromRm);
+    new StoreInst(Selected, RdPtr, false, LastInst);
+}
+
+void IRFactory::op_vsitofp_128(const TCGArg *args)
+{
+    IRDebug(INDEX_op_vsitofp_128);
+
+    /* Signed integer -> floating point over a 128-bit vector.
+     * args: [0] destination offset, [1] source offset, [2] element bits. */
+    const TCGArg DstOff = args[0];
+    const TCGArg SrcOff = args[1];
+    const TCGArg ElemBits = args[2];
+
+    unsigned Lanes = 0;
+    Type *IntElemTy = nullptr, *FPElemTy = nullptr;
+    switch (ElemBits) {
+    case 32:
+        Lanes = 4; IntElemTy = Int32Ty; FPElemTy = FloatTy;
+        break;
+    case 64:
+        Lanes = 2; IntElemTy = Int64Ty; FPElemTy = DoubleTy;
+        break;
+    default:
+        IRError("%s: invalid element size.\n", __func__);
+    }
+
+    VectorType *IntVecTy = VectorType::get(IntElemTy, Lanes);
+    VectorType *FPVecTy = VectorType::get(FPElemTy, Lanes);
+
+    Value *SrcPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(SrcOff), "", LastInst);
+    SrcPtr = new BitCastInst(SrcPtr, PointerType::getUnqual(IntVecTy), "", LastInst);
+    Value *DstPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(DstOff), "", LastInst);
+    DstPtr = new BitCastInst(DstPtr, PointerType::getUnqual(FPVecTy), "", LastInst);
+
+    /* Load the integer lanes, convert them lane-wise, store the result. */
+    Value *Data = new LoadInst(SrcPtr, "", false, LastInst);
+    Data = new SIToFPInst(Data, FPVecTy, "", LastInst);
+    new StoreInst(Data, DstPtr, false, LastInst);
+}
+
+void IRFactory::op_vuitofp_128(const TCGArg *args)
+{
+    IRDebug(INDEX_op_vuitofp_128);
+
+    /* Unsigned integer -> floating point over a 128-bit vector.
+     * args: [0] destination offset, [1] source offset, [2] element bits. */
+    const TCGArg DstOff = args[0];
+    const TCGArg SrcOff = args[1];
+    const TCGArg ElemBits = args[2];
+
+    unsigned Lanes = 0;
+    Type *IntElemTy = nullptr, *FPElemTy = nullptr;
+    switch (ElemBits) {
+    case 32:
+        Lanes = 4; IntElemTy = Int32Ty; FPElemTy = FloatTy;
+        break;
+    case 64:
+        Lanes = 2; IntElemTy = Int64Ty; FPElemTy = DoubleTy;
+        break;
+    default:
+        IRError("%s: invalid element size.\n", __func__);
+    }
+
+    VectorType *IntVecTy = VectorType::get(IntElemTy, Lanes);
+    VectorType *FPVecTy = VectorType::get(FPElemTy, Lanes);
+
+    Value *SrcPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(SrcOff), "", LastInst);
+    SrcPtr = new BitCastInst(SrcPtr, PointerType::getUnqual(IntVecTy), "", LastInst);
+    Value *DstPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(DstOff), "", LastInst);
+    DstPtr = new BitCastInst(DstPtr, PointerType::getUnqual(FPVecTy), "", LastInst);
+
+    /* Load the integer lanes, convert them lane-wise, store the result. */
+    Value *Data = new LoadInst(SrcPtr, "", false, LastInst);
+    Data = new UIToFPInst(Data, FPVecTy, "", LastInst);
+    new StoreInst(Data, DstPtr, false, LastInst);
+}
+
+void IRFactory::op_vfptosi_128(const TCGArg *args)
+{
+    IRDebug(INDEX_op_vfptosi_128);
+
+    /* Floating point -> signed integer over a 128-bit vector.
+     * args: [0] destination offset, [1] source offset, [2] element bits. */
+    const TCGArg DstOff = args[0];
+    const TCGArg SrcOff = args[1];
+    const TCGArg ElemBits = args[2];
+
+    unsigned Lanes = 0;
+    Type *FPElemTy = nullptr, *IntElemTy = nullptr;
+    switch (ElemBits) {
+    case 32:
+        Lanes = 4; FPElemTy = FloatTy; IntElemTy = Int32Ty;
+        break;
+    case 64:
+        Lanes = 2; FPElemTy = DoubleTy; IntElemTy = Int64Ty;
+        break;
+    default:
+        IRError("%s: invalid element size.\n", __func__);
+    }
+
+    VectorType *FPVecTy = VectorType::get(FPElemTy, Lanes);
+    VectorType *IntVecTy = VectorType::get(IntElemTy, Lanes);
+
+    Value *SrcPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(SrcOff), "", LastInst);
+    SrcPtr = new BitCastInst(SrcPtr, PointerType::getUnqual(FPVecTy), "", LastInst);
+    Value *DstPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(DstOff), "", LastInst);
+    DstPtr = new BitCastInst(DstPtr, PointerType::getUnqual(IntVecTy), "", LastInst);
+
+    /* Load the FP lanes, convert them lane-wise, store the result. */
+    Value *Data = new LoadInst(SrcPtr, "", false, LastInst);
+    Data = new FPToSIInst(Data, IntVecTy, "", LastInst);
+    new StoreInst(Data, DstPtr, false, LastInst);
+}
+
+void IRFactory::op_vfptoui_128(const TCGArg *args)
+{
+    IRDebug(INDEX_op_vfptoui_128);
+
+    /* Floating point -> unsigned integer over a 128-bit vector.
+     * args: [0] destination offset, [1] source offset, [2] element bits. */
+    const TCGArg DstOff = args[0];
+    const TCGArg SrcOff = args[1];
+    const TCGArg ElemBits = args[2];
+
+    unsigned Lanes = 0;
+    Type *FPElemTy = nullptr, *IntElemTy = nullptr;
+    switch (ElemBits) {
+    case 32:
+        Lanes = 4; FPElemTy = FloatTy; IntElemTy = Int32Ty;
+        break;
+    case 64:
+        Lanes = 2; FPElemTy = DoubleTy; IntElemTy = Int64Ty;
+        break;
+    default:
+        IRError("%s: invalid element size.\n", __func__);
+    }
+
+    VectorType *FPVecTy = VectorType::get(FPElemTy, Lanes);
+    VectorType *IntVecTy = VectorType::get(IntElemTy, Lanes);
+
+    Value *SrcPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(SrcOff), "", LastInst);
+    SrcPtr = new BitCastInst(SrcPtr, PointerType::getUnqual(FPVecTy), "", LastInst);
+    Value *DstPtr = GetElementPtrInst::CreateInBounds(CPU, CONSTPtr(DstOff), "", LastInst);
+    DstPtr = new BitCastInst(DstPtr, PointerType::getUnqual(IntVecTy), "", LastInst);
+
+    /* Load the FP lanes, convert them lane-wise, store the result. */
+    Value *Data = new LoadInst(SrcPtr, "", false, LastInst);
+    Data = new FPToUIInst(Data, IntVecTy, "", LastInst);
+    new StoreInst(Data, DstPtr, false, LastInst);
+}
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
diff --git a/llvm/llvm-opc.cpp b/llvm/llvm-opc.cpp
new file mode 100644
index 0000000..cc8436c
--- /dev/null
+++ b/llvm/llvm-opc.cpp
@@ -0,0 +1,4431 @@
+/*
+ * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ *
+ * This file provides LLVM IR generator in terms of basic block and trace.
+ */
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/DIBuilder.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm-debug.h"
+#include "llvm-pass.h"
+#include "llvm-translator.h"
+#include "llvm-target.h"
+#include "llvm-state.h"
+#include "llvm-opc.h"
+
+
+#define INLINE_THRESHOLD 100 /* max # inlined instructions */
+#define INLINE_INSTCOUNT 20 /* max instruction count for inlining a small function */
+
+/* Features enabled by default; the flag below (default false) switches one off. */
+static cl::opt<bool> DisableStateMapping("disable-sm", cl::init(false),
+ cl::cat(CategoryHQEMU), cl::desc("Disable state mapping"));
+
+/* Features disabled by default; the flag below (default false) switches one on. */
+static cl::opt<bool> EnableSimplifyPointer("enable-simptr", cl::init(false),
+ cl::cat(CategoryHQEMU), cl::desc("Enable SimplifyPointer"));
+
+
+TCGOpDef llvm_op_defs[] = { /* one entry per DEF() line of tcg-opc.h */
+#define DEF(s, oargs, iargs, cargs, flags) \
+ { #s , oargs, iargs, cargs, iargs + oargs + cargs, flags },
+#include "tcg-opc.h"
+#undef DEF
+}; /* entry layout: stringized name, oargs, iargs, cargs, their sum, flags */
+
+static IRFactory::FuncPtr OpcFunc[] = { /* handler table: &IRFactory::op_<name> for each DEF() in tcg-opc.h */
+#define DEF(name, oargs, iargs, cargs, flags) &IRFactory::op_ ## name,
+#include "tcg-opc.h"
+#undef DEF
+}; /* generated from the same tcg-opc.h list as llvm_op_defs */
+
+extern LLVMEnv *LLEnv;
+extern hqemu::Mutex llvm_global_lock;
+extern hqemu::Mutex llvm_debug_lock;
+
+/*
+ * IRFactory(): bind translator-owned state and describe every TCG global.
+ */
+IRFactory::IRFactory(LLVMTranslator *Trans)
+    : InitOnce(false), Translator(*Trans), EE(nullptr),
+      HostDisAsm(Translator.getHostDisAsm()), Helpers(Translator.getHelpers()),
+      BaseReg(Translator.getBaseReg()), GuestBaseReg(Translator.getGuestBaseReg()),
+      NI(Translator.getNotifyInfo())
+{
+    /* One Register slot per possible TCG temp. */
+    Reg.resize(TCG_MAX_TEMPS);
+    /* Record the home of each global: fixed host register or memory slot. */
+    TCGContext *Ctx = &tcg_ctx_global;
+    const int GlobalCount = Ctx->nb_globals;
+    for (int Idx = 0; Idx < GlobalCount; ++Idx) {
+        TCGTemp *Temp = &Ctx->temps[Idx];
+        if (Temp->type != TCG_TYPE_I32 && Temp->type != TCG_TYPE_I64)
+            hqemu_error("unsupported register type.\n");
+        int Base = (Temp->fixed_reg) ? Temp->reg : Temp->mem_reg;
+        intptr_t Off = (Temp->fixed_reg) ? -1 : Temp->mem_offset;
+        Reg[Idx].set(Base, Off, Temp->name);
+    }
+
+    /* Memory-backed globals sharing offset and type alias the first one. */
+    for (int Lo = 0; Lo < GlobalCount; ++Lo) {
+        TCGTemp *First = &Ctx->temps[Lo];
+        if (First->fixed_reg)
+            continue;
+        for (int Hi = Lo + 1; Hi < GlobalCount; ++Hi) {
+            TCGTemp *Second = &Ctx->temps[Hi];
+            if (Second->fixed_reg || Reg[Hi].Alias)
+                continue;
+            if (First->mem_offset == Second->mem_offset && First->type == Second->type)
+                Reg[Hi].Alias = &Reg[Lo];
+        }
+    }
+
+    Segment = 0;
+#if defined(__x86_64__) && defined(__linux__)
+    if (GUEST_BASE)
+        Segment = 256; /* GS: 256 */
+#endif
+
+    dbg() << DEBUG_LLVM << "LLVM IR Factory initialized.\n";
+}
+
+IRFactory::~IRFactory()
+{
+    if (!EE)
+        return; /* no live execution engine, nothing to release here */
+    EE->UnregisterJITEventListener(Listener);
+    EE->removeModule(Mod);
+    delete Listener;
+    delete EE;
+}
+
+void IRFactory::CreateSession(TraceBuilder *builder)
+{
+    Builder = builder;
+
+    /* The JIT must exist first: it provides Context, Mod and DL, which the
+     * type cache and the metadata factory below are built from. */
+    CreateJIT();
+    InitializeTypes();
+    MF = new MDFactory(Mod);
+    ExitAddr = CONSTPtr((uintptr_t)tb_ret_addr);
+    runPasses = true;
+
+    /* Start the session with empty bookkeeping. */
+    ClonedFuncs.clear();
+    CommonBB.clear();
+    IndirectBrs.clear();
+    InlineCalls.clear();
+    StatePtr.clear();
+    toErase.clear();
+    toSink.clear();
+    NI.reset();
+}
+
+void IRFactory::DeleteSession()
+{
+    if (Func) {
+        Func->eraseFromParent(); /* unlink from its module and destroy it */
+        Func = nullptr;
+    }
+    delete MF;
+    MF = nullptr; /* like Func: no dangling pointer, a repeated call cannot double-delete */
+    DeleteJIT();
+}
+
+static void setHostAttrs(std::string &MCPU, std::vector<std::string> &MAttrs,
+                         TargetOptions &Options)
+{
+    MCPU = sys::getHostCPUName();
+    /* Turn each reported host feature into a "+name" / "-name" attribute. */
+    StringMap<bool> Detected;
+    sys::getHostCPUFeatures(Detected);
+    for (auto &Feature : Detected)
+        MAttrs.push_back(std::string(Feature.second ? "+" : "-") + Feature.first().str());
+
+    if (MCPU == "core-avx2" || MCPU == "haswell" || MCPU == "knl")
+        Options.AllowFPOpFusion = FPOpFusion::Fast;
+}
+
+#if defined(ENABLE_MCJIT)
+#if defined(LLVM_V35)
+void IRFactory::CreateJIT() /* MCJIT variant compiled when LLVM_V35 is defined */
+{
+ Module *InitMod = Translator.getModule();
+ Context = &InitMod->getContext();
+ Mod = new Module(InitMod->getModuleIdentifier(), *Context); /* new module in InitMod's context */
+ Mod->setDataLayout(InitMod->getDataLayout());
+ Mod->setTargetTriple(InitMod->getTargetTriple());
+
+ DL = getDataLayout(Mod);
+
+ /* Create JIT execution engine. */
+ std::string ErrorMsg, MCPU;
+ std::vector<std::string> MAttrs;
+ TargetOptions Options;
+ /* Fill in host CPU name, feature attributes and FP-fusion policy. */
+ setHostAttrs(MCPU, MAttrs, Options);
+ /* Build an MCJIT engine for Mod using the memory manager held by LLEnv. */
+ EngineBuilder builder(Mod);
+ builder.setMCPU(MCPU);
+ builder.setMAttrs(MAttrs);
+ builder.setErrorStr(&ErrorMsg);
+ builder.setEngineKind(EngineKind::JIT);
+ builder.setOptLevel(CodeGenOpt::Default);
+ builder.setUseMCJIT(true);
+ builder.setMCJITMemoryManager(LLEnv->getMemoryManager().get());
+ builder.setTargetOptions(Options);
+
+ EE = builder.create();
+ /* On failure, ErrorMsg holds the builder's diagnostic. */
+ if (!EE)
+ hqemu_error("%s\n", ErrorMsg.c_str());
+
+ /* Create JIT event listener and link target machine. */
+ Listener = new EventListener(NI);
+
+ EE->RegisterJITEventListener(Listener);
+
+ /* Ask LLVM to reserve basereg (setHQEMUReservedRegs is presumably added by HQEMU's LLVM patch -- TODO confirm). */
+ auto TM = EE->getTargetMachine();
+ auto TRI = const_cast<TargetRegisterInfo*>(TM->getRegisterInfo());
+ TRI->setHQEMUReservedRegs(BaseReg[TCG_AREG0].Name);
+
+ dbg() << DEBUG_LLVM << "LLVM MCJIT initialized.\n";
+}
+#else
+void IRFactory::CreateJIT() /* MCJIT variant compiled when LLVM_V35 is not defined */
+{
+ Module *InitMod = Translator.getModule();
+ Context = &InitMod->getContext();
+ std::unique_ptr<Module> Owner(
+ new Module(InitMod->getModuleIdentifier(), *Context));
+ Mod = Owner.get(); /* raw alias; Owner itself is moved into the EngineBuilder below */
+ Mod->setDataLayout(InitMod->getDataLayout());
+ Mod->setTargetTriple(InitMod->getTargetTriple());
+
+ DL = getDataLayout(Mod);
+
+ /* Create JIT execution engine. */
+ std::string ErrorMsg, MCPU;
+ std::vector<std::string> MAttrs;
+ TargetOptions Options;
+ /* Fill in host CPU name, feature attributes and FP-fusion policy. */
+ setHostAttrs(MCPU, MAttrs, Options);
+ /* The builder takes over the module; Mod stays valid as a non-owning pointer. */
+ EngineBuilder builder(std::move(Owner));
+ builder.setMCPU(MCPU);
+ builder.setMAttrs(MAttrs);
+ builder.setErrorStr(&ErrorMsg);
+ builder.setEngineKind(EngineKind::JIT);
+ builder.setOptLevel(CodeGenOpt::Default);
+ builder.setMCJITMemoryManager(LLEnv->getMemoryManager());
+ builder.setTargetOptions(Options);
+
+ EE = builder.create();
+ /* On failure, ErrorMsg holds the builder's diagnostic. */
+ if (!EE)
+ hqemu_error("%s\n", ErrorMsg.c_str());
+
+ /* Create JIT event listener and link target machine. */
+ Listener = new EventListener(NI);
+ EE->RegisterJITEventListener(Listener);
+
+#if LLVM_USE_INTEL_JITEVENTS
+ IntelJIT = JITEventListener::createIntelJITEventListener();
+ EE->RegisterJITEventListener(IntelJIT);
+#endif
+
+ /* Hand the trace exit address (tb_ret_addr) to MCInstrInfo; setHQEMUExitAddr is presumably added by HQEMU's LLVM patch. */
+ auto TM = EE->getTargetMachine();
+ auto MII = const_cast<MCInstrInfo *>(TM->getMCInstrInfo());
+ MII->setHQEMUExitAddr((unsigned long)tb_ret_addr);
+
+ dbg() << DEBUG_LLVM << "LLVM MCJIT initialized.\n";
+}
+#endif
+
+void IRFactory::DeleteJIT() /* MCJIT build: tear down the engine, its listeners and the module */
+{ /* NOTE(review): EE is dereferenced unchecked; assumes CreateJIT() succeeded -- confirm callers */
+ EE->UnregisterJITEventListener(Listener);
+#if LLVM_USE_INTEL_JITEVENTS
+ EE->UnregisterJITEventListener(IntelJIT);
+ delete IntelJIT;
+#endif
+ EE->removeModule(Mod); /* detach Mod from the engine before either is deleted */
+ delete Listener;
+ delete EE;
+ delete Mod;
+ EE = nullptr; /* makes ~IRFactory() skip its own engine cleanup */
+}
+
+Function *IRFactory::ResolveFunction(std::string Name)
+{
+    /* Reuse the function if the session module already has it. */
+    if (Function *Known = Mod->getFunction(Name))
+        return Known;
+
+    Function *Proto = Translator.getModule()->getFunction(Name);
+    if (!Proto)
+        IRError("%s: unknown function %s.\n", __func__, Name.c_str());
+
+    /* Mirror type, linkage, name and attributes into the session module. */
+    Function *Clone = Function::Create(Proto->getFunctionType(),
+                                       Proto->getLinkage(), Proto->getName(), Mod);
+    Clone->copyAttributesFrom(Proto);
+    ValueToValueMapTy VMap;
+    VMap[Proto] = Clone;
+    /* The body is cloned only for registered helpers that have one. */
+    const bool IsHelper = Helpers.find(Name) != Helpers.end();
+    if (IsHelper && !Proto->isDeclaration()) {
+        Function::arg_iterator DstArg = Clone->arg_begin();
+        for (auto SrcArg = Proto->arg_begin(); SrcArg != Proto->arg_end(); ++SrcArg, ++DstArg) {
+            DstArg->setName(SrcArg->getName());
+            VMap[&*SrcArg] = &*DstArg;
+        }
+        SmallVector<ReturnInst*, 8> Returns;
+        CloneFunctionInto(Clone, Proto, VMap, /*ModuleLevelChanges=*/true, Returns);
+    }
+    ClonedFuncs.insert(Clone);
+    return Clone;
+}
+
+#else
+void IRFactory::CreateJIT() /* legacy-JIT variant (ENABLE_MCJIT not defined) */
+{
+ if (InitOnce)
+ return;
+ /* First call only: the engine works directly on the translator's context and module. */
+ Context = Translator.getContext();
+ Mod = Translator.getModule();
+ DL = getDataLayout(Mod);
+
+ /* Create JIT execution engine. */
+ std::string ErrorMsg, MCPU;
+ std::vector<std::string> MAttrs;
+ TargetOptions Options;
+ /* Fill in host CPU name, feature attributes and FP-fusion policy. */
+ setHostAttrs(MCPU, MAttrs, Options);
+
+ EngineBuilder builder(Mod);
+ builder.setMCPU(MCPU);
+ builder.setMAttrs(MAttrs);
+ builder.setAllocateGVsWithCode(false);
+ builder.setJITMemoryManager(LLEnv->getMemoryManager().get());
+ builder.setErrorStr(&ErrorMsg);
+ builder.setEngineKind(EngineKind::JIT);
+ builder.setOptLevel(CodeGenOpt::Default);
+ builder.setTargetOptions(Options);
+
+ EE = builder.create();
+ /* On failure, ErrorMsg holds the builder's diagnostic. */
+ if (!EE)
+ hqemu_error("%s\n", ErrorMsg.c_str());
+
+ /* Create JIT event listener and link target machine. */
+ Listener = new EventListener(NI);
+
+ EE->RegisterJITEventListener(Listener);
+ EE->DisableLazyCompilation(false); /* NOTE(review): false presumably leaves lazy compilation enabled -- confirm intent */
+
+ /* Ask LLVM to reserve basereg (setHQEMUReservedRegs is presumably added by HQEMU's LLVM patch -- TODO confirm). */
+ auto TM = EE->getTargetMachine();
+ auto TRI = const_cast<TargetRegisterInfo*>(TM->getRegisterInfo());
+ TRI->setHQEMUReservedRegs(BaseReg[TCG_AREG0].Name);
+
+ /* Bind addresses to external symbols. */
+ SymbolMap &Symbols = Translator.getSymbols();
+ for (auto I = Symbols.begin(), E = Symbols.end(); I != E; ++I) {
+ std::string Name = I->first;
+ if (!Mod->getNamedValue(Name))
+ continue; /* symbol not referenced by this module */
+ EE->updateGlobalMapping(Mod->getNamedValue(Name), (void*)I->second);
+ }
+
+ dbg() << DEBUG_LLVM << "LLVM JIT initialized.\n";
+ /* Later calls return at the top of the function. */
+ InitOnce = true;
+}
+
+void IRFactory::DeleteJIT()
+{
+ /* Do nothing with the old JIT: CreateJIT() builds it only once (InitOnce), so it outlives sessions. */
+}
+
+Function *IRFactory::ResolveFunction(std::string Name)
+{
+    if (Function *Found = Mod->getFunction(Name))
+        return Found; /* legacy JIT: looked up directly in Mod, nothing is cloned */
+    IRError("%s: unknown function %s.\n", __func__, Name.c_str());
+    return nullptr;
+}
+#endif
+
+/* Cache the basic LLVM types that are used during IR conversion. */
+void IRFactory::InitializeTypes()
+{
+    auto &Ctx = *Context;
+    VoidTy = Type::getVoidTy(Ctx);
+    Int8Ty = IntegerType::get(Ctx, 8);
+    Int16Ty = IntegerType::get(Ctx, 16);
+    Int32Ty = IntegerType::get(Ctx, 32);
+    Int64Ty = IntegerType::get(Ctx, 64);
+    Int128Ty = IntegerType::get(Ctx, 128);
+    FloatTy = Type::getFloatTy(Ctx);
+    DoubleTy = Type::getDoubleTy(Ctx);
+
+    IntPtrTy = DL->getIntPtrType(Ctx);
+    Int8PtrTy = Type::getInt8PtrTy(Ctx, 0);
+    Int16PtrTy = Type::getInt16PtrTy(Ctx, 0);
+    Int32PtrTy = Type::getInt32PtrTy(Ctx, 0);
+    Int64PtrTy = Type::getInt64PtrTy(Ctx, 0);
+}
+
+/* Get the table of IR conversion routines (OpcFunc) as an untyped pointer. */
+void *IRFactory::getOpcFunc()
+{
+ return OpcFunc;
+}
+
+
+/* Get the CPU pointer.
+ * Returns null unless CPU exists and sits in the entry block of function F. */
+Instruction *IRFactory::getDefaultCPU(Function &F)
+{
+    if (!CPU)
+        return nullptr;
+    BasicBlock *Home = CPU->getParent();
+    const bool InEntry = Home && Home == &F.getEntryBlock();
+    return InEntry ? CPU : nullptr;
+}
+
+static inline std::string getGuestSymbol(target_ulong pc)
+{
+#if defined(CONFIG_USER_ONLY)
+    /* lookup_symbol() is called with llvm_global_lock held. */
+    hqemu::MutexGuard locked(llvm_global_lock);
+    const std::string Sym = lookup_symbol(pc);
+    if (Sym.empty())
+        return Sym;
+    return "<" + Sym + ">:";
+#else
+    return ""; /* symbols are only looked up when CONFIG_USER_ONLY is defined */
+#endif
+}
+
+/* Prepare LLVM Function, initial BasicBlocks and variable declaration. */
+void IRFactory::CreateFunction()
+{
+    target_ulong pc = Builder->getEntryNode()->getGuestPC();
+    std::string Name = getGuestSymbol(pc) +
+                       Builder->getPCString(Builder->getEntryNode());
+
+    dbg() << DEBUG_LLVM << "Requested trace info: pc "
+          << format("0x%" PRIx, pc) << " length " << Builder->getNumNodes()
+          << "\n";
+
+    FunctionType *FuncTy = FunctionType::get(IntPtrTy, false);
+    Func = Function::Create(FuncTy, GlobalVariable::ExternalLinkage, Name, Mod);
+    Func->setCallingConv(CallingConv::C);
+    Func->addFnAttr(Attribute::NoUnwind);
+    Func->addFnAttr(Attribute::Naked);
+    Func->addFnAttr("hqemu");
+
+    /* Prepare all basic blocks: init branches to entry, exit is unreachable. */
+    InitBB = BasicBlock::Create(*Context, "init", Func);
+    ExitBB = BasicBlock::Create(*Context, "exit", Func);
+    CurrBB = BasicBlock::Create(*Context, "entry", Func);
+    LastInst = BranchInst::Create(CurrBB, InitBB);
+    new UnreachableInst(*Context, ExitBB);
+
+    /* Setup base register for CPUArchState pointer, and register for
+     * guest_base. */
+    for (int i = 0; i < TCG_TARGET_NB_REGS; i++)
+        BaseReg[i].Base = nullptr;
+
+    /* Empty inline asm whose only output is tied to the CPU base register. */
+    BaseRegister &CPUReg = BaseReg[TCG_AREG0];
+    std::string Constraint = "={" + CPUReg.Name + "}"; /* no fixed-size buffer to overflow */
+    auto IA = InlineAsm::get(FunctionType::get(Int8PtrTy, false), "",
+                             Constraint, true);
+    CPUReg.Base = CallInst::Create(IA, "cpu", LastInst);
+
+    /* Set special register for guest base if necessary. */
+    GuestBaseReg.Base = CONSTPtr(GUEST_BASE);
+    if (GuestBaseReg.Name != "") {
+        Constraint = "={" + GuestBaseReg.Name + "}";
+        IA = InlineAsm::get(FunctionType::get(Int8PtrTy, false), "",
+                            Constraint, true);
+        GuestBaseReg.Base = new PtrToIntInst(
+                                CallInst::Create(IA, "", LastInst),
+                                IntPtrTy, "guest_base", LastInst);
+    }
+
+    CPU = CPUReg.Base;
+    CPUStruct = new BitCastInst(CPU, CPUReg.Ty, "cpu.struct", LastInst);
+    GEPInsertPos = CPUStruct;
+}
+
+/* Prepare an LLVM BasicBlock for a new guest block. */
+void IRFactory::CreateBlock()
+{
+    GraphNode *Node = Builder->getCurrNode();
+    const bool StartsTrace = (Node == Builder->getEntryNode());
+    std::string pc = Builder->getPCString(Node);
+
+    dbg() << DEBUG_LLVM << " - Process block pc "
+          << format("0x%" PRIx, Node->getGuestPC()) << "\n";
+
+    /* The entry node keeps the current CurrBB; every other node gets its own. */
+    if (!StartsTrace)
+        CurrBB = BasicBlock::Create(*Context, pc, Func);
+
+    /* Provisional branch to ExitBB; it also becomes the insertion point. */
+    LastInst = BranchInst::Create(ExitBB, CurrBB);
+    Builder->setBasicBlock(Node, CurrBB);
+
+    /* Check if the register has legal type. */
+    const int GlobalCount = tcg_ctx.nb_globals;
+    const int TempCount = tcg_ctx.nb_temps;
+    for (int Idx = 0; Idx < TempCount; ++Idx) {
+        TCGTemp *Temp = &tcg_ctx.temps[Idx];
+        if (Temp->type != TCG_TYPE_I32 && Temp->type != TCG_TYPE_I64)
+            hqemu_error("unsupported register type.\n");
+    }
+
+    /* Initialize global registers. */
+    for (int Idx = 0; Idx < GlobalCount; ++Idx) {
+        TCGTemp *Temp = &tcg_ctx.temps[Idx];
+        const bool Is32 = (Temp->type == TCG_TYPE_I32);
+        int State = (Temp->fixed_reg) ? (Register::STATE_REV | Register::STATE_MEM)
+                                      : Register::STATE_MEM;
+        Reg[Idx].reset(State, Is32 ? 32 : 64, Is32 ? Int32Ty : Int64Ty);
+    }
+
+    /* Initialize temporary registers. */
+    for (int Idx = GlobalCount; Idx < TempCount; ++Idx) {
+        TCGTemp *Temp = &tcg_ctx.temps[Idx];
+        const bool Is32 = (Temp->type == TCG_TYPE_I32);
+        int State = (Temp->temp_local) ? Register::STATE_LOC
+                                       : Register::STATE_TMP;
+        Reg[Idx].reset(State, Is32 ? 32 : 64, Is32 ? Int32Ty : Int64Ty);
+    }
+
+    Labels.clear();
+
+#ifdef VERIFY_TB
+    Function *Verifier = ResolveFunction("helper_verify_tb");
+    SmallVector<Value *, 4> CallArgs;
+    CallArgs.push_back(CPUStruct);
+    CallArgs.push_back(CONST32(Node->getTB()->id));
+    CallInst *CI = CallInst::Create(Verifier, CallArgs, "", LastInst);
+    MF->setConst(CI);
+#endif
+}
+
+
+/* Wrapper function to set an unconditional branch: retargets successor 0 of BI to BB. */
+void IRFactory::setSuccessor(BranchInst *BI, BasicBlock *BB)
+{
+ BI->setSuccessor(0, BB);
+}
+
+/* Determine whether we should inline a helper function or not. */
+int IRFactory::AnalyzeInlineCost(CallSite CS)
+{
+    HelperInfo *Info = Helpers[CS.getCalledFunction()->getName()];
+    /* Small helpers short-circuit with a fixed result of 1. */
+    if (Info->Metrics.NumInsts <= INLINE_INSTCOUNT)
+        return 1;
+
+    /* Remaining instruction budget, scaled by InstrCost. */
+    int Budget = INLINE_THRESHOLD - Info->Metrics.NumInsts;
+    Budget *= InlineConstants::InstrCost;
+    /* Every argument costs InstrCost; alloca/constant ones add a weight. */
+    unsigned Pos = 0;
+    for (CallSite::arg_iterator Arg = CS.arg_begin(), End = CS.arg_end();
+         Arg != End; ++Arg, ++Pos) {
+        Budget -= InlineConstants::InstrCost;
+        if (isa<AllocaInst>(Arg))
+            Budget += Info->ArgumentWeights[Pos].AllocaWeight;
+        else if (isa<Constant>(Arg))
+            Budget += Info->ArgumentWeights[Pos].ConstantWeight;
+    }
+    return Budget;
+}
+
+
+/* Perform helper function inlining: drain InlineCalls from the back. */
+void IRFactory::ProcessInline()
+{
+    while (!InlineCalls.empty()) {
+        auto *Pending = static_cast<CallInst *>(InlineCalls.back());
+        InlineCalls.pop_back(); /* dequeue before InlineFunc() runs */
+        InlineFunc(Pending);
+    }
+}
+
+void IRFactory::VerifyFunction(Function &F) /* run verifyFunction() on F only when DEBUG_VERIFY is set */
+{
+ if (DM.getDebugMode() & DEBUG_VERIFY)
+ verifyFunction(F, &DM.debug()); /* output goes to DM.debug(); the return value is ignored */
+}
+
+/* Bring the function into a legal form and inline calls. The function must
+ * be well-formed before any further optimization (i.e. inlining calls) is
+ * applied; otherwise the optimization may fail or produce wrong results. */
+void IRFactory::PreProcess()
+{
+ dbg() << DEBUG_LLVM << __func__ << " entered.\n";
+
+ ProcessErase(toErase);
+
+ /* Insert terminator instruction to basic blocks that branch to ExitBB.
+ * This could happen when the last TCG opc is a call instruction. */
+ for (auto PI = pred_begin(ExitBB), PE = pred_end(ExitBB); PI != PE; PI++) {
+ Instruction *InsertPos = (*PI)->getTerminator();
+ new UnreachableInst(*Context, InsertPos);
+ toErase.push_back(InsertPos);
+ }
+ ProcessErase(toErase);
+ ExitBB->eraseFromParent();
+
+ /* Remove instructions after indirect branches. */
+ std::set<Instruction *> AfterIB;
+ for (unsigned i = 0, e = IndirectBrs.size(); i != e; ++i) {
+ BasicBlock *BB = IndirectBrs[i]->getParent();
+ for (auto I = ++BasicBlock::iterator(IndirectBrs[i]), E = BB->end();
+ I != E; ++I)
+ AfterIB.insert(&*I);
+ }
+ for (auto I = AfterIB.begin(), E = AfterIB.end(); I != E; ++I)
+ toErase.push_back(*I);
+ ProcessErase(toErase);
+
+ /* Sink blocks to the end. */
+ Function::iterator InsertPos = Func->end();
+ Function::BasicBlockListType &Blocks = Func->getBasicBlockList();
+ for (unsigned i = 0, e = toSink.size(); i != e; ++i) {
+ if (&*InsertPos == toSink[i])
+ continue;
+ Blocks.splice(InsertPos, Blocks, toSink[i]);
+ }
+
+ VerifyFunction(*Func);
+
+ /* Inline helper functions. */
+ ProcessInline();
+
+ SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> BackEdges;
+ FindFunctionBackedges(*Func, BackEdges);
+
+ TraceInfo *Trace = Builder->getTrace();
+ Trace->NumLoop = BackEdges.size();
+ dbg() << DEBUG_LLVM << __func__ << ": trace formation with pc "
+ << format("0x%" PRIx, Trace->getEntryPC())
+ << " length " << Trace->getNumBlock()
+ << " is_loop " << (Trace->NumLoop ? true : false) << "\n";
+
+#if 1 || defined(CONFIG_SOFTMMU)
+ if (Trace->NumLoop) {
+ intptr_t Offset = offsetof(CPUState, tcg_exit_req) - ENV_OFFSET;
+ Value *ExitRequestPtr = GetElementPtrInst::CreateInBounds(CPU,
+ CONSTPtr(Offset),
+ "", InitBB->getTerminator());
+ ExitRequestPtr = new BitCastInst(ExitRequestPtr, Int32PtrTy,
+ "tcg_exit_req",
+ InitBB->getTerminator());
+
+ /* Create the exit stub. */
+ for (unsigned i = 0, e = BackEdges.size(); i != e; ++i) {
+ BasicBlock *TCGExitBB = BasicBlock::Create(*Context, "exit", Func);
+ LastInst = BranchInst::Create(TCGExitBB, TCGExitBB);
+ StoreInst *SI = new StoreInst(CONST32(0), ExitRequestPtr, true, LastInst);
+ InsertExit(0);
+ LastInst->eraseFromParent();
+
+ MF->setExit(SI);
+
+ auto BackEdgeBB = const_cast<BasicBlock*>(BackEdges[i].first);
+ auto LoopHeader = const_cast<BasicBlock*>(BackEdges[i].second);
+ auto BI = const_cast<TerminatorInst *>(BackEdgeBB->getTerminator());
+
+ toErase.push_back(BI);
+
+ Value *ExitRequest = new LoadInst(ExitRequestPtr, "", true, BI);
+ Value *Cond = new ICmpInst(BI, ICmpInst::ICMP_EQ, ExitRequest,
+ CONST32(0), "");
+ BI = BranchInst::Create(LoopHeader, TCGExitBB, Cond, BI);
+ BI->getParent()->setName("loopback");
+ MF->setLoop(BI);
+ }
+ }
+#else
+ if (Trace->NumLoop) {
+ for (unsigned i = 0, e = BackEdges.size(); i != e; ++i) {
+ auto BackEdgeBB = const_cast<BasicBlock*>(BackEdges[i].first);
+ auto BI = const_cast<TerminatorInst *>(BackEdgeBB->getTerminator());
+ BI->getParent()->setName("loopback");
+ MF->setLoop(BI);
+
+ for (auto BI = BackEdgeBB->begin(), BE = BackEdgeBB->end(); BI != BE; ++BI) {
+ if (auto SI = dyn_cast<StoreInst>(BI)) {
+ intptr_t Off = 0;
+ Value *Base = getBaseWithConstantOffset(DL, getPointerOperand(SI), Off);
+ if (Base == CPU && isStateOfPC(Off))
+ toErase.push_back(SI);
+ }
+ }
+ }
+ }
+#endif
+
+ ProcessErase(toErase);
+
+ if (DM.getDebugMode() & DEBUG_IR) {
+ hqemu::MutexGuard locked(llvm_debug_lock);
+ Func->print(DM.debug());
+ }
+}
+
+/* Add the target-information passes of the execution engine's target machine
+ * to FPM. The set of passes depends on the LLVM version being built. */
+void IRFactory::InitializeLLVMPasses(legacy::FunctionPassManager *FPM)
+{
+    auto *Target = EE->getTargetMachine();
+#if defined(LLVM_V35)
+    Target->addAnalysisPasses(*FPM);
+    FPM->add(new DataLayoutPass(Mod));
+    FPM->add(createBasicTargetTransformInfoPass(Target));
+#else
+    initializeTargetTransformInfoWrapperPassPass(*PassRegistry::getPassRegistry());
+    FPM->add(createTargetTransformInfoWrapperPass(Target->getTargetIRAnalysis()));
+#endif
+}
+
+/* Run the optimization pipeline over Func. Nothing is done unless the build
+ * defines ENABLE_PASSES and runPasses is set. */
+void IRFactory::Optimize()
+{
+#define addPass(PM, P) do { PM->add(P); } while(0)
+#define addPassOptional(PM, P, Disable) \
+    do {                                \
+        if (!Disable) PM->add(P);       \
+    } while(0)
+
+#if defined(ENABLE_PASSES)
+    if (!runPasses)
+        return;
+
+    legacy::FunctionPassManager *Passes = new legacy::FunctionPassManager(Mod);
+    InitializeLLVMPasses(Passes);
+
+    /* The order below is the pipeline order. */
+    addPass(Passes, createProfileExec(this));
+    addPass(Passes, createCombineGuestMemory(this));
+    addPass(Passes, createCombineZExtTrunc());
+    addPassOptional(Passes, createStateMappingPass(this), DisableStateMapping);
+    addPass(Passes, createPromoteMemoryToRegisterPass());
+    addPass(Passes, createCombineCasts(this));
+    addPassOptional(Passes, createSimplifyPointer(this), !EnableSimplifyPointer);
+    addPass(Passes, createAggressiveDCEPass());
+    addPass(Passes, createCFGSimplificationPass());
+    addPass(Passes, createInstructionCombiningPass());
+    addPass(Passes, createRedundantStateElimination(this));
+    addPass(Passes, createCombineCasts(this));
+
+    Passes->run(*Func);
+    delete Passes;
+#endif
+
+#undef addPass
+#undef addPassOptional
+}
+
+
+/* Post-pass cleanup. Under MCJIT, detach cloned function bodies from their
+ * module and bind known external symbols to their addresses. Finally dump
+ * the optimized IR when DEBUG_IR_OPT is enabled. */
+void IRFactory::PostProcess()
+{
+    dbg() << DEBUG_LLVM << __func__ << " entered.\n";
+
+#if defined(ENABLE_MCJIT)
+    for (Function *Cloned : ClonedFuncs) {
+        if (Cloned->isDeclaration())
+            continue;
+        Cloned->removeFromParent();
+    }
+
+    /* Bind addresses to external symbols that the module references. */
+    SymbolMap &Symbols = Translator.getSymbols();
+    for (auto &Entry : Symbols) {
+        std::string Name = Entry.first;
+        if (Mod->getNamedValue(Name))
+            EE->updateGlobalMapping(Mod->getNamedValue(Name), (void*)Entry.second);
+    }
+#endif
+
+    const bool DumpIR = (DM.getDebugMode() & DEBUG_IR_OPT) != 0;
+    if (DumpIR) {
+        hqemu::MutexGuard locked(llvm_debug_lock);
+        Func->print(DM.debug());
+    }
+}
+
+/* Finish the emitted host code described by NI: under MCJIT, write the
+ * jump stubs at the recorded patch points and record the chaining slot
+ * addresses; then flush the instruction cache for the code range and, when
+ * DEBUG_OUTASM is enabled, print a disassembly of the generated code. */
+void IRFactory::FinalizeObject()
+{
+    dbg() << DEBUG_LLVM << __func__ << " entered.\n";
+
+    uintptr_t Code = (uintptr_t)NI.Code;
+    uint32_t Size = NI.Size;
+
+#if defined(ENABLE_MCJIT)
+    for (unsigned i = 0, e = NI.Patches.size(); i != e; ++i) {
+        NotifyInfo::PatchInfo &Patch = NI.Patches[i];
+        uintptr_t Addr = Patch.Addr;
+        code_ostream OS(Addr);
+
+        /* If the address to patch is outside this code region, skip this
+         * invalid patch point. Actually this should not happen, but LLVM v35
+         * seems to report such invalid address. Both bounds are checked so
+         * that nothing outside [Code, Code + Size) is ever written. */
+        if (Addr < Code || Addr >= Code + Size)
+            continue;
+        if (Patch.Type == PATCH_EXIT_TB) {
+#if defined(LLVM_V35) && defined(TCG_TARGET_I386)
+            /* jmp rel32 to tb_ret_addr; rel32 is relative to the end of the
+             * 5-byte instruction. */
+            EmitByte(OS, 0xE9);
+            EmitConstant(OS, (uintptr_t)tb_ret_addr - Addr - 5, 4);
+#endif
+        } else if (Patch.Type == PATCH_TRACE_BLOCK_CHAINING) {
+#if defined(TCG_TARGET_I386)
+            /* Skip bytes so that the 4-byte displacement following the
+             * one-byte opcode starts on a 4-byte boundary. */
+            unsigned NumSkip = 3 - Addr % 4;
+            OS.Skip(NumSkip);
+            EmitByte(OS, 0xE9);
+            EmitConstant(OS, 0, 4);
+            NI.ChainSlot[Patch.Idx].Addr = Addr + NumSkip;
+#elif defined(TCG_TARGET_PPC64)
+            /* Skip one word when the address is not 8-byte aligned. */
+            unsigned NumSkip = 0;
+            if (Addr & 7)
+                NumSkip = 4;
+            OS.Skip(NumSkip);
+            EmitConstant(OS, 0x48000000 | (16 & 0x3fffffc), 4); /* b .+16 */
+            EmitConstant(OS, 0x60000000, 4);                    /* nop */
+            EmitConstant(OS, 0x7C0903A6 | (12 << 21), 4);       /* mtctr r12 */
+            EmitConstant(OS, 0x4E800420, 4);                    /* bctr */
+            NI.ChainSlot[Patch.Idx].Addr = Addr + NumSkip;
+#else
+            NI.ChainSlot[Patch.Idx].Addr = Addr;
+#endif
+        }
+    }
+#endif
+
+    /* Flush instruction cache */
+    flush_icache_range(Code, Code + Size);
+
+    if (DM.getDebugMode() & DEBUG_OUTASM) {
+        hqemu::MutexGuard locked(llvm_debug_lock);
+        if (HostDisAsm)
+            HostDisAsm->PrintOutAsm((uint64_t)Code, (uint64_t)Size);
+        else {
+            /* NOTE(review): the header goes to DM.debug() while disas()
+             * writes to stderr; output may interleave if these differ. */
+            auto &OS = DM.debug();
+            OS << "\nOUT: [size=" << Size << "]\n";
+            disas(stderr, (void *)Code, Size);
+            OS << "\n";
+        }
+    }
+}
+
+/* Drive one JIT compilation of Func: pre-process, optimize, post-process,
+ * verify, hand the function to the execution engine, then finalize the
+ * emitted object. */
+void IRFactory::Compile()
+{
+    dbg() << DEBUG_LLVM << "Translator " << Translator.getID()
+          << " starts compiling...\n";
+
+    /* IR-level pipeline. */
+    PreProcess();
+    Optimize();
+    PostProcess();
+
+    VerifyFunction(*Func);
+
+    /* Code generation. NI.Func is set before the engine is invoked. */
+    NI.Func = Func;
+    EE->getPointerToFunction(Func);
+    EE->finalizeObject();
+
+    /* Patch and flush the generated host code. */
+    FinalizeObject();
+
+    dbg() << DEBUG_LLVM << __func__ << ": done.\n";
+}
+
+/* Return the pointer-to-integer type of the given bit width (8/16/32/64) in
+ * address space AS. Any other width is reported through IRError and yields
+ * nullptr. */
+PointerType *IRFactory::getPointerTy(int Size, unsigned AS)
+{
+    if (Size == 32)
+        return Type::getInt32PtrTy(*Context, AS);
+    if (Size == 64)
+        return Type::getInt64PtrTy(*Context, AS);
+    if (Size == 16)
+        return Type::getInt16PtrTy(*Context, AS);
+    if (Size == 8)
+        return Type::getInt8PtrTy(*Context, AS);
+
+    IRError("%s: invalid bit type %d.\n", __func__, Size);
+    return nullptr;
+}
+
+/* Widen V to type Ty. Sign-extends when opc has MO_SIGN set, zero-extends
+ * otherwise; returns V unchanged when the sizes already match. Narrowing is
+ * reported through IRError. */
+Value *IRFactory::getExtendValue(Value *V, Type *Ty, int opc)
+{
+    const int FromBits = DL->getTypeSizeInBits(V->getType());
+    const int ToBits = DL->getTypeSizeInBits(Ty);
+
+    if (FromBits > ToBits)
+        IRError("%s: invalid size old=%d new=%d\n", __func__, FromBits, ToBits);
+    if (FromBits == ToBits)
+        return V;
+
+    Value *Wide;
+    if (opc & MO_SIGN)
+        Wide = SEXT(V, Ty);
+    else
+        Wide = ZEXT(V, Ty);
+    return Wide;
+}
+
+/* Narrow V to the integer width selected by opc. Returns V unchanged when
+ * the sizes already match; widening is reported through IRError. */
+Value *IRFactory::getTruncValue(Value *V, int opc)
+{
+    const int FromBits = DL->getTypeSizeInBits(V->getType());
+    const int ToBits = getSizeInBits(opc);
+
+    if (FromBits < ToBits)
+        IRError("%s: invalid size old=%d new=%d\n", __func__, FromBits, ToBits);
+    if (FromBits == ToBits)
+        return V;
+
+    return TRUNC(V, Type::getIntNTy(*Context, ToBits));
+}
+
+/* Byte-swap V according to the size field of opc when the build defines
+ * NEED_BSWAP; otherwise return V untouched. An unknown size is reported
+ * through IRError and V is returned as is. */
+Value *IRFactory::ConvertEndian(Value *V, int opc)
+{
+#ifdef NEED_BSWAP
+    const int SizeCode = opc & MO_SIZE;
+    if (SizeCode == MO_8)
+        return V;
+    if (SizeCode == MO_16)
+        return BSWAP16(V);
+    if (SizeCode == MO_32)
+        return BSWAP32(V);
+    if (SizeCode == MO_64)
+        return BSWAP64(V);
+
+    IRError("%s: invalid size (opc=%d)\n", __func__, opc);
+#endif
+    return V;
+}
+
+/* Emit a call to the llvm.bswap intrinsic, specialized for type Ty, on value
+ * V. The call is inserted before InsertPos and returned. */
+Value *IRFactory::CreateBSwap(Type *Ty, Value *V, Instruction *InsertPos)
+{
+    Type *OverloadTys[] = { Ty };
+    Function *BSwapFn = Intrinsic::getDeclaration(Mod, Intrinsic::bswap, OverloadTys);
+
+    SmallVector<Value *, 4> CallArgs(1, V);
+    return CallInst::Create(BSwapFn, CallArgs, "", InsertPos);
+}
+
+/* Produce a CPU-state argument matching parameter Idx of F. CPUStruct is
+ * returned when its type already matches; otherwise CPU is bitcast to the
+ * parameter type, with the cast inserted before InsertPos. */
+Value *IRFactory::ConvertCPUType(Function *F, int Idx, Instruction *InsertPos)
+{
+    Type *WantedTy = F->getFunctionType()->getParamType(Idx);
+    if (CPUStruct->getType() == WantedTy)
+        return CPUStruct;
+    return new BitCastInst(CPU, WantedTy, "", InsertPos);
+}
+
+/* Produce a CPU-state argument matching parameter Idx of F. CPUStruct is
+ * returned when its type already matches; otherwise CPU is bitcast to the
+ * parameter type, with the cast appended to basic block InsertPos. */
+Value *IRFactory::ConvertCPUType(Function *F, int Idx, BasicBlock *InsertPos)
+{
+    Type *WantedTy = F->getFunctionType()->getParamType(Idx);
+    if (CPUStruct->getType() == WantedTy)
+        return CPUStruct;
+    return new BitCastInst(CPU, WantedTy, "", InsertPos);
+}
+
+/* Return true if byte offset Off (relative to CPUArchState) falls inside the
+ * guest program-counter field, i.e. within TARGET_LONG_SIZE bytes starting at
+ * the field's offset. The field is chosen per guest architecture at compile
+ * time; SPARC checks both 'pc' and 'npc'. */
+bool IRFactory::isStateOfPC(intptr_t Off)
+{
+ intptr_t IPOffset;
+ /* Select the PC field for the configured guest target. */
+#if defined(TARGET_ALPHA)
+ IPOffset = offsetof(CPUArchState, pc);
+#elif defined(TARGET_AARCH64)
+ IPOffset = offsetof(CPUArchState, pc);
+#elif defined(TARGET_ARM)
+ IPOffset = offsetof(CPUArchState, regs[15]);
+#elif defined(TARGET_CRIS)
+ IPOffset = offsetof(CPUArchState, pc);
+#elif defined(TARGET_I386)
+ IPOffset = offsetof(CPUArchState, eip);
+#elif defined(TARGET_M68K)
+ IPOffset = offsetof(CPUArchState, pc);
+#elif defined(TARGET_MICROBLAZE)
+ IPOffset = offsetof(CPUArchState, sregs[0]);
+#elif defined(TARGET_MIPS)
+ IPOffset = offsetof(CPUArchState, active_tc.PC);
+#elif defined(TARGET_PPC)
+ IPOffset = offsetof(CPUArchState, nip);
+#elif defined(TARGET_SH4)
+ IPOffset = offsetof(CPUArchState, pc);
+#elif defined(TARGET_SPARC)
+ /* SPARC tracks a second field, npc, in addition to pc. */
+ intptr_t IPOffset2;
+ IPOffset = offsetof(CPUArchState, pc);
+ IPOffset2 = offsetof(CPUArchState, npc);
+#else
+#error "unsupported processor type"
+#endif
+
+ /* Half-open range test: [IPOffset, IPOffset + TARGET_LONG_SIZE). */
+#if defined(TARGET_ALPHA) || defined(TARGET_ARM) || defined(TARGET_AARCH64) || \
+ defined(TARGET_CRIS) || defined(TARGET_I386) || defined(TARGET_M68K) || \
+ defined(TARGET_MICROBLAZE) || defined(TARGET_MIPS) || defined(TARGET_PPC) || \
+ defined(TARGET_SH4)
+ return (Off >= IPOffset && Off < IPOffset + TARGET_LONG_SIZE);
+#elif defined(TARGET_SPARC)
+ return ((Off >= IPOffset && Off < IPOffset + TARGET_LONG_SIZE) ||
+ (Off >= IPOffset2 && Off < IPOffset2 + TARGET_LONG_SIZE));
+#endif
+}
+
+/* Trace building requires a store-IP instruction to link basic blocks.
+ * On some architectures the IP is promoted to a register, so the store has
+ * to be regenerated: every dirty, register-resident global that maps to the
+ * PC state is stored before InsertPos and then demoted. */
+void IRFactory::CreateStorePC(Instruction *InsertPos)
+{
+    const int NumGlobals = tcg_ctx.nb_globals;
+    for (int Idx = 0; Idx != NumGlobals; ++Idx) {
+        Register &Global = Reg[Idx];
+        if (!(Global.isReg() && Global.isDirty()))
+            continue;
+        if (!isStateOfPC(Global.Off))
+            continue;
+        StoreState(Global, InsertPos);
+        Global.Demote();
+    }
+}
+
+/* Write dirty register-resident states back before InsertPos and demote them.
+ *   COHERENCE_NONE   - do nothing;
+ *   COHERENCE_GLOBAL - handle the TCG globals only;
+ *   otherwise        - additionally handle the temporaries, storing only
+ *                      those that are local.
+ * Every visited register is demoted whether or not it was stored. */
+void IRFactory::SaveGlobals(int level, Instruction *InsertPos)
+{
+    if (level == COHERENCE_NONE)
+        return;
+
+    const int GlobalEnd = tcg_ctx.nb_globals;
+    const int TempEnd = tcg_ctx.nb_temps;
+
+    for (int Idx = 0; Idx < GlobalEnd; ++Idx) {
+        Register &Global = Reg[Idx];
+        const bool NeedsStore = Global.isReg() && Global.isDirty();
+        if (NeedsStore)
+            StoreState(Global, InsertPos);
+        Global.Demote();
+    }
+
+    if (level == COHERENCE_GLOBAL)
+        return;
+
+    /* Local temporaries are stored through StoreState as well. */
+    for (int Idx = GlobalEnd; Idx < TempEnd; ++Idx) {
+        Register &Temp = Reg[Idx];
+        const bool NeedsStore = Temp.isReg() && Temp.isLocal() && Temp.isDirty();
+        if (NeedsStore)
+            StoreState(Temp, InsertPos);
+        Temp.Demote();
+    }
+}
+
+/* Return the typed pointer to the CPU state slot backing 'reg', creating and
+ * caching it in StatePtr on first use. The cache key is (offset, pointer
+ * type); the pointer type is i32* for 32-bit registers and i64* otherwise. */
+Value *IRFactory::StatePointer(Register &reg)
+{
+    const intptr_t Off = reg.Off;
+    PointerType *PTy = Int64PtrTy;
+    if (reg.Size == 32)
+        PTy = Int32PtrTy;
+
+    const std::pair<intptr_t, Type *> Key(Off, PTy);
+    auto Cached = StatePtr.find(Key);
+    if (Cached != StatePtr.end())
+        return Cached->second;
+
+    /* Not cached yet: build base + offset, then cast to the slot type. */
+    std::string Name = isStateOfPC(Off) ? "pc" : reg.Name;
+    auto *Addr = GetElementPtrInst::CreateInBounds(BaseReg[reg.Base].Base,
+                                                   CONSTPtr(Off), "", GEPInsertPos);
+    auto *Typed = new BitCastInst(Addr, PTy, Name, InitBB->getTerminator());
+    StatePtr[Key] = Typed;
+    return Typed;
+}
+
+/* Return a pointer of type PTy to offset Off from the base held by 'reg',
+ * creating and caching it in StatePtr on first use. 'reg' must be a reserved
+ * (base) register; anything else is reported through IRError. */
+Value *IRFactory::StatePointer(Register &reg, intptr_t Off, Type *PTy)
+{
+    if (!reg.isRev())
+        IRError("%s: internal error.\n", __func__);
+
+    const std::pair<intptr_t, Type *> Key(Off, PTy);
+    auto Cached = StatePtr.find(Key);
+    if (Cached != StatePtr.end())
+        return Cached->second;
+
+    /* Only the PC slot gets a readable name. */
+    std::string Name = isStateOfPC(Off) ? "pc" : "";
+    auto *Addr = GetElementPtrInst::CreateInBounds(BaseReg[reg.Base].Base,
+                                                   CONSTPtr(Off), "", GEPInsertPos);
+    auto *Typed = new BitCastInst(Addr, PTy, Name, InitBB->getTerminator());
+    StatePtr[Key] = Typed;
+    return Typed;
+}
+
+/* Return the current LLVM value of 'reg'.
+ *   - reserved (base) register: the base value itself;
+ *   - alias: the value of the aliased register;
+ *   - already in a virtual register: the cached data;
+ *   - local: a fresh load from its stack slot (allocated on demand);
+ *   - otherwise: a load from CPUArchState, which is then cached in 'reg'. */
+Value *IRFactory::LoadState(Register &reg)
+{
+    if (reg.isRev())
+        return BaseReg[reg.Base].Base;
+    if (reg.isAlias())
+        return LoadState(reg.getAlias());
+    if (reg.isReg())
+        return reg.getData();
+
+    if (reg.isLocal()) {
+        /* The stack slot lives in the init block. */
+        if (reg.AI == nullptr)
+            reg.AI = CreateAlloca(reg.Ty, 0, "loc", InitBB->getTerminator());
+        return new LoadInst(reg.AI, "", false, LastInst);
+    }
+
+    /* The state is not held in an LLVM virtual register yet. */
+    Value *Loaded = new LoadInst(StatePointer(reg), "", false, LastInst);
+    reg.setData(Loaded);
+    return Loaded;
+}
+
+/* Emit, before InsertPos, a store of the value held by 'reg' to its home:
+ * the stack slot for a local (allocated on demand), or the CPU state slot
+ * otherwise. Stores to the PC state are volatile. Storing a reserved (base)
+ * register is reported through IRError. */
+void IRFactory::StoreState(Register &reg, Instruction *InsertPos)
+{
+#ifdef ASSERT
+    int Size = DL->getTypeSizeInBits(reg.getData()->getType());
+    if (Size != reg.Size)
+        IRError("%s: internal error\n", __func__);
+#endif
+    if (reg.isRev())
+        IRError("%s: fatal error\n", __func__);
+
+    if (!reg.isLocal()) {
+        const bool IsPC = isStateOfPC(reg.Off);
+        new StoreInst(reg.getData(), StatePointer(reg), IsPC, InsertPos);
+        return;
+    }
+
+    if (reg.AI == nullptr)
+        reg.AI = CreateAlloca(reg.Ty, 0, "loc", InitBB->getTerminator());
+    new StoreInst(reg.getData(), reg.AI, false, InsertPos);
+}
+
+
+/*
+ * TCG opcode to LLVM IR translation functions.
+ */
+/* op_hotpatch(): emits no IR; only the opcode trace is produced.
+ * 'args' is not read. */
+void IRFactory::op_hotpatch(const TCGArg *args)
+{
+    IRDebug(INDEX_op_hotpatch);
+}
+
+/*
+ * op_annotate()
+ *  args[0]: annotation kind
+ *
+ * A_SetCC marks the instruction immediately before LastInst as a condition
+ * (skipped when LastInst is null or first in its block).
+ * A_NoSIMDization is forwarded to the builder as an attribute.
+ */
+void IRFactory::op_annotate(const TCGArg *args)
+{
+    IRDebug(INDEX_op_annotate);
+
+    const uint32_t Kind = *args;
+    if (Kind == A_SetCC) {
+        const bool HasPrev = LastInst && LastInst != &*LastInst->getParent()->begin();
+        if (HasPrev)
+            MF->setCondition(&*--BasicBlock::iterator(LastInst));
+        return;
+    }
+
+    if (Kind == A_NoSIMDization)
+        Builder->addAttribute(A_NoSIMDization);
+}
+
+/*
+ * op_jmp()
+ *  args[0]: In (jump target)
+ *
+ * Saves all register state, then emits an indirect branch to the target,
+ * converting it to a pointer first if necessary. The branch is marked as an
+ * exit.
+ */
+void IRFactory::op_jmp(const TCGArg *args)
+{
+    IRDebug(INDEX_op_jmp);
+
+    Value *Target = LoadState(Reg[args[0]]);
+
+    SaveGlobals(COHERENCE_ALL, LastInst);
+
+    const bool IsPointer = Target->getType()->isPointerTy();
+    if (!IsPointer)
+        Target = ITP8(Target);
+
+    IndirectBrInst *Jump = IndirectBrInst::Create(Target, 1, LastInst);
+    MF->setExit(Jump);
+}
+
+/*
+ * op_discard()
+ *  args[0]: In
+ *
+ * Demotes the register if it currently lives in a virtual register;
+ * nothing is stored back.
+ */
+void IRFactory::op_discard(const TCGArg *args)
+{
+    IRDebug(INDEX_op_discard);
+
+    Register &Victim = Reg[args[0]];
+    if (!Victim.isReg())
+        return;
+    Victim.Demote();
+}
+
+/*
+ * op_set_label()
+ *  args[0]: Label number
+ *
+ * Saves all register state, switches CurrBB to the block of the label
+ * (created on first reference) and links the previous terminator to it.
+ * If the instruction right before the previous terminator is an indirect
+ * branch, that terminator is erased instead. A new provisional branch to
+ * ExitBB becomes the insertion point.
+ */
+void IRFactory::op_set_label(const TCGArg *args)
+{
+    IRDebug(INDEX_op_set_label);
+
+    SaveGlobals(COHERENCE_ALL, LastInst);
+
+    const TCGArg LabelId = args[0];
+    if (Labels.count(LabelId) == 0)
+        Labels[LabelId] = BasicBlock::Create(*Context, "true_dest", Func);
+    CurrBB = Labels[LabelId];
+
+    if (LastInst) {
+        bool AfterIndirectBr = false;
+        if (LastInst != &*LastInst->getParent()->begin())
+            AfterIndirectBr = isa<IndirectBrInst>(--BasicBlock::iterator(LastInst));
+
+        if (AfterIndirectBr)
+            LastInst->eraseFromParent();
+        else
+            setSuccessor(LastInst, CurrBB);
+    }
+
+    LastInst = BranchInst::Create(ExitBB, CurrBB);
+}
+
+/*
+ * op_call()
+ * Translate a TCG helper call into an LLVM call instruction.
+ * The argument counts come from the current TCGOp (callo / calli), and the
+ * argument array is laid out as read by the code below:
+ * args[0 .. nb_oargs-1] : out args
+ * args[nb_oargs .. nb_oargs+nb_iargs-1] : function parameters
+ * args[nb_oargs+nb_iargs] : function address
+ * args[nb_oargs+nb_iargs+1] : flags
+ */
+void IRFactory::op_call(const TCGArg *args)
+{
+ IRDebug(INDEX_op_call);
+
+ TCGOp * const op = NI.Op;
+ int nb_oargs = op->callo;
+ int nb_iargs = op->calli;
+ int nb_params = nb_iargs;
+ tcg_insn_unit *func_addr = (tcg_insn_unit *)(intptr_t)args[nb_oargs + nb_iargs];
+ int flags = args[nb_oargs + nb_iargs + 1];
+ SmallVector<Value *, 4> Params;
+
+ /* If the called function is an illegal helper, skip this trace. */
+ if (isIllegalHelper((void *)func_addr)) {
+ Builder->Abort();
+ return;
+ }
+
+ /* Get function declaration from LLVM module. */
+ TCGHelperMap &TCGHelpers = Translator.getTCGHelpers();
+ if (TCGHelpers.find((uintptr_t)func_addr) == TCGHelpers.end())
+ IRError("%s: cannot resolve funtion.\n", __func__);
+
+ std::string FName = TCGHelpers[(uintptr_t)func_addr];
+ Function *F = ResolveFunction(FName);
+
+ /* Helpers registered as const are treated as not reading globals. */
+ std::set<std::string> &ConstHelpers = Translator.getConstHelpers();
+ if (ConstHelpers.find(FName) != ConstHelpers.end())
+ flags |= TCG_CALL_NO_READ_GLOBALS;
+
+ /* Package the function parameters.
+ NOTE: There are situations where the numbers of given arguments
+ are greater than the *real* function parameters. Ex:
+ declare void foo(int64, int64);
+ and
+ call foo(int32, int32, int32, int32);
+ */
+ int real_nb_params = F->getFunctionType()->getNumParams();
+ if (nb_params == real_nb_params) {
+ /* One TCG argument per parameter: convert each to the declared type
+ (int-to-pointer, truncate if wider, then bitcast if still different). */
+ for (int i = 0; i < real_nb_params; ++i) {
+ Type *ParamTy = F->getFunctionType()->getParamType(i);
+ Register &In = Reg[args[nb_oargs + i]];
+ Value *InData = LoadState(In);
+
+ size_t real_size = DL->getTypeSizeInBits(ParamTy);
+ size_t size = DL->getTypeSizeInBits(InData->getType());
+
+ if (ParamTy->isPointerTy() && !InData->getType()->isPointerTy())
+ InData = ITP8(InData);
+ else if (real_size < size)
+ InData = TRUNC(InData, IntegerType::get(*Context, real_size));
+
+ if (InData->getType() != ParamTy)
+ InData = new BitCastInst(InData, ParamTy, "", LastInst);
+ Params.push_back(InData);
+ }
+ } else {
+ /* Counts differ: a parameter may be assembled from several narrower TCG
+ arguments. 'idx' walks the TCG arguments, 'remain' counts the bits of
+ the current parameter that are still missing. */
+ int idx = 0;
+ for (int i = 0; i < real_nb_params; ++i) {
+ Value *V = nullptr;
+ Type *ParamTy = F->getFunctionType()->getParamType(i);
+ size_t real_size = DL->getTypeSizeInBits(ParamTy);
+ size_t size, remain = real_size;
+
+ /* Re-entered through 'goto next' until the parameter is complete. */
+next:
+ Register &In = Reg[args[nb_oargs + idx]];
+ Value *InData = LoadState(In);
+
+ size = DL->getTypeSizeInBits(InData->getType());
+ if (size == real_size) {
+ if (InData->getType() != ParamTy)
+ InData = new BitCastInst(InData, ParamTy, "", LastInst);
+ Params.push_back(InData);
+ idx++;
+ } else {
+ /* First piece: zero-extend into the low bits. Later pieces:
+ zero-extend, shift past the bits already filled and OR in. */
+ if (remain == real_size)
+ V = ZEXT(InData, IntegerType::get(*Context, real_size));
+ else {
+ InData = ZEXT(InData, ParamTy);
+ InData = SHL(InData, ConstantInt::get(ParamTy, real_size-remain));
+ V = OR(V, InData);
+ }
+
+ if (remain < size)
+ IRError("%s: fatal error.\n", __func__);
+
+ remain -= size;
+ idx++;
+
+ if (remain)
+ goto next;
+
+ Params.push_back(V);
+ }
+ }
+
+ /* Every TCG argument must have been consumed. */
+ if (idx != nb_params)
+ IRError("%s: num params not matched.\n", __func__);
+ }
+
+
+ /* Save global registers if this function is not TCG constant function.
+ Otherwise, mark this call instruction for state mapping use.
+ The rules can be found in tcg_reg_alloc_call() in tcg/tcg.c */
+ if (!(flags & TCG_CALL_NO_READ_GLOBALS))
+ SaveGlobals(COHERENCE_GLOBAL, LastInst);
+
+ /* handle COREMU's lightweight memory transaction helper:
+ store the restore-point index into restore_val (volatile) before the call. */
+ if (isLMTFunction(FName)) {
+ uint32_t Idx = NI.setRestorePoint();
+ Value *ResVal = GetElementPtrInst::CreateInBounds(CPU,
+ CONSTPtr(offsetof(CPUArchState, restore_val)), "", LastInst);
+ ResVal = new BitCastInst(ResVal, Int32PtrTy, "", LastInst);
+ new StoreInst(CONST32(Idx), ResVal, true, LastInst);
+ }
+
+ CallInst *CI = CallInst::Create(F, Params, "", LastInst);
+
+ if (flags & TCG_CALL_NO_READ_GLOBALS)
+ MF->setConst(CI);
+
+ /* Determine if this function can be inlined. A positive inline cost queues
+ the call for inlining; otherwise the call is redirected to the helper's
+ no-inline variant. */
+ if (Helpers.find(FName) != Helpers.end()) {
+ bool MustInline = false;
+ HelperInfo *Helper = Helpers[FName];
+ if (AnalyzeInlineCost(CallSite(CI)) > 0) {
+ MustInline = true;
+ InlineCalls.push_back(CI);
+ }
+
+ if (!MustInline) {
+ Function *NoInlineF = ResolveFunction(Helper->FuncNoInline->getName());
+ CI->setCalledFunction(NoInlineF);
+ }
+ }
+
+ /* Format the return value.
+ NOTE: There are situations where the return value is split and
+ is used by different instructions. Ex:
+ int64 ret = call foo();
+ ... = opcode ret[0..31];
+ ... = opcode ret[32..63];
+ */
+ if (nb_oargs == 1) {
+ Register &Out = Reg[args[0]];
+ Out.setData(CI, true);
+ } else if (nb_oargs > 1) {
+ /* Split the result into nb_oargs equal pieces, lowest bits first. */
+ Value *V = CI;
+ size_t size = DL->getTypeSizeInBits(F->getReturnType());
+ size_t subsize = size / nb_oargs;
+ for (int i = 0; i < nb_oargs; ++i) {
+ Register &Out = Reg[args[i]];
+ Value *OutData = TRUNC(V, IntegerType::get(*Context, subsize));
+ Out.setData(OutData, true);
+ if (i != nb_oargs - 1)
+ V = LSHR(V, ConstantInt::get(IntegerType::get(*Context, size), subsize));
+ }
+ }
+}
+
+/*
+ * op_br()
+ *  args[0]: Label number
+ *
+ * Saves all register state and points the current terminator at the block of
+ * the label (created on first reference). The insertion point is cleared
+ * afterwards.
+ */
+void IRFactory::op_br(const TCGArg *args)
+{
+    IRDebug(INDEX_op_br);
+
+    SaveGlobals(COHERENCE_ALL, LastInst);
+
+    const TCGArg Target = args[0];
+    if (Labels.count(Target) == 0)
+        Labels[Target] = BasicBlock::Create(*Context, "direct_jump_tb", Func);
+
+    setSuccessor(LastInst, Labels[Target]);
+    LastInst = nullptr;
+}
+
+/*
+ * op_mov_i32()
+ *  args[0]: Out
+ *  args[1]: In
+ *
+ * Copies In to Out; a source that is not 32 bits wide is truncated first.
+ */
+void IRFactory::op_mov_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_mov_i32);
+
+    Register &Out = Reg[args[0]];
+    Register &In = Reg[args[1]];
+
+    AssertType(Out.Size == 32);
+
+    Value *Src = LoadState(In);
+    const size_t SrcBits = DL->getTypeSizeInBits(Src->getType());
+    if (SrcBits != 32)
+        Src = TRUNC32(Src);
+
+    Out.setData(Src, true);
+}
+
+/*
+ * op_movi_i32()
+ *  args[0]: Out
+ *  args[1]: In (const value)
+ *
+ * Sets Out to a 32-bit constant.
+ */
+void IRFactory::op_movi_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_movi_i32);
+
+    Register &Out = Reg[args[0]];
+
+    AssertType(Out.Size == 32);
+
+    Value *Imm = CONST32(args[1]);
+    Out.setData(Imm, true);
+}
+
+/* Map a TCG condition code to the matching LLVM integer predicate. An
+ * unsupported code is reported through IRError and maps to
+ * BAD_ICMP_PREDICATE. */
+static inline CmpInst::Predicate getPred(const TCGArg cond)
+{
+    switch (cond) {
+    /* equality */
+    case TCG_COND_EQ:  return ICmpInst::ICMP_EQ;
+    case TCG_COND_NE:  return ICmpInst::ICMP_NE;
+    /* signed */
+    case TCG_COND_LT:  return ICmpInst::ICMP_SLT;
+    case TCG_COND_GE:  return ICmpInst::ICMP_SGE;
+    case TCG_COND_LE:  return ICmpInst::ICMP_SLE;
+    case TCG_COND_GT:  return ICmpInst::ICMP_SGT;
+    /* unsigned */
+    case TCG_COND_LTU: return ICmpInst::ICMP_ULT;
+    case TCG_COND_GEU: return ICmpInst::ICMP_UGE;
+    case TCG_COND_LEU: return ICmpInst::ICMP_ULE;
+    case TCG_COND_GTU: return ICmpInst::ICMP_UGT;
+    default:
+        break;
+    }
+
+    IRError("%s - unsupported predicate\n", __func__);
+    return ICmpInst::BAD_ICMP_PREDICATE;
+}
+
+/*
+ * op_setcond_i32()
+ *  args[0]: Out
+ *  args[1]: In1
+ *  args[2]: In2
+ *  args[3]: In3 (condition code)
+ *
+ * Out = zero-extended result of comparing In1 with In2.
+ */
+void IRFactory::op_setcond_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_setcond_i32);
+
+    Register &Out = Reg[args[0]];
+    Register &In1 = Reg[args[1]];
+    Register &In2 = Reg[args[2]];
+    const CmpInst::Predicate Pred = getPred(args[3]);
+
+    AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32);
+
+    Value *Lhs = LoadState(In1);
+    Value *Rhs = LoadState(In2);
+    Value *Flag = ICMP(Lhs, Rhs, Pred);
+    Out.setData(ZEXT32(Flag), true);
+}
+
+/*
+ * op_movcond_i32()
+ *  args[0]: Out
+ *  args[1]: In1 (compare lhs)
+ *  args[2]: In2 (compare rhs)
+ *  args[3]: In3 (value if the comparison holds)
+ *  args[4]: In4 (value otherwise)
+ *  args[5]: In5 (condition code)
+ */
+void IRFactory::op_movcond_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_movcond_i32);
+
+    Register &Out = Reg[args[0]];
+    Register &In1 = Reg[args[1]];
+    Register &In2 = Reg[args[2]];
+    Register &In3 = Reg[args[3]];
+    Register &In4 = Reg[args[4]];
+    const CmpInst::Predicate Pred = getPred(args[5]);
+
+    AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32 &&
+               In3.Size == 32 && In4.Size == 32);
+
+    /* All four operands are loaded before the compare is emitted. */
+    Value *Lhs = LoadState(In1);
+    Value *Rhs = LoadState(In2);
+    Value *IfTrue = LoadState(In3);
+    Value *IfFalse = LoadState(In4);
+
+    Value *Test = ICMP(Lhs, Rhs, Pred);
+    Out.setData(SelectInst::Create(Test, IfTrue, IfFalse, "", LastInst), true);
+}
+
+/* load/store */
+/*
+ * op_ld8u_i32()
+ *  args[0]: Out (ret)
+ *  args[1]: In1 (addr)
+ *  args[2]: In2 (offset)
+ *
+ * Loads one byte from addr + offset and zero-extends it to 32 bits.
+ */
+void IRFactory::op_ld8u_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_ld8u_i32);
+
+    Register &Out = Reg[args[0]];
+    Register &In = Reg[args[1]];
+    TCGArg Disp = args[2];
+
+    AssertType(Out.Size == 32);
+
+    Value *Addr = LoadState(In);
+    if (Addr->getType() != Int8PtrTy)
+        Addr = CASTPTR8(Addr);
+    Addr = GetElementPtrInst::CreateInBounds(Addr, CONSTPtr(Disp), "", LastInst);
+
+    Value *Byte = new LoadInst(Addr, "", false, LastInst);
+    Out.setData(ZEXT32(Byte), true);
+}
+
+/*
+ * op_ld8s_i32()
+ *  args[0]: Out (ret)
+ *  args[1]: In1 (addr)
+ *  args[2]: In2 (offset)
+ *
+ * Loads one byte from addr + offset and sign-extends it to 32 bits.
+ */
+void IRFactory::op_ld8s_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_ld8s_i32);
+
+    Register &Out = Reg[args[0]];
+    Register &In = Reg[args[1]];
+    TCGArg Disp = args[2];
+
+    AssertType(Out.Size == 32);
+
+    Value *Addr = LoadState(In);
+    if (Addr->getType() != Int8PtrTy)
+        Addr = CASTPTR8(Addr);
+    Addr = GetElementPtrInst::CreateInBounds(Addr, CONSTPtr(Disp), "", LastInst);
+
+    Value *Byte = new LoadInst(Addr, "", false, LastInst);
+    Out.setData(SEXT32(Byte), true);
+}
+
+/*
+ * op_ld16u_i32()
+ *  args[0]: Out (ret)
+ *  args[1]: In1 (addr)
+ *  args[2]: In2 (offset)
+ *
+ * Loads 16 bits from addr + offset and zero-extends them to 32 bits.
+ */
+void IRFactory::op_ld16u_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_ld16u_i32);
+
+    Register &Out = Reg[args[0]];
+    Register &In = Reg[args[1]];
+    TCGArg Disp = args[2];
+
+    AssertType(Out.Size == 32);
+
+    Value *Addr = LoadState(In);
+    if (Addr->getType() != Int8PtrTy)
+        Addr = CASTPTR8(Addr);
+    /* Byte-wise offset first, then reinterpret as a 16-bit pointer. */
+    Addr = GetElementPtrInst::CreateInBounds(Addr, CONSTPtr(Disp), "", LastInst);
+    Addr = CASTPTR16(Addr);
+
+    Value *Half = new LoadInst(Addr, "", false, LastInst);
+    Out.setData(ZEXT32(Half), true);
+}
+
+/*
+ * op_ld16s_i32()
+ *  args[0]: Out (ret)
+ *  args[1]: In1 (addr)
+ *  args[2]: In2 (offset)
+ *
+ * Loads 16 bits from addr + offset and sign-extends them to 32 bits.
+ */
+void IRFactory::op_ld16s_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_ld16s_i32);
+
+    Register &Out = Reg[args[0]];
+    Register &In = Reg[args[1]];
+    TCGArg Disp = args[2];
+
+    AssertType(Out.Size == 32);
+
+    Value *Addr = LoadState(In);
+    if (Addr->getType() != Int8PtrTy)
+        Addr = CASTPTR8(Addr);
+    /* Byte-wise offset first, then reinterpret as a 16-bit pointer. */
+    Addr = GetElementPtrInst::CreateInBounds(Addr, CONSTPtr(Disp), "", LastInst);
+    Addr = CASTPTR16(Addr);
+
+    Value *Half = new LoadInst(Addr, "", false, LastInst);
+    Out.setData(SEXT32(Half), true);
+}
+
+/*
+ * op_ld_i32()
+ *  args[0]: Out (ret)
+ *  args[1]: In1 (addr)
+ *  args[2]: In2 (offset)
+ *
+ * Loads 32 bits from addr + offset. When the address register is a reserved
+ * (base) register the cached state pointer is used, and a load of the PC
+ * state is made volatile.
+ */
+void IRFactory::op_ld_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_ld_i32);
+
+    Register &Out = Reg[args[0]];
+    Register &In = Reg[args[1]];
+    TCGArg Disp = args[2];
+
+    AssertType(Out.Size == 32);
+
+    Value *Loaded;
+    if (!In.isRev()) {
+        /* Generic memory access: byte offset, then a 32-bit pointer. */
+        Value *Addr = LoadState(In);
+        if (Addr->getType() != Int8PtrTy)
+            Addr = CASTPTR8(Addr);
+        Addr = GetElementPtrInst::CreateInBounds(Addr, CONSTPtr(Disp), "", LastInst);
+        Addr = CASTPTR32(Addr);
+        Loaded = new LoadInst(Addr, "", false, LastInst);
+    } else {
+        /* CPU state access through the state-pointer cache. */
+        LoadInst *StateLoad = new LoadInst(StatePointer(In, Disp, Int32PtrTy),
+                                           "", false, LastInst);
+        if (isStateOfPC(Disp))
+            StateLoad->setVolatile(true);
+        Loaded = StateLoad;
+    }
+    Out.setData(Loaded, true);
+}
+
+/*
+ * op_st8_i32()
+ *  args[0]: In1 (value)
+ *  args[1]: In2 (base)
+ *  args[2]: In3 (offset)
+ *
+ * Stores the low 8 bits of the value to base + offset.
+ */
+void IRFactory::op_st8_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_st8_i32);
+
+    Register &In1 = Reg[args[0]];
+    Register &In2 = Reg[args[1]];
+    TCGArg Disp = args[2];
+
+    AssertType(In1.Size == 32);
+
+    Value *Data = LoadState(In1);
+    Value *Addr = LoadState(In2);
+
+    Data = TRUNC8(Data);
+    if (Addr->getType() != Int8PtrTy)
+        Addr = CASTPTR8(Addr);
+    Addr = GetElementPtrInst::CreateInBounds(Addr, CONSTPtr(Disp), "", LastInst);
+
+    new StoreInst(Data, Addr, false, LastInst);
+}
+
+/*
+ * op_st16_i32()
+ *  args[0]: In1 (value)
+ *  args[1]: In2 (base)
+ *  args[2]: In3 (offset)
+ *
+ * Stores the low 16 bits of the value to base + offset.
+ */
+void IRFactory::op_st16_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_st16_i32);
+
+    Register &In1 = Reg[args[0]];
+    Register &In2 = Reg[args[1]];
+    TCGArg Disp = args[2];
+
+    AssertType(In1.Size == 32);
+
+    Value *Data = LoadState(In1);
+    Value *Addr = LoadState(In2);
+
+    Data = TRUNC16(Data);
+    if (Addr->getType() != Int8PtrTy)
+        Addr = CASTPTR8(Addr);
+    /* Byte-wise offset first, then reinterpret as a 16-bit pointer. */
+    Addr = GetElementPtrInst::CreateInBounds(Addr, CONSTPtr(Disp), "", LastInst);
+    Addr = CASTPTR16(Addr);
+
+    new StoreInst(Data, Addr, false, LastInst);
+}
+
+/*
+ * op_st_i32()
+ *  args[0]: In1 (value)
+ *  args[1]: In2 (base)
+ *  args[2]: In3 (offset)
+ *
+ * Stores 32 bits to base + offset. When the base is a reserved (base)
+ * register the cached state pointer is used, and a store to the PC state is
+ * made volatile.
+ */
+void IRFactory::op_st_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_st_i32);
+
+    Register &In1 = Reg[args[0]];
+    Register &In2 = Reg[args[1]];
+    TCGArg Disp = args[2];
+
+    AssertType(In1.Size == 32);
+
+    Value *Data = LoadState(In1);
+
+    if (!In2.isRev()) {
+        /* Generic memory access: byte offset, then a 32-bit pointer. */
+        Value *Addr = LoadState(In2);
+        if (Addr->getType() != Int8PtrTy)
+            Addr = CASTPTR8(Addr);
+        Addr = GetElementPtrInst::CreateInBounds(Addr, CONSTPtr(Disp), "", LastInst);
+        Addr = CASTPTR32(Addr);
+        new StoreInst(Data, Addr, false, LastInst);
+        return;
+    }
+
+    /* CPU state access through the state-pointer cache. */
+    Value *Slot = StatePointer(In2, Disp, Int32PtrTy);
+    StoreInst *StateStore = new StoreInst(Data, Slot, false, LastInst);
+    if (isStateOfPC(Disp))
+        StateStore->setVolatile(true);
+}
+
+/* arith */
+/*
+ * op_add_i32()
+ *  args[0]: Out
+ *  args[1]: In1
+ *  args[2]: In2
+ *
+ * Out = In1 + In2. When In1 is a reserved (base) register the addition is
+ * folded into a cached state pointer, which requires In2 to be a constant
+ * integer; a non-constant In2 is reported through IRError instead of being
+ * reinterpreted blindly.
+ */
+void IRFactory::op_add_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_add_i32);
+
+    Register &Out = Reg[args[0]];
+    Register &In1 = Reg[args[1]];
+    Register &In2 = Reg[args[2]];
+
+    AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32);
+
+    Value *InData1 = LoadState(In1);
+    Value *InData2 = LoadState(In2);
+    Value *OutData;
+    if (In1.isRev()) {
+        /* The offset must be known at translation time; check the cast
+         * rather than assuming the operand is a ConstantInt. */
+        ConstantInt *OffC = dyn_cast<ConstantInt>(InData2);
+        if (!OffC) {
+            IRError("%s: non-constant offset for base register.\n", __func__);
+            return;
+        }
+        intptr_t Off = OffC->getSExtValue();
+        OutData = StatePointer(In1, Off, Int32PtrTy);
+    } else
+        OutData = ADD(InData1, InData2);
+
+    Out.setData(OutData, true);
+}
+
+/*
+ * op_sub_i32()
+ *  args[0]: Out
+ *  args[1]: In1
+ *  args[2]: In2
+ *
+ * Out = In1 - In2.
+ */
+void IRFactory::op_sub_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_sub_i32);
+
+    Register &Out = Reg[args[0]];
+    Register &In1 = Reg[args[1]];
+    Register &In2 = Reg[args[2]];
+
+    AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32);
+
+    Value *Lhs = LoadState(In1);
+    Value *Rhs = LoadState(In2);
+    Out.setData(SUB(Lhs, Rhs), true);
+}
+
+/*
+ * op_mul_i32()
+ *  args[0]: Out
+ *  args[1]: In1
+ *  args[2]: In2
+ *
+ * Out = In1 * In2.
+ */
+void IRFactory::op_mul_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_mul_i32);
+
+    Register &Out = Reg[args[0]];
+    Register &In1 = Reg[args[1]];
+    Register &In2 = Reg[args[2]];
+
+    AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32);
+
+    Value *Lhs = LoadState(In1);
+    Value *Rhs = LoadState(In2);
+    Out.setData(MUL(Lhs, Rhs), true);
+}
+
+/*
+ * op_div_i32()
+ *  args[0]: Out
+ *  args[1]: In1
+ *  args[2]: In2
+ *
+ * Out = In1 / In2 (signed).
+ */
+void IRFactory::op_div_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_div_i32);
+
+    Register &Out = Reg[args[0]];
+    Register &In1 = Reg[args[1]];
+    Register &In2 = Reg[args[2]];
+
+    AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32);
+
+    Value *Dividend = LoadState(In1);
+    Value *Divisor = LoadState(In2);
+    Out.setData(SDIV(Dividend, Divisor), true);
+}
+
+/*
+ * op_divu_i32()
+ *  args[0]: Out
+ *  args[1]: In1 (dividend)
+ *  args[2]: In2 (divisor)
+ *
+ * Binds UDIV(In1, In2) to Out; all three registers must be 32 bits wide.
+ */
+void IRFactory::op_divu_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_divu_i32);
+
+    Register &Out = Reg[args[0]], &In1 = Reg[args[1]], &In2 = Reg[args[2]];
+
+    AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32);
+
+    Value *Dividend = LoadState(In1);
+    Value *Divisor = LoadState(In2);
+
+    Out.setData(UDIV(Dividend, Divisor), true);
+}
+
+/*
+ * op_rem_i32()
+ *  args[0]: Out
+ *  args[1]: In1 (dividend)
+ *  args[2]: In2 (divisor)
+ *
+ * Binds SREM(In1, In2) to Out; all three registers must be 32 bits wide.
+ */
+void IRFactory::op_rem_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_rem_i32);
+
+    Register &Out = Reg[args[0]], &In1 = Reg[args[1]], &In2 = Reg[args[2]];
+
+    AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32);
+
+    Value *Dividend = LoadState(In1);
+    Value *Divisor = LoadState(In2);
+
+    Out.setData(SREM(Dividend, Divisor), true);
+}
+
+/*
+ * op_remu_i32()
+ *  args[0]: Out
+ *  args[1]: In1 (dividend)
+ *  args[2]: In2 (divisor)
+ *
+ * Binds UREM(In1, In2) to Out; all three registers must be 32 bits wide.
+ */
+void IRFactory::op_remu_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_remu_i32);
+
+    Register &Out = Reg[args[0]], &In1 = Reg[args[1]], &In2 = Reg[args[2]];
+
+    AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32);
+
+    Value *Dividend = LoadState(In1);
+    Value *Divisor = LoadState(In2);
+
+    Out.setData(UREM(Dividend, Divisor), true);
+}
+
+/*
+ * op_div2_i32()
+ *  args[0]: Out1 (receives SDIV result)
+ *  args[1]: Out2 (receives SREM result)
+ *  args[2]: In1
+ *  args[3]: In2 (not consumed, see below)
+ *  args[4]: In3
+ *
+ * Binds SDIV(In1, In3) to Out1 and SREM(In1, In3) to Out2.
+ * NOTE(review): the operand in args[3] is compiled out and never used;
+ * confirm against the TCG definition of div2 that ignoring it is intended.
+ */
+void IRFactory::op_div2_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_div2_i32);
+
+    Register &Out1 = Reg[args[0]], &Out2 = Reg[args[1]];
+    Register &In1 = Reg[args[2]];
+#if 0
+    Register &In2 = Reg[args[3]];
+#endif
+    Register &In3 = Reg[args[4]];
+
+    AssertType(Out1.Size == 32 && Out2.Size == 32 &&
+               In1.Size == 32 && In3.Size == 32);
+
+    Value *Dividend = LoadState(In1);
+#if 0
+    Value *InData2 = LoadState(In2);
+#endif
+    Value *Divisor = LoadState(In3);
+
+    /* Quotient is emitted before the remainder. */
+    Value *Quot = SDIV(Dividend, Divisor);
+    Value *Rem = SREM(Dividend, Divisor);
+
+    Out1.setData(Quot, true);
+    Out2.setData(Rem, true);
+}
+
+/*
+ * op_divu2_i32()
+ *  args[0]: Out1 (receives UDIV result)
+ *  args[1]: Out2 (receives UREM result)
+ *  args[2]: In1
+ *  args[3]: In2 (not consumed, see below)
+ *  args[4]: In3
+ *
+ * Binds UDIV(In1, In3) to Out1 and UREM(In1, In3) to Out2.
+ * NOTE(review): the operand in args[3] is compiled out and never used;
+ * confirm against the TCG definition of divu2 that ignoring it is intended.
+ */
+void IRFactory::op_divu2_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_divu2_i32);
+
+    Register &Out1 = Reg[args[0]], &Out2 = Reg[args[1]];
+    Register &In1 = Reg[args[2]];
+#if 0
+    Register &In2 = Reg[args[3]];
+#endif
+    Register &In3 = Reg[args[4]];
+
+    AssertType(Out1.Size == 32 && Out2.Size == 32 &&
+               In1.Size == 32 && In3.Size == 32);
+
+    Value *Dividend = LoadState(In1);
+#if 0
+    Value *InData2 = LoadState(In2);
+#endif
+    Value *Divisor = LoadState(In3);
+
+    /* Quotient is emitted before the remainder. */
+    Value *Quot = UDIV(Dividend, Divisor);
+    Value *Rem = UREM(Dividend, Divisor);
+
+    Out1.setData(Quot, true);
+    Out2.setData(Rem, true);
+}
+
+/*
+ * op_and_i32()
+ *  args[0]: Out
+ *  args[1]: In1
+ *  args[2]: In2
+ *
+ * Binds AND(In1, In2) to Out; all three registers must be 32 bits wide.
+ */
+void IRFactory::op_and_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_and_i32);
+
+    Register &Out = Reg[args[0]], &In1 = Reg[args[1]], &In2 = Reg[args[2]];
+
+    AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32);
+
+    Value *LHS = LoadState(In1);
+    Value *RHS = LoadState(In2);
+
+    Out.setData(AND(LHS, RHS), true);
+}
+
+/*
+ * op_or_i32()
+ *  args[0]: Out
+ *  args[1]: In1
+ *  args[2]: In2
+ *
+ * Binds OR(In1, In2) to Out; all three registers must be 32 bits wide.
+ */
+void IRFactory::op_or_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_or_i32);
+
+    Register &Out = Reg[args[0]], &In1 = Reg[args[1]], &In2 = Reg[args[2]];
+
+    AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32);
+
+    Value *LHS = LoadState(In1);
+    Value *RHS = LoadState(In2);
+
+    Out.setData(OR(LHS, RHS), true);
+}
+
+/*
+ * op_xor_i32()
+ *  args[0]: Out
+ *  args[1]: In1
+ *  args[2]: In2
+ *
+ * Binds XOR(In1, In2) to Out; all three registers must be 32 bits wide.
+ */
+void IRFactory::op_xor_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_xor_i32);
+
+    Register &Out = Reg[args[0]], &In1 = Reg[args[1]], &In2 = Reg[args[2]];
+
+    AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32);
+
+    Value *LHS = LoadState(In1);
+    Value *RHS = LoadState(In2);
+
+    Out.setData(XOR(LHS, RHS), true);
+}
+
+/* shifts/rotates */
+/*
+ * op_shl_i32()
+ *  args[0]: Out
+ *  args[1]: In1 (value to shift)
+ *  args[2]: In2 (shift count)
+ *
+ * Binds SHL(In1, In2) to Out; all three registers must be 32 bits wide.
+ */
+void IRFactory::op_shl_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_shl_i32);
+
+    Register &Out = Reg[args[0]], &In1 = Reg[args[1]], &In2 = Reg[args[2]];
+
+    AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32);
+
+    Value *Operand = LoadState(In1);
+    Value *Count = LoadState(In2);
+
+    Out.setData(SHL(Operand, Count), true);
+}
+
+/*
+ * op_shr_i32()
+ *  args[0]: Out
+ *  args[1]: In1 (value to shift)
+ *  args[2]: In2 (shift count)
+ *
+ * Binds LSHR(In1, In2) to Out; all three registers must be 32 bits wide.
+ */
+void IRFactory::op_shr_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_shr_i32);
+
+    Register &Out = Reg[args[0]], &In1 = Reg[args[1]], &In2 = Reg[args[2]];
+
+    AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32);
+
+    Value *Operand = LoadState(In1);
+    Value *Count = LoadState(In2);
+
+    Out.setData(LSHR(Operand, Count), true);
+}
+
+/*
+ * op_sar_i32()
+ *  args[0]: Out
+ *  args[1]: In1 (value to shift)
+ *  args[2]: In2 (shift count)
+ *
+ * Binds ASHR(In1, In2) to Out; all three registers must be 32 bits wide.
+ */
+void IRFactory::op_sar_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_sar_i32);
+
+    Register &Out = Reg[args[0]], &In1 = Reg[args[1]], &In2 = Reg[args[2]];
+
+    AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32);
+
+    Value *Operand = LoadState(In1);
+    Value *Count = LoadState(In2);
+
+    Out.setData(ASHR(Operand, Count), true);
+}
+
+/*
+ * op_rotl_i32()
+ *  args[0]: Out
+ *  args[1]: In1 (value to rotate)
+ *  args[2]: In2 (rotate count)
+ *
+ * Binds (In1 << In2) | (In1 >> ((32 - In2) & 31)) to Out.
+ *
+ * The complementary shift count is masked to the range 0..31. Without the
+ * mask a rotate count of zero would emit a logical shift right by 32 on a
+ * 32-bit value, whose result is poison in LLVM IR and would therefore
+ * poison the OR'ed result as well.
+ */
+void IRFactory::op_rotl_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_rotl_i32);
+
+    Register &Out = Reg[args[0]];
+    Register &In1 = Reg[args[1]];
+    Register &In2 = Reg[args[2]];
+
+    AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32);
+
+    Value *InData1 = LoadState(In1);
+    Value *InData2 = LoadState(In2);
+
+    /* (32 - n) & 31 is 32 - n for n in 1..31 and 0 for n == 0. */
+    Value *RevCount = AND(SUB(CONST32(32), InData2), CONST32(31));
+    Value *C = LSHR(InData1, RevCount);
+    Value *OutData = SHL(InData1, InData2);
+    OutData = OR(OutData, C);
+    Out.setData(OutData, true);
+}
+
+/*
+ * op_rotr_i32()
+ *  args[0]: Out
+ *  args[1]: In1 (value to rotate)
+ *  args[2]: In2 (rotate count)
+ *
+ * Binds (In1 >> In2) | (In1 << ((32 - In2) & 31)) to Out.
+ *
+ * The complementary shift count is masked to the range 0..31. Without the
+ * mask a rotate count of zero would emit a shift left by 32 on a 32-bit
+ * value, whose result is poison in LLVM IR and would therefore poison the
+ * OR'ed result as well.
+ */
+void IRFactory::op_rotr_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_rotr_i32);
+
+    Register &Out = Reg[args[0]];
+    Register &In1 = Reg[args[1]];
+    Register &In2 = Reg[args[2]];
+
+    AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32);
+
+    Value *InData1 = LoadState(In1);
+    Value *InData2 = LoadState(In2);
+
+    /* (32 - n) & 31 is 32 - n for n in 1..31 and 0 for n == 0. */
+    Value *RevCount = AND(SUB(CONST32(32), InData2), CONST32(31));
+    Value *c = SHL(InData1, RevCount);
+    Value *OutData = LSHR(InData1, InData2);
+    OutData = OR(OutData, c);
+    Out.setData(OutData, true);
+}
+
+/*
+ * op_deposit_i32()
+ *  args[0]: Out
+ *  args[1]: In1 (value receiving the field)
+ *  args[2]: In2 (value supplying the field)
+ *  args[3]: In3 (offset from LSB)
+ *  args[4]: In4 (length)
+ *
+ * Inserts the lowest args[4] bits of In2 into In1 starting at bit args[3]
+ * and binds the merged value to Out.
+ */
+void IRFactory::op_deposit_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_deposit_i32);
+
+    /* Bits [args[3], args[3] + args[4]) are set in the field mask. */
+    APInt FieldMask = APInt::getBitsSet(32, args[3], args[3] + args[4]);
+
+    Register &Out = Reg[args[0]], &In1 = Reg[args[1]], &In2 = Reg[args[2]];
+
+    AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32);
+
+    Value *Base = LoadState(In1);
+    Value *Field = LoadState(In2);
+
+    /* Move the field into position; a zero offset needs no shift. */
+    if (args[3])
+        Field = SHL(Field, CONST32(args[3]));
+
+    /* Keep only the field bits of In2 and clear them in In1, then merge. */
+    Field = AND(Field, ConstantInt::get(*Context, FieldMask));
+    Base = AND(Base, ConstantInt::get(*Context, ~FieldMask));
+
+    Out.setData(OR(Base, Field), true);
+}
+
+/*
+ * op_brcond_i32()
+ *  args[0]: In1
+ *  args[1]: In2
+ *  args[2]: In3 (condition code)
+ *  args[3]: In4 (label)
+ *
+ * Layout of the translated op:
+ *      brcond_i32 op1,op2,cond,<ifTrue>
+ *   <ifFalse>:
+ *      A
+ *   <ifTrue>:
+ *      B
+ *
+ * Ends the current block with a conditional branch on ICMP(In1, In2, cond)
+ * and continues code generation in a freshly created fall-through block.
+ */
+void IRFactory::op_brcond_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_brcond_i32);
+
+    Register &In1 = Reg[args[0]], &In2 = Reg[args[1]];
+    CmpInst::Predicate Pred = getPred(args[2]);
+
+    AssertType(In1.Size == 32 && In2.Size == 32);
+
+    Value *LHS = LoadState(In1);
+    Value *RHS = LoadState(In2);
+
+    SaveGlobals(COHERENCE_ALL, LastInst);
+
+    /* Create the block for the target label on first reference. */
+    TCGArg Target = args[3];
+    if (Labels.find(Target) == Labels.end())
+        Labels[Target] = BasicBlock::Create(*Context, "succ", Func);
+
+    BasicBlock *TakenBB = Labels[Target];
+    BasicBlock *FallThroughBB = BasicBlock::Create(*Context, "succ", Func);
+
+    /* Replace the current terminator with the conditional branch. */
+    Value *Taken = ICMP(LHS, RHS, Pred);
+    BranchInst::Create(TakenBB, FallThroughBB, Taken, LastInst);
+    LastInst->eraseFromParent();
+
+    /* Further ops go into the fall-through block, which gets a branch to
+     * ExitBB as its insertion point. */
+    CurrBB = FallThroughBB;
+    LastInst = BranchInst::Create(ExitBB, CurrBB);
+}
+
+/*
+ * op_add2_i32()
+ *  args[0]: Out1 (low half of the result)
+ *  args[1]: Out2 (high half of the result)
+ *  args[2]: In1  (low half of the first operand)
+ *  args[3]: In2  (high half of the first operand)
+ *  args[4]: In3  (low half of the second operand)
+ *  args[5]: In4  (high half of the second operand)
+ *
+ * Joins each pair of 32-bit halves into a 64-bit value, adds the two
+ * 64-bit values and splits the sum back into two 32-bit halves.
+ */
+void IRFactory::op_add2_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_add2_i32);
+
+    Register &Out1 = Reg[args[0]], &Out2 = Reg[args[1]];
+    Register &In1 = Reg[args[2]], &In2 = Reg[args[3]];
+    Register &In3 = Reg[args[4]], &In4 = Reg[args[5]];
+
+    AssertType(Out1.Size == 32 && Out2.Size == 32 &&
+               In1.Size == 32 && In2.Size == 32 &&
+               In3.Size == 32 && In4.Size == 32);
+
+    Value *LoA = LoadState(In1);
+    Value *HiA = LoadState(In2);
+    Value *LoB = LoadState(In3);
+    Value *HiB = LoadState(In4);
+
+    /* First operand: (zext(In2) << 32) | zext(In1). */
+    LoA = ZEXT64(LoA);
+    HiA = SHL(ZEXT64(HiA), CONST64(32));
+    Value *WideA = OR(HiA, LoA);
+
+    /* Second operand: (zext(In4) << 32) | zext(In3). */
+    LoB = ZEXT64(LoB);
+    HiB = SHL(ZEXT64(HiB), CONST64(32));
+    Value *WideB = OR(HiB, LoB);
+
+    Value *Sum = ADD(WideA, WideB);
+
+    Value *SumLo = TRUNC32(Sum);
+    Value *SumHi = TRUNC32(LSHR(Sum, CONST64(32)));
+    Out1.setData(SumLo, true);
+    Out2.setData(SumHi, true);
+}
+
+/*
+ * op_sub2_i32()
+ *  args[0]: Out1 (low half of the result)
+ *  args[1]: Out2 (high half of the result)
+ *  args[2]: In1  (low half of the first operand)
+ *  args[3]: In2  (high half of the first operand)
+ *  args[4]: In3  (low half of the second operand)
+ *  args[5]: In4  (high half of the second operand)
+ *
+ * Joins each pair of 32-bit halves into a 64-bit value, subtracts the
+ * second 64-bit value from the first and splits the difference back into
+ * two 32-bit halves.
+ */
+void IRFactory::op_sub2_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_sub2_i32);
+
+    Register &Out1 = Reg[args[0]], &Out2 = Reg[args[1]];
+    Register &In1 = Reg[args[2]], &In2 = Reg[args[3]];
+    Register &In3 = Reg[args[4]], &In4 = Reg[args[5]];
+
+    AssertType(Out1.Size == 32 && Out2.Size == 32 &&
+               In1.Size == 32 && In2.Size == 32 &&
+               In3.Size == 32 && In4.Size == 32);
+
+    Value *LoA = LoadState(In1);
+    Value *HiA = LoadState(In2);
+    Value *LoB = LoadState(In3);
+    Value *HiB = LoadState(In4);
+
+    /* First operand: (zext(In2) << 32) | zext(In1). */
+    LoA = ZEXT64(LoA);
+    HiA = SHL(ZEXT64(HiA), CONST64(32));
+    Value *WideA = OR(HiA, LoA);
+
+    /* Second operand: (zext(In4) << 32) | zext(In3). */
+    LoB = ZEXT64(LoB);
+    HiB = SHL(ZEXT64(HiB), CONST64(32));
+    Value *WideB = OR(HiB, LoB);
+
+    Value *Diff = SUB(WideA, WideB);
+
+    Value *DiffLo = TRUNC32(Diff);
+    Value *DiffHi = TRUNC32(LSHR(Diff, CONST64(32)));
+    Out1.setData(DiffLo, true);
+    Out2.setData(DiffHi, true);
+}
+
+/*
+ * op_mulu2_i32()
+ *  args[0]: Out1 (low 32 bits of the product)
+ *  args[1]: Out2 (high 32 bits of the product)
+ *  args[2]: In1
+ *  args[3]: In2
+ *
+ * Zero-extends both operands to 64 bits, multiplies them and splits the
+ * 64-bit product into its two 32-bit halves.
+ */
+void IRFactory::op_mulu2_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_mulu2_i32);
+
+    Register &Out1 = Reg[args[0]], &Out2 = Reg[args[1]];
+    Register &In1 = Reg[args[2]], &In2 = Reg[args[3]];
+
+    AssertType(Out1.Size == 32 && Out2.Size == 32 &&
+               In1.Size == 32 && In2.Size == 32);
+
+    Value *LHS = LoadState(In1);
+    Value *RHS = LoadState(In2);
+
+    LHS = ZEXT64(LHS);
+    RHS = ZEXT64(RHS);
+
+    Value *Product = MUL(LHS, RHS);
+    Value *ProdLo = TRUNC32(Product);
+    Value *ProdHi = TRUNC32(LSHR(Product, CONST64(32)));
+
+    Out1.setData(ProdLo, true);
+    Out2.setData(ProdHi, true);
+}
+
+/*
+ * op_muls2_i32()
+ *  args[0]: Out1 (low 32 bits of the product)
+ *  args[1]: Out2 (high 32 bits of the product)
+ *  args[2]: In1
+ *  args[3]: In2
+ *
+ * Sign-extends both operands to 64 bits, multiplies them and splits the
+ * 64-bit product into its two 32-bit halves.
+ */
+void IRFactory::op_muls2_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_muls2_i32);
+
+    Register &Out1 = Reg[args[0]], &Out2 = Reg[args[1]];
+    Register &In1 = Reg[args[2]], &In2 = Reg[args[3]];
+
+    AssertType(Out1.Size == 32 && Out2.Size == 32 &&
+               In1.Size == 32 && In2.Size == 32);
+
+    Value *LHS = LoadState(In1);
+    Value *RHS = LoadState(In2);
+
+    LHS = SEXT64(LHS);
+    RHS = SEXT64(RHS);
+
+    Value *Product = MUL(LHS, RHS);
+    Value *ProdLo = TRUNC32(Product);
+    Value *ProdHi = TRUNC32(LSHR(Product, CONST64(32)));
+
+    Out1.setData(ProdLo, true);
+    Out2.setData(ProdHi, true);
+}
+
+/*
+ * op_muluh_i32()
+ *  args[0]: Out1 (high 32 bits of the product)
+ *  args[1]: In1
+ *  args[2]: In2
+ *
+ * Zero-extends both operands to 64 bits, multiplies them and binds the
+ * upper 32 bits of the product to Out1.
+ */
+void IRFactory::op_muluh_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_muluh_i32);
+
+    Register &Out1 = Reg[args[0]], &In1 = Reg[args[1]], &In2 = Reg[args[2]];
+
+    AssertType(Out1.Size == 32 && In1.Size == 32 && In2.Size == 32);
+
+    Value *LHS = LoadState(In1);
+    Value *RHS = LoadState(In2);
+
+    LHS = ZEXT64(LHS);
+    RHS = ZEXT64(RHS);
+
+    Value *Product = MUL(LHS, RHS);
+
+    Out1.setData(TRUNC32(LSHR(Product, CONST64(32))), true);
+}
+
+/*
+ * op_mulsh_i32()
+ *  args[0]: Out1 (high 32 bits of the product)
+ *  args[1]: In1
+ *  args[2]: In2
+ *
+ * Sign-extends both operands to 64 bits, multiplies them and binds the
+ * upper 32 bits of the product to Out1.
+ */
+void IRFactory::op_mulsh_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_mulsh_i32);
+
+    Register &Out1 = Reg[args[0]], &In1 = Reg[args[1]], &In2 = Reg[args[2]];
+
+    AssertType(Out1.Size == 32 && In1.Size == 32 && In2.Size == 32);
+
+    Value *LHS = LoadState(In1);
+    Value *RHS = LoadState(In2);
+
+    LHS = SEXT64(LHS);
+    RHS = SEXT64(RHS);
+
+    Value *Product = MUL(LHS, RHS);
+
+    Out1.setData(TRUNC32(LSHR(Product, CONST64(32))), true);
+}
+
+/*
+ * op_brcond2_i32()
+ *  args[0]: In1 (low half of the first operand)
+ *  args[1]: In2 (high half of the first operand)
+ *  args[2]: In3 (low half of the second operand)
+ *  args[3]: In4 (high half of the second operand)
+ *  args[4]: condition code
+ *  args[5]: label
+ *
+ * Joins each pair of 32-bit halves into a 64-bit value, ends the current
+ * block with a conditional branch on their comparison and continues code
+ * generation in a freshly created fall-through block.
+ */
+void IRFactory::op_brcond2_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_brcond2_i32);
+
+    Register &In1 = Reg[args[0]], &In2 = Reg[args[1]];
+    Register &In3 = Reg[args[2]], &In4 = Reg[args[3]];
+    CmpInst::Predicate Pred = getPred(args[4]);
+
+    AssertType(In1.Size == 32 && In2.Size == 32 &&
+               In3.Size == 32 && In4.Size == 32);
+
+    Value *LoA = LoadState(In1);
+    Value *HiA = LoadState(In2);
+    Value *LoB = LoadState(In3);
+    Value *HiB = LoadState(In4);
+
+    SaveGlobals(COHERENCE_ALL, LastInst);
+
+    /* Widen the halves, then merge them as (high << 32) | low. */
+    LoA = ZEXT64(LoA);
+    HiA = SHL(ZEXT64(HiA), CONST64(32));
+    LoB = ZEXT64(LoB);
+    HiB = SHL(ZEXT64(HiB), CONST64(32));
+
+    Value *WideA = OR(HiA, LoA);
+    Value *WideB = OR(HiB, LoB);
+
+    /* Create the block for the target label on first reference. */
+    TCGArg Target = args[5];
+    if (Labels.find(Target) == Labels.end())
+        Labels[Target] = BasicBlock::Create(*Context, "succ", Func);
+
+    BasicBlock *TakenBB = Labels[Target];
+    BasicBlock *FallThroughBB = BasicBlock::Create(*Context, "succ", Func);
+
+    /* Replace the current terminator with the conditional branch. */
+    Value *Taken = ICMP(WideA, WideB, Pred);
+    BranchInst::Create(TakenBB, FallThroughBB, Taken, LastInst);
+    LastInst->eraseFromParent();
+
+    /* Further ops go into the fall-through block. */
+    CurrBB = FallThroughBB;
+    LastInst = BranchInst::Create(ExitBB, CurrBB);
+}
+
+/*
+ * op_setcond2_i32()
+ *  args[0]: Out
+ *  args[1]: In1 (low half of the first operand)
+ *  args[2]: In2 (high half of the first operand)
+ *  args[3]: In3 (low half of the second operand)
+ *  args[4]: In4 (high half of the second operand)
+ *  args[5]: condition code
+ *
+ * Joins each pair of 32-bit halves into a 64-bit value, compares the two
+ * and binds the comparison result, zero-extended to 32 bits, to Out.
+ */
+void IRFactory::op_setcond2_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_setcond2_i32);
+
+    Register &Out = Reg[args[0]];
+    Register &In1 = Reg[args[1]], &In2 = Reg[args[2]];
+    Register &In3 = Reg[args[3]], &In4 = Reg[args[4]];
+    CmpInst::Predicate Pred = getPred(args[5]);
+
+    AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32 &&
+               In3.Size == 32 && In4.Size == 32);
+
+    Value *LoA = LoadState(In1);
+    Value *HiA = LoadState(In2);
+    Value *LoB = LoadState(In3);
+    Value *HiB = LoadState(In4);
+
+    /* Widen the halves, then merge them as (high << 32) | low. */
+    LoA = ZEXT64(LoA);
+    HiA = SHL(ZEXT64(HiA), CONST64(32));
+    LoB = ZEXT64(LoB);
+    HiB = SHL(ZEXT64(HiB), CONST64(32));
+
+    Value *WideA = OR(HiA, LoA);
+    Value *WideB = OR(HiB, LoB);
+
+    Value *Flag = ICMP(WideA, WideB, Pred);
+
+    Out.setData(ZEXT32(Flag), true);
+}
+
+/*
+ * op_ext8s_i32()
+ *  args[0]: Out
+ *  args[1]: In
+ *
+ * Truncates In to 8 bits and sign-extends the result back to 32 bits.
+ */
+void IRFactory::op_ext8s_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_ext8s_i32);
+
+    Register &Out = Reg[args[0]], &In = Reg[args[1]];
+
+    AssertType(Out.Size == 32 && In.Size == 32);
+
+    Value *Src = LoadState(In);
+    Value *Narrow = TRUNC8(Src);
+
+    Out.setData(SEXT32(Narrow), true);
+}
+
+/*
+ * op_ext16s_i32()
+ *  args[0]: Out
+ *  args[1]: In
+ *
+ * Truncates In to 16 bits and sign-extends the result back to 32 bits.
+ */
+void IRFactory::op_ext16s_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_ext16s_i32);
+
+    Register &Out = Reg[args[0]], &In = Reg[args[1]];
+
+    AssertType(Out.Size == 32 && In.Size == 32);
+
+    Value *Src = LoadState(In);
+    Value *Narrow = TRUNC16(Src);
+
+    Out.setData(SEXT32(Narrow), true);
+}
+
+/*
+ * op_ext8u_i32()
+ *  args[0]: Out
+ *  args[1]: In
+ *
+ * Keeps the low 8 bits of In by masking with 0xFF.
+ */
+void IRFactory::op_ext8u_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_ext8u_i32);
+
+    Register &Out = Reg[args[0]], &In = Reg[args[1]];
+
+    AssertType(Out.Size == 32 && In.Size == 32);
+
+    Value *Src = LoadState(In);
+
+    Out.setData(AND(Src, CONST32(0xFF)), true);
+}
+
+/*
+ * op_ext16u_i32()
+ *  args[0]: Out
+ *  args[1]: In
+ *
+ * Keeps the low 16 bits of In by masking with 0xFFFF.
+ */
+void IRFactory::op_ext16u_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_ext16u_i32);
+
+    Register &Out = Reg[args[0]], &In = Reg[args[1]];
+
+    AssertType(Out.Size == 32 && In.Size == 32);
+
+    Value *Src = LoadState(In);
+
+    Out.setData(AND(Src, CONST32(0xFFFF)), true);
+}
+
+/*
+ * op_bswap16_i32()
+ *  args[0]: Out
+ *  args[1]: In
+ *
+ * Truncates In to 16 bits, applies BSWAP16 and zero-extends the result
+ * back to 32 bits.
+ */
+void IRFactory::op_bswap16_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_bswap16_i32);
+
+    Register &Out = Reg[args[0]], &In = Reg[args[1]];
+
+    AssertType(Out.Size == 32 && In.Size == 32);
+
+    Value *Src = LoadState(In);
+    Value *Half = TRUNC16(Src);
+    Value *Swapped = BSWAP16(Half);
+
+    Out.setData(ZEXT32(Swapped), true);
+}
+
+/*
+ * op_bswap32_i32()
+ *  args[0]: Out
+ *  args[1]: In
+ *
+ * Binds BSWAP32(In) to Out.
+ */
+void IRFactory::op_bswap32_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_bswap32_i32);
+
+    Register &Out = Reg[args[0]], &In = Reg[args[1]];
+
+    AssertType(Out.Size == 32 && In.Size == 32);
+
+    Value *Src = LoadState(In);
+
+    Out.setData(BSWAP32(Src), true);
+}
+
+/*
+ * op_not_i32()
+ *  args[0]: Out
+ *  args[1]: In
+ *
+ * Inverts every bit of In by XOR'ing it with an all-ones constant.
+ */
+void IRFactory::op_not_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_not_i32);
+
+    Register &Out = Reg[args[0]], &In = Reg[args[1]];
+
+    AssertType(Out.Size == 32 && In.Size == 32);
+
+    Value *Src = LoadState(In);
+
+    Out.setData(XOR(Src, CONST32((uint32_t)-1)), true);
+}
+
+/*
+ * op_neg_i32()
+ *  args[0]: Out
+ *  args[1]: In
+ *
+ * Negates In by subtracting it from zero.
+ */
+void IRFactory::op_neg_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_neg_i32);
+
+    Register &Out = Reg[args[0]], &In = Reg[args[1]];
+
+    AssertType(Out.Size == 32 && In.Size == 32);
+
+    Value *Src = LoadState(In);
+
+    Out.setData(SUB(CONST32(0), Src), true);
+}
+
+/*
+ * op_andc_i32()
+ *  args[0]: Out
+ *  args[1]: In1
+ *  args[2]: In2
+ *
+ * Binds In1 & ~In2 to Out; the complement is formed by XOR with all ones.
+ */
+void IRFactory::op_andc_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_andc_i32);
+
+    Register &Out = Reg[args[0]], &In1 = Reg[args[1]], &In2 = Reg[args[2]];
+
+    AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32);
+
+    Value *LHS = LoadState(In1);
+    Value *RHS = LoadState(In2);
+
+    Value *NotRHS = XOR(RHS, CONST32((uint32_t)-1));
+
+    Out.setData(AND(LHS, NotRHS), true);
+}
+
+/*
+ * op_orc_i32()
+ *  args[0]: Out
+ *  args[1]: In1
+ *  args[2]: In2
+ *
+ * Binds In1 | ~In2 to Out; the complement is formed by XOR with all ones.
+ */
+void IRFactory::op_orc_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_orc_i32);
+
+    Register &Out = Reg[args[0]], &In1 = Reg[args[1]], &In2 = Reg[args[2]];
+
+    AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32);
+
+    Value *LHS = LoadState(In1);
+    Value *RHS = LoadState(In2);
+
+    Value *NotRHS = XOR(RHS, CONST32((uint32_t)-1));
+
+    Out.setData(OR(LHS, NotRHS), true);
+}
+
+/*
+ * op_eqv_i32()
+ *  args[0]: Out
+ *  args[1]: In1
+ *  args[2]: In2
+ *
+ * Binds In1 ^ ~In2 to Out; the complement is formed by XOR with all ones.
+ */
+void IRFactory::op_eqv_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_eqv_i32);
+
+    Register &Out = Reg[args[0]], &In1 = Reg[args[1]], &In2 = Reg[args[2]];
+
+    AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32);
+
+    Value *LHS = LoadState(In1);
+    Value *RHS = LoadState(In2);
+
+    Value *NotRHS = XOR(RHS, CONST32((uint32_t)-1));
+
+    Out.setData(XOR(LHS, NotRHS), true);
+}
+
+/*
+ * op_nand_i32()
+ *  args[0]: Out
+ *  args[1]: In1
+ *  args[2]: In2
+ *
+ * Binds ~(In1 & In2) to Out; the complement is formed by XOR with all
+ * ones.
+ */
+void IRFactory::op_nand_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_nand_i32);
+
+    Register &Out = Reg[args[0]], &In1 = Reg[args[1]], &In2 = Reg[args[2]];
+
+    AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32);
+
+    Value *LHS = LoadState(In1);
+    Value *RHS = LoadState(In2);
+
+    Value *Conj = AND(LHS, RHS);
+
+    Out.setData(XOR(Conj, CONST32((uint32_t)-1)), true);
+}
+
+/*
+ * op_nor_i32()
+ *  args[0]: Out
+ *  args[1]: In1
+ *  args[2]: In2
+ *
+ * Binds ~(In1 | In2) to Out; the complement is formed by XOR with all
+ * ones.
+ */
+void IRFactory::op_nor_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_nor_i32);
+
+    Register &Out = Reg[args[0]], &In1 = Reg[args[1]], &In2 = Reg[args[2]];
+
+    AssertType(Out.Size == 32 && In1.Size == 32 && In2.Size == 32);
+
+    Value *LHS = LoadState(In1);
+    Value *RHS = LoadState(In2);
+
+    Value *Disj = OR(LHS, RHS);
+
+    Out.setData(XOR(Disj, CONST32((uint32_t)-1)), true);
+}
+
+/*
+ * op_mov_i64()
+ *  args[0]: Out
+ *  args[1]: In
+ *
+ * Copies In to the 64-bit register Out. A source value whose type is not
+ * 64 bits wide is zero-extended to 64 bits first.
+ */
+void IRFactory::op_mov_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_mov_i64);
+
+    Register &Out = Reg[args[0]], &In = Reg[args[1]];
+
+    AssertType(Out.Size == 64);
+
+    Value *Src = LoadState(In);
+
+    /* Only the destination width is asserted, so widen when needed. */
+    size_t Bits = DL->getTypeSizeInBits(Src->getType());
+    if (Bits != 64)
+        Src = ZEXT64(Src);
+
+    Out.setData(Src, true);
+}
+
+/*
+ * op_movi_i64()
+ *  args[0]: Out
+ *  args[1]: In (const value)
+ *
+ * Binds the 64-bit constant built from args[1] to Out.
+ */
+void IRFactory::op_movi_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_movi_i64);
+
+    Register &Out = Reg[args[0]];
+
+    AssertType(Out.Size == 64);
+
+    Value *Imm = CONST64(args[1]);
+    Out.setData(Imm, true);
+}
+
+/*
+ * op_setcond_i64()
+ *  args[0]: Out
+ *  args[1]: In1
+ *  args[2]: In2
+ *  args[3]: condition code
+ *
+ * Compares In1 with In2 under the given condition and binds the result,
+ * zero-extended to 64 bits, to Out.
+ */
+void IRFactory::op_setcond_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_setcond_i64);
+
+    Register &Out = Reg[args[0]], &In1 = Reg[args[1]], &In2 = Reg[args[2]];
+    CmpInst::Predicate Pred = getPred(args[3]);
+
+    AssertType(Out.Size == 64 && In1.Size == 64 && In2.Size == 64);
+
+    Value *LHS = LoadState(In1);
+    Value *RHS = LoadState(In2);
+
+    Value *Flag = ICMP(LHS, RHS, Pred);
+
+    Out.setData(ZEXT64(Flag), true);
+}
+
+/*
+ * op_movcond_i64()
+ *  args[0]: Out
+ *  args[1]: In1 (first compare operand)
+ *  args[2]: In2 (second compare operand)
+ *  args[3]: In3 (value selected when the condition holds)
+ *  args[4]: In4 (value selected otherwise)
+ *  args[5]: condition code
+ *
+ * Binds a select between In3 and In4, keyed on the comparison of In1 with
+ * In2, to Out.
+ */
+void IRFactory::op_movcond_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_movcond_i64);
+
+    Register &Out = Reg[args[0]];
+    Register &In1 = Reg[args[1]], &In2 = Reg[args[2]];
+    Register &In3 = Reg[args[3]], &In4 = Reg[args[4]];
+    CmpInst::Predicate Pred = getPred(args[5]);
+
+    AssertType(Out.Size == 64 && In1.Size == 64 && In2.Size == 64 &&
+               In3.Size == 64 && In4.Size == 64);
+
+    Value *CmpA = LoadState(In1);
+    Value *CmpB = LoadState(In2);
+    Value *IfTrue = LoadState(In3);
+    Value *IfFalse = LoadState(In4);
+
+    Value *Flag = ICMP(CmpA, CmpB, Pred);
+    Value *Chosen = SelectInst::Create(Flag, IfTrue, IfFalse, "", LastInst);
+
+    Out.setData(Chosen, true);
+}
+
+
+/* load/store */
+/*
+ * op_ld8u_i64()
+ *  args[0]: Out
+ *  args[1]: In (base)
+ *  args[2]: In (offset)
+ *
+ * Loads the value at base + offset through an Int8PtrTy pointer and binds
+ * it, zero-extended to 64 bits, to Out.
+ */
+void IRFactory::op_ld8u_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_ld8u_i64);
+
+    Register &Out = Reg[args[0]], &In = Reg[args[1]];
+    TCGArg Disp = args[2];
+
+    AssertType(Out.Size == 64);
+
+    /* Address arithmetic is done on an Int8PtrTy-typed base. */
+    Value *Addr = LoadState(In);
+    if (Addr->getType() != Int8PtrTy)
+        Addr = CASTPTR8(Addr);
+    Addr = GetElementPtrInst::CreateInBounds(Addr, CONSTPtr(Disp), "", LastInst);
+
+    Value *Loaded = new LoadInst(Addr, "", false, LastInst);
+
+    Out.setData(ZEXT64(Loaded), true);
+}
+
+/*
+ * op_ld8s_i64()
+ *  args[0]: Out
+ *  args[1]: In (base)
+ *  args[2]: In (offset)
+ *
+ * Loads the value at base + offset through an Int8PtrTy pointer and binds
+ * it, sign-extended to 64 bits, to Out.
+ */
+void IRFactory::op_ld8s_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_ld8s_i64);
+
+    Register &Out = Reg[args[0]], &In = Reg[args[1]];
+    TCGArg Disp = args[2];
+
+    AssertType(Out.Size == 64);
+
+    /* Address arithmetic is done on an Int8PtrTy-typed base. */
+    Value *Addr = LoadState(In);
+    if (Addr->getType() != Int8PtrTy)
+        Addr = CASTPTR8(Addr);
+    Addr = GetElementPtrInst::CreateInBounds(Addr, CONSTPtr(Disp), "", LastInst);
+
+    Value *Loaded = new LoadInst(Addr, "", false, LastInst);
+
+    Out.setData(SEXT64(Loaded), true);
+}
+
+/*
+ * op_ld16u_i64()
+ *  args[0]: Out
+ *  args[1]: In (base)
+ *  args[2]: In (offset)
+ *
+ * Loads the value at base + offset through a pointer produced by
+ * CASTPTR16 and binds it, zero-extended to 64 bits, to Out.
+ */
+void IRFactory::op_ld16u_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_ld16u_i64);
+
+    Register &Out = Reg[args[0]], &In = Reg[args[1]];
+    TCGArg Disp = args[2];
+
+    AssertType(Out.Size == 64);
+
+    /* Address arithmetic is done on an Int8PtrTy-typed base. */
+    Value *Addr = LoadState(In);
+    if (Addr->getType() != Int8PtrTy)
+        Addr = CASTPTR8(Addr);
+    Addr = GetElementPtrInst::CreateInBounds(Addr, CONSTPtr(Disp), "", LastInst);
+    Addr = CASTPTR16(Addr);
+
+    Value *Loaded = new LoadInst(Addr, "", false, LastInst);
+
+    Out.setData(ZEXT64(Loaded), true);
+}
+
+/*
+ * op_ld16s_i64()
+ *  args[0]: Out
+ *  args[1]: In (base)
+ *  args[2]: In (offset)
+ *
+ * Loads the value at base + offset through a pointer produced by
+ * CASTPTR16 and binds it, sign-extended to 64 bits, to Out.
+ */
+void IRFactory::op_ld16s_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_ld16s_i64);
+
+    Register &Out = Reg[args[0]], &In = Reg[args[1]];
+    TCGArg Disp = args[2];
+
+    AssertType(Out.Size == 64);
+
+    /* Address arithmetic is done on an Int8PtrTy-typed base. */
+    Value *Addr = LoadState(In);
+    if (Addr->getType() != Int8PtrTy)
+        Addr = CASTPTR8(Addr);
+    Addr = GetElementPtrInst::CreateInBounds(Addr, CONSTPtr(Disp), "", LastInst);
+    Addr = CASTPTR16(Addr);
+
+    Value *Loaded = new LoadInst(Addr, "", false, LastInst);
+
+    Out.setData(SEXT64(Loaded), true);
+}
+
+/*
+ * op_ld32u_i64()
+ *  args[0]: Out
+ *  args[1]: In (base)
+ *  args[2]: In (offset)
+ *
+ * Loads the value at base + offset through a pointer produced by
+ * CASTPTR32 and binds it, zero-extended to 64 bits, to Out.
+ */
+void IRFactory::op_ld32u_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_ld32u_i64);
+
+    Register &Out = Reg[args[0]], &In = Reg[args[1]];
+    TCGArg Disp = args[2];
+
+    AssertType(Out.Size == 64);
+
+    /* Address arithmetic is done on an Int8PtrTy-typed base. */
+    Value *Addr = LoadState(In);
+    if (Addr->getType() != Int8PtrTy)
+        Addr = CASTPTR8(Addr);
+    Addr = GetElementPtrInst::CreateInBounds(Addr, CONSTPtr(Disp), "", LastInst);
+    Addr = CASTPTR32(Addr);
+
+    Value *Loaded = new LoadInst(Addr, "", false, LastInst);
+
+    Out.setData(ZEXT64(Loaded), true);
+}
+
+/*
+ * op_ld32s_i64()
+ *  args[0]: Out
+ *  args[1]: In (base)
+ *  args[2]: In (offset)
+ *
+ * Loads the value at base + offset through a pointer produced by
+ * CASTPTR32 and binds it, sign-extended to 64 bits, to Out.
+ */
+void IRFactory::op_ld32s_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_ld32s_i64);
+
+    Register &Out = Reg[args[0]], &In = Reg[args[1]];
+    TCGArg Disp = args[2];
+
+    AssertType(Out.Size == 64);
+
+    /* Address arithmetic is done on an Int8PtrTy-typed base. */
+    Value *Addr = LoadState(In);
+    if (Addr->getType() != Int8PtrTy)
+        Addr = CASTPTR8(Addr);
+    Addr = GetElementPtrInst::CreateInBounds(Addr, CONSTPtr(Disp), "", LastInst);
+    Addr = CASTPTR32(Addr);
+
+    Value *Loaded = new LoadInst(Addr, "", false, LastInst);
+
+    Out.setData(SEXT64(Loaded), true);
+}
+
+/*
+ * op_ld_i64()
+ *  args[0]: Out
+ *  args[1]: In (base)
+ *  args[2]: In (offset)
+ *
+ * Loads a 64-bit value from base + offset and binds it to Out. When
+ * In.isRev() holds the address comes from StatePointer() and the load is
+ * marked volatile if isStateOfPC(offset) is true; otherwise the address is
+ * computed from the value of the base register.
+ */
+void IRFactory::op_ld_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_ld_i64);
+
+    Register &Out = Reg[args[0]], &In = Reg[args[1]];
+    TCGArg Off = args[2];
+
+    AssertType(Out.Size == 64);
+
+    Value *Loaded;
+    if (!In.isRev()) {
+        /* Generic base: offset an Int8PtrTy pointer, then retype it. */
+        Value *Addr = LoadState(In);
+        if (Addr->getType() != Int8PtrTy)
+            Addr = CASTPTR8(Addr);
+        Addr = GetElementPtrInst::CreateInBounds(Addr, CONSTPtr(Off), "", LastInst);
+        Addr = CASTPTR64(Addr);
+        Loaded = new LoadInst(Addr, "", false, LastInst);
+    } else {
+        Value *Addr = StatePointer(In, Off, Int64PtrTy);
+        LoadInst *LI = new LoadInst(Addr, "", false, LastInst);
+        if (isStateOfPC(Off))
+            LI->setVolatile(true);
+        Loaded = LI;
+    }
+
+    Out.setData(Loaded, true);
+}
+
+/*
+ * op_st8_i64()
+ *  args[0]: In1 (value to store)
+ *  args[1]: In2 (base)
+ *  args[2]: In3 (offset)
+ *
+ * Truncates In1 to 8 bits and stores it at base + offset.
+ */
+void IRFactory::op_st8_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_st8_i64);
+
+    Register &In1 = Reg[args[0]], &In2 = Reg[args[1]];
+    TCGArg Disp = args[2];
+
+    AssertType(In1.Size == 64);
+
+    Value *Data = LoadState(In1);
+    Value *Addr = LoadState(In2);
+
+    Data = TRUNC8(Data);
+
+    /* Address arithmetic is done on an Int8PtrTy-typed base. */
+    if (Addr->getType() != Int8PtrTy)
+        Addr = CASTPTR8(Addr);
+    Addr = GetElementPtrInst::CreateInBounds(Addr, CONSTPtr(Disp), "", LastInst);
+
+    new StoreInst(Data, Addr, false, LastInst);
+}
+
+/*
+ * op_st16_i64()
+ *  args[0]: In1 (value to store)
+ *  args[1]: In2 (base)
+ *  args[2]: In3 (offset)
+ *
+ * Truncates In1 to 16 bits and stores it at base + offset.
+ */
+void IRFactory::op_st16_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_st16_i64);
+
+    Register &In1 = Reg[args[0]], &In2 = Reg[args[1]];
+    TCGArg Disp = args[2];
+
+    AssertType(In1.Size == 64);
+
+    Value *Data = LoadState(In1);
+    Value *Addr = LoadState(In2);
+
+    Data = TRUNC16(Data);
+
+    /* Address arithmetic is done on an Int8PtrTy-typed base. */
+    if (Addr->getType() != Int8PtrTy)
+        Addr = CASTPTR8(Addr);
+    Addr = GetElementPtrInst::CreateInBounds(Addr, CONSTPtr(Disp), "", LastInst);
+    Addr = CASTPTR16(Addr);
+
+    new StoreInst(Data, Addr, false, LastInst);
+}
+
+/*
+ * op_st32_i64()
+ *  args[0]: In1 (value to store)
+ *  args[1]: In2 (base)
+ *  args[2]: In3 (offset)
+ *
+ * Truncates In1 to 32 bits and stores it at base + offset.
+ */
+void IRFactory::op_st32_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_st32_i64);
+
+    Register &In1 = Reg[args[0]], &In2 = Reg[args[1]];
+    TCGArg Disp = args[2];
+
+    AssertType(In1.Size == 64);
+
+    Value *Data = LoadState(In1);
+    Value *Addr = LoadState(In2);
+
+    Data = TRUNC32(Data);
+
+    /* Address arithmetic is done on an Int8PtrTy-typed base. */
+    if (Addr->getType() != Int8PtrTy)
+        Addr = CASTPTR8(Addr);
+    Addr = GetElementPtrInst::CreateInBounds(Addr, CONSTPtr(Disp), "", LastInst);
+    Addr = CASTPTR32(Addr);
+
+    new StoreInst(Data, Addr, false, LastInst);
+}
+
+/*
+ * op_st_i64()
+ *  args[0]: In1 (value to store)
+ *  args[1]: In2 (base)
+ *  args[2]: In3 (offset)
+ *
+ * Stores the 64-bit value In1 at base + offset. When In2.isRev() holds the
+ * address comes from StatePointer() and the store is marked volatile if
+ * isStateOfPC(offset) is true; otherwise the address is computed from the
+ * value of the base register.
+ */
+void IRFactory::op_st_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_st_i64);
+
+    Register &In1 = Reg[args[0]], &In2 = Reg[args[1]];
+    TCGArg Off = args[2];
+
+    AssertType(In1.Size == 64);
+
+    Value *Data = LoadState(In1);
+
+    if (!In2.isRev()) {
+        /* Generic base: offset an Int8PtrTy pointer, then retype it. */
+        Value *Addr = LoadState(In2);
+        if (Addr->getType() != Int8PtrTy)
+            Addr = CASTPTR8(Addr);
+        Addr = GetElementPtrInst::CreateInBounds(Addr, CONSTPtr(Off), "", LastInst);
+        Addr = CASTPTR64(Addr);
+        new StoreInst(Data, Addr, false, LastInst);
+        return;
+    }
+
+    Value *Addr = StatePointer(In2, Off, Int64PtrTy);
+    StoreInst *Store = new StoreInst(Data, Addr, false, LastInst);
+    if (isStateOfPC(Off))
+        Store->setVolatile(true);
+}
+
+/* arith */
+/*
+ * op_add_i64()
+ *  args[0]: Out
+ *  args[1]: In1
+ *  args[2]: In2
+ *
+ * Binds ADD(In1, In2) to Out. When In1.isRev() holds, In2 is instead
+ * interpreted as a constant offset and Out is bound to
+ * StatePointer(In1, offset, Int64PtrTy).
+ */
+void IRFactory::op_add_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_add_i64);
+
+    Register &Out = Reg[args[0]];
+    Register &In1 = Reg[args[1]];
+    Register &In2 = Reg[args[2]];
+
+    AssertType(Out.Size == 64 && In1.Size == 64 && In2.Size == 64);
+
+    Value *InData1 = LoadState(In1);
+    Value *InData2 = LoadState(In2);
+    Value *OutData;
+    if (In1.isRev()) {
+        /* The offset has to be a constant. Verify the dynamic type rather
+         * than blindly down-casting an arbitrary Value, which would be
+         * undefined behaviour if the operand were not a ConstantInt. */
+        ConstantInt *CI = dyn_cast<ConstantInt>(InData2);
+        if (!CI) {
+            IRError("%s: offset operand is not a constant.\n", __func__);
+        }
+        intptr_t Off = CI->getSExtValue();
+        OutData = StatePointer(In1, Off, Int64PtrTy);
+    } else
+        OutData = ADD(InData1, InData2);
+
+    Out.setData(OutData, true);
+}
+
+/*
+ * op_sub_i64()
+ *  args[0]: Out
+ *  args[1]: In1 (left operand)
+ *  args[2]: In2 (right operand)
+ *
+ * Binds SUB(In1, In2) to Out; all three registers must be 64 bits wide.
+ */
+void IRFactory::op_sub_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_sub_i64);
+
+    Register &Out = Reg[args[0]], &In1 = Reg[args[1]], &In2 = Reg[args[2]];
+
+    AssertType(Out.Size == 64 && In1.Size == 64 && In2.Size == 64);
+
+    Value *LHS = LoadState(In1);
+    Value *RHS = LoadState(In2);
+
+    Out.setData(SUB(LHS, RHS), true);
+}
+
+/*
+ * op_mul_i64()
+ *  args[0]: Out
+ *  args[1]: In1
+ *  args[2]: In2
+ *
+ * Binds MUL(In1, In2) to Out; all three registers must be 64 bits wide.
+ */
+void IRFactory::op_mul_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_mul_i64);
+
+    Register &Out = Reg[args[0]], &In1 = Reg[args[1]], &In2 = Reg[args[2]];
+
+    AssertType(Out.Size == 64 && In1.Size == 64 && In2.Size == 64);
+
+    Value *LHS = LoadState(In1);
+    Value *RHS = LoadState(In2);
+
+    Out.setData(MUL(LHS, RHS), true);
+}
+
+/*
+ * op_div_i64()
+ *  args[0]: Out
+ *  args[1]: In1 (dividend)
+ *  args[2]: In2 (divisor)
+ *
+ * Binds SDIV(In1, In2) to Out; all three registers must be 64 bits wide.
+ */
+void IRFactory::op_div_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_div_i64);
+
+    Register &Out = Reg[args[0]], &In1 = Reg[args[1]], &In2 = Reg[args[2]];
+
+    AssertType(Out.Size == 64 && In1.Size == 64 && In2.Size == 64);
+
+    Value *Dividend = LoadState(In1);
+    Value *Divisor = LoadState(In2);
+
+    Out.setData(SDIV(Dividend, Divisor), true);
+}
+
+/*
+ * op_divu_i64()
+ *  args[0]: Out
+ *  args[1]: In1 (dividend)
+ *  args[2]: In2 (divisor)
+ *
+ * Binds UDIV(In1, In2) to Out; all three registers must be 64 bits wide.
+ */
+void IRFactory::op_divu_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_divu_i64);
+
+    Register &Out = Reg[args[0]], &In1 = Reg[args[1]], &In2 = Reg[args[2]];
+
+    AssertType(Out.Size == 64 && In1.Size == 64 && In2.Size == 64);
+
+    Value *Dividend = LoadState(In1);
+    Value *Divisor = LoadState(In2);
+
+    Out.setData(UDIV(Dividend, Divisor), true);
+}
+
+/*
+ * op_rem_i64()
+ *  args[0]: Out
+ *  args[1]: In1 (dividend)
+ *  args[2]: In2 (divisor)
+ *
+ * Binds SREM(In1, In2) to Out; all three registers must be 64 bits wide.
+ */
+void IRFactory::op_rem_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_rem_i64);
+
+    Register &Out = Reg[args[0]], &In1 = Reg[args[1]], &In2 = Reg[args[2]];
+
+    AssertType(Out.Size == 64 && In1.Size == 64 && In2.Size == 64);
+
+    Value *Dividend = LoadState(In1);
+    Value *Divisor = LoadState(In2);
+
+    Out.setData(SREM(Dividend, Divisor), true);
+}
+
+/*
+ * op_remu_i64()
+ *  args[0]: Out
+ *  args[1]: In1 (dividend)
+ *  args[2]: In2 (divisor)
+ *
+ * Binds UREM(In1, In2) to Out; all three registers must be 64 bits wide.
+ */
+void IRFactory::op_remu_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_remu_i64);
+
+    Register &Out = Reg[args[0]], &In1 = Reg[args[1]], &In2 = Reg[args[2]];
+
+    AssertType(Out.Size == 64 && In1.Size == 64 && In2.Size == 64);
+
+    Value *Dividend = LoadState(In1);
+    Value *Divisor = LoadState(In2);
+
+    Out.setData(UREM(Dividend, Divisor), true);
+}
+
+/*
+ * op_div2_i64()
+ *  Not implemented: reports an error through IRError() naming this
+ *  function. The args array is not inspected.
+ */
+void IRFactory::op_div2_i64(const TCGArg *args)
+{
+ IRError("%s not implemented.\n", __func__);
+}
+
+/*
+ * op_divu2_i64()
+ *  Not implemented: reports an error through IRError() naming this
+ *  function. The args array is not inspected.
+ */
+void IRFactory::op_divu2_i64(const TCGArg *args)
+{
+ IRError("%s not implemented.\n", __func__);
+}
+
+/*
+ * op_and_i64()
+ *  args[0]: Out
+ *  args[1]: In1
+ *  args[2]: In2
+ *
+ * Binds AND(In1, In2) to Out. Register sizes are not asserted here;
+ * instead an operand whose value is 32 bits wide is zero-extended to 64
+ * bits before the AND is emitted.
+ */
+void IRFactory::op_and_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_and_i64);
+
+    Register &Out = Reg[args[0]], &In1 = Reg[args[1]], &In2 = Reg[args[2]];
+
+    Value *LHS = LoadState(In1);
+    Value *RHS = LoadState(In2);
+
+    /* Widen 32-bit operands, left operand first. */
+    size_t Bits = DL->getTypeSizeInBits(LHS->getType());
+    if (Bits == 32)
+        LHS = ZEXT64(LHS);
+
+    Bits = DL->getTypeSizeInBits(RHS->getType());
+    if (Bits == 32)
+        RHS = ZEXT64(RHS);
+
+    Out.setData(AND(LHS, RHS), true);
+}
+
+/*
+ * op_or_i64()
+ *  args[0]: Out
+ *  args[1]: In1
+ *  args[2]: In2
+ *
+ * Binds OR(In1, In2) to Out; all three registers must be 64 bits wide.
+ */
+void IRFactory::op_or_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_or_i64);
+
+    Register &Out = Reg[args[0]], &In1 = Reg[args[1]], &In2 = Reg[args[2]];
+
+    AssertType(Out.Size == 64 && In1.Size == 64 && In2.Size == 64);
+
+    Value *LHS = LoadState(In1);
+    Value *RHS = LoadState(In2);
+
+    Out.setData(OR(LHS, RHS), true);
+}
+
+/*
+ * op_xor_i64()
+ *  args[0]: Out
+ *  args[1]: In1
+ *  args[2]: In2
+ * Out = XOR(In1, In2); all three registers must be 64 bits wide.
+ */
+void IRFactory::op_xor_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_xor_i64);
+
+    Register &Dst = Reg[args[0]];
+    Register &Lhs = Reg[args[1]];
+    Register &Rhs = Reg[args[2]];
+
+    AssertType(Dst.Size == 64 && Lhs.Size == 64 && Rhs.Size == 64);
+
+    /* Load the operands in argument order, then emit the operation. */
+    Value *A = LoadState(Lhs);
+    Value *B = LoadState(Rhs);
+    Dst.setData(XOR(A, B), true);
+}
+
+/* shifts/rotates */
+/*
+ * op_shl_i64()
+ *  args[0]: Out
+ *  args[1]: In1 (value)
+ *  args[2]: In2 (shift amount)
+ * Out = SHL(In1, In2); all three registers must be 64 bits wide.
+ */
+void IRFactory::op_shl_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_shl_i64);
+
+    Register &Dst = Reg[args[0]];
+    Register &Src = Reg[args[1]];
+    Register &Cnt = Reg[args[2]];
+
+    AssertType(Dst.Size == 64 && Src.Size == 64 && Cnt.Size == 64);
+
+    /* Load the operands in argument order, then emit the shift. */
+    Value *Val = LoadState(Src);
+    Value *Amount = LoadState(Cnt);
+    Dst.setData(SHL(Val, Amount), true);
+}
+
+/*
+ * op_shr_i64()
+ *  args[0]: Out
+ *  args[1]: In1 (value)
+ *  args[2]: In2 (shift amount)
+ * Out = LSHR(In1, In2); all three registers must be 64 bits wide.
+ */
+void IRFactory::op_shr_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_shr_i64);
+
+    Register &Dst = Reg[args[0]];
+    Register &Src = Reg[args[1]];
+    Register &Cnt = Reg[args[2]];
+
+    AssertType(Dst.Size == 64 && Src.Size == 64 && Cnt.Size == 64);
+
+    /* Load the operands in argument order, then emit the shift. */
+    Value *Val = LoadState(Src);
+    Value *Amount = LoadState(Cnt);
+    Dst.setData(LSHR(Val, Amount), true);
+}
+
+/*
+ * op_sar_i64()
+ *  args[0]: Out
+ *  args[1]: In1 (value)
+ *  args[2]: In2 (shift amount)
+ * Out = ASHR(In1, In2); all three registers must be 64 bits wide.
+ */
+void IRFactory::op_sar_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_sar_i64);
+
+    Register &Dst = Reg[args[0]];
+    Register &Src = Reg[args[1]];
+    Register &Cnt = Reg[args[2]];
+
+    AssertType(Dst.Size == 64 && Src.Size == 64 && Cnt.Size == 64);
+
+    /* Load the operands in argument order, then emit the shift. */
+    Value *Val = LoadState(Src);
+    Value *Amount = LoadState(Cnt);
+    Dst.setData(ASHR(Val, Amount), true);
+}
+
+/*
+ * op_rotl_i64()
+ *  args[0]: Out
+ *  args[1]: In1 (value to rotate)
+ *  args[2]: In2 (rotate count)
+ * Out = (In1 << In2) | (In1 >> ((64 - In2) & 63)).
+ */
+void IRFactory::op_rotl_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_rotl_i64);
+
+    Register &Out = Reg[args[0]];
+    Register &In1 = Reg[args[1]];
+    Register &In2 = Reg[args[2]];
+
+    AssertType(Out.Size == 64 && In1.Size == 64 && In2.Size == 64);
+
+    Value *InData1 = LoadState(In1);
+    Value *InData2 = LoadState(In2);
+
+    /* The complementary shift amount is reduced modulo 64. Without the
+     * mask a rotate count of zero would shift by 64, and a shift by the
+     * full bit width is a poison value in LLVM IR. For counts 1..63 the
+     * mask does not change the amount. */
+    Value *Comp = SUB(CONST64(64), InData2);
+    Comp = AND(Comp, CONST64(63));
+
+    Value *C = LSHR(InData1, Comp);
+    Value *OutData = SHL(InData1, InData2);
+    OutData = OR(OutData, C);
+    Out.setData(OutData, true);
+}
+
+/*
+ * op_rotr_i64()
+ *  args[0]: Out
+ *  args[1]: In1 (value to rotate)
+ *  args[2]: In2 (rotate count)
+ * Out = (In1 >> In2) | (In1 << ((64 - In2) & 63)).
+ */
+void IRFactory::op_rotr_i64(const TCGArg *args)
+{
+    /* Trace this opcode like every other op handler does. */
+    IRDebug(INDEX_op_rotr_i64);
+
+    Register &Out = Reg[args[0]];
+    Register &In1 = Reg[args[1]];
+    Register &In2 = Reg[args[2]];
+
+    AssertType(Out.Size == 64 && In1.Size == 64 && In2.Size == 64);
+
+    Value *InData1 = LoadState(In1);
+    Value *InData2 = LoadState(In2);
+
+    /* The complementary shift amount is reduced modulo 64. Without the
+     * mask a rotate count of zero would shift by 64, and a shift by the
+     * full bit width is a poison value in LLVM IR. For counts 1..63 the
+     * mask does not change the amount. */
+    Value *Comp = SUB(CONST64(64), InData2);
+    Comp = AND(Comp, CONST64(63));
+
+    Value *c = SHL(InData1, Comp);
+    Value *OutData = LSHR(InData1, InData2);
+    OutData = OR(OutData, c);
+    Out.setData(OutData, true);
+}
+
+/*
+ * op_deposit_i64()
+ *  args[0]: Out
+ *  args[1]: In1 (value receiving the field)
+ *  args[2]: In2 (value supplying the field)
+ *  args[3]: bit position of the field
+ *  args[4]: field length in bits
+ * Deposit the lowest args[4] bits of In2 into In1 starting at bit args[3].
+ */
+void IRFactory::op_deposit_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_deposit_i64);
+
+    const TCGArg Pos = args[3];
+    const TCGArg Len = args[4];
+
+    /* Bits [Pos, Pos + Len) are set in the field mask. */
+    APInt FieldMask = APInt::getBitsSet(64, Pos, Pos + Len);
+
+    Register &Dst = Reg[args[0]];
+    Register &Base = Reg[args[1]];
+    Register &Field = Reg[args[2]];
+
+    AssertType(Dst.Size == 64 && Base.Size == 64 && Field.Size == 64);
+
+    Value *BaseVal = LoadState(Base);
+    Value *FieldVal = LoadState(Field);
+
+    /* Move the field into position; no shift is emitted for position 0. */
+    if (Pos)
+        FieldVal = SHL(FieldVal, CONST64(Pos));
+
+    /* Keep only the field bits of In2 and clear them in In1, then merge. */
+    FieldVal = AND(FieldVal, ConstantInt::get(*Context, FieldMask));
+    BaseVal = AND(BaseVal, ConstantInt::get(*Context, ~FieldMask));
+    Dst.setData(OR(BaseVal, FieldVal), true);
+}
+
+/*
+ * op_ext_i32_i64()
+ *  args[0]: Out (64-bit)
+ *  args[1]: In  (32-bit)
+ * Out = SEXT64(In).
+ */
+void IRFactory::op_ext_i32_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_ext_i32_i64);
+
+    Register &Dst = Reg[args[0]];
+    Register &Src = Reg[args[1]];
+
+    AssertType(Dst.Size == 64 && Src.Size == 32);
+
+    Value *Narrow = LoadState(Src);
+    Dst.setData(SEXT64(Narrow), true);
+}
+
+/*
+ * op_extu_i32_i64()
+ *  args[0]: Out (64-bit)
+ *  args[1]: In  (32-bit)
+ * Out = ZEXT64(In).
+ */
+void IRFactory::op_extu_i32_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_extu_i32_i64);
+
+    Register &Dst = Reg[args[0]];
+    Register &Src = Reg[args[1]];
+
+    AssertType(Dst.Size == 64 && Src.Size == 32);
+
+    Value *Narrow = LoadState(Src);
+    Dst.setData(ZEXT64(Narrow), true);
+}
+
+/*
+ * op_extrl_i64_i32()
+ *  args[0]: Out (32-bit)
+ *  args[1]: In  (64-bit)
+ * Out = TRUNC32(In), i.e. the low half of In.
+ */
+void IRFactory::op_extrl_i64_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_extrl_i64_i32);
+
+    Register &Dst = Reg[args[0]];
+    Register &Src = Reg[args[1]];
+
+    AssertType(Dst.Size == 32 && Src.Size == 64);
+
+    Value *Wide = LoadState(Src);
+    Dst.setData(TRUNC32(Wide), true);
+}
+
+/*
+ * op_extrh_i64_i32()
+ *  args[0]: Out (32-bit)
+ *  args[1]: In  (64-bit)
+ * Out = TRUNC32(In >> 32), i.e. the high half of In.
+ */
+void IRFactory::op_extrh_i64_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_extrh_i64_i32);
+
+    Register &Dst = Reg[args[0]];
+    Register &Src = Reg[args[1]];
+
+    AssertType(Dst.Size == 32 && Src.Size == 64);
+
+    Value *Wide = LoadState(Src);
+    Value *Upper = LSHR(Wide, CONST64(32));
+    Dst.setData(TRUNC32(Upper), true);
+}
+
+/*
+ * op_brcond_i64()
+ *  args[0]: In1
+ *  args[1]: In2
+ *  args[2]: condition
+ *  args[3]: label of the taken target
+ *
+ * brcond_i64 format:
+ *       brcond_i64 op1,op2,cond,<ifTrue>
+ *   <ifFalse>:
+ *       A
+ *   <ifTrue>:
+ *       B
+ */
+void IRFactory::op_brcond_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_brcond_i64);
+
+    Register &Lhs = Reg[args[0]];
+    Register &Rhs = Reg[args[1]];
+    CmpInst::Predicate Pred = getPred(args[2]);
+
+    AssertType(Lhs.Size == 64 && Rhs.Size == 64);
+
+    Value *LhsVal = LoadState(Lhs);
+    Value *RhsVal = LoadState(Rhs);
+
+    SaveGlobals(COHERENCE_ALL, LastInst);
+
+    /* Create the block for the target label on first reference. */
+    TCGArg Target = args[3];
+    if (Labels.find(Target) == Labels.end())
+        Labels[Target] = BasicBlock::Create(*Context, "succ", Func);
+
+    BasicBlock *Taken = Labels[Target];
+    BasicBlock *FallThrough = BasicBlock::Create(*Context, "succ", Func);
+
+    /* Replace the current terminator with the conditional branch. */
+    Value *Cond = ICMP(LhsVal, RhsVal, Pred);
+    BranchInst::Create(Taken, FallThrough, Cond, LastInst);
+    LastInst->eraseFromParent();
+
+    /* Translation continues in the fall-through block. */
+    CurrBB = FallThrough;
+    LastInst = BranchInst::Create(ExitBB, CurrBB);
+}
+
+/*
+ * op_ext8s_i64()
+ *  args[0]: Out
+ *  args[1]: In
+ * Out = SEXT64(TRUNC8(In)).
+ */
+void IRFactory::op_ext8s_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_ext8s_i64);
+
+    Register &Dst = Reg[args[0]];
+    Register &Src = Reg[args[1]];
+
+    AssertType(Dst.Size == 64 && Src.Size == 64);
+
+    Value *Val = LoadState(Src);
+    Value *Low = TRUNC8(Val);
+    Dst.setData(SEXT64(Low), true);
+}
+
+/*
+ * op_ext16s_i64()
+ *  args[0]: Out
+ *  args[1]: In
+ * Out = SEXT64(TRUNC16(In)).
+ */
+void IRFactory::op_ext16s_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_ext16s_i64);
+
+    Register &Dst = Reg[args[0]];
+    Register &Src = Reg[args[1]];
+
+    AssertType(Dst.Size == 64 && Src.Size == 64);
+
+    Value *Val = LoadState(Src);
+    Value *Low = TRUNC16(Val);
+    Dst.setData(SEXT64(Low), true);
+}
+
+/*
+ * op_ext32s_i64()
+ *  args[0]: Out
+ *  args[1]: In
+ * Out = SEXT64 of the low 32 bits of In. The truncation is skipped when
+ * the loaded value is already 32 bits wide.
+ */
+void IRFactory::op_ext32s_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_ext32s_i64);
+
+    Register &Dst = Reg[args[0]];
+    Register &Src = Reg[args[1]];
+
+    AssertType(Dst.Size == 64 && Src.Size == 64);
+
+    Value *Val = LoadState(Src);
+    const bool AlreadyNarrow = DL->getTypeSizeInBits(Val->getType()) == 32;
+    if (!AlreadyNarrow)
+        Val = TRUNC32(Val);
+    Dst.setData(SEXT64(Val), true);
+}
+
+/*
+ * op_ext8u_i64()
+ *  args[0]: Out
+ *  args[1]: In
+ * Out = In & 0xFF.
+ */
+void IRFactory::op_ext8u_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_ext8u_i64);
+
+    Register &Dst = Reg[args[0]];
+    Register &Src = Reg[args[1]];
+
+    AssertType(Dst.Size == 64 && Src.Size == 64);
+
+    Value *Val = LoadState(Src);
+    Dst.setData(AND(Val, CONST64(0xFF)), true);
+}
+
+/*
+ * op_ext16u_i64()
+ *  args[0]: Out
+ *  args[1]: In
+ * Out = In & 0xFFFF.
+ */
+void IRFactory::op_ext16u_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_ext16u_i64);
+
+    Register &Dst = Reg[args[0]];
+    Register &Src = Reg[args[1]];
+
+    AssertType(Dst.Size == 64 && Src.Size == 64);
+
+    Value *Val = LoadState(Src);
+    Dst.setData(AND(Val, CONST64(0xFFFF)), true);
+}
+
+/*
+ * op_ext32u_i64()
+ *  args[0]: Out
+ *  args[1]: In
+ * Out = zero-extended low 32 bits of In. A value that was loaded as 32
+ * bits is widened with ZEXT64; otherwise the upper bits are masked off.
+ */
+void IRFactory::op_ext32u_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_ext32u_i64);
+
+    Register &Dst = Reg[args[0]];
+    Register &Src = Reg[args[1]];
+
+    AssertType(Dst.Size == 64 && Src.Size == 64);
+
+    Value *Val = LoadState(Src);
+    const bool IsNarrow = DL->getTypeSizeInBits(Val->getType()) == 32;
+    Val = IsNarrow ? ZEXT64(Val) : AND(Val, CONST64(0xFFFFFFFF));
+    Dst.setData(Val, true);
+}
+
+/*
+ * op_bswap16_i64()
+ *  args[0]: Out
+ *  args[1]: In
+ * Out = ZEXT64(BSWAP16(TRUNC16(In))).
+ */
+void IRFactory::op_bswap16_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_bswap16_i64);
+
+    Register &Dst = Reg[args[0]];
+    Register &Src = Reg[args[1]];
+
+    AssertType(Dst.Size == 64 && Src.Size == 64);
+
+    Value *Val = LoadState(Src);
+    Value *Low = TRUNC16(Val);
+    Value *Swapped = BSWAP16(Low);
+    Dst.setData(ZEXT64(Swapped), true);
+}
+
+/*
+ * op_bswap32_i64()
+ *  args[0]: Out
+ *  args[1]: In
+ * Out = ZEXT64(BSWAP32(TRUNC32(In))).
+ */
+void IRFactory::op_bswap32_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_bswap32_i64);
+
+    Register &Dst = Reg[args[0]];
+    Register &Src = Reg[args[1]];
+
+    AssertType(Dst.Size == 64 && Src.Size == 64);
+
+    Value *Val = LoadState(Src);
+    Value *Low = TRUNC32(Val);
+    Value *Swapped = BSWAP32(Low);
+    Dst.setData(ZEXT64(Swapped), true);
+}
+
+/*
+ * op_bswap64_i64()
+ *  args[0]: Out
+ *  args[1]: In
+ * Out = BSWAP64(In).
+ */
+void IRFactory::op_bswap64_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_bswap64_i64);
+
+    Register &Dst = Reg[args[0]];
+    Register &Src = Reg[args[1]];
+
+    AssertType(Dst.Size == 64 && Src.Size == 64);
+
+    Value *Val = LoadState(Src);
+    Dst.setData(BSWAP64(Val), true);
+}
+
+/*
+ * op_not_i64()
+ *  args[0]: Out
+ *  args[1]: In
+ * Out = In ^ all-ones.
+ */
+void IRFactory::op_not_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_not_i64);
+
+    Register &Dst = Reg[args[0]];
+    Register &Src = Reg[args[1]];
+
+    AssertType(Dst.Size == 64 && Src.Size == 64);
+
+    Value *Val = LoadState(Src);
+    Dst.setData(XOR(Val, CONST64((uint64_t)-1)), true);
+}
+
+/*
+ * op_neg_i64()
+ *  args[0]: Out
+ *  args[1]: In
+ * Out = 0 - In.
+ */
+void IRFactory::op_neg_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_neg_i64);
+
+    Register &Dst = Reg[args[0]];
+    Register &Src = Reg[args[1]];
+
+    AssertType(Dst.Size == 64 && Src.Size == 64);
+
+    Value *Val = LoadState(Src);
+    Dst.setData(SUB(CONST64(0), Val), true);
+}
+
+/*
+ * op_andc_i64()
+ *  args[0]: Out
+ *  args[1]: In1
+ *  args[2]: In2
+ * Out = In1 & ~In2.
+ */
+void IRFactory::op_andc_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_andc_i64);
+
+    Register &Dst = Reg[args[0]];
+    Register &Lhs = Reg[args[1]];
+    Register &Rhs = Reg[args[2]];
+
+    AssertType(Dst.Size == 64 && Lhs.Size == 64 && Rhs.Size == 64);
+
+    Value *A = LoadState(Lhs);
+    Value *B = LoadState(Rhs);
+
+    /* Complement the second operand by XOR with all-ones. */
+    Value *NotB = XOR(B, CONST64((uint64_t)-1));
+    Dst.setData(AND(A, NotB), true);
+}
+
+/*
+ * op_orc_i64()
+ *  args[0]: Out
+ *  args[1]: In1
+ *  args[2]: In2
+ * Out = In1 | ~In2.
+ */
+void IRFactory::op_orc_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_orc_i64);
+
+    Register &Dst = Reg[args[0]];
+    Register &Lhs = Reg[args[1]];
+    Register &Rhs = Reg[args[2]];
+
+    AssertType(Dst.Size == 64 && Lhs.Size == 64 && Rhs.Size == 64);
+
+    Value *A = LoadState(Lhs);
+    Value *B = LoadState(Rhs);
+
+    /* Complement the second operand by XOR with all-ones. */
+    Value *NotB = XOR(B, CONST64((uint64_t)-1));
+    Dst.setData(OR(A, NotB), true);
+}
+
+/*
+ * op_eqv_i64()
+ *  args[0]: Out
+ *  args[1]: In1
+ *  args[2]: In2
+ * Out = In1 ^ ~In2.
+ */
+void IRFactory::op_eqv_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_eqv_i64);
+
+    Register &Dst = Reg[args[0]];
+    Register &Lhs = Reg[args[1]];
+    Register &Rhs = Reg[args[2]];
+
+    AssertType(Dst.Size == 64 && Lhs.Size == 64 && Rhs.Size == 64);
+
+    Value *A = LoadState(Lhs);
+    Value *B = LoadState(Rhs);
+
+    /* Complement the second operand by XOR with all-ones. */
+    Value *NotB = XOR(B, CONST64((uint64_t)-1));
+    Dst.setData(XOR(A, NotB), true);
+}
+
+/*
+ * op_nand_i64()
+ *  args[0]: Out
+ *  args[1]: In1
+ *  args[2]: In2
+ * Out = ~(In1 & In2).
+ */
+void IRFactory::op_nand_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_nand_i64);
+
+    Register &Dst = Reg[args[0]];
+    Register &Lhs = Reg[args[1]];
+    Register &Rhs = Reg[args[2]];
+
+    AssertType(Dst.Size == 64 && Lhs.Size == 64 && Rhs.Size == 64);
+
+    Value *A = LoadState(Lhs);
+    Value *B = LoadState(Rhs);
+
+    /* Combine first, then complement by XOR with all-ones. */
+    Value *Both = AND(A, B);
+    Dst.setData(XOR(Both, CONST64((uint64_t)-1)), true);
+}
+
+/*
+ * op_nor_i64()
+ *  args[0]: Out
+ *  args[1]: In1
+ *  args[2]: In2
+ * Out = ~(In1 | In2).
+ */
+void IRFactory::op_nor_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_nor_i64);
+
+    Register &Dst = Reg[args[0]];
+    Register &Lhs = Reg[args[1]];
+    Register &Rhs = Reg[args[2]];
+
+    AssertType(Dst.Size == 64 && Lhs.Size == 64 && Rhs.Size == 64);
+
+    Value *A = LoadState(Lhs);
+    Value *B = LoadState(Rhs);
+
+    /* Combine first, then complement by XOR with all-ones. */
+    Value *Either = OR(A, B);
+    Dst.setData(XOR(Either, CONST64((uint64_t)-1)), true);
+}
+
+/*
+ * op_add2_i64()
+ *  args[0]: Out1 (low half of the sum)
+ *  args[1]: Out2 (high half of the sum)
+ *  args[2]: In1  (low half of the first operand)
+ *  args[3]: In2  (high half of the first operand)
+ *  args[4]: In3  (low half of the second operand)
+ *  args[5]: In4  (high half of the second operand)
+ * The two halves of each operand are joined into a 128-bit value, the
+ * values are added, and the sum is split back into two 64-bit halves.
+ */
+void IRFactory::op_add2_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_add2_i64);
+
+    Register &DstLo = Reg[args[0]];
+    Register &DstHi = Reg[args[1]];
+    Register &ALo = Reg[args[2]];
+    Register &AHi = Reg[args[3]];
+    Register &BLo = Reg[args[4]];
+    Register &BHi = Reg[args[5]];
+
+    AssertType(DstLo.Size == 64 && DstHi.Size == 64 &&
+               ALo.Size == 64 && AHi.Size == 64 &&
+               BLo.Size == 64 && BHi.Size == 64);
+
+    Value *ALoVal = LoadState(ALo);
+    Value *AHiVal = LoadState(AHi);
+    Value *BLoVal = LoadState(BLo);
+    Value *BHiVal = LoadState(BHi);
+
+    /* First operand: (hi << 64) | lo. */
+    ALoVal = ZEXT128(ALoVal);
+    AHiVal = SHL(ZEXT128(AHiVal), CONST128(64));
+    Value *A = OR(AHiVal, ALoVal);
+
+    /* Second operand: (hi << 64) | lo. */
+    BLoVal = ZEXT128(BLoVal);
+    BHiVal = SHL(ZEXT128(BHiVal), CONST128(64));
+    Value *B = OR(BHiVal, BLoVal);
+
+    Value *Sum = ADD(A, B);
+
+    Value *SumLo = TRUNC64(Sum);
+    Value *SumHi = TRUNC64(LSHR(Sum, CONST128(64)));
+    DstLo.setData(SumLo, true);
+    DstHi.setData(SumHi, true);
+}
+
+/*
+ * op_sub2_i64()
+ *  args[0]: Out1 (low half of the difference)
+ *  args[1]: Out2 (high half of the difference)
+ *  args[2]: In1  (low half of the first operand)
+ *  args[3]: In2  (high half of the first operand)
+ *  args[4]: In3  (low half of the second operand)
+ *  args[5]: In4  (high half of the second operand)
+ * The two halves of each operand are joined into a 128-bit value, the
+ * second is subtracted from the first, and the result is split back
+ * into two 64-bit halves.
+ */
+void IRFactory::op_sub2_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_sub2_i64);
+
+    Register &DstLo = Reg[args[0]];
+    Register &DstHi = Reg[args[1]];
+    Register &ALo = Reg[args[2]];
+    Register &AHi = Reg[args[3]];
+    Register &BLo = Reg[args[4]];
+    Register &BHi = Reg[args[5]];
+
+    AssertType(DstLo.Size == 64 && DstHi.Size == 64 &&
+               ALo.Size == 64 && AHi.Size == 64 &&
+               BLo.Size == 64 && BHi.Size == 64);
+
+    Value *ALoVal = LoadState(ALo);
+    Value *AHiVal = LoadState(AHi);
+    Value *BLoVal = LoadState(BLo);
+    Value *BHiVal = LoadState(BHi);
+
+    /* First operand: (hi << 64) | lo. */
+    ALoVal = ZEXT128(ALoVal);
+    AHiVal = SHL(ZEXT128(AHiVal), CONST128(64));
+    Value *A = OR(AHiVal, ALoVal);
+
+    /* Second operand: (hi << 64) | lo. */
+    BLoVal = ZEXT128(BLoVal);
+    BHiVal = SHL(ZEXT128(BHiVal), CONST128(64));
+    Value *B = OR(BHiVal, BLoVal);
+
+    Value *Diff = SUB(A, B);
+
+    Value *DiffLo = TRUNC64(Diff);
+    Value *DiffHi = TRUNC64(LSHR(Diff, CONST128(64)));
+    DstLo.setData(DiffLo, true);
+    DstHi.setData(DiffHi, true);
+}
+
+/*
+ * op_mulu2_i64()
+ *  args[0]: Out1 (low 64 bits of the product)
+ *  args[1]: Out2 (high 64 bits of the product)
+ *  args[2]: In1
+ *  args[3]: In2
+ * Both operands are zero-extended to 128 bits and multiplied.
+ */
+void IRFactory::op_mulu2_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_mulu2_i64);
+
+    Register &DstLo = Reg[args[0]];
+    Register &DstHi = Reg[args[1]];
+    Register &Lhs = Reg[args[2]];
+    Register &Rhs = Reg[args[3]];
+
+    AssertType(DstLo.Size == 64 && DstHi.Size == 64 &&
+               Lhs.Size == 64 && Rhs.Size == 64);
+
+    Value *A = LoadState(Lhs);
+    Value *B = LoadState(Rhs);
+
+    A = ZEXT128(A);
+    B = ZEXT128(B);
+
+    Value *Product = MUL(A, B);
+    Value *ProdLo = TRUNC64(Product);
+    Value *ProdHi = TRUNC64(LSHR(Product, CONST128(64)));
+    DstLo.setData(ProdLo, true);
+    DstHi.setData(ProdHi, true);
+}
+
+/*
+ * op_muls2_i64()
+ *  args[0]: Out1 (low 64 bits of the product)
+ *  args[1]: Out2 (high 64 bits of the product)
+ *  args[2]: In1
+ *  args[3]: In2
+ * Both operands are sign-extended to 128 bits and multiplied.
+ */
+void IRFactory::op_muls2_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_muls2_i64);
+
+    Register &DstLo = Reg[args[0]];
+    Register &DstHi = Reg[args[1]];
+    Register &Lhs = Reg[args[2]];
+    Register &Rhs = Reg[args[3]];
+
+    AssertType(DstLo.Size == 64 && DstHi.Size == 64 &&
+               Lhs.Size == 64 && Rhs.Size == 64);
+
+    Value *A = LoadState(Lhs);
+    Value *B = LoadState(Rhs);
+
+    A = SEXT128(A);
+    B = SEXT128(B);
+
+    Value *Product = MUL(A, B);
+    Value *ProdLo = TRUNC64(Product);
+    Value *ProdHi = TRUNC64(LSHR(Product, CONST128(64)));
+    DstLo.setData(ProdLo, true);
+    DstHi.setData(ProdHi, true);
+}
+
+/*
+ * op_muluh_i64()
+ *  args[0]: Out (high 64 bits of the product)
+ *  args[1]: In1
+ *  args[2]: In2
+ * Both operands are zero-extended to 128 bits and multiplied.
+ */
+void IRFactory::op_muluh_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_muluh_i64);
+
+    Register &Dst = Reg[args[0]];
+    Register &Lhs = Reg[args[1]];
+    Register &Rhs = Reg[args[2]];
+
+    AssertType(Dst.Size == 64 && Lhs.Size == 64 && Rhs.Size == 64);
+
+    Value *A = LoadState(Lhs);
+    Value *B = LoadState(Rhs);
+
+    A = ZEXT128(A);
+    B = ZEXT128(B);
+
+    Value *Product = MUL(A, B);
+    Value *ProdHi = TRUNC64(LSHR(Product, CONST128(64)));
+    Dst.setData(ProdHi, true);
+}
+
+/*
+ * op_mulsh_i64()
+ *  args[0]: Out (high 64 bits of the product)
+ *  args[1]: In1
+ *  args[2]: In2
+ * Both operands are sign-extended to 128 bits and multiplied.
+ */
+void IRFactory::op_mulsh_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_mulsh_i64);
+
+    Register &Dst = Reg[args[0]];
+    Register &Lhs = Reg[args[1]];
+    Register &Rhs = Reg[args[2]];
+
+    AssertType(Dst.Size == 64 && Lhs.Size == 64 && Rhs.Size == 64);
+
+    Value *A = LoadState(Lhs);
+    Value *B = LoadState(Rhs);
+
+    A = SEXT128(A);
+    B = SEXT128(B);
+
+    Value *Product = MUL(A, B);
+    Value *ProdHi = TRUNC64(LSHR(Product, CONST128(64)));
+    Dst.setData(ProdHi, true);
+}
+
+/* QEMU specific */
+/* op_insn_start(): bump the instruction counter kept in NI. The operand
+ * array is not inspected. */
+void IRFactory::op_insn_start(const TCGArg *args)
+{
+    IRDebug(INDEX_op_insn_start);
+    NI.NumInsts++;
+}
+
+/*
+ * InsertLinkAndExit()
+ *  Reserve a chain slot, insert an llvm.trap call tagged with
+ *  PATCH_TRACE_BLOCK_CHAINING before InsertPos, and then emit the exit
+ *  that returns the value associated with the slot.
+ */
+void IRFactory::InsertLinkAndExit(Instruction *InsertPos)
+{
+    auto Slot = LLEnv->getChainSlot();
+    size_t SlotKey = Slot.first;
+    uintptr_t ExitVal = Slot.second;
+    unsigned SlotIdx = NI.setChainSlot(SlotKey);
+    uintptr_t SlotAddr = NI.getChainSlotAddr(SlotIdx);
+
+    /* Here we use the llvm.trap intrinsic to notify LLVM backend to insert
+     * jump instruction for chaining. */
+    ConstantInt *Meta[] = { CONST32(PATCH_TRACE_BLOCK_CHAINING), CONSTPtr(SlotAddr) };
+    Function *TrapFn = Intrinsic::getDeclaration(Mod, Intrinsic::trap);
+    CallInst *Trap = CallInst::Create(TrapFn, "", InsertPos);
+    DebugLoc Loc = MF->getDebugLoc(PATCH_TRACE_BLOCK_CHAINING, SlotIdx, Func, Meta);
+    Trap->setDebugLoc(Loc);
+
+    MF->setExit(Trap);
+
+    InsertExit(ExitVal);
+}
+
+/*
+ * InsertExit()
+ *  Insert a return of RetVal before LastInst and tag it with a
+ *  PATCH_EXIT_TB debug location. When setExit is true the return is
+ *  also registered through MF->setExit().
+ */
+void IRFactory::InsertExit(uintptr_t RetVal, bool setExit)
+{
+    ConstantInt *Meta[] = { CONST32(PATCH_EXIT_TB), ExitAddr };
+    ReturnInst *Ret = ReturnInst::Create(*Context, CONSTPtr(RetVal), LastInst);
+    Ret->setDebugLoc(MF->getDebugLoc(PATCH_EXIT_TB, 0, Func, Meta));
+
+    if (!setExit)
+        return;
+    MF->setExit(Ret);
+}
+
+/*
+ * InsertLookupIBTC()
+ *  Branch to the shared "ibtc" block, creating it on first use. The
+ *  shared block calls helper_lookup_ibtc and jumps indirectly to the
+ *  value it returns. CurrNode is not used by this routine.
+ */
+void IRFactory::InsertLookupIBTC(GraphNode *CurrNode)
+{
+    if (CommonBB.find("ibtc") == CommonBB.end()) {
+        BasicBlock *Stub = BasicBlock::Create(*Context, "ibtc", Func);
+        CommonBB["ibtc"] = Stub;
+
+        Function *Helper = ResolveFunction("helper_lookup_ibtc");
+        Value *Env = ConvertCPUType(Helper, 0, Stub);
+
+        SmallVector<Value *, 4> Params;
+        Params.push_back(Env);
+        CallInst *Lookup = CallInst::Create(Helper, Params, "", Stub);
+        IndirectBrInst *Jump = IndirectBrInst::Create(Lookup, 1, Stub);
+        MF->setConst(Lookup);
+        MF->setExit(Lookup);
+
+        IndirectBrs.push_back(Jump);
+        toSink.push_back(Stub);
+    }
+
+    BasicBlock *Shared = CommonBB["ibtc"];
+    BranchInst::Create(Shared, LastInst);
+}
+
+/*
+ * InsertLookupCPBL()
+ *  Insert a call to helper_lookup_cpbl before LastInst, followed by an
+ *  indirect branch to the value it returns. The current block is queued
+ *  in toSink. CurrNode is not used by this routine.
+ */
+void IRFactory::InsertLookupCPBL(GraphNode *CurrNode)
+{
+    Function *Helper = ResolveFunction("helper_lookup_cpbl");
+    Value *Env = ConvertCPUType(Helper, 0, LastInst);
+
+    SmallVector<Value *, 4> Params;
+    Params.push_back(Env);
+    CallInst *Lookup = CallInst::Create(Helper, Params, "", LastInst);
+    IndirectBrInst *Jump = IndirectBrInst::Create(Lookup, 1, LastInst);
+    MF->setConst(Lookup);
+    MF->setExit(Lookup);
+
+    IndirectBrs.push_back(Jump);
+    toSink.push_back(CurrBB);
+}
+
+/*
+ * TraceValidateCPBL()
+ *  Guard the link to NextNode with a call to helper_validate_cpbl. The
+ *  helper receives the CPU state, the PC of the next TB and its id; the
+ *  link is taken only when the helper returns 1. Otherwise control goes
+ *  to a "cpbl.invalid" block that re-executes a clone of StorePC and
+ *  exits with return value 0. Translation continues in "cpbl.valid".
+ */
+void IRFactory::TraceValidateCPBL(GraphNode *NextNode, StoreInst *StorePC)
+{
+ TranslationBlock *NextTB = NextNode->getTB();
+ Value *Cond;
+
+ SmallVector<Value *, 4> Params;
+ Function *F = ResolveFunction("helper_validate_cpbl");
+ Value *Env = ConvertCPUType(F, 0, LastInst);
+
+ /* The PC constant uses the same type as the value stored by StorePC. */
+ Params.push_back(Env);
+ Params.push_back(ConstantInt::get(StorePC->getValueOperand()->getType(),
+ NextTB->pc));
+ Params.push_back(CONST32(NextTB->id));
+ CallInst *CI = CallInst::Create(F, Params, "", LastInst);
+ Cond = ICMP(CI, CONST32(1), ICmpInst::ICMP_EQ);
+
+ MF->setConst(CI);
+
+ BasicBlock *Valid = BasicBlock::Create(*Context, "cpbl.valid", Func);
+ BasicBlock *Invalid = BasicBlock::Create(*Context, "cpbl.invalid", Func);
+ toSink.push_back(Invalid);
+
+ /* Replace the current terminator with the conditional branch. */
+ BranchInst::Create(Valid, Invalid, Cond, LastInst);
+ LastInst->eraseFromParent();
+
+ /* Build the invalid path: a temporary terminator gives InsertExit() an
+ * insertion point and is removed once the return has been emitted. */
+ LastInst = BranchInst::Create(ExitBB, Invalid);
+ Instruction *SI = StorePC->clone();
+ SI->insertBefore(LastInst);
+ InsertExit(0);
+ LastInst->eraseFromParent();
+
+ MF->setExit(SI);
+
+ /* Continue translation on the valid path. */
+ CurrBB = Valid;
+ LastInst = BranchInst::Create(ExitBB, CurrBB);
+}
+
+/*
+ * TraceLinkIndirectJump()
+ *  This routine implements IB inlining, i.e., linking two intra-trace
+ *  blocks via indirect branch: the PC stored by SI is compared with the
+ *  guest PC of NextNode, and on a match control goes to a "main_path"
+ *  block that is later linked to NextNode. On a mismatch translation
+ *  continues in an "exit_stub" block.
+ *  Note that we don't need to validate CPBL because this routine is only
+ *  used for user-mode emulation.
+ */
+void IRFactory::TraceLinkIndirectJump(GraphNode *NextNode, StoreInst *SI)
+{
+    dbg() << DEBUG_LLVM << " - Found an indirect branch. Guess pc "
+          << format("0x%" PRIx, NextNode->getGuestPC()) << "\n";
+
+    BasicBlock *Hit = BasicBlock::Create(*Context, "main_path", Func);
+    BasicBlock *Miss = BasicBlock::Create(*Context, "exit_stub", Func);
+
+    Value *ActualPC = SI->getValueOperand();
+    Value *ExpectedPC = ConstantInt::get(ActualPC->getType(),
+                                         Builder->getGuestPC(NextNode));
+
+    /* Replace the current terminator with the guess check. */
+    Value *Match = ICMP(ActualPC, ExpectedPC, ICmpInst::ICMP_EQ);
+    BranchInst::Create(Hit, Miss, Match, LastInst);
+    LastInst->eraseFromParent();
+
+    CurrBB = Hit;
+
+    /* First set the branch to exit BB, and the link will be resolved
+       at the trace finalization procedure. */
+    BranchInst *Link = BranchInst::Create(ExitBB, CurrBB);
+    Builder->setBranch(Link, NextNode);
+
+    CurrBB = Miss;
+    LastInst = BranchInst::Create(ExitBB, CurrBB);
+}
+
+/*
+ * TraceLinkDirectJump()
+ *  Link the current block to NextNode, which lies inside the trace. SI
+ *  is the store of the constant destination PC. In system mode the link
+ *  is guarded by TraceValidateCPBL() when the two blocks are on
+ *  different pages or the next block spans a page boundary.
+ */
+void IRFactory::TraceLinkDirectJump(GraphNode *NextNode, StoreInst *SI)
+{
+    ConstantInt *Dest = static_cast<ConstantInt *>(SI->getValueOperand());
+    target_ulong DestPC = Dest->getZExtValue() +
+                          Builder->getCurrNode()->getTB()->cs_base;
+    Dest = ConstantInt::get(Dest->getType(), DestPC);
+
+    dbg() << DEBUG_LLVM << " - Found a direct branch to pc "
+          << format("0x%" PRIx, DestPC) << "\n";
+
+#if defined(CONFIG_SOFTMMU)
+    TranslationBlock *CurTB = Builder->getCurrNode()->getTB();
+    TranslationBlock *NextTB = NextNode->getTB();
+    /* If two blocks are not in the same page or the next block is across
+     * the page boundary, we have to handle it with CPBL. */
+    const bool CrossesPage =
+        (CurTB->pc & TARGET_PAGE_MASK) != (NextTB->pc & TARGET_PAGE_MASK) ||
+        NextTB->page_addr[1] != (tb_page_addr_t)-1;
+    if (CrossesPage)
+        TraceValidateCPBL(NextNode, SI);
+#endif
+    /* First set the branch to exit BB, and the link will be resolved
+       at the trace finalization procedure. */
+    BranchInst *Link = BranchInst::Create(ExitBB, LastInst);
+    Builder->setBranch(Link, NextNode);
+}
+
+/*
+ * TraceLinkDirectJump()
+ *  Handle a direct branch whose destination is not a node of the trace.
+ *  SI is the store of the constant destination PC. In system mode a
+ *  destination on another page goes through InsertLookupCPBL();
+ *  otherwise a chainable exit is emitted with InsertLinkAndExit().
+ */
+void IRFactory::TraceLinkDirectJump(StoreInst *SI)
+{
+    ConstantInt *Dest = static_cast<ConstantInt *>(SI->getValueOperand());
+    target_ulong DestPC = Dest->getZExtValue() +
+                          Builder->getCurrNode()->getTB()->cs_base;
+    Dest = ConstantInt::get(Dest->getType(), DestPC);
+
+    dbg() << DEBUG_LLVM << " - Found a direct branch to pc "
+          << format("0x%" PRIx, DestPC) << " (exit)\n";
+
+#if defined(CONFIG_SOFTMMU)
+    TranslationBlock *CurTB = Builder->getCurrNode()->getTB();
+    const bool SamePage =
+        (CurTB->pc & TARGET_PAGE_MASK) == (DestPC & TARGET_PAGE_MASK);
+    if (!SamePage) {
+        InsertLookupCPBL(Builder->getCurrNode());
+        return;
+    }
+#endif
+    InsertLinkAndExit(SI);
+}
+
+/*
+ * findNextNode()
+ *  Return the node to link to for guest address pc, or nullptr when no
+ *  child matches in the USE_TRACETREE_ONLY build. With
+ *  USE_TRACETREE_ONLY only the children of the current node are
+ *  searched; otherwise the lookup is delegated to Builder->getNode().
+ */
+GraphNode *IRFactory::findNextNode(target_ulong pc)
+{
+#ifdef USE_TRACETREE_ONLY
+    GraphNode *Found = nullptr;
+    for (auto Candidate : Builder->getCurrNode()->getChildren()) {
+        if (Builder->getGuestPC(Candidate) == pc) {
+            Found = Candidate;
+            break;
+        }
+    }
+    return Found;
+#else
+    return Builder->getNode(pc);
+#endif
+}
+
+/*
+ * TraceLink()
+ *  Emit the control transfer for the PC store SI. A constant stored
+ *  value is a direct branch: it is linked to the matching node if
+ *  findNextNode() returns one, otherwise it leaves through an exit
+ *  block. A non-constant value is an indirect branch.
+ */
+void IRFactory::TraceLink(StoreInst *SI)
+{
+    GraphNode *CurrNode = Builder->getCurrNode();
+    ConstantInt *ConstPC = dyn_cast<ConstantInt>(SI->getValueOperand());
+
+    if (ConstPC) {
+        /* Direct branch. */
+        target_ulong pc = ConstPC->getZExtValue();
+        if (GraphNode *NextNode = findNextNode(pc)) {
+            TraceLinkDirectJump(NextNode, SI);
+            return;
+        }
+
+        /* The destination is not linked here: rename the current block
+         * with an ".exit" suffix and queue it in toSink. */
+        TraceLinkDirectJump(SI);
+        CurrBB->setName(CurrBB->getName().str() + ".exit");
+        toSink.push_back(CurrBB);
+        return;
+    }
+
+    /* Indirect branch */
+    SaveGlobals(COHERENCE_ALL, LastInst);
+
+#if defined(CONFIG_USER_ONLY)
+    for (auto NextNode : CurrNode->getChildren())
+        TraceLinkIndirectJump(NextNode, SI);
+#endif
+    InsertLookupIBTC(CurrNode);
+}
+
+/*
+ * getStorePC()
+ *  Return the store instruction that writes the guest PC in the block of
+ *  LastInst, or nullptr if none can be identified.
+ *
+ *  When the guest word is wider than a host register
+ *  (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) the PC may be written by two
+ *  half-width stores. The last two PC stores are then merged into one
+ *  new full-width store inserted before LastInst, and the two originals
+ *  are queued in toErase. Otherwise the instruction immediately before
+ *  LastInst is returned if it is a store.
+ */
+StoreInst *IRFactory::getStorePC()
+{
+#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
+    std::vector<std::pair<intptr_t, StoreInst *> > StorePC;
+
+    /* Search for store instructions that write value to PC in this block. */
+    bool hasAllConstantPC = true;
+    BasicBlock *BB = LastInst->getParent();
+    for (auto BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) {
+        if (StoreInst *SI = dyn_cast<StoreInst>(BI)) {
+            intptr_t Off = 0;
+            Value *Base = getBaseWithConstantOffset(DL,
+                                            SI->getPointerOperand(), Off);
+            if (Base == BaseReg[TCG_AREG0].Base && isStateOfPC(Off)) {
+                StorePC.push_back(std::make_pair(Off, SI));
+                if (!isa<ConstantInt>(SI->getValueOperand()))
+                    hasAllConstantPC = false;
+            }
+        }
+    }
+
+    if (StorePC.empty())
+        return nullptr;
+    if (StorePC.size() == 1)
+        return StorePC[0].second;
+
+    /* We only consider the last two stores. Order them so that index I1
+     * refers to the store at the lower offset. */
+    unsigned I1 = StorePC.size() - 2, I2 = StorePC.size() - 1;
+    if (StorePC[I1].first > StorePC[I2].first) {
+        unsigned tmp = I1;
+        I1 = I2;
+        I2 = tmp;
+    }
+
+    intptr_t OffsetA = StorePC[I1].first;
+    intptr_t OffsetB = StorePC[I2].first;
+    StoreInst *SA = StorePC[I1].second;
+    StoreInst *SB = StorePC[I2].second;
+    intptr_t SzA = DL->getTypeSizeInBits(SA->getValueOperand()->getType());
+    intptr_t SzB = DL->getTypeSizeInBits(SB->getValueOperand()->getType());
+    /* The two stores must be equally sized, adjacent, and together cover
+     * the whole guest PC.
+     * NOTE(review): SzA is a size in bits; if the offsets produced by
+     * getBaseWithConstantOffset() are in bytes, the adjacency test mixes
+     * units - confirm against that helper. */
+    if (SzA != SzB || OffsetA + SzA != OffsetB || SzA + SzB != TARGET_LONG_BITS)
+        return nullptr;
+
+    Value *NewPC;
+    Type *Ty = (TARGET_LONG_BITS == 32) ? Int32Ty : Int64Ty;
+    Type *PTy = (TARGET_LONG_BITS == 32) ? Int32PtrTy : Int64PtrTy;
+    if (hasAllConstantPC) {
+        /* Fold the two constant halves: the lower-offset store supplies
+         * the low bits and the higher-offset store the high bits. */
+        target_ulong PCA = static_cast<ConstantInt*>(SA->getValueOperand())->getZExtValue();
+        target_ulong PCB = static_cast<ConstantInt*>(SB->getValueOperand())->getZExtValue();
+        NewPC = ConstantInt::get(Ty, PCA | (PCB << SzA));
+    } else {
+        Value *PCA = ZEXT(SA->getValueOperand(), Ty);
+        Value *PCB = ZEXT(SB->getValueOperand(), Ty);
+        PCB = SHL(PCB, ConstantInt::get(Ty, SzA));
+        NewPC = OR(PCA, PCB);
+    }
+
+    toErase.push_back(SA);
+    toErase.push_back(SB);
+
+    /* The merged store reuses the address of the lower-offset store. */
+    Value *Addr = CAST(SA->getPointerOperand(), PTy);
+    return new StoreInst(NewPC, Addr, true, LastInst);
+
+#else
+    return dyn_cast<StoreInst>(--BasicBlock::iterator(LastInst));
+#endif
+}
+
+/*
+ * op_exit_tb()
+ * args[0]: return value
+ *
+ * Terminates the current block. Depending on what precedes the block
+ * terminator, either a plain exit returning 0 is emitted or the block is
+ * linked through the PC store found by getStorePC(). Nothing is done
+ * when there is no pending terminator (LastInst is null).
+ */
+void IRFactory::op_exit_tb(const TCGArg *args)
+{
+ IRDebug(INDEX_op_exit_tb);
+
+ if (!LastInst)
+ return;
+
+ /* Some guest architectures (e.g., ARM) do not explicitly generate a store
+ * instruction to sync the PC value to the memory before exit_tb. We
+ * generate the store PC instruction here so that the following routine can
+ * analyze the PC value it will branch to. Note that other dirty states will
+ * be synced later. */
+ CreateStorePC(LastInst);
+
+ if (LastInst == &*LastInst->getParent()->begin()) {
+ /* The terminator is the only instruction in its block. */
+ SaveGlobals(COHERENCE_ALL, LastInst);
+ InsertExit(0, true);
+ } else if (isa<CallInst>(--BasicBlock::iterator(LastInst))) {
+ /* Tail call. */
+ /* Passes are disabled if any global register is still dirty. */
+ for (int i = 0, e = tcg_ctx.nb_globals; i != e; ++i) {
+ Register &reg = Reg[i];
+ if (reg.isReg() && reg.isDirty())
+ runPasses = false;
+ }
+
+ SaveGlobals(COHERENCE_ALL, LastInst);
+ InsertExit(0, true);
+ } else if (StoreInst *SI = getStorePC()) {
+ /* A PC store was found: sync globals before it and link the trace. */
+ SaveGlobals(COHERENCE_ALL, SI);
+ TraceLink(SI);
+ } else {
+ /* No PC store could be identified: plain exit, passes disabled. */
+ runPasses = false;
+ SaveGlobals(COHERENCE_ALL, LastInst);
+ InsertExit(0, true);
+ }
+
+ /* The placeholder terminator is no longer needed. */
+ LastInst->eraseFromParent();
+ LastInst = nullptr;
+}
+
+/*
+ * op_goto_tb()
+ *  args[0]: jump index
+ * Only traces the opcode; no IR is generated and args is not inspected.
+ */
+void IRFactory::op_goto_tb(const TCGArg *args)
+{
+    IRDebug(INDEX_op_goto_tb);
+}
+
+/*
+ * op_qemu_ld_i32()
+ *  Operands, in order: data register, address (low), address (high,
+ *  present only when TARGET_LONG_BITS > TCG_TARGET_REG_BITS), and the
+ *  memory-op index.
+ *  Emits a guest load through QEMULoad() and stores the value, extended
+ *  by getExtendValue() to the type of the data register, into that
+ *  register.
+ */
+void IRFactory::op_qemu_ld_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_qemu_ld_i32);
+
+    /* Consume the operands in order. */
+    TCGArg DataIdx = *args++;
+    TCGArg AddrLoIdx = *args++;
+    TCGArg AddrHiIdx = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) ? *args++ : 0;
+    TCGMemOpIdx oi = *args++;
+    TCGMemOp MemOp = get_memop(oi);
+
+    Register &Dst = Reg[DataIdx];
+    Register &Addr = Reg[AddrLoIdx];
+
+    Value *AddrLo = LoadState(Addr);
+    Value *AddrHi = nullptr;
+    if (AddrHiIdx)
+        AddrHi = LoadState(Reg[AddrHiIdx]);
+
+    AssertType(Addr.Size == 32 || Addr.Size == 64);
+
+    SaveStates();
+
+    Value *Loaded = QEMULoad(AddrLo, AddrHi, oi);
+    Loaded = getExtendValue(Loaded, Dst.Ty, MemOp);
+    Dst.setData(Loaded, true);
+}
+
+/*
+ * op_qemu_st_i32()
+ *  Operands, in order: data register, address (low), address (high,
+ *  present only when TARGET_LONG_BITS > TCG_TARGET_REG_BITS), and the
+ *  memory-op index.
+ *  Emits a guest store of the data, narrowed by getTruncValue(), through
+ *  QEMUStore().
+ */
+void IRFactory::op_qemu_st_i32(const TCGArg *args)
+{
+    IRDebug(INDEX_op_qemu_st_i32);
+
+    /* Consume the operands in order. */
+    TCGArg DataIdx = *args++;
+    TCGArg AddrLoIdx = *args++;
+    TCGArg AddrHiIdx = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) ? *args++ : 0;
+    TCGMemOpIdx oi = *args++;
+    TCGMemOp MemOp = get_memop(oi);
+
+    Register &Data = Reg[DataIdx];
+    Register &Addr = Reg[AddrLoIdx];
+
+    Value *DataVal = LoadState(Data);
+    Value *AddrLo = LoadState(Addr);
+    Value *AddrHi = nullptr;
+    if (AddrHiIdx)
+        AddrHi = LoadState(Reg[AddrHiIdx]);
+
+    /* NOTE(review): this checks the size of the data register, while
+     * op_qemu_st_i64 checks the address register - confirm which one is
+     * intended. */
+    AssertType(Data.Size == 32 || Data.Size == 64);
+
+    SaveStates();
+
+    DataVal = getTruncValue(DataVal, MemOp);
+    QEMUStore(DataVal, AddrLo, AddrHi, oi);
+}
+
+/*
+ * op_qemu_ld_i64()
+ *  Operands, in order: data (low), data (high, present only when
+ *  TCG_TARGET_REG_BITS == 32), address (low), address (high, present
+ *  only when TARGET_LONG_BITS > TCG_TARGET_REG_BITS), and the memory-op
+ *  index.
+ *  Emits a guest load through QEMULoad(). When a high data register is
+ *  given, the loaded value is split into two 32-bit halves.
+ */
+void IRFactory::op_qemu_ld_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_qemu_ld_i64);
+
+    /* Consume the operands in order. */
+    TCGArg DataLoIdx = *args++;
+    TCGArg DataHiIdx = (TCG_TARGET_REG_BITS == 32) ? *args++ : 0;
+    TCGArg AddrLoIdx = *args++;
+    TCGArg AddrHiIdx = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) ? *args++ : 0;
+    TCGMemOpIdx oi = *args++;
+    TCGMemOp MemOp = get_memop(oi);
+
+    Register &DstLo = Reg[DataLoIdx];
+    Register &Addr = Reg[AddrLoIdx];
+
+    Value *AddrLo = LoadState(Addr);
+    Value *AddrHi = nullptr;
+    if (AddrHiIdx)
+        AddrHi = LoadState(Reg[AddrHiIdx]);
+
+    AssertType(Addr.Size == 32 || Addr.Size == 64);
+
+    SaveStates();
+
+    Value *Loaded = QEMULoad(AddrLo, AddrHi, oi);
+    Loaded = getExtendValue(Loaded, DstLo.Ty, MemOp);
+
+    if (DataHiIdx == 0) {
+        DstLo.setData(Loaded, true);
+        return;
+    }
+
+    /* Split the value across the low and high data registers. */
+    Register &DstHi = Reg[DataHiIdx];
+    Value *LoHalf = TRUNC32(Loaded);
+    Value *HiHalf = TRUNC32(LSHR(Loaded, CONST64(32)));
+    DstLo.setData(LoHalf, true);
+    DstHi.setData(HiHalf, true);
+}
+
+/*
+ * op_qemu_st_i64()
+ *  Operands, in order: data (low), data (high, present only when
+ *  TCG_TARGET_REG_BITS == 32), address (low), address (high, present
+ *  only when TARGET_LONG_BITS > TCG_TARGET_REG_BITS), and the memory-op
+ *  index.
+ *  Emits a guest store through QEMUStore(). When a high data register is
+ *  given, the two 32-bit halves are first joined into one 64-bit value.
+ */
+void IRFactory::op_qemu_st_i64(const TCGArg *args)
+{
+    IRDebug(INDEX_op_qemu_st_i64);
+
+    /* Consume the operands in order. */
+    TCGArg DataLoIdx = *args++;
+    TCGArg DataHiIdx = (TCG_TARGET_REG_BITS == 32) ? *args++ : 0;
+    TCGArg AddrLoIdx = *args++;
+    TCGArg AddrHiIdx = (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) ? *args++ : 0;
+    TCGMemOpIdx oi = *args++;
+    TCGMemOp MemOp = get_memop(oi);
+
+    Register &DataLo = Reg[DataLoIdx];
+    Register &Addr = Reg[AddrLoIdx];
+
+    Value *LoVal = LoadState(DataLo);
+    Value *AddrLo = LoadState(Addr);
+    Value *AddrHi = nullptr;
+    if (AddrHiIdx)
+        AddrHi = LoadState(Reg[AddrHiIdx]);
+
+    AssertType(Addr.Size == 32 || Addr.Size == 64);
+
+    SaveStates();
+
+    Value *Data = LoVal;
+    if (DataHiIdx != 0) {
+        /* Data = (high << 32) | low. */
+        Data = LoadState(Reg[DataHiIdx]);
+        Data = SHL(ZEXT64(Data), CONST64(32));
+        Data = OR(Data, ZEXT64(LoVal));
+    }
+
+    Data = getTruncValue(Data, MemOp);
+    QEMUStore(Data, AddrLo, AddrHi, oi);
+}
+
+
+/*
+ * Metadata Factory
+ */
+/* Start the UID counter at 0, bind the factory to the context of module
+ * M, and create the Dummy metadata node from a fresh UID. */
+MDFactory::MDFactory(Module *M) : UID(0), Context(M->getContext())
+{
+ /* The single-element ArrayRef refers to a temporary that only lives
+ * for the duration of this call expression. */
+ Dummy = getMDNode(ArrayRef<ConstantInt*>(getUID()));
+}
+
+/* Nothing to release. */
+MDFactory::~MDFactory() {}
+
+/*
+ * setConstStatic() attaches the constants in V to instruction I as
+ * META_CONST metadata. getMDNode() builds a metadata node holding a
+ * fresh UID followed by the constants in V.
+ * LLVM 3.5 stores Value pointers in metadata nodes directly; the other
+ * supported versions wrap each constant with ConstantAsMetadata.
+ */
+#if defined(LLVM_V35)
+void MDFactory::setConstStatic(LLVMContext &Context, Instruction *I,
+                               ArrayRef<ConstantInt*> V)
+{
+    SmallVector<Value *, 4> Operands;
+    for (ConstantInt *C : V)
+        Operands.push_back(C);
+    I->setMetadata(META_CONST, MDNode::get(Context, Operands));
+}
+
+MDNode *MDFactory::getMDNode(ArrayRef<ConstantInt*> V)
+{
+    SmallVector<Value *, 4> Operands;
+    /* The UID always comes first. */
+    Operands.push_back(getUID());
+    for (ConstantInt *C : V)
+        Operands.push_back(C);
+    return MDNode::get(Context, Operands);
+}
+#else
+void MDFactory::setConstStatic(LLVMContext &Context, Instruction *I,
+                               ArrayRef<ConstantInt*> V)
+{
+    SmallVector<Metadata *, 4> Operands;
+    for (ConstantInt *C : V)
+        Operands.push_back(ConstantAsMetadata::get(C));
+    I->setMetadata(META_CONST, MDNode::get(Context, Operands));
+}
+
+MDNode *MDFactory::getMDNode(ArrayRef<ConstantInt*> V)
+{
+    SmallVector<Metadata *, 4> Operands;
+    /* The UID always comes first. */
+    Operands.push_back(ConstantAsMetadata::get(getUID()));
+    for (ConstantInt *C : V)
+        Operands.push_back(ConstantAsMetadata::get(C));
+    return MDNode::get(Context, Operands);
+}
+#endif
+
+/*
+ * getDebugLoc()
+ * Build a DebugLoc carrying (Line, Col).
+ *
+ * With ENABLE_MCJIT the location is scoped to a lexical block of a debug
+ * subprogram created for F through DIBuilder, using the DIBuilder API of
+ * the LLVM version in use; Meta is not used in that variant. Without
+ * ENABLE_MCJIT the scope is the metadata node built from Meta by
+ * getMDNode().
+ */
+#if defined(ENABLE_MCJIT)
+DebugLoc MDFactory::getDebugLoc(unsigned Line, unsigned Col, Function *F,
+ ArrayRef<ConstantInt*> Meta)
+{
+ Module *M = F->getParent();
+ DIBuilder DIB(*M);
+ auto File = DIB.createFile(F->getName(), "hqemu/");
+#if defined(LLVM_V35)
+ auto CU = DIB.createCompileUnit(dwarf::DW_LANG_Cobol74, F->getName(),
+ "hqemu/", "hqemu", true, "", 0);
+ auto Type = DIB.createSubroutineType(File,
+ DIB.getOrCreateArray(ArrayRef<Value *>()));
+ auto SP = DIB.createFunction(CU, F->getName(), "", File, 1, Type, false,
+ true, 1, 0, true);
+ auto Scope = DIB.createLexicalBlockFile(SP, File);
+ DebugLoc DL = DebugLoc::get(Line, Col, Scope);
+ DIB.finalize();
+ SP.replaceFunction(F);
+#elif defined(LLVM_V38) || defined(LLVM_V39)
+ auto CU = DIB.createCompileUnit(dwarf::DW_LANG_Cobol74, F->getName(),
+ "hqemu/", "hqemu", true, "", 0);
+ auto Type = DIB.createSubroutineType(DIB.getOrCreateTypeArray(None));
+ auto SP = DIB.createFunction(CU, F->getName(), "", File, 1, Type, false,
+ true, 1, 0, true);
+ auto Scope = DIB.createLexicalBlockFile(SP, File, 0);
+ DebugLoc DL = DebugLoc::get(Line, Col, Scope);
+ DIB.finalize();
+ F->setSubprogram(SP);
+#else
+ /* Other LLVM versions: the compile unit is created from the file. */
+ auto CU = DIB.createCompileUnit(dwarf::DW_LANG_Cobol74, File,
+ "hqemu", true, "", 0);
+ auto Type = DIB.createSubroutineType(DIB.getOrCreateTypeArray(None));
+ auto SP = DIB.createFunction(CU, F->getName(), "", File, 1, Type, false,
+ true, 1, DINode::FlagZero, true);
+ auto Scope = DIB.createLexicalBlockFile(SP, File, 0);
+ DebugLoc DL = DebugLoc::get(Line, Col, Scope);
+ DIB.finalize();
+ F->setSubprogram(SP);
+#endif
+
+ return DL;
+}
+#else
+DebugLoc MDFactory::getDebugLoc(unsigned Line, unsigned Col, Function *F,
+ ArrayRef<ConstantInt*> Meta)
+{
+ return DebugLoc::get(Line, Col, getMDNode(Meta));
+}
+#endif
+
+
+/*
+ * TraceBuilder()
+ *  Set up a trace build for the CFG carried by Opt: register every node
+ *  reachable from the entry node once, queue the entry node for
+ *  building, and open a session and a function on the IR factory.
+ */
+TraceBuilder::TraceBuilder(IRFactory *IRF, OptimizationInfo *Opt)
+    : IF(IRF), Opt(Opt), Aborted(false), Attribute(A_None), Trace(nullptr)
+{
+    GraphNode *EntryNode = Opt->getCFG();
+    if (!EntryNode)
+        hqemu_error("invalid optimization request.\n");
+
+    /* Find unique nodes. */
+    NodeVec Pending;
+    NodeSet Seen;
+    Pending.push_back(EntryNode);
+    while (!Pending.empty()) {
+        GraphNode *Node = Pending.back();
+        Pending.pop_back();
+
+        /* Skip nodes that have already been registered. */
+        if (Seen.find(Node) != Seen.end())
+            continue;
+        Seen.insert(Node);
+
+        setUniqueNode(Node);
+
+        for (auto Child : Node->getChildren())
+            Pending.push_back(Child);
+    }
+
+    /* Add entry node into the building queue. */
+    NodeQueue.push_back(EntryNode);
+
+    IF->CreateSession(this);
+    IF->CreateFunction();
+}
+
+/*
+ * ConvertToTCGIR()
+ *  Translate the TB of the current node into TCG ops and run the TCG
+ *  liveness analysis on them. In trace mode the image base and the CPU
+ *  state are first set up from the TB.
+ */
+void TraceBuilder::ConvertToTCGIR(CPUArchState *env)
+{
+    TranslationBlock *TB = CurrNode->getTB();
+
+    if (LLEnv->isTraceMode()) {
+        env->image_base = (uintptr_t)TB->image - TB->pc;
+        tcg_copy_state(env, TB);
+    }
+
+    tcg_func_start(&tcg_ctx, TB);
+    gen_intermediate_code(env, TB);
+    tcg_liveness_analysis(&tcg_ctx);
+}
+
+/* Return true if opc lies in the inclusive opcode range
+ * [INDEX_op_vector_start, INDEX_op_vector_end]. */
+static inline bool isVecOp(TCGOpcode opc)
+{
+    return opc >= INDEX_op_vector_start && opc <= INDEX_op_vector_end;
+}
+
+/*
+ * ConvertToLLVMIR()
+ *  Walk the TCG op list of the current TB and dispatch every op to the
+ *  matching IRFactory handler. Vector ops take their operands from the
+ *  separate vec_opparam_buf, three slots per op. If the build is aborted
+ *  by a handler, the session is deleted and the walk stops.
+ */
+void TraceBuilder::ConvertToLLVMIR()
+{
+    IF->CreateBlock();
+
+    auto Handlers = (IRFactory::FuncPtr *)IF->getOpcFunc();
+    TCGArg *NextVecArgs = tcg_ctx.vec_opparam_buf;
+
+    IF->NI.setTB(CurrNode->getTB());
+
+    int OpIdx = tcg_ctx.gen_first_op_idx;
+    while (OpIdx >= 0) {
+        TCGOp * const Op = &tcg_ctx.gen_op_buf[OpIdx];
+        TCGArg *OpArgs = &tcg_ctx.gen_opparam_buf[Op->args];
+        OpIdx = Op->next;
+
+        if (isVecOp(Op->opc)) {
+            OpArgs = NextVecArgs;
+            NextVecArgs += 3;
+        }
+
+        IF->NI.setOp(Op);
+        (IF->*Handlers[Op->opc])(OpArgs);
+
+        if (isAborted()) {
+            IF->DeleteSession();
+            return;
+        }
+    }
+}
+
+/* Flag this build as aborted; the flag is checked by ConvertToLLVMIR()
+ * after each op through isAborted(). */
+void TraceBuilder::Abort()
+{
+    Aborted = true;
+}
+
+/*
+ * Finalize()
+ *  Resolve the recorded branches, create the TraceInfo for the nodes
+ *  used, compile the function, and close the IR factory session.
+ */
+void TraceBuilder::Finalize()
+{
+    /* Reconnect links of basic blocks. The links are previously
+       set to ExitBB. */
+    for (unsigned Idx = 0, Count = Branches.size(); Idx != Count; ++Idx) {
+        BranchInst *Link = Branches[Idx].first;
+        GraphNode *Target = Branches[Idx].second;
+        IF->setSuccessor(Link, getBasicBlock(Target));
+    }
+
+    Trace = new TraceInfo(NodeUsed, Attribute);
+    IF->Compile();
+    IF->DeleteSession();
+}
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
diff --git a/llvm/llvm-soft-perfmon.cpp b/llvm/llvm-soft-perfmon.cpp
new file mode 100644
index 0000000..a5f9a56
--- /dev/null
+++ b/llvm/llvm-soft-perfmon.cpp
@@ -0,0 +1,357 @@
+/*
+ * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#include <iostream>
+#include <sstream>
+#include "tracer.h"
+#include "utils.h"
+#include "llvm.h"
+#include "llvm-target.h"
+#include "llvm-soft-perfmon.h"
+
+
+extern LLVMEnv *LLEnv;
+extern unsigned ProfileThreshold;
+extern unsigned PredictThreshold;
+
+/*
+ * Software Performance Monitor (SPM)
+ */
+/*
+ * ParseProfileMode()
+ *  Parse a comma-separated list of profile level names and OR the flag of
+ *  every recognized name into Mode. Unrecognized names are ignored and an
+ *  empty string leaves Mode untouched.
+ */
+void SoftwarePerfmon::ParseProfileMode(std::string &ProfileLevel)
+{
+    struct LevelEntry {
+        const char *Name;
+        uint64_t Flag;
+    };
+    static const LevelEntry Levels[] = {
+        { "none",    SPM_NONE    },
+        { "basic",   SPM_BASIC   },
+        { "trace",   SPM_TRACE   },
+        { "cache",   SPM_CACHE   },
+        { "pass",    SPM_PASS    },
+        { "hpm",     SPM_HPM     },
+        { "exit",    SPM_EXIT    },
+        { "hotspot", SPM_HOTSPOT },
+        { "all",     SPM_ALL     },
+    };
+
+    if (ProfileLevel.empty())
+        return;
+
+    std::istringstream Stream(ProfileLevel);
+    std::string Item;
+    while (getline(Stream, Item, ',')) {
+        /* Only the first matching table entry is applied. */
+        for (const LevelEntry &Level : Levels) {
+            if (Item == Level.Name) {
+                Mode |= Level.Flag;
+                break;
+            }
+        }
+    }
+}
+
+/*
+ * printProfile()
+ *  Print the collected profile if the monitor is enabled. Nothing is
+ *  printed for the NONE and INVALID translation modes; BLOCK mode prints
+ *  the block profile and every other mode prints the trace profile.
+ */
+void SoftwarePerfmon::printProfile()
+{
+    if (!isEnabled())
+        return;
+
+    const bool HasTranslation = LLVMEnv::TransMode != TRANS_MODE_NONE &&
+                                LLVMEnv::TransMode != TRANS_MODE_INVALID;
+    if (!HasTranslation)
+        return;
+
+    if (LLVMEnv::TransMode != TRANS_MODE_BLOCK) {
+        printTraceProfile();
+        return;
+    }
+    printBlockProfile();
+}
+
+/*
+ * printBlockProfile()
+ *  Print summary statistics over all translated code regions: their
+ *  number, the accumulated guest/host code sizes, the accumulated guest
+ *  instruction count, and the total and maximum translation time.
+ */
+void SoftwarePerfmon::printBlockProfile()
+{
+    LLVMEnv::TransCodeList &TransCode = LLEnv->getTransCode();
+    uint32_t GuestSize = 0, GuestICount = 0, HostSize = 0;
+    uint64_t TransTime = 0, MaxTime = 0;
+
+    for (auto TC : TransCode) {
+        TraceInfo *Trace = TC->Trace;
+        TranslationBlock *TB = TC->EntryTB;
+        GuestSize += TB->size;
+        GuestICount += TB->icount;
+        HostSize += TC->Size;
+        TransTime += Trace->TransTime;
+        if (Trace->TransTime > MaxTime)
+            MaxTime = Trace->TransTime;
+    }
+
+    /* The total is scaled by 1e-6 to seconds and the maximum is divided
+     * by 1000 to milliseconds. The size line uses " bytes" (with the
+     * separating space) so that it matches the trace statistic output. */
+    auto &OS = DM.debug();
+    OS << "\nBlock statistic:\n"
+       << "Num of Blocks : " << TransCode.size() << "\n"
+       << "G/H Code Size : " << GuestSize << "/" << HostSize << " bytes\n"
+       << "Guest ICount : " << GuestICount << "\n"
+       << "Translation Time : " << format("%.6f", (double)TransTime * 1e-6)
+       << " seconds (max=" << MaxTime /1000 << " ms)\n";
+}
+
+/*
+ * printBasic()
+ *  Print static statistics of all traces in TransCode: totals, per-trace
+ *  averages and maxima of blocks, loops, exits and indirect branches, the
+ *  translation time, and the distribution of trace lengths (in blocks).
+ *  The averages divide by the number of traces, so the list is expected
+ *  to be non-empty.
+ */
+static void printBasic(LLVMEnv::TransCodeList &TransCode)
+{
+    const unsigned NumTraces = TransCode.size();
+    uint32_t GuestSize = 0, GuestICount = 0, HostSize = 0;
+    uint32_t NumBlock = 0, NumLoop = 0, NumExit = 0, NumIndirectBr = 0;
+    uint32_t MaxBlock = 0, MaxLoop = 0, MaxExit = 0, MaxIndirectBr = 0;
+    uint64_t TransTime = 0, MaxTime = 0;
+    std::map<unsigned, unsigned> LenDist;
+
+    for (auto TC : TransCode) {
+        TraceInfo *Trace = TC->Trace;
+        TBVec &TBs = Trace->TBs;
+        const auto Len = TBs.size();
+
+        for (auto *TB : TBs) {
+            GuestSize += TB->size;
+            GuestICount += TB->icount;
+        }
+        HostSize += TC->Size;
+
+        /* Running totals. */
+        NumBlock += Len;
+        NumLoop += Trace->NumLoop;
+        NumExit += Trace->NumExit;
+        NumIndirectBr += Trace->NumIndirectBr;
+        TransTime += Trace->TransTime;
+
+        /* Running maxima. */
+        if (Len > MaxBlock)
+            MaxBlock = Len;
+        if (Trace->NumLoop > MaxLoop)
+            MaxLoop = Trace->NumLoop;
+        if (Trace->NumExit > MaxExit)
+            MaxExit = Trace->NumExit;
+        if (Trace->NumIndirectBr > MaxIndirectBr)
+            MaxIndirectBr = Trace->NumIndirectBr;
+        if (Trace->TransTime > MaxTime)
+            MaxTime = Trace->TransTime;
+
+        /* Histogram keyed by the number of blocks in the trace. */
+        LenDist[Len]++;
+    }
+
+    auto &OS = DM.debug();
+    OS << "Trace statistic:\n";
+    OS << "Num of Traces : " << NumTraces << "\n";
+    OS << "Profile Thres. : " << ProfileThreshold << "\n";
+    OS << "Predict Thres. : " << PredictThreshold << "\n";
+    OS << "G/H Code Size : " << GuestSize << "/" << HostSize << " bytes\n";
+    OS << "Translation Time : " << format("%.6f", (double)TransTime * 1e-6)
+       << " seconds (max=" << MaxTime /1000 << " ms)\n";
+    OS << "Average # Blocks : " << format("%.1f", (double)NumBlock / NumTraces)
+       << " (max=" << MaxBlock << ")\n";
+    OS << "Average # Loops : " << format("%.1f", (double)NumLoop / NumTraces)
+       << " (max=" << MaxLoop << ")\n";
+    OS << "Average # Exits : " << format("%.1f", (double)NumExit / NumTraces)
+       << " (max=" << MaxExit << ")\n";
+    OS << "Average # IBs : " << format("%.1f", (double)NumIndirectBr / NumTraces)
+       << " (max=" << MaxIndirectBr << ")\n";
+    OS << "Flush Count : " << LLEnv->getNumFlush() << "\n";
+
+    /* Lengths that never occurred are printed as 0. */
+    OS << "Trace length distribution: (1-" << MaxBlock << ")\n ";
+    for (unsigned Len = 1; Len <= MaxBlock; Len++)
+        OS << LenDist[Len] << " ";
+    OS << "\n";
+}
+
+/*
+ * printTraceExec()
+ *  Print, for every CPU in the first_cpu list, the runtime counters of
+ *  each trace that was executed at least once (all three counters zero
+ *  means the trace is skipped), followed by the number of traces used.
+ */
+static void printTraceExec(LLVMEnv::TransCodeList &TransCode)
+{
+    /* One "thread" per CPU found on the CPU list. */
+    unsigned NumThread = 0;
+    for (auto next_cpu = first_cpu; next_cpu != nullptr;
+         next_cpu = CPU_NEXT(next_cpu))
+        NumThread++;
+
+    /* Detailed trace information and runtime counters. */
+    auto &OS = DM.debug();
+    OS << "----------------------------\n"
+       << "Trace execution information:\n";
+
+    unsigned NumTraces = TransCode.size();
+    for (unsigned i = 0; i != NumThread; ++i) {
+        unsigned TraceUsed = 0;
+
+        OS << ">\n"
+           << "Thread " << i << ":\n"
+           << " dynamic exec count\n"
+           << " id pc #loop:#exit loop ibtc exit\n";
+        for (unsigned j = 0; j != NumTraces; ++j) {
+            TraceInfo *Trace = TransCode[j]->Trace;
+            uint64_t *Counter = Trace->ExecCount[i];
+            if (Counter[0] + Counter[1] + Counter[2] == 0)
+                continue;
+            TraceUsed++;
+            /* The index and the counters are unsigned, so they are
+             * printed with unsigned conversions (%u / PRIu64). */
+            OS << format("%4u", j) << ") "
+               << format("0x%08" PRIx, Trace->getEntryPC()) << " "
+               << format("%2d", Trace->NumLoop) << " "
+               << format("%2d", Trace->NumExit) << " "
+               << format("%8" PRIu64, Counter[0]) << " "
+               << format("%8" PRIu64, Counter[1]) << " "
+               << format("%8" PRIu64, Counter[2]) << "\n";
+        }
+        OS << "Trace used: " << TraceUsed << "/" << NumTraces <<"\n";
+    }
+}
+
+/* Print the instruction, load, store and branch counts held by SP,
+ * followed by the sample time scaled by 1e-6 and labelled as seconds. */
+static void printHPM()
+{
+    auto &Out = DM.debug();
+
+    Out << "Num of Insns : " << SP->NumInsns << "\n";
+    Out << "Num of Loads : " << SP->NumLoads << "\n";
+    Out << "Num of Stores : " << SP->NumStores << "\n";
+    Out << "Num of Branches : " << SP->NumBranches << "\n";
+
+    const double SampleSeconds = (double)SP->SampleTime * 1e-6;
+    Out << "Sample Time : " << format("%.6f seconds", SampleSeconds)
+        << "\n";
+}
+
+/*
+ * printHotspot()
+ *  Classify every sampled instruction pointer as block cache, trace cache
+ *  or other, credit it to the translated code region containing it, and
+ *  report (1) the sample breakdown, (2) how many regions are needed to
+ *  cover CoverSet percent of all samples and (3) the share of the hottest
+ *  regions.
+ *
+ *  CoverSet      - requested coverage in percent.
+ *  SampleListVec - sample lists; every list is deleted here and the vector
+ *                  is emptied so that no dangling pointer is left behind.
+ */
+static void printHotspot(unsigned &CoverSet,
+                         std::vector<std::vector<uint64_t> *> &SampleListVec)
+{
+    auto &OS = DM.debug();
+    auto &TransCode = LLEnv->getTransCode();
+    auto &SortedCode = LLEnv->getSortedCode();
+    uint64_t BlockCacheStart = (uintptr_t)tcg_ctx_global.code_gen_buffer;
+    uint64_t BlockCacheEnd = BlockCacheStart + tcg_ctx_global.code_gen_buffer_size;
+    uint64_t TraceCacheStart = (uintptr_t)LLVMEnv::TraceCache;
+    uint64_t TraceCacheEnd = TraceCacheStart + LLVMEnv::TraceCacheSize;
+    uint64_t TotalSamples = 0;
+    uint64_t NumBlockCache = 0, NumTraceCache = 0, NumOther = 0;
+
+    for (auto *L : SampleListVec) {
+        if (!L)
+            continue;
+        for (uint64_t IP : *L) {
+            if (IP >= BlockCacheStart && IP < BlockCacheEnd)
+                NumBlockCache++;
+            else if (IP >= TraceCacheStart && IP < TraceCacheEnd)
+                NumTraceCache++;
+            else
+                NumOther++;
+
+            /* Find the last region starting at or below IP and credit the
+             * sample only if IP falls inside that region's code. */
+            auto IT = SortedCode.upper_bound(IP);
+            if (IT == SortedCode.begin())
+                continue;
+            auto TC = (--IT)->second;
+            if (IP < (uint64_t)TC->Code + TC->Size)
+                TC->SampleCount++;
+        }
+        delete L;
+    }
+    /* All lists have been freed above; drop the stale pointers so that a
+     * later call cannot dereference or delete them again. */
+    SampleListVec.clear();
+
+    TotalSamples = NumBlockCache + NumTraceCache + NumOther;
+    if (TotalSamples == 0 || TransCode.empty()) {
+        OS << CoverSet << "% CoverSet : 0\n";
+        return;
+    }
+
+    /* Print the time breakdown of block cache, trace cache and other. */
+    char buf[128] = {'\0'};
+    double RatioBlockCache = (double)NumBlockCache * 100 / TotalSamples;
+    double RatioTraceCache = (double)NumTraceCache * 100 / TotalSamples;
+    snprintf(buf, sizeof(buf), "block (%.1f%%) trace (%.1f%%) other (%.1f%%)",
+             RatioBlockCache, RatioTraceCache,
+             100.0 - RatioBlockCache - RatioTraceCache);
+    OS << "Breakdown : " << buf << "\n";
+
+    /* Print the amount of traces in the cover set. */
+    std::map<TranslatedCode *, unsigned> IndexMap;
+    for (unsigned i = 0, e = TransCode.size(); i != e; ++i)
+        IndexMap[TransCode[i]] = i;
+
+    /* Work on a copy sorted by descending sample count. */
+    LLVMEnv::TransCodeList Covered(TransCode.begin(), TransCode.end());
+    std::sort(Covered.begin(), Covered.end(),
+              [](const TranslatedCode *a, const TranslatedCode *b) {
+                  return a->SampleCount > b->SampleCount;
+              });
+
+    uint64_t CoverSamples = TotalSamples * CoverSet / 100;
+    uint64_t AccuSamples = 0;
+    unsigned NumTracesInCoverSet = 0;
+    for (TranslatedCode *TC : Covered) {
+        if (AccuSamples >= CoverSamples || TC->SampleCount == 0)
+            break;
+        NumTracesInCoverSet++;
+        AccuSamples += TC->SampleCount;
+    }
+
+    OS << CoverSet << "% CoverSet : " << NumTracesInCoverSet << "\n";
+
+    if (NumTracesInCoverSet == 0)
+        return;
+
+    /* Print the percentage of time of the traces in the cover set. */
+    if (DM.getDebugMode() & DEBUG_IR_OPT) {
+        OS << "Traces of CoverSet:\n";
+        for (unsigned i = 0; i < NumTracesInCoverSet; ++i) {
+            TranslatedCode *TC = Covered[i];
+            snprintf(buf, sizeof(buf), "%4u (%.1f%%): ", IndexMap[TC],
+                     (double)TC->SampleCount * 100 / TotalSamples);
+            OS << buf;
+            /* Comma-separated list of the pcs of the trace's blocks. */
+            int j = 0;
+            for (auto *TB: TC->Trace->TBs) {
+                std::stringstream ss;
+                ss << std::hex << TB->pc;
+                OS << (j++ == 0 ? "" : ",") << ss.str();
+            }
+            OS << "\n";
+        }
+    } else {
+        unsigned top = 10;
+
+        OS << "Percentage of CoverSet (top 10): ";
+        if (NumTracesInCoverSet < top)
+            top = NumTracesInCoverSet;
+        for (unsigned i = 0; i < top; ++i) {
+            TranslatedCode *TC = Covered[i];
+            snprintf(buf, sizeof(buf), "%.1f%%",
+                     (double)TC->SampleCount * 100 / TotalSamples);
+            OS << (i == 0 ? "" : " ") << buf;
+        }
+        OS << "\n";
+    }
+}
+
+/*
+ * printTraceProfile()
+ *  Print the sections of the trace profile selected by the bits in Mode.
+ *  With no translated trace, only a short "Num of Traces" header is
+ *  printed.
+ */
+void SoftwarePerfmon::printTraceProfile()
+{
+    auto &OS = DM.debug();
+    unsigned NumTraces = LLEnv->getTransCode().size();
+
+    OS << "\n";
+    if (NumTraces == 0) {
+        OS << "Trace statistic:\n"
+           << "Num of Traces : " << NumTraces << "\n\n";
+        return;
+    }
+
+    /* Static information */
+    if (Mode & SPM_BASIC)
+        printBasic(LLEnv->getTransCode());
+    if (Mode & SPM_EXIT)
+        OS << "Num of TraceExit : " << NumTraceExits << "\n";
+    if (Mode & SPM_HPM)
+        printHPM();
+    if (Mode & SPM_HOTSPOT)
+        printHotspot(CoverSet, SP->SampleListVec);
+
+    /* Code cache information - start address and size */
+    if (Mode & SPM_CACHE) {
+        size_t BlockSize = (uintptr_t)tcg_ctx_global.code_gen_ptr -
+                           (uintptr_t)tcg_ctx_global.code_gen_buffer;
+        size_t TraceSize = LLEnv->getMemoryManager()->getCodeSize();
+
+        /* The sizes are size_t, so they are formatted with %zu; a %d
+         * conversion does not match the argument type. */
+        OS << "-------------------------\n"
+           << "Block/Trace Cache information:\n";
+        OS << "Block: start=" << tcg_ctx_global.code_gen_buffer
+           << " size=" << tcg_ctx_global.code_gen_buffer_size
+           << " code=" << format("%8zu", BlockSize) << " (ratio="
+           << format("%.2f", (double)BlockSize * 100 / tcg_ctx_global.code_gen_buffer_size)
+           << "%)\n";
+        OS << "Trace: start=" << LLVMEnv::TraceCache
+           << " size=" << LLVMEnv::TraceCacheSize
+           << " code=" << format("%8zu", TraceSize) << " (ratio="
+           << format("%.2f", (double)TraceSize * 100 / LLVMEnv::TraceCacheSize)
+           << "%)\n\n";
+    }
+
+    if (Mode & SPM_TRACE)
+        printTraceExec(LLEnv->getTransCode());
+
+    /* Run the registered exit callbacks, if any. */
+    if ((Mode & SPM_PASS) && !ExitFunc.empty()) {
+        OS << "\n-------------------------\n"
+           << "Pass information:\n";
+        for (unsigned i = 0, e = ExitFunc.size(); i != e; ++i)
+            (*ExitFunc[i])();
+    }
+}
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
+
diff --git a/llvm/llvm-target.cpp b/llvm/llvm-target.cpp
new file mode 100644
index 0000000..609a4ad
--- /dev/null
+++ b/llvm/llvm-target.cpp
@@ -0,0 +1,812 @@
+/*
+ * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/DebugInfo/DIContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm-debug.h"
+#include "llvm-opc.h"
+#include "llvm-target.h"
+
+using namespace llvm::object;
+
+extern "C" {
+#if defined(TARGET_I386)
+extern const int comis_eflags[4];
+extern const int fcom_ccval[4];
+#endif
+}
+
+
+/* Table pairing the address of each llvm_*_mmu load/store helper with its
+ * symbol name. It is searched by address in getMMUFName() and by name in
+ * isMMUFunction(). The table is empty unless CONFIG_SOFTMMU is defined. */
+static std::vector<TCGHelperInfo> MMUHelper = {
+#if defined(CONFIG_SOFTMMU)
+ /* Loads: unsigned variants first, then the sign-extending ones. */
+ { (void *)llvm_ret_ldub_mmu, "llvm_ret_ldub_mmu", },
+ { (void *)llvm_le_lduw_mmu, "llvm_le_lduw_mmu", },
+ { (void *)llvm_le_ldul_mmu, "llvm_le_ldul_mmu", },
+ { (void *)llvm_le_ldq_mmu, "llvm_le_ldq_mmu", },
+ { (void *)llvm_be_lduw_mmu, "llvm_be_lduw_mmu", },
+ { (void *)llvm_be_ldul_mmu, "llvm_be_ldul_mmu", },
+ { (void *)llvm_be_ldq_mmu, "llvm_be_ldq_mmu", },
+ { (void *)llvm_ret_ldsb_mmu, "llvm_ret_ldsb_mmu", },
+ { (void *)llvm_le_ldsw_mmu, "llvm_le_ldsw_mmu", },
+ { (void *)llvm_le_ldsl_mmu, "llvm_le_ldsl_mmu", },
+ { (void *)llvm_be_ldsw_mmu, "llvm_be_ldsw_mmu", },
+ { (void *)llvm_be_ldsl_mmu, "llvm_be_ldsl_mmu", },
+
+ /* Stores. */
+ { (void *)llvm_ret_stb_mmu, "llvm_ret_stb_mmu", },
+ { (void *)llvm_le_stw_mmu, "llvm_le_stw_mmu", },
+ { (void *)llvm_le_stl_mmu, "llvm_le_stl_mmu", },
+ { (void *)llvm_le_stq_mmu, "llvm_le_stq_mmu", },
+ { (void *)llvm_be_stw_mmu, "llvm_be_stw_mmu", },
+ { (void *)llvm_be_stl_mmu, "llvm_be_stl_mmu", },
+ { (void *)llvm_be_stq_mmu, "llvm_be_stq_mmu", },
+#endif
+};
+
+
+/* Helper functions that cause side effects.
+ * For example, helpers modifying CPU states that cannot be identified,
+ * or helpers that call MMU helpers.
+ * While translating qemu_ld/st, we record MMU helper calls so that we
+ * know how to restore the state when a page fault is handled.
+ * Unfortunately, we lose track of MMU helper calls made inside a helper
+ * function, and the restoration will fail. Currently, we mark such helper
+ * functions as illegal and skip trace building when a call to one of
+ * them is encountered while translating op_call. */
+/* Address/name table of the helpers treated as illegal; it is searched by
+ * address in isIllegalHelper(). The contents depend on the build: with
+ * CONFIG_SOFTMMU the entries are selected by TARGET_I386 / TARGET_ARM,
+ * without it only the TARGET_AARCH64 entry may be present. */
+static std::vector<TCGHelperInfo> IllegalHelper = {
+#if defined(CONFIG_SOFTMMU)
+# if defined(TARGET_I386)
+ { (void *)helper_cmpxchg8b, "helper_cmpxchg8b", },
+ { (void *)helper_boundw, "helper_boundw", },
+ { (void *)helper_boundl, "helper_boundl", },
+# elif defined(TARGET_ARM)
+ { (void *)helper_dc_zva, "helper_dc_zva", },
+# endif
+#else
+# if defined(TARGET_AARCH64)
+ { (void *)helper_simd_tbl, "helper_simd_tbl", },
+# endif
+#endif
+};
+
+
+/* Temporarily redefine the DEF_HELPER_FLAGS_n macros so that every
+ * invocation expands to one { address, "name" } initializer; only the
+ * `name' argument is used. Including atomic-helper.h inside the
+ * initializer list below then fills LMTHelper (presumably that header
+ * consists of DEF_HELPER_FLAGS_n invocations -- confirm). */
+#define DEF_HELPER_FLAGS_0(name, flags, ret) { (void *)helper_##name, "helper_"#name },
+#define DEF_HELPER_FLAGS_1(name, flags, ret, t1) DEF_HELPER_FLAGS_0(name, flags, ret)
+#define DEF_HELPER_FLAGS_2(name, flags, ret, t1, t2) DEF_HELPER_FLAGS_0(name, flags, ret)
+#define DEF_HELPER_FLAGS_3(name, flags, ret, t1, t2, t3) DEF_HELPER_FLAGS_0(name, flags, ret)
+#define DEF_HELPER_FLAGS_4(name, flags, ret, t1, t2, t3, t4) DEF_HELPER_FLAGS_0(name, flags, ret)
+
+/* Searched by name in isLMTFunction(); empty unless CONFIG_SOFTMMU is
+ * defined. */
+static std::vector<TCGHelperInfo> LMTHelper = {
+#if defined(CONFIG_SOFTMMU)
+#include "atomic-helper.h"
+#endif
+};
+
+/* Drop the temporary definitions again. */
+#undef DEF_HELPER_FLAGS_0
+#undef DEF_HELPER_FLAGS_1
+#undef DEF_HELPER_FLAGS_2
+#undef DEF_HELPER_FLAGS_3
+#undef DEF_HELPER_FLAGS_4
+
+
+/* Return the name recorded in MMUHelper for the helper at address `func',
+ * or an empty string if the address is not in the table. */
+const char *getMMUFName(const void *func)
+{
+    for (const TCGHelperInfo &Info : MMUHelper) {
+        if (Info.func == func)
+            return Info.name;
+    }
+    return "";
+}
+
+/* Return true if `Name' equals the name of an entry in MMUHelper. */
+bool isMMUFunction(std::string &Name)
+{
+    for (const TCGHelperInfo &Info : MMUHelper) {
+        if (Name == Info.name)
+            return true;
+    }
+    return false;
+}
+
+/* Return true if `Name' equals the name of an entry in LMTHelper. */
+bool isLMTFunction(std::string &Name)
+{
+    for (const TCGHelperInfo &Info : LMTHelper) {
+        if (Name == Info.name)
+            return true;
+    }
+    return false;
+}
+
+/* Return true if the helper at address `func' is listed in
+ * IllegalHelper. */
+bool isIllegalHelper(const void *func)
+{
+    for (const TCGHelperInfo &Info : IllegalHelper) {
+        if (Info.func == func)
+            return true;
+    }
+    return false;
+}
+
+/* Determine whether the function name is one of the recognized system
+ * library functions (libm/libc). The comparison is an exact,
+ * case-sensitive match against the table below.
+ * Note: the two-argument arctangent variants are spelled atan2f/atan2l. */
+bool isLibcall(std::string &Name)
+{
+    static const char * const LibcallNames[] = {
+        "fmodf", "fmod", "fmodl",
+        "abs", "labs", "llabs",
+        "fabs", "fabsf", "fabsl",
+        "sqrtf", "sqrt", "sqrtl",
+        "logf", "log", "logl",
+        "log2f", "log2", "log2l",
+        "log10f", "log10", "log10l",
+        "expf", "exp", "expl",
+        "exp2f", "exp2", "exp2l",
+        "ldexpf", "ldexp", "ldexpl",
+        "sinf", "sin", "sinl",
+        "cosf", "cos", "cosl",
+        "tanf", "tan", "tanl",
+        "atanf", "atan", "atanl",
+        "atan2f", "atan2", "atan2l",
+        "powf", "pow", "powl",
+        "ceilf", "ceil", "ceill",
+        "truncf", "trunc", "truncl",
+        "rintf", "rint", "rintl",
+        "lrintf", "lrint", "lrintl",
+        "nearbyintf", "nearbyint", "nearbyintl",
+        "floorf", "floor", "floorl",
+        "copysignf", "copysign", "copysignl",
+        "memcpy", "memmove", "memset",
+        "fegetround", "fesetround",
+        "__isinfl", "__isnanl",
+    };
+
+    for (const char *Known : LibcallNames) {
+        if (Name == Known)
+            return true;
+    }
+    return false;
+}
+
+/* Determine whether the function name is a softfloat helper or not.
+ * The comparison is an exact match against the table below. The table is
+ * a read-only array of string pointers, so no per-entry padding is
+ * needed. */
+bool isSoftFPcall(std::string &Name)
+{
+    static const char * const SoftFPName[] = {
+        "float16_to_float32",
+        "float32_add",
+        "float32_compare",
+        "float32_compare_quiet",
+        "float32_div",
+        "float32_mul",
+        "float32_scalbn",
+        "float32_sqrt",
+        "float32_sub",
+        "float32_to_float16",
+        "float32_to_float64",
+        "float32_to_int32",
+        "float32_to_int64",
+        "float32_to_uint32",
+        "float32_minnum",
+        "float32_maxnum",
+        "float64_add",
+        "float64_compare",
+        "float64_compare_quiet",
+        "float64_div",
+        "float64_mul",
+        "float64_scalbn",
+        "float64_sqrt",
+        "float64_sub",
+        "float64_to_float32",
+        "float64_to_int32",
+        "float64_to_int64",
+        "float64_to_uint32",
+        "float64_minnum",
+        "float64_maxnum",
+        "int32_to_float32",
+        "int32_to_float64",
+        "int64_to_float32",
+        "normalizeRoundAndPackFloat128",
+        "propagateFloat128NaN",
+        "propagateFloatx80NaN",
+        "roundAndPackFloat128",
+        "roundAndPackFloat32",
+        "roundAndPackFloat64",
+        "roundAndPackFloatx80",
+        "set_float_rounding_mode",
+        "subFloat128Sigs",
+        "subFloat32Sigs",
+        "subFloat64Sigs",
+        "subFloatx80Sigs",
+        "uint32_to_float32",
+        "uint32_to_float64",
+#if 0
+        /* FIXME: this function causes LLVM JIT error:
+           LLVM ERROR: Error reading function 'set_float_exception_flags' from bitcode file: Malformed block record */
+        "set_float_exception_flags",
+#endif
+        "addFloat32Sigs",
+        "addFloat64Sigs",
+
+        "float32_to_int32_round_to_zero",
+        "float64_to_int32_round_to_zero",
+
+        "int32_to_floatx80",
+        "int64_to_floatx80",
+        "float32_to_floatx80",
+        "float64_to_floatx80",
+        "floatx80_abs",
+        "floatx80_chs",
+        "floatx80_is_infinity",
+        "floatx80_is_neg",
+        "floatx80_is_zero",
+        "floatx80_is_zero_or_denormal",
+        "floatx80_is_any_nan",
+
+        "floatx80_to_int32",
+        "floatx80_to_int32_round_to_zero",
+        "floatx80_to_int64",
+        "floatx80_to_int64_round_to_zero",
+        "floatx80_to_float32",
+        "floatx80_to_float64",
+        "floatx80_to_float128",
+        "floatx80_round_to_int",
+        "floatx80_add",
+        "floatx80_sub",
+        "floatx80_mul",
+        "floatx80_div",
+        "floatx80_rem",
+        "floatx80_sqrt",
+        "floatx80_eq",
+        "floatx80_le",
+        "floatx80_lt",
+        "floatx80_unordered",
+        "floatx80_eq_quiet",
+        "floatx80_le_quiet",
+        "floatx80_lt_quiet",
+        "floatx80_unordered_quiet",
+        "floatx80_compare",
+        "floatx80_compare_quiet",
+        "floatx80_is_quiet_nan",
+        "floatx80_is_signaling_nan",
+        "floatx80_maybe_silence_nan",
+        "floatx80_scalbn",
+    };
+
+    for (const char *Known : SoftFPName) {
+        if (Name == Known)
+            return true;
+    }
+    return false;
+}
+
+/* Bind function names/addresses that are used in the softfloat helpers. */
+void AddFPUSymbols(LLVMTranslator *Translator)
+{
+#define AddSymbol(a) Translator->AddSymbol(#a, (void*)a)
+ AddSymbol(float32_add);
+ AddSymbol(float32_sub);
+ AddSymbol(float32_mul);
+ AddSymbol(float32_div);
+ AddSymbol(float32_sqrt);
+ AddSymbol(float32_scalbn);
+ AddSymbol(float32_compare);
+ AddSymbol(float32_compare_quiet);
+ AddSymbol(float32_minnum);
+ AddSymbol(float32_maxnum);
+ AddSymbol(float64_add);
+ AddSymbol(float64_sub);
+ AddSymbol(float64_mul);
+ AddSymbol(float64_div);
+ AddSymbol(float64_sqrt);
+ AddSymbol(float64_scalbn);
+ AddSymbol(float64_compare);
+ AddSymbol(float64_compare_quiet);
+ AddSymbol(float64_minnum);
+ AddSymbol(float64_maxnum);
+ AddSymbol(float16_to_float32);
+ AddSymbol(float32_to_float16);
+ AddSymbol(float32_to_float64);
+ AddSymbol(float32_to_int32);
+ AddSymbol(float32_to_int64);
+ AddSymbol(float32_to_uint32);
+ AddSymbol(float64_to_float32);
+ AddSymbol(float64_to_int32);
+ AddSymbol(float64_to_int64);
+ AddSymbol(float64_to_uint32);
+ AddSymbol(int32_to_float32);
+ AddSymbol(int32_to_float64);
+ AddSymbol(int64_to_float32);
+ AddSymbol(uint32_to_float32);
+ AddSymbol(uint32_to_float64);
+ AddSymbol(float32_to_int32_round_to_zero);
+ AddSymbol(float64_to_int32_round_to_zero);
+
+ AddSymbol(int32_to_floatx80);
+ AddSymbol(int64_to_floatx80);
+ AddSymbol(float32_to_floatx80);
+ AddSymbol(float64_to_floatx80);
+ AddSymbol(floatx80_abs);
+ AddSymbol(floatx80_chs);
+ AddSymbol(floatx80_is_infinity);
+ AddSymbol(floatx80_is_neg);
+ AddSymbol(floatx80_is_zero);
+ AddSymbol(floatx80_is_zero_or_denormal);
+ AddSymbol(floatx80_is_any_nan);
+
+ AddSymbol(floatx80_to_int32);
+ AddSymbol(floatx80_to_int32_round_to_zero);
+ AddSymbol(floatx80_to_int64);
+ AddSymbol(floatx80_to_int64_round_to_zero);
+ AddSymbol(floatx80_to_float32);
+ AddSymbol(floatx80_to_float64);
+ AddSymbol(floatx80_to_float128);
+ AddSymbol(floatx80_round_to_int);
+ AddSymbol(floatx80_add);
+ AddSymbol(floatx80_sub);
+ AddSymbol(floatx80_mul);
+ AddSymbol(floatx80_div);
+ AddSymbol(floatx80_rem);
+ AddSymbol(floatx80_sqrt);
+ AddSymbol(floatx80_eq);
+ AddSymbol(floatx80_le);
+ AddSymbol(floatx80_lt);
+ AddSymbol(floatx80_unordered);
+ AddSymbol(floatx80_eq_quiet);
+ AddSymbol(floatx80_le_quiet);
+ AddSymbol(floatx80_lt_quiet);
+ AddSymbol(floatx80_unordered_quiet);
+ AddSymbol(floatx80_compare);
+ AddSymbol(floatx80_compare_quiet);
+ AddSymbol(floatx80_is_quiet_nan);
+ AddSymbol(floatx80_is_signaling_nan);
+ AddSymbol(floatx80_maybe_silence_nan);
+ AddSymbol(floatx80_scalbn);
+
+ AddSymbol(rint);
+ AddSymbol(rintf);
+ AddSymbol(lrint);