author    Timothy Pearson <tpearson@raptorengineering.com>  2019-11-29 19:25:14 -0600
committer Timothy Pearson <tpearson@raptorengineering.com>  2019-11-29 19:25:14 -0600
commit    d7b5e291ffb166239c44682af2363080c72888aa (patch)
tree      226ff4668e6e5fd9309d792181bb54e256032338
parent    a8c40fa2d667e585382080db36ac44e216b37a1c (diff)
Initial overlay of HQEMU 2.5.2 changes onto underlying 2.5.0 QEMU GIT tree (branch: 2.5_overlay)
-rw-r--r--  Makefile.target | 9
-rwxr-xr-x  configure | 61
-rw-r--r--  cpu-exec.c | 45
-rw-r--r--  cpus.c | 25
-rw-r--r--  cputlb.c | 135
-rw-r--r--  exec.c | 10
-rw-r--r--  gdbstub.c | 2
-rw-r--r--  include/exec/cpu-all.h | 6
-rw-r--r--  include/exec/cpu-common.h | 4
-rw-r--r--  include/exec/cpu-defs.h | 11
-rw-r--r--  include/exec/cpu_ldst.h | 11
-rw-r--r--  include/exec/cpu_ldst_template.h | 31
-rw-r--r--  include/exec/exec-all.h | 9
-rw-r--r--  include/exec/memory.h | 4
-rw-r--r--  include/hw/qdev-core.h | 4
-rw-r--r--  include/qemu-common.h | 2
-rw-r--r--  include/qemu/atomic.h | 10
-rw-r--r--  include/qemu/bitmap.h | 10
-rw-r--r--  include/qemu/compiler.h | 4
-rw-r--r--  include/qemu/queue.h | 2
-rw-r--r--  include/qemu/rcu.h | 6
-rw-r--r--  include/qemu/timer.h | 4
-rw-r--r--  include/qom/cpu.h | 7
-rw-r--r--  include/qom/object.h | 7
-rw-r--r--  include/sysemu/cpus.h | 1
-rw-r--r--  linux-user/elfload.c | 4
-rw-r--r--  linux-user/main.c | 75
-rw-r--r--  linux-user/strace.c | 1
-rw-r--r--  linux-user/syscall.c | 13
-rw-r--r--  qga/commands-posix.c | 1
-rw-r--r--  qom/object.c | 8
-rw-r--r--  softmmu_template.h | 51
-rw-r--r--  target-arm/cpu.h | 64
-rw-r--r--  target-arm/helper.c | 46
-rw-r--r--  target-arm/helper.h | 11
-rw-r--r--  target-arm/op_helper.c | 10
-rw-r--r--  target-arm/translate-a64.c | 318
-rw-r--r--  target-arm/translate.c | 324
-rw-r--r--  target-arm/translate.h | 2
-rw-r--r--  target-i386/cpu.h | 27
-rw-r--r--  target-i386/fpu_helper.c | 2
-rw-r--r--  target-i386/helper.c | 12
-rw-r--r--  target-i386/helper.h | 3
-rw-r--r--  target-i386/kvm.c | 4
-rw-r--r--  target-i386/misc_helper.c | 4
-rw-r--r--  target-i386/ops_sse.h | 2
-rw-r--r--  target-i386/translate.c | 587
-rw-r--r--  target-ppc/Makefile.objs | 2
-rw-r--r--  target-ppc/cpu.h | 21
-rw-r--r--  target-ppc/helper.h | 2
-rw-r--r--  target-ppc/translate.c | 41
-rw-r--r--  tcg/aarch64/tcg-target.c | 82
-rw-r--r--  tcg/i386/tcg-target.c | 197
-rw-r--r--  tcg/i386/tcg-target.h | 2
-rw-r--r--  tcg/ppc/tcg-target.c | 123
-rw-r--r--  tcg/tcg-op.c | 2
-rw-r--r--  tcg/tcg-op.h | 11
-rw-r--r--  tcg/tcg-opc.h | 6
-rw-r--r--  tcg/tcg.c | 270
-rw-r--r--  tcg/tcg.h | 58
-rw-r--r--  translate-all.c | 215
-rw-r--r--  user-exec.c | 24
-rw-r--r--  util/memfd.c | 6
-rw-r--r--  vl.c | 1
64 files changed, 2719 insertions, 333 deletions
diff --git a/Makefile.target b/Makefile.target
index 962d004..4e4b1fe 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -158,6 +158,8 @@ GENERATED_HEADERS += hmp-commands.h hmp-commands-info.h qmp-commands-old.h
endif # CONFIG_SOFTMMU
+include $(SRC_PATH)/llvm/hqemu.mk
+
# Workaround for http://gcc.gnu.org/PR55489, see configure.
%/translate.o: QEMU_CFLAGS += $(TRANSLATE_OPT_CFLAGS)
@@ -189,8 +191,8 @@ all-obj-$(CONFIG_SOFTMMU) += $(crypto-obj-y)
$(QEMU_PROG_BUILD): config-devices.mak
# build either PROG or PROGW
-$(QEMU_PROG_BUILD): $(all-obj-y) ../libqemuutil.a ../libqemustub.a
- $(call LINK, $(filter-out %.mak, $^))
+$(QEMU_PROG_BUILD): $(all-obj-y) ../libqemuutil.a ../libqemustub.a $(LLVM_BITCODE)
+ $(call LINK, $(filter-out %.mak %.bc, $^))
ifdef CONFIG_DARWIN
$(call quiet-command,Rez -append $(SRC_PATH)/pc-bios/qemu.rsrc -o $@," REZ $(TARGET_DIR)$@")
$(call quiet-command,SetFile -a C $@," SETFILE $(TARGET_DIR)$@")
@@ -225,6 +227,9 @@ ifdef CONFIG_TRACE_SYSTEMTAP
$(INSTALL_DATA) $(QEMU_PROG).stp-installed "$(DESTDIR)$(qemu_datadir)/../systemtap/tapset/$(QEMU_PROG).stp"
$(INSTALL_DATA) $(QEMU_PROG)-simpletrace.stp "$(DESTDIR)$(qemu_datadir)/../systemtap/tapset/$(QEMU_PROG)-simpletrace.stp"
endif
+ifneq ($(LLVM_BITCODE),)
+ $(INSTALL) -m 644 $(LLVM_BITCODE) "$(DESTDIR)$(bindir)"
+endif
GENERATED_HEADERS += config-target.h
Makefile: $(GENERATED_HEADERS)
diff --git a/configure b/configure
index b9552fd..8619b44 100755
--- a/configure
+++ b/configure
@@ -345,6 +345,9 @@ vhdx=""
numa=""
tcmalloc="no"
jemalloc="no"
+llvm="no"
+bcflags=""
+libopencsd=""
# parse CC options first
for opt do
@@ -1169,6 +1172,12 @@ for opt do
;;
--enable-jemalloc) jemalloc="yes"
;;
+ --enable-llvm) llvm="yes"
+ ;;
+ --clang-flags=*) bcflags="$optarg"
+ ;;
+ --with-libopencsd=*) libopencsd="$optarg"
+ ;;
*)
echo "ERROR: unknown option $opt"
echo "Try '$0 --help' for more information"
@@ -1391,12 +1400,26 @@ disabled with --disable-FEATURE, default is enabled if available:
numa libnuma support
tcmalloc tcmalloc support
jemalloc jemalloc support
+ llvm enable LLVM optimization
+ --clang-flags flags for clang compiler
+ --with-libopencsd path to libopencsd library
NOTE: The object files are built at the place where configure is launched
EOF
exit 0
fi
+if test "$llvm" != "no" ; then
+ llvm-config --version > /dev/null 2>&1 || { echo >&2 "llvm-config is not in the PATH"; exit 1; }
+ llvm_major=`llvm-config --version | cut -d'.' -f1`
+ llvm_minor=`llvm-config --version | cut -d'.' -f2`
+ if test "$llvm_major" -lt "3" ; then
+ error_exit "LLVM version too old. Version 3.5 or later is required."
+ elif test "$llvm_major" -eq "3" && test "$llvm_minor" -lt "5" ; then
+ error_exit "LLVM version too old. Version 3.5 or later is required."
+ fi
+fi
+
# Now we have handled --enable-tcg-interpreter and know we're not just
# printing the help message, bail out if the host CPU isn't supported.
if test "$ARCH" = "unknown"; then
@@ -1469,6 +1492,7 @@ gcc_flags="-Wmissing-include-dirs -Wempty-body -Wnested-externs $gcc_flags"
gcc_flags="-Wendif-labels $gcc_flags"
gcc_flags="-Wno-initializer-overrides $gcc_flags"
gcc_flags="-Wno-string-plus-int $gcc_flags"
+gcc_flags="-Wno-format-truncation $gcc_flags"
# Note that we do not add -Werror to gcc_flags here, because that would
# enable it for all configure tests. If a configure test failed due
# to -Werror this would just silently disable some features,
@@ -4843,6 +4867,11 @@ echo "bzip2 support $bzip2"
echo "NUMA host support $numa"
echo "tcmalloc support $tcmalloc"
echo "jemalloc support $jemalloc"
+echo "LLVM enabled $llvm (version `llvm-config --version`)"
+
+if test "$libopencsd" != ""; then
+ echo "libopencsd $libopencsd"
+fi
if test "$sdl_too_old" = "yes"; then
echo "-> Your SDL version is too old - please upgrade to have SDL support"
@@ -5248,6 +5277,21 @@ if test "$seccomp" = "yes"; then
echo "CONFIG_SECCOMP=y" >> $config_host_mak
fi
+if test "$llvm" != "no" ; then
+ echo "CONFIG_LLVM=y" >> $config_host_mak
+ echo "BCFLAGS=$bcflags" >> $config_host_mak
+ echo "LLVM_VERSION=LLVM_V`llvm-config --version | sed -e "s/\.//g" | cut -c 1-2`" >> $config_host_mak
+ echo "LLVM_CFLAGS=`llvm-config --cflags`" >> $config_host_mak
+ echo "LLVM_CXXFLAGS=`llvm-config --cxxflags`" >> $config_host_mak
+ echo "LLVM_LDFLAGS=`llvm-config --ldflags`" >> $config_host_mak
+ echo "LLVM_LIBS=`llvm-config --libs`" >> $config_host_mak
+fi
+
+if test "$libopencsd" != "" ; then
+ echo "CONFIG_LIBOPENCSD=y" >> $config_host_mak
+ echo "LIBOPENCSD=$libopencsd" >> $config_host_mak
+fi
+
# XXX: suppress that
if [ "$bsd" = "yes" ] ; then
echo "CONFIG_BSD=y" >> $config_host_mak
@@ -5848,6 +5892,23 @@ fi
echo "LDFLAGS+=$ldflags" >> $config_target_mak
echo "QEMU_CFLAGS+=$cflags" >> $config_target_mak
+if test "$cpu" = "i386" -o "$cpu" = "x86_64" -o "$cpu" = "arm" ; then
+ case "$target_name" in
+ i386|x86_64)
+ echo "CONFIG_COREMU=y" >> $config_target_mak
+ ;;
+ esac
+fi
+
+if test "$llvm" != "no" ; then
+ bitcode="llvm_helper_$target_name"
+ if test "$target_softmmu" = "yes" ; then
+ bitcode=$bitcode"_softmmu"
+ fi
+ echo "LLVM_EXTRA_FLAGS+=-I. -I\$(SRC_PATH) $cflags $LLVM_EXTRA_FLAGS" >> $config_target_mak
+ echo "CONFIG_LLVM_BITCODE=\"$prefix/bin/$bitcode.bc\"" >> $config_target_mak
+fi
+
done # for target in $targets
if [ "$pixman" = "internal" ]; then
diff --git a/cpu-exec.c b/cpu-exec.c
index c88d0ff..5e1f380 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -31,6 +31,7 @@
#include "hw/i386/apic.h"
#endif
#include "sysemu/replay.h"
+#include "hqemu.h"
/* -icount align implementation. */
@@ -104,6 +105,7 @@ static void print_delay(const SyncClocks *sc)
static void init_delay_params(SyncClocks *sc,
const CPUState *cpu)
{
+ memset(sc, 0, sizeof(SyncClocks));
if (!icount_align_option) {
return;
}
@@ -159,6 +161,10 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, uint8_t *tb_ptr)
trace_exec_tb_exit((void *) (next_tb & ~TB_EXIT_MASK),
next_tb & TB_EXIT_MASK);
+#if defined(CONFIG_LLVM)
+ if ((next_tb & TB_EXIT_MASK) == TB_EXIT_LLVM)
+ return next_tb;
+#endif
if ((next_tb & TB_EXIT_MASK) > TB_EXIT_IDX1) {
/* We didn't start executing this TB (eg because the instruction
* counter hit zero); we must restore the guest PC to the address
@@ -197,7 +203,7 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
tb = tb_gen_code(cpu, orig_tb->pc, orig_tb->cs_base, orig_tb->flags,
max_cycles | CF_NOCACHE
| (ignore_icount ? CF_IGNORE_ICOUNT : 0));
- tb->orig_tb = tcg_ctx.tb_ctx.tb_invalidated_flag ? NULL : orig_tb;
+ tb->orig_tb = tcg_ctx.tb_ctx->tb_invalidated_flag ? NULL : orig_tb;
cpu->current_tb = tb;
/* execute the generated code */
trace_exec_tb_nocache(tb, tb->pc);
@@ -218,13 +224,13 @@ static TranslationBlock *tb_find_physical(CPUState *cpu,
tb_page_addr_t phys_pc, phys_page1;
target_ulong virt_page2;
- tcg_ctx.tb_ctx.tb_invalidated_flag = 0;
+ tcg_ctx.tb_ctx->tb_invalidated_flag = 0;
/* find translated block using physical mappings */
phys_pc = get_page_addr_code(env, pc);
phys_page1 = phys_pc & TARGET_PAGE_MASK;
- h = tb_phys_hash_func(phys_pc);
- ptb1 = &tcg_ctx.tb_ctx.tb_phys_hash[h];
+ h = tb_phys_hash_func(pc);
+ ptb1 = &tcg_ctx.tb_ctx->tb_phys_hash[h];
for(;;) {
tb = *ptb1;
if (!tb) {
@@ -253,8 +259,8 @@ static TranslationBlock *tb_find_physical(CPUState *cpu,
/* Move the TB to the head of the list */
*ptb1 = tb->phys_hash_next;
- tb->phys_hash_next = tcg_ctx.tb_ctx.tb_phys_hash[h];
- tcg_ctx.tb_ctx.tb_phys_hash[h] = tb;
+ tb->phys_hash_next = tcg_ctx.tb_ctx->tb_phys_hash[h];
+ tcg_ctx.tb_ctx->tb_phys_hash[h] = tb;
return tb;
}
@@ -315,6 +321,10 @@ static inline TranslationBlock *tb_find_fast(CPUState *cpu)
tb->flags != flags)) {
tb = tb_find_slow(cpu, pc, cs_base, flags);
}
+
+ itlb_update_entry(env, tb);
+ ibtc_update_entry(env, tb);
+
return tb;
}
@@ -492,29 +502,23 @@ int cpu_exec(CPUState *cpu)
tb = tb_find_fast(cpu);
/* Note: we do it here to avoid a gcc bug on Mac OS X when
doing it in tb_find_slow */
- if (tcg_ctx.tb_ctx.tb_invalidated_flag) {
+ if (tcg_ctx.tb_ctx->tb_invalidated_flag) {
/* as some TB could have been invalidated because
of memory exceptions while generating the code, we
must recompute the hash index here */
next_tb = 0;
- tcg_ctx.tb_ctx.tb_invalidated_flag = 0;
+ tcg_ctx.tb_ctx->tb_invalidated_flag = 0;
}
if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
qemu_log("Trace %p [" TARGET_FMT_lx "] %s\n",
tb->tc_ptr, tb->pc, lookup_symbol(tb->pc));
}
- /* see if we can patch the calling TB. When the TB
- spans two pages, we cannot safely do a direct
- jump. */
- if (next_tb != 0 && tb->page_addr[1] == -1
- && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
- tb_add_jump((TranslationBlock *)(next_tb & ~TB_EXIT_MASK),
- next_tb & TB_EXIT_MASK, tb);
- }
+
+ tracer_exec_tb(cpu->env_ptr, next_tb, tb);
tb_unlock();
if (likely(!cpu->exit_request)) {
trace_exec_tb(tb, tb->pc);
- tc_ptr = tb->tc_ptr;
+ tc_ptr = tb->opt_ptr;
/* execute the generated code */
cpu->current_tb = tb;
next_tb = cpu_tb_exec(cpu, tc_ptr);
@@ -533,9 +537,14 @@ int cpu_exec(CPUState *cpu)
*/
smp_rmb();
next_tb = 0;
+
+ tracer_reset(cpu->env_ptr);
break;
case TB_EXIT_ICOUNT_EXPIRED:
{
+#if defined(CONFIG_LLVM)
+ break;
+#endif
/* Instruction counter expired. */
int insns_left = cpu->icount_decr.u32;
if (cpu->icount_extra && insns_left >= 0) {
@@ -590,6 +599,8 @@ int cpu_exec(CPUState *cpu)
#endif /* buggy compiler */
cpu->can_do_io = 1;
tb_lock_reset();
+
+ tracer_reset(cpu->env_ptr);
}
} /* for(;;) */
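
The cpu-exec.c hunks above stop chaining TBs directly and instead hand control to tracer_exec_tb(), then dispatch through tb->opt_ptr rather than tb->tc_ptr, with a new TB_EXIT_LLVM exit code for blocks returning from LLVM-compiled traces. A minimal sketch of one way such dual dispatch can work, assuming opt_ptr initially aliases the baseline TCG code and is atomically redirected once an optimized trace is ready (SketchTB and all names here are illustrative, not HQEMU's actual definitions):

    #include <stdatomic.h>
    #include <stdint.h>

    typedef struct SketchTB {
        uint8_t *tc_ptr;              /* baseline TCG-generated host code */
        _Atomic(uint8_t *) opt_ptr;   /* entry point actually dispatched  */
    } SketchTB;

    static void sketch_tb_init(SketchTB *tb, uint8_t *tcg_code)
    {
        tb->tc_ptr = tcg_code;
        atomic_store(&tb->opt_ptr, tcg_code);   /* start unoptimized */
    }

    static void sketch_tb_install_llvm(SketchTB *tb, uint8_t *llvm_code)
    {
        /* Publish the optimized trace; a racing executor at worst runs
         * one more pass through the baseline code. */
        atomic_store_explicit(&tb->opt_ptr, llvm_code, memory_order_release);
    }

    static uint8_t *sketch_tb_entry(SketchTB *tb)
    {
        return atomic_load_explicit(&tb->opt_ptr, memory_order_acquire);
    }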
diff --git a/cpus.c b/cpus.c
index 43676fa..5a2312a 100644
--- a/cpus.c
+++ b/cpus.c
@@ -66,6 +66,9 @@
#endif /* CONFIG_LINUX */
+#include "tcg.h"
+#include "hqemu.h"
+
static CPUState *next_cpu;
int64_t max_delay;
int64_t max_advance;
@@ -892,6 +895,18 @@ void qemu_init_cpu_loop(void)
qemu_thread_get_self(&io_thread);
}
+void qemu_end_cpu_loop(void)
+{
+ CPUState *cpu;
+
+ CPU_FOREACH(cpu)
+ optimization_finalize(cpu->env_ptr);
+
+#if defined(CONFIG_LLVM)
+ llvm_finalize();
+#endif
+}
+
void run_on_cpu(CPUState *cpu, void (*func)(void *data), void *data)
{
struct qemu_work_item wi;
@@ -1134,6 +1149,16 @@ static void *qemu_tcg_cpu_thread_fn(void *arg)
/* process any pending work */
atomic_mb_set(&exit_request, 1);
+#if defined(CONFIG_LLVM)
+ llvm_init();
+#endif
+ /* we can safely initialize optimization resources after
+ * the setup of CPUArchState is completed. */
+ CPU_FOREACH(cpu) {
+ copy_tcg_context();
+ optimization_init(cpu->env_ptr);
+ }
+
while (1) {
tcg_exec_all();
diff --git a/cputlb.c b/cputlb.c
index bf1d50a..c81c3be 100644
--- a/cputlb.c
+++ b/cputlb.c
@@ -19,6 +19,7 @@
#include "config.h"
#include "cpu.h"
+#include "exec/tb-hash.h"
#include "exec/exec-all.h"
#include "exec/memory.h"
#include "exec/address-spaces.h"
@@ -30,12 +31,38 @@
#include "exec/ram_addr.h"
#include "tcg/tcg.h"
+#include "hqemu.h"
+
+#if defined(ENABLE_TLBVERSION)
+#define TLB_NONIO_MASK (TARGET_PAGE_MASK | TLB_INVALID_MASK | TLB_VERSION_MASK)
+#define page_val(addr, env) (((tlbaddr_t)addr & TARGET_PAGE_MASK) | tlb_version(env))
+#else
+#define TLB_NONIO_MASK (TARGET_PAGE_MASK | TLB_INVALID_MASK)
+#define page_val(addr, env) (addr & TARGET_PAGE_MASK)
+#endif
+
//#define DEBUG_TLB
//#define DEBUG_TLB_CHECK
/* statistics */
int tlb_flush_count;
+static inline void tlb_reset(CPUArchState *env)
+{
+#if defined(ENABLE_TLBVERSION)
+ tlbaddr_t version = env->tlb_version >> TLB_VERSION_SHIFT;
+ if (++version == TLB_VERSION_SIZE) {
+ version = 0;
+ memset(env->tlb_table, -1, sizeof(env->tlb_table));
+ memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table));
+ }
+ env->tlb_version = version << TLB_VERSION_SHIFT;
+#else
+ memset(env->tlb_table, -1, sizeof(env->tlb_table));
+ memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table));
+#endif
+}
+
/* NOTE:
* If flush_global is true (the usual case), flush all tlb entries.
* If flush_global is false, flush (at least) all tlb entries not
@@ -59,10 +86,12 @@ void tlb_flush(CPUState *cpu, int flush_global)
links while we are modifying them */
cpu->current_tb = NULL;
- memset(env->tlb_table, -1, sizeof(env->tlb_table));
- memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table));
+ tlb_reset(env);
memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));
+ optimization_reset(env, 0);
+ lpt_reset(env);
+
env->vtlb_index = 0;
env->tlb_flush_addr = -1;
env->tlb_flush_mask = 0;
@@ -110,18 +139,67 @@ void tlb_flush_by_mmuidx(CPUState *cpu, ...)
va_end(argp);
}
-static inline void tlb_flush_entry(CPUTLBEntry *tlb_entry, target_ulong addr)
+static inline void tlb_flush_entry(CPUArchState *env, CPUTLBEntry *tlb_entry,
+ target_ulong addr)
{
- if (addr == (tlb_entry->addr_read &
- (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
- addr == (tlb_entry->addr_write &
- (TARGET_PAGE_MASK | TLB_INVALID_MASK)) ||
- addr == (tlb_entry->addr_code &
- (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
+ if (page_val(addr, env) == (tlb_entry->addr_read & TLB_NONIO_MASK) ||
+ page_val(addr, env) == (tlb_entry->addr_write & TLB_NONIO_MASK) ||
+ page_val(addr, env) == (tlb_entry->addr_code & TLB_NONIO_MASK)) {
memset(tlb_entry, -1, sizeof(*tlb_entry));
}
}
+#ifdef ENABLE_LPAGE
+static int tlb_flush_large_page(CPUState *cpu, target_ulong addr)
+{
+ int i, j, k, ret, mmu_idx, num_base_pages, max_flush_pages;
+ target_ulong page_addr, page_size, flush_addr;
+ CPUArchState *env = cpu->env_ptr;
+
+#if defined(DEBUG_TLB)
+ printf("tlb_flush:\n");
+#endif
+ /* must reset current TB so that interrupts cannot modify the
+ links while we are modifying them */
+ cpu->current_tb = NULL;
+
+ ret = lpt_flush_page(env, addr, &page_addr, &page_size);
+ if (ret == 0)
+ return 0;
+
+ /* If the large page occupies a small set of the tlb, do a partial flush
+ * optimization; otherwise, do a full flush. */
+ num_base_pages = page_size / TARGET_PAGE_SIZE;
+ max_flush_pages = (CPU_TLB_SIZE / 4 < 1024) ? CPU_TLB_SIZE / 4 : 1024;
+ if (num_base_pages > max_flush_pages) {
+ tlb_flush(cpu, 1);
+ return 1;
+ }
+
+ for (i = 0; i < num_base_pages; i++) {
+ flush_addr = addr + i * TARGET_PAGE_SIZE;
+ j = (flush_addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
+ for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++)
+ tlb_flush_entry(env, &env->tlb_table[mmu_idx][j], flush_addr);
+
+ /* check whether there are entries that need to be flushed in the vtlb */
+ for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
+ for (k = 0; k < CPU_VTLB_SIZE; k++)
+ tlb_flush_entry(env, &env->tlb_v_table[mmu_idx][k], flush_addr);
+ }
+ }
+
+ for (i = -1; i < num_base_pages; i++) {
+ j = tb_jmp_cache_hash_page(addr + i * TARGET_PAGE_SIZE);
+ memset(&cpu->tb_jmp_cache[j], 0,
+ TB_JMP_PAGE_SIZE * sizeof(TranslationBlock *));
+ }
+ optimization_reset(env, 0);
+
+ return 1;
+}
+#endif
+
void tlb_flush_page(CPUState *cpu, target_ulong addr)
{
CPUArchState *env = cpu->env_ptr;
@@ -138,8 +216,14 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr)
TARGET_FMT_lx "/" TARGET_FMT_lx ")\n",
env->tlb_flush_addr, env->tlb_flush_mask);
#endif
+
+#ifdef ENABLE_LPAGE
+ if (tlb_flush_large_page(cpu, addr))
+ return;
+#else
tlb_flush(cpu, 1);
return;
+#endif
}
/* must reset current TB so that interrupts cannot modify the
links while we are modifying them */
@@ -148,18 +232,19 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr)
addr &= TARGET_PAGE_MASK;
i = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
- tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
+ tlb_flush_entry(env, &env->tlb_table[mmu_idx][i], addr);
}
/* check whether there are entries that need to be flushed in the vtlb */
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
int k;
for (k = 0; k < CPU_VTLB_SIZE; k++) {
- tlb_flush_entry(&env->tlb_v_table[mmu_idx][k], addr);
+ tlb_flush_entry(env, &env->tlb_v_table[mmu_idx][k], addr);
}
}
tb_flush_jmp_cache(cpu, addr);
+ optimization_flush_page(env, addr);
}
void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, ...)
@@ -202,11 +287,11 @@ void tlb_flush_page_by_mmuidx(CPUState *cpu, target_ulong addr, ...)
printf(" %d", mmu_idx);
#endif
- tlb_flush_entry(&env->tlb_table[mmu_idx][i], addr);
+ tlb_flush_entry(env, &env->tlb_table[mmu_idx][i], addr);
/* check whether there are vltb entries that need to be flushed */
for (k = 0; k < CPU_VTLB_SIZE; k++) {
- tlb_flush_entry(&env->tlb_v_table[mmu_idx][k], addr);
+ tlb_flush_entry(env, &env->tlb_v_table[mmu_idx][k], addr);
}
}
va_end(argp);
@@ -284,10 +369,11 @@ void tlb_reset_dirty(CPUState *cpu, ram_addr_t start1, ram_addr_t length)
}
}
-static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr)
+static inline void tlb_set_dirty1(CPUTLBEntry *tlb_entry, target_ulong vaddr,
+ tlbaddr_t version)
{
- if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY)) {
- tlb_entry->addr_write = vaddr;
+ if (tlb_entry->addr_write == (vaddr | TLB_NOTDIRTY | version)) {
+ tlb_entry->addr_write = vaddr | version;
}
}
@@ -302,13 +388,13 @@ void tlb_set_dirty(CPUState *cpu, target_ulong vaddr)
vaddr &= TARGET_PAGE_MASK;
i = (vaddr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
- tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr);
+ tlb_set_dirty1(&env->tlb_table[mmu_idx][i], vaddr, tlb_version(env));
}
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
int k;
for (k = 0; k < CPU_VTLB_SIZE; k++) {
- tlb_set_dirty1(&env->tlb_v_table[mmu_idx][k], vaddr);
+ tlb_set_dirty1(&env->tlb_v_table[mmu_idx][k], vaddr, tlb_version(env));
}
}
}
@@ -360,6 +446,7 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
assert(size >= TARGET_PAGE_SIZE);
if (size != TARGET_PAGE_SIZE) {
tlb_add_large_page(env, vaddr, size);
+ lpt_add_page(env, vaddr, size);
}
sz = size;
@@ -424,6 +511,13 @@ void tlb_set_page_with_attrs(CPUState *cpu, target_ulong vaddr,
} else {
te->addr_write = -1;
}
+
+#ifdef ENABLE_TLBVERSION
+ tlbaddr_t version = tlb_version(env);
+ te->addr_read |= version;
+ te->addr_write |= version;
+ te->addr_code |= version;
+#endif
}
/* Add a new TLB entry, but without specifying the memory
@@ -452,7 +546,7 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
mmu_idx = cpu_mmu_index(env1, true);
if (unlikely(env1->tlb_table[mmu_idx][page_index].addr_code !=
- (addr & TARGET_PAGE_MASK))) {
+ page_val(addr, env1))) {
cpu_ldub_code(env1, addr);
}
pd = env1->iotlb[mmu_idx][page_index].addr & ~TARGET_PAGE_MASK;
@@ -471,6 +565,9 @@ tb_page_addr_t get_page_addr_code(CPUArchState *env1, target_ulong addr)
return qemu_ram_addr_from_host_nofail(p);
}
+#undef TLB_NONIO_MASK
+#undef page_val
+
#define MMUSUFFIX _mmu
#define SHIFT 0
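
The cputlb.c changes introduce version-tagged TLB entries (ENABLE_TLBVERSION): each entry's address field carries the current version in its high bits, so tlb_reset() can invalidate the whole TLB by bumping env->tlb_version and only pays for a full memset when the version counter wraps. A self-contained sketch of the idea, with illustrative constants in place of the real TLB_VERSION_SHIFT/TLB_VERSION_SIZE from hqemu-config.h (guest addresses are assumed to fit below bit 48):

    #include <stdint.h>
    #include <string.h>

    #define SK_PAGE_MASK      (~0xfffULL)
    #define SK_VERSION_SHIFT  48
    #define SK_VERSION_SIZE   (1ULL << 15)

    typedef struct {
        uint64_t tlb_version;    /* current version, pre-shifted */
        uint64_t table[256];     /* stand-in for env->tlb_table  */
    } SketchEnv;

    /* O(1) flush: bump the version so every cached tag mismatches;
     * only a wrap-around pays for the memset. */
    static void sketch_tlb_flush(SketchEnv *env)
    {
        uint64_t v = (env->tlb_version >> SK_VERSION_SHIFT) + 1;
        if (v == SK_VERSION_SIZE) {
            v = 0;
            memset(env->table, -1, sizeof(env->table));
        }
        env->tlb_version = v << SK_VERSION_SHIFT;
    }

    /* A hit now requires both the page bits and the version to match,
     * mirroring the page_val() macro above. */
    static int sketch_tlb_hit(const SketchEnv *env, uint64_t entry, uint64_t addr)
    {
        return entry == ((addr & SK_PAGE_MASK) | env->tlb_version);
    }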
diff --git a/exec.c b/exec.c
index 0bf0a6e..30ab09f 100644
--- a/exec.c
+++ b/exec.c
@@ -706,7 +706,7 @@ int cpu_watchpoint_insert(CPUState *cpu, vaddr addr, vaddr len,
}
wp = g_malloc(sizeof(*wp));
- wp->vaddr = addr;
+ wp->addr = addr;
wp->len = len;
wp->flags = flags;
@@ -731,7 +731,7 @@ int cpu_watchpoint_remove(CPUState *cpu, vaddr addr, vaddr len,
CPUWatchpoint *wp;
QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
- if (addr == wp->vaddr && len == wp->len
+ if (addr == wp->addr && len == wp->len
&& flags == (wp->flags & ~BP_WATCHPOINT_HIT)) {
cpu_watchpoint_remove_by_ref(cpu, wp);
return 0;
@@ -745,7 +745,7 @@ void cpu_watchpoint_remove_by_ref(CPUState *cpu, CPUWatchpoint *watchpoint)
{
QTAILQ_REMOVE(&cpu->watchpoints, watchpoint, entry);
- tlb_flush_page(cpu, watchpoint->vaddr);
+ tlb_flush_page(cpu, watchpoint->addr);
g_free(watchpoint);
}
@@ -776,10 +776,10 @@ static inline bool cpu_watchpoint_address_matches(CPUWatchpoint *wp,
* exactly at the top of the address space and so addr + len
* wraps round to zero.
*/
- vaddr wpend = wp->vaddr + wp->len - 1;
+ vaddr wpend = wp->addr + wp->len - 1;
vaddr addrend = addr + len - 1;
- return !(addr > wpend || wp->vaddr > addrend);
+ return !(addr > wpend || wp->addr > addrend);
}
#endif
diff --git a/gdbstub.c b/gdbstub.c
index 9c29aa0..a24d9ef 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -1267,7 +1267,7 @@ static void gdb_vm_state_change(void *opaque, int running, RunState state)
snprintf(buf, sizeof(buf),
"T%02xthread:%02x;%swatch:" TARGET_FMT_lx ";",
GDB_SIGNAL_TRAP, cpu_index(cpu), type,
- (target_ulong)cpu->watchpoint_hit->vaddr);
+ (target_ulong)cpu->watchpoint_hit->addr);
cpu->watchpoint_hit = NULL;
goto send_packet;
}
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index 83b1781..9471dc6 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -271,12 +271,12 @@ CPUArchState *cpu_copy(CPUArchState *env);
/* Flags stored in the low bits of the TLB virtual address. These are
defined so that fast path ram access is all zeros. */
/* Zero if TLB entry is valid. */
-#define TLB_INVALID_MASK (1 << 3)
+#define TLB_INVALID_MASK (1 << TLB_INVALID_SHIFT)
/* Set if TLB entry references a clean RAM page. The iotlb entry will
contain the page physical address. */
-#define TLB_NOTDIRTY (1 << 4)
+#define TLB_NOTDIRTY (1 << TLB_NOTDIRTY_SHIFT)
/* Set if TLB entry is an IO callback. */
-#define TLB_MMIO (1 << 5)
+#define TLB_MMIO (1 << TLB_MMIO_SHIFT)
void dump_exec_info(FILE *f, fprintf_function cpu_fprintf);
void dump_opcount_info(FILE *f, fprintf_function cpu_fprintf);
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
index 85aa403..ce7deb9 100644
--- a/include/exec/cpu-common.h
+++ b/include/exec/cpu-common.h
@@ -76,12 +76,12 @@ void cpu_physical_memory_rw(hwaddr addr, uint8_t *buf,
static inline void cpu_physical_memory_read(hwaddr addr,
void *buf, int len)
{
- cpu_physical_memory_rw(addr, buf, len, 0);
+ cpu_physical_memory_rw(addr, (uint8_t *)buf, len, 0);
}
static inline void cpu_physical_memory_write(hwaddr addr,
const void *buf, int len)
{
- cpu_physical_memory_rw(addr, (void *)buf, len, 1);
+ cpu_physical_memory_rw(addr, (uint8_t *)buf, len, 1);
}
void *cpu_physical_memory_map(hwaddr addr,
hwaddr *plen,
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
index 5093be2..b44e3f2 100644
--- a/include/exec/cpu-defs.h
+++ b/include/exec/cpu-defs.h
@@ -56,6 +56,8 @@ typedef uint64_t target_ulong;
#error TARGET_LONG_SIZE undefined
#endif
+#include "hqemu-config.h"
+
#if !defined(CONFIG_USER_ONLY)
/* use a fully associative victim tlb of 8 entries */
#define CPU_VTLB_SIZE 8
@@ -89,7 +91,7 @@ typedef uint64_t target_ulong;
* of tlb_table inside env (which is non-trivial but not huge).
*/
#define CPU_TLB_BITS \
- MIN(8, \
+ MIN(12, \
TCG_TARGET_TLB_DISPLACEMENT_BITS - CPU_TLB_ENTRY_BITS - \
(NB_MMU_MODES <= 1 ? 0 : \
NB_MMU_MODES <= 2 ? 1 : \
@@ -107,9 +109,9 @@ typedef struct CPUTLBEntry {
*/
union {
struct {
- target_ulong addr_read;
- target_ulong addr_write;
- target_ulong addr_code;
+ tlbaddr_t addr_read;
+ tlbaddr_t addr_write;
+ tlbaddr_t addr_code;
/* Addend to virtual address to get host address. IO accesses
use the corresponding iotlb value. */
uintptr_t addend;
@@ -140,6 +142,7 @@ typedef struct CPUIOTLBEntry {
target_ulong tlb_flush_addr; \
target_ulong tlb_flush_mask; \
target_ulong vtlb_index; \
+ tlbaddr_t tlb_version; \
#else
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
index b573df5..72acce7 100644
--- a/include/exec/cpu_ldst.h
+++ b/include/exec/cpu_ldst.h
@@ -405,7 +405,7 @@ static inline void *tlb_vaddr_to_host(CPUArchState *env, target_ulong addr,
#else
int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
CPUTLBEntry *tlbentry = &env->tlb_table[mmu_idx][index];
- target_ulong tlb_addr;
+ tlbaddr_t tlb_addr;
uintptr_t haddr;
switch (access_type) {
@@ -422,13 +422,22 @@ static inline void *tlb_vaddr_to_host(CPUArchState *env, target_ulong addr,
g_assert_not_reached();
}
+#if defined(ENABLE_TLBVERSION)
+ if (tlb_version(env) != (tlb_addr & TLB_VERSION_MASK))
+ return NULL;
+#endif
+
if ((addr & TARGET_PAGE_MASK)
!= (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
/* TLB entry is for a different page */
return NULL;
}
+#if defined(ENABLE_TLBVERSION)
+ if (tlb_addr & (TLB_NOTDIRTY | TLB_MMIO)) {
+#else
if (tlb_addr & ~TARGET_PAGE_MASK) {
+#endif
/* IO access */
return NULL;
}
diff --git a/include/exec/cpu_ldst_template.h b/include/exec/cpu_ldst_template.h
index 3091c00..2a01c6f 100644
--- a/include/exec/cpu_ldst_template.h
+++ b/include/exec/cpu_ldst_template.h
@@ -67,6 +67,14 @@
#define SRETSUFFIX glue(s, SUFFIX)
#endif
+#include "hqemu.h"
+
+#if defined(ENABLE_TLBVERSION)
+#define page_val(addr, env) ((((tlbaddr_t)addr + DATA_SIZE - 1) & TARGET_PAGE_MASK) | tlb_version(env))
+#else
+#define page_val(addr, env) (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1)))
+#endif
+
/* generic load/store macros */
static inline RES_TYPE
@@ -80,12 +88,17 @@ glue(glue(glue(cpu_ld, USUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
int mmu_idx;
TCGMemOpIdx oi;
+#ifdef SOFTMMU_CODE_ACCESS
+ if (build_llvm_only(env))
+ return glue(glue(ld, USUFFIX), _p)((uint8_t *)env->image_base + ptr);
+#endif
+
addr = ptr;
page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
mmu_idx = CPU_MMU_INDEX;
if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ !=
- (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
- oi = make_memop_idx(SHIFT, mmu_idx);
+ page_val(addr, env))) {
+ oi = make_memop_idx((TCGMemOp)SHIFT, mmu_idx);
res = glue(glue(helper_ret_ld, URETSUFFIX), MMUSUFFIX)(env, addr,
oi, retaddr);
} else {
@@ -112,12 +125,17 @@ glue(glue(glue(cpu_lds, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
int mmu_idx;
TCGMemOpIdx oi;
+#ifdef SOFTMMU_CODE_ACCESS
+ if (build_llvm_only(env))
+ return glue(glue(lds, SUFFIX), _p)((uint8_t *)env->image_base + ptr);
+#endif
+
addr = ptr;
page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
mmu_idx = CPU_MMU_INDEX;
if (unlikely(env->tlb_table[mmu_idx][page_index].ADDR_READ !=
- (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
- oi = make_memop_idx(SHIFT, mmu_idx);
+ page_val(addr, env))) {
+ oi = make_memop_idx((TCGMemOp)SHIFT, mmu_idx);
res = (DATA_STYPE)glue(glue(helper_ret_ld, SRETSUFFIX),
MMUSUFFIX)(env, addr, oi, retaddr);
} else {
@@ -152,8 +170,8 @@ glue(glue(glue(cpu_st, SUFFIX), MEMSUFFIX), _ra)(CPUArchState *env,
page_index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
mmu_idx = CPU_MMU_INDEX;
if (unlikely(env->tlb_table[mmu_idx][page_index].addr_write !=
- (addr & (TARGET_PAGE_MASK | (DATA_SIZE - 1))))) {
- oi = make_memop_idx(SHIFT, mmu_idx);
+ page_val(addr, env))) {
+ oi = make_memop_idx((TCGMemOp)SHIFT, mmu_idx);
glue(glue(helper_ret_st, SUFFIX), MMUSUFFIX)(env, addr, v, oi,
retaddr);
} else {
@@ -171,6 +189,7 @@ glue(glue(cpu_st, SUFFIX), MEMSUFFIX)(CPUArchState *env, target_ulong ptr,
#endif /* !SOFTMMU_CODE_ACCESS */
+#undef page_val
#undef RES_TYPE
#undef DATA_TYPE
#undef DATA_STYPE
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index d900b0d..a225bea 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -21,6 +21,7 @@
#define _EXEC_ALL_H_
#include "qemu-common.h"
+#include "hqemu-config.h"
/* allow to see translation results - the slowdown should be negligible, so we leave it */
#define DEBUG_DISAS
@@ -59,7 +60,7 @@ typedef struct TranslationBlock TranslationBlock;
* and up to 4 + N parameters on 64-bit archs
* (N = number of input arguments + output arguments). */
#define MAX_OPC_PARAM (4 + (MAX_OPC_PARAM_PER_ARG * MAX_OPC_PARAM_ARGS))
-#define OPC_BUF_SIZE 640
+#define OPC_BUF_SIZE 2048
#define OPC_MAX_SIZE (OPC_BUF_SIZE - MAX_OP_PER_INSTR)
#define OPPARAM_BUF_SIZE (OPC_BUF_SIZE * MAX_OPC_PARAM)
@@ -216,6 +217,8 @@ struct TranslationBlock {
jmp_first */
struct TranslationBlock *jmp_next[2];
struct TranslationBlock *jmp_first;
+
+ TB_OPTIMIZATION_COMMON
};
#include "qemu/thread.h"
@@ -305,7 +308,7 @@ static inline void tb_set_jmp_target(TranslationBlock *tb,
int n, uintptr_t addr)
{
uint16_t offset = tb->tb_jmp_offset[n];
- tb_set_jmp_target1((uintptr_t)(tb->tc_ptr + offset), addr);
+ tb_set_jmp_target1((uintptr_t)((uint8_t *)tb->tc_ptr + offset), addr);
}
#else
@@ -405,4 +408,6 @@ extern int singlestep;
extern CPUState *tcg_current_cpu;
extern bool exit_request;
+size_t get_cpu_size(void);
+
#endif
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 0f07159..c2a1cd3 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -208,9 +208,9 @@ struct MemoryListener {
void (*region_del)(MemoryListener *listener, MemoryRegionSection *section);
void (*region_nop)(MemoryListener *listener, MemoryRegionSection *section);
void (*log_start)(MemoryListener *listener, MemoryRegionSection *section,
- int old, int new);
+ int _old, int _new);
void (*log_stop)(MemoryListener *listener, MemoryRegionSection *section,
- int old, int new);
+ int _old, int _new);
void (*log_sync)(MemoryListener *listener, MemoryRegionSection *section);
void (*log_global_start)(MemoryListener *listener);
void (*log_global_stop)(MemoryListener *listener);
diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h
index c537969..4453e5b 100644
--- a/include/hw/qdev-core.h
+++ b/include/hw/qdev-core.h
@@ -10,6 +10,8 @@
#include "qapi/error.h"
#include "hw/hotplug.h"
+#define typename QEMUtypename
+
enum {
DEV_NVECTORS_UNSPECIFIED = -1,
};
@@ -401,4 +403,6 @@ static inline bool qbus_is_hotpluggable(BusState *bus)
void device_listener_register(DeviceListener *listener);
void device_listener_unregister(DeviceListener *listener);
+#undef typename
+
#endif
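
The #define typename QEMUtypename guard here (and the Type/class guards in qom/object.h below) exists because these headers declare parameters named after C++ reserved words, and HQEMU's LLVM backend includes them from C++. A minimal sketch of the pattern:

    /* Rename the C++ keyword while the C header is in scope, so the same
     * declarations parse under both a C and a C++ compiler. */
    #define typename QEMUtypename

    /* e.g. a declaration such as
     *     ObjectClass *object_class_by_name(const char *typename);
     * is seen by a C++ compiler as "const char *QEMUtypename".     */

    #undef typename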
diff --git a/include/qemu-common.h b/include/qemu-common.h
index 405364f..d0c2e20 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -454,7 +454,7 @@ int mod_utf8_codepoint(const char *s, size_t n, char **end);
void qemu_hexdump(const char *buf, FILE *fp, const char *prefix, size_t size);
/* vector definitions */
-#ifdef __ALTIVEC__
+#if defined(__ALTIVEC__) && !defined(__clang__)
#include <altivec.h>
/* The altivec.h header says we're allowed to undef these for
* C++ compatibility. Here we don't care about C++, but we
diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h
index bd2c075..e2125bd 100644
--- a/include/qemu/atomic.h
+++ b/include/qemu/atomic.h
@@ -158,13 +158,13 @@
#ifndef atomic_rcu_read
#ifdef __ATOMIC_CONSUME
#define atomic_rcu_read(ptr) ({ \
- typeof(*ptr) _val; \
+ __typeof__(*ptr) _val; \
__atomic_load(ptr, &_val, __ATOMIC_CONSUME); \
_val; \
})
#else
#define atomic_rcu_read(ptr) ({ \
- typeof(*ptr) _val = atomic_read(ptr); \
+ __typeof__(*ptr) _val = atomic_read(ptr); \
smp_read_barrier_depends(); \
_val; \
})
@@ -185,7 +185,7 @@
#ifndef atomic_rcu_set
#ifdef __ATOMIC_RELEASE
#define atomic_rcu_set(ptr, i) do { \
- typeof(*ptr) _val = (i); \
+ __typeof__(*ptr) _val = (i); \
__atomic_store(ptr, &_val, __ATOMIC_RELEASE); \
} while(0)
#else
@@ -220,7 +220,7 @@
*/
#ifndef atomic_mb_read
#define atomic_mb_read(ptr) ({ \
- typeof(*ptr) _val = atomic_read(ptr); \
+ __typeof__(*ptr) _val = atomic_read(ptr); \
smp_rmb(); \
_val; \
})
@@ -239,7 +239,7 @@
#define atomic_xchg(ptr, i) __sync_swap(ptr, i)
#elif defined(__ATOMIC_SEQ_CST)
#define atomic_xchg(ptr, i) ({ \
- typeof(*ptr) _new = (i), _old; \
+ __typeof__(*ptr) _new = (i), _old; \
__atomic_exchange(ptr, &_new, &_old, __ATOMIC_SEQ_CST); \
_old; \
})
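
The typeof -> __typeof__ rewrites in this and the following headers serve the same C++ build: plain typeof is a GNU C spelling that g++/clang++ reject in standard modes, while __typeof__ is accepted by gcc and clang in both languages. For example, this compiles as C and as C++:

    /* __typeof__ works in C and C++ alike; plain "typeof" would not. */
    #define sk_swap(a, b) do {          \
            __typeof__(a) _tmp = (a);   \
            (a) = (b);                  \
            (b) = _tmp;                 \
        } while (0)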
diff --git a/include/qemu/bitmap.h b/include/qemu/bitmap.h
index 86dd9cd..b53f462 100644
--- a/include/qemu/bitmap.h
+++ b/include/qemu/bitmap.h
@@ -71,7 +71,7 @@
unsigned long name[BITS_TO_LONGS(bits)]
#define small_nbits(nbits) \
- ((nbits) <= BITS_PER_LONG)
+ ((nbits) <= (long)BITS_PER_LONG)
int slow_bitmap_empty(const unsigned long *bitmap, long bits);
int slow_bitmap_full(const unsigned long *bitmap, long bits);
@@ -97,7 +97,7 @@ int slow_bitmap_intersects(const unsigned long *bitmap1,
static inline unsigned long *bitmap_try_new(long nbits)
{
long len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
- return g_try_malloc0(len);
+ return (unsigned long *)g_try_malloc0(len);
}
static inline unsigned long *bitmap_new(long nbits)
@@ -241,9 +241,9 @@ static inline unsigned long *bitmap_zero_extend(unsigned long *old,
long old_nbits, long new_nbits)
{
long new_len = BITS_TO_LONGS(new_nbits) * sizeof(unsigned long);
- unsigned long *new = g_realloc(old, new_len);
- bitmap_clear(new, old_nbits, new_nbits - old_nbits);
- return new;
+ unsigned long *new_bitmap = (unsigned long *)g_realloc(old, new_len);
+ bitmap_clear(new_bitmap, old_nbits, new_nbits - old_nbits);
+ return new_bitmap;
}
#endif /* BITMAP_H */
diff --git a/include/qemu/compiler.h b/include/qemu/compiler.h
index d22eb01..0abf0f8 100644
--- a/include/qemu/compiler.h
+++ b/include/qemu/compiler.h
@@ -60,7 +60,7 @@
#ifndef container_of
#define container_of(ptr, type, member) ({ \
- const typeof(((type *) 0)->member) *__mptr = (ptr); \
+ const __typeof__(((type *) 0)->member) *__mptr = (ptr); \
(type *) ((char *) __mptr - offsetof(type, member));})
#endif
@@ -74,7 +74,7 @@
#define DO_UPCAST(type, field, dev) container_of(dev, type, field)
#endif
-#define typeof_field(type, field) typeof(((type *)0)->field)
+#define typeof_field(type, field) __typeof__(((type *)0)->field)
#define type_check(t1,t2) ((t1*)0 - (t2*)0)
#ifndef always_inline
diff --git a/include/qemu/queue.h b/include/qemu/queue.h
index f781aa2..b56bce5 100644
--- a/include/qemu/queue.h
+++ b/include/qemu/queue.h
@@ -198,7 +198,7 @@ struct { \
} while (/*CONSTCOND*/0)
#define QSLIST_INSERT_HEAD_ATOMIC(head, elm, field) do { \
- typeof(elm) save_sle_next; \
+ __typeof__(elm) save_sle_next; \
do { \
save_sle_next = (elm)->field.sle_next = (head)->slh_first; \
} while (atomic_cmpxchg(&(head)->slh_first, save_sle_next, (elm)) != \
diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h
index f6d1d56..0d9f677 100644
--- a/include/qemu/rcu.h
+++ b/include/qemu/rcu.h
@@ -135,8 +135,8 @@ extern void call_rcu1(struct rcu_head *head, RCUCBFunc *func);
#define call_rcu(head, func, field) \
call_rcu1(({ \
char __attribute__((unused)) \
- offset_must_be_zero[-offsetof(typeof(*(head)), field)], \
- func_type_invalid = (func) - (void (*)(typeof(head)))(func); \
+ offset_must_be_zero[-offsetof(__typeof__(*(head)), field)], \
+ func_type_invalid = (func) - (void (*)(__typeof__(head)))(func); \
&(head)->field; \
}), \
(RCUCBFunc *)(func))
@@ -144,7 +144,7 @@ extern void call_rcu1(struct rcu_head *head, RCUCBFunc *func);
#define g_free_rcu(obj, field) \
call_rcu1(({ \
char __attribute__((unused)) \
- offset_must_be_zero[-offsetof(typeof(*(obj)), field)]; \
+ offset_must_be_zero[-offsetof(__typeof__(*(obj)), field)]; \
&(obj)->field; \
}), \
(RCUCBFunc *)g_free);
diff --git a/include/qemu/timer.h b/include/qemu/timer.h
index d0946cb..a16effa 100644
--- a/include/qemu/timer.h
+++ b/include/qemu/timer.h
@@ -523,7 +523,7 @@ static inline QEMUTimer *timer_new_tl(QEMUTimerList *timer_list,
QEMUTimerCB *cb,
void *opaque)
{
- QEMUTimer *ts = g_malloc0(sizeof(QEMUTimer));
+ QEMUTimer *ts = (QEMUTimer *)g_malloc0(sizeof(QEMUTimer));
timer_init_tl(ts, timer_list, scale, cb, opaque);
return ts;
}
@@ -965,7 +965,7 @@ static inline int64_t cpu_get_host_ticks (void)
#define MIPS_RDHWR(rd, value) { \
__asm__ __volatile__ (".set push\n\t" \
".set mips32r2\n\t" \
- "rdhwr %0, "rd"\n\t" \
+ "rdhwr %0, " rd "\n\t" \
".set pop" \
: "=r" (value)); \
}
diff --git a/include/qom/cpu.h b/include/qom/cpu.h
index 51a1323..4b005ff 100644
--- a/include/qom/cpu.h
+++ b/include/qom/cpu.h
@@ -30,6 +30,8 @@
#include "qemu/thread.h"
#include "qemu/typedefs.h"
+#define typename QEMUtypename
+
typedef int (*WriteCoreDumpFunction)(const void *buf, size_t size,
void *opaque);
@@ -196,7 +198,7 @@ typedef struct CPUBreakpoint {
} CPUBreakpoint;
typedef struct CPUWatchpoint {
- vaddr vaddr;
+ vaddr addr;
vaddr len;
vaddr hitaddr;
MemTxAttrs hitattrs;
@@ -775,4 +777,7 @@ extern const struct VMStateDescription vmstate_cpu_common;
.offset = 0, \
}
+CPUState *cpu_create(void);
+#undef typename
+
#endif
diff --git a/include/qom/object.h b/include/qom/object.h
index 4509166..118c227 100644
--- a/include/qom/object.h
+++ b/include/qom/object.h
@@ -20,6 +20,10 @@
#include "qemu/queue.h"
#include "qapi/error.h"
+#define Type QEMUType
+#define class QEMUclass
+#define typename QEMUtypename
+
struct Visitor;
struct TypeImpl;
@@ -1570,5 +1574,8 @@ int object_child_foreach_recursive(Object *obj,
*/
Object *container_get(Object *root, const char *path);
+#undef Type
+#undef class
+#undef typename
#endif
diff --git a/include/sysemu/cpus.h b/include/sysemu/cpus.h
index 3d1e5ba..d594ebf 100644
--- a/include/sysemu/cpus.h
+++ b/include/sysemu/cpus.h
@@ -4,6 +4,7 @@
/* cpus.c */
bool qemu_in_vcpu_thread(void);
void qemu_init_cpu_loop(void);
+void qemu_end_cpu_loop(void);
void resume_all_vcpus(void);
void pause_all_vcpus(void);
void cpu_stop_current(void);
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 8b17c0e..7be6e71 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -2001,9 +2001,13 @@ static void load_elf_image(const char *image_name, int image_fd,
info->brk = info->end_code;
}
+#if defined(CONFIG_LLVM)
+ load_symbols(ehdr, image_fd, load_bias);
+#else
if (qemu_log_enabled()) {
load_symbols(ehdr, image_fd, load_bias);
}
+#endif
close(image_fd);
return;
diff --git a/linux-user/main.c b/linux-user/main.c
index 8acfe0f..0f67ad4 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -33,11 +33,12 @@
#include "qemu/timer.h"
#include "qemu/envlist.h"
#include "elf.h"
+#include "hqemu.h"
char *exec_path;
int singlestep;
-static const char *filename;
+const char *filename;
static const char *argv0;
static int gdbstub_port;
static envlist_t *envlist;
@@ -105,7 +106,10 @@ static int pending_cpus;
/* Make sure everything is in a consistent state for calling fork(). */
void fork_start(void)
{
- qemu_mutex_lock(&tcg_ctx.tb_ctx.tb_lock);
+#if defined(CONFIG_LLVM)
+ llvm_fork_start();
+#endif
+ qemu_mutex_lock(&tcg_ctx.tb_ctx->tb_lock);
pthread_mutex_lock(&exclusive_lock);
mmap_fork_start();
}
@@ -127,12 +131,15 @@ void fork_end(int child)
pthread_mutex_init(&cpu_list_mutex, NULL);
pthread_cond_init(&exclusive_cond, NULL);
pthread_cond_init(&exclusive_resume, NULL);
- qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock);
+ qemu_mutex_init(&tcg_ctx.tb_ctx->tb_lock);
gdbserver_fork(thread_cpu);
} else {
pthread_mutex_unlock(&exclusive_lock);
- qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
+ qemu_mutex_unlock(&tcg_ctx.tb_ctx->tb_lock);
}
+#if defined(CONFIG_LLVM)
+ llvm_fork_end(child);
+#endif
}
/* Wait for pending exclusive operations to complete. The exclusive lock
@@ -276,6 +283,9 @@ void cpu_loop(CPUX86State *env)
abi_ulong pc;
target_siginfo_t info;
+ copy_tcg_context();
+ optimization_init(env);
+
for(;;) {
cpu_exec_start(cs);
trapnr = cpu_x86_exec(cs);
@@ -670,6 +680,9 @@ void cpu_loop(CPUARMState *env)
target_siginfo_t info;
uint32_t addr;
+ copy_tcg_context();
+ optimization_init(env);
+
for(;;) {
cpu_exec_start(cs);
trapnr = cpu_arm_exec(cs);
@@ -1001,6 +1014,9 @@ void cpu_loop(CPUARMState *env)
int trapnr, sig;
target_siginfo_t info;
+ copy_tcg_context();
+ optimization_init(env);
+
for (;;) {
cpu_exec_start(cs);
trapnr = cpu_arm_exec(cs);
@@ -1083,6 +1099,9 @@ void cpu_loop(CPUUniCore32State *env)
unsigned int n, insn;
target_siginfo_t info;
+ copy_tcg_context();
+ optimization_init(env);
+
for (;;) {
cpu_exec_start(cs);
trapnr = uc32_cpu_exec(cs);
@@ -1284,6 +1303,9 @@ void cpu_loop (CPUSPARCState *env)
abi_long ret;
target_siginfo_t info;
+ copy_tcg_context();
+ optimization_init(env);
+
while (1) {
cpu_exec_start(cs);
trapnr = cpu_sparc_exec(cs);
@@ -1564,6 +1586,9 @@ void cpu_loop(CPUPPCState *env)
int trapnr;
target_ulong ret;
+ copy_tcg_context();
+ optimization_init(env);
+
for(;;) {
cpu_exec_start(cs);
trapnr = cpu_ppc_exec(cs);
@@ -2416,6 +2441,9 @@ void cpu_loop(CPUMIPSState *env)
unsigned int syscall_num;
# endif
+ copy_tcg_context();
+ optimization_init(env);
+
for(;;) {
cpu_exec_start(cs);
trapnr = cpu_mips_exec(cs);
@@ -2653,6 +2681,9 @@ void cpu_loop(CPUOpenRISCState *env)
CPUState *cs = CPU(openrisc_env_get_cpu(env));
int trapnr, gdbsig;
+ copy_tcg_context();
+ optimization_init(env);
+
for (;;) {
cpu_exec_start(cs);
trapnr = cpu_openrisc_exec(cs);
@@ -2743,6 +2774,9 @@ void cpu_loop(CPUSH4State *env)
int trapnr, ret;
target_siginfo_t info;
+ copy_tcg_context();
+ optimization_init(env);
+
while (1) {
cpu_exec_start(cs);
trapnr = cpu_sh4_exec(cs);
@@ -2805,6 +2839,9 @@ void cpu_loop(CPUCRISState *env)
int trapnr, ret;
target_siginfo_t info;
+ copy_tcg_context();
+ optimization_init(env);
+
while (1) {
cpu_exec_start(cs);
trapnr = cpu_cris_exec(cs);
@@ -2866,6 +2903,9 @@ void cpu_loop(CPUMBState *env)
int trapnr, ret;
target_siginfo_t info;
+ copy_tcg_context();
+ optimization_init(env);
+
while (1) {
cpu_exec_start(cs);
trapnr = cpu_mb_exec(cs);
@@ -2971,6 +3011,9 @@ void cpu_loop(CPUM68KState *env)
target_siginfo_t info;
TaskState *ts = cs->opaque;
+ copy_tcg_context();
+ optimization_init(env);
+
for(;;) {
cpu_exec_start(cs);
trapnr = cpu_m68k_exec(cs);
@@ -3110,6 +3153,9 @@ void cpu_loop(CPUAlphaState *env)
target_siginfo_t info;
abi_long sysret;
+ copy_tcg_context();
+ optimization_init(env);
+
while (1) {
cpu_exec_start(cs);
trapnr = cpu_alpha_exec(cs);
@@ -3298,6 +3344,9 @@ void cpu_loop(CPUS390XState *env)
target_siginfo_t info;
target_ulong addr;
+ copy_tcg_context();
+ optimization_init(env);
+
while (1) {
cpu_exec_start(cs);
trapnr = cpu_s390x_exec(cs);
@@ -3602,6 +3651,9 @@ void cpu_loop(CPUTLGState *env)
CPUState *cs = CPU(tilegx_env_get_cpu(env));
int trapnr;
+ copy_tcg_context();
+ optimization_init(env);
+
while (1) {
cpu_exec_start(cs);
trapnr = cpu_tilegx_exec(cs);
@@ -3711,7 +3763,7 @@ CPUArchState *cpu_copy(CPUArchState *env)
cpu_breakpoint_insert(new_cpu, bp->pc, bp->flags, NULL);
}
QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
- cpu_watchpoint_insert(new_cpu, wp->vaddr, wp->len, wp->flags, NULL);
+ cpu_watchpoint_insert(new_cpu, wp->addr, wp->len, wp->flags, NULL);
}
return new_env;
@@ -4009,6 +4061,12 @@ static void usage(int exitcode)
"Note that if you provide several changes to a single variable\n"
"the last change will stay in effect.\n");
+#if defined(CONFIG_LLVM)
+ printf("\n\nHQEMU ");
+ fflush(stdout);
+ hqemu_help();
+#endif
+
exit(exitcode);
}
@@ -4324,7 +4382,11 @@ int main(int argc, char **argv, char **envp)
/* Now that we've loaded the binary, GUEST_BASE is fixed. Delay
generating the prologue until now so that the prologue can take
the real value of GUEST_BASE into account. */
- tcg_prologue_init(&tcg_ctx);
+ tcg_prologue_init(&tcg_ctx_global);
+
+#if defined(CONFIG_LLVM)
+ llvm_init();
+#endif
#if defined(TARGET_I386)
env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK;
@@ -4663,6 +4725,7 @@ int main(int argc, char **argv, char **envp)
}
gdb_handlesig(cpu, 0);
}
+
cpu_loop(env);
/* never exits */
return 0;
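
Every cpu_loop() variant above now calls copy_tcg_context() and optimization_init(env) before entering its loop, and main() initializes the prologue on tcg_ctx_global rather than tcg_ctx. A plausible reading, sketched below under the assumption that HQEMU keeps one thread-local TCGContext cloned from a global template so each guest thread can translate independently (the stand-in struct is illustrative):

    #include <string.h>

    typedef struct SketchTCGContext {
        void *code_gen_ptr;   /* stand-in for the real TCGContext fields */
        /* ... */
    } SketchTCGContext;

    SketchTCGContext tcg_ctx_global;            /* set up once at startup    */
    static __thread SketchTCGContext tcg_ctx;   /* one copy per guest thread */

    static void copy_tcg_context(void)
    {
        /* Each new guest thread starts from the fully initialized
         * template (prologue address, helpers, target globals). */
        memcpy(&tcg_ctx, &tcg_ctx_global, sizeof(tcg_ctx));
    }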
diff --git a/linux-user/strace.c b/linux-user/strace.c
index ea6c1d2..69d5408 100644
--- a/linux-user/strace.c
+++ b/linux-user/strace.c
@@ -7,6 +7,7 @@
#include <sys/types.h>
#include <sys/mount.h>
#include <sys/mman.h>
+#include <sys/sysmacros.h>
#include <unistd.h>
#include <sched.h>
#include "qemu.h"
diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 6c64ba6..030eb2a 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -114,6 +114,7 @@ int __clone2(int (*fn)(void *), void *child_stack_base,
#include "uname.h"
#include "qemu.h"
+#include "hqemu.h"
#define CLONE_NPTL_FLAGS2 (CLONE_SETTLS | \
CLONE_PARENT_SETTID | CLONE_CHILD_SETTID | CLONE_CHILD_CLEARTID)
@@ -4495,7 +4496,7 @@ abi_long do_arch_prctl(CPUX86State *env, int code, abi_ulong addr)
#endif /* defined(TARGET_I386) */
-#define NEW_STACK_SIZE 0x40000
+#define NEW_STACK_SIZE 0x80000
static pthread_mutex_t clone_lock = PTHREAD_MUTEX_INITIALIZER;
@@ -5710,6 +5711,12 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
rcu_unregister_thread();
pthread_exit(NULL);
}
+
+ optimization_finalize((CPUArchState *)cpu_env);
+#if defined(CONFIG_LLVM)
+ llvm_finalize();
+#endif
+
#ifdef TARGET_GPROF
_mcleanup();
#endif
@@ -7615,6 +7622,10 @@ abi_long do_syscall(void *cpu_env, int num, abi_long arg1,
#ifdef __NR_exit_group
/* new thread calls */
case TARGET_NR_exit_group:
+ optimization_finalize((CPUArchState *)cpu_env);
+#if defined(CONFIG_LLVM)
+ llvm_finalize();
+#endif
#ifdef TARGET_GPROF
_mcleanup();
#endif
diff --git a/qga/commands-posix.c b/qga/commands-posix.c
index c2ff970..e6c9f51 100644
--- a/qga/commands-posix.c
+++ b/qga/commands-posix.c
@@ -15,6 +15,7 @@
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/wait.h>
+#include <sys/sysmacros.h>
#include <unistd.h>
#include <errno.h>
#include <fcntl.h>
diff --git a/qom/object.c b/qom/object.c
index d751569..deb182f 100644
--- a/qom/object.c
+++ b/qom/object.c
@@ -28,6 +28,10 @@
#include "qapi/qmp/qint.h"
#include "qapi/qmp/qstring.h"
+#define Type QEMUType
+#define class QEMUclass
+#define typename QEMUtypename
+
#define MAX_INTERFACES 32
typedef struct InterfaceImpl InterfaceImpl;
@@ -2126,3 +2130,7 @@ static void register_types(void)
}
type_init(register_types)
+
+#undef Type
+#undef class
+#undef typename
diff --git a/softmmu_template.h b/softmmu_template.h
index 6803890..4574545 100644
--- a/softmmu_template.h
+++ b/softmmu_template.h
@@ -24,6 +24,7 @@
#include "qemu/timer.h"
#include "exec/address-spaces.h"
#include "exec/memory.h"
+#include "hqemu-config.h"
#define DATA_SIZE (1 << SHIFT)
@@ -116,6 +117,16 @@
# define helper_te_st_name helper_le_st_name
#endif
+#if defined(ENABLE_TLBVERSION)
+#define TLB_IO_MASK (TLB_NOTDIRTY | TLB_MMIO)
+#define TLB_NONIO_MASK (TARGET_PAGE_MASK | TLB_INVALID_MASK | TLB_VERSION_MASK)
+#define page_val(addr, env) (((tlbaddr_t)addr & TARGET_PAGE_MASK) | tlb_version(env))
+#else
+#define TLB_IO_MASK (~TARGET_PAGE_MASK)
+#define TLB_NONIO_MASK (TARGET_PAGE_MASK | TLB_INVALID_MASK)
+#define page_val(addr, env) ((addr & TARGET_PAGE_MASK))
+#endif
+
/* macro to check the victim tlb */
#define VICTIM_TLB_HIT(ty) \
({ \
@@ -126,7 +137,7 @@
CPUIOTLBEntry tmpiotlb; \
CPUTLBEntry tmptlb; \
for (vidx = CPU_VTLB_SIZE-1; vidx >= 0; --vidx) { \
- if (env->tlb_v_table[mmu_idx][vidx].ty == (addr & TARGET_PAGE_MASK)) {\
+ if (env->tlb_v_table[mmu_idx][vidx].ty == page_val(addr, env)) { \
/* found entry in victim tlb, swap tlb and iotlb */ \
tmptlb = env->tlb_table[mmu_idx][index]; \
env->tlb_table[mmu_idx][index] = env->tlb_v_table[mmu_idx][vidx]; \
@@ -170,7 +181,7 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr,
{
unsigned mmu_idx = get_mmuidx(oi);
int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
- target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
+ tlbaddr_t tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
uintptr_t haddr;
DATA_TYPE res;
@@ -178,8 +189,7 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr,
retaddr -= GETPC_ADJ;
/* If the TLB entry is for a different page, reload and try again. */
- if ((addr & TARGET_PAGE_MASK)
- != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
+ if (page_val(addr, env) != (tlb_addr & TLB_NONIO_MASK)) {
if ((addr & (DATA_SIZE - 1)) != 0
&& (get_memop(oi) & MO_AMASK) == MO_ALIGN) {
cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE,
@@ -193,7 +203,7 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr,
}
/* Handle an IO access. */
- if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
+ if (unlikely(tlb_addr & TLB_IO_MASK)) {
CPUIOTLBEntry *iotlbentry;
if ((addr & (DATA_SIZE - 1)) != 0) {
goto do_unaligned_access;
@@ -254,7 +264,7 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr,
{
unsigned mmu_idx = get_mmuidx(oi);
int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
- target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
+ tlbaddr_t tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
uintptr_t haddr;
DATA_TYPE res;
@@ -262,8 +272,7 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr,
retaddr -= GETPC_ADJ;
/* If the TLB entry is for a different page, reload and try again. */
- if ((addr & TARGET_PAGE_MASK)
- != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
+ if (page_val(addr, env) != (tlb_addr & TLB_NONIO_MASK)) {
if ((addr & (DATA_SIZE - 1)) != 0
&& (get_memop(oi) & MO_AMASK) == MO_ALIGN) {
cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE,
@@ -277,7 +286,7 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr,
}
/* Handle an IO access. */
- if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
+ if (unlikely(tlb_addr & TLB_IO_MASK)) {
CPUIOTLBEntry *iotlbentry;
if ((addr & (DATA_SIZE - 1)) != 0) {
goto do_unaligned_access;
@@ -375,15 +384,14 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
{
unsigned mmu_idx = get_mmuidx(oi);
int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
- target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
+ tlbaddr_t tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
uintptr_t haddr;
/* Adjust the given return address. */
retaddr -= GETPC_ADJ;
/* If the TLB entry is for a different page, reload and try again. */
- if ((addr & TARGET_PAGE_MASK)
- != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
+ if (page_val(addr, env) != (tlb_addr & TLB_NONIO_MASK)) {
if ((addr & (DATA_SIZE - 1)) != 0
&& (get_memop(oi) & MO_AMASK) == MO_ALIGN) {
cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE,
@@ -396,7 +404,7 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
}
/* Handle an IO access. */
- if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
+ if (unlikely(tlb_addr & TLB_IO_MASK)) {
CPUIOTLBEntry *iotlbentry;
if ((addr & (DATA_SIZE - 1)) != 0) {
goto do_unaligned_access;
@@ -455,15 +463,14 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
{
unsigned mmu_idx = get_mmuidx(oi);
int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
- target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
+ tlbaddr_t tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
uintptr_t haddr;
/* Adjust the given return address. */
retaddr -= GETPC_ADJ;
/* If the TLB entry is for a different page, reload and try again. */
- if ((addr & TARGET_PAGE_MASK)
- != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
+ if (page_val(addr, env) != (tlb_addr & TLB_NONIO_MASK)) {
if ((addr & (DATA_SIZE - 1)) != 0
&& (get_memop(oi) & MO_AMASK) == MO_ALIGN) {
cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE,
@@ -476,7 +483,7 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
}
/* Handle an IO access. */
- if (unlikely(tlb_addr & ~TARGET_PAGE_MASK)) {
+ if (unlikely(tlb_addr & TLB_IO_MASK)) {
CPUIOTLBEntry *iotlbentry;
if ((addr & (DATA_SIZE - 1)) != 0) {
goto do_unaligned_access;
@@ -537,10 +544,9 @@ void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx,
uintptr_t retaddr)
{
int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
- target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
+ tlbaddr_t tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
- if ((addr & TARGET_PAGE_MASK)
- != (tlb_addr & (TARGET_PAGE_MASK | TLB_INVALID_MASK))) {
+ if (page_val(addr, env) != (tlb_addr & TLB_NONIO_MASK)) {
/* TLB entry is for a different page */
if (!VICTIM_TLB_HIT(addr_write)) {
tlb_fill(ENV_GET_CPU(env), addr, MMU_DATA_STORE, mmu_idx, retaddr);
@@ -550,6 +556,11 @@ void probe_write(CPUArchState *env, target_ulong addr, int mmu_idx,
#endif
#endif /* !defined(SOFTMMU_CODE_ACCESS) */
+#include "softmmu_template_llvm.h"
+
+#undef TLB_IO_MASK
+#undef TLB_NONIO_MASK
+#undef page_val
#undef READ_ACCESS_TYPE
#undef SHIFT
#undef DATA_TYPE
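
One subtlety in the softmmu_template.h hunks: the IO-access test changes from "tlb_addr & ~TARGET_PAGE_MASK" to "tlb_addr & TLB_IO_MASK". With version bits stored above TARGET_PAGE_MASK, the old test would flag every versioned entry as IO, so only the genuine IO flags may be tested. A sketch, using the pre-overlay flag values from cpu-all.h:

    #include <stdint.h>

    #define SK_TLB_NOTDIRTY  (1 << 4)
    #define SK_TLB_MMIO      (1 << 5)
    #define SK_TLB_IO_MASK   (SK_TLB_NOTDIRTY | SK_TLB_MMIO)

    static inline int sketch_is_io(uint64_t tlb_addr)
    {
        /* Test only the real IO flags; "~TARGET_PAGE_MASK" would also
         * catch the version bits and misclassify ordinary RAM hits. */
        return (tlb_addr & SK_TLB_IO_MASK) != 0;
    }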
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 815fef8..1087075 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -437,7 +437,7 @@ typedef struct CPUARMState {
* the two execution states, and means we do not need to explicitly
* map these registers when changing states.
*/
- float64 regs[64];
+ float64 regs[64] __attribute__((aligned(16)));
uint32_t xregs[16];
/* We store these fpcsr fields separately for convenience. */
@@ -496,6 +496,8 @@ typedef struct CPUARMState {
/* Internal CPU feature flags. */
uint64_t features;
+ CPU_OPTIMIZATION_COMMON
+
/* PMSAv7 MPU */
struct {
uint32_t *drbar;
@@ -1509,7 +1511,7 @@ bool write_cpustate_to_list(ARMCPU *cpu);
/* The ARM MMU allows 1k pages. */
/* ??? Linux doesn't actually use these, and they're deprecated in recent
architecture revisions. Maybe a configure option to disable them. */
-#define TARGET_PAGE_BITS 10
+#define TARGET_PAGE_BITS 12
#endif
#if defined(TARGET_AARCH64)
@@ -1523,7 +1525,7 @@ bool write_cpustate_to_list(ARMCPU *cpu);
static inline bool arm_excp_unmasked(CPUState *cs, unsigned int excp_idx,
unsigned int target_el)
{
- CPUARMState *env = cs->env_ptr;
+ CPUARMState *env = (CPUARMState *)cs->env_ptr;
unsigned int cur_el = arm_current_el(env);
bool secure = arm_is_secure(env);
bool pstate_unmasked;
@@ -1983,6 +1985,62 @@ static inline void cpu_get_tb_cpu_state(CPUARMState *env, target_ulong *pc,
*cs_base = 0;
}
+static inline target_ulong cpu_get_pc(CPUARMState *env)
+{
+#if defined(TARGET_AARCH64)
+ return env->pc;
+#else
+ return env->regs[15];
+#endif
+}
+
+static inline int cpu_check_state(CPUARMState *env,
+ target_ulong cs_base, int flags)
+{
+ int f;
+ if (is_a64(env)) {
+ f = ARM_TBFLAG_AARCH64_STATE_MASK;
+ } else {
+ f = (env->thumb << ARM_TBFLAG_THUMB_SHIFT)
+ | (env->vfp.vec_len << ARM_TBFLAG_VECLEN_SHIFT)
+ | (env->vfp.vec_stride << ARM_TBFLAG_VECSTRIDE_SHIFT)
+ | (env->condexec_bits << ARM_TBFLAG_CONDEXEC_SHIFT)
+ | (env->bswap_code << ARM_TBFLAG_BSWAP_CODE_SHIFT);
+ if (!(access_secure_reg(env))) {
+ f |= ARM_TBFLAG_NS_MASK;
+ }
+ if (env->vfp.xregs[ARM_VFP_FPEXC] & (1 << 30)
+ || arm_el_is_aa64(env, 1)) {
+ f |= ARM_TBFLAG_VFPEN_MASK;
+ }
+ f |= (extract32(env->cp15.c15_cpar, 0, 2)
+ << ARM_TBFLAG_XSCALE_CPAR_SHIFT);
+ }
+
+ f |= (cpu_mmu_index(env, false) << ARM_TBFLAG_MMUIDX_SHIFT);
+ /* The SS_ACTIVE and PSTATE_SS bits correspond to the state machine
+ * states defined in the ARM ARM for software singlestep:
+ * SS_ACTIVE PSTATE.SS State
+ * 0 x Inactive (the TB flag for SS is always 0)
+ * 1 0 Active-pending
+ * 1 1 Active-not-pending
+ */
+ if (arm_singlestep_active(env)) {
+ f |= ARM_TBFLAG_SS_ACTIVE_MASK;
+ if (is_a64(env)) {
+ if (env->pstate & PSTATE_SS) {
+ f |= ARM_TBFLAG_PSTATE_SS_MASK;
+ }
+ } else {
+ if (env->uncached_cpsr & PSTATE_SS) {
+ f |= ARM_TBFLAG_PSTATE_SS_MASK;
+ }
+ }
+ }
+ f |= fp_exception_el(env) << ARM_TBFLAG_FPEXC_EL_SHIFT;
+ return f == flags;
+}
+
#include "exec/exec-all.h"
enum {
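
cpu_get_pc() and cpu_check_state() give the optimizer a target-neutral way to
ask "does this cached translation still match the live CPU state?":
cpu_check_state() recomputes the TB flag word with the same logic as
cpu_get_tb_cpu_state() above and compares it against the flags the block was
translated under. A hypothetical caller (the real consumer lives in the HQEMU
runtime, not in this hunk):

    static inline bool translation_matches(CPUARMState *env,
                                           const TranslationBlock *tb)
    {
        return cpu_get_pc(env) == tb->pc
            && cpu_check_state(env, tb->cs_base, tb->flags);
    }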
diff --git a/target-arm/helper.c b/target-arm/helper.c
index afc4163..302e88c 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -11,6 +11,7 @@
#include "arm_ldst.h"
#include <zlib.h> /* For crc32 */
#include "exec/semihost.h"
+#include "hqemu.h"
#define ARM_CPU_FREQ 1000000000 /* FIXME: 1 GHz, should be configurable */
@@ -2225,6 +2226,8 @@ static void vmsa_ttbr_write(CPUARMState *env, const ARMCPRegInfo *ri,
tlb_flush(CPU(cpu), 1);
}
raw_write(env, ri, value);
+
+ pcid = (target_ulong)value >> 12;
}
static void vttbr_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -7977,29 +7980,23 @@ float64 VFP_HELPER(sqrt, d)(float64 a, CPUARMState *env)
/* XXX: check quiet/signaling case */
#define DO_VFP_cmp(p, type) \
-void VFP_HELPER(cmp, p)(type a, type b, CPUARMState *env) \
+uint32_t VFP_HELPER(cmp, p)(type a, type b, CPUARMState *env) \
{ \
- uint32_t flags; \
- switch(type ## _compare_quiet(a, b, &env->vfp.fp_status)) { \
- case 0: flags = 0x6; break; \
- case -1: flags = 0x8; break; \
- case 1: flags = 0x2; break; \
- default: case 2: flags = 0x3; break; \
- } \
- env->vfp.xregs[ARM_VFP_FPSCR] = (flags << 28) \
- | (env->vfp.xregs[ARM_VFP_FPSCR] & 0x0fffffff); \
+ uint32_t flags = 0x3; \
+ int ret = type ## _compare_quiet(a, b, &env->vfp.fp_status); \
+ if (ret == 0) flags = 0x6; \
+ else if (ret == -1) flags = 0x8; \
+ else if (ret == 1) flags = 0x2; \
+ return flags << 28; \
} \
-void VFP_HELPER(cmpe, p)(type a, type b, CPUARMState *env) \
+uint32_t VFP_HELPER(cmpe, p)(type a, type b, CPUARMState *env) \
{ \
- uint32_t flags; \
- switch(type ## _compare(a, b, &env->vfp.fp_status)) { \
- case 0: flags = 0x6; break; \
- case -1: flags = 0x8; break; \
- case 1: flags = 0x2; break; \
- default: case 2: flags = 0x3; break; \
- } \
- env->vfp.xregs[ARM_VFP_FPSCR] = (flags << 28) \
- | (env->vfp.xregs[ARM_VFP_FPSCR] & 0x0fffffff); \
+ uint32_t flags = 0x3; \
+ int ret = type ## _compare(a, b, &env->vfp.fp_status); \
+ if (ret == 0) flags = 0x6; \
+ else if (ret == -1) flags = 0x8; \
+ else if (ret == 1) flags = 0x2; \
+ return flags << 28; \
}
DO_VFP_cmp(s, float32)
DO_VFP_cmp(d, float64)
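
The compare helpers now return the NZCV nibble positioned in bits [31:28]
(equal -> 0x6 = Z|C, less-than -> 0x8 = N, greater -> 0x2 = C,
unordered -> 0x3 = C|V) instead of writing FPSCR themselves; the merge is
emitted as TCG ops by gen_update_fpscr() in target-arm/translate.c. In C
terms the split is equivalent to this sketch:

    /* nzcv arrives with FPSCR.NZCV already in bits [31:28] */
    uint32_t nzcv = helper_vfp_cmps(a, b, env);
    env->vfp.xregs[ARM_VFP_FPSCR] =
        (env->vfp.xregs[ARM_VFP_FPSCR] & 0x0fffffff) | nzcv;

Freeing the helper of CPU-state writes presumably lets the LLVM backend treat
the call as a pure value computation.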
@@ -8777,3 +8774,12 @@ uint32_t HELPER(crc32c)(uint32_t acc, uint32_t val, uint32_t bytes)
/* Linux crc32c converts the output to one's complement. */
return crc32c(acc, buf, bytes) ^ 0xffffffff;
}
+
+CPUState *cpu_create(void)
+{
+ ARMCPU *cpu = g_malloc0(sizeof(ARMCPU));
+ CPUState *cs = CPU(cpu);
+ memcpy(cpu, ARM_CPU(first_cpu), sizeof(ARMCPU));
+ cs->env_ptr = &cpu->env;
+ return cs;
+}
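
cpu_create() hands out a shadow vCPU: it shallow-copies the fully initialized
first_cpu, QOM header included, and repoints env_ptr at the copy's own
register file. The caller is not part of this patch; a plausible use (an
assumption) is giving the LLVM translation thread a private CPUARMState so
that helpers run during re-translation cannot clobber the executing vCPU:

    CPUState *shadow = cpu_create();
    CPUARMState *env = (CPUARMState *)shadow->env_ptr;
    /* ... decode and retranslate guest code against 'env' ... */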
diff --git a/target-arm/helper.h b/target-arm/helper.h
index c2a85c7..41c2c6d 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -56,6 +56,7 @@ DEF_HELPER_2(pre_smc, void, env, i32)
DEF_HELPER_1(check_breakpoints, void, env)
+DEF_HELPER_3(cpsr_write_nzcv, void, env, i32, i32)
DEF_HELPER_3(cpsr_write, void, env, i32, i32)
DEF_HELPER_1(cpsr_read, i32, env)
@@ -103,10 +104,10 @@ DEF_HELPER_1(vfp_abss, f32, f32)
DEF_HELPER_1(vfp_absd, f64, f64)
DEF_HELPER_2(vfp_sqrts, f32, f32, env)
DEF_HELPER_2(vfp_sqrtd, f64, f64, env)
-DEF_HELPER_3(vfp_cmps, void, f32, f32, env)
-DEF_HELPER_3(vfp_cmpd, void, f64, f64, env)
-DEF_HELPER_3(vfp_cmpes, void, f32, f32, env)
-DEF_HELPER_3(vfp_cmped, void, f64, f64, env)
+DEF_HELPER_3(vfp_cmps, i32, f32, f32, env)
+DEF_HELPER_3(vfp_cmpd, i32, f64, f64, env)
+DEF_HELPER_3(vfp_cmpes, i32, f32, f32, env)
+DEF_HELPER_3(vfp_cmped, i32, f64, f64, env)
DEF_HELPER_2(vfp_fcvtds, f64, f32, env)
DEF_HELPER_2(vfp_fcvtsd, f32, f64, env)
@@ -535,3 +536,5 @@ DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
#ifdef TARGET_AARCH64
#include "helper-a64.h"
#endif
+
+#include "hqemu-helper.h"
diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
index 6cd54c8..fdea907 100644
--- a/target-arm/op_helper.c
+++ b/target-arm/op_helper.c
@@ -386,6 +386,16 @@ void HELPER(cpsr_write)(CPUARMState *env, uint32_t val, uint32_t mask)
cpsr_write(env, val, mask);
}
+void HELPER(cpsr_write_nzcv)(CPUARMState *env, uint32_t val, uint32_t mask)
+{
+ if (mask & CPSR_NZCV) {
+ env->ZF = (~val) & CPSR_Z;
+ env->NF = val;
+ env->CF = (val >> 29) & 1;
+ env->VF = (val << 3) & 0x80000000;
+ }
+}
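
The shuffling above is dictated by how QEMU stores the four ARM condition
flags in separate fields, each with its own convention:

    /* env->NF: N lives in bit 31      -> NF = val keeps CPSR.N in place
     * env->ZF: Z is set iff ZF == 0   -> ZF = (~val) & CPSR_Z
     * env->CF: C lives in bit 0       -> CF = (val >> 29) & 1  (CPSR bit 29)
     * env->VF: V lives in bit 31      -> VF = (val << 3) & 0x80000000
     *                                    (CPSR bit 28 shifted up to 31) */

Handling the NZCV-only case in a dedicated helper skips the mode-switching
work of the full cpsr_write() path.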
+
/* Access to user mode registers from privileged modes. */
uint32_t HELPER(get_user_reg)(CPUARMState *env, uint32_t regno)
{
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index 14e8131..21cf214 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -37,10 +37,17 @@
#include "exec/helper-gen.h"
#include "trace-tcg.h"
+#include "hqemu.h"
static TCGv_i64 cpu_X[32];
static TCGv_i64 cpu_pc;
+#if defined(CONFIG_USER_ONLY)
+#define IS_USER(s) 1
+#else
+#define IS_USER(s) (s->user)
+#endif
+
/* Load/store exclusive handling */
static TCGv_i64 cpu_exclusive_high;
@@ -119,6 +126,31 @@ static inline ARMMMUIdx get_a64_user_mem_index(DisasContext *s)
}
}
+static inline void gen_ibtc_stub(DisasContext *s)
+{
+#ifdef ENABLE_IBTC
+ if (!build_llvm(s->env)) {
+ TCGv_ptr ibtc_host_pc = tcg_temp_new_ptr();
+ gen_helper_lookup_ibtc(ibtc_host_pc, cpu_env);
+ tcg_gen_op1i(INDEX_op_jmp, GET_TCGV_PTR(ibtc_host_pc));
+ tcg_temp_free_ptr(ibtc_host_pc);
+ s->gen_ibtc = 0;
+ }
+#endif
+}
+
+static inline void gen_cpbl_stub(DisasContext *s)
+{
+#ifdef ENABLE_CPBL
+ if (!build_llvm(s->env)) {
+ TCGv_ptr cpbl_host_pc = tcg_temp_new_ptr();
+ gen_helper_lookup_cpbl(cpbl_host_pc, cpu_env);
+ tcg_gen_op1i(INDEX_op_jmp, GET_TCGV_PTR(cpbl_host_pc));
+ tcg_temp_free_ptr(cpbl_host_pc);
+ }
+#endif
+}
+
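
IBTC stands for indirect-branch translation cache: instead of exiting to the
dispatch loop on every indirect branch, the stub calls a helper that maps the
current guest PC to translated host code and jumps there through the
HQEMU-specific INDEX_op_jmp opcode. Only the helper's name is visible in this
patch; the sketch below is an assumed shape, with table layout, sizing, and
miss handling invented for illustration:

    typedef struct {
        target_ulong guest_pc;              /* tag */
        void *host_pc;                      /* entry of the matching TB */
    } IBTCEntry;

    #define IBTC_BITS 16
    static IBTCEntry ibtc_table[1 << IBTC_BITS];
    extern void *dispatcher_stub;           /* assumed miss target */

    void *helper_lookup_ibtc_sketch(CPUArchState *env)
    {
        target_ulong pc = cpu_get_pc(env);
        IBTCEntry *e = &ibtc_table[pc & ((1 << IBTC_BITS) - 1)];
        if (likely(e->guest_pc == pc))
            return e->host_pc;              /* hit: jump straight to the TB */
        return dispatcher_stub;             /* miss: back to the exec loop */
    }

gen_cpbl_stub() plays the same role for cross-page direct branches (CPBL),
looking up the destination block when direct chaining is not allowed.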
void aarch64_cpu_dump_state(CPUState *cs, FILE *f,
fprintf_function cpu_fprintf, int flags)
{
@@ -285,12 +317,38 @@ static inline bool use_goto_tb(DisasContext *s, int n, uint64_t dest)
return true;
}
+#if defined(CONFIG_USER_ONLY)
+static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
+{
+ TranslationBlock *tb;
+
+ tb = s->tb;
+ tcg_gen_goto_tb(n);
+ gen_a64_set_pc_im(dest);
+ tcg_gen_exit_tb((intptr_t)tb + n);
+ s->is_jmp = DISAS_TB_JUMP;
+ tb->jmp_pc[n] = dest;
+}
+#else
+static int try_link_pages(DisasContext *s, TranslationBlock *tb, target_ulong dest)
+{
+#ifdef ENABLE_LPAGE
+ if (!build_llvm(s->env)) {
+ target_ulong addr, size;
+ int ret = lpt_search_page(s->env, dest, &addr, &size);
+ if (ret == 1 && (tb->pc & ~(size - 1)) == addr)
+ return 1;
+ }
+#endif
+ return 0;
+}
+
static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
{
TranslationBlock *tb;
tb = s->tb;
- if (use_goto_tb(s, n, dest)) {
+ if (use_goto_tb(s, n, dest) || try_link_pages(s, tb, dest) == 1) {
tcg_gen_goto_tb(n);
gen_a64_set_pc_im(dest);
tcg_gen_exit_tb((intptr_t)tb + n);
@@ -302,11 +360,14 @@ static inline void gen_goto_tb(DisasContext *s, int n, uint64_t dest)
} else if (s->singlestep_enabled) {
gen_exception_internal(EXCP_DEBUG);
} else {
+ gen_cpbl_stub(s);
tcg_gen_exit_tb(0);
s->is_jmp = DISAS_TB_JUMP;
}
}
+ tb->jmp_pc[n] = dest;
}
+#endif
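
Stock QEMU only chains TBs whose source and destination share one guest page,
because unmapping a page must be able to invalidate every link into it.
ENABLE_LPAGE relaxes this to large pages: lpt_search_page() reports the base
and size of the large page containing dest, and the test

    /* addr = dest & ~(size - 1), as returned by lpt_search_page() */
    (tb->pc & ~(size - 1)) == addr

holds exactly when the source TB sits in the same large page. Since that page
is mapped and torn down as a unit, the usual same-page chaining invariant is
preserved.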
static void unallocated_encoding(DisasContext *s)
{
@@ -568,6 +629,7 @@ static void gen_add_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
tcg_gen_movi_i64(tmp, 0);
tcg_gen_add2_i64(result, flag, t0, tmp, t1, tmp);
+ tcg_gen_annotate(A_SetCC);
tcg_gen_extrl_i64_i32(cpu_CF, flag);
@@ -614,6 +676,7 @@ static void gen_sub_CC(int sf, TCGv_i64 dest, TCGv_i64 t0, TCGv_i64 t1)
result = tcg_temp_new_i64();
flag = tcg_temp_new_i64();
tcg_gen_sub_i64(result, t0, t1);
+ tcg_gen_annotate(A_SetCC);
gen_set_NZ64(result);
@@ -764,11 +827,51 @@ static void do_gpr_ld(DisasContext *s, TCGv_i64 dest, TCGv_i64 tcg_addr,
get_mem_index(s));
}
+#ifdef ENABLE_TCG_VECTOR
+#include "simd_helper.h"
+
+#define VFP_DREG(reg) \
+do { \
+ reg = reg * 2; \
+} while (0)
+#define tcg_vector_abort() \
+do {\
+ fprintf(stderr, "%s:%d: tcg fatal error - unhandled vector op.\n", __FILE__, __LINE__);\
+ exit(1);\
+} while (0)
+
+/*
+ * disas_neon_ls_vector()
+ * Return 1 if the NEON load/store instruction was successfully translated to a TCG vector op.
+ */
+static int disas_neon_ls_vector(DisasContext *s, int reg, int is_load,
+ TCGv_i64 tcg_addr)
+{
+ TCGArg vop, alignment = 32;
+
+ if (!build_llvm(s->env))
+ return 0;
+
+ VFP_DREG(reg);
+ vop = (is_load) ? INDEX_op_vload_128 : INDEX_op_vstore_128;
+ gen_vector_op3(vop,
+ offsetof(CPUARMState, vfp.regs[reg]),
+ GET_TCGV_I64(tcg_addr),
+ alignment);
+ return 1;
+}
+#endif
+
/*
* Store from FP register to memory
*/
static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
{
+#ifdef ENABLE_TCG_VECTOR
+ if (size >= 4 && disas_neon_ls_vector(s, srcidx, 0, tcg_addr) == 1)
+ return;
+#endif
+
/* This writes the bottom N bits of a 128 bit wide vector to memory */
TCGv_i64 tmp = tcg_temp_new_i64();
tcg_gen_ld_i64(tmp, cpu_env, fp_reg_offset(s, srcidx, MO_64));
@@ -791,6 +894,11 @@ static void do_fp_st(DisasContext *s, int srcidx, TCGv_i64 tcg_addr, int size)
*/
static void do_fp_ld(DisasContext *s, int destidx, TCGv_i64 tcg_addr, int size)
{
+#ifdef ENABLE_TCG_VECTOR
+ if (size >= 4 && disas_neon_ls_vector(s, destidx, 1, tcg_addr) == 1)
+ return;
+#endif
+
/* This always zero-extends and writes to a full 128 bit wide vector */
TCGv_i64 tmplo = tcg_temp_new_i64();
TCGv_i64 tmphi;
@@ -1653,6 +1761,7 @@ static void disas_uncond_b_reg(DisasContext *s, uint32_t insn)
}
s->is_jmp = DISAS_JUMP;
+ s->gen_ibtc = 1;
}
/* C3.2 Branches, exception generating and system instructions */
@@ -3624,6 +3733,8 @@ static void disas_cc(DisasContext *s, uint32_t insn)
TCGv_i64 tcg_tmp, tcg_y, tcg_rn;
DisasCompare c;
+ tcg_gen_annotate(A_NoSIMDization);
+
if (!extract32(insn, 29, 1)) {
unallocated_encoding(s);
return;
@@ -8854,6 +8965,153 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
}
}
+#ifdef ENABLE_TCG_VECTOR
+static int disas_neon_misc(DisasContext *s, uint32_t insn)
+{
+ if (!build_llvm(s->env))
+ return 0;
+
+ int size = extract32(insn, 22, 2);
+ int opcode = extract32(insn, 12, 5);
+ bool u = extract32(insn, 29, 1);
+ bool is_q = extract32(insn, 30, 1);
+ int rm = extract32(insn, 5, 5);
+ int rd = extract32(insn, 0, 5);
+
+ VFP_DREG(rm);
+ VFP_DREG(rd);
+
+ switch (opcode) {
+ case 0xc ... 0xf:
+ case 0x16 ... 0x1d:
+ case 0x1f:
+ {
+ /* Floating point: U, size[1] and opcode indicate operation;
+ * size[0] indicates single or double precision.
+ */
+ int is_double = extract32(size, 0, 1);
+ opcode |= (extract32(size, 1, 1) << 5) | (u << 6);
+ size = is_double ? 64 : 32;
+
+ switch (opcode) {
+ case 0x1d: /* SCVTF */
+ case 0x5d: /* UCVTF */
+ {
+ if (is_double && !is_q) {
+ unallocated_encoding(s);
+ return 0;
+ }
+ if (!fp_access_check(s)) {
+ return 0;
+ }
+ if (opcode == 0x1d)
+ gen_vector_cvt(vsitofp, size);
+ else
+ gen_vector_cvt(vuitofp, size);
+ break;
+ }
+ case 0x1a: /* FCVTNS */
+ case 0x1b: /* FCVTMS */
+ case 0x1c: /* FCVTAS */
+ case 0x3a: /* FCVTPS */
+ case 0x3b: /* FCVTZS */
+ if (is_double && !is_q) {
+ unallocated_encoding(s);
+ return 0;
+ }
+ gen_vector_cvt(vfptosi, size);
+ break;
+ case 0x5a: /* FCVTNU */
+ case 0x5b: /* FCVTMU */
+ case 0x5c: /* FCVTAU */
+ case 0x7a: /* FCVTPU */
+ case 0x7b: /* FCVTZU */
+ if (is_double && !is_q) {
+ unallocated_encoding(s);
+ return 0;
+ }
+ gen_vector_cvt(vfptoui, size);
+ break;
+ default:
+ return 0;
+ }
+ break;
+ }
+ default:
+ return 0;
+ }
+
+ return 1;
+}
+
+/*
+ * disas_neon_data_vector()
+ * Return 1 if the NEON data-processing instruction was successfully translated to a TCG vector op.
+ */
+static int disas_neon_data_vector(DisasContext *s, uint32_t insn)
+{
+ if (!build_llvm(s->env))
+ return 0;
+
+ int q = extract32(insn, 30, 1);
+ int u = extract32(insn, 29, 1);
+ int size = extract32(insn, 22, 2);
+ int op = extract32(insn, 11, 5);
+ int rm = extract32(insn, 16, 5);
+ int rn = extract32(insn, 5, 5);
+ int rd = extract32(insn, 0, 5);
+
+ VFP_DREG(rm);
+ VFP_DREG(rn);
+ VFP_DREG(rd);
+
+ switch(op) {
+ case 0x10: /* ADD, SUB */
+ if(!u) /* ADD */
+ gen_vector_arith(vadd, i, size);
+ else /* SUB */
+ gen_vector_arith(vsub, i, size);
+ break;
+ case 0x3: /* logic ops */
+ switch ((u << 2) | size) {
+ case 0: gen_vector_logical(vand); break; /* AND */
+ case 1: gen_vector_logical(vbic); break; /* BIC: rd = rn & ~rm */
+ case 2: gen_vector_logical(vorr); break; /* ORR */
+ case 3: gen_vector_logical(vorn); break; /* ORN */
+ case 4: gen_vector_logical(veor); break; /* EOR */
+ case 5: gen_vector_logical(vbsl); break; /* BSL */
+ case 6: gen_vector_logical(vbit); break; /* BIT */
+ case 7: gen_vector_logical(vbif); break; /* BIF */
+ default:
+ return 0;
+ }
+ break;
+ case 0x18 ... 0x31:
+ {
+ int fpopcode = extract32(insn, 11, 5)
+ | (extract32(insn, 23, 1) << 5)
+ | (extract32(insn, 29, 1) << 6);
+ int size = extract32(insn, 22, 1);
+ switch (fpopcode) {
+ case 0x1a: gen_vector_fop2(vadd); break; /* FADD */
+ case 0x3a: gen_vector_fop2(vsub); break; /* FSUB */
+ case 0x5b: gen_vector_fop2(vmul); break; /* FMUL */
+ case 0x5f: gen_vector_fop2(vdiv); break; /* FDIV */
+ case 0x19: gen_vector_fop2(vmla); break; /* FMLA */
+ case 0x39: gen_vector_fop2(vmls); break; /* FMLS */
+ default:
+ return 0;
+ }
+ break;
+ }
+ default:
+ return 0;
+ }
+
+ return 1;
+}
+#endif
+
/* Logic op (opcode == 3) subgroup of C3.6.16. */
static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
{
@@ -8870,6 +9128,11 @@ static void disas_simd_3same_logic(DisasContext *s, uint32_t insn)
return;
}
+#ifdef ENABLE_TCG_VECTOR
+ if (disas_neon_data_vector(s, insn) == 1)
+ return;
+#endif
+
tcg_op1 = tcg_temp_new_i64();
tcg_op2 = tcg_temp_new_i64();
tcg_res[0] = tcg_temp_new_i64();
@@ -9138,6 +9401,11 @@ static void disas_simd_3same_float(DisasContext *s, uint32_t insn)
return;
}
+#ifdef ENABLE_TCG_VECTOR
+ if (disas_neon_data_vector(s, insn) == 1)
+ return;
+#endif
+
switch (fpopcode) {
case 0x58: /* FMAXNMP */
case 0x5a: /* FADDP */
@@ -9232,6 +9500,11 @@ static void disas_simd_3same_int(DisasContext *s, uint32_t insn)
return;
}
+#ifdef ENABLE_TCG_VECTOR
+ if (disas_neon_data_vector(s, insn) == 1)
+ return;
+#endif
+
if (size == 3) {
assert(is_q);
for (pass = 0; pass < 2; pass++) {
@@ -9778,6 +10051,11 @@ static void disas_simd_two_reg_misc(DisasContext *s, uint32_t insn)
TCGv_i32 tcg_rmode;
TCGv_ptr tcg_fpstatus;
+#ifdef ENABLE_TCG_VECTOR
+ if (disas_neon_misc(s, insn) == 1)
+ return;
+#endif
+
switch (opcode) {
case 0x0: /* REV64, REV32 */
case 0x1: /* REV16 */
@@ -11018,6 +11296,8 @@ void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb)
pc_start = tb->pc;
+ dc->gen_ibtc = 0;
+ dc->env = env;
dc->tb = tb;
dc->is_jmp = DISAS_NEXT;
@@ -11078,7 +11358,12 @@ void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb)
max_insns = TCG_MAX_INSNS;
}
- gen_tb_start(tb);
+ if (!build_llvm(env)) {
+ gen_tb_start(tb);
+ if (tracer_mode != TRANS_MODE_NONE)
+ tcg_gen_hotpatch(IS_USER(dc), tracer_mode == TRANS_MODE_HYBRIDS ||
+ tracer_mode == TRANS_MODE_HYBRIDM);
+ }
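
tcg_gen_hotpatch() plants a patchable point at the head of code generated by
the normal TCG pass (it is skipped when a block is being rebuilt as LLVM IR).
A reasonable reading, though the consumer is outside this patch, is that the
trace profiler later rewrites this site to divert hot execution into
LLVM-optimized traces, with the two HYBRID tracer modes selecting how those
traces are generated.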
tcg_clear_temp_count();
@@ -11144,6 +11429,9 @@ void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb)
* Also stop translation when a page boundary is reached. This
* ensures prefetch aborts occur at the right place.
*/
+
+ if (build_llvm(env) && num_insns == tb->icount)
+ break;
} while (!dc->is_jmp && !tcg_op_buf_full() &&
!cs->singlestep_enabled &&
!singlestep &&
@@ -11155,6 +11443,15 @@ void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb)
gen_io_end();
}
+ if (build_llvm(env) && tb->size != dc->pc - pc_start) {
+ /* Consistency check against the TB info: the re-decoded guest block
+ * must match the recorded TB size; bail out if it does not. */
+ fprintf(stderr, "inconsistent block with pc 0x"TARGET_FMT_lx" size=%d"
+ " icount=%d (error size="TARGET_FMT_ld")\n",
+ tb->pc, tb->size, tb->icount, dc->pc - pc_start);
+ exit(1);
+ }
+
if (unlikely(cs->singlestep_enabled || dc->ss_active)
&& dc->is_jmp != DISAS_EXC) {
/* Note that this means single stepping WFI doesn't halt the CPU.
@@ -11182,6 +11479,8 @@ void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb)
/* fall through */
case DISAS_JUMP:
/* indicate that the hash table must be used to find the next TB */
+ if (dc->gen_ibtc == 1)
+ gen_ibtc_stub(dc);
tcg_gen_exit_tb(0);
break;
case DISAS_TB_JUMP:
@@ -11211,10 +11510,15 @@ void gen_intermediate_code_a64(ARMCPU *cpu, TranslationBlock *tb)
}
done_generating:
- gen_tb_end(tb, num_insns);
+ if (build_llvm(env)) {
+ /* Terminate the linked list. */
+ tcg_ctx.gen_op_buf[tcg_ctx.gen_last_op_idx].next = -1;
+ } else {
+ gen_tb_end(tb, num_insns);
+ }
#ifdef DEBUG_DISAS
- if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
+ if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) && !build_llvm(env)) {
qemu_log("----------------\n");
qemu_log("IN: %s\n", lookup_symbol(pc_start));
log_target_disas(cs, pc_start, dc->pc - pc_start,
@@ -11222,6 +11526,8 @@ done_generating:
qemu_log("\n");
}
#endif
- tb->size = dc->pc - pc_start;
- tb->icount = num_insns;
+ if (!build_llvm(env)) {
+ tb->size = dc->pc - pc_start;
+ tb->icount = num_insns;
+ }
}
diff --git a/target-arm/translate.c b/target-arm/translate.c
index 5d22879..256227b 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -36,6 +36,7 @@
#include "exec/helper-gen.h"
#include "trace-tcg.h"
+#include "hqemu.h"
#define ENABLE_ARCH_4T arm_dc_feature(s, ARM_FEATURE_V4T)
@@ -110,6 +111,33 @@ void arm_translate_init(void)
#endif
a64_translate_init();
+
+ copy_tcg_context_global();
+}
+
+static inline void gen_ibtc_stub(DisasContext *s)
+{
+#ifdef ENABLE_IBTC
+ if (!build_llvm(s->env)) {
+ TCGv_ptr ibtc_host_pc = tcg_temp_new_ptr();
+ gen_helper_lookup_ibtc(ibtc_host_pc, cpu_env);
+ tcg_gen_op1i(INDEX_op_jmp, GET_TCGV_PTR(ibtc_host_pc));
+ tcg_temp_free_ptr(ibtc_host_pc);
+ s->gen_ibtc = 0;
+ }
+#endif
+}
+
+static inline void gen_cpbl_stub(DisasContext *s)
+{
+#ifdef ENABLE_CPBL
+ if (!build_llvm(s->env)) {
+ TCGv_ptr cpbl_host_pc = tcg_temp_new_ptr();
+ gen_helper_lookup_cpbl(cpbl_host_pc, cpu_env);
+ tcg_gen_op1i(INDEX_op_jmp, GET_TCGV_PTR(cpbl_host_pc));
+ tcg_temp_free_ptr(cpbl_host_pc);
+ }
+#endif
}
static inline ARMMMUIdx get_a32_user_mem_index(DisasContext *s)
@@ -201,7 +229,10 @@ static void store_reg(DisasContext *s, int reg, TCGv_i32 var)
static inline void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
{
TCGv_i32 tmp_mask = tcg_const_i32(mask);
- gen_helper_cpsr_write(cpu_env, var, tmp_mask);
+ if (mask & ~CPSR_NZCV)
+ gen_helper_cpsr_write(cpu_env, var, tmp_mask);
+ else
+ gen_helper_cpsr_write_nzcv(cpu_env, var, tmp_mask);
tcg_temp_free_i32(tmp_mask);
}
/* Set NZCV flags from the high 4 bits of var. */
@@ -493,6 +524,7 @@ static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
{
TCGv_i32 tmp;
tcg_gen_sub_i32(cpu_NF, t0, t1);
+ tcg_gen_annotate(A_SetCC);
tcg_gen_mov_i32(cpu_ZF, cpu_NF);
tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
@@ -878,6 +910,7 @@ static inline void gen_bx_im(DisasContext *s, uint32_t addr)
tcg_temp_free_i32(tmp);
}
tcg_gen_movi_i32(cpu_R[15], addr & ~1);
+ s->gen_ibtc = 1;
}
/* Set PC and Thumb state from var. var is marked as dead. */
@@ -887,6 +920,7 @@ static inline void gen_bx(DisasContext *s, TCGv_i32 var)
tcg_gen_andi_i32(cpu_R[15], var, ~1);
tcg_gen_andi_i32(var, var, 1);
store_cpu_field(var, thumb);
+ s->gen_ibtc = 1;
}
/* Variant of store_reg which uses branch&exchange logic when storing
@@ -1199,20 +1233,38 @@ static inline void gen_vfp_sqrt(int dp)
gen_helper_vfp_sqrts(cpu_F0s, cpu_F0s, cpu_env);
}
+static inline void gen_update_fpscr(TCGv_i32 flags)
+{
+ TCGv_i32 tmp;
+ tmp = load_cpu_field(vfp.xregs[ARM_VFP_FPSCR]);
+ tcg_gen_andi_i32(tmp, tmp, 0x0fffffff);
+ tcg_gen_or_i32(tmp, tmp, flags);
+ store_cpu_field(tmp, vfp.xregs[ARM_VFP_FPSCR]);
+ tcg_temp_free_i32(tmp);
+}
+
static inline void gen_vfp_cmp(int dp)
{
+ TCGv_i32 flags = tcg_temp_new_i32();
if (dp)
- gen_helper_vfp_cmpd(cpu_F0d, cpu_F1d, cpu_env);
+ gen_helper_vfp_cmpd(flags, cpu_F0d, cpu_F1d, cpu_env);
else
- gen_helper_vfp_cmps(cpu_F0s, cpu_F1s, cpu_env);
+ gen_helper_vfp_cmps(flags, cpu_F0s, cpu_F1s, cpu_env);
+
+ gen_update_fpscr(flags);
+ tcg_temp_free_i32(flags);
}
static inline void gen_vfp_cmpe(int dp)
{
+ TCGv_i32 flags = tcg_temp_new_i32();
if (dp)
- gen_helper_vfp_cmped(cpu_F0d, cpu_F1d, cpu_env);
+ gen_helper_vfp_cmped(flags, cpu_F0d, cpu_F1d, cpu_env);
else
- gen_helper_vfp_cmpes(cpu_F0s, cpu_F1s, cpu_env);
+ gen_helper_vfp_cmpes(flags, cpu_F0s, cpu_F1s, cpu_env);
+
+ gen_update_fpscr(flags);
+ tcg_temp_free_i32(flags);
}
static inline void gen_vfp_F1_ld0(int dp)
@@ -3977,20 +4029,49 @@ static int disas_vfp_insn(DisasContext *s, uint32_t insn)
return 0;
}
+#if defined(CONFIG_USER_ONLY)
+static inline void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
+{
+ TranslationBlock *tb;
+
+ tb = s->tb;
+ tcg_gen_goto_tb(n);
+ gen_set_pc_im(s, dest);
+ tcg_gen_exit_tb((uintptr_t)tb + n);
+ tb->jmp_pc[n] = dest;
+}
+#else
+static int try_link_pages(DisasContext *s, TranslationBlock *tb, target_ulong dest)
+{
+#ifdef ENABLE_LPAGE
+ if (!build_llvm(s->env)) {
+ target_ulong addr, size;
+ int ret = lpt_search_page(s->env, dest, &addr, &size);
+ if (ret == 1 && (tb->pc & ~(size - 1)) == addr)
+ return 1;
+ }
+#endif
+ return 0;
+}
+
static inline void gen_goto_tb(DisasContext *s, int n, target_ulong dest)
{
TranslationBlock *tb;
tb = s->tb;
- if ((tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK)) {
+ if ((tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK) ||
+ try_link_pages(s, tb, dest) == 1) {
tcg_gen_goto_tb(n);
gen_set_pc_im(s, dest);
tcg_gen_exit_tb((uintptr_t)tb + n);
} else {
gen_set_pc_im(s, dest);
+ gen_cpbl_stub(s);
tcg_gen_exit_tb(0);
}
+ tb->jmp_pc[n] = dest;
}
+#endif
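
    /* tb->jmp_pc[n] is an HQEMU-added field recording the guest target of
     * direct exit n, even when the branch cannot be chained. A plausible
     * consumer (not in this patch) is the trace selector, which can follow
     * hot control flow across TBs without re-decoding guest code. */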
static inline void gen_jmp (DisasContext *s, uint32_t dest)
{
@@ -4372,6 +4453,54 @@ static struct {
{2, 1, 1}
};
+#ifdef ENABLE_TCG_VECTOR
+#include "simd_helper.h"
+
+#define tcg_vector_abort() \
+do {\
+ fprintf(stderr, "%s:%d: tcg fatal error - unhandled vector op.\n", __FILE__, __LINE__);\
+ exit(1);\
+} while (0)
+
+/*
+ * disas_neon_ls_vector()
+ * Return 1 if the NEON load/store instruction was successfully translated to a TCG vector op.
+ */
+static int disas_neon_ls_vector(DisasContext *s, uint32_t insn, TCGv_i32 addr)
+{
+ int rd, op, load;
+ int nregs, reg;
+ int interleave, spacing;
+ TCGArg vop, alignment = 32;
+
+ if (!build_llvm(s->env))
+ return 0;
+
+ /* Load store all elements. */
+ op = (insn >> 8) & 0xf;
+ nregs = neon_ls_element_type[op].nregs;
+ interleave = neon_ls_element_type[op].interleave;
+ spacing = neon_ls_element_type[op].spacing;
+
+ if (interleave != 1 || nregs % 2 != 0)
+ return 0;
+
+ VFP_DREG_D(rd, insn);
+ load = (insn & (1 << 21)) != 0;
+ vop = (load) ? INDEX_op_vload_128 : INDEX_op_vstore_128;
+
+ for (reg = 0; reg < nregs; reg += 2) {
+ gen_vector_op3(vop,
+ offsetof(CPUARMState, vfp.regs[rd]),
+ GET_TCGV_I32(addr),
+ alignment);
+ rd += spacing * 2;
+ tcg_gen_addi_i32(addr, addr, 16);
+ }
+ return 1;
+}
+#endif
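
Only the non-interleaved, even-register-count forms are taken off the slow
path, so each pair of d registers moves as one 128-bit access. For example
VLD1.64 {d0-d3}, [r0] decodes to nregs = 4, interleave = 1, spacing = 1, and
the loop above emits:

    vload_128  vfp.regs[d0..d1] <- [addr]      /* then addr += 16 */
    vload_128  vfp.regs[d2..d3] <- [addr]

with each op carrying the alignment operand (32 here, presumably a
bit-alignment hint) as its third argument.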
+
/* Translate a NEON load/store element instruction. Return nonzero if the
instruction is invalid. */
static int disas_neon_ls_insn(DisasContext *s, uint32_t insn)
@@ -4438,6 +4567,11 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn)
addr = tcg_temp_new_i32();
load_reg_var(s, addr, rn);
stride = (1 << size) * interleave;
+
+#ifdef ENABLE_TCG_VECTOR
+ if (disas_neon_ls_vector(s, insn, addr) == 1)
+ goto vector_done;
+#endif
for (reg = 0; reg < nregs; reg++) {
if (interleave > 2 || (interleave == 2 && nregs == 2)) {
load_reg_var(s, addr, rn);
@@ -4529,6 +4663,9 @@ static int disas_neon_ls_insn(DisasContext *s, uint32_t insn)
}
rd += spacing;
}
+#ifdef ENABLE_TCG_VECTOR
+vector_done:
+#endif
tcg_temp_free_i32(addr);
stride = nregs * 8;
} else {
@@ -5111,6 +5248,131 @@ static const uint8_t neon_2rm_sizes[] = {
[NEON_2RM_VCVT_UF] = 0x4,
};
+#ifdef ENABLE_TCG_VECTOR
+static int disas_neon_misc(DisasContext *s, uint32_t insn)
+{
+ int op, rd, rm;
+
+ if (!build_llvm(s->env))
+ return 0;
+
+ op = ((insn >> 12) & 0x30) | ((insn >> 7) & 0xf);
+ VFP_DREG_D(rd, insn);
+ VFP_DREG_M(rm, insn);
+
+ switch (op) {
+ case NEON_2RM_VCVT_FS: /* VCVT.F32.S32 */
+ gen_vector_cvt(vsitofp, 32);
+ break;
+ case NEON_2RM_VCVT_FU: /* VCVT.F32.U32 */
+ gen_vector_cvt(vuitofp, 32);
+ break;
+ case NEON_2RM_VCVT_SF: /* VCVT.S32.F32 */
+ gen_vector_cvt(vfptosi, 32);
+ break;
+ case NEON_2RM_VCVT_UF: /* VCVT.U32.F32 */
+ gen_vector_cvt(vfptoui, 32);
+ break;
+ default:
+ return 0;
+ }
+
+ return 1;
+}
+
+/*
+ * disas_neon_data_vector()
+ * Return 1 if the NEON data-processing instruction was successfully translated to a TCG vector op.
+ */
+static int disas_neon_data_vector(DisasContext *s, uint32_t insn)
+{
+ int op, q, u, size;
+ int rd, rn, rm;
+
+ if (!build_llvm(s->env))
+ return 0;
+
+ /* Three register same length. */
+ q = (insn & (1 << 6)) != 0;
+ u = (insn >> 24) & 1;
+ VFP_DREG_D(rd, insn);
+ VFP_DREG_N(rn, insn);
+ VFP_DREG_M(rm, insn);
+ size = (insn >> 20) & 3;
+ op = ((insn >> 7) & 0x1e) | ((insn >> 4) & 1);
+
+ switch (op) {
+ case NEON_3R_VSHL:
+ case NEON_3R_VQSHL:
+ case NEON_3R_VRSHL:
+ case NEON_3R_VQRSHL:
+ {
+ int rtmp;
+ /* Shift instruction operands are reversed. */
+ rtmp = rn;
+ rn = rm;
+ rm = rtmp;
+ }
+ break;
+ default:
+ break;
+ }
+
+ switch(op) {
+ case NEON_3R_VADD_VSUB:
+ if(!u) /* VADD */
+ gen_vector_arith(vadd, i, size);
+ else /* VSUB */
+ gen_vector_arith(vsub, i, size);
+ break;
+ case NEON_3R_LOGIC:
+ switch ((u << 2) | size) {
+ case 0: gen_vector_logical(vand); break; /* VAND */
+ case 1: gen_vector_logical(vbic); break; /* VBIC: rd = rn & ~rm */
+ case 2: gen_vector_logical(vorr); break; /* VORR */
+ case 3: gen_vector_logical(vorn); break; /* VORN: rd = rn | ~rm */
+ case 4: gen_vector_logical(veor); break; /* VEOR: bitwise exclusive OR */
+ case 5: gen_vector_logical(vbsl); break; /* VBSL */
+ case 6: gen_vector_logical(vbit); break; /* VBIT */
+ case 7: gen_vector_logical(vbif); break; /* VBIF */
+ }
+ break;
+ case NEON_3R_VFM:
+ if (size) /* VFMS */
+ gen_vector_fop(vfms);
+ else /* VFMA */
+ gen_vector_fop(vfma);
+ break;
+ case NEON_3R_FLOAT_ARITH: /* Floating point arithmetic. */
+ switch ((u << 2) | size) {
+ case 0: gen_vector_fop(vadd); break; /* VADD */
+ case 4: gen_vector_fop(vpadd); break; /* VPADD */
+ case 2: gen_vector_fop(vsub); break; /* VSUB */
+ case 6: gen_vector_fop(vabd); break; /* VABD */
+ default:
+ tcg_vector_abort();
+ break;
+ }
+ break;
+ case NEON_3R_FLOAT_MULTIPLY: /* float VMLA, VMLS, VMUL */
+ if (u) {
+ gen_vector_fop(vmul);
+ } else if (size == 0) {
+ gen_vector_fop(vmla);
+ } else {
+ gen_vector_fop(vmls);
+ }
+ break;
+ default:
+ return 0;
+ }
+
+ return 1;
+}
+#endif
+
/* Translate a NEON data processing instruction. Return nonzero if the
instruction is invalid.
We process data in a mixture of 32-bit and 64-bit chunks.
@@ -5341,6 +5603,11 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
return 1;
}
+#ifdef ENABLE_TCG_VECTOR
+ if (!pairwise && disas_neon_data_vector(s, insn) == 1)
+ return 0;
+#endif
+
for (pass = 0; pass < (q ? 4 : 2); pass++) {
if (pairwise) {
@@ -6741,6 +7008,10 @@ static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
break;
default:
elementwise:
+#ifdef ENABLE_TCG_VECTOR
+ if (disas_neon_misc(s, insn) == 1)
+ return 0;
+#endif
for (pass = 0; pass < (q ? 4 : 2); pass++) {
if (neon_2rm_is_float_op(op)) {
tcg_gen_ld_f32(cpu_F0s, cpu_env,
@@ -11234,6 +11505,8 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb)
pc_start = tb->pc;
+ dc->gen_ibtc = 0;
+ dc->env = env;
dc->tb = tb;
dc->is_jmp = DISAS_NEXT;
@@ -11303,7 +11576,12 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb)
max_insns = TCG_MAX_INSNS;
}
- gen_tb_start(tb);
+ if (!build_llvm(env)) {
+ gen_tb_start(tb);
+ if (tracer_mode != TRANS_MODE_NONE)
+ tcg_gen_hotpatch(IS_USER(dc), tracer_mode == TRANS_MODE_HYBRIDS ||
+ tracer_mode == TRANS_MODE_HYBRIDM);
+ }
tcg_clear_temp_count();
@@ -11460,6 +11738,12 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb)
end_of_page = (dc->pc >= next_page_start) ||
((dc->pc >= next_page_start - 3) && insn_crosses_page(env, dc));
+#if defined(CONFIG_LLVM) && defined(CONFIG_USER_ONLY)
+ if (llvm_has_annotation(dc->pc, ANNOTATION_LOOP))
+ break;
+#endif
+ if (build_llvm(env) && num_insns == tb->icount)
+ break;
} while (!dc->is_jmp && !tcg_op_buf_full() &&
!cs->singlestep_enabled &&
!singlestep &&
@@ -11476,6 +11760,15 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb)
gen_io_end();
}
+ if (build_llvm(env) && tb->size != dc->pc - pc_start) {
+ /* Consistency check against the TB info: the re-decoded guest block
+ * must match the recorded TB size; bail out if it does not. */
+ fprintf(stderr, "inconsistent block with pc 0x"TARGET_FMT_lx" size=%d"
+ " icount=%d (error size="TARGET_FMT_ld")\n",
+ tb->pc, tb->size, tb->icount, dc->pc - pc_start);
+ exit(1);
+ }
+
/* At this stage dc->condjmp will only be set when the skipped
instruction was a conditional branch or trap, and the PC has
already been written. */
@@ -11543,6 +11836,8 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb)
case DISAS_JUMP:
default:
/* indicate that the hash table must be used to find the next TB */
+ if (dc->gen_ibtc == 1)
+ gen_ibtc_stub(dc);
tcg_gen_exit_tb(0);
break;
case DISAS_TB_JUMP:
@@ -11581,10 +11876,15 @@ void gen_intermediate_code(CPUARMState *env, TranslationBlock *tb)
}
done_generating:
- gen_tb_end(tb, num_insns);
+ if (build_llvm(env)) {
+ /* Terminate the linked list. */
+ tcg_ctx.gen_op_buf[tcg_ctx.gen_last_op_idx].next = -1;
+ } else {
+ gen_tb_end(tb, num_insns);
+ }
#ifdef DEBUG_DISAS
- if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
+ if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) && !build_llvm(env)) {
qemu_log("----------------\n");
qemu_log("IN: %s\n", lookup_symbol(pc_start));
log_target_disas(cs, pc_start, dc->pc - pc_start,
@@ -11592,8 +11892,10 @@ done_generating:
qemu_log("\n");
}
#endif
- tb->size = dc->pc - pc_start;
- tb->icount = num_insns;
+ if (!build_llvm(env)) {
+ tb->size = dc->pc - pc_start;
+ tb->icount = num_insns;
+ }
}
static const char *cpu_mode_names[16] = {
diff --git a/target-arm/translate.h b/target-arm/translate.h
index 53ef971..10f6a05 100644
--- a/target-arm/translate.h
+++ b/target-arm/translate.h
@@ -61,6 +61,8 @@ typedef struct DisasContext {
#define TMP_A64_MAX 16
int tmp_a64_count;
TCGv_i64 tmp_a64[TMP_A64_MAX];
+ int gen_ibtc;
+ CPUArchState *env;
} DisasContext;
typedef struct DisasCompare {
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 84edfd0..cbd8b2a 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -845,7 +845,7 @@ typedef struct CPUX86State {
uint64_t efer;
/* Beginning of state preserved by INIT (dummy marker). */
- struct {} start_init_save;
+ struct { int dummy; } start_init_save;
/* FPU state */
unsigned int fpstt; /* top of stack index */
@@ -865,8 +865,8 @@ typedef struct CPUX86State {
float_status mmx_status; /* for 3DNow! float ops */
float_status sse_status;
uint32_t mxcsr;
- XMMReg xmm_regs[CPU_NB_REGS == 8 ? 8 : 32];
- XMMReg xmm_t0;
+ XMMReg xmm_regs[CPU_NB_REGS == 8 ? 8 : 32] __attribute__((aligned(64)));
+ XMMReg xmm_t0 __attribute__((aligned(64)));
MMXReg mmx_t0;
uint64_t opmask_regs[NB_OPMASK_REGS];
@@ -906,7 +906,7 @@ typedef struct CPUX86State {
uint32_t smbase;
/* End of state preserved by INIT (dummy marker). */
- struct {} end_init_save;
+ struct { int dummy; } end_init_save;
uint64_t system_time_msr;
uint64_t wall_clock_msr;
@@ -966,6 +966,8 @@ typedef struct CPUX86State {
uint64_t mtrr_deftype;
MTRRVar mtrr_var[MSR_MTRRcap_VCNT];
+ CPU_OPTIMIZATION_COMMON
+
/* For KVM */
uint32_t mp_state;
int32_t exception_injected;
@@ -1237,6 +1239,19 @@ static inline void cpu_get_tb_cpu_state(CPUX86State *env, target_ulong *pc,
(env->eflags & (IOPL_MASK | TF_MASK | RF_MASK | VM_MASK | AC_MASK));
}
+static inline target_ulong cpu_get_pc(CPUX86State *env)
+{
+ return env->eip + env->segs[R_CS].base;
+}
+
+static inline int cpu_check_state(CPUX86State *env,
+ target_ulong cs_base, int flags)
+{
+ int mask = IOPL_MASK | TF_MASK | RF_MASK | VM_MASK | AC_MASK;
+ return (cs_base == env->segs[R_CS].base) &&
+ ((uint32_t)flags == (env->hflags | (env->eflags & mask)));
+}
+
void do_cpu_init(X86CPU *cpu);
void do_cpu_sipi(X86CPU *cpu);
@@ -1297,7 +1312,9 @@ static inline void cpu_load_efer(CPUX86State *env, uint64_t val)
static inline MemTxAttrs cpu_get_mem_attrs(CPUX86State *env)
{
- return ((MemTxAttrs) { .secure = (env->hflags & HF_SMM_MASK) != 0 });
+ MemTxAttrs attrs = { 0 };
+ attrs.secure = (env->hflags & HF_SMM_MASK) != 0;
+ return attrs;
}
/* fpu_helper.c */
diff --git a/target-i386/fpu_helper.c b/target-i386/fpu_helper.c
index d421a47..4f50cd9 100644
--- a/target-i386/fpu_helper.c
+++ b/target-i386/fpu_helper.c
@@ -385,7 +385,7 @@ void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
/* FPU operations */
-static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
+const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
void helper_fcom_ST0_FT0(CPUX86State *env)
{
diff --git a/target-i386/helper.c b/target-i386/helper.c
index d18be95..4bc1e13 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -25,6 +25,7 @@
#include "monitor/monitor.h"
#include "hw/i386/apic_internal.h"
#endif
+#include "hqemu.h"
static void cpu_x86_version(CPUX86State *env, int *family, int *model)
{
@@ -641,6 +642,8 @@ void cpu_x86_update_cr3(CPUX86State *env, target_ulong new_cr3)
"CR3 update: CR3=" TARGET_FMT_lx "\n", new_cr3);
tlb_flush(CPU(cpu), 0);
}
+
+ pcid = new_cr3 >> 12;
}
void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4)
@@ -1432,3 +1435,12 @@ void x86_stq_phys(CPUState *cs, hwaddr addr, uint64_t val)
NULL);
}
#endif
+
+CPUState *cpu_create(void)
+{
+ X86CPU *cpu = g_malloc0(sizeof(X86CPU));
+ CPUState *cs = CPU(cpu);
+ memcpy(cpu, X86_CPU(first_cpu), sizeof(X86CPU));
+ cs->env_ptr = &cpu->env;
+ return cs;
+}
diff --git a/target-i386/helper.h b/target-i386/helper.h
index ecfcfd1..8fbdde6 100644
--- a/target-i386/helper.h
+++ b/target-i386/helper.h
@@ -219,3 +219,6 @@ DEF_HELPER_3(rcrl, tl, env, tl, tl)
DEF_HELPER_3(rclq, tl, env, tl, tl)
DEF_HELPER_3(rcrq, tl, env, tl, tl)
#endif
+
+#include "hqemu-helper.h"
+#include "atomic-helper.h"
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 6dc9846..4a98890 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -2785,13 +2785,13 @@ static int kvm_handle_debug(X86CPU *cpu,
case 0x1:
ret = EXCP_DEBUG;
cs->watchpoint_hit = &hw_watchpoint;
- hw_watchpoint.vaddr = hw_breakpoint[n].addr;
+ hw_watchpoint.addr = hw_breakpoint[n].addr;
hw_watchpoint.flags = BP_MEM_WRITE;
break;
case 0x3:
ret = EXCP_DEBUG;
cs->watchpoint_hit = &hw_watchpoint;
- hw_watchpoint.vaddr = hw_breakpoint[n].addr;
+ hw_watchpoint.addr = hw_breakpoint[n].addr;
hw_watchpoint.flags = BP_MEM_ACCESS;
break;
}
diff --git a/target-i386/misc_helper.c b/target-i386/misc_helper.c
index 13bd4f5..b446daa 100644
--- a/target-i386/misc_helper.c
+++ b/target-i386/misc_helper.c
@@ -599,3 +599,7 @@ void helper_debug(CPUX86State *env)
cs->exception_index = EXCP_DEBUG;
cpu_loop_exit(cs);
}
+
+#ifdef CONFIG_COREMU
+#include "atomic-x86.c"
+#endif
diff --git a/target-i386/ops_sse.h b/target-i386/ops_sse.h
index 1780d1d..4a96ed7 100644
--- a/target-i386/ops_sse.h
+++ b/target-i386/ops_sse.h
@@ -995,7 +995,7 @@ SSE_HELPER_CMP(cmpnlt, FPU_CMPNLT)
SSE_HELPER_CMP(cmpnle, FPU_CMPNLE)
SSE_HELPER_CMP(cmpord, FPU_CMPORD)
-static const int comis_eflags[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
+const int comis_eflags[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
void helper_ucomiss(CPUX86State *env, Reg *d, Reg *s)
{
diff --git a/target-i386/translate.c b/target-i386/translate.c
index a3dd167..7204635 100644
--- a/target-i386/translate.c
+++ b/target-i386/translate.c
@@ -32,7 +32,13 @@
#include "exec/helper-gen.h"
#include "trace-tcg.h"
+#include "hqemu.h"
+#if defined(CONFIG_USER_ONLY)
+#define IS_USER(s) 1
+#else
+#define IS_USER(s) (s->cpl == 3)
+#endif
#define PREFIX_REPZ 0x01
#define PREFIX_REPNZ 0x02
@@ -59,26 +65,35 @@
# define clztl clz32
#endif
+#ifdef CONFIG_COREMU
+#ifdef TARGET_X86_64
+#define X86_64_HREGS x86_64_hregs
+#else
+#define X86_64_HREGS 0
+#endif
+#endif
+
//#define MACRO_TEST 1
/* global register indexes */
static TCGv_ptr cpu_env;
-static TCGv cpu_A0;
-static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2, cpu_cc_srcT;
+static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2;
static TCGv_i32 cpu_cc_op;
static TCGv cpu_regs[CPU_NB_REGS];
/* local temps */
-static TCGv cpu_T[2];
+static __thread TCGv cpu_T[2];
/* local register indexes (only used inside old micro ops) */
-static TCGv cpu_tmp0, cpu_tmp4;
-static TCGv_ptr cpu_ptr0, cpu_ptr1;
-static TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
-static TCGv_i64 cpu_tmp1_i64;
+static __thread TCGv cpu_A0;
+static __thread TCGv cpu_tmp0, cpu_tmp4;
+static __thread TCGv_ptr cpu_ptr0, cpu_ptr1;
+static __thread TCGv_i32 cpu_tmp2_i32, cpu_tmp3_i32;
+static __thread TCGv_i64 cpu_tmp1_i64;
+static __thread TCGv cpu_cc_srcT;
#include "exec/gen-icount.h"
#ifdef TARGET_X86_64
-static int x86_64_hregs;
+static __thread int x86_64_hregs;
#endif
typedef struct DisasContext {
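
    /* The decoder's scratch temporaries (and cpu_A0/cpu_T) become __thread
     * because HQEMU can run guest decoding on an LLVM translation thread
     * concurrently with the TCG thread; per-thread copies keep the two from
     * racing. copy_tcg_context_global() in optimize_flags_init() below
     * presumably publishes the initialized context for those threads
     * (assumption: its implementation is outside this patch). */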
@@ -123,6 +138,10 @@ typedef struct DisasContext {
int cpuid_ext2_features;
int cpuid_ext3_features;
int cpuid_7_0_ebx_features;
+ int fallthrough;
+ int gen_ibtc;
+ int gen_cpbl;
+ CPUX86State *env;
} DisasContext;
static void gen_eob(DisasContext *s);
@@ -209,6 +228,36 @@ static const uint8_t cc_op_live[CC_OP_NB] = {
[CC_OP_CLR] = 0,
};
+static inline void gen_ibtc_stub(DisasContext *s)
+{
+#ifdef ENABLE_IBTC
+ if (!build_llvm(s->env)) {
+ TCGv_ptr ibtc_host_pc = tcg_temp_new_ptr();
+ if (s->fallthrough) {
+ tcg_gen_st_i32(tcg_const_i32(1), cpu_env, offsetof(CPUX86State, fallthrough));
+ s->fallthrough = 0;
+ }
+ gen_helper_lookup_ibtc(ibtc_host_pc, cpu_env);
+ tcg_gen_op1i(INDEX_op_jmp, GET_TCGV_PTR(ibtc_host_pc));
+ tcg_temp_free_ptr(ibtc_host_pc);
+ s->gen_ibtc = 0;
+ }
+#endif
+}
+
+static inline void gen_cpbl_stub(DisasContext *s)
+{
+#ifdef ENABLE_CPBL
+ if (!build_llvm(s->env)) {
+ TCGv_ptr cpbl_host_pc = tcg_temp_new_ptr();
+ gen_helper_lookup_cpbl(cpbl_host_pc, cpu_env);
+ tcg_gen_op1i(INDEX_op_jmp, GET_TCGV_PTR(cpbl_host_pc));
+ tcg_temp_free_ptr(cpbl_host_pc);
+ s->gen_cpbl = 0;
+ }
+#endif
+}
+
static void set_cc_op(DisasContext *s, CCOp op)
{
int dead;
@@ -1312,6 +1361,30 @@ static void gen_helper_fp_arith_STN_ST0(int op, int opreg)
/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
{
+#ifdef CONFIG_COREMU
+ if (s1->prefix & PREFIX_LOCK) {
+ gen_update_cc_op(s1);
+
+ switch (ot & 3) {
+ case 0:
+ gen_helper_atomic_opb(cpu_env, cpu_A0, cpu_T[1], tcg_const_i32(op));
+ break;
+ case 1:
+ gen_helper_atomic_opw(cpu_env, cpu_A0, cpu_T[1], tcg_const_i32(op));
+ break;
+ case 2:
+ gen_helper_atomic_opl(cpu_env, cpu_A0, cpu_T[1], tcg_const_i32(op));
+ break;
+#ifdef TARGET_X86_64
+ case 3:
+ gen_helper_atomic_opq(cpu_env, cpu_A0, cpu_T[1], tcg_const_i32(op));
+#endif
+ }
+ set_cc_op(s1, CC_OP_EFLAGS);
+ return;
+ }
+#endif
+
if (d != OR_TMP0) {
gen_op_mov_v_reg(ot, cpu_T[0], d);
} else {
@@ -1378,6 +1451,35 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d)
/* if d == OR_TMP0, it means memory operand (address in A0) */
static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c)
{
+#ifdef CONFIG_COREMU
+ /* with lock prefix */
+ if (s1->prefix & PREFIX_LOCK) {
+ assert(d == OR_TMP0);
+
+ /* The helper will use CAS1 as a unified way to
+ implement atomic inc (locked inc) */
+ gen_update_cc_op(s1);
+
+ switch(ot & 3) {
+ case 0:
+ gen_helper_atomic_incb(cpu_env, cpu_A0, tcg_const_i32(c));
+ break;
+ case 1:
+ gen_helper_atomic_incw(cpu_env, cpu_A0, tcg_const_i32(c));
+ break;
+ case 2:
+ gen_helper_atomic_incl(cpu_env, cpu_A0, tcg_const_i32(c));
+ break;
+#ifdef TARGET_X86_64
+ case 3:
+ gen_helper_atomic_incq(cpu_env, cpu_A0, tcg_const_i32(c));
+#endif
+ }
+ set_cc_op(s1, CC_OP_EFLAGS);
+ return;
+ }
+#endif
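
Under CONFIG_COREMU, locked read-modify-write instructions stop serializing
on the global helper_lock() and instead go through per-operation atomic
helpers, so emulated cores scale. The comment above points at the pattern:
one compare-and-swap loop implements the whole family. A minimal user-mode
sketch of that pattern (helper name, EFLAGS handling, and placement are
assumptions; g2h() is QEMU's user-mode guest-to-host translation):

    static void atomic_inc_sketch(CPUX86State *env, target_ulong a0, int c)
    {
        uint32_t *p = (uint32_t *)g2h(a0);      /* 32-bit case only */
        uint32_t old;
        do {
            old = *p;
        } while (!__sync_bool_compare_and_swap(p, old, old + (uint32_t)c));
        /* the real helper also derives EFLAGS from old/new, which is why
         * the callers set CC_OP_EFLAGS afterwards */
    }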
+
if (d != OR_TMP0) {
gen_op_mov_v_reg(ot, cpu_T[0], d);
} else {
@@ -2205,6 +2307,31 @@ static inline int insn_const_size(TCGMemOp ot)
}
}
+#if defined(CONFIG_USER_ONLY)
+static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
+{
+ TranslationBlock *tb;
+
+ tb = s->tb;
+ tcg_gen_goto_tb(tb_num);
+ gen_jmp_im(eip);
+ tcg_gen_exit_tb((uintptr_t)tb + tb_num);
+ tb->jmp_pc[tb_num] = tb->cs_base + eip;
+}
+#else
+static int try_link_pages(DisasContext *s, TranslationBlock *tb, target_ulong dest)
+{
+#ifdef ENABLE_LPAGE
+ if (!build_llvm(s->env)) {
+ target_ulong addr, size;
+ int ret = lpt_search_page(s->env, dest, &addr, &size);
+ if (ret == 1 && (tb->pc & ~(size - 1)) == addr)
+ return 1;
+ }
+#endif
+ return 0;
+}
+
static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
{
TranslationBlock *tb;
@@ -2214,7 +2341,8 @@ static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
tb = s->tb;
/* NOTE: we handle the case where the TB spans two pages here */
if ((pc & TARGET_PAGE_MASK) == (tb->pc & TARGET_PAGE_MASK) ||
- (pc & TARGET_PAGE_MASK) == ((s->pc - 1) & TARGET_PAGE_MASK)) {
+ (pc & TARGET_PAGE_MASK) == ((s->pc - 1) & TARGET_PAGE_MASK) ||
+ try_link_pages(s, tb, pc) == 1) {
/* jump to same page: we can use a direct jump */
tcg_gen_goto_tb(tb_num);
gen_jmp_im(eip);
@@ -2222,9 +2350,12 @@ static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip)
} else {
/* jump to another page: currently not optimized */
gen_jmp_im(eip);
+ s->gen_cpbl = 1;
gen_eob(s);
}
+ tb->jmp_pc[tb_num] = pc;
}
+#endif
static inline void gen_jcc(DisasContext *s, int b,
target_ulong val, target_ulong next_eip)
@@ -2561,6 +2692,10 @@ static void gen_eob(DisasContext *s)
} else if (s->tf) {
gen_helper_single_step(cpu_env);
} else {
+ if (s->gen_ibtc == 1)
+ gen_ibtc_stub(s);
+ if (s->gen_cpbl == 1)
+ gen_cpbl_stub(s);
tcg_gen_exit_tb(0);
}
s->is_jmp = DISAS_TB_JUMP;
@@ -2974,6 +3109,192 @@ static const struct SSEOpHelper_eppi sse_op_table7[256] = {
[0xdf] = AESNI_OP(aeskeygenassist),
};
+#ifdef ENABLE_TCG_VECTOR
+#include "simd_helper.h"
+
+#define tcg_vector_abort() \
+do {\
+ fprintf(stderr, "%s:%d: tcg fatal error - unhandled vector op.\n", __FILE__, __LINE__);\
+ exit(1);\
+} while (0)
+
+static int gen_vload(DisasContext *s, int op, int mod, int modrm, int reg)
+{
+ int rm;
+ TCGArg alignment = 128;
+ CPUX86State *env = s->env;
+
+ if (!build_llvm(env))
+ return 0;
+
+ switch (op) {
+ case 0x010: /* movups */
+ case 0x110: /* movupd */
+ case 0x26f: /* movdqu xmm, ea */
+ alignment = (TCGArg)-1;
+ break;
+ default:
+ break;
+ }
+
+ if (mod != 3) {
+ gen_lea_modrm(env, s, modrm);
+ gen_vector_op3(INDEX_op_vload_128,
+ offsetof(CPUX86State, xmm_regs[reg]),
+ (TCGArg)cpu_A0,
+ alignment);
+ } else {
+ rm = (modrm & 7) | REX_B(s);
+ gen_vector_op3(INDEX_op_vmov_128,
+ offsetof(CPUX86State, xmm_regs[reg]),
+ offsetof(CPUX86State, xmm_regs[rm]),
+ alignment);
+ }
+
+ return 1;
+}
+
+static int gen_vstore(DisasContext *s, int op, int mod, int modrm, int reg)
+{
+ int rm;
+ TCGArg alignment = 128;
+ CPUX86State *env = s->env;
+
+ if (!build_llvm(env))
+ return 0;
+
+ switch (op) {
+ case 0x011: /* movups */
+ case 0x111: /* movupd */
+ case 0x27f: /* movdqu ea, xmm */
+ alignment = (TCGArg)-1;
+ break;
+ default:
+ break;
+ }
+
+ if (mod != 3) {
+ gen_lea_modrm(env, s, modrm);
+ gen_vector_op3(INDEX_op_vstore_128,
+ offsetof(CPUX86State, xmm_regs[reg]),
+ (TCGArg)cpu_A0,
+ alignment);
+ } else {
+ rm = (modrm & 7) | REX_B(s);
+ gen_vector_op3(INDEX_op_vmov_128,
+ offsetof(CPUX86State, xmm_regs[rm]),
+ offsetof(CPUX86State, xmm_regs[reg]),
+ alignment);
+ }
+
+ return 1;
+}
+
+static int gen_tcg_vector(DisasContext *s, int op, int b1, int mod, int modrm, int reg)
+{
+ int rd, rm, rn;
+ TCGArg alignment = 128;
+ CPUX86State *env = s->env;
+
+ if (!build_llvm(env))
+ return 0;
+
+ switch(op) {
+ case 0x54 ... 0x59:
+ case 0x5b: /* cvtdq2ps cvtps2dq cvttps2dq */
+ case 0x5c:
+ case 0x5e:
+ case 0xd4:
+ case 0xdb:
+ case 0xdf:
+ case 0xeb:
+ case 0xef:
+ case 0xf8 ... 0xfe:
+ break;
+ default: /* unhandled op */
+ return 0;
+ }
+
+ switch (op) {
+ case 0x50 ... 0x5a:
+ case 0x5c ... 0x5f:
+ case 0xc2:
+ /* Most sse scalar operations. */
+ if (b1 == 2 || b1 == 3)
+ return 0;
+ break;
+ }
+
+ rd = rn = reg;
+ if (mod != 3) {
+ gen_lea_modrm(env, s, modrm);
+ gen_vector_op3(INDEX_op_vload_128,
+ offsetof(CPUX86State, xmm_t0),
+ (TCGArg)cpu_A0,
+ alignment);
+ rm = -1;
+ } else {
+ rm = (modrm & 7) | REX_B(s);
+ }
+
+ switch(op) {
+ case 0x54: /* andps, andpd */
+ case 0xdb: /* MMX_OP2(pand) */
+ gen_vector_logical(vand); break;
+ case 0x55: /* andnps, andnpd */
+ case 0xdf: /* MMX_OP2(pandn) */
+ {
+ int rtmp = rn;
+ rn = rm;
+ rm = rtmp;
+ gen_vector_logical(vbic); break;
+ }
+ case 0x56: /* orps, orpd */
+ case 0xeb: /* por */
+ gen_vector_logical(vorr); break;
+ case 0x57: /* xorps, xorpd */
+ case 0xef: /* pxor */
+ gen_vector_logical(veor); break;
+ case 0x58: /* SSE_FOP(add) */
+ gen_vector_fop(vadd, b1); break;
+ case 0x59: /* SSE_FOP(mul) */
+ gen_vector_fop(vmul, b1); break;
+ case 0x5c: /* SSE_FOP(sub) */
+ gen_vector_fop(vsub, b1); break;
+ case 0x5e: /* SSE_FOP(div) */
+ gen_vector_fop(vdiv, b1); break;
+ case 0x5b: /* cvtdq2ps cvtps2dq cvttps2dq */
+ if(b1 == 0)
+ gen_vector_cvt(vsitofp, 32);
+ else if(b1 == 1)
+ gen_vector_cvt(vfptosi, 32);
+ else if(b1 == 2)
+ gen_vector_cvt(vfptosi, 32);
+ else
+ tcg_vector_abort();
+ break;
+ case 0xd4: /* MMX_OP2(paddq) */
+ if (b1 != 1)
+ tcg_vector_abort();
+ gen_vector_arith(vadd, i, 3); break;
+ case 0xf8 ... 0xfb: /* MMX_OP2(psubb ... psubq) */
+ if (b1 != 1)
+ tcg_vector_abort();
+ gen_vector_arith(vsub, i, (op-0xf8)); break;
+ case 0xfc ... 0xfe: /* MMX_OP2(paddb ... paddl) */
+ if (b1 != 1)
+ tcg_vector_abort();
+ gen_vector_arith(vadd, i, (op-0xfc)); break;
+ default:
+ tcg_vector_abort();
+ break;
+ }
+
+ return 1;
+}
+
+#endif
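
A concrete walk-through, using names from the code above: andps xmm2, [mem]
reaches gen_tcg_vector() with op = 0x54 and mod != 3, so it becomes

    vload_128  xmm_t0 <- [cpu_A0]              /* 128-bit aligned */
    vand.128   xmm_regs[2] <- xmm_regs[2], rm  /* rm == -1, which the
                                                  simd_helper.h macros are
                                                  assumed to resolve to
                                                  xmm_t0 */

The memory operand is staged through xmm_t0 (now 64-byte aligned in
CPUX86State) so every vector op sees register-resident, aligned inputs.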
+
static void gen_sse(CPUX86State *env, DisasContext *s, int b,
target_ulong pc_start, int rex_r)
{
@@ -3131,6 +3452,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
case 0x128: /* movapd */
case 0x16f: /* movdqa xmm, ea */
case 0x26f: /* movdqu xmm, ea */
+#ifdef ENABLE_TCG_VECTOR
+ if (gen_vload(s, b, mod, modrm, reg) == 1)
+ break;
+#endif
if (mod != 3) {
gen_lea_modrm(env, s, modrm);
gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
@@ -3317,6 +3642,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
case 0x129: /* movapd */
case 0x17f: /* movdqa ea, xmm */
case 0x27f: /* movdqu ea, xmm */
+#ifdef ENABLE_TCG_VECTOR
+ if (gen_vstore(s, b, mod, modrm, reg) == 1)
+ break;
+#endif
if (mod != 3) {
gen_lea_modrm(env, s, modrm);
gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg]));
@@ -4283,6 +4612,10 @@ static void gen_sse(CPUX86State *env, DisasContext *s, int b,
default:
break;
}
+#ifdef ENABLE_TCG_VECTOR
+ if (is_xmm && gen_tcg_vector(s, b, b1, mod, modrm, reg) == 1)
+ return;
+#endif
if (is_xmm) {
op1_offset = offsetof(CPUX86State,xmm_regs[reg]);
if (mod != 3) {
@@ -4565,9 +4898,11 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
s->aflag = aflag;
s->dflag = dflag;
+#ifndef CONFIG_COREMU
/* lock generation */
if (prefixes & PREFIX_LOCK)
gen_helper_lock();
+#endif
/* now check op code */
reswitch:
@@ -4719,6 +5054,29 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
set_cc_op(s, CC_OP_LOGICB + ot);
break;
case 2: /* not */
+#ifdef CONFIG_COREMU
+ if (s->prefix & PREFIX_LOCK) {
+ if (mod == 3)
+ goto illegal_op;
+
+ switch(ot & 3) {
+ case 0:
+ gen_helper_atomic_notb(cpu_env, cpu_A0);
+ break;
+ case 1:
+ gen_helper_atomic_notw(cpu_env, cpu_A0);
+ break;
+ case 2:
+ gen_helper_atomic_notl(cpu_env, cpu_A0);
+ break;
+#ifdef TARGET_X86_64
+ case 3:
+ gen_helper_atomic_notq(cpu_env, cpu_A0);
+#endif
+ }
+ break;
+ }
+#endif
tcg_gen_not_tl(cpu_T[0], cpu_T[0]);
if (mod != 3) {
gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
@@ -4727,6 +5085,32 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
}
break;
case 3: /* neg */
+#ifdef CONFIG_COREMU
+ if (s->prefix & PREFIX_LOCK) {
+ if (mod == 3)
+ goto illegal_op;
+
+ gen_update_cc_op(s);
+
+ switch(ot & 3) {
+ case 0:
+ gen_helper_atomic_negb(cpu_env, cpu_A0);
+ break;
+ case 1:
+ gen_helper_atomic_negw(cpu_env, cpu_A0);
+ break;
+ case 2:
+ gen_helper_atomic_negl(cpu_env, cpu_A0);
+ break;
+#ifdef TARGET_X86_64
+ case 3:
+ gen_helper_atomic_negq(cpu_env, cpu_A0);
+#endif
+ }
+ set_cc_op(s, CC_OP_EFLAGS);
+ break;
+ }
+#endif
tcg_gen_neg_tl(cpu_T[0], cpu_T[0]);
if (mod != 3) {
gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
@@ -4936,6 +5320,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
tcg_gen_movi_tl(cpu_T[1], next_eip);
gen_push_v(s, cpu_T[1]);
gen_op_jmp_v(cpu_T[0]);
+ s->gen_ibtc = 1;
gen_eob(s);
break;
case 3: /* lcall Ev */
@@ -4954,6 +5339,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
tcg_const_i32(dflag - 1),
tcg_const_i32(s->pc - s->cs_base));
}
+ s->gen_ibtc = 1;
gen_eob(s);
break;
case 4: /* jmp Ev */
@@ -4961,6 +5347,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
tcg_gen_ext16u_tl(cpu_T[0], cpu_T[0]);
}
gen_op_jmp_v(cpu_T[0]);
+ s->gen_ibtc = 1;
gen_eob(s);
break;
case 5: /* ljmp Ev */
@@ -4976,6 +5363,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_op_movl_seg_T0_vm(R_CS);
gen_op_jmp_v(cpu_T[1]);
}
+ s->gen_ibtc = 1;
gen_eob(s);
break;
case 6: /* push Ev */
@@ -5124,7 +5512,36 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
tcg_gen_add_tl(cpu_T[0], cpu_T[0], cpu_T[1]);
gen_op_mov_reg_v(ot, reg, cpu_T[1]);
gen_op_mov_reg_v(ot, rm, cpu_T[0]);
- } else {
+ } else
+#ifdef CONFIG_COREMU
+ if (s->prefix & PREFIX_LOCK) {
+ gen_lea_modrm(env, s, modrm);
+ gen_update_cc_op(s);
+
+ switch (ot & 3) {
+ case 0:
+ gen_helper_atomic_xaddb(cpu_env, cpu_A0, tcg_const_i32(reg),
+ tcg_const_i32(X86_64_HREGS));
+ break;
+ case 1:
+ gen_helper_atomic_xaddw(cpu_env, cpu_A0, tcg_const_i32(reg),
+ tcg_const_i32(X86_64_HREGS));
+ break;
+ case 2:
+ gen_helper_atomic_xaddl(cpu_env, cpu_A0, tcg_const_i32(reg),
+ tcg_const_i32(X86_64_HREGS));
+ break;
+#ifdef TARGET_X86_64
+ case 3:
+ gen_helper_atomic_xaddq(cpu_env, cpu_A0, tcg_const_i32(reg),
+ tcg_const_i32(x86_64_hregs));
+#endif
+ }
+ set_cc_op(s, CC_OP_EFLAGS);
+ break;
+ } else
+#endif
+ {
gen_lea_modrm(env, s, modrm);
gen_op_mov_v_reg(ot, cpu_T[0], reg);
gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
@@ -5145,6 +5562,38 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
modrm = cpu_ldub_code(env, s->pc++);
reg = ((modrm >> 3) & 7) | rex_r;
mod = (modrm >> 6) & 3;
+
+#ifdef CONFIG_COREMU
+ if (s->prefix & PREFIX_LOCK) {
+ if (mod == 3)
+ goto illegal_op;
+
+ gen_lea_modrm(env, s, modrm);
+ gen_update_cc_op(s);
+
+ switch(ot & 3) {
+ case 0:
+ gen_helper_atomic_cmpxchgb(cpu_env, cpu_A0, tcg_const_i32(reg),
+ tcg_const_i32(X86_64_HREGS));
+ break;
+ case 1:
+ gen_helper_atomic_cmpxchgw(cpu_env, cpu_A0, tcg_const_i32(reg),
+ tcg_const_i32(X86_64_HREGS));
+ break;
+ case 2:
+ gen_helper_atomic_cmpxchgl(cpu_env, cpu_A0, tcg_const_i32(reg),
+ tcg_const_i32(X86_64_HREGS));
+ break;
+#ifdef TARGET_X86_64
+ case 3:
+ gen_helper_atomic_cmpxchgq(cpu_env, cpu_A0, tcg_const_i32(reg),
+ tcg_const_i32(x86_64_hregs));
+#endif
+ }
+ set_cc_op(s, CC_OP_EFLAGS);
+ break;
+ }
+#endif
t0 = tcg_temp_local_new();
t1 = tcg_temp_local_new();
t2 = tcg_temp_local_new();
@@ -5201,6 +5650,11 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
if (!(s->cpuid_ext_features & CPUID_EXT_CX16))
goto illegal_op;
gen_lea_modrm(env, s, modrm);
+#ifdef CONFIG_COREMU
+ if (s->prefix & PREFIX_LOCK) {
+ gen_helper_atomic_cmpxchg16b(cpu_env, cpu_A0);
+ } else
+#endif
gen_helper_cmpxchg16b(cpu_env, cpu_A0);
} else
#endif
@@ -5208,6 +5662,11 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
if (!(s->cpuid_features & CPUID_CX8))
goto illegal_op;
gen_lea_modrm(env, s, modrm);
+#ifdef CONFIG_COREMU
+ if (s->prefix & PREFIX_LOCK) {
+ gen_helper_atomic_cmpxchg8b(cpu_env, cpu_A0);
+ } else
+#endif
gen_helper_cmpxchg8b(cpu_env, cpu_A0);
}
set_cc_op(s, CC_OP_EFLAGS);
@@ -5550,15 +6009,41 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_op_mov_reg_v(ot, reg, cpu_T[1]);
} else {
gen_lea_modrm(env, s, modrm);
+#ifdef CONFIG_COREMU
+ /* For xchg the lock prefix is implicit.
+ XXX: no flags are affected! */
+ switch (ot & 3) {
+ case 0:
+ gen_helper_xchgb(cpu_env, cpu_A0, tcg_const_i32(reg),
+ tcg_const_i32(X86_64_HREGS));
+ break;
+ case 1:
+ gen_helper_xchgw(cpu_env, cpu_A0, tcg_const_i32(reg),
+ tcg_const_i32(X86_64_HREGS));
+ break;
+ case 2:
+ gen_helper_xchgl(cpu_env, cpu_A0, tcg_const_i32(reg),
+ tcg_const_i32(X86_64_HREGS));
+ break;
+#ifdef TARGET_X86_64
+ case 3:
+ gen_helper_xchgq(cpu_env, cpu_A0, tcg_const_i32(reg),
+ tcg_const_i32(x86_64_hregs));
+#endif
+ }
+#else
gen_op_mov_v_reg(ot, cpu_T[0], reg);
/* for xchg, lock is implicit */
if (!(prefixes & PREFIX_LOCK))
gen_helper_lock();
gen_op_ld_v(s, ot, cpu_T[1], cpu_A0);
gen_op_st_v(s, ot, cpu_T[0], cpu_A0);
+#ifndef CONFIG_COREMU
if (!(prefixes & PREFIX_LOCK))
gen_helper_unlock();
+#endif
gen_op_mov_reg_v(ot, reg, cpu_T[1]);
+#endif
}
break;
case 0xc4: /* les Gv */
@@ -6360,6 +6845,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_stack_update(s, val + (1 << ot));
/* Note that gen_pop_T0 uses a zero-extending load. */
gen_op_jmp_v(cpu_T[0]);
+ s->gen_ibtc = 1;
gen_eob(s);
break;
case 0xc3: /* ret */
@@ -6367,6 +6853,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
gen_pop_update(s, ot);
/* Note that gen_pop_T0 uses a zero-extending load. */
gen_op_jmp_v(cpu_T[0]);
+ s->gen_ibtc = 1;
gen_eob(s);
break;
case 0xca: /* lret im */
@@ -6392,6 +6879,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
/* add stack offset */
gen_stack_update(s, val + (2 << dflag));
}
+ s->gen_ibtc = 1;
gen_eob(s);
break;
case 0xcb: /* lret */
@@ -6415,6 +6903,7 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
tcg_const_i32(s->pc - s->cs_base));
set_cc_op(s, CC_OP_EFLAGS);
}
+ s->gen_ibtc = 1;
gen_eob(s);
break;
case 0xe8: /* call im */
@@ -6680,6 +7169,27 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
}
bt_op:
tcg_gen_andi_tl(cpu_T[1], cpu_T[1], (1 << (3 + ot)) - 1);
+#ifdef CONFIG_COREMU
+ if (s->prefix & PREFIX_LOCK) {
+ gen_update_cc_op(s);
+
+ switch (op) {
+ case 0:
+ goto illegal_op;
+ case 1:
+ gen_helper_atomic_bts(cpu_env, cpu_A0, cpu_T[1], tcg_const_i32(ot));
+ break;
+ case 2:
+ gen_helper_atomic_btr(cpu_env, cpu_A0, cpu_T[1], tcg_const_i32(ot));
+ break;
+ case 3:
+ gen_helper_atomic_btc(cpu_env, cpu_A0, cpu_T[1], tcg_const_i32(ot));
+ }
+ set_cc_op(s, CC_OP_EFLAGS);
+ break;
+ }
+#endif
tcg_gen_shr_tl(cpu_tmp4, cpu_T[0], cpu_T[1]);
switch(op) {
case 0:
@@ -7818,12 +8328,16 @@ static target_ulong disas_insn(CPUX86State *env, DisasContext *s,
goto illegal_op;
}
/* lock generation */
+#ifndef CONFIG_COREMU
if (s->prefix & PREFIX_LOCK)
gen_helper_unlock();
+#endif
return s->pc;
illegal_op:
+#ifndef CONFIG_COREMU
if (s->prefix & PREFIX_LOCK)
gen_helper_unlock();
+#endif
/* XXX: ensure that no lock was generated */
gen_exception(s, EXCP06_ILLOP, pc_start - s->cs_base);
return s->pc;
@@ -7879,6 +8393,8 @@ void optimize_flags_init(void)
}
helper_lock_init();
+
+ copy_tcg_context_global();
}
/* generate intermediate code in gen_opc_buf and gen_opparam_buf for
@@ -7900,6 +8416,10 @@ void gen_intermediate_code(CPUX86State *env, TranslationBlock *tb)
cs_base = tb->cs_base;
flags = tb->flags;
+ dc->fallthrough = 0;
+ dc->gen_ibtc = 0;
+ dc->gen_cpbl = 0;
+ dc->env = env;
dc->pe = (flags >> HF_PE_SHIFT) & 1;
dc->code32 = (flags >> HF_CS32_SHIFT) & 1;
dc->ss32 = (flags >> HF_SS32_SHIFT) & 1;
@@ -7977,7 +8497,12 @@ void gen_intermediate_code(CPUX86State *env, TranslationBlock *tb)
max_insns = TCG_MAX_INSNS;
}
- gen_tb_start(tb);
+ if (!build_llvm(env)) {
+ gen_tb_start(tb);
+ if (tracer_mode != TRANS_MODE_NONE)
+ tcg_gen_hotpatch(IS_USER(dc), tracer_mode == TRANS_MODE_HYBRIDS ||
+ tracer_mode == TRANS_MODE_HYBRIDM);
+ }
for(;;) {
tcg_gen_insn_start(pc_ptr, dc->cc_op);
num_insns++;
@@ -8027,12 +8552,27 @@ void gen_intermediate_code(CPUX86State *env, TranslationBlock *tb)
gen_eob(dc);
break;
}
+
+#if defined(CONFIG_LLVM) && defined(CONFIG_USER_ONLY)
+ if (llvm_has_annotation(pc_ptr, ANNOTATION_LOOP))
+ break;
+#endif
+ if (build_llvm(env) && num_insns == tb->icount) {
+ gen_jmp_im(pc_ptr - dc->cs_base);
+ gen_eob(dc);
+ break;
+ }
+
/* if too long translation, stop generation too */
if (tcg_op_buf_full() ||
(pc_ptr - pc_start) >= (TARGET_PAGE_SIZE - 32) ||
num_insns >= max_insns) {
gen_jmp_im(pc_ptr - dc->cs_base);
+ dc->fallthrough = 1;
+ dc->gen_ibtc = 1;
gen_eob(dc);
+
+ tb->jmp_pc[0] = pc_ptr;
break;
}
if (singlestep) {
@@ -8041,13 +8581,28 @@ void gen_intermediate_code(CPUX86State *env, TranslationBlock *tb)
break;
}
}
+ if (build_llvm(env) && tb->size != dc->pc - pc_start) {
+ /* Consistency check against the tb info: the guest basic blocks
+ * must be identical; bail out if this trace is inconsistent. */
+ fprintf(stderr, "inconsistent block with pc 0x"TARGET_FMT_lx" size=%d"
+ " icount=%d (error size="TARGET_FMT_ld")\n",
+ tb->pc, tb->size, tb->icount, dc->pc - pc_start);
+ exit(0);
+ }
+
if (tb->cflags & CF_LAST_IO)
gen_io_end();
done_generating:
- gen_tb_end(tb, num_insns);
+
+ if (build_llvm(env)) {
+ /* Terminate the linked list. */
+ tcg_ctx.gen_op_buf[tcg_ctx.gen_last_op_idx].next = -1;
+ } else {
+ gen_tb_end(tb, num_insns);
+ }
#ifdef DEBUG_DISAS
- if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
+ if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) && !build_llvm(env)) {
int disas_flags;
qemu_log("----------------\n");
qemu_log("IN: %s\n", lookup_symbol(pc_start));
@@ -8062,8 +8617,10 @@ done_generating:
}
#endif
- tb->size = pc_ptr - pc_start;
- tb->icount = num_insns;
+ if (!build_llvm(env)) {
+ tb->size = pc_ptr - pc_start;
+ tb->icount = num_insns;
+ }
}
void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb,
diff --git a/target-ppc/Makefile.objs b/target-ppc/Makefile.objs
index e667e69..363a701 100644
--- a/target-ppc/Makefile.objs
+++ b/target-ppc/Makefile.objs
@@ -1,5 +1,5 @@
obj-y += cpu-models.o
-obj-y += translate.o
+obj-y += translate.o helper.o
ifeq ($(CONFIG_SOFTMMU),y)
obj-y += machine.o mmu_helper.o mmu-hash32.o monitor.o
obj-$(TARGET_PPC64) += mmu-hash64.o arch_dump.o
diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 9706000..bf1481a 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -88,7 +88,6 @@
/*****************************************************************************/
/* MMU model */
-typedef enum powerpc_mmu_t powerpc_mmu_t;
enum powerpc_mmu_t {
POWERPC_MMU_UNKNOWN = 0x00000000,
/* Standard 32 bits PowerPC MMU */
@@ -133,10 +132,10 @@ enum powerpc_mmu_t {
| 0x00000004,
#endif /* defined(TARGET_PPC64) */
};
+typedef enum powerpc_mmu_t powerpc_mmu_t;
/*****************************************************************************/
/* Exception model */
-typedef enum powerpc_excp_t powerpc_excp_t;
enum powerpc_excp_t {
POWERPC_EXCP_UNKNOWN = 0,
/* Standard PowerPC exception model */
@@ -170,6 +169,7 @@ enum powerpc_excp_t {
POWERPC_EXCP_POWER7,
#endif /* defined(TARGET_PPC64) */
};
+typedef enum powerpc_excp_t powerpc_excp_t;
/*****************************************************************************/
/* Exception vectors definitions */
@@ -298,7 +298,6 @@ enum {
/*****************************************************************************/
/* Input pins model */
-typedef enum powerpc_input_t powerpc_input_t;
enum powerpc_input_t {
PPC_FLAGS_INPUT_UNKNOWN = 0,
/* PowerPC 6xx bus */
@@ -316,6 +315,7 @@ enum powerpc_input_t {
/* Freescale RCPU bus */
PPC_FLAGS_INPUT_RCPU,
};
+typedef enum powerpc_input_t powerpc_input_t;
#define PPC_INPUT(env) (env->bus_model)
@@ -1168,6 +1168,8 @@ struct CPUPPCState {
uint32_t tm_vscr;
uint64_t tm_dscr;
uint64_t tm_tar;
+
+ CPU_OPTIMIZATION_COMMON
};
#define SET_FIT_PERIOD(a_, b_, c_, d_) \
@@ -2226,6 +2228,17 @@ static inline void cpu_get_tb_cpu_state(CPUPPCState *env, target_ulong *pc,
*flags = env->hflags;
}
+static inline target_ulong cpu_get_pc(CPUPPCState *env)
+{
+ return env->nip;
+}
+
+static inline int cpu_check_state(CPUPPCState *env,
+ target_ulong cs_base, int flags)
+{
+ return cs_base == 0 && (uint32_t)flags == env->hflags;
+}
+
#if !defined(CONFIG_USER_ONLY)
static inline int booke206_tlbm_id(CPUPPCState *env, ppcmas_tlb_t *tlbm)
{
@@ -2311,7 +2324,7 @@ static inline uint32_t booke206_tlbnps(CPUPPCState *env, const int tlbn)
uint32_t tlbncfg = env->spr[SPR_BOOKE_TLB0CFG + tlbn];
uint32_t min = (tlbncfg & TLBnCFG_MINSIZE) >> TLBnCFG_MINSIZE_SHIFT;
uint32_t max = (tlbncfg & TLBnCFG_MAXSIZE) >> TLBnCFG_MAXSIZE_SHIFT;
- int i;
+ unsigned i;
for (i = min; i <= max; i++) {
ret |= (1 << (i << 1));
}
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 869be15..c96f51b 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -667,3 +667,5 @@ DEF_HELPER_4(dscli, void, env, fprp, fprp, i32)
DEF_HELPER_4(dscliq, void, env, fprp, fprp, i32)
DEF_HELPER_1(tbegin, void, env)
+
+#include "hqemu-helper.h"
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 41a7258..15cedc5 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -28,7 +28,13 @@
#include "exec/helper-gen.h"
#include "trace-tcg.h"
+#include "hqemu.h"
+#if defined(CONFIG_USER_ONLY)
+#define IS_USER(s) 1
+#else
+#define IS_USER(s) (s->mem_idx == MMU_USER_IDX)
+#endif
#define CPU_SINGLE_STEP 0x1
#define CPU_BRANCH_STEP 0x2
@@ -180,6 +186,8 @@ void ppc_translate_init(void)
offsetof(CPUPPCState, access_type), "access_type");
done_init = 1;
+
+ copy_tcg_context_global();
}
/* internal defines */
@@ -11479,7 +11487,12 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb)
max_insns = TCG_MAX_INSNS;
}
- gen_tb_start(tb);
+ if (!build_llvm(env)) {
+ gen_tb_start(tb);
+ if (tracer_mode != TRANS_MODE_NONE)
+ tcg_gen_hotpatch(IS_USER(ctxp), tracer_mode == TRANS_MODE_HYBRIDS ||
+ tracer_mode == TRANS_MODE_HYBRIDM);
+ }
tcg_clear_temp_count();
/* Set env in case of segfault during code fetch */
while (ctx.exception == POWERPC_EXCP_NONE && !tcg_op_buf_full()) {
@@ -11553,6 +11566,9 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb)
#if defined(DO_PPC_STATISTICS)
handler->count++;
#endif
+ if (build_llvm(env) && num_insns == tb->icount)
+ break;
+
/* Check trace mode exceptions */
if (unlikely(ctx.singlestep_enabled & CPU_SINGLE_STEP &&
(ctx.nip <= 0x100 || ctx.nip > 0xF00) &&
@@ -11576,6 +11592,16 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb)
exit(1);
}
}
+
+ if (build_llvm(env) && tb->size != ctx.nip - pc_start) {
+ /* Consistency check against the tb info: the guest basic blocks
+ * must be identical; bail out if this trace is inconsistent. */
+ fprintf(stderr, "inconsistent block with pc 0x"TARGET_FMT_lx" size=%d"
+ " icount=%d (error size="TARGET_FMT_ld")\n",
+ tb->pc, tb->size, tb->icount, ctx.nip - pc_start);
+ exit(0);
+ }
+
if (tb->cflags & CF_LAST_IO)
gen_io_end();
if (ctx.exception == POWERPC_EXCP_NONE) {
@@ -11587,13 +11613,18 @@ void gen_intermediate_code(CPUPPCState *env, struct TranslationBlock *tb)
/* Generate the return instruction */
tcg_gen_exit_tb(0);
}
- gen_tb_end(tb, num_insns);
- tb->size = ctx.nip - pc_start;
- tb->icount = num_insns;
+ if (build_llvm(env)) {
+ /* Terminate the linked list. */
+ tcg_ctx.gen_op_buf[tcg_ctx.gen_last_op_idx].next = -1;
+ } else {
+ gen_tb_end(tb, num_insns);
+ tb->size = ctx.nip - pc_start;
+ tb->icount = num_insns;
+ }
#if defined(DEBUG_DISAS)
- if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)) {
+ if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM) && !build_llvm(env)) {
int flags;
flags = env->bfd_mach;
flags |= ctx.le_mode << 16;
diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index 0ed10a9..05e26af 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -1264,7 +1264,56 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data_reg, TCGReg addr_reg,
#endif /* CONFIG_SOFTMMU */
}
-static tcg_insn_unit *tb_ret_addr;
+tcg_insn_unit *tb_ret_addr;
+tcg_insn_unit *ibtc_ret_addr;
+
+/*
+ * Emit trace profiling/prediction stubs. The code sequence is as follows:
+ * S1: direct jump (the reloc part requires 4-byte alignment)
+ * S2: trace profiling stub
+ * S3: trace prediction stub
+ * S4: beginning of QEMU emulation code
+ *
+ * The jump instruction of S1 is initially set to jump to S3 (i.e. it
+ * skips S2). The offset of S3 (patch_next) is recorded so that trace
+ * profiling can be turned off later, and the offset of S4 (patch_skip)
+ * so that the trace stubs can be skipped quickly when searching for a pc.
+ */
+static void tcg_out_hotpatch(TCGContext *s, int is_user, int emit_helper)
+{
+ tcg_insn_unit *label_ptr[2];
+ TranslationBlock *tb = s->tb;
+
+ tb->patch_jmp = (uint16_t)((intptr_t)s->code_ptr - (intptr_t)s->code_buf);
+
+ /* S1: Direct Jump */
+ if (is_user == 0 || emit_helper == 0) {
+ tcg_out_goto(s, s->code_ptr + 1);
+ tb->patch_next = (uint16_t)((intptr_t)s->code_ptr - (intptr_t)s->code_buf);
+ return;
+ }
+
+ label_ptr[0] = s->code_ptr;
+ tcg_out_goto_noaddr(s);
+ /* S2: Trace Profiling Stub */
+ tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
+ tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[1], tb->id);
+ tcg_out_call(s, (tcg_insn_unit *)helper_NET_profile);
+ reloc_pc26(label_ptr[0], s->code_ptr);
+
+ /* S3: Trace Prediction Stub */
+ tb->patch_next = (uint16_t)((intptr_t)s->code_ptr - (intptr_t)s->code_buf);
+
+ tcg_out_ld(s, TCG_TYPE_I32, tcg_target_reg_alloc_order[0],
+ TCG_AREG0, offsetof(CPUArchState, start_trace_prediction));
+ tcg_out_cmp(s, 0, tcg_target_reg_alloc_order[0], 0, 1);
+ label_ptr[1] = s->code_ptr;
+ tcg_out_goto_cond_noaddr(s, TCG_COND_EQ);
+ tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
+ tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[1], tb->id);
+ tcg_out_call(s, (tcg_insn_unit *)helper_NET_predict);
+ reloc_pc19(label_ptr[1], s->code_ptr);
+}
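
The recorded offsets are what later let the runtime flip profiling off for a single block. A minimal sketch of that patching step, assuming a patch_branch_direct() helper that rewrites the S1 branch target (the name is made up):

    static void hotpatch_profile_off(TranslationBlock *tb)
    {
        /* patch_jmp/patch_next are byte offsets from the block's code start */
        uintptr_t s1 = (uintptr_t)tb->tc_ptr + tb->patch_jmp;
        uintptr_t s3 = (uintptr_t)tb->tc_ptr + tb->patch_next;
        patch_branch_direct((void *)s1, (void *)s3);  /* assumed helper */
        flush_icache_range(s1, s1 + 4);               /* one AArch64 insn */
    }
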
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
const TCGArg args[TCG_MAX_OP_ARGS],
@@ -1302,6 +1351,16 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
s->tb_next_offset[a0] = tcg_current_code_size(s);
break;
+ case INDEX_op_hotpatch:
+ tcg_out_hotpatch(s, args[0], args[1]);
+ break;
+ case INDEX_op_jmp:
+ if (const_args[0]) {
+ tcg_out_goto(s, (tcg_insn_unit *)args[0]);
+ } else {
+ tcg_out_insn(s, 3207, BR, args[0]);
+ }
+ break;
case INDEX_op_br:
tcg_out_goto_label(s, arg_label(a0));
break;
@@ -1637,6 +1696,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
}
static const TCGTargetOpDef aarch64_op_defs[] = {
+ { INDEX_op_hotpatch, { "i", "i" } },
+ { INDEX_op_jmp, { "ri" } },
{ INDEX_op_exit_tb, { } },
{ INDEX_op_goto_tb, { } },
{ INDEX_op_br, { } },
@@ -1748,6 +1809,10 @@ static const TCGTargetOpDef aarch64_op_defs[] = {
{ INDEX_op_muluh_i64, { "r", "r", "r" } },
{ INDEX_op_mulsh_i64, { "r", "r", "r" } },
+#define DEF(name,a1,a2,a3,a4) { INDEX_op_##name, {} },
+#include "tcg-opc-vector.h"
+#undef DEF
+
{ -1 },
};
@@ -1777,12 +1842,24 @@ static void tcg_target_init(TCGContext *s)
tcg_add_target_add_op_defs(aarch64_op_defs);
}
+static void tcg_out_epilogue(TCGContext *s)
+{
+ /* IBTC exit entry */
+ ibtc_ret_addr = s->code_ptr;
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_X0, 0);
+}
+
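
For context, a sketch of what a jump to ibtc_ret_addr buys the dispatcher: the stub zeroes X0 and falls through into the tb_ret_addr path, so tcg_qemu_tb_exec() returns 0 on an IBTC miss. The fallback call below is an assumption about the dispatch loop, not part of this patch:

    uintptr_t next_tb = tcg_qemu_tb_exec(env, tb_ptr);
    if (next_tb == 0) {
        /* IBTC miss: no predicted block, fall back to the full lookup */
        tb = tb_find_fast(cpu);  /* assumed dispatcher fallback */
    }
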
+#if defined(CONFIG_LLVM)
+#define STACK_SIZE 0x800
+#else
+#define STACK_SIZE TCG_STATIC_CALL_ARGS_SIZE
+#endif
/* Saving pairs: (X19, X20) .. (X27, X28), (X29(fp), X30(lr)). */
#define PUSH_SIZE ((30 - 19 + 1) * 8)
#define FRAME_SIZE \
((PUSH_SIZE \
- + TCG_STATIC_CALL_ARGS_SIZE \
+ + STACK_SIZE \
+ CPU_TEMP_BUF_NLONGS * sizeof(long) \
+ TCG_TARGET_STACK_ALIGN - 1) \
& ~(TCG_TARGET_STACK_ALIGN - 1))
@@ -1828,6 +1905,7 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
tcg_out_insn(s, 3207, BR, tcg_target_call_iarg_regs[1]);
+ tcg_out_epilogue(s);
tb_ret_addr = s->code_ptr;
/* Remove TCG locals stack space. */
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 9187d34..b95f5fb 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -139,7 +139,8 @@ static bool have_bmi2;
# define have_bmi2 0
#endif
-static tcg_insn_unit *tb_ret_addr;
+tcg_insn_unit *tb_ret_addr;
+tcg_insn_unit *ibtc_ret_addr;
static void patch_reloc(tcg_insn_unit *code_ptr, int type,
intptr_t value, intptr_t addend)
@@ -323,6 +324,7 @@ static inline int tcg_target_const_match(tcg_target_long val, TCGType type,
#define OPC_MOVB_EvGv (0x88) /* stores, more or less */
#define OPC_MOVL_EvGv (0x89) /* stores, more or less */
#define OPC_MOVL_GvEv (0x8b) /* loads, more or less */
+#define OPC_NOP (0x90)
#define OPC_MOVB_EvIz (0xc6)
#define OPC_MOVL_EvIz (0xc7)
#define OPC_MOVL_Iv (0xb8)
@@ -1150,6 +1152,62 @@ static void * const qemu_st_helpers[16] = {
[MO_BEQ] = helper_be_stq_mmu,
};
+/* helpers for LLVM */
+void * const llvm_ld_helpers[16] = {
+ [MO_UB] = llvm_ret_ldub_mmu,
+ [MO_LEUW] = llvm_le_lduw_mmu,
+ [MO_LEUL] = llvm_le_ldul_mmu,
+ [MO_LEQ] = llvm_le_ldq_mmu,
+ [MO_BEUW] = llvm_be_lduw_mmu,
+ [MO_BEUL] = llvm_be_ldul_mmu,
+ [MO_BEQ] = llvm_be_ldq_mmu,
+};
+
+void * const llvm_st_helpers[16] = {
+ [MO_UB] = llvm_ret_stb_mmu,
+ [MO_LEUW] = llvm_le_stw_mmu,
+ [MO_LEUL] = llvm_le_stl_mmu,
+ [MO_LEQ] = llvm_le_stq_mmu,
+ [MO_BEUW] = llvm_be_stw_mmu,
+ [MO_BEUL] = llvm_be_stl_mmu,
+ [MO_BEQ] = llvm_be_stq_mmu,
+};
+
+static inline void tcg_out_compute_gva(TCGContext *s, TCGReg addrlo,
+ TCGMemOp opc, int trexw, int tv_hrexw)
+{
+ const TCGReg r1 = TCG_REG_L1;
+ int s_mask = (1 << (opc & MO_SIZE)) - 1;
+
+#if defined(ALIGNED_ONLY)
+ TCGType ttype = TCG_TYPE_I32;
+ bool aligned = (opc & MO_AMASK) == MO_ALIGN || s_mask == 0;
+ if (TCG_TARGET_REG_BITS == 64 && TARGET_LONG_BITS == 64)
+ ttype = TCG_TYPE_I64;
+ if (aligned) {
+ tcg_out_mov(s, ttype, r1, addrlo);
+ } else {
+ /* For unaligned access check that we don't cross pages using
+ the page address of the last byte. */
+ tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask);
+ }
+ tgen_arithi(s, ARITH_AND + trexw, r1,
+ TARGET_PAGE_MASK | (aligned ? s_mask : 0), 0);
+#elif defined(ENABLE_TLBVERSION)
+ /* The following code is equivalent to
+ * (((addr + (size - 1)) & TARGET_PAGE_MASK) | env->tlb_version) */
+ tcg_out_modrm_sib_offset(s, OPC_LEA + trexw, r1, addrlo, -1, 0, s_mask);
+ tgen_arithi(s, ARITH_AND + trexw, r1, TARGET_PAGE_MASK, 0);
+ tcg_out_modrm_offset(s, (OPC_ARITH_GvEv | (ARITH_OR << 3)) + trexw + tv_hrexw,
+ r1, TCG_AREG0, offsetof(CPUArchState, tlb_version));
+#else
+ /* The following code is equivalent to
+ * ((addr + (size - 1)) & TARGET_PAGE_MASK) */
+ tcg_out_modrm_sib_offset(s, OPC_LEA + trexw, r1, addrlo, -1, 0, s_mask);
+ tgen_arithi(s, ARITH_AND + trexw, r1, TARGET_PAGE_MASK, 0);
+#endif
+}
+
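
Restated in plain C, the two non-ALIGNED_ONLY arms of the helper compute exactly this (the ALIGNED_ONLY arm differs only for accesses declared aligned); tlb_version is the field the TLB-versioning variant adds to CPUArchState:

    static inline target_ulong compute_gva(CPUArchState *env,
                                           target_ulong addr, int s_mask)
    {
        /* page address of the access's last byte, so an unaligned access
         * that crosses a page boundary fails the TLB compare */
        target_ulong gva = (addr + s_mask) & TARGET_PAGE_MASK;
    #if defined(ENABLE_TLBVERSION)
        gva |= env->tlb_version;  /* fold the TLB version into the tag */
    #endif
        return gva;
    }
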
/* Perform the TLB load and compare.
Inputs:
@@ -1179,9 +1237,7 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
const TCGReg r1 = TCG_REG_L1;
TCGType ttype = TCG_TYPE_I32;
TCGType tlbtype = TCG_TYPE_I32;
- int trexw = 0, hrexw = 0, tlbrexw = 0;
- int s_mask = (1 << (opc & MO_SIZE)) - 1;
- bool aligned = (opc & MO_AMASK) == MO_ALIGN || s_mask == 0;
+ int trexw = 0, hrexw = 0, tlbrexw = 0, tv_hrexw = 0;
if (TCG_TARGET_REG_BITS == 64) {
if (TARGET_LONG_BITS == 64) {
@@ -1197,20 +1253,18 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
}
}
+#if defined(ENABLE_TLBVERSION_EXT)
+ trexw = 0;
+ tv_hrexw = P_REXW;
+#endif
+
tcg_out_mov(s, tlbtype, r0, addrlo);
- if (aligned) {
- tcg_out_mov(s, ttype, r1, addrlo);
- } else {
- /* For unaligned access check that we don't cross pages using
- the page address of the last byte. */
- tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask);
- }
tcg_out_shifti(s, SHIFT_SHR + tlbrexw, r0,
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
- tgen_arithi(s, ARITH_AND + trexw, r1,
- TARGET_PAGE_MASK | (aligned ? s_mask : 0), 0);
+ tcg_out_compute_gva(s, addrlo, opc, trexw, tv_hrexw);
+
tgen_arithi(s, ARITH_AND + tlbrexw, r0,
(CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);
@@ -1219,7 +1273,7 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
+ which);
/* cmp 0(r0), r1 */
- tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw, r1, r0, 0);
+ tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw + tv_hrexw, r1, r0, 0);
/* Prepare for both the fast path add of the tlb addend, and the slow
path function argument setup. There are two cases worth note:
@@ -1754,6 +1808,73 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
#endif
}
+/*
+ * Emit trace profiling/prediction stubs. The code sequence is as follows:
+ * S1: direct jump (the reloc part requires 4-byte alignment)
+ * S2: trace profiling stub
+ * S3: trace prediction stub
+ * S4: beginning of QEMU emulation code
+ *
+ * The jump instruction of S1 is initially set to jump to S3 (i.e. it
+ * skips S2). The offset of S3 (patch_next) is recorded so that trace
+ * profiling can be turned off later, and the offset of S4 (patch_skip)
+ * so that the trace stubs can be skipped quickly when searching for a pc.
+ */
+static void tcg_out_hotpatch(TCGContext *s, uint32_t is_user, uint32_t emit_helper)
+{
+ uint8_t *label_ptr[2];
+ TranslationBlock *tb = s->tb;
+
+ /* S1: direct jump */
+ while (((uintptr_t)s->code_ptr + 1) % 4)
+ tcg_out8(s, OPC_NOP);
+
+ tb->patch_jmp = (uint16_t)(s->code_ptr - s->code_buf);
+
+ tcg_out8(s, OPC_JMP_long);
+ label_ptr[0] = s->code_ptr;
+ s->code_ptr += 4;
+
+ if (is_user == 0 || emit_helper == 0) {
+ *(uint32_t *)label_ptr[0] = s->code_ptr - label_ptr[0] - 4;
+ tb->patch_next = (uint16_t)(s->code_ptr - s->code_buf);
+ return;
+ }
+
+ /* S2: trace profiling stub */
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, 0);
+ tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, 4, tb->id);
+ } else {
+ tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
+ tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[1], tb->id);
+ }
+ tcg_out_call(s, (tcg_insn_unit *)helper_NET_profile);
+ *(uint32_t *)label_ptr[0] = s->code_ptr - label_ptr[0] - 4;
+
+ /* S3: trace prediction stub */
+ tb->patch_next = (uint16_t)(s->code_ptr - s->code_buf);
+
+ tcg_out_ld(s, TCG_TYPE_I32, tcg_target_reg_alloc_order[0],
+ TCG_AREG0, offsetof(CPUArchState, start_trace_prediction));
+ tcg_out_cmp(s, tcg_target_reg_alloc_order[0], 0, 1, 0);
+ tcg_out_opc(s, OPC_JCC_long + JCC_JE, 0, 0, 0);
+ label_ptr[1] = s->code_ptr;
+ s->code_ptr += 4;
+
+ if (TCG_TARGET_REG_BITS == 32) {
+ tcg_out_st(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP, 0);
+ tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, 4, tb->id);
+ } else {
+ tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
+ tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[1], tb->id);
+ }
+ tcg_out_call(s, (tcg_insn_unit *)helper_NET_predict);
+ *(uint32_t *)label_ptr[1] = s->code_ptr - label_ptr[1] - 4;
+
+ /* S4: QEMU emulation code */
+}
+
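
The displacement math above follows the standard x86 rule: a JMP rel32 whose opcode byte sits at p transfers control to p + 5 + rel32. Isolated, the back-patch is:

    /* Patch a 5-byte JMP rel32 at opc to branch to target. */
    static void patch_jmp_rel32(uint8_t *opc, uint8_t *target)
    {
        /* label_ptr in the code above plays the role of opc + 1 */
        *(uint32_t *)(opc + 1) = target - (opc + 1) - 4;
    }
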
static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
const TCGArg *args, const int *const_args)
{
@@ -1777,6 +1898,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_goto_tb:
if (s->tb_jmp_offset) {
/* direct jump method */
+#if defined(CONFIG_USER_ONLY)
+ while (((uintptr_t)s->code_ptr + 1) % 4) /* rel32 needs 4-byte alignment */
+ tcg_out8(s, OPC_NOP);
+#endif
tcg_out8(s, OPC_JMP_long); /* jmp im */
s->tb_jmp_offset[args[0]] = tcg_current_code_size(s);
tcg_out32(s, 0);
@@ -1787,6 +1912,17 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
}
s->tb_next_offset[args[0]] = tcg_current_code_size(s);
break;
+ case INDEX_op_hotpatch:
+ tcg_out_hotpatch(s, args[0], args[1]);
+ break;
+ case INDEX_op_jmp:
+ if (const_args[0]) {
+ tcg_out_jmp(s, (tcg_insn_unit *)args[0]);
+ } else {
+ /* jmp *reg */
+ tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, args[0]);
+ }
+ break;
case INDEX_op_br:
tcg_out_jxx(s, JCC_JMP, arg_label(args[0]), 0);
break;
@@ -2110,6 +2246,8 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
}
static const TCGTargetOpDef x86_op_defs[] = {
+ { INDEX_op_hotpatch, { "i", "i" } },
+ { INDEX_op_jmp, { "ri" } },
{ INDEX_op_exit_tb, { } },
{ INDEX_op_goto_tb, { } },
{ INDEX_op_br, { } },
@@ -2238,6 +2376,11 @@ static const TCGTargetOpDef x86_op_defs[] = {
{ INDEX_op_qemu_ld_i64, { "r", "r", "L", "L" } },
{ INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } },
#endif
+
+#define DEF(name,a1,a2,a3,a4) { INDEX_op_##name, {} },
+#include "tcg-opc-vector.h"
+#undef DEF
+
{ -1 },
};
@@ -2261,16 +2404,29 @@ static int tcg_target_callee_save_regs[] = {
#endif
};
+static void tcg_out_epilogue(TCGContext *s)
+{
+ /* IBTC exit entry */
+ ibtc_ret_addr = s->code_ptr;
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_EAX, 0);
+}
+
/* Compute frame size via macros, to share between tcg_target_qemu_prologue
and tcg_register_jit. */
+#if defined(CONFIG_LLVM)
+#define STACK_SIZE 0x2000
+#else
+#define STACK_SIZE TCG_STATIC_CALL_ARGS_SIZE
+#endif
+
#define PUSH_SIZE \
((1 + ARRAY_SIZE(tcg_target_callee_save_regs)) \
* (TCG_TARGET_REG_BITS / 8))
#define FRAME_SIZE \
((PUSH_SIZE \
- + TCG_STATIC_CALL_ARGS_SIZE \
+ + STACK_SIZE \
+ CPU_TEMP_BUF_NLONGS * sizeof(long) \
+ TCG_TARGET_STACK_ALIGN - 1) \
& ~(TCG_TARGET_STACK_ALIGN - 1))
@@ -2279,10 +2435,12 @@ static int tcg_target_callee_save_regs[] = {
static void tcg_target_qemu_prologue(TCGContext *s)
{
int i, stack_addend;
+ tcg_target_long stack_align_mask;
/* TB prologue */
/* Reserve some stack space, also for TCG temps. */
+ stack_align_mask = ~(TCG_TARGET_STACK_ALIGN - 1);
stack_addend = FRAME_SIZE - PUSH_SIZE;
tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
CPU_TEMP_BUF_NLONGS * sizeof(long));
@@ -2296,6 +2454,9 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tcg_out_ld(s, TCG_TYPE_PTR, TCG_AREG0, TCG_REG_ESP,
(ARRAY_SIZE(tcg_target_callee_save_regs) + 1) * 4);
tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
+ tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_ESP, TCG_AREG0,
+ offsetof(CPUArchState, sp));
+ tgen_arithi(s, ARITH_AND, TCG_REG_ESP, stack_align_mask, 0);
/* jmp *tb. */
tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, TCG_REG_ESP,
(ARRAY_SIZE(tcg_target_callee_save_regs) + 2) * 4
@@ -2303,13 +2464,19 @@ static void tcg_target_qemu_prologue(TCGContext *s)
#else
tcg_out_mov(s, TCG_TYPE_PTR, TCG_AREG0, tcg_target_call_iarg_regs[0]);
tcg_out_addi(s, TCG_REG_ESP, -stack_addend);
+ tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_ESP, TCG_AREG0,
+ offsetof(CPUArchState, sp));
+ tgen_arithi(s, ARITH_AND + P_REXW, TCG_REG_ESP, stack_align_mask, 0);
/* jmp *tb. */
tcg_out_modrm(s, OPC_GRP5, EXT5_JMPN_Ev, tcg_target_call_iarg_regs[1]);
#endif
/* TB epilogue */
+ tcg_out_epilogue(s);
tb_ret_addr = s->code_ptr;
+ tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_ESP, TCG_AREG0,
+ offsetof(CPUArchState, sp));
tcg_out_addi(s, TCG_REG_CALL_STACK, stack_addend);
for (i = ARRAY_SIZE(tcg_target_callee_save_regs) - 1; i >= 0; i--) {
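
Taken together, the new prologue/epilogue lines amount to this (pseudo-C; esp stands for the host stack pointer, and the vector-spill motive is our reading of the TCG_TARGET_STACK_ALIGN bump to 32 in the next hunk):

    /* prologue: remember the true stack pointer, then over-align it */
    env->sp = esp;
    esp &= ~(tcg_target_long)(TCG_TARGET_STACK_ALIGN - 1);
    /* ... translated code (and LLVM traces) run here ... */
    /* epilogue, at tb_ret_addr: restore before popping callee saves */
    esp = env->sp;
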
diff --git a/tcg/i386/tcg-target.h b/tcg/i386/tcg-target.h
index 92be341..c5715dc 100644
--- a/tcg/i386/tcg-target.h
+++ b/tcg/i386/tcg-target.h
@@ -67,7 +67,7 @@ typedef enum {
/* used for function call generation */
#define TCG_REG_CALL_STACK TCG_REG_ESP
-#define TCG_TARGET_STACK_ALIGN 16
+#define TCG_TARGET_STACK_ALIGN 32
#if defined(_WIN64)
#define TCG_TARGET_CALL_STACK_OFFSET 32
#else
diff --git a/tcg/ppc/tcg-target.c b/tcg/ppc/tcg-target.c
index 2c72565..ca5c7a4 100644
--- a/tcg/ppc/tcg-target.c
+++ b/tcg/ppc/tcg-target.c
@@ -78,7 +78,8 @@
#define TCG_CT_CONST_ZERO 0x1000
#define TCG_CT_CONST_MONE 0x2000
-static tcg_insn_unit *tb_ret_addr;
+tcg_insn_unit *tb_ret_addr;
+tcg_insn_unit *ibtc_ret_addr;
#include "elf.h"
static bool have_isa_2_06;
@@ -1785,8 +1786,14 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
#define CPU_TEMP_BUF_SIZE (CPU_TEMP_BUF_NLONGS * (int)sizeof(long))
#define REG_SAVE_SIZE ((int)ARRAY_SIZE(tcg_target_callee_save_regs) * SZR)
+#if defined(CONFIG_LLVM)
+#define STACK_SIZE 0x800
+#else
+#define STACK_SIZE TCG_STATIC_CALL_ARGS_SIZE
+#endif
+
#define FRAME_SIZE ((TCG_TARGET_CALL_STACK_OFFSET \
- + TCG_STATIC_CALL_ARGS_SIZE \
+ + STACK_SIZE \
+ CPU_TEMP_BUF_SIZE \
+ REG_SAVE_SIZE \
+ TCG_TARGET_STACK_ALIGN - 1) \
@@ -1794,6 +1801,14 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is_64)
#define REG_SAVE_BOT (FRAME_SIZE - REG_SAVE_SIZE)
+static unsigned num_epilogue_insns = 1;
+static void tcg_out_epilogue(TCGContext *s)
+{
+ /* IBTC exit entry */
+ ibtc_ret_addr = s->code_ptr;
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R3, 0);
+}
+
static void tcg_target_qemu_prologue(TCGContext *s)
{
int i;
@@ -1832,27 +1847,29 @@ static void tcg_target_qemu_prologue(TCGContext *s)
if (USE_REG_RA) {
#ifdef _CALL_AIX
/* Make the caller load the value as the TOC into R2. */
- tb_ret_addr = s->code_ptr + 2;
+ tb_ret_addr = s->code_ptr + 2 + num_epilogue_insns;
desc[1] = tb_ret_addr;
tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_RA, TCG_REG_R2);
tcg_out32(s, BCCTR | BO_ALWAYS);
#elif defined(_CALL_ELF) && _CALL_ELF == 2
/* Compute from the incoming R12 value. */
- tb_ret_addr = s->code_ptr + 2;
+ tb_ret_addr = s->code_ptr + 2 + num_epilogue_insns;
tcg_out32(s, ADDI | TAI(TCG_REG_RA, TCG_REG_R12,
tcg_ptr_byte_diff(tb_ret_addr, s->code_buf)));
tcg_out32(s, BCCTR | BO_ALWAYS);
#else
/* Reserve max 5 insns for the constant load. */
- tb_ret_addr = s->code_ptr + 6;
+ tb_ret_addr = s->code_ptr + 6 + num_epilogue_insns;
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (intptr_t)tb_ret_addr);
tcg_out32(s, BCCTR | BO_ALWAYS);
while (s->code_ptr < tb_ret_addr) {
tcg_out32(s, NOP);
}
#endif
+ tcg_out_epilogue(s);
} else {
tcg_out32(s, BCCTR | BO_ALWAYS);
+ tcg_out_epilogue(s);
tb_ret_addr = s->code_ptr;
}
@@ -1869,6 +1886,85 @@ static void tcg_target_qemu_prologue(TCGContext *s)
tcg_out32(s, BCLR | BO_ALWAYS);
}
+static void tcg_out_jmp_short(uintptr_t jmp_addr, uintptr_t addr)
+{
+ tcg_insn_unit i1, i2;
+ uint64_t pair;
+ intptr_t diff = addr - jmp_addr;
+
+ if (!in_range_b(diff))
+ tcg_abort();
+
+ i1 = B | (diff & 0x3fffffc);
+ i2 = NOP;
+#ifdef HOST_WORDS_BIGENDIAN
+ pair = (uint64_t)i1 << 32 | i2;
+#else
+ pair = (uint64_t)i2 << 32 | i1;
+#endif
+ *(uint64_t *)jmp_addr = pair;
+}
+
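One detail the patch leaves implicit: because the B/NOP pair is written with a single aligned 64-bit store, a thread racing through the stub observes either the old pair or the new pair, never a torn instruction. This mirrors how upstream patches ppc64 TB jumps; the interpretation is ours, not stated in the code.
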
+/*
+ * Emit trace profiling/prediction stubs. The code sequence is as follows:
+ * S1: direct jump (the reloc part requires 4-byte alignment)
+ * S2: trace profiling stub
+ * S3: trace prediction stub
+ * S4: beginning of QEMU emulation code
+ *
+ * The jump instruction of S1 is initially set to jump to S3 (i.e. it
+ * skips S2). The offset of S3 (patch_next) is recorded so that trace
+ * profiling can be turned off later, and the offset of S4 (patch_skip)
+ * so that the trace stubs can be skipped quickly when searching for a pc.
+ */
+static void tcg_out_hotpatch(TCGContext *s, int is_user, int emit_helper)
+{
+ tcg_insn_unit *label_ptr[2];
+ TranslationBlock *tb = s->tb;
+
+ /* S1: direct jump. Ensure the next insns are 8-byte aligned. */
+ if ((uintptr_t)s->code_ptr & 7)
+ tcg_out32(s, NOP);
+
+ tb->patch_jmp = (uint16_t)((intptr_t)s->code_ptr - (intptr_t)s->code_buf);
+
+ /* S1: Direct Jump */
+ if (is_user == 0 || emit_helper == 0) {
+ tcg_out_jmp_short((uintptr_t)s->code_ptr, (uintptr_t)(s->code_ptr + 4));
+ s->code_ptr += 2;
+ tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
+ tcg_out32(s, BCCTR | BO_ALWAYS);
+ tb->patch_next = (uint16_t)((intptr_t)s->code_ptr - (intptr_t)s->code_buf);
+ return;
+ }
+
+ label_ptr[0] = s->code_ptr;
+ s->code_ptr += 2;
+ tcg_out32(s, MTSPR | RS(TCG_REG_TMP1) | CTR);
+ tcg_out32(s, BCCTR | BO_ALWAYS);
+
+ /* S2: Trace Profiling Stub */
+ tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
+ tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[1], tb->id);
+ tcg_out_call(s, (tcg_insn_unit *)helper_NET_profile);
+ tcg_out_jmp_short((uintptr_t)label_ptr[0], (uintptr_t)s->code_ptr);
+
+ /* S3: Trace Prediction Stub */
+ tb->patch_next = (uint16_t)((intptr_t)s->code_ptr - (intptr_t)s->code_buf);
+
+ tcg_out_ld(s, TCG_TYPE_I32, TCG_REG_TMP1, TCG_AREG0,
+ offsetof(CPUArchState, start_trace_prediction));
+ tcg_out_cmp(s, TCG_COND_EQ, TCG_REG_TMP1, 0, 1, 7, TCG_TYPE_I32);
+ label_ptr[1] = s->code_ptr;
+ tcg_out_bc_noaddr(s, tcg_to_bc[TCG_COND_EQ]);
+ tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
+ tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[1], tb->id);
+ tcg_out_call(s, (tcg_insn_unit *)helper_NET_predict);
+ reloc_pc14(label_ptr[1], s->code_ptr);
+
+ /* S4: QEMU emulation code */
+}
+
static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
const int *const_args)
{
@@ -1906,6 +2002,17 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
tcg_out32(s, BCCTR | BO_ALWAYS);
s->tb_next_offset[args[0]] = tcg_current_code_size(s);
break;
+ case INDEX_op_hotpatch:
+ tcg_out_hotpatch(s, args[0], args[1]);
+ break;
+ case INDEX_op_jmp:
+ if (const_args[0]) {
+ tcg_out_b(s, 0, (tcg_insn_unit *)args[0]);
+ } else {
+ tcg_out32(s, MTSPR | RS(args[0]) | CTR);
+ tcg_out32(s, BCCTR | BO_ALWAYS);
+ }
+ break;
case INDEX_op_br:
{
TCGLabel *l = arg_label(args[0]);
@@ -2436,6 +2543,8 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
}
static const TCGTargetOpDef ppc_op_defs[] = {
+ { INDEX_op_hotpatch, { "i", "i" } },
+ { INDEX_op_jmp, { "ri" } },
{ INDEX_op_exit_tb, { } },
{ INDEX_op_goto_tb, { } },
{ INDEX_op_br, { } },
@@ -2572,6 +2681,10 @@ static const TCGTargetOpDef ppc_op_defs[] = {
{ INDEX_op_qemu_st_i64, { "S", "S", "S", "S" } },
#endif
+#define DEF(name,a1,a2,a3,a4) { INDEX_op_##name, {} },
+#include "tcg-opc-vector.h"
+#undef DEF
+
{ -1 },
};
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index 0b9dd8f..3773253 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -39,7 +39,7 @@ extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64);
Up to and including filling in the forward link immediately. We'll do
proper termination of the end of the list after we finish translation. */
-static void tcg_emit_op(TCGContext *ctx, TCGOpcode opc, int args)
+void tcg_emit_op(TCGContext *ctx, TCGOpcode opc, int args)
{
int oi = ctx->gen_next_op_idx;
int ni = oi + 1;
diff --git a/tcg/tcg-op.h b/tcg/tcg-op.h
index 4e20dc1..17d31df 100644
--- a/tcg/tcg-op.h
+++ b/tcg/tcg-op.h
@@ -28,6 +28,7 @@
/* Basic output routines. Not for general consumption. */
+void tcg_emit_op(TCGContext *ctx, TCGOpcode opc, int args);
void tcg_gen_op1(TCGContext *, TCGOpcode, TCGArg);
void tcg_gen_op2(TCGContext *, TCGOpcode, TCGArg, TCGArg);
void tcg_gen_op3(TCGContext *, TCGOpcode, TCGArg, TCGArg, TCGArg);
@@ -311,6 +312,16 @@ void tcg_gen_ext16u_i32(TCGv_i32 ret, TCGv_i32 arg);
void tcg_gen_bswap16_i32(TCGv_i32 ret, TCGv_i32 arg);
void tcg_gen_bswap32_i32(TCGv_i32 ret, TCGv_i32 arg);
+static inline void tcg_gen_hotpatch(uint32_t arg1, uint32_t arg2)
+{
+ tcg_gen_op2(&tcg_ctx, INDEX_op_hotpatch, arg1, arg2);
+}
+
+static inline void tcg_gen_annotate(uint32_t arg)
+{
+ tcg_gen_op1(&tcg_ctx, INDEX_op_annotate, arg);
+}
+
static inline void tcg_gen_discard_i32(TCGv_i32 arg)
{
tcg_gen_op1_i32(INDEX_op_discard, arg);
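
Usage of tcg_gen_hotpatch() mirrors what the frontends in this patch emit right after gen_tb_start(); the fragment below is lifted from the gen_intermediate_code() changes earlier in this diff:

    if (tracer_mode != TRANS_MODE_NONE)
        tcg_gen_hotpatch(IS_USER(dc), tracer_mode == TRANS_MODE_HYBRIDS ||
                                      tracer_mode == TRANS_MODE_HYBRIDM);
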
diff --git a/tcg/tcg-opc.h b/tcg/tcg-opc.h
index 6d0410c..5ba1e05 100644
--- a/tcg/tcg-opc.h
+++ b/tcg/tcg-opc.h
@@ -26,12 +26,16 @@
* DEF(name, oargs, iargs, cargs, flags)
*/
+DEF(hotpatch, 0, 0, 2, 0)
+DEF(annotate, 0, 0, 1, TCG_OPF_NOT_PRESENT)
+
/* predefined ops */
DEF(discard, 1, 0, 0, TCG_OPF_NOT_PRESENT)
DEF(set_label, 0, 0, 1, TCG_OPF_BB_END | TCG_OPF_NOT_PRESENT)
/* variable number of parameters */
DEF(call, 0, 0, 3, TCG_OPF_CALL_CLOBBER | TCG_OPF_NOT_PRESENT)
+DEF(jmp, 0, 1, 0, TCG_OPF_BB_END)
DEF(br, 0, 0, 1, TCG_OPF_BB_END)
@@ -191,6 +195,8 @@ DEF(qemu_ld_i64, DATA64_ARGS, TLADDR_ARGS, 1,
DEF(qemu_st_i64, 0, TLADDR_ARGS + DATA64_ARGS, 1,
TCG_OPF_CALL_CLOBBER | TCG_OPF_SIDE_EFFECTS | TCG_OPF_64BIT)
+#include "tcg-opc-vector.h"
+
#undef TLADDR_ARGS
#undef DATA64_ARGS
#undef IMPL
diff --git a/tcg/tcg.c b/tcg/tcg.c
index a163541..ea5091b 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -304,19 +304,22 @@ void tcg_pool_reset(TCGContext *s)
s->pool_current = NULL;
}
-typedef struct TCGHelperInfo {
- void *func;
- const char *name;
- unsigned flags;
- unsigned sizemask;
-} TCGHelperInfo;
-
#include "exec/helper-proto.h"
-static const TCGHelperInfo all_helpers[] = {
+const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
+int tcg_num_helpers(void)
+{
+ return ARRAY_SIZE(all_helpers);
+}
+
+const TCGHelperInfo *get_tcg_helpers(void)
+{
+ return all_helpers;
+}
+
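
These accessors let code outside tcg.c, presumably the LLVM translator, walk the helper table. A sketch of such a consumer; llvm_add_symbol() is a made-up registration hook:

    const TCGHelperInfo *helpers = get_tcg_helpers();
    int i, n = tcg_num_helpers();
    for (i = 0; i < n; i++) {
        /* expose each helper's address under its TCG name (assumed API) */
        llvm_add_symbol(helpers[i].name, helpers[i].func);
    }
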
void tcg_context_init(TCGContext *s)
{
int op, total_args, n, i;
@@ -413,7 +416,7 @@ void tcg_set_frame(TCGContext *s, int reg, intptr_t start, intptr_t size)
s->frame_reg = reg;
}
-void tcg_func_start(TCGContext *s)
+void tcg_func_start(TCGContext *s, TranslationBlock *tb)
{
tcg_pool_reset(s);
s->nb_temps = s->nb_globals;
@@ -432,8 +435,10 @@ void tcg_func_start(TCGContext *s)
s->gen_last_op_idx = -1;
s->gen_next_op_idx = 0;
s->gen_next_parm_idx = 0;
+ s->vec_opparam_ptr = s->vec_opparam_buf;
s->be = tcg_malloc(sizeof(TCGBackendData));
+ s->tb = tb;
}
static inline void tcg_temp_alloc(TCGContext *s, int n)
@@ -1004,6 +1009,7 @@ void tcg_dump_ops(TCGContext *s)
char buf[128];
TCGOp *op;
int oi;
+ const TCGArg *vec_args = s->vec_opparam_buf;
for (oi = s->gen_first_op_idx; oi >= 0; oi = op->next) {
int i, k, nb_oargs, nb_iargs, nb_cargs;
@@ -1051,8 +1057,29 @@ void tcg_dump_ops(TCGContext *s)
qemu_log(",%s", t);
}
} else {
+ int is_vec = 0;
qemu_log(" %s ", def->name);
+ /* print vector opc */
+ switch (c) {
+ case INDEX_op_vector_start ... INDEX_op_vector_end:
+ is_vec = 1;
+ break;
+ default:
+ break;
+ }
+ if (is_vec) {
+ qemu_log("$0x%" TCG_PRIlx, vec_args[0]);
+ if (c == INDEX_op_vload_128 || c == INDEX_op_vstore_128)
+ qemu_log(",%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), vec_args[1]));
+ else
+ qemu_log(",$0x%" TCG_PRIlx, vec_args[1]);
+ qemu_log(",$0x%" TCG_PRIlx, vec_args[2]);
+ qemu_log("\n");
+ vec_args += 3;
+ continue;
+ }
+
nb_oargs = def->nb_oargs;
nb_iargs = def->nb_iargs;
nb_cargs = def->nb_cargs;
@@ -1138,6 +1165,172 @@ void tcg_dump_ops(TCGContext *s)
}
}
+void tcg_dump_ops_fn(TCGContext *s, void (*fn)(const char *))
+{
+ char buf[128];
+ char outbuf[128];
+ TCGOp *op;
+ int oi;
+ const TCGArg *vec_args = s->vec_opparam_buf;
+
+#define printops(args...) \
+ do { snprintf(outbuf, 128, ##args); (*fn)(outbuf); } while(0)
+
+ for (oi = s->gen_first_op_idx; oi >= 0; oi = op->next) {
+ int i, k, nb_oargs, nb_iargs, nb_cargs;
+ const TCGOpDef *def;
+ const TCGArg *args;
+ TCGOpcode c;
+
+ op = &s->gen_op_buf[oi];
+ c = op->opc;
+ def = &tcg_op_defs[c];
+ args = &s->gen_opparam_buf[op->args];
+
+ if (c == INDEX_op_insn_start) {
+ printops("%s ----", oi != s->gen_first_op_idx ? "\n" : "");
+
+ for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
+ target_ulong a;
+#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
+ a = ((target_ulong)args[i * 2 + 1] << 32) | args[i * 2];
+#else
+ a = args[i];
+#endif
+ printops(" " TARGET_FMT_lx, a);
+ }
+ } else if (c == INDEX_op_call) {
+ /* variable number of arguments */
+ nb_oargs = op->callo;
+ nb_iargs = op->calli;
+ nb_cargs = def->nb_cargs;
+
+ /* function name, flags, out args */
+ printops(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
+ tcg_find_helper(s, args[nb_oargs + nb_iargs]),
+ args[nb_oargs + nb_iargs + 1], nb_oargs);
+ for (i = 0; i < nb_oargs; i++) {
+ printops(",%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
+ args[i]));
+ }
+ for (i = 0; i < nb_iargs; i++) {
+ TCGArg arg = args[nb_oargs + i];
+ const char *t = "<dummy>";
+ if (arg != TCG_CALL_DUMMY_ARG) {
+ t = tcg_get_arg_str_idx(s, buf, sizeof(buf), arg);
+ }
+ printops(",%s", t);
+ }
+ } else {
+ int is_vec = 0;
+ printops(" %s ", def->name);
+
+ /* print vector opc */
+ switch (c) {
+ case INDEX_op_vector_start ... INDEX_op_vector_end:
+ is_vec = 1;
+ break;
+ default:
+ break;
+ }
+ if (is_vec) {
+ printops("$0x%" TCG_PRIlx, vec_args[0]);
+ if (c == INDEX_op_vload_128 || c == INDEX_op_vstore_128)
+ printops(",%s", tcg_get_arg_str_idx(s, buf, sizeof(buf), vec_args[1]));
+ else
+ printops(",$0x%" TCG_PRIlx, vec_args[1]);
+ printops(",$0x%" TCG_PRIlx, vec_args[2]);
+ printops("\n");
+ vec_args += 3;
+ continue;
+ }
+
+ nb_oargs = def->nb_oargs;
+ nb_iargs = def->nb_iargs;
+ nb_cargs = def->nb_cargs;
+
+ k = 0;
+ for (i = 0; i < nb_oargs; i++) {
+ if (k != 0) {
+ printops(",");
+ }
+ printops("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
+ args[k++]));
+ }
+ for (i = 0; i < nb_iargs; i++) {
+ if (k != 0) {
+ printops(",");
+ }
+ printops("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
+ args[k++]));
+ }
+ switch (c) {
+ case INDEX_op_brcond_i32:
+ case INDEX_op_setcond_i32:
+ case INDEX_op_movcond_i32:
+ case INDEX_op_brcond2_i32:
+ case INDEX_op_setcond2_i32:
+ case INDEX_op_brcond_i64:
+ case INDEX_op_setcond_i64:
+ case INDEX_op_movcond_i64:
+ if (args[k] < ARRAY_SIZE(cond_name) && cond_name[args[k]]) {
+ printops(",%s", cond_name[args[k++]]);
+ } else {
+ printops(",$0x%" TCG_PRIlx, args[k++]);
+ }
+ i = 1;
+ break;
+ case INDEX_op_qemu_ld_i32:
+ case INDEX_op_qemu_st_i32:
+ case INDEX_op_qemu_ld_i64:
+ case INDEX_op_qemu_st_i64:
+ {
+ TCGMemOpIdx oi = args[k++];
+ TCGMemOp op = get_memop(oi);
+ unsigned ix = get_mmuidx(oi);
+
+ if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
+ printops(",$0x%x,%u", op, ix);
+ } else {
+ const char *s_al = "", *s_op;
+ if (op & MO_AMASK) {
+ if ((op & MO_AMASK) == MO_ALIGN) {
+ s_al = "al+";
+ } else {
+ s_al = "un+";
+ }
+ }
+ s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
+ printops(",%s%s,%u", s_al, s_op, ix);
+ }
+ i = 1;
+ }
+ break;
+ default:
+ i = 0;
+ break;
+ }
+ switch (c) {
+ case INDEX_op_set_label:
+ case INDEX_op_br:
+ case INDEX_op_brcond_i32:
+ case INDEX_op_brcond_i64:
+ case INDEX_op_brcond2_i32:
+ printops("%s$L%d", k ? "," : "", arg_label(args[k])->id);
+ i++, k++;
+ break;
+ default:
+ break;
+ }
+ for (; i < nb_cargs; i++, k++) {
+ printops("%s$0x%" TCG_PRIlx, k ? "," : "", args[k]);
+ }
+ }
+ printops("\n");
+ }
+#undef printops
+}
+
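
tcg_dump_ops_fn() is the callback-driven twin of tcg_dump_ops(): instead of qemu_log(), every formatted fragment goes through the supplied sink. A minimal caller:

    static void dump_sink(const char *str)
    {
        fputs(str, stderr);  /* or append to an in-memory buffer */
    }

    /* ... */
    tcg_dump_ops_fn(&tcg_ctx, dump_sink);
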
/* we give more priority to constraints with less registers */
static int get_constraint_priority(const TCGOpDef *def, int k)
{
@@ -1334,10 +1527,11 @@ static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
/* Liveness analysis : update the opc_dead_args array to tell if a
given input arguments is dead. Instructions updating dead
temporaries are removed. */
-static void tcg_liveness_analysis(TCGContext *s)
+void tcg_liveness_analysis(TCGContext *s)
{
uint8_t *dead_temps, *mem_temps;
int oi, oi_prev, nb_ops;
+ TCGArg *vec_args = s->vec_opparam_ptr;
nb_ops = s->gen_next_op_idx;
s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
@@ -1427,6 +1621,7 @@ static void tcg_liveness_analysis(TCGContext *s)
}
}
break;
+ case INDEX_op_annotate:
case INDEX_op_insn_start:
break;
case INDEX_op_discard:
@@ -1434,7 +1629,11 @@ static void tcg_liveness_analysis(TCGContext *s)
dead_temps[args[0]] = 1;
mem_temps[args[0]] = 0;
break;
-
+ case INDEX_op_vector_start ... INDEX_op_vector_end:
+ vec_args -= 3;
+ if (opc == INDEX_op_vload_128 || opc == INDEX_op_vstore_128)
+ dead_temps[vec_args[1]] = 0;
+ break;
case INDEX_op_add2_i32:
opc_new = INDEX_op_add_i32;
goto do_addsub2;
@@ -1577,7 +1776,7 @@ static void tcg_liveness_analysis(TCGContext *s)
}
#else
/* dummy liveness analysis */
-static void tcg_liveness_analysis(TCGContext *s)
+void tcg_liveness_analysis(TCGContext *s)
{
int nb_ops;
nb_ops = s->gen_opc_ptr - s->gen_opc_buf;
@@ -2418,6 +2617,8 @@ int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf)
s->gen_insn_data[num_insns][i] = a;
}
break;
+ case INDEX_op_annotate:
+ break;
case INDEX_op_discard:
temp_dead(s, args[0]);
break;
@@ -2554,15 +2755,15 @@ struct jit_descriptor {
struct jit_code_entry *first_entry;
};
-void __jit_debug_register_code(void) __attribute__((noinline));
-void __jit_debug_register_code(void)
+void qemu_jit_debug_register_code(void) __attribute__((noinline));
+void qemu_jit_debug_register_code(void)
{
asm("");
}
/* Must statically initialize the version, because GDB may check
the version before we can set it. */
-struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
+struct jit_descriptor qemu_jit_debug_descriptor = { 1, 0, 0, 0 };
/* End GDB interface. */
@@ -2771,10 +2972,10 @@ static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
one_entry.symfile_addr = img;
one_entry.symfile_size = img_size;
- __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
- __jit_debug_descriptor.relevant_entry = &one_entry;
- __jit_debug_descriptor.first_entry = &one_entry;
- __jit_debug_register_code();
+ qemu_jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
+ qemu_jit_debug_descriptor.relevant_entry = &one_entry;
+ qemu_jit_debug_descriptor.first_entry = &one_entry;
+ qemu_jit_debug_register_code();
}
#else
/* No support for the feature. Provide the entry point expected by exec.c,
@@ -2790,3 +2991,34 @@ void tcg_register_jit(void *buf, size_t buf_size)
{
}
#endif /* ELF_HOST_MACHINE */
+
+
+/*
+ * copy_tcg_context_global()
+ * Copy the thread's local TCG context to the global TCG context.
+ *
+ * The main thread's tcg_ctx is initialized first and copied to
+ * tcg_ctx_global here; tcg_ctx_global is later copied into each
+ * thread's local tcg_ctx using copy_tcg_context().
+ *
+ * Note: this copy must be done after tcg_ctx is completely initialized,
+ * and it must be performed by the main thread.
+ */
+void copy_tcg_context_global(void)
+{
+ static int init_once = 0;
+ if (init_once == 1)
+ return;
+
+ memcpy(&tcg_ctx_global, &tcg_ctx, sizeof(TCGContext));
+ init_once = 1;
+}
+
+/*
+ * copy_tcg_context()
+ * Copy the global TCG context to the thread's local TCG context.
+ */
+void copy_tcg_context(void)
+{
+ memcpy(&tcg_ctx, &tcg_ctx_global, sizeof(TCGContext));
+}
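
Putting the pair together: the main thread initializes tcg_ctx once, and the targets' *_translate_init() hooks above call copy_tcg_context_global() for it; every other translation thread then seeds its own __thread copy before use. The worker-thread framing below is inferred from the comments, not spelled out in the patch:

    static void *translator_thread(void *arg)
    {
        copy_tcg_context();  /* seed this thread's __thread tcg_ctx */
        /* ... tcg_func_start() / tcg_gen_code() as usual ... */
        return NULL;
    }
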
diff --git a/tcg/tcg.h b/tcg/tcg.h
index a696922..3374257 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -193,6 +193,7 @@ typedef struct TCGPool {
#define TCG_POOL_CHUNK_SIZE 32768
+#define TCG_MAX_LABELS 512
#define TCG_MAX_TEMPS 512
#define TCG_MAX_INSNS 512
@@ -564,7 +565,7 @@ struct TCGContext {
/* Threshold to flush the translated code buffer. */
void *code_gen_highwater;
- TBContext tb_ctx;
+ TBContext *tb_ctx;
/* The TCGBackendData structure is private to tcg-target.c. */
struct TCGBackendData *be;
@@ -578,12 +579,33 @@ struct TCGContext {
TCGOp gen_op_buf[OPC_BUF_SIZE];
TCGArg gen_opparam_buf[OPPARAM_BUF_SIZE];
+ TCGArg vec_opparam_buf[OPPARAM_BUF_SIZE];
+ TCGArg *vec_opparam_ptr;
uint16_t gen_insn_end_off[TCG_MAX_INSNS];
target_ulong gen_insn_data[TCG_MAX_INSNS][TARGET_INSN_START_WORDS];
+
+ TranslationBlock *tb;
};
-extern TCGContext tcg_ctx;
+extern TCGContext tcg_ctx_global;
+extern __thread TCGContext tcg_ctx;
+
+typedef struct TCGHelperInfo {
+ void *func;
+ const char *name;
+ unsigned flags;
+ unsigned sizemask;
+} TCGHelperInfo;
+
+void copy_tcg_context_global(void);
+void copy_tcg_context(void);
+int tcg_num_helpers(void);
+const TCGHelperInfo *get_tcg_helpers(void);
+void tcg_liveness_analysis(TCGContext *s);
+void tcg_dump_ops_fn(TCGContext *s, void (*fn)(const char *));
+target_long decode_sleb128(uint8_t **pp);
+
/* The number of opcodes emitted so far. */
static inline int tcg_op_buf_count(void)
@@ -624,7 +646,7 @@ static inline void *tcg_malloc(int size)
void tcg_context_init(TCGContext *s);
void tcg_prologue_init(TCGContext *s);
-void tcg_func_start(TCGContext *s);
+void tcg_func_start(TCGContext *s, TranslationBlock *tb);
int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf);
@@ -822,7 +844,7 @@ static inline TCGLabel *arg_label(TCGArg i)
static inline ptrdiff_t tcg_ptr_byte_diff(void *a, void *b)
{
- return a - b;
+ return (ptrdiff_t)a - (ptrdiff_t)b;
}
/**
@@ -876,7 +898,7 @@ static inline TCGMemOpIdx make_memop_idx(TCGMemOp op, unsigned idx)
*/
static inline TCGMemOp get_memop(TCGMemOpIdx oi)
{
- return oi >> 4;
+ return (TCGMemOp)(oi >> 4);
}
/**
@@ -939,6 +961,7 @@ static inline unsigned get_mmuidx(TCGMemOpIdx oi)
#define TB_EXIT_IDX1 1
#define TB_EXIT_ICOUNT_EXPIRED 2
#define TB_EXIT_REQUESTED 3
+#define TB_EXIT_LLVM TB_EXIT_ICOUNT_EXPIRED
#ifdef HAVE_TCG_QEMU_TB_EXEC
uintptr_t tcg_qemu_tb_exec(CPUArchState *env, uint8_t *tb_ptr);
@@ -1011,6 +1034,31 @@ uint32_t helper_be_ldl_cmmu(CPUArchState *env, target_ulong addr,
uint64_t helper_be_ldq_cmmu(CPUArchState *env, target_ulong addr,
TCGMemOpIdx oi, uintptr_t retaddr);
+
+/* Value zero-extended to tcg register size. */
+tcg_target_ulong llvm_ret_ldub_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi);
+tcg_target_ulong llvm_le_lduw_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi);
+tcg_target_ulong llvm_le_ldul_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi);
+uint64_t llvm_le_ldq_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi);
+tcg_target_ulong llvm_be_lduw_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi);
+tcg_target_ulong llvm_be_ldul_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi);
+uint64_t llvm_be_ldq_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi);
+
+/* Value sign-extended to tcg register size. */
+tcg_target_ulong llvm_ret_ldsb_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi);
+tcg_target_ulong llvm_le_ldsw_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi);
+tcg_target_ulong llvm_le_ldsl_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi);
+tcg_target_ulong llvm_be_ldsw_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi);
+tcg_target_ulong llvm_be_ldsl_mmu(CPUArchState *env, target_ulong addr, TCGMemOpIdx oi);
+
+void llvm_ret_stb_mmu(CPUArchState *env, target_ulong addr, uint8_t val, TCGMemOpIdx oi);
+void llvm_le_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, TCGMemOpIdx oi);
+void llvm_le_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, TCGMemOpIdx oi);
+void llvm_le_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, TCGMemOpIdx oi);
+void llvm_be_stw_mmu(CPUArchState *env, target_ulong addr, uint16_t val, TCGMemOpIdx oi);
+void llvm_be_stl_mmu(CPUArchState *env, target_ulong addr, uint32_t val, TCGMemOpIdx oi);
+void llvm_be_stq_mmu(CPUArchState *env, target_ulong addr, uint64_t val, TCGMemOpIdx oi);
+
/* Temporary aliases until backends are converted. */
#ifdef TARGET_WORDS_BIGENDIAN
# define helper_ret_ldsw_mmu helper_be_ldsw_mmu
diff --git a/translate-all.c b/translate-all.c
index 042a857..bf05326 100644
--- a/translate-all.c
+++ b/translate-all.c
@@ -63,6 +63,10 @@
#include "qemu/bitmap.h"
#include "qemu/timer.h"
+#include "hqemu.h"
+
+size_t get_cpu_size(void) { return sizeof(CPUArchState); }
+
//#define DEBUG_TB_INVALIDATE
//#define DEBUG_FLUSH
/* make various TB consistency checks */
@@ -124,7 +128,8 @@ intptr_t qemu_host_page_mask;
static void *l1_map[V_L1_SIZE];
/* code generation context */
-TCGContext tcg_ctx;
+TCGContext tcg_ctx_global;
+__thread TCGContext tcg_ctx;
/* translation block context */
#ifdef CONFIG_USER_ONLY
@@ -135,7 +140,7 @@ void tb_lock(void)
{
#ifdef CONFIG_USER_ONLY
assert(!have_tb_lock);
- qemu_mutex_lock(&tcg_ctx.tb_ctx.tb_lock);
+ qemu_mutex_lock(&tcg_ctx.tb_ctx->tb_lock);
have_tb_lock++;
#endif
}
@@ -145,7 +150,7 @@ void tb_unlock(void)
#ifdef CONFIG_USER_ONLY
assert(have_tb_lock);
have_tb_lock--;
- qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
+ qemu_mutex_unlock(&tcg_ctx.tb_ctx->tb_lock);
#endif
}
@@ -153,7 +158,7 @@ void tb_lock_reset(void)
{
#ifdef CONFIG_USER_ONLY
if (have_tb_lock) {
- qemu_mutex_unlock(&tcg_ctx.tb_ctx.tb_lock);
+ qemu_mutex_unlock(&tcg_ctx.tb_ctx->tb_lock);
have_tb_lock = 0;
}
#endif
@@ -161,11 +166,12 @@ void tb_lock_reset(void)
static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
tb_page_addr_t phys_page2);
-static TranslationBlock *tb_find_pc(uintptr_t tc_ptr);
+static TranslationBlock *tb_find_pc(CPUState *cpu, uintptr_t tc_ptr);
void cpu_gen_init(void)
{
tcg_context_init(&tcg_ctx);
+ tcg_ctx.tb_ctx = g_malloc0(sizeof(TBContext));
}
/* Encode VAL as a signed leb128 sequence at P.
@@ -190,7 +196,7 @@ static uint8_t *encode_sleb128(uint8_t *p, target_long val)
/* Decode a signed leb128 sequence at *PP; increment *PP past the
decoded value. Return the decoded value. */
-static target_long decode_sleb128(uint8_t **pp)
+target_long decode_sleb128(uint8_t **pp)
{
uint8_t *p = *pp;
target_long val = 0;
@@ -268,6 +274,11 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
int64_t ti = profile_getclock();
#endif
+#if defined(CONFIG_LLVM)
+ if (llvm_locate_trace(searched_pc))
+ return llvm_restore_state(cpu, tb, searched_pc);
+#endif
+
if (searched_pc < host_pc) {
return -1;
}
@@ -297,8 +308,8 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
restore_state_to_opc(env, tb, data);
#ifdef CONFIG_PROFILER
- tcg_ctx.restore_time += profile_getclock() - ti;
- tcg_ctx.restore_count++;
+ tcg_ctx_global.restore_time += profile_getclock() - ti;
+ tcg_ctx_global.restore_count++;
#endif
return 0;
}
@@ -307,7 +318,7 @@ bool cpu_restore_state(CPUState *cpu, uintptr_t retaddr)
{
TranslationBlock *tb;
- tb = tb_find_pc(retaddr);
+ tb = tb_find_pc(cpu, retaddr);
if (tb) {
cpu_restore_state_from_tb(cpu, tb, retaddr);
if (tb->cflags & CF_NOCACHE) {
@@ -485,7 +496,13 @@ static inline PageDesc *page_find(tb_page_addr_t index)
# define MAX_CODE_GEN_BUFFER_SIZE ((size_t)-1)
#endif
+/* Note: the size of the code buffer is doubled; half of it is stolen
+ * to act as the trace code cache. */
+#if defined(CONFIG_LLVM)
+#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024 * 2)
+#else
#define DEFAULT_CODE_GEN_BUFFER_SIZE_1 (32u * 1024 * 1024)
+#endif
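
Under CONFIG_LLVM the 32 MB default is therefore doubled and, per the note above, split down the middle; the handoff of the second half happens in llvm_alloc_cache(), called from code_gen_alloc() below:

    |<-- 32 MB: TCG block cache -->|<-- 32 MB: LLVM trace cache -->|
     code_gen_buffer                second half, via llvm_alloc_cache()
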
#define DEFAULT_CODE_GEN_BUFFER_SIZE \
(DEFAULT_CODE_GEN_BUFFER_SIZE_1 < MAX_CODE_GEN_BUFFER_SIZE \
@@ -503,6 +520,9 @@ static inline size_t size_code_gen_buffer(size_t tb_size)
static buffer, we could size this on RESERVED_VA, on the text
segment size of the executable, or continue to use the default. */
tb_size = (unsigned long)(ram_size / 4);
+#if defined(CONFIG_LLVM)
+ tb_size = (unsigned long)(ram_size / 2);
+#endif
#endif
}
if (tb_size < MIN_CODE_GEN_BUFFER_SIZE) {
@@ -730,15 +750,18 @@ static inline void code_gen_alloc(size_t tb_size)
fprintf(stderr, "Could not allocate dynamic translator buffer\n");
exit(1);
}
+#if defined(CONFIG_LLVM)
+ llvm_alloc_cache();
+#endif
/* Estimate a good size for the number of TBs we can support. We
still haven't deducted the prologue from the buffer size here,
but that's minimal and won't affect the estimate much. */
tcg_ctx.code_gen_max_blocks
= tcg_ctx.code_gen_buffer_size / CODE_GEN_AVG_BLOCK_SIZE;
- tcg_ctx.tb_ctx.tbs = g_new(TranslationBlock, tcg_ctx.code_gen_max_blocks);
+ tcg_ctx.tb_ctx->tbs = g_new(TranslationBlock, tcg_ctx.code_gen_max_blocks);
- qemu_mutex_init(&tcg_ctx.tb_ctx.tb_lock);
+ qemu_mutex_init(&tcg_ctx.tb_ctx->tb_lock);
}
/* Must be called before using the QEMU cpus. 'tb_size' is the size
@@ -765,26 +788,35 @@ bool tcg_enabled(void)
too many translation blocks or too much generated code. */
static TranslationBlock *tb_alloc(target_ulong pc)
{
+ TCGContext *s = &tcg_ctx_global;
TranslationBlock *tb;
- if (tcg_ctx.tb_ctx.nb_tbs >= tcg_ctx.code_gen_max_blocks) {
+ if (s->tb_ctx->nb_tbs >= s->code_gen_max_blocks) {
return NULL;
}
- tb = &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs++];
+#if defined(CONFIG_LLVM)
+ if (llvm_check_cache() == 1)
+ return NULL;
+#endif
+
+ tb = &s->tb_ctx->tbs[s->tb_ctx->nb_tbs++];
tb->pc = pc;
tb->cflags = 0;
+
+ optimization_init_tb(tb, s->tb_ctx->nb_tbs - 1);
return tb;
}
void tb_free(TranslationBlock *tb)
{
+ TCGContext *s = &tcg_ctx_global;
/* In practice this is mostly used for single use temporary TB
Ignore the hard cases and just back up if this TB happens to
be the last one generated. */
- if (tcg_ctx.tb_ctx.nb_tbs > 0 &&
- tb == &tcg_ctx.tb_ctx.tbs[tcg_ctx.tb_ctx.nb_tbs - 1]) {
- tcg_ctx.code_gen_ptr = tb->tc_ptr;
- tcg_ctx.tb_ctx.nb_tbs--;
+ if (s->tb_ctx->nb_tbs > 0 &&
+ tb == &s->tb_ctx->tbs[s->tb_ctx->nb_tbs - 1]) {
+ s->code_gen_ptr = tb->tc_ptr;
+ s->tb_ctx->nb_tbs--;
}
}
@@ -832,42 +864,49 @@ static void page_flush_tb(void)
/* XXX: tb_flush is currently not thread safe */
void tb_flush(CPUState *cpu)
{
+ TCGContext *s = &tcg_ctx_global;
#if defined(DEBUG_FLUSH)
printf("qemu: flush code_size=%ld nb_tbs=%d avg_tb_size=%ld\n",
- (unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer),
- tcg_ctx.tb_ctx.nb_tbs, tcg_ctx.tb_ctx.nb_tbs > 0 ?
- ((unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer)) /
- tcg_ctx.tb_ctx.nb_tbs : 0);
+ (unsigned long)(s->code_gen_ptr - s->code_gen_buffer),
+ s->tb_ctx->nb_tbs, s->tb_ctx->nb_tbs > 0 ?
+ ((unsigned long)(s->code_gen_ptr - s->code_gen_buffer)) /
+ s->tb_ctx->nb_tbs : 0);
#endif
- if ((unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer)
- > tcg_ctx.code_gen_buffer_size) {
+ if ((unsigned long)(s->code_gen_ptr - s->code_gen_buffer)
+ > s->code_gen_buffer_size) {
cpu_abort(cpu, "Internal error: code buffer overflow\n");
}
- tcg_ctx.tb_ctx.nb_tbs = 0;
+#if defined(CONFIG_LLVM)
+ llvm_tb_flush();
+#endif
+
+ s->tb_ctx->nb_tbs = 0;
CPU_FOREACH(cpu) {
memset(cpu->tb_jmp_cache, 0, sizeof(cpu->tb_jmp_cache));
+ optimization_reset(cpu->env_ptr, 1);
}
- memset(tcg_ctx.tb_ctx.tb_phys_hash, 0, sizeof(tcg_ctx.tb_ctx.tb_phys_hash));
+ memset(s->tb_ctx->tb_phys_hash, 0, sizeof(s->tb_ctx->tb_phys_hash));
page_flush_tb();
- tcg_ctx.code_gen_ptr = tcg_ctx.code_gen_buffer;
+ s->code_gen_ptr = s->code_gen_buffer;
/* XXX: flush processor icache at this point if cache flush is
expensive */
- tcg_ctx.tb_ctx.tb_flush_count++;
+ s->tb_ctx->tb_flush_count++;
}
#ifdef DEBUG_TB_CHECK
static void tb_invalidate_check(target_ulong address)
{
+ TCGContext *s = &tcg_ctx_global;
TranslationBlock *tb;
int i;
address &= TARGET_PAGE_MASK;
for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
- for (tb = tb_ctx.tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
+ for (tb = s->tb_ctx->tb_phys_hash[i]; tb != NULL; tb = tb->phys_hash_next) {
if (!(address + TARGET_PAGE_SIZE <= tb->pc ||
address >= tb->pc + tb->size)) {
printf("ERROR invalidate: address=" TARGET_FMT_lx
@@ -881,11 +920,12 @@ static void tb_invalidate_check(target_ulong address)
/* verify that all the pages have correct rights for code */
static void tb_page_check(void)
{
+ TCGContext *s = &tcg_ctx_global;
TranslationBlock *tb;
int i, flags1, flags2;
for (i = 0; i < CODE_GEN_PHYS_HASH_SIZE; i++) {
- for (tb = tcg_ctx.tb_ctx.tb_phys_hash[i]; tb != NULL;
+ for (tb = s->tb_ctx->tb_phys_hash[i]; tb != NULL;
tb = tb->phys_hash_next) {
flags1 = page_get_flags(tb->pc);
flags2 = page_get_flags(tb->pc + tb->size - 1);
@@ -911,6 +951,10 @@ static inline void tb_hash_remove(TranslationBlock **ptb, TranslationBlock *tb)
}
ptb = &tb1->phys_hash_next;
}
+#if defined(CONFIG_LLVM)
+ tb->mode = BLOCK_INVALID;
+ llvm_tb_remove(tb);
+#endif
}
static inline void tb_page_remove(TranslationBlock **ptb, TranslationBlock *tb)
@@ -969,16 +1013,15 @@ static inline void tb_reset_jump(TranslationBlock *tb, int n)
/* invalidate one TB */
void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
{
+ TCGContext *s = &tcg_ctx_global;
CPUState *cpu;
PageDesc *p;
unsigned int h, n1;
- tb_page_addr_t phys_pc;
TranslationBlock *tb1, *tb2;
/* remove the TB from the hash list */
- phys_pc = tb->page_addr[0] + (tb->pc & ~TARGET_PAGE_MASK);
- h = tb_phys_hash_func(phys_pc);
- tb_hash_remove(&tcg_ctx.tb_ctx.tb_phys_hash[h], tb);
+ h = tb_phys_hash_func(tb->pc);
+ tb_hash_remove(&s->tb_ctx->tb_phys_hash[h], tb);
/* remove the TB from the page list */
if (tb->page_addr[0] != page_addr) {
@@ -992,7 +1035,7 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
invalidate_page_bitmap(p);
}
- tcg_ctx.tb_ctx.tb_invalidated_flag = 1;
+ s->tb_ctx->tb_invalidated_flag = 1;
/* remove the TB from the hash list */
h = tb_jmp_cache_hash_func(tb->pc);
@@ -1000,6 +1043,7 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
if (cpu->tb_jmp_cache[h] == tb) {
cpu->tb_jmp_cache[h] = NULL;
}
+ optimization_remove_entry(cpu->env_ptr, tb);
}
/* suppress this TB from the two jump lists */
@@ -1021,7 +1065,7 @@ void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr)
}
tb->jmp_first = (TranslationBlock *)((uintptr_t)tb | 2); /* fail safe */
- tcg_ctx.tb_ctx.tb_phys_invalidate_count++;
+ s->tb_ctx->tb_phys_invalidate_count++;
}
static void build_page_bitmap(PageDesc *p)
@@ -1058,6 +1102,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
target_ulong pc, target_ulong cs_base,
int flags, int cflags)
{
+ TCGContext *s = &tcg_ctx_global;
CPUArchState *env = cpu->env_ptr;
TranslationBlock *tb;
tb_page_addr_t phys_pc, phys_page2;
@@ -1082,22 +1127,22 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
tb = tb_alloc(pc);
assert(tb != NULL);
/* Don't forget to invalidate previous TB info. */
- tcg_ctx.tb_ctx.tb_invalidated_flag = 1;
+ s->tb_ctx->tb_invalidated_flag = 1;
}
- gen_code_buf = tcg_ctx.code_gen_ptr;
- tb->tc_ptr = gen_code_buf;
+ gen_code_buf = s->code_gen_ptr;
+ tb->tc_ptr = tb->opt_ptr = gen_code_buf;
tb->cs_base = cs_base;
tb->flags = flags;
tb->cflags = cflags;
#ifdef CONFIG_PROFILER
- tcg_ctx.tb_count1++; /* includes aborted translations because of
+ s->tb_count1++; /* includes aborted translations because of
exceptions */
ti = profile_getclock();
#endif
- tcg_func_start(&tcg_ctx);
+ tcg_func_start(&tcg_ctx, tb);
gen_intermediate_code(env, tb);
@@ -1116,9 +1161,9 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
#endif
#ifdef CONFIG_PROFILER
- tcg_ctx.tb_count++;
- tcg_ctx.interm_time += profile_getclock() - ti;
- tcg_ctx.code_time -= profile_getclock();
+ s->tb_count++;
+ s->interm_time += profile_getclock() - ti;
+ s->code_time -= profile_getclock();
#endif
/* ??? Overflow could be handled better here. In particular, we
@@ -1136,10 +1181,10 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
}
#ifdef CONFIG_PROFILER
- tcg_ctx.code_time += profile_getclock();
- tcg_ctx.code_in_len += tb->size;
- tcg_ctx.code_out_len += gen_code_size;
- tcg_ctx.search_out_len += search_size;
+ s->code_time += profile_getclock();
+ s->code_in_len += tb->size;
+ s->code_out_len += gen_code_size;
+ s->search_out_len += search_size;
#endif
#ifdef DEBUG_DISAS
@@ -1151,7 +1196,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
}
#endif
- tcg_ctx.code_gen_ptr = (void *)
+ s->code_gen_ptr = (void *)
ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
CODE_GEN_ALIGN);
@@ -1247,7 +1292,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
current_tb = NULL;
if (cpu->mem_io_pc) {
/* now we have a real cpu fault */
- current_tb = tb_find_pc(cpu->mem_io_pc);
+ current_tb = tb_find_pc(cpu, cpu->mem_io_pc);
}
}
if (current_tb == tb &&
@@ -1365,7 +1410,7 @@ static void tb_invalidate_phys_page(tb_page_addr_t addr,
tb = p->first_tb;
#ifdef TARGET_HAS_PRECISE_SMC
if (tb && pc != 0) {
- current_tb = tb_find_pc(pc);
+ current_tb = tb_find_pc(cpu, pc);
}
if (cpu != NULL) {
env = cpu->env_ptr;
@@ -1475,12 +1520,13 @@ static inline void tb_alloc_page(TranslationBlock *tb,
static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
tb_page_addr_t phys_page2)
{
+ TCGContext *s = &tcg_ctx_global;
unsigned int h;
TranslationBlock **ptb;
/* add in the physical hash table */
- h = tb_phys_hash_func(phys_pc);
- ptb = &tcg_ctx.tb_ctx.tb_phys_hash[h];
+ h = tb_phys_hash_func(tb->pc);
+ ptb = &s->tb_ctx->tb_phys_hash[h];
tb->phys_hash_next = *ptb;
*ptb = tb;
@@ -1511,25 +1557,31 @@ static void tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
/* find the TB 'tb' such that tb[0].tc_ptr <= tc_ptr <
tb[1].tc_ptr. Return NULL if not found */
-static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
+static TranslationBlock *tb_find_pc(CPUState *cpu, uintptr_t tc_ptr)
{
+ TCGContext *s = &tcg_ctx_global;
int m_min, m_max, m;
uintptr_t v;
TranslationBlock *tb;
- if (tcg_ctx.tb_ctx.nb_tbs <= 0) {
+ if (s->tb_ctx->nb_tbs <= 0) {
return NULL;
}
- if (tc_ptr < (uintptr_t)tcg_ctx.code_gen_buffer ||
- tc_ptr >= (uintptr_t)tcg_ctx.code_gen_ptr) {
+#if defined(CONFIG_LLVM)
+ tb = llvm_find_pc(cpu, tc_ptr);
+ if (tb)
+ return tb;
+#endif
+ if (tc_ptr < (uintptr_t)s->code_gen_buffer ||
+ tc_ptr >= (uintptr_t)s->code_gen_ptr) {
return NULL;
}
/* binary search (cf Knuth) */
m_min = 0;
- m_max = tcg_ctx.tb_ctx.nb_tbs - 1;
+ m_max = s->tb_ctx->nb_tbs - 1;
while (m_min <= m_max) {
m = (m_min + m_max) >> 1;
- tb = &tcg_ctx.tb_ctx.tbs[m];
+ tb = &s->tb_ctx->tbs[m];
v = (uintptr_t)tb->tc_ptr;
if (v == tc_ptr) {
return tb;
@@ -1539,7 +1591,7 @@ static TranslationBlock *tb_find_pc(uintptr_t tc_ptr)
m_min = m + 1;
}
}
- return &tcg_ctx.tb_ctx.tbs[m_max];
+ return &s->tb_ctx->tbs[m_max];
}
#if !defined(CONFIG_USER_ONLY)
@@ -1567,7 +1619,7 @@ void tb_check_watchpoint(CPUState *cpu)
{
TranslationBlock *tb;
- tb = tb_find_pc(cpu->mem_io_pc);
+ tb = tb_find_pc(cpu, cpu->mem_io_pc);
if (tb) {
/* We can use retranslation to find the PC. */
cpu_restore_state_from_tb(cpu, tb, cpu->mem_io_pc);
@@ -1599,7 +1651,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
target_ulong pc, cs_base;
uint64_t flags;
- tb = tb_find_pc(retaddr);
+ tb = tb_find_pc(cpu, retaddr);
if (!tb) {
cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
(void *)retaddr);
@@ -1675,6 +1727,7 @@ void tb_flush_jmp_cache(CPUState *cpu, target_ulong addr)
void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
{
+ TCGContext *s = &tcg_ctx_global;
int i, target_code_size, max_target_code_size;
int direct_jmp_count, direct_jmp2_count, cross_page;
TranslationBlock *tb;
@@ -1684,8 +1737,8 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
cross_page = 0;
direct_jmp_count = 0;
direct_jmp2_count = 0;
- for (i = 0; i < tcg_ctx.tb_ctx.nb_tbs; i++) {
- tb = &tcg_ctx.tb_ctx.tbs[i];
+ for (i = 0; i < s->tb_ctx->nb_tbs; i++) {
+ tb = &s->tb_ctx->tbs[i];
target_code_size += tb->size;
if (tb->size > max_target_code_size) {
max_target_code_size = tb->size;
@@ -1703,35 +1756,35 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
/* XXX: avoid using doubles ? */
cpu_fprintf(f, "Translation buffer state:\n");
cpu_fprintf(f, "gen code size %td/%zd\n",
- tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer,
- tcg_ctx.code_gen_highwater - tcg_ctx.code_gen_buffer);
+ s->code_gen_ptr - s->code_gen_buffer,
+ s->code_gen_highwater - s->code_gen_buffer);
cpu_fprintf(f, "TB count %d/%d\n",
- tcg_ctx.tb_ctx.nb_tbs, tcg_ctx.code_gen_max_blocks);
+ s->tb_ctx->nb_tbs, s->code_gen_max_blocks);
cpu_fprintf(f, "TB avg target size %d max=%d bytes\n",
- tcg_ctx.tb_ctx.nb_tbs ? target_code_size /
- tcg_ctx.tb_ctx.nb_tbs : 0,
+ s->tb_ctx->nb_tbs ? target_code_size /
+ s->tb_ctx->nb_tbs : 0,
max_target_code_size);
cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
- tcg_ctx.tb_ctx.nb_tbs ? (tcg_ctx.code_gen_ptr -
- tcg_ctx.code_gen_buffer) /
- tcg_ctx.tb_ctx.nb_tbs : 0,
- target_code_size ? (double) (tcg_ctx.code_gen_ptr -
- tcg_ctx.code_gen_buffer) /
+ s->tb_ctx->nb_tbs ? (s->code_gen_ptr -
+ s->code_gen_buffer) /
+ s->tb_ctx->nb_tbs : 0,
+ target_code_size ? (double) (s->code_gen_ptr -
+ s->code_gen_buffer) /
target_code_size : 0);
cpu_fprintf(f, "cross page TB count %d (%d%%)\n", cross_page,
- tcg_ctx.tb_ctx.nb_tbs ? (cross_page * 100) /
- tcg_ctx.tb_ctx.nb_tbs : 0);
+ s->tb_ctx->nb_tbs ? (cross_page * 100) /
+ s->tb_ctx->nb_tbs : 0);
cpu_fprintf(f, "direct jump count %d (%d%%) (2 jumps=%d %d%%)\n",
direct_jmp_count,
- tcg_ctx.tb_ctx.nb_tbs ? (direct_jmp_count * 100) /
- tcg_ctx.tb_ctx.nb_tbs : 0,
+ s->tb_ctx->nb_tbs ? (direct_jmp_count * 100) /
+ s->tb_ctx->nb_tbs : 0,
direct_jmp2_count,
- tcg_ctx.tb_ctx.nb_tbs ? (direct_jmp2_count * 100) /
- tcg_ctx.tb_ctx.nb_tbs : 0);
+ s->tb_ctx->nb_tbs ? (direct_jmp2_count * 100) /
+ s->tb_ctx->nb_tbs : 0);
cpu_fprintf(f, "\nStatistics:\n");
- cpu_fprintf(f, "TB flush count %d\n", tcg_ctx.tb_ctx.tb_flush_count);
+ cpu_fprintf(f, "TB flush count %d\n", s->tb_ctx->tb_flush_count);
cpu_fprintf(f, "TB invalidate count %d\n",
- tcg_ctx.tb_ctx.tb_phys_invalidate_count);
+ s->tb_ctx->tb_phys_invalidate_count);
cpu_fprintf(f, "TLB flush count %d\n", tlb_flush_count);
tcg_dump_info(f, cpu_fprintf);
}
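
The translate-all.c hunks above follow one pattern: every piece of translation-block bookkeeping moves from the static tcg_ctx into the shared tcg_ctx_global, whose tb_ctx is now a pointer, and tb_find_pc() gains a CPUState argument so the LLVM backend can be asked first (llvm_find_pc) before the classic TCG lookup runs. Below is a minimal sketch of that fallback lookup, using simplified stand-in types rather than the real TranslationBlock/TBContext; it relies only on the property the original comment states, that blocks are laid out in code-buffer order so tc_ptr is sorted.

/* Sketch of tb_find_pc()'s fallback: binary search (cf Knuth) over TBs
 * sorted by the start address of their generated host code.  TB here is
 * a simplified stand-in for the real TranslationBlock. */
#include <stddef.h>
#include <stdint.h>

typedef struct {
    uintptr_t tc_ptr;             /* start of this block's host code */
} TB;

/* Return the TB whose host code contains tc, or NULL if out of range. */
static TB *find_tb_by_host_pc(TB *tbs, int nb_tbs, uintptr_t tc)
{
    int m_min = 0, m_max = nb_tbs - 1;

    if (nb_tbs <= 0 || tc < tbs[0].tc_ptr) {
        return NULL;
    }
    while (m_min <= m_max) {
        int m = (m_min + m_max) >> 1;
        uintptr_t v = tbs[m].tc_ptr;
        if (v == tc) {
            return &tbs[m];
        } else if (tc < v) {
            m_max = m - 1;
        } else {
            m_min = m + 1;
        }
    }
    return &tbs[m_max];           /* greatest tc_ptr strictly below tc */
}

Because blocks are allocated linearly from code_gen_buffer, returning &tbs[m_max] after the loop yields the block whose generated code contains tc even when tc points into the middle of a block, which is exactly what the fault and recompile paths above need.
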
diff --git a/user-exec.c b/user-exec.c
index 8ad89a4..dbf04be 100644
--- a/user-exec.c
+++ b/user-exec.c
@@ -58,7 +58,7 @@ static void exception_action(CPUState *cpu)
void cpu_resume_from_signal(CPUState *cpu, void *puc)
{
#ifdef __linux__
- struct ucontext *uc = puc;
+ ucontext_t *uc = puc;
#elif defined(__OpenBSD__)
struct sigcontext *uc = puc;
#endif
@@ -172,7 +172,7 @@ int cpu_signal_handler(int host_signum, void *pinfo,
#elif defined(__OpenBSD__)
struct sigcontext *uc = puc;
#else
- struct ucontext *uc = puc;
+ ucontext_t *uc = puc;
#endif
unsigned long pc;
int trapno;
@@ -227,7 +227,7 @@ int cpu_signal_handler(int host_signum, void *pinfo,
#elif defined(__OpenBSD__)
struct sigcontext *uc = puc;
#else
- struct ucontext *uc = puc;
+ ucontext_t *uc = puc;
#endif
pc = PC_sig(uc);
@@ -332,7 +332,7 @@ int cpu_signal_handler(int host_signum, void *pinfo,
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
ucontext_t *uc = puc;
#else
- struct ucontext *uc = puc;
+ ucontext_t *uc = puc;
#endif
unsigned long pc;
int is_write;
@@ -359,7 +359,7 @@ int cpu_signal_handler(int host_signum, void *pinfo,
void *puc)
{
siginfo_t *info = pinfo;
- struct ucontext *uc = puc;
+ ucontext_t *uc = puc;
uint32_t *pc = uc->uc_mcontext.sc_pc;
uint32_t insn = *pc;
int is_write = 0;
@@ -457,7 +457,7 @@ int cpu_signal_handler(int host_signum, void *pinfo,
#if defined(__NetBSD__)
ucontext_t *uc = puc;
#else
- struct ucontext *uc = puc;
+ ucontext_t *uc = puc;
#endif
unsigned long pc;
int is_write;
@@ -484,7 +484,7 @@ int cpu_signal_handler(int host_signum, void *pinfo,
int cpu_signal_handler(int host_signum, void *pinfo, void *puc)
{
siginfo_t *info = pinfo;
- struct ucontext *uc = puc;
+ ucontext_t *uc = puc;
uintptr_t pc = uc->uc_mcontext.pc;
uint32_t insn = *(uint32_t *)pc;
bool is_write;
@@ -513,7 +513,7 @@ int cpu_signal_handler(int host_signum, void *pinfo,
void *puc)
{
siginfo_t *info = pinfo;
- struct ucontext *uc = puc;
+ ucontext_t *uc = puc;
unsigned long pc;
int is_write;
@@ -535,7 +535,7 @@ int cpu_signal_handler(int host_signum, void *pinfo,
int cpu_signal_handler(int host_signum, void *pinfo, void *puc)
{
siginfo_t *info = pinfo;
- struct ucontext *uc = puc;
+ ucontext_t *uc = puc;
unsigned long ip;
int is_write = 0;
@@ -566,7 +566,7 @@ int cpu_signal_handler(int host_signum, void *pinfo,
void *puc)
{
siginfo_t *info = pinfo;
- struct ucontext *uc = puc;
+ ucontext_t *uc = puc;
unsigned long pc;
uint16_t *pinsn;
int is_write = 0;
@@ -619,7 +619,7 @@ int cpu_signal_handler(int host_signum, void *pinfo,
void *puc)
{
siginfo_t *info = pinfo;
- struct ucontext *uc = puc;
+ ucontext_t *uc = puc;
greg_t pc = uc->uc_mcontext.pc;
int is_write;
@@ -635,7 +635,7 @@ int cpu_signal_handler(int host_signum, void *pinfo,
void *puc)
{
siginfo_t *info = pinfo;
- struct ucontext *uc = puc;
+ ucontext_t *uc = puc;
unsigned long pc = uc->uc_mcontext.sc_iaoq[0];
uint32_t insn = *(uint32_t *)pc;
int is_write = 0;
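
The user-exec.c changes are all one substitution: POSIX defines only the ucontext_t typedef, the struct ucontext tag was a glibc implementation detail, and newer glibc (2.26+) removed it, so every per-host signal handler switches to the portable spelling. A minimal sketch of the pattern these handlers share, assuming an x86-64 Linux host (REG_RIP and the gregs[] layout are glibc/x86-64 specific, which is why the file carries one #if branch per host):

/* Sketch: recover the faulting instruction pointer from the ucontext_t
 * handed to an SA_SIGINFO handler.  Assumes x86-64 Linux. */
#define _GNU_SOURCE               /* for REG_RIP */
#include <signal.h>
#include <stdio.h>
#include <ucontext.h>
#include <unistd.h>

static void segv_handler(int sig, siginfo_t *info, void *puc)
{
    ucontext_t *uc = puc;         /* not "struct ucontext *": that tag is gone */
    unsigned long pc = uc->uc_mcontext.gregs[REG_RIP];

    /* fprintf is not async-signal-safe; fine for a demo, not production */
    fprintf(stderr, "fault at %p, pc=0x%lx\n", info->si_addr, pc);
    _exit(1);
}

int main(void)
{
    struct sigaction sa = { .sa_sigaction = segv_handler,
                            .sa_flags = SA_SIGINFO };
    sigaction(SIGSEGV, &sa, NULL);
    *(volatile int *)0 = 0;       /* deliberate fault to exercise the handler */
    return 0;
}
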
diff --git a/util/memfd.c b/util/memfd.c
index 7c40691..587ef5a 100644
--- a/util/memfd.c
+++ b/util/memfd.c
@@ -40,7 +40,7 @@
#include <sys/syscall.h>
#include <asm/unistd.h>
-static int memfd_create(const char *name, unsigned int flags)
+static int qemu_memfd_create(const char *name, unsigned int flags)
{
#ifdef __NR_memfd_create
return syscall(__NR_memfd_create, name, flags);
@@ -74,12 +74,12 @@ void *qemu_memfd_alloc(const char *name, size_t size, unsigned int seals,
#ifdef CONFIG_LINUX
if (seals) {
- mfd = memfd_create(name, MFD_ALLOW_SEALING | MFD_CLOEXEC);
+ mfd = qemu_memfd_create(name, MFD_ALLOW_SEALING | MFD_CLOEXEC);
}
if (mfd == -1) {
/* some systems have memfd without sealing */
- mfd = memfd_create(name, MFD_CLOEXEC);
+ mfd = qemu_memfd_create(name, MFD_CLOEXEC);
seals = 0;
}
#endif
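
The memfd.c rename exists for the same libc-collision reason: glibc 2.27 and later export a real memfd_create() wrapper, and a static function of the same name clashes with that declaration, so the private raw-syscall wrapper becomes qemu_memfd_create() while the call sites keep the try-sealing-then-fall-back logic. A compilable sketch of that pattern, under the same assumption the original makes that __NR_memfd_create is defined; my_memfd_create and the constants here are illustrative stand-ins, not QEMU API:

/* Sketch: allocate a sealable memfd, falling back to an unsealed one on
 * kernels without sealing support.  Linux-only; the raw syscall avoids
 * clashing with the memfd_create() that modern glibc exports. */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <asm/unistd.h>
#include <linux/memfd.h>          /* MFD_CLOEXEC, MFD_ALLOW_SEALING */

static int my_memfd_create(const char *name, unsigned int flags)
{
    return syscall(__NR_memfd_create, name, flags);
}

int main(void)
{
    int mfd = my_memfd_create("demo", MFD_ALLOW_SEALING | MFD_CLOEXEC);
    if (mfd == -1) {
        /* some systems have memfd without sealing */
        mfd = my_memfd_create("demo", MFD_CLOEXEC);
    }
    if (mfd == -1 || ftruncate(mfd, 4096) == -1) {
        perror("memfd");
        return 1;
    }
    char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, mfd, 0);
    if (p == MAP_FAILED) {
        perror("mmap");
        return 1;
    }
    strcpy(p, "shared page backed by an anonymous memfd");
    printf("%s\n", p);
    munmap(p, 4096);
    close(mfd);
    return 0;
}
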
diff --git a/vl.c b/vl.c
index 4211ff1..9b8d8de 100644
--- a/vl.c
+++ b/vl.c
@@ -4690,6 +4690,7 @@ int main(int argc, char **argv, char **envp)
#ifdef CONFIG_TPM
tpm_cleanup();
#endif
+ qemu_end_cpu_loop();
return 0;
}