summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPeter Maydell <peter.maydell@linaro.org>2014-06-09 17:04:13 +0100
committerPeter Maydell <peter.maydell@linaro.org>2014-06-09 17:04:13 +0100
commit7721a3044234c46cd6f5f899e7467dc9351f3c8d (patch)
tree82162c617563aee0f9d9ca1d9bf2073733975538
parent14ac57339288c07b47e7e91fa192735158aa6a1e (diff)
parent3b1a41381254f6080b5cfeb149c28a9237d42a0b (diff)
downloadhqemu-7721a3044234c46cd6f5f899e7467dc9351f3c8d.zip
hqemu-7721a3044234c46cd6f5f899e7467dc9351f3c8d.tar.gz
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20140609-1' into staging
---------------------------------------------------------------- target-arm queue: * support -bios option in vexpress boards * register the Cortex-A57 impdef system registers * fix handling of UXN bit in ARMv8 page tables * complete support of crypto insns in A32/T32 * implement CRC and crypto insns in A64 * fix bugs in generic timer control register ---------------------------------------------------------------- # gpg: Signature made Mon 09 Jun 2014 16:08:26 BST using RSA key ID 14360CDE # gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" * remotes/pmaydell/tags/pull-target-arm-20140609-1: target-arm: Delete unused iwmmxt_msadb helper target-arm: Fix errors in writes to generic timer control registers target-arm: A64: Implement two-register SHA instructions target-arm: A64: Implement 3-register SHA instructions target-arm: A64: Implement AES instructions target-arm: A32/T32: Mask CRC value in calling code, not helper target-arm: A64: Implement CRC instructions target-arm: VFPv4 implies half-precision extension target-arm: Clean up handling of ARMv8 optional feature bits target-arm: Remove unnecessary setting of feature bits target-arm: arm_any_initfn() should never set ARM_FEATURE_AARCH64 target-arm: A64: Use PMULL feature bit for PMULL target-arm: add support for v8 VMULL.P64 instruction target-arm: Allow 3reg_wide undefreq to encode more bad size options target-arm: add support for v8 SHA1 and SHA256 instructions target-arm: Correct handling of UXN bit in ARMv8 LPAE page tables target-arm: Prepare cpreg writefns/readfns for EL3/SecExt target-arm/cpu64.c: Actually register Cortex-A57 impdef registers vexpress: Add support for the -bios flag to provide firmware Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
-rw-r--r--hw/arm/vexpress.c13
-rw-r--r--linux-user/elfload.c9
-rw-r--r--target-arm/cpu.c13
-rw-r--r--target-arm/cpu.h3
-rw-r--r--target-arm/cpu64.c15
-rw-r--r--target-arm/crypto_helper.c257
-rw-r--r--target-arm/helper-a64.c60
-rw-r--r--target-arm/helper-a64.h4
-rw-r--r--target-arm/helper.c76
-rw-r--r--target-arm/helper.h14
-rw-r--r--target-arm/iwmmxt_helper.c9
-rw-r--r--target-arm/neon_helper.c30
-rw-r--r--target-arm/translate-a64.c211
-rw-r--r--target-arm/translate.c144
14 files changed, 731 insertions, 127 deletions
diff --git a/hw/arm/vexpress.c b/hw/arm/vexpress.c
index 33ff422..f311595 100644
--- a/hw/arm/vexpress.c
+++ b/hw/arm/vexpress.c
@@ -28,6 +28,7 @@
#include "net/net.h"
#include "sysemu/sysemu.h"
#include "hw/boards.h"
+#include "hw/loader.h"
#include "exec/address-spaces.h"
#include "sysemu/blockdev.h"
#include "hw/block/flash.h"
@@ -528,6 +529,18 @@ static void vexpress_common_init(VEDBoardInfo *daughterboard,
daughterboard->init(daughterboard, machine->ram_size, machine->cpu_model,
pic);
+ /*
+ * If a bios file was provided, attempt to map it into memory
+ */
+ if (bios_name) {
+ const char *fn = qemu_find_file(QEMU_FILE_TYPE_BIOS, bios_name);
+ if (!fn || load_image_targphys(fn, map[VE_NORFLASH0],
+ VEXPRESS_FLASH_SIZE) < 0) {
+ error_report("Could not load ROM image '%s'", bios_name);
+ exit(1);
+ }
+ }
+
/* Motherboard peripherals: the wiring is the same but the
* addresses vary between the legacy and A-Series memory maps.
*/
diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 995f999..68b9793 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -468,6 +468,9 @@ static uint32_t get_elf_hwcap2(void)
uint32_t hwcaps = 0;
GET_FEATURE(ARM_FEATURE_V8_AES, ARM_HWCAP2_ARM_AES);
+ GET_FEATURE(ARM_FEATURE_V8_PMULL, ARM_HWCAP2_ARM_PMULL);
+ GET_FEATURE(ARM_FEATURE_V8_SHA1, ARM_HWCAP2_ARM_SHA1);
+ GET_FEATURE(ARM_FEATURE_V8_SHA256, ARM_HWCAP2_ARM_SHA2);
GET_FEATURE(ARM_FEATURE_CRC, ARM_HWCAP2_ARM_CRC32);
return hwcaps;
}
@@ -536,7 +539,11 @@ static uint32_t get_elf_hwcap(void)
/* probe for the extra features */
#define GET_FEATURE(feat, hwcap) \
do { if (arm_feature(&cpu->env, feat)) { hwcaps |= hwcap; } } while (0)
- GET_FEATURE(ARM_FEATURE_V8_AES, ARM_HWCAP_A64_PMULL);
+ GET_FEATURE(ARM_FEATURE_V8_AES, ARM_HWCAP_A64_AES);
+ GET_FEATURE(ARM_FEATURE_V8_PMULL, ARM_HWCAP_A64_PMULL);
+ GET_FEATURE(ARM_FEATURE_V8_SHA1, ARM_HWCAP_A64_SHA1);
+ GET_FEATURE(ARM_FEATURE_V8_SHA256, ARM_HWCAP_A64_SHA2);
+ GET_FEATURE(ARM_FEATURE_CRC, ARM_HWCAP_A64_CRC32);
#undef GET_FEATURE
return hwcaps;
diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index 794dcb9..b877835 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -316,7 +316,6 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
set_feature(env, ARM_FEATURE_V7);
set_feature(env, ARM_FEATURE_ARM_DIV);
set_feature(env, ARM_FEATURE_LPAE);
- set_feature(env, ARM_FEATURE_V8_AES);
}
if (arm_feature(env, ARM_FEATURE_V7)) {
set_feature(env, ARM_FEATURE_VAPA);
@@ -349,6 +348,7 @@ static void arm_cpu_realizefn(DeviceState *dev, Error **errp)
}
if (arm_feature(env, ARM_FEATURE_VFP4)) {
set_feature(env, ARM_FEATURE_VFP3);
+ set_feature(env, ARM_FEATURE_VFP_FP16);
}
if (arm_feature(env, ARM_FEATURE_VFP3)) {
set_feature(env, ARM_FEATURE_VFP);
@@ -745,7 +745,6 @@ static void cortex_a15_initfn(Object *obj)
cpu->dtb_compatible = "arm,cortex-a15";
set_feature(&cpu->env, ARM_FEATURE_V7);
set_feature(&cpu->env, ARM_FEATURE_VFP4);
- set_feature(&cpu->env, ARM_FEATURE_VFP_FP16);
set_feature(&cpu->env, ARM_FEATURE_NEON);
set_feature(&cpu->env, ARM_FEATURE_THUMB2EE);
set_feature(&cpu->env, ARM_FEATURE_ARM_DIV);
@@ -954,15 +953,13 @@ static void arm_any_initfn(Object *obj)
ARMCPU *cpu = ARM_CPU(obj);
set_feature(&cpu->env, ARM_FEATURE_V8);
set_feature(&cpu->env, ARM_FEATURE_VFP4);
- set_feature(&cpu->env, ARM_FEATURE_VFP_FP16);
set_feature(&cpu->env, ARM_FEATURE_NEON);
set_feature(&cpu->env, ARM_FEATURE_THUMB2EE);
- set_feature(&cpu->env, ARM_FEATURE_ARM_DIV);
- set_feature(&cpu->env, ARM_FEATURE_V7MP);
+ set_feature(&cpu->env, ARM_FEATURE_V8_AES);
+ set_feature(&cpu->env, ARM_FEATURE_V8_SHA1);
+ set_feature(&cpu->env, ARM_FEATURE_V8_SHA256);
+ set_feature(&cpu->env, ARM_FEATURE_V8_PMULL);
set_feature(&cpu->env, ARM_FEATURE_CRC);
-#ifdef TARGET_AARCH64
- set_feature(&cpu->env, ARM_FEATURE_AARCH64);
-#endif
cpu->midr = 0xffffffff;
}
#endif
diff --git a/target-arm/cpu.h b/target-arm/cpu.h
index 7d8332e..79e7f82 100644
--- a/target-arm/cpu.h
+++ b/target-arm/cpu.h
@@ -635,6 +635,9 @@ enum arm_features {
ARM_FEATURE_CBAR_RO, /* has cp15 CBAR and it is read-only */
ARM_FEATURE_EL2, /* has EL2 Virtualization support */
ARM_FEATURE_EL3, /* has EL3 Secure monitor support */
+ ARM_FEATURE_V8_SHA1, /* implements SHA1 part of v8 Crypto Extensions */
+ ARM_FEATURE_V8_SHA256, /* implements SHA256 part of v8 Crypto Extensions */
+ ARM_FEATURE_V8_PMULL, /* implements PMULL part of v8 Crypto Extensions */
};
static inline int arm_feature(CPUARMState *env, int feature)
diff --git a/target-arm/cpu64.c b/target-arm/cpu64.c
index 8daa622..8b2081c 100644
--- a/target-arm/cpu64.c
+++ b/target-arm/cpu64.c
@@ -93,11 +93,15 @@ static void aarch64_a57_initfn(Object *obj)
set_feature(&cpu->env, ARM_FEATURE_V8);
set_feature(&cpu->env, ARM_FEATURE_VFP4);
- set_feature(&cpu->env, ARM_FEATURE_VFP_FP16);
set_feature(&cpu->env, ARM_FEATURE_NEON);
set_feature(&cpu->env, ARM_FEATURE_GENERIC_TIMER);
set_feature(&cpu->env, ARM_FEATURE_AARCH64);
set_feature(&cpu->env, ARM_FEATURE_CBAR_RO);
+ set_feature(&cpu->env, ARM_FEATURE_V8_AES);
+ set_feature(&cpu->env, ARM_FEATURE_V8_SHA1);
+ set_feature(&cpu->env, ARM_FEATURE_V8_SHA256);
+ set_feature(&cpu->env, ARM_FEATURE_V8_PMULL);
+ set_feature(&cpu->env, ARM_FEATURE_CRC);
cpu->kvm_target = QEMU_KVM_ARM_TARGET_CORTEX_A57;
cpu->midr = 0x411fd070;
cpu->reset_fpsid = 0x41034070;
@@ -128,6 +132,7 @@ static void aarch64_a57_initfn(Object *obj)
cpu->ccsidr[1] = 0x201fe012; /* 48KB L1 icache */
cpu->ccsidr[2] = 0x70ffe07a; /* 2048KB L2 cache */
cpu->dcz_blocksize = 4; /* 64 bytes */
+ define_arm_cp_regs(cpu, cortexa57_cp_reginfo);
}
#ifdef CONFIG_USER_ONLY
@@ -137,11 +142,13 @@ static void aarch64_any_initfn(Object *obj)
set_feature(&cpu->env, ARM_FEATURE_V8);
set_feature(&cpu->env, ARM_FEATURE_VFP4);
- set_feature(&cpu->env, ARM_FEATURE_VFP_FP16);
set_feature(&cpu->env, ARM_FEATURE_NEON);
- set_feature(&cpu->env, ARM_FEATURE_ARM_DIV);
- set_feature(&cpu->env, ARM_FEATURE_V7MP);
set_feature(&cpu->env, ARM_FEATURE_AARCH64);
+ set_feature(&cpu->env, ARM_FEATURE_V8_AES);
+ set_feature(&cpu->env, ARM_FEATURE_V8_SHA1);
+ set_feature(&cpu->env, ARM_FEATURE_V8_SHA256);
+ set_feature(&cpu->env, ARM_FEATURE_V8_PMULL);
+ set_feature(&cpu->env, ARM_FEATURE_CRC);
cpu->ctr = 0x80030003; /* 32 byte I and D cacheline size, VIPT icache */
cpu->dcz_blocksize = 7; /* 512 bytes */
}
diff --git a/target-arm/crypto_helper.c b/target-arm/crypto_helper.c
index d8898ed..3e4b5f7 100644
--- a/target-arm/crypto_helper.c
+++ b/target-arm/crypto_helper.c
@@ -1,7 +1,7 @@
/*
* crypto_helper.c - emulate v8 Crypto Extensions instructions
*
- * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ * Copyright (C) 2013 - 2014 Linaro Ltd <ard.biesheuvel@linaro.org>
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
@@ -15,9 +15,9 @@
#include "exec/exec-all.h"
#include "exec/helper-proto.h"
-union AES_STATE {
+union CRYPTO_STATE {
uint8_t bytes[16];
- uint32_t cols[4];
+ uint32_t words[4];
uint64_t l[2];
};
@@ -99,11 +99,11 @@ void HELPER(crypto_aese)(CPUARMState *env, uint32_t rd, uint32_t rm,
/* ShiftRows permutation vector for decryption */
{ 0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3 },
};
- union AES_STATE rk = { .l = {
+ union CRYPTO_STATE rk = { .l = {
float64_val(env->vfp.regs[rm]),
float64_val(env->vfp.regs[rm + 1])
} };
- union AES_STATE st = { .l = {
+ union CRYPTO_STATE st = { .l = {
float64_val(env->vfp.regs[rd]),
float64_val(env->vfp.regs[rd + 1])
} };
@@ -260,7 +260,7 @@ void HELPER(crypto_aesmc)(CPUARMState *env, uint32_t rd, uint32_t rm,
0x92b479a7, 0x99b970a9, 0x84ae6bbb, 0x8fa362b5,
0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d,
} };
- union AES_STATE st = { .l = {
+ union CRYPTO_STATE st = { .l = {
float64_val(env->vfp.regs[rm]),
float64_val(env->vfp.regs[rm + 1])
} };
@@ -269,7 +269,7 @@ void HELPER(crypto_aesmc)(CPUARMState *env, uint32_t rd, uint32_t rm,
assert(decrypt < 2);
for (i = 0; i < 16; i += 4) {
- st.cols[i >> 2] = cpu_to_le32(
+ st.words[i >> 2] = cpu_to_le32(
mc[decrypt][st.bytes[i]] ^
rol32(mc[decrypt][st.bytes[i + 1]], 8) ^
rol32(mc[decrypt][st.bytes[i + 2]], 16) ^
@@ -279,3 +279,246 @@ void HELPER(crypto_aesmc)(CPUARMState *env, uint32_t rd, uint32_t rm,
env->vfp.regs[rd] = make_float64(st.l[0]);
env->vfp.regs[rd + 1] = make_float64(st.l[1]);
}
+
+/*
+ * SHA-1 logical functions
+ */
+
+static uint32_t cho(uint32_t x, uint32_t y, uint32_t z)
+{
+ return (x & (y ^ z)) ^ z;
+}
+
+static uint32_t par(uint32_t x, uint32_t y, uint32_t z)
+{
+ return x ^ y ^ z;
+}
+
+static uint32_t maj(uint32_t x, uint32_t y, uint32_t z)
+{
+ return (x & y) | ((x | y) & z);
+}
+
+void HELPER(crypto_sha1_3reg)(CPUARMState *env, uint32_t rd, uint32_t rn,
+ uint32_t rm, uint32_t op)
+{
+ union CRYPTO_STATE d = { .l = {
+ float64_val(env->vfp.regs[rd]),
+ float64_val(env->vfp.regs[rd + 1])
+ } };
+ union CRYPTO_STATE n = { .l = {
+ float64_val(env->vfp.regs[rn]),
+ float64_val(env->vfp.regs[rn + 1])
+ } };
+ union CRYPTO_STATE m = { .l = {
+ float64_val(env->vfp.regs[rm]),
+ float64_val(env->vfp.regs[rm + 1])
+ } };
+
+ if (op == 3) { /* sha1su0 */
+ d.l[0] ^= d.l[1] ^ m.l[0];
+ d.l[1] ^= n.l[0] ^ m.l[1];
+ } else {
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ uint32_t t;
+
+ switch (op) {
+ case 0: /* sha1c */
+ t = cho(d.words[1], d.words[2], d.words[3]);
+ break;
+ case 1: /* sha1p */
+ t = par(d.words[1], d.words[2], d.words[3]);
+ break;
+ case 2: /* sha1m */
+ t = maj(d.words[1], d.words[2], d.words[3]);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ t += rol32(d.words[0], 5) + n.words[0] + m.words[i];
+
+ n.words[0] = d.words[3];
+ d.words[3] = d.words[2];
+ d.words[2] = ror32(d.words[1], 2);
+ d.words[1] = d.words[0];
+ d.words[0] = t;
+ }
+ }
+ env->vfp.regs[rd] = make_float64(d.l[0]);
+ env->vfp.regs[rd + 1] = make_float64(d.l[1]);
+}
+
+void HELPER(crypto_sha1h)(CPUARMState *env, uint32_t rd, uint32_t rm)
+{
+ union CRYPTO_STATE m = { .l = {
+ float64_val(env->vfp.regs[rm]),
+ float64_val(env->vfp.regs[rm + 1])
+ } };
+
+ m.words[0] = ror32(m.words[0], 2);
+ m.words[1] = m.words[2] = m.words[3] = 0;
+
+ env->vfp.regs[rd] = make_float64(m.l[0]);
+ env->vfp.regs[rd + 1] = make_float64(m.l[1]);
+}
+
+void HELPER(crypto_sha1su1)(CPUARMState *env, uint32_t rd, uint32_t rm)
+{
+ union CRYPTO_STATE d = { .l = {
+ float64_val(env->vfp.regs[rd]),
+ float64_val(env->vfp.regs[rd + 1])
+ } };
+ union CRYPTO_STATE m = { .l = {
+ float64_val(env->vfp.regs[rm]),
+ float64_val(env->vfp.regs[rm + 1])
+ } };
+
+ d.words[0] = rol32(d.words[0] ^ m.words[1], 1);
+ d.words[1] = rol32(d.words[1] ^ m.words[2], 1);
+ d.words[2] = rol32(d.words[2] ^ m.words[3], 1);
+ d.words[3] = rol32(d.words[3] ^ d.words[0], 1);
+
+ env->vfp.regs[rd] = make_float64(d.l[0]);
+ env->vfp.regs[rd + 1] = make_float64(d.l[1]);
+}
+
+/*
+ * The SHA-256 logical functions, according to
+ * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
+ */
+
+static uint32_t S0(uint32_t x)
+{
+ return ror32(x, 2) ^ ror32(x, 13) ^ ror32(x, 22);
+}
+
+static uint32_t S1(uint32_t x)
+{
+ return ror32(x, 6) ^ ror32(x, 11) ^ ror32(x, 25);
+}
+
+static uint32_t s0(uint32_t x)
+{
+ return ror32(x, 7) ^ ror32(x, 18) ^ (x >> 3);
+}
+
+static uint32_t s1(uint32_t x)
+{
+ return ror32(x, 17) ^ ror32(x, 19) ^ (x >> 10);
+}
+
+void HELPER(crypto_sha256h)(CPUARMState *env, uint32_t rd, uint32_t rn,
+ uint32_t rm)
+{
+ union CRYPTO_STATE d = { .l = {
+ float64_val(env->vfp.regs[rd]),
+ float64_val(env->vfp.regs[rd + 1])
+ } };
+ union CRYPTO_STATE n = { .l = {
+ float64_val(env->vfp.regs[rn]),
+ float64_val(env->vfp.regs[rn + 1])
+ } };
+ union CRYPTO_STATE m = { .l = {
+ float64_val(env->vfp.regs[rm]),
+ float64_val(env->vfp.regs[rm + 1])
+ } };
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ uint32_t t = cho(n.words[0], n.words[1], n.words[2]) + n.words[3]
+ + S1(n.words[0]) + m.words[i];
+
+ n.words[3] = n.words[2];
+ n.words[2] = n.words[1];
+ n.words[1] = n.words[0];
+ n.words[0] = d.words[3] + t;
+
+ t += maj(d.words[0], d.words[1], d.words[2]) + S0(d.words[0]);
+
+ d.words[3] = d.words[2];
+ d.words[2] = d.words[1];
+ d.words[1] = d.words[0];
+ d.words[0] = t;
+ }
+
+ env->vfp.regs[rd] = make_float64(d.l[0]);
+ env->vfp.regs[rd + 1] = make_float64(d.l[1]);
+}
+
+void HELPER(crypto_sha256h2)(CPUARMState *env, uint32_t rd, uint32_t rn,
+ uint32_t rm)
+{
+ union CRYPTO_STATE d = { .l = {
+ float64_val(env->vfp.regs[rd]),
+ float64_val(env->vfp.regs[rd + 1])
+ } };
+ union CRYPTO_STATE n = { .l = {
+ float64_val(env->vfp.regs[rn]),
+ float64_val(env->vfp.regs[rn + 1])
+ } };
+ union CRYPTO_STATE m = { .l = {
+ float64_val(env->vfp.regs[rm]),
+ float64_val(env->vfp.regs[rm + 1])
+ } };
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ uint32_t t = cho(d.words[0], d.words[1], d.words[2]) + d.words[3]
+ + S1(d.words[0]) + m.words[i];
+
+ d.words[3] = d.words[2];
+ d.words[2] = d.words[1];
+ d.words[1] = d.words[0];
+ d.words[0] = n.words[3 - i] + t;
+ }
+
+ env->vfp.regs[rd] = make_float64(d.l[0]);
+ env->vfp.regs[rd + 1] = make_float64(d.l[1]);
+}
+
+void HELPER(crypto_sha256su0)(CPUARMState *env, uint32_t rd, uint32_t rm)
+{
+ union CRYPTO_STATE d = { .l = {
+ float64_val(env->vfp.regs[rd]),
+ float64_val(env->vfp.regs[rd + 1])
+ } };
+ union CRYPTO_STATE m = { .l = {
+ float64_val(env->vfp.regs[rm]),
+ float64_val(env->vfp.regs[rm + 1])
+ } };
+
+ d.words[0] += s0(d.words[1]);
+ d.words[1] += s0(d.words[2]);
+ d.words[2] += s0(d.words[3]);
+ d.words[3] += s0(m.words[0]);
+
+ env->vfp.regs[rd] = make_float64(d.l[0]);
+ env->vfp.regs[rd + 1] = make_float64(d.l[1]);
+}
+
+void HELPER(crypto_sha256su1)(CPUARMState *env, uint32_t rd, uint32_t rn,
+ uint32_t rm)
+{
+ union CRYPTO_STATE d = { .l = {
+ float64_val(env->vfp.regs[rd]),
+ float64_val(env->vfp.regs[rd + 1])
+ } };
+ union CRYPTO_STATE n = { .l = {
+ float64_val(env->vfp.regs[rn]),
+ float64_val(env->vfp.regs[rn + 1])
+ } };
+ union CRYPTO_STATE m = { .l = {
+ float64_val(env->vfp.regs[rm]),
+ float64_val(env->vfp.regs[rm + 1])
+ } };
+
+ d.words[0] += s1(m.words[2]) + n.words[1];
+ d.words[1] += s1(m.words[3]) + n.words[2];
+ d.words[2] += s1(d.words[0]) + n.words[3];
+ d.words[3] += s1(d.words[1]) + m.words[0];
+
+ env->vfp.regs[rd] = make_float64(d.l[0]);
+ env->vfp.regs[rd + 1] = make_float64(d.l[1]);
+}
diff --git a/target-arm/helper-a64.c b/target-arm/helper-a64.c
index cccda74..2b4ce6a 100644
--- a/target-arm/helper-a64.c
+++ b/target-arm/helper-a64.c
@@ -24,6 +24,8 @@
#include "sysemu/sysemu.h"
#include "qemu/bitops.h"
#include "internals.h"
+#include "qemu/crc32c.h"
+#include <zlib.h> /* For crc32 */
/* C2.4.7 Multiply and divide */
/* special cases for 0 and LLONG_MIN are mandated by the standard */
@@ -186,36 +188,6 @@ uint64_t HELPER(simd_tbl)(CPUARMState *env, uint64_t result, uint64_t indices,
return result;
}
-/* Helper function for 64 bit polynomial multiply case:
- * perform PolynomialMult(op1, op2) and return either the top or
- * bottom half of the 128 bit result.
- */
-uint64_t HELPER(neon_pmull_64_lo)(uint64_t op1, uint64_t op2)
-{
- int bitnum;
- uint64_t res = 0;
-
- for (bitnum = 0; bitnum < 64; bitnum++) {
- if (op1 & (1ULL << bitnum)) {
- res ^= op2 << bitnum;
- }
- }
- return res;
-}
-uint64_t HELPER(neon_pmull_64_hi)(uint64_t op1, uint64_t op2)
-{
- int bitnum;
- uint64_t res = 0;
-
- /* bit 0 of op1 can't influence the high 64 bits at all */
- for (bitnum = 1; bitnum < 64; bitnum++) {
- if (op1 & (1ULL << bitnum)) {
- res ^= op2 >> (64 - bitnum);
- }
- }
- return res;
-}
-
/* 64bit/double versions of the neon float compare functions */
uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp)
{
@@ -438,6 +410,34 @@ float32 HELPER(fcvtx_f64_to_f32)(float64 a, CPUARMState *env)
return r;
}
+/* 64-bit versions of the CRC helpers. Note that although the operation
+ * (and the prototypes of crc32c() and crc32() mean that only the bottom
+ * 32 bits of the accumulator and result are used, we pass and return
+ * uint64_t for convenience of the generated code. Unlike the 32-bit
+ * instruction set versions, val may genuinely have 64 bits of data in it.
+ * The upper bytes of val (above the number specified by 'bytes') must have
+ * been zeroed out by the caller.
+ */
+uint64_t HELPER(crc32_64)(uint64_t acc, uint64_t val, uint32_t bytes)
+{
+ uint8_t buf[8];
+
+ stq_le_p(buf, val);
+
+ /* zlib crc32 converts the accumulator and output to one's complement. */
+ return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff;
+}
+
+uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes)
+{
+ uint8_t buf[8];
+
+ stq_le_p(buf, val);
+
+ /* Linux crc32c converts the output to one's complement. */
+ return crc32c(acc, buf, bytes) ^ 0xffffffff;
+}
+
/* Handle a CPU exception. */
void aarch64_cpu_do_interrupt(CPUState *cs)
{
diff --git a/target-arm/helper-a64.h b/target-arm/helper-a64.h
index 3f05bed..1d3d10f 100644
--- a/target-arm/helper-a64.h
+++ b/target-arm/helper-a64.h
@@ -28,8 +28,6 @@ DEF_HELPER_3(vfp_cmpes_a64, i64, f32, f32, ptr)
DEF_HELPER_3(vfp_cmpd_a64, i64, f64, f64, ptr)
DEF_HELPER_3(vfp_cmped_a64, i64, f64, f64, ptr)
DEF_HELPER_FLAGS_5(simd_tbl, TCG_CALL_NO_RWG_SE, i64, env, i64, i64, i32, i32)
-DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64)
-DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
DEF_HELPER_FLAGS_3(vfp_mulxs, TCG_CALL_NO_RWG, f32, f32, f32, ptr)
DEF_HELPER_FLAGS_3(vfp_mulxd, TCG_CALL_NO_RWG, f64, f64, f64, ptr)
DEF_HELPER_FLAGS_3(neon_ceq_f64, TCG_CALL_NO_RWG, i64, i64, i64, ptr)
@@ -46,3 +44,5 @@ DEF_HELPER_FLAGS_1(neon_addlp_u16, TCG_CALL_NO_RWG_SE, i64, i64)
DEF_HELPER_FLAGS_2(frecpx_f64, TCG_CALL_NO_RWG, f64, f64, ptr)
DEF_HELPER_FLAGS_2(frecpx_f32, TCG_CALL_NO_RWG, f32, f32, ptr)
DEF_HELPER_FLAGS_2(fcvtx_f64_to_f32, TCG_CALL_NO_RWG, f32, f64, env)
+DEF_HELPER_FLAGS_3(crc32_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32)
+DEF_HELPER_FLAGS_3(crc32c_64, TCG_CALL_NO_RWG_SE, i64, i64, i64, i32)
diff --git a/target-arm/helper.c b/target-arm/helper.c
index 95af624..050c409 100644
--- a/target-arm/helper.c
+++ b/target-arm/helper.c
@@ -319,7 +319,7 @@ static void dacr_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
{
ARMCPU *cpu = arm_env_get_cpu(env);
- env->cp15.c3 = value;
+ raw_write(env, ri, value);
tlb_flush(CPU(cpu), 1); /* Flush TLB as domain not tracked in TLB */
}
@@ -327,12 +327,12 @@ static void fcse_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
{
ARMCPU *cpu = arm_env_get_cpu(env);
- if (env->cp15.c13_fcse != value) {
+ if (raw_read(env, ri) != value) {
/* Unlike real hardware the qemu TLB uses virtual addresses,
* not modified virtual addresses, so this causes a TLB flush.
*/
tlb_flush(CPU(cpu), 1);
- env->cp15.c13_fcse = value;
+ raw_write(env, ri, value);
}
}
@@ -341,7 +341,7 @@ static void contextidr_write(CPUARMState *env, const ARMCPRegInfo *ri,
{
ARMCPU *cpu = arm_env_get_cpu(env);
- if (env->cp15.contextidr_el1 != value && !arm_feature(env, ARM_FEATURE_MPU)
+ if (raw_read(env, ri) != value && !arm_feature(env, ARM_FEATURE_MPU)
&& !extended_addresses_enabled(env)) {
/* For VMSA (when not using the LPAE long descriptor page table
* format) this register includes the ASID, so do a TLB flush.
@@ -349,7 +349,7 @@ static void contextidr_write(CPUARMState *env, const ARMCPRegInfo *ri,
*/
tlb_flush(CPU(cpu), 1);
}
- env->cp15.contextidr_el1 = value;
+ raw_write(env, ri, value);
}
static void tlbiall_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -693,7 +693,7 @@ static uint64_t ccsidr_read(CPUARMState *env, const ARMCPRegInfo *ri)
static void csselr_write(CPUARMState *env, const ARMCPRegInfo *ri,
uint64_t value)
{
- env->cp15.c0_cssel = value & 0xf;
+ raw_write(env, ri, value & 0xf);
}
static uint64_t isr_read(CPUARMState *env, const ARMCPRegInfo *ri)
@@ -1040,16 +1040,16 @@ static void gt_ctl_write(CPUARMState *env, const ARMCPRegInfo *ri,
int timeridx = ri->crm & 1;
uint32_t oldval = env->cp15.c14_timer[timeridx].ctl;
- env->cp15.c14_timer[timeridx].ctl = value & 3;
+ env->cp15.c14_timer[timeridx].ctl = deposit64(oldval, 0, 2, value);
if ((oldval ^ value) & 1) {
/* Enable toggled */
gt_recalc_timer(cpu, timeridx);
- } else if ((oldval & value) & 2) {
+ } else if ((oldval ^ value) & 2) {
/* IMASK toggled: don't need to recalculate,
* just set the interrupt line based on ISTATUS
*/
qemu_set_irq(cpu->gt_timer_outputs[timeridx],
- (oldval & 4) && (value & 2));
+ (oldval & 4) && !(value & 2));
}
}
@@ -1216,11 +1216,11 @@ static const ARMCPRegInfo generic_timer_cp_reginfo[] = {
static void par_write(CPUARMState *env, const ARMCPRegInfo *ri, uint64_t value)
{
if (arm_feature(env, ARM_FEATURE_LPAE)) {
- env->cp15.par_el1 = value;
+ raw_write(env, ri, value);
} else if (arm_feature(env, ARM_FEATURE_V7)) {
- env->cp15.par_el1 = value & 0xfffff6ff;
+ raw_write(env, ri, value & 0xfffff6ff);
} else {
- env->cp15.par_el1 = value & 0xfffff1ff;
+ raw_write(env, ri, value & 0xfffff1ff);
}
}
@@ -1423,7 +1423,7 @@ static void vmsa_ttbcr_raw_write(CPUARMState *env, const ARMCPRegInfo *ri,
* for long-descriptor tables the TTBCR fields are used differently
* and the c2_mask and c2_base_mask values are meaningless.
*/
- env->cp15.c2_control = value;
+ raw_write(env, ri, value);
env->cp15.c2_mask = ~(((uint32_t)0xffffffffu) >> maskshift);
env->cp15.c2_base_mask = ~((uint32_t)0x3fffu >> maskshift);
}
@@ -1445,7 +1445,7 @@ static void vmsa_ttbcr_write(CPUARMState *env, const ARMCPRegInfo *ri,
static void vmsa_ttbcr_reset(CPUARMState *env, const ARMCPRegInfo *ri)
{
env->cp15.c2_base_mask = 0xffffc000u;
- env->cp15.c2_control = 0;
+ raw_write(env, ri, 0);
env->cp15.c2_mask = 0;
}
@@ -1456,7 +1456,7 @@ static void vmsa_tcr_el1_write(CPUARMState *env, const ARMCPRegInfo *ri,
/* For AArch64 the A1 bit could result in a change of ASID, so TLB flush. */
tlb_flush(CPU(cpu), 1);
- env->cp15.c2_control = value;
+ raw_write(env, ri, value);
}
static void vmsa_ttbr_write(CPUARMState *env, const ARMCPRegInfo *ri,
@@ -2151,14 +2151,14 @@ static void sctlr_write(CPUARMState *env, const ARMCPRegInfo *ri,
{
ARMCPU *cpu = arm_env_get_cpu(env);
- if (env->cp15.c1_sys == value) {
+ if (raw_read(env, ri) == value) {
/* Skip the TLB flush if nothing actually changed; Linux likes
* to do a lot of pointless SCTLR writes.
*/
return;
}
- env->cp15.c1_sys = value;
+ raw_write(env, ri, value);
/* ??? Lots of these bits are not implemented. */
/* This may enable/disable the MMU, so do a TLB flush. */
tlb_flush(CPU(cpu), 1);
@@ -3929,13 +3929,8 @@ static int get_phys_addr_lpae(CPUARMState *env, target_ulong address,
page_size = (1 << ((granule_sz * (4 - level)) + 3));
descaddr |= (address & (page_size - 1));
/* Extract attributes from the descriptor and merge with table attrs */
- if (arm_feature(env, ARM_FEATURE_V8)) {
- attrs = extract64(descriptor, 2, 10)
- | (extract64(descriptor, 53, 11) << 10);
- } else {
- attrs = extract64(descriptor, 2, 10)
- | (extract64(descriptor, 52, 12) << 10);
- }
+ attrs = extract64(descriptor, 2, 10)
+ | (extract64(descriptor, 52, 12) << 10);
attrs |= extract32(tableattrs, 0, 2) << 11; /* XN, PXN */
attrs |= extract32(tableattrs, 3, 1) << 5; /* APTable[1] => AP[2] */
/* The sense of AP[1] vs APTable[0] is reversed, as APTable[0] == 1
@@ -3961,8 +3956,12 @@ static int get_phys_addr_lpae(CPUARMState *env, target_ulong address,
goto do_fault;
}
*prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC;
- if (attrs & (1 << 12) || (!is_user && (attrs & (1 << 11)))) {
- /* XN or PXN */
+ if ((arm_feature(env, ARM_FEATURE_V8) && is_user && (attrs & (1 << 12))) ||
+ (!arm_feature(env, ARM_FEATURE_V8) && (attrs & (1 << 12))) ||
+ (!is_user && (attrs & (1 << 11)))) {
+ /* XN/UXN or PXN. Since we only implement EL0/EL1 we unconditionally
+ * treat XN/UXN as UXN for v8.
+ */
if (access_type == 2) {
goto do_fault;
}
@@ -5560,28 +5559,15 @@ int arm_rmode_to_sf(int rmode)
return rmode;
}
-static void crc_init_buffer(uint8_t *buf, uint32_t val, uint32_t bytes)
-{
- memset(buf, 0, 4);
-
- if (bytes == 1) {
- buf[0] = val & 0xff;
- } else if (bytes == 2) {
- buf[0] = val & 0xff;
- buf[1] = (val >> 8) & 0xff;
- } else {
- buf[0] = val & 0xff;
- buf[1] = (val >> 8) & 0xff;
- buf[2] = (val >> 16) & 0xff;
- buf[3] = (val >> 24) & 0xff;
- }
-}
-
+/* CRC helpers.
+ * The upper bytes of val (above the number specified by 'bytes') must have
+ * been zeroed out by the caller.
+ */
uint32_t HELPER(crc32)(uint32_t acc, uint32_t val, uint32_t bytes)
{
uint8_t buf[4];
- crc_init_buffer(buf, val, bytes);
+ stl_le_p(buf, val);
/* zlib crc32 converts the accumulator and output to one's complement. */
return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff;
@@ -5591,7 +5577,7 @@ uint32_t HELPER(crc32c)(uint32_t acc, uint32_t val, uint32_t bytes)
{
uint8_t buf[4];
- crc_init_buffer(buf, val, bytes);
+ stl_le_p(buf, val);
/* Linux crc32c converts the output to one's complement. */
return crc32c(acc, buf, bytes) ^ 0xffffffff;
diff --git a/target-arm/helper.h b/target-arm/helper.h
index b63fd0f..facfcd2 100644
--- a/target-arm/helper.h
+++ b/target-arm/helper.h
@@ -456,8 +456,6 @@ DEF_HELPER_3(iwmmxt_avgb1, i64, env, i64, i64)
DEF_HELPER_3(iwmmxt_avgw0, i64, env, i64, i64)
DEF_HELPER_3(iwmmxt_avgw1, i64, env, i64, i64)
-DEF_HELPER_2(iwmmxt_msadb, i64, i64, i64)
-
DEF_HELPER_3(iwmmxt_align, i64, i64, i64, i32)
DEF_HELPER_4(iwmmxt_insr, i64, i64, i32, i32, i32)
@@ -512,10 +510,22 @@ DEF_HELPER_3(neon_qzip32, void, env, i32, i32)
DEF_HELPER_4(crypto_aese, void, env, i32, i32, i32)
DEF_HELPER_4(crypto_aesmc, void, env, i32, i32, i32)
+DEF_HELPER_5(crypto_sha1_3reg, void, env, i32, i32, i32, i32)
+DEF_HELPER_3(crypto_sha1h, void, env, i32, i32)
+DEF_HELPER_3(crypto_sha1su1, void, env, i32, i32)
+
+DEF_HELPER_4(crypto_sha256h, void, env, i32, i32, i32)
+DEF_HELPER_4(crypto_sha256h2, void, env, i32, i32, i32)
+DEF_HELPER_3(crypto_sha256su0, void, env, i32, i32)
+DEF_HELPER_4(crypto_sha256su1, void, env, i32, i32, i32)
+
DEF_HELPER_FLAGS_3(crc32, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
DEF_HELPER_FLAGS_3(crc32c, TCG_CALL_NO_RWG_SE, i32, i32, i32, i32)
DEF_HELPER_2(dc_zva, void, env, i64)
+DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
+
#ifdef TARGET_AARCH64
#include "helper-a64.h"
#endif
diff --git a/target-arm/iwmmxt_helper.c b/target-arm/iwmmxt_helper.c
index 398cbcb..a506914 100644
--- a/target-arm/iwmmxt_helper.c
+++ b/target-arm/iwmmxt_helper.c
@@ -369,15 +369,6 @@ IWMMXT_OP_AVGW(1)
#undef IWMMXT_OP_AVGW
#undef AVGW
-uint64_t HELPER(iwmmxt_msadb)(uint64_t a, uint64_t b)
-{
- a = ((((a >> 0 ) & 0xffff) * ((b >> 0) & 0xffff) +
- ((a >> 16) & 0xffff) * ((b >> 16) & 0xffff)) & 0xffffffff) |
- ((((a >> 32) & 0xffff) * ((b >> 32) & 0xffff) +
- ((a >> 48) & 0xffff) * ((b >> 48) & 0xffff)) << 32);
- return a;
-}
-
uint64_t HELPER(iwmmxt_align)(uint64_t a, uint64_t b, uint32_t n)
{
a >>= n << 3;
diff --git a/target-arm/neon_helper.c b/target-arm/neon_helper.c
index 492e500..47d13e9 100644
--- a/target-arm/neon_helper.c
+++ b/target-arm/neon_helper.c
@@ -2211,3 +2211,33 @@ void HELPER(neon_zip16)(CPUARMState *env, uint32_t rd, uint32_t rm)
env->vfp.regs[rm] = make_float64(m0);
env->vfp.regs[rd] = make_float64(d0);
}
+
+/* Helper function for 64 bit polynomial multiply case:
+ * perform PolynomialMult(op1, op2) and return either the top or
+ * bottom half of the 128 bit result.
+ */
+uint64_t HELPER(neon_pmull_64_lo)(uint64_t op1, uint64_t op2)
+{
+ int bitnum;
+ uint64_t res = 0;
+
+ for (bitnum = 0; bitnum < 64; bitnum++) {
+ if (op1 & (1ULL << bitnum)) {
+ res ^= op2 << bitnum;
+ }
+ }
+ return res;
+}
+uint64_t HELPER(neon_pmull_64_hi)(uint64_t op1, uint64_t op2)
+{
+ int bitnum;
+ uint64_t res = 0;
+
+ /* bit 0 of op1 can't influence the high 64 bits at all */
+ for (bitnum = 1; bitnum < 64; bitnum++) {
+ if (op1 & (1ULL << bitnum)) {
+ res ^= op2 >> (64 - bitnum);
+ }
+ }
+ return res;
+}
diff --git a/target-arm/translate-a64.c b/target-arm/translate-a64.c
index a9c4633..63ad787 100644
--- a/target-arm/translate-a64.c
+++ b/target-arm/translate-a64.c
@@ -85,6 +85,8 @@ typedef void NeonGenWidenFn(TCGv_i64, TCGv_i32);
typedef void NeonGenTwoSingleOPFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
typedef void NeonGenTwoDoubleOPFn(TCGv_i64, TCGv_i64, TCGv_i64, TCGv_ptr);
typedef void NeonGenOneOpFn(TCGv_i64, TCGv_i64);
+typedef void CryptoTwoOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32);
+typedef void CryptoThreeOpEnvFn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
/* initialize TCG globals. */
void a64_translate_init(void)
@@ -3774,6 +3776,54 @@ static void handle_shift_reg(DisasContext *s,
tcg_temp_free_i64(tcg_shift);
}
+/* CRC32[BHWX], CRC32C[BHWX] */
+static void handle_crc32(DisasContext *s,
+ unsigned int sf, unsigned int sz, bool crc32c,
+ unsigned int rm, unsigned int rn, unsigned int rd)
+{
+ TCGv_i64 tcg_acc, tcg_val;
+ TCGv_i32 tcg_bytes;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_CRC)
+ || (sf == 1 && sz != 3)
+ || (sf == 0 && sz == 3)) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ if (sz == 3) {
+ tcg_val = cpu_reg(s, rm);
+ } else {
+ uint64_t mask;
+ switch (sz) {
+ case 0:
+ mask = 0xFF;
+ break;
+ case 1:
+ mask = 0xFFFF;
+ break;
+ case 2:
+ mask = 0xFFFFFFFF;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ tcg_val = new_tmp_a64(s);
+ tcg_gen_andi_i64(tcg_val, cpu_reg(s, rm), mask);
+ }
+
+ tcg_acc = cpu_reg(s, rn);
+ tcg_bytes = tcg_const_i32(1 << sz);
+
+ if (crc32c) {
+ gen_helper_crc32c_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
+ } else {
+ gen_helper_crc32_64(cpu_reg(s, rd), tcg_acc, tcg_val, tcg_bytes);
+ }
+
+ tcg_temp_free_i32(tcg_bytes);
+}
+
/* C3.5.8 Data-processing (2 source)
* 31 30 29 28 21 20 16 15 10 9 5 4 0
* +----+---+---+-----------------+------+--------+------+------+
@@ -3821,8 +3871,12 @@ static void disas_data_proc_2src(DisasContext *s, uint32_t insn)
case 21:
case 22:
case 23: /* CRC32 */
- unsupported_encoding(s, insn);
+ {
+ int sz = extract32(opcode, 0, 2);
+ bool crc32c = extract32(opcode, 2, 1);
+ handle_crc32(s, sf, sz, crc32c, rm, rn, rd);
break;
+ }
default:
unallocated_encoding(s);
break;
@@ -8574,7 +8628,7 @@ static void disas_simd_three_reg_diff(DisasContext *s, uint32_t insn)
return;
}
if (size == 3) {
- if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)) {
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_PMULL)) {
unallocated_encoding(s);
return;
}
@@ -10497,7 +10551,55 @@ static void disas_simd_indexed(DisasContext *s, uint32_t insn)
*/
static void disas_crypto_aes(DisasContext *s, uint32_t insn)
{
- unsupported_encoding(s, insn);
+ int size = extract32(insn, 22, 2);
+ int opcode = extract32(insn, 12, 5);
+ int rn = extract32(insn, 5, 5);
+ int rd = extract32(insn, 0, 5);
+ int decrypt;
+ TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_decrypt;
+ CryptoThreeOpEnvFn *genfn;
+
+ if (!arm_dc_feature(s, ARM_FEATURE_V8_AES)
+ || size != 0) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ switch (opcode) {
+ case 0x4: /* AESE */
+ decrypt = 0;
+ genfn = gen_helper_crypto_aese;
+ break;
+ case 0x6: /* AESMC */
+ decrypt = 0;
+ genfn = gen_helper_crypto_aesmc;
+ break;
+ case 0x5: /* AESD */
+ decrypt = 1;
+ genfn = gen_helper_crypto_aese;
+ break;
+ case 0x7: /* AESIMC */
+ decrypt = 1;
+ genfn = gen_helper_crypto_aesmc;
+ break;
+ default:
+ unallocated_encoding(s);
+ return;
+ }
+
+ /* Note that we convert the Vx register indexes into the
+ * index within the vfp.regs[] array, so we can share the
+ * helper with the AArch32 instructions.
+ */
+ tcg_rd_regno = tcg_const_i32(rd << 1);
+ tcg_rn_regno = tcg_const_i32(rn << 1);
+ tcg_decrypt = tcg_const_i32(decrypt);
+
+ genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_decrypt);
+
+ tcg_temp_free_i32(tcg_rd_regno);
+ tcg_temp_free_i32(tcg_rn_regno);
+ tcg_temp_free_i32(tcg_decrypt);
}
/* C3.6.20 Crypto three-reg SHA
@@ -10508,7 +10610,64 @@ static void disas_crypto_aes(DisasContext *s, uint32_t insn)
*/
static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
{
- unsupported_encoding(s, insn);
+ int size = extract32(insn, 22, 2);
+ int opcode = extract32(insn, 12, 3);
+ int rm = extract32(insn, 16, 5);
+ int rn = extract32(insn, 5, 5);
+ int rd = extract32(insn, 0, 5);
+ CryptoThreeOpEnvFn *genfn;
+ TCGv_i32 tcg_rd_regno, tcg_rn_regno, tcg_rm_regno;
+ int feature = ARM_FEATURE_V8_SHA256;
+
+ if (size != 0) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ switch (opcode) {
+ case 0: /* SHA1C */
+ case 1: /* SHA1P */
+ case 2: /* SHA1M */
+ case 3: /* SHA1SU0 */
+ genfn = NULL;
+ feature = ARM_FEATURE_V8_SHA1;
+ break;
+ case 4: /* SHA256H */
+ genfn = gen_helper_crypto_sha256h;
+ break;
+ case 5: /* SHA256H2 */
+ genfn = gen_helper_crypto_sha256h2;
+ break;
+ case 6: /* SHA256SU1 */
+ genfn = gen_helper_crypto_sha256su1;
+ break;
+ default:
+ unallocated_encoding(s);
+ return;
+ }
+
+ if (!arm_dc_feature(s, feature)) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ tcg_rd_regno = tcg_const_i32(rd << 1);
+ tcg_rn_regno = tcg_const_i32(rn << 1);
+ tcg_rm_regno = tcg_const_i32(rm << 1);
+
+ if (genfn) {
+ genfn(cpu_env, tcg_rd_regno, tcg_rn_regno, tcg_rm_regno);
+ } else {
+ TCGv_i32 tcg_opcode = tcg_const_i32(opcode);
+
+ gen_helper_crypto_sha1_3reg(cpu_env, tcg_rd_regno,
+ tcg_rn_regno, tcg_rm_regno, tcg_opcode);
+ tcg_temp_free_i32(tcg_opcode);
+ }
+
+ tcg_temp_free_i32(tcg_rd_regno);
+ tcg_temp_free_i32(tcg_rn_regno);
+ tcg_temp_free_i32(tcg_rm_regno);
}
/* C3.6.21 Crypto two-reg SHA
@@ -10519,7 +10678,49 @@ static void disas_crypto_three_reg_sha(DisasContext *s, uint32_t insn)
*/
static void disas_crypto_two_reg_sha(DisasContext *s, uint32_t insn)
{
- unsupported_encoding(s, insn);
+ int size = extract32(insn, 22, 2);
+ int opcode = extract32(insn, 12, 5);
+ int rn = extract32(insn, 5, 5);
+ int rd = extract32(insn, 0, 5);
+ CryptoTwoOpEnvFn *genfn;
+ int feature;
+ TCGv_i32 tcg_rd_regno, tcg_rn_regno;
+
+ if (size != 0) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ switch (opcode) {
+ case 0: /* SHA1H */
+ feature = ARM_FEATURE_V8_SHA1;
+ genfn = gen_helper_crypto_sha1h;
+ break;
+ case 1: /* SHA1SU1 */
+ feature = ARM_FEATURE_V8_SHA1;
+ genfn = gen_helper_crypto_sha1su1;
+ break;
+ case 2: /* SHA256SU0 */
+ feature = ARM_FEATURE_V8_SHA256;
+ genfn = gen_helper_crypto_sha256su0;
+ break;
+ default:
+ unallocated_encoding(s);
+ return;
+ }
+
+ if (!arm_dc_feature(s, feature)) {
+ unallocated_encoding(s);
+ return;
+ }
+
+ tcg_rd_regno = tcg_const_i32(rd << 1);
+ tcg_rn_regno = tcg_const_i32(rn << 1);
+
+ genfn(cpu_env, tcg_rd_regno, tcg_rn_regno);
+
+ tcg_temp_free_i32(tcg_rd_regno);
+ tcg_temp_free_i32(tcg_rn_regno);
}
/* C3.6 Data processing - SIMD, inc Crypto
diff --git a/target-arm/translate.c b/target-arm/translate.c
index d499caa..cf4e767 100644
--- a/target-arm/translate.c
+++ b/target-arm/translate.c
@@ -1382,8 +1382,6 @@ IWMMXT_OP_ENV(avgb1)
IWMMXT_OP_ENV(avgw0)
IWMMXT_OP_ENV(avgw1)
-IWMMXT_OP(msadb)
-
IWMMXT_OP_ENV(packuw)
IWMMXT_OP_ENV(packul)
IWMMXT_OP_ENV(packuq)
@@ -4776,6 +4774,7 @@ static void gen_neon_narrow_op(int op, int u, int size,
#define NEON_3R_VPMIN 21
#define NEON_3R_VQDMULH_VQRDMULH 22
#define NEON_3R_VPADD 23
+#define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */
#define NEON_3R_VFM 25 /* VFMA, VFMS : float fused multiply-add */
#define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
#define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
@@ -4809,6 +4808,7 @@ static const uint8_t neon_3r_sizes[] = {
[NEON_3R_VPMIN] = 0x7,
[NEON_3R_VQDMULH_VQRDMULH] = 0x6,
[NEON_3R_VPADD] = 0x7,
+ [NEON_3R_SHA] = 0xf, /* size field encodes op type */
[NEON_3R_VFM] = 0x5, /* size bit 1 encodes op */
[NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
[NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
@@ -4842,6 +4842,7 @@ static const uint8_t neon_3r_sizes[] = {
#define NEON_2RM_VCEQ0 18
#define NEON_2RM_VCLE0 19
#define NEON_2RM_VCLT0 20
+#define NEON_2RM_SHA1H 21
#define NEON_2RM_VABS 22
#define NEON_2RM_VNEG 23
#define NEON_2RM_VCGT0_F 24
@@ -4858,6 +4859,7 @@ static const uint8_t neon_3r_sizes[] = {
#define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */
#define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */
#define NEON_2RM_VSHLL 38
+#define NEON_2RM_SHA1SU1 39 /* Includes SHA256SU0 */
#define NEON_2RM_VRINTN 40
#define NEON_2RM_VRINTX 41
#define NEON_2RM_VRINTA 42
@@ -4918,6 +4920,7 @@ static const uint8_t neon_2rm_sizes[] = {
[NEON_2RM_VCEQ0] = 0x7,
[NEON_2RM_VCLE0] = 0x7,
[NEON_2RM_VCLT0] = 0x7,
+ [NEON_2RM_SHA1H] = 0x4,
[NEON_2RM_VABS] = 0x7,
[NEON_2RM_VNEG] = 0x7,
[NEON_2RM_VCGT0_F] = 0x4,
@@ -4934,6 +4937,7 @@ static const uint8_t neon_2rm_sizes[] = {
[NEON_2RM_VMOVN] = 0x7,
[NEON_2RM_VQMOVN] = 0x7,
[NEON_2RM_VSHLL] = 0x7,
+ [NEON_2RM_SHA1SU1] = 0x4,
[NEON_2RM_VRINTN] = 0x4,
[NEON_2RM_VRINTX] = 0x4,
[NEON_2RM_VRINTA] = 0x4,
@@ -5011,6 +5015,49 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
if (q && ((rd | rn | rm) & 1)) {
return 1;
}
+ /*
+ * The SHA-1/SHA-256 3-register instructions require special treatment
+ * here, as their size field is overloaded as an op type selector, and
+ * they all consume their input in a single pass.
+ */
+ if (op == NEON_3R_SHA) {
+ if (!q) {
+ return 1;
+ }
+ if (!u) { /* SHA-1 */
+ if (!arm_feature(env, ARM_FEATURE_V8_SHA1)) {
+ return 1;
+ }
+ tmp = tcg_const_i32(rd);
+ tmp2 = tcg_const_i32(rn);
+ tmp3 = tcg_const_i32(rm);
+ tmp4 = tcg_const_i32(size);
+ gen_helper_crypto_sha1_3reg(cpu_env, tmp, tmp2, tmp3, tmp4);
+ tcg_temp_free_i32(tmp4);
+ } else { /* SHA-256 */
+ if (!arm_feature(env, ARM_FEATURE_V8_SHA256) || size == 3) {
+ return 1;
+ }
+ tmp = tcg_const_i32(rd);
+ tmp2 = tcg_const_i32(rn);
+ tmp3 = tcg_const_i32(rm);
+ switch (size) {
+ case 0:
+ gen_helper_crypto_sha256h(cpu_env, tmp, tmp2, tmp3);
+ break;
+ case 1:
+ gen_helper_crypto_sha256h2(cpu_env, tmp, tmp2, tmp3);
+ break;
+ case 2:
+ gen_helper_crypto_sha256su1(cpu_env, tmp, tmp2, tmp3);
+ break;
+ }
+ }
+ tcg_temp_free_i32(tmp);
+ tcg_temp_free_i32(tmp2);
+ tcg_temp_free_i32(tmp3);
+ return 0;
+ }
if (size == 3 && op != NEON_3R_LOGIC) {
/* 64-bit element instructions. */
for (pass = 0; pass < (q ? 2 : 1); pass++) {
@@ -5905,10 +5952,11 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
int src1_wide;
int src2_wide;
int prewiden;
- /* undefreq: bit 0 : UNDEF if size != 0
- * bit 1 : UNDEF if size == 0
- * bit 2 : UNDEF if U == 1
- * Note that [1:0] set implies 'always UNDEF'
+ /* undefreq: bit 0 : UNDEF if size == 0
+ * bit 1 : UNDEF if size == 1
+ * bit 2 : UNDEF if size == 2
+ * bit 3 : UNDEF if U == 1
+ * Note that [2:0] set implies 'always UNDEF'
*/
int undefreq;
/* prewiden, src1_wide, src2_wide, undefreq */
@@ -5922,13 +5970,13 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
{0, 1, 1, 0}, /* VSUBHN */
{0, 0, 0, 0}, /* VABDL */
{0, 0, 0, 0}, /* VMLAL */
- {0, 0, 0, 6}, /* VQDMLAL */
+ {0, 0, 0, 9}, /* VQDMLAL */
{0, 0, 0, 0}, /* VMLSL */
- {0, 0, 0, 6}, /* VQDMLSL */
+ {0, 0, 0, 9}, /* VQDMLSL */
{0, 0, 0, 0}, /* Integer VMULL */
- {0, 0, 0, 2}, /* VQDMULL */
- {0, 0, 0, 5}, /* Polynomial VMULL */
- {0, 0, 0, 3}, /* Reserved: always UNDEF */
+ {0, 0, 0, 1}, /* VQDMULL */
+ {0, 0, 0, 0xa}, /* Polynomial VMULL */
+ {0, 0, 0, 7}, /* Reserved: always UNDEF */
};
prewiden = neon_3reg_wide[op][0];
@@ -5936,9 +5984,8 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
src2_wide = neon_3reg_wide[op][2];
undefreq = neon_3reg_wide[op][3];
- if (((undefreq & 1) && (size != 0)) ||
- ((undefreq & 2) && (size == 0)) ||
- ((undefreq & 4) && u)) {
+ if ((undefreq & (1 << size)) ||
+ ((undefreq & 8) && u)) {
return 1;
}
if ((src1_wide && (rn & 1)) ||
@@ -5947,6 +5994,30 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
return 1;
}
+ /* Handle VMULL.P64 (Polynomial 64x64 to 128 bit multiply)
+ * outside the loop below as it only performs a single pass.
+ */
+ if (op == 14 && size == 2) {
+ TCGv_i64 tcg_rn, tcg_rm, tcg_rd;
+
+ if (!arm_feature(env, ARM_FEATURE_V8_PMULL)) {
+ return 1;
+ }
+ tcg_rn = tcg_temp_new_i64();
+ tcg_rm = tcg_temp_new_i64();
+ tcg_rd = tcg_temp_new_i64();
+ neon_load_reg64(tcg_rn, rn);
+ neon_load_reg64(tcg_rm, rm);
+ gen_helper_neon_pmull_64_lo(tcg_rd, tcg_rn, tcg_rm);
+ neon_store_reg64(tcg_rd, rd);
+ gen_helper_neon_pmull_64_hi(tcg_rd, tcg_rn, tcg_rm);
+ neon_store_reg64(tcg_rd, rd + 1);
+ tcg_temp_free_i64(tcg_rn);
+ tcg_temp_free_i64(tcg_rm);
+ tcg_temp_free_i64(tcg_rd);
+ return 0;
+ }
+
/* Avoid overlapping operands. Wide source operands are
always aligned so will never overlap with wide
destinations in problematic ways. */
@@ -6486,6 +6557,41 @@ static int disas_neon_data_insn(CPUARMState * env, DisasContext *s, uint32_t ins
tcg_temp_free_i32(tmp2);
tcg_temp_free_i32(tmp3);
break;
+ case NEON_2RM_SHA1H:
+ if (!arm_feature(env, ARM_FEATURE_V8_SHA1)
+ || ((rm | rd) & 1)) {
+ return 1;
+ }
+ tmp = tcg_const_i32(rd);
+ tmp2 = tcg_const_i32(rm);
+
+ gen_helper_crypto_sha1h(cpu_env, tmp, tmp2);
+
+ tcg_temp_free_i32(tmp);
+ tcg_temp_free_i32(tmp2);
+ break;
+ case NEON_2RM_SHA1SU1:
+ if ((rm | rd) & 1) {
+ return 1;
+ }
+ /* bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 */
+ if (q) {
+ if (!arm_feature(env, ARM_FEATURE_V8_SHA256)) {
+ return 1;
+ }
+ } else if (!arm_feature(env, ARM_FEATURE_V8_SHA1)) {
+ return 1;
+ }
+ tmp = tcg_const_i32(rd);
+ tmp2 = tcg_const_i32(rm);
+ if (q) {
+ gen_helper_crypto_sha256su0(cpu_env, tmp, tmp2);
+ } else {
+ gen_helper_crypto_sha1su1(cpu_env, tmp, tmp2);
+ }
+ tcg_temp_free_i32(tmp);
+ tcg_temp_free_i32(tmp2);
+ break;
default:
elementwise:
for (pass = 0; pass < (q ? 4 : 2); pass++) {
@@ -7698,6 +7804,11 @@ static void disas_arm_insn(CPUARMState * env, DisasContext *s)
tmp = load_reg(s, rn);
tmp2 = load_reg(s, rm);
+ if (op1 == 0) {
+ tcg_gen_andi_i32(tmp2, tmp2, 0xff);
+ } else if (op1 == 1) {
+ tcg_gen_andi_i32(tmp2, tmp2, 0xffff);
+ }
tmp3 = tcg_const_i32(1 << op1);
if (c & 0x2) {
gen_helper_crc32c(tmp, tmp, tmp2, tmp3);
@@ -9330,6 +9441,11 @@ static int disas_thumb2_insn(CPUARMState *env, DisasContext *s, uint16_t insn_hw
}
tmp2 = load_reg(s, rm);
+ if (sz == 0) {
+ tcg_gen_andi_i32(tmp2, tmp2, 0xff);
+ } else if (sz == 1) {
+ tcg_gen_andi_i32(tmp2, tmp2, 0xffff);
+ }
tmp3 = tcg_const_i32(1 << sz);
if (c) {
gen_helper_crc32c(tmp, tmp, tmp2, tmp3);
OpenPOWER on IntegriCloud