Diffstat (limited to 'llvm/atomic')
-rw-r--r-- | llvm/atomic/atomic-arm.c      | 158
-rw-r--r-- | llvm/atomic/atomic-helper.h   |  74
-rw-r--r-- | llvm/atomic/atomic-x86.c      | 504
-rw-r--r-- | llvm/atomic/coremu-atomic.h   | 412
-rw-r--r-- | llvm/atomic/coremu-template.h | 101
5 files changed, 1249 insertions, 0 deletions
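Note: the x86 helpers added below emulate guest atomic instructions (inc/dec, add/adc/sub/sbb, xadd, neg, not, bts/btr/btc) with a compare-and-swap retry loop wrapped in the TX() macro: read the current value, compute the updated value, and publish it only if the memory location is still unchanged, otherwise retry. The stand-alone C sketch below shows that pattern in isolation; it assumes GCC's __sync_val_compare_and_swap builtin in place of the patch's atomic_compare_exchange* helpers and omits the guest-to-host address translation (CM_GET_QEMU_ADDR) and the eflags bookkeeping that the real helpers perform.

/* Minimal sketch of the CAS-retry loop that TX() in atomic-x86.c expands to.
 * Assumptions: a plain host pointer and GCC's __sync_val_compare_and_swap;
 * the real helpers operate on translated guest addresses. */
#include <stdint.h>
#include <stdio.h>

static uint32_t atomic_add_fetch32(uint32_t *p, uint32_t operand)
{
    uint32_t oldv, newv;
    do {
        oldv = *p;              /* snapshot the current value          */
        newv = oldv + operand;  /* apply the operation locally         */
        /* publish only if *p is still oldv; otherwise another thread
         * raced us and we retry with the fresh value. */
    } while (__sync_val_compare_and_swap(p, oldv, newv) != oldv);
    return newv;
}

int main(void)
{
    uint32_t x = 40;
    printf("%u\n", atomic_add_fetch32(&x, 2)); /* prints 42 */
    return 0;
}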
diff --git a/llvm/atomic/atomic-arm.c b/llvm/atomic/atomic-arm.c new file mode 100644 index 0000000..4176caa --- /dev/null +++ b/llvm/atomic/atomic-arm.c @@ -0,0 +1,158 @@ +/* + * Copyright (C) 2010 Parallel Processing Institute (PPI), Fudan Univ. + * <http://ppi.fudan.edu.cn/system_research_group> + * + * Authors: + * Zhaoguo Wang <zgwang@fudan.edu.cn> + * Yufei Chen <chenyufei@fudan.edu.cn> + * Ran Liu <naruilone@gmail.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +/* We include this file in op_helper.c */ + +#include <stdlib.h> +#include <pthread.h> +#include "coremu-atomic.h" + +__thread uint64_t cm_exclusive_val; +__thread uint32_t cm_exclusive_addr = -1; + +#define GEN_LOAD_EXCLUSIVE(type, TYPE) \ +void HELPER(load_exclusive##type)(CPUArchState *env, uint32_t reg, \ + uint32_t addr) \ +{ \ + unsigned long q_addr = 0; \ + DATA_##type val = 0; \ + \ + cm_exclusive_addr = addr; \ + CM_GET_QEMU_ADDR(env, q_addr,addr); \ + val = *(DATA_##type *)q_addr; \ + cm_exclusive_val = val; \ + env->regs[reg] = val; \ +} + +GEN_LOAD_EXCLUSIVE(b, B); +GEN_LOAD_EXCLUSIVE(w, W); +GEN_LOAD_EXCLUSIVE(l, L); +//GEN_LOAD_EXCLUSIVE(q, Q); + +#define GEN_STORE_EXCLUSIVE(type, TYPE) \ +void HELPER(store_exclusive##type)(CPUArchState *env, uint32_t res, \ + uint32_t reg, uint32_t addr) \ +{ \ + unsigned long q_addr = 0; \ + DATA_##type val = 0; \ + DATA_##type r = 0; \ + \ + if(addr != cm_exclusive_addr) \ + goto fail; \ + \ + CM_GET_QEMU_ADDR(env, q_addr,addr); \ + val = (DATA_##type)env->regs[reg]; \ + \ + r = atomic_compare_exchange##type((DATA_##type *)q_addr, \ + (DATA_##type)cm_exclusive_val, val); \ + \ + if(r == (DATA_##type)cm_exclusive_val) { \ + env->regs[res] = 0; \ + goto done; \ + } else { \ + goto fail; \ + } \ + \ +fail: \ + env->regs[res] = 1; \ + \ +done: \ + cm_exclusive_addr = -1; \ + return; \ +} + +GEN_STORE_EXCLUSIVE(b, B); +GEN_STORE_EXCLUSIVE(w, W); +GEN_STORE_EXCLUSIVE(l, L); +//GEN_STORE_EXCLUSIVE(q, Q); + +void HELPER(load_exclusiveq)(CPUArchState *env, uint32_t reg, uint32_t addr) +{ + unsigned long q_addr = 0; + uint64_t val = 0; + + cm_exclusive_addr = addr; + CM_GET_QEMU_ADDR(env, q_addr,addr); + val = *(uint64_t *)q_addr; + cm_exclusive_val = val; + env->regs[reg] = (uint32_t)val; + env->regs[reg + 1] = (uint32_t)(val>>32); +} + +void HELPER(store_exclusiveq)(CPUArchState *env, uint32_t res, uint32_t reg, uint32_t addr) +{ + unsigned long q_addr = 0; + uint64_t val = 0; + uint64_t r = 0; + + if(addr != cm_exclusive_addr) + goto fail; + + CM_GET_QEMU_ADDR(env, q_addr,addr); + val = (uint32_t)env->regs[reg]; + val |= ((uint64_t)env->regs[reg + 1]) << 32; + + r = atomic_compare_exchangeq((uint64_t *)q_addr, + (uint64_t)cm_exclusive_val, val); + + if(r == (uint64_t)cm_exclusive_val) { + env->regs[res] = 0; + goto done; + } else { + goto fail; + } + +fail: + env->regs[res] = 1; + +done: + cm_exclusive_addr = -1; + return; +} + +void 
HELPER(clear_exclusive)(CPUArchState *env) +{ + cm_exclusive_addr = -1; +} + +void HELPER(swpb)(CPUArchState *env, uint32_t dst, uint32_t src, uint32_t addr) +{ + uint8_t old, val; + unsigned long q_addr; + CM_GET_QEMU_ADDR(env, q_addr,env->regs[addr]); + val = (uint8_t)env->regs[src]; + old = atomic_exchangeb((uint8_t *)q_addr, (uint8_t)val); + env->regs[dst] = old; + //printf("SWPB\n"); +} + +void HELPER(swp)(CPUArchState *env, uint32_t dst, uint32_t src, uint32_t addr) +{ + uint32_t old, val; + unsigned long q_addr; + CM_GET_QEMU_ADDR(env, q_addr,env->regs[addr]); + val = env->regs[src]; + old = atomic_exchangel((uint32_t *)q_addr, val); + env->regs[dst] = old; + //printf("SWP\n"); +} diff --git a/llvm/atomic/atomic-helper.h b/llvm/atomic/atomic-helper.h new file mode 100644 index 0000000..9e3cedf --- /dev/null +++ b/llvm/atomic/atomic-helper.h @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2010 Parallel Processing Institute (PPI), Fudan Univ. + * <http://ppi.fudan.edu.cn/system_research_group> + * + * Authors: + * Zhaoguo Wang <zgwang@fudan.edu.cn> + * Yufei Chen <chenyufei@fudan.edu.cn> + * Ran Liu <naruilone@gmail.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "config-target.h" + +#ifdef CONFIG_COREMU + +#if defined(TARGET_I386) +#define __GEN_HEADER(type) \ +DEF_HELPER_3(atomic_inc##type, void, env, tl, int) \ +DEF_HELPER_4(xchg##type, void, env, tl, int, int) \ +DEF_HELPER_4(atomic_op##type, void, env, tl, tl, int) \ +DEF_HELPER_4(atomic_xadd##type, void, env, tl, int, int) \ +DEF_HELPER_4(atomic_cmpxchg##type, void, env, tl, int, int) \ +DEF_HELPER_2(atomic_not##type, void, env, tl) \ +DEF_HELPER_2(atomic_neg##type, void, env, tl) + +__GEN_HEADER(b) +__GEN_HEADER(w) +__GEN_HEADER(l) +#ifdef TARGET_X86_64 +__GEN_HEADER(q) +#endif + +DEF_HELPER_2(atomic_cmpxchg8b, void, env, tl) +DEF_HELPER_2(atomic_cmpxchg16b, void, env, tl) + +DEF_HELPER_4(atomic_bts, void, env, tl, tl, int) +DEF_HELPER_4(atomic_btr, void, env, tl, tl, int) +DEF_HELPER_4(atomic_btc, void, env, tl, tl, int) + +/* fence */ +DEF_HELPER_1(fence, void, env) + +#elif defined(TARGET_ARM) +#define __GEN_HEADER(type) \ +DEF_HELPER_3(load_exclusive##type, void, env, i32, i32) \ +DEF_HELPER_4(store_exclusive##type, void, env, i32, i32, i32) + +__GEN_HEADER(b) +__GEN_HEADER(w) +__GEN_HEADER(l) +__GEN_HEADER(q) + +DEF_HELPER_1(clear_exclusive, void, env) + +DEF_HELPER_4(swpb, void, env, i32, i32, i32) +DEF_HELPER_4(swp, void, env, i32, i32, i32) +#else +#error "unsupported processor type" +#endif + +#endif + diff --git a/llvm/atomic/atomic-x86.c b/llvm/atomic/atomic-x86.c new file mode 100644 index 0000000..dc0baf0 --- /dev/null +++ b/llvm/atomic/atomic-x86.c @@ -0,0 +1,504 @@ +/* + * Copyright (C) 2010 Parallel Processing Institute (PPI), Fudan Univ. 
+ * <http://ppi.fudan.edu.cn/system_research_group> + * + * Authors: + * Zhaoguo Wang <zgwang@fudan.edu.cn> + * Yufei Chen <chenyufei@fudan.edu.cn> + * Ran Liu <naruilone@gmail.com> + * Xi Wu <wuxi@fudan.edu.cn> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +/* We include this file in op_helper.c */ + +#include <stdlib.h> +#include <pthread.h> +#include <assert.h> +#include "coremu-atomic.h" + +#define EAX (env->regs[R_EAX]) +#define ECX (env->regs[R_ECX]) +#define EDX (env->regs[R_EDX]) +#define EBX (env->regs[R_EBX]) + +/* These definitions are copied from translate.c */ +#if defined(WORDS_BIGENDIAN) +#define REG_B_OFFSET (sizeof(target_ulong) - 1) +#define REG_H_OFFSET (sizeof(target_ulong) - 2) +#define REG_W_OFFSET (sizeof(target_ulong) - 2) +#define REG_L_OFFSET (sizeof(target_ulong) - 4) +#define REG_LH_OFFSET (sizeof(target_ulong) - 8) +#else +#define REG_B_OFFSET 0 +#define REG_H_OFFSET 1 +#define REG_W_OFFSET 0 +#define REG_L_OFFSET 0 +#define REG_LH_OFFSET 4 +#endif + +#ifdef TARGET_X86_64 +#define X86_64_DEF(...) __VA_ARGS__ +#else +#define X86_64_DEF(...) +#endif + +#define REG_LOW_MASK (~(uint64_t)0x0>>32) + +/* gen_op instructions */ +/* i386 arith/logic operations */ +enum { + OP_ADDL, + OP_ORL, + OP_ADCL, + OP_SBBL, + OP_ANDL, + OP_SUBL, + OP_XORL, + OP_CMPL, +}; + +/* */ +static target_ulong cm_get_reg_val(CPUX86State *env, int ot, int hregs, int reg) +{ + target_ulong val, offset; + CPUX86State *env1 = env; + + switch(ot) { + case 0: /* OT_BYTE */ + if (reg < 4 X86_64_DEF( || reg >= 8 || hregs)) { + goto std_case; + } else { + offset = offsetof(CPUX86State, regs[reg - 4]) + REG_H_OFFSET; + val = *(((uint8_t *)env1) + offset); + } + break; + default: + std_case: + val = env1->regs[reg]; + break; + } + + return val; +} + +static void cm_set_reg_val(CPUX86State *env, int ot, int hregs, int reg, target_ulong val) +{ + target_ulong offset; + + CPUX86State *env1 = env; + + switch(ot) { + case 0: /* OT_BYTE */ + if (reg < 4 X86_64_DEF (|| reg >= 8 || hregs)) { + offset = offsetof(CPUX86State, regs[reg]) + REG_B_OFFSET; + *(((uint8_t *) env1) + offset) = (uint8_t)val; + } else { + offset = offsetof(CPUX86State, regs[reg - 4]) + REG_H_OFFSET; + *(((uint8_t *) env1) + offset) = (uint8_t)val; + } + break; + case 1: /* OT_WORD */ + offset = offsetof(CPUX86State, regs[reg]) + REG_W_OFFSET; + *((uint16_t *)((uint8_t *)env1 + offset)) = (uint16_t)val; + break; + case 2: /* OT_LONG */ + env1->regs[reg] = REG_LOW_MASK & val; + break; + default: + case 3: /* OT_QUAD */ + env1->regs[reg] = val; + break; + } +} + +#define LD_b ldub_p +#define LD_w lduw_p +#define LD_l ldl_p +#define LD_q ldq_p + +/* Lightweight transactional memory. 
*/ +#define TX(vaddr, type, value, command) \ + unsigned long __q_addr; \ + DATA_##type __oldv; \ + DATA_##type value; \ + \ + CM_GET_QEMU_ADDR(env, __q_addr, vaddr); \ + do { \ + __oldv = value = LD_##type((DATA_##type *)__q_addr); \ + {command;}; \ + mb(); \ + } while (__oldv != (atomic_compare_exchange##type( \ + (DATA_##type *)__q_addr, __oldv, value))) + +/* Atomically emulate INC instruction using CAS1 and memory transaction. */ + +#define GEN_ATOMIC_INC(type, TYPE) \ +void helper_atomic_inc##type(CPUX86State *env, target_ulong a0, int c) \ +{ \ + int eflags_c, eflags; \ + int cc_op; \ + \ + /* compute the previous instruction c flags */ \ + eflags_c = helper_cc_compute_c(CC_DST, CC_SRC, CC_SRC2, CC_OP); \ + \ + TX(a0, type, value, { \ + if (c > 0) { \ + value++; \ + cc_op = CC_OP_INC##TYPE; \ + } else { \ + value--; \ + cc_op = CC_OP_DEC##TYPE; \ + } \ + }); \ + \ + CC_SRC = eflags_c; \ + CC_DST = value; \ + \ + eflags = helper_cc_compute_all(CC_DST, CC_SRC, CC_SRC2, cc_op); \ + CC_SRC = eflags; \ +} \ + +GEN_ATOMIC_INC(b, B); +GEN_ATOMIC_INC(w, W); +GEN_ATOMIC_INC(l, L); +#ifdef TARGET_X86_64 +GEN_ATOMIC_INC(q, Q); +#endif + +#define OT_b 0 +#define OT_w 1 +#define OT_l 2 +#define OT_q 3 + +#define GEN_ATOMIC_XCHG(type) \ +void helper_xchg##type(CPUX86State *env, target_ulong a0, int reg, \ + int hreg) \ +{ \ + DATA_##type val, out; \ + unsigned long q_addr; \ + \ + CM_GET_QEMU_ADDR(env, q_addr, a0); \ + val = (DATA_##type)cm_get_reg_val(env, OT_##type, hreg, reg); \ + out = atomic_exchange##type((DATA_##type *)q_addr, val); \ + mb(); \ + \ + cm_set_reg_val(env, OT_##type, hreg, reg, out); \ +} + +GEN_ATOMIC_XCHG(b); +GEN_ATOMIC_XCHG(w); +GEN_ATOMIC_XCHG(l); +#ifdef TARGET_X86_64 +GEN_ATOMIC_XCHG(q); +#endif + +#define GEN_ATOMIC_OP(type, TYPE) \ +void helper_atomic_op##type(CPUX86State *env, target_ulong a0, \ + target_ulong t1, int op) \ +{ \ + DATA_##type operand; \ + int eflags_c, eflags; \ + int cc_op; \ + \ + /* compute the previous instruction c flags */ \ + eflags_c = helper_cc_compute_c(CC_DST, CC_SRC, CC_SRC2, CC_OP); \ + operand = (DATA_##type)t1; \ + \ + TX(a0, type, value, { \ + switch(op) { \ + case OP_ADCL: \ + value += operand + eflags_c; \ + cc_op = CC_OP_ADD##TYPE + (eflags_c << 2); \ + CC_SRC = operand; \ + break; \ + case OP_SBBL: \ + value = value - operand - eflags_c; \ + cc_op = CC_OP_SUB##TYPE + (eflags_c << 2); \ + CC_SRC = operand; \ + break; \ + case OP_ADDL: \ + value += operand; \ + cc_op = CC_OP_ADD##TYPE; \ + CC_SRC = operand; \ + break; \ + case OP_SUBL: \ + value -= operand; \ + cc_op = CC_OP_SUB##TYPE; \ + CC_SRC = operand; \ + break; \ + default: \ + case OP_ANDL: \ + value &= operand; \ + cc_op = CC_OP_LOGIC##TYPE; \ + break; \ + case OP_ORL: \ + value |= operand; \ + cc_op = CC_OP_LOGIC##TYPE; \ + break; \ + case OP_XORL: \ + value ^= operand; \ + cc_op = CC_OP_LOGIC##TYPE; \ + break; \ + case OP_CMPL: \ + abort(); \ + break; \ + } \ + }); \ + CC_DST = value; \ + /* successful transaction, compute the eflags */ \ + eflags = helper_cc_compute_all(CC_DST, CC_SRC, CC_SRC2, cc_op); \ + CC_SRC = eflags; \ +} + +GEN_ATOMIC_OP(b, B); +GEN_ATOMIC_OP(w, W); +GEN_ATOMIC_OP(l, L); +#ifdef TARGET_X86_64 +GEN_ATOMIC_OP(q, Q); +#endif + +/* xadd */ +#define GEN_ATOMIC_XADD(type, TYPE) \ +void helper_atomic_xadd##type(CPUX86State *env, target_ulong a0, \ + int reg, int hreg) \ +{ \ + DATA_##type operand, oldv; \ + int eflags; \ + \ + operand = (DATA_##type)cm_get_reg_val( \ + env, OT_##type, hreg, reg); \ + \ + TX(a0, type, newv, { \ + oldv = newv; \ + newv += 
operand; \ + }); \ + \ + /* transaction successes */ \ + /* xchg the register and compute the eflags */ \ + cm_set_reg_val(env, OT_##type, hreg, reg, oldv); \ + CC_SRC = oldv; \ + CC_DST = newv; \ + \ + eflags = helper_cc_compute_all(CC_DST, CC_SRC, CC_SRC2, \ + CC_OP_ADD##TYPE); \ + CC_SRC = eflags; \ +} + +GEN_ATOMIC_XADD(b, B); +GEN_ATOMIC_XADD(w, W); +GEN_ATOMIC_XADD(l, L); +#ifdef TARGET_X86_64 +GEN_ATOMIC_XADD(q, Q); +#endif + +/* cmpxchg */ +#define GEN_ATOMIC_CMPXCHG(type, TYPE) \ +void helper_atomic_cmpxchg##type(CPUX86State *env, target_ulong a0, \ + int reg, int hreg) \ +{ \ + DATA_##type reg_v, eax_v, res; \ + int eflags; \ + unsigned long q_addr; \ + \ + CM_GET_QEMU_ADDR(env, q_addr, a0); \ + reg_v = (DATA_##type)cm_get_reg_val(env, OT_##type, hreg, reg); \ + eax_v = (DATA_##type)cm_get_reg_val(env, OT_##type, 0, R_EAX); \ + \ + res = atomic_compare_exchange##type( \ + (DATA_##type *)q_addr, eax_v, reg_v); \ + mb(); \ + \ + if (res != eax_v) \ + cm_set_reg_val(env, OT_##type, 0, R_EAX, res); \ + \ + CC_SRC = res; \ + CC_DST = eax_v - res; \ + \ + eflags = helper_cc_compute_all(CC_DST, CC_SRC, CC_SRC2, \ + CC_OP_SUB##TYPE); \ + CC_SRC = eflags; \ +} + +GEN_ATOMIC_CMPXCHG(b, B); +GEN_ATOMIC_CMPXCHG(w, W); +GEN_ATOMIC_CMPXCHG(l, L); +#ifdef TARGET_X86_64 +GEN_ATOMIC_CMPXCHG(q, Q); +#endif + +#if defined(_LP64) +/* cmpxchgb (8, 16) */ +void helper_atomic_cmpxchg8b(CPUX86State *env, target_ulong a0) +{ + uint64_t edx_eax, ecx_ebx, res; + int eflags; + unsigned long q_addr; + + eflags = helper_cc_compute_all(CC_DST, CC_SRC, CC_SRC2, CC_OP); + CM_GET_QEMU_ADDR(env, q_addr, a0); + + edx_eax = (((uint64_t)EDX << 32) | (uint32_t)EAX); + ecx_ebx = (((uint64_t)ECX << 32) | (uint32_t)EBX); + + res = atomic_compare_exchangeq((uint64_t *)q_addr, edx_eax, ecx_ebx); + mb(); + + if (res == edx_eax) { + eflags |= CC_Z; + } else { + EDX = (uint32_t)(res >> 32); + EAX = (uint32_t)res; + eflags &= ~CC_Z; + } + + CC_SRC = eflags; +} +#else +void helper_atomic_cmpxchg8b(CPUX86State *env, target_ulong a0) +{ + assert("helper_atomic_cmpxchg8b: not supported.\n"); + exit(0); +} +#endif + +void helper_atomic_cmpxchg16b(CPUX86State *env, target_ulong a0) +{ + uint8_t res; + int eflags; + unsigned long q_addr; + + eflags = helper_cc_compute_all(CC_DST, CC_SRC, CC_SRC2, CC_OP); + CM_GET_QEMU_ADDR(env, q_addr, a0); + + uint64_t old_rax = *(uint64_t *)q_addr; + uint64_t old_rdx = *(uint64_t *)(q_addr + 8); + res = atomic_compare_exchange16b((uint64_t *)q_addr, EAX, EDX, EBX, ECX); + mb(); + + if (res) { + eflags |= CC_Z; /* swap success */ + } else { + EDX = old_rdx; + EAX = old_rax; + eflags &= ~CC_Z; /* read the old value ! 
*/ + } + + CC_SRC = eflags; +} + +/* not */ +#define GEN_ATOMIC_NOT(type) \ +void helper_atomic_not##type(CPUX86State *env, \ + target_ulong a0) \ +{ \ + TX(a0, type, value, { \ + value = ~value; \ + }); \ +} + +GEN_ATOMIC_NOT(b); +GEN_ATOMIC_NOT(w); +GEN_ATOMIC_NOT(l); +#ifdef TARGET_X86_64 +GEN_ATOMIC_NOT(q); +#endif + +/* neg */ +#define GEN_ATOMIC_NEG(type, TYPE) \ +void helper_atomic_neg##type(CPUX86State *env, \ + target_ulong a0) \ +{ \ + int eflags; \ + \ + TX(a0, type, value, { \ + value = -value; \ + }); \ + \ + /* We should use the old value to compute CC */ \ + CC_SRC = CC_DST = -value; \ + \ + eflags = helper_cc_compute_all(CC_DST, CC_SRC, CC_SRC2, \ + CC_OP_SUB##TYPE); \ + CC_SRC = eflags; \ +} \ + +GEN_ATOMIC_NEG(b, B); +GEN_ATOMIC_NEG(w, W); +GEN_ATOMIC_NEG(l, L); +#ifdef TARGET_X86_64 +GEN_ATOMIC_NEG(q, Q); +#endif + +/* This is only used in BTX instruction, with an additional offset. + * Note that, when using register bitoffset, the value can be larger than + * operand size - 1 (operand size can be 16/32/64), refer to intel manual 2A + * page 3-11. */ +#define TX2(vaddr, type, value, offset, command) \ + unsigned long __q_addr; \ + DATA_##type __oldv; \ + DATA_##type value; \ + \ + CM_GET_QEMU_ADDR(env, __q_addr, vaddr); \ + __q_addr += offset >> 3; \ + do { \ + __oldv = value = LD_##type((DATA_##type *)__q_addr); \ + {command;}; \ + mb(); \ + } while (__oldv != (atomic_compare_exchange##type( \ + (DATA_##type *)__q_addr, __oldv, value))) + +#define GEN_ATOMIC_BTX(ins, command) \ +void helper_atomic_##ins(CPUX86State *env, target_ulong a0, \ + target_ulong offset, int ot) \ +{ \ + uint8_t old_byte; \ + int eflags; \ + \ + TX2(a0, b, value, offset, { \ + old_byte = value; \ + {command;}; \ + }); \ + \ + CC_SRC = (old_byte >> (offset & 0x7)); \ + CC_DST = 0; \ + eflags = helper_cc_compute_all(CC_DST, CC_SRC, CC_SRC2, \ + CC_OP_SARB + ot); \ + CC_SRC = eflags; \ +} + +/* bts */ +GEN_ATOMIC_BTX(bts, { + value |= (1 << (offset & 0x7)); +}); +/* btr */ +GEN_ATOMIC_BTX(btr, { + value &= ~(1 << (offset & 0x7)); +}); +/* btc */ +GEN_ATOMIC_BTX(btc, { + value ^= (1 << (offset & 0x7)); +}); + +/* fence **/ +void helper_fence(CPUX86State *env) +{ + mb(); +} + +#undef EAX +#undef ECX +#undef EDX +#undef EBX diff --git a/llvm/atomic/coremu-atomic.h b/llvm/atomic/coremu-atomic.h new file mode 100644 index 0000000..998232b --- /dev/null +++ b/llvm/atomic/coremu-atomic.h @@ -0,0 +1,412 @@ +/* + * COREMU Parallel Emulator Framework + * + * Atomic support for COREMU system. + * XXX: Now only support x86-64 architecture. + * + * Copyright (C) 2010 Parallel Processing Institute (PPI), Fudan Univ. + * <http://ppi.fudan.edu.cn/system_research_group> + * + * Authors: + * Zhaoguo Wang <zgwang@fudan.edu.cn> + * Yufei Chen <chenyufei@fudan.edu.cn> + * Ran Liu <naruilone@gmail.com> + * Xi Wu <wuxi@fudan.edu.cn> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef _COREMU_ATOMIC_H +#define _COREMU_ATOMIC_H + +#include <stdint.h> +#include <stdlib.h> +#include <assert.h> +#include "config-target.h" +#include "hqemu.h" + +/* Given the guest virtual address, get the corresponding host address. + * This macro resembles ldxxx in softmmu_template.h + * NOTE: This must be inlined since the use of GETPC needs to get the + * return address. Using always inline also works, we use macro here to be more + * explicit. */ +#if defined(CONFIG_USER_ONLY) +#define CM_GET_QEMU_ADDR(__env1, q_addr, v_addr) \ +do { \ + q_addr = v_addr + GUEST_BASE; \ +} while (0) + +#else +#define CM_GET_QEMU_ADDR(__env1, q_addr, v_addr) \ +do { \ + CPUState *cpu = ENV_GET_CPU(__env1); \ + int __mmu_idx, __index; \ + uintptr_t __retaddr; \ + __index = (v_addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1); \ + /* get the CPL, hence determine the MMU mode */ \ + __mmu_idx = cpu_mmu_index(__env1, false); \ + /* We use this function in the implementation of atomic instructions */ \ + /* and we are going to modify these memory. So we use addr_write. */ \ + if (unlikely(__env1->tlb_table[__mmu_idx][__index].addr_write \ + != ((v_addr & TARGET_PAGE_MASK) | tlb_version(__env1)))) { \ + __retaddr = GETPC(); \ + tlb_fill(cpu, v_addr, 1, __mmu_idx, __retaddr); \ + } \ + q_addr = v_addr + __env1->tlb_table[__mmu_idx][__index].addend; \ +} while(0) +#endif + +/* XXX These are also used by atomic instruction handling. + * Put these defines in some other files? */ +#define DATA_b uint8_t +#define DATA_w uint16_t +#define DATA_l uint32_t +#define DATA_q uint64_t + +#define __inline__ inline __attribute__((always_inline)) + +#if defined(__i386__) || defined(__x86_64__) +// Is this the correct way to detect 64 system? +#if defined(_LP64) +static __inline__ uint8_t +atomic_compare_exchange16b(uint64_t *memp, + uint64_t rax, uint64_t rdx, + uint64_t rbx, uint64_t rcx) +{ + uint8_t z; + __asm __volatile__ ( "lock; cmpxchg16b %3\n\t" + "setz %2\n\t" + : "=a" (rax), "=d" (rdx), "=r" (z), "+m" (*memp) + : "a" (rax), "d" (rdx), "b" (rbx), "c" (rcx) + : "memory", "cc" ); + return z; +} +#else +static __inline__ uint8_t +atomic_compare_exchange16b(uint64_t *memp, + uint64_t rax, uint64_t rdx, + uint64_t rbx, uint64_t rcx) +{ + assert("atomic_compare_exchange16b: not supported.\n"); + exit(0); +} + +static __inline__ uint8_t +atomic_compare_exchangeq(uint64_t *addr, + uint64_t oldval, uint64_t newval) +{ + assert("atomic_compare_exchangeq: not supported.\n"); + exit(0); +} + +#endif + +/* Memory Barriers: x86-64 ONLY now */ +#define mb() asm volatile("mfence":::"memory") +#define rmb() asm volatile("lfence":::"memory") +#define wmb() asm volatile("sfence" ::: "memory") + +#define LOCK_PREFIX "lock; " + +#define coremu_xglue(a, b) a ## b +// If a/b is macro, it will expand first, then pass to coremu_xglue +#define coremu_glue(a, b) coremu_xglue(a, b) + +#define coremu_xstr(s) # s +#define coremu_str(s) coremu_xstr(s) + +#define DATA_BITS 8 +#include "coremu-template.h" + +#define DATA_BITS 16 +#include "coremu-template.h" + +#define DATA_BITS 32 +#include "coremu-template.h" + +#if defined(_LP64) +#define DATA_BITS 64 +#include "coremu-template.h" +#else +static inline uint64_t atomic_exchangeq(uint64_t *p, uint64_t val) +{ + assert("atomic_exchangeq: not supported.\n"); + exit(0); +} + +#endif + +#elif defined(__arm__) + +#if defined(__ARM_ARCH_7__) || \ + defined(__ARM_ARCH_7A__) || \ + defined(__ARM_ARCH_7EM__) || \ + defined(__ARM_ARCH_7M__) || \ + defined(__ARM_ARCH_7R__) || \ + 
defined(__ARM_ARCH_6J__) || \ + defined(__ARM_ARCH_6K__) || \ + defined(__ARM_ARCH_6T2__) || \ + defined(__ARM_ARCH_6Z__) || \ + defined(__ARM_ARCH_6ZK__) +#define USE_ARMV6_INSTRUCTIONS +#endif + +#ifdef USE_ARMV6_INSTRUCTIONS +#define mb() __asm__ __volatile__("dmb" : : : "memory") +#define raw_local_irq_save(x) \ + ({ \ + __asm__ __volatile__( \ + "mrs %0, cpsr @ local_irq_save\n" \ + "cpsid i" \ + : "=r" (x) : : "memory", "cc"); \ + }) +#else +#define mb() __asm__ __volatile__("":::"memory") +#define raw_local_irq_save(x) \ + ({ \ + unsigned long temp; \ + (void) (&temp == &x); \ + __asm__ __volatile__( \ + "mrs %0, cpsr @ local_irq_save\n" \ +" orr %1, %0, #128\n" \ +" msr cpsr_c, %1" \ + : "=r" (x), "=r" (temp) \ + : \ + : "memory", "cc"); \ + }) +#endif + +#define raw_local_irq_restore(x) \ + __asm__ __volatile( \ + "msr cpsr_c, %0 @ local_irq_restore\n" \ + : \ + : "r" (x) \ + : "memory", "cc") + +static __inline__ uint8_t atomic_compare_exchangeb(uint8_t *addr, + uint8_t oldval, uint8_t newval) +{ + uint8_t ret; +#ifdef USE_ARMV6_INSTRUCTIONS + unsigned long tmp; + __asm__ __volatile__("@ atomic_cmpxchgl\n" + "1: ldrexb %1, [%3]\n" + " mov %0, #0\n" + " teq %1, %4\n" + " strexbeq %0, %5, [%3]\n" + " teq %0, #0\n" + " bne 1b\n" + : "=&r" (tmp), "=&r" (ret), "+Qo" (*addr) + : "r" (addr), "Ir" (oldval), "r" (newval) + : "cc"); +#else + unsigned long flags; + raw_local_irq_save(flags); + ret = *addr; + if (likely(ret == oldval)) + *addr = newval; + raw_local_irq_restore(flags); +#endif + return ret; +} + +static __inline__ uint16_t atomic_compare_exchangew(uint16_t *addr, + uint16_t oldval, uint16_t newval) +{ + uint16_t ret; +#ifdef USE_ARMV6_INSTRUCTIONS + unsigned long tmp; + __asm__ __volatile__("@ atomic_cmpxchgl\n" + "1: ldrexh %1, [%3]\n" + " mov %0, #0\n" + " teq %1, %4\n" + " strexheq %0, %5, [%3]\n" + " teq %0, #0\n" + " bne 1b\n" + : "=&r" (tmp), "=&r" (ret), "+Qo" (*addr) + : "r" (addr), "Ir" (oldval), "r" (newval) + : "cc"); +#else + unsigned long flags; + raw_local_irq_save(flags); + ret = *addr; + if (likely(ret == oldval)) + *addr = newval; + raw_local_irq_restore(flags); +#endif + return ret; +} + +static __inline__ uint32_t atomic_compare_exchangel(uint32_t *addr, + uint32_t oldval, uint32_t newval) +{ + uint32_t ret; +#ifdef USE_ARMV6_INSTRUCTIONS + unsigned long tmp; + __asm__ __volatile__("@ atomic_cmpxchgl\n" + "1: ldrex %1, [%3]\n" + " mov %0, #0\n" + " teq %1, %4\n" + " strexeq %0, %5, [%3]\n" + " teq %0, #0\n" + " bne 1b\n" + : "=&r" (tmp), "=&r" (ret), "+Qo" (*addr) + : "r" (addr), "Ir" (oldval), "r" (newval) + : "cc"); +#else + unsigned long flags; + raw_local_irq_save(flags); + ret = *addr; + if (likely(ret == oldval)) + *addr = newval; + raw_local_irq_restore(flags); +#endif + return ret; +} + +static __inline__ uint64_t atomic_compare_exchangeq(uint64_t *addr, + uint64_t oldval, uint64_t newval) +{ + uint64_t ret; +#ifdef USE_ARMV6_INSTRUCTIONS + unsigned long tmp; + __asm__ __volatile__("@ atomic_cmpxchgl\n" + "1: ldrexd %1, %H1, [%3]\n" + " mov %0, #0\n" + " teq %1, %4\n" + " teqeq %H1, %H4\n" + " strexdeq %0, %5, %H5, [%3]\n" + " teq %0, #0\n" + " bne 1b\n" + : "=&r" (tmp), "=&r" (ret), "+Qo" (*addr) + : "r" (addr), "Ir" (oldval), "r" (newval) + : "cc"); +#else + unsigned long flags; + raw_local_irq_save(flags); + ret = *addr; + if (likely(ret == oldval)) + *addr = newval; + raw_local_irq_restore(flags); +#endif + return ret; +} + +static __inline__ uint8_t +atomic_compare_exchange16b(uint64_t *memp, + uint64_t old_less, uint64_t old_most, + uint64_t 
new_less, uint64_t new_most) +{ + uint8_t ret = 0; + unsigned long flags; + raw_local_irq_save(flags); + ret = *memp; + if (likely(*memp == old_less && *(memp+1) == old_most)) + { + *memp = new_less; + *(memp+1) = new_most; + ret = 1; + } + raw_local_irq_restore(flags); + return ret; +} + +static __inline__ unsigned long __xchg(unsigned long x, volatile void *ptr, int size) +{ + unsigned long ret; +#ifdef USE_ARMV6_INSTRUCTIONS + unsigned int tmp; +#endif + + mb(); + + switch (size) { +#ifdef USE_ARMV6_INSTRUCTIONS + case 1: + __asm __volatile("@ __xchg1\n" + "1: ldrexb %0, [%3]\n" + " strexb %1, %2, [%3]\n" + " teq %1, #0\n" + " bne 1b" + : "=&r" (ret), "=&r" (tmp) + : "r" (x), "r" (ptr) + : "memory", "cc"); + break; + case 2: + __asm __volatile("@ __xchg1\n" + "1: ldrexh %0, [%3]\n" + " strexh %1, %2, [%3]\n" + " teq %1, #0\n" + " bne 1b" + : "=&r" (ret), "=&r" (tmp) + : "r" (x), "r" (ptr) + : "memory", "cc"); + break; + case 4: + __asm __volatile("@ __xchg4\n" + "1: ldrex %0, [%3]\n" + " strex %1, %2, [%3]\n" + " teq %1, #0\n" + " bne 1b" + : "=&r" (ret), "=&r" (tmp) + : "r" (x), "r" (ptr) + : "memory", "cc"); + break; +#else + case 1: + __asm __volatile("@ __xchg1\n" + " swpb %0, %1, [%2]" + : "=&r" (ret) + : "r" (x), "r" (ptr) + : "memory", "cc"); + break; + + case 4: + __asm __volatile("@ __xchg4\n" + " swp %0, %1, [%2]" + : "=&r" (ret) + : "r" (x), "r" (ptr) + : "memory", "cc"); + break; + case 2: + { + unsigned long flags = 0; + raw_local_irq_save(flags); + ret = *(volatile uint16_t *)ptr; + *(volatile uint16_t *)ptr = x; + raw_local_irq_restore(flags); + break; + } + +#endif + default: + exit(0); + } + mb(); + + return ret; +} + +#define xchg(ptr,x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr)))) +#define GEN_ATOMIC_XCHG_HELPER(TYPE) \ +static __inline__ DATA_##TYPE atomic_exchange##TYPE(DATA_##TYPE *p, DATA_##TYPE val) { return xchg(p, val); } + +GEN_ATOMIC_XCHG_HELPER(b); +GEN_ATOMIC_XCHG_HELPER(w); +GEN_ATOMIC_XCHG_HELPER(l); + +#endif + +#endif /* _COREMU_ATOMIC_H */ + diff --git a/llvm/atomic/coremu-template.h b/llvm/atomic/coremu-template.h new file mode 100644 index 0000000..66b185c --- /dev/null +++ b/llvm/atomic/coremu-template.h @@ -0,0 +1,101 @@ +/* The following code may be included multiple times in a single file. */ + +#if DATA_BITS == 64 +# define DATA_TYPE uint64_t +# define SUFFIX q +#elif DATA_BITS == 32 +# define DATA_TYPE uint32_t +# define SUFFIX l +#elif DATA_BITS == 16 +# define DATA_TYPE uint16_t +# define SUFFIX w +#elif DATA_BITS == 8 +# define DATA_TYPE uint8_t +# define SUFFIX b +#else +#error unsupported data size +#endif + +static __inline__ void coremu_glue(atomic_inc, SUFFIX)(DATA_TYPE *p) { + asm volatile( + LOCK_PREFIX "inc"coremu_str(SUFFIX)" %0" + : "+m"(*p) + : + : "cc"); +} + +static __inline__ void coremu_glue(atomic_dec, SUFFIX)(DATA_TYPE *p) { + asm volatile( + LOCK_PREFIX "dec"coremu_str(SUFFIX)" %0" + : "+m"(*p) + : + : "cc"); +} + +static __inline__ void coremu_glue(atomic_add, SUFFIX)(DATA_TYPE* addr, + DATA_TYPE val) { + asm volatile( + LOCK_PREFIX "add"coremu_str(SUFFIX)" %1, %0" + : "+m"(*addr) + : "a"(val) + : "cc"); +} + +/* swap the value VAL and *p. + * Return the value swapped out from memory. */ +static inline DATA_TYPE coremu_glue(atomic_exchange, SUFFIX)( + DATA_TYPE *p, DATA_TYPE val) +{ + DATA_TYPE out; + __asm __volatile( + "lock; xchg"coremu_str(SUFFIX)" %1,%2 \n\t" + : "=a" (out), "+m" (*p) + : "a" (val) + ); + return out; +} +/* Return previous value in addr. 
So if the return value is the same as oldval, + * swap occured. */ +static __inline__ DATA_TYPE coremu_glue(atomic_compare_exchange, SUFFIX)(DATA_TYPE *addr, + DATA_TYPE oldval, DATA_TYPE newval) { + asm volatile( + LOCK_PREFIX "cmpxchg"coremu_str(SUFFIX)" %2, %1" + : "+a"(oldval), "+m"(*addr) + : "q"(newval) + : "cc"); + + return oldval; +} + +static __inline__ void coremu_glue(atomic_and, SUFFIX)(DATA_TYPE *addr, + DATA_TYPE mask) { + asm volatile( + LOCK_PREFIX "and"coremu_str(SUFFIX)" %1, %0" + : "+m"(*addr) + : "r"(mask) + : "cc"); +} + +static __inline__ void coremu_glue(atomic_or, SUFFIX)(DATA_TYPE *addr, + DATA_TYPE mask) { + asm volatile( + LOCK_PREFIX "or"coremu_str(SUFFIX)" %1, %0" + : "+m"(*addr) + : "r"(mask) + : "cc"); +} + +static __inline__ DATA_TYPE coremu_glue(atomic_xadd, SUFFIX)( + DATA_TYPE* addr, DATA_TYPE val) { + asm volatile( + LOCK_PREFIX "xadd"coremu_str(SUFFIX)" %0, %1" + : "+a"(val), "+m"(*addr) + : + : "cc"); + + return val; +} + +#undef DATA_BITS +#undef DATA_TYPE +#undef SUFFIX |
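Note: one of the subtler helpers above is helper_atomic_cmpxchg8b, which maps the guest's 8-byte compare-and-exchange onto a single 64-bit CAS on the host. The following sketch models that behaviour in plain C; the register parameters and the returned zf value are illustrative stand-ins (the real helper reads and writes CPUX86State registers, translates the guest address with CM_GET_QEMU_ADDR, and folds the result into the emulated eflags), and __sync_val_compare_and_swap stands in for atomic_compare_exchangeq.

/* Rough model of helper_atomic_cmpxchg8b: compare EDX:EAX against the
 * 8-byte memory operand, store ECX:EBX on a match, otherwise reload
 * EDX:EAX from memory, and report the outcome as the guest ZF would. */
#include <stdint.h>
#include <stdio.h>

static int emulate_cmpxchg8b(uint64_t *mem, uint32_t *eax, uint32_t *edx,
                             uint32_t ebx, uint32_t ecx)
{
    uint64_t expected = ((uint64_t)*edx << 32) | *eax;
    uint64_t desired  = ((uint64_t)ecx  << 32) | ebx;
    uint64_t old = __sync_val_compare_and_swap(mem, expected, desired);

    if (old == expected)
        return 1;               /* ZF set: the swap happened          */
    *eax = (uint32_t)old;       /* ZF clear: publish the value we saw */
    *edx = (uint32_t)(old >> 32);
    return 0;
}

int main(void)
{
    uint64_t m = 0x0000000100000002ULL;
    uint32_t eax = 2, edx = 1;
    int zf = emulate_cmpxchg8b(&m, &eax, &edx, 4, 3);
    printf("zf=%d mem=%#llx\n", zf, (unsigned long long)m); /* zf=1, mem=0x300000004 */
    return 0;
}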