Diffstat (limited to 'src/llvm/pass')
-rw-r--r--  src/llvm/pass/CombineCasts.cpp               321
-rw-r--r--  src/llvm/pass/CombineGuestMemory.cpp         389
-rw-r--r--  src/llvm/pass/CombineZExtTrunc.cpp            70
-rw-r--r--  src/llvm/pass/FastMathPass.cpp                87
-rw-r--r--  src/llvm/pass/ProfileExec.cpp                172
-rw-r--r--  src/llvm/pass/RedundantStateElimination.cpp  179
-rw-r--r--  src/llvm/pass/ReplaceIntrinsic.cpp           137
-rw-r--r--  src/llvm/pass/SimplifyPointer.cpp            334
-rw-r--r--  src/llvm/pass/StateMappingPass.cpp           885
9 files changed, 2574 insertions, 0 deletions
diff --git a/src/llvm/pass/CombineCasts.cpp b/src/llvm/pass/CombineCasts.cpp
new file mode 100644
index 0000000..71a74ff
--- /dev/null
+++ b/src/llvm/pass/CombineCasts.cpp
@@ -0,0 +1,321 @@
+/*
+ * (C) 2015 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm-target.h"
+#include "llvm-opc.h"
+#include "llvm-pass.h"
+#include "utils.h"
+
+#define PASS_NAME "CombineCasts"
+
+/*
+ * CombineCasts Pass
+ */
+class CombineCasts : public FunctionPass {
+ IRFactory *IF;
+ const DataLayout *DL;
+ MDFactory *MF;
+ IntegerType *Int8Ty;
+ IntegerType *Int32Ty;
+ IntegerType *Int64Ty;
+ IntegerType *IntPtrTy;
+ PointerType *Int8PtrTy;
+ PointerType *Int32PtrTy;
+ PointerType *Int64PtrTy;
+ Type *FloatTy;
+ Type *DoubleTy;
+ IVec toErase;
+
+public:
+ static char ID;
+ explicit CombineCasts() : FunctionPass(ID) {}
+ explicit CombineCasts(IRFactory *IF)
+ : FunctionPass(ID), IF(IF), DL(IF->getDL()), MF(IF->getMDFactory())
+ {
+        LLVMContext &Context = IF->getContext();
+ Int8Ty = IntegerType::get(Context, 8);
+ Int32Ty = IntegerType::get(Context, 32);
+ Int64Ty = IntegerType::get(Context, 64);
+ IntPtrTy = DL->getIntPtrType(Context);
+ Int8PtrTy = Type::getInt8PtrTy(Context, 0);
+ Int32PtrTy = Type::getInt32PtrTy(Context, 0);
+ Int64PtrTy = Type::getInt64PtrTy(Context, 0);
+ FloatTy = Type::getFloatTy(Context);
+ DoubleTy = Type::getDoubleTy(Context);
+ }
+
+ Instruction *getUniqueUser(Instruction *I) {
+ if (I->hasOneUse())
+ return I->user_back();
+ return nullptr;
+    }
+
+ bool combineLoadCast(LoadInst *LI);
+ bool combineStoreCast(StoreInst *SI);
+ bool combineCastCast(Function &F);
+ bool simplifySignChange(Function &F);
+ bool runOnFunction(Function &F);
+};
+
+char CombineCasts::ID = 0;
+INITIALIZE_PASS(CombineCasts, "combinecast",
+ "Combine bitcast with guest memory loads/stores", false, false)
+
+FunctionPass *llvm::createCombineCasts(IRFactory *IF)
+{
+ return new CombineCasts(IF);
+}
+
+static bool hasSameCastingTy(ArrayRef<BitCastInst *> IL) {
+ Type *SrcTy = IL[0]->getSrcTy();
+ Type *DstTy = IL[0]->getDestTy();
+ for (BitCastInst *I : IL) {
+ if (I->getSrcTy() != SrcTy)
+ return false;
+ if (I->getDestTy() != DstTy)
+ return false;
+ }
+ return true;
+}
+
+/* This function changes the load type if (1) the loaded data is cast to
+ * another type, (2) all bitcast users of the load cast to the same type, and
+ * (3) every other user of the load is a PHI, load or store.
+ *
+ * For example:
+ * %0 = load <typeA>* %0 = load <typeB>*
+ * %1 = bitcast %0, <typeB> %1 = bitcast %0, <typeA>
+ *
+ * %2 = op <typeB> %1, ... => %2 = op <typeB> %0, ...
+ *
+ * store %0, <typeA>* store %1, <typeA>*
+ * store %1, <typeB>* store %0, <typeB>*
+ */
+bool CombineCasts::combineLoadCast(LoadInst *LI)
+{
+ Instruction *Ptr = dyn_cast<Instruction>(LI->getPointerOperand());
+
+ if (!Ptr)
+ return false;
+
+ /* Find all bitcast users of this load. */
+ SmallVector<BitCastInst *, 4> BCIs;
+ for (User *U : LI->users()) {
+ Instruction *UI = cast<Instruction>(U);
+ switch (UI->getOpcode()) {
+ default:
+ return false;
+ case Instruction::PHI:
+ case Instruction::Load:
+ case Instruction::Store:
+ break;
+ case Instruction::BitCast:
+ BCIs.push_back(cast<BitCastInst>(UI));
+ break;
+ }
+ }
+
+ if (BCIs.empty() || !hasSameCastingTy(BCIs))
+ return false;
+
+ Instruction *InsertPos = LI;
+ unsigned Alignment = LI->getAlignment();
+    bool Volatile = LI->isVolatile();
+ Type *SrcTy = LI->getType();
+ Type *DstTy = BCIs[0]->getDestTy();
+
+ Type *PtrTy = PointerType::get(DstTy, LI->getPointerAddressSpace());
+ if (isa<IntToPtrInst>(Ptr))
+ Ptr = new IntToPtrInst(Ptr->getOperand(0), PtrTy, "", InsertPos);
+ else
+ Ptr = new BitCastInst(Ptr, PtrTy, "", InsertPos);
+
+ Instruction *NewLI = new LoadInst(Ptr, "", Volatile, Alignment, InsertPos);
+ Instruction *NewBCI = new BitCastInst(NewLI, SrcTy, "", InsertPos);
+
+ if (MF->isGuestMemory(LI))
+ MF->setGuestMemory(NewLI);
+ for (BitCastInst *BCI : BCIs)
+ BCI->replaceAllUsesWith(NewLI);
+ LI->replaceAllUsesWith(NewBCI);
+
+ toErase.push_back(LI);
+ for (BitCastInst *BCI : BCIs)
+ toErase.push_back(BCI);
+
+ return true;
+}
+
+/* This function changes the store type if the stored data is cast from
+ * another type.
+ *
+ * For example:
+ * %0 = <typeA>
+ * %1 = bitcast %0, <typeB> => store %0, <typeA>*
+ * store %1, <typeB>*
+ */
+bool CombineCasts::combineStoreCast(StoreInst *SI)
+{
+ Instruction *Ptr = dyn_cast<Instruction>(SI->getPointerOperand());
+ Instruction *Data = dyn_cast<Instruction>(SI->getValueOperand());
+
+ if (!Ptr || !Data || !isa<BitCastInst>(Data))
+ return false;
+
+ Instruction *InsertPos = SI;
+ unsigned Alignment = SI->getAlignment();
+    bool Volatile = SI->isVolatile();
+ BitCastInst *BCI = cast<BitCastInst>(Data);
+ Value *V = BCI->getOperand(0);
+ Type *SrcTy = V->getType();
+
+ Type *PtrTy = PointerType::get(SrcTy, SI->getPointerAddressSpace());
+ if (isa<IntToPtrInst>(Ptr))
+ Ptr = new IntToPtrInst(Ptr->getOperand(0), PtrTy, "", InsertPos);
+ else
+ Ptr = new BitCastInst(Ptr, PtrTy, "", InsertPos);
+
+ Instruction *NewSI = new StoreInst(V, Ptr, Volatile, Alignment, InsertPos);
+
+ if (MF->isGuestMemory(SI))
+ MF->setGuestMemory(NewSI);
+
+ toErase.push_back(SI);
+ return true;
+}
+
+/* This function aims to eliminate redundant casts.
+ * For example:
+ * %0 = <typeA> %0 = <typeA>
+ * %1 = bitcast %0, <typeB> =>
+ * %2 = bitcast %1, <typeC> %2 = bitcast %0, <typeC>
+ * = op <typeC> %2, ... = op <typeC> %2, ...
+ *
+ * And if <typeA> is the same as <typeC>, the code is further optimized to
+ * %0 = <typeA> %0 = <typeA>
+ * %1 = bitcast %0, <typeB> =>
+ * %2 = bitcast %1, <typeC>
+ * = op <typeC> %2, ... = op <typeA> %0, ...
+ */
+bool CombineCasts::combineCastCast(Function &F)
+{
+ SmallVector<Instruction*, 4> Worklist;
+ for (auto II = inst_begin(F), EE = inst_end(F); II != EE; II++) {
+ Instruction *I = &*II;
+ if (isa<BitCastInst>(I))
+ Worklist.push_back(I);
+ }
+
+ for (auto I : Worklist) {
+ BitCastInst *CI = cast<BitCastInst>(I);
+ BitCastInst *CSrc = dyn_cast<BitCastInst>(CI->getOperand(0));
+ if (!CSrc)
+ continue;
+
+ Type *SrcTy = CSrc->getOperand(0)->getType();
+ Type *DstTy = CI->getType();
+ Value *Result = (SrcTy == DstTy) ? CSrc->getOperand(0) :
+ new BitCastInst(CSrc->getOperand(0), CI->getType(), "", CI);
+ I->replaceAllUsesWith(Result);
+ toErase.push_back(I);
+ }
+
+ if (toErase.empty())
+ return false;
+
+ ProcessErase(toErase);
+ return true;
+}
+
+/* This function converts the sign change of float/double data (i.e., -num)
+ * that is implemented with integer operations into float/double operations.
+ * For example:
+ * %0 = bitcast float %num to i32
+ * %1 = xor i32 %0, 0x80000000 => %0 = fsub float 0, %num
+ * %2 = bitcast %1, float
+ */
+bool CombineCasts::simplifySignChange(Function &F)
+{
+ SmallVector<BitCastInst*, 16> Worklist;
+
+ for (auto II = inst_begin(F), EE = inst_end(F); II != EE; II++) {
+ Instruction *I = &*II;
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) {
+ Type *SrcTy = BCI->getSrcTy();
+ Type *DstTy = BCI->getDestTy();
+ if (SrcTy == FloatTy && DstTy == Int32Ty)
+ Worklist.push_back(BCI);
+ else if (SrcTy == DoubleTy && DstTy == Int64Ty)
+ Worklist.push_back(BCI);
+ }
+ }
+
+ for (auto I : Worklist) {
+ Type *Ty = I->getSrcTy();
+ Value *C = (Ty == FloatTy) ? CONST32(0x80000000)
+ : CONST64(0x8000000000000000LL);
+
+ /* Check whether the single user of this bitcast is Xor. */
+ Instruction *UI = getUniqueUser(I);
+ if (UI && UI->getOpcode() == Instruction::Xor && UI->getOperand(1) == C) {
+ /* Check whether the single user of this Xor is a bitcast
+ * instruction that casts the type back to the src type. */
+ Instruction *UUI = getUniqueUser(UI);
+ if (UUI && UUI->getOpcode() == Instruction::BitCast &&
+ cast<BitCastInst>(UUI)->getDestTy() == Ty) {
+ Value *V = BinaryOperator::Create(Instruction::FSub,
+ ConstantFP::get(Ty, 0),
+ I->getOperand(0), "", I);
+ UUI->replaceAllUsesWith(V);
+ toErase.push_back(UUI);
+ }
+ }
+ }
+
+ if (toErase.empty())
+ return false;
+
+ ProcessErase(toErase);
+ return true;
+}
+
+bool CombineCasts::runOnFunction(Function &F)
+{
+ bool Changed = false;
+ SmallVector<LoadInst *, 16> Loads;
+ SmallVector<StoreInst *, 16> Stores;
+
+    /* Collect all guest memory and non-volatile CPU state loads/stores. */
+ for (auto II = inst_begin(F), EE = inst_end(F); II != EE; II++) {
+ Instruction *I = &*II;
+
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (MF->isGuestMemory(LI) || !LI->isVolatile())
+ Loads.push_back(LI);
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (MF->isGuestMemory(SI) || !SI->isVolatile())
+ Stores.push_back(SI);
+ }
+ }
+
+ for (auto LI : Loads)
+ Changed |= combineLoadCast(LI);
+ for (auto SI : Stores)
+ Changed |= combineStoreCast(SI);
+
+    if (!toErase.empty())
+ ProcessErase(toErase);
+
+ Changed |= combineCastCast(F);
+ Changed |= simplifySignChange(F);
+
+ return Changed;
+}
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
+
diff --git a/src/llvm/pass/CombineGuestMemory.cpp b/src/llvm/pass/CombineGuestMemory.cpp
new file mode 100644
index 0000000..0740a8b
--- /dev/null
+++ b/src/llvm/pass/CombineGuestMemory.cpp
@@ -0,0 +1,389 @@
+/*
+ * (C) 2015 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#include "llvm-debug.h"
+#include "llvm-opc.h"
+#include "llvm-target.h"
+#include "llvm-pass.h"
+#include "utils.h"
+
+#define PASS_NAME "CombineGuestMemory"
+
+/*
+ * CombineGuestMemory Pass
+ */
+class CombineGuestMemory : public FunctionPass {
+
+ struct StateInfo {
+ StateInfo() : Ptr(nullptr) {}
+ StateInfo(Value *ptr, APInt &offset, APInt &size)
+ : Ptr(ptr), Offset(offset), Size(size) {}
+ Value *Ptr;
+ APInt Offset;
+ APInt Size;
+ };
+
+ typedef std::pair<Value *, Value *> ValuePair;
+ typedef std::map<size_t, size_t> StateMap;
+ typedef DenseMap<ValuePair, StateInfo> CSMap;
+
+ IRFactory *IF;
+ const DataLayout *DL;
+ MDFactory *MF;
+ IntegerType *Int8Ty;
+ IntegerType *Int32Ty;
+ IntegerType *Int64Ty;
+ IntegerType *IntPtrTy;
+ PointerType *Int8PtrTy;
+ PointerType *Int32PtrTy;
+ PointerType *Int64PtrTy;
+ Value *CPU;
+ Value *GuestBase;
+ Instruction *InitLastInst;
+ StateMap LegalStates;
+ IVec toErase;
+
+public:
+ static char ID;
+ explicit CombineGuestMemory() : FunctionPass(ID) {}
+ explicit CombineGuestMemory(IRFactory *IF)
+ : FunctionPass(ID), IF(IF), DL(IF->getDL()), MF(IF->getMDFactory())
+ {
+        LLVMContext &Context = IF->getContext();
+ Int8Ty = IntegerType::get(Context, 8);
+ Int32Ty = IntegerType::get(Context, 32);
+ Int64Ty = IntegerType::get(Context, 64);
+ IntPtrTy = DL->getIntPtrType(Context);
+ Int8PtrTy = Type::getInt8PtrTy(Context, 0);
+ Int32PtrTy = Type::getInt32PtrTy(Context, 0);
+ Int64PtrTy = Type::getInt64PtrTy(Context, 0);
+
+ GuestBase = IF->getGuestBase();
+
+ addLegalStates();
+ }
+
+ unsigned getAddressSpaceOperand(Value *I) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(I))
+ return LI->getPointerAddressSpace();
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return SI->getPointerAddressSpace();
+ return -1U;
+ }
+
+ int getNumUsers(Instruction *I) {
+ return distance(I->user_begin(), I->user_end());
+ }
+
+ void addLegalStates();
+ bool isLegalState(Value *Ptr, APInt &Offset, APInt &Size);
+ bool isConsecutiveAccess(Value *A, Value *B, Value *&Ptr, APInt &Offset, APInt &Size);
+ bool tryCombineLoad(Value *A, Value *B, CSMap &States);
+ bool tryCombineStore(Value *A, Value *B, CSMap &States);
+ bool combineMemory(SmallVector<Value *, 8> &Memory, SmallVector<Value *, 8> &States);
+ bool runOnFunction(Function &F);
+};
+
+char CombineGuestMemory::ID = 0;
+INITIALIZE_PASS(CombineGuestMemory, "combinegm",
+ "Combine guest memory loads and stores", false, false)
+
+FunctionPass *llvm::createCombineGuestMemory(IRFactory *IF)
+{
+ return new CombineGuestMemory(IF);
+}
+
+
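+/* Register the CPU states that are allowed to be combined into one wide
+ * access: the guest SIMD registers (x86 XMM registers or ARM VFP registers). */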
+void CombineGuestMemory::addLegalStates()
+{
+#if defined(TARGET_I386)
+ size_t Start = offsetof(CPUArchState, xmm_regs[0]);
+ size_t Size = sizeof(XMMReg);
+ for (int i = 0; i < CPU_NB_REGS; ++i)
+ LegalStates[Start + Size * i] = Size;
+#elif defined(TARGET_ARM)
+ size_t Start = offsetof(CPUArchState, vfp.regs[0]);
+ size_t Size = sizeof(float64) * 2;
+ for (int i = 0; i < 32; ++i)
+ LegalStates[Start + Size * i] = Size;
+#endif
+}
+
+bool CombineGuestMemory::isConsecutiveAccess(Value *A, Value *B, Value *&Ptr,
+ APInt &Offset, APInt &Size)
+{
+ Value *PtrA = getPointerOperand(A);
+ Value *PtrB = getPointerOperand(B);
+ unsigned ASA = getAddressSpaceOperand(A);
+ unsigned ASB = getAddressSpaceOperand(B);
+
+ if (!PtrA || !PtrB || (ASA != ASB))
+ return false;
+
+ Type *TyA = cast<PointerType>(PtrA->getType())->getElementType();
+ Type *TyB = cast<PointerType>(PtrB->getType())->getElementType();
+ if (DL->getTypeStoreSize(TyA) != DL->getTypeStoreSize(TyB))
+ return false;
+
+ unsigned PtrBitWidth = DL->getTypeSizeInBits(TyA);
+ APInt Sz(PtrBitWidth, DL->getTypeStoreSize(TyA));
+
+ APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0);
+ PtrA = StripPointerWithConstantOffset(DL, PtrA, OffsetA, GuestBase);
+ PtrB = StripPointerWithConstantOffset(DL, PtrB, OffsetB, GuestBase);
+
+ APInt OffsetDelta = OffsetB - OffsetA;
+ if (PtrA == PtrB && OffsetDelta == Sz) {
+ Ptr = PtrA;
+ Offset = OffsetA;
+ Size = Sz + Sz;
+ return true;
+ }
+
+ return false;
+}
+
+bool CombineGuestMemory::isLegalState(Value *Ptr, APInt &Offset, APInt &Size)
+{
+ if (Ptr != CPU)
+ return false;
+ uint64_t Start = Offset.getZExtValue();
+ if (LegalStates.find(Start) == LegalStates.end() ||
+ Size.getZExtValue() > LegalStates[Start])
+ return false;
+ return true;
+}
+
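+/* Return true if any instruction within the two half-open ranges [SA, EA)
+ * and [SB, EB) could break the combination: a call may clobber memory, and
+ * any load/store other than the four queried instructions may alias them. */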
+static bool hasMemoryViolation(Instruction *SA, Instruction *SB,
+ Instruction *EA, Instruction *EB)
+{
+ std::set<Value*> Insts;
+ Insts.insert(SA);
+ Insts.insert(SB);
+ Insts.insert(EA);
+ Insts.insert(EB);
+
+ BasicBlock::iterator BI = BasicBlock::iterator(SA);
+ BasicBlock::iterator BE = BasicBlock::iterator(EA);
+ for (; BI != BE; ++BI) {
+ Instruction *I = &*BI;
+ if (isa<CallInst>(I))
+ return true;
+ if (!isa<LoadInst>(I) && !isa<StoreInst>(I))
+ continue;
+ if (Insts.find(I) == Insts.end())
+ return true;
+ }
+
+ BI = BasicBlock::iterator(SB);
+ BE = BasicBlock::iterator(EB);
+ for (; BI != BE; ++BI) {
+ Instruction *I = &*BI;
+ if (isa<CallInst>(I))
+ return true;
+ if (!isa<LoadInst>(I) && !isa<StoreInst>(I))
+ continue;
+ if (Insts.find(I) == Insts.end())
+ return true;
+ }
+ return false;
+}
+
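+/* A sketch of the transformation performed below (illustrative IR, not taken
+ * from the source): two consecutive guest loads whose values are immediately
+ * stored to consecutive CPU states become a single vector load/store pair.
+ * For Size = 16:
+ *
+ *   %v0 = load i64* %gva             %v = load <16 x i8>* %gva
+ *   %v1 = load i64* %gva.8      =>   store <16 x i8> %v, <16 x i8>* %state
+ *   store %v0, i64* %state
+ *   store %v1, i64* %state.8
+ */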
+bool CombineGuestMemory::tryCombineLoad(Value *A, Value *B, CSMap &States)
+{
+    /* First, check that the guest loads are 'only' used by the stores to
+     * consecutive CPU states, and that no other loads/stores occur between
+     * the queried operations. */
+ LoadInst *LA = cast<LoadInst>(A);
+ LoadInst *LB = cast<LoadInst>(B);
+ if (getNumUsers(LA) != 1 || getNumUsers(LB) != 1)
+ return false;
+
+ Value *VA = *LA->user_begin();
+ Value *VB = *LB->user_begin();
+ CSMap::iterator CSI = States.find(ValuePair(VA, VB));
+ if (CSI == States.end())
+ return false;
+
+ Instruction *SA = cast<Instruction>(VA);
+ Instruction *SB = cast<Instruction>(VB);
+
+ if (hasMemoryViolation(LA, LB, SA, SB))
+ return false;
+
+    /* At this point, the guest memory data is loaded and immediately stored
+     * to the CPU states, so the operations are safe to combine. */
+ Instruction *InsertPos = SA;
+ StateInfo &SI = CSI->second;
+ uint64_t Size = SI.Size.getZExtValue();
+ unsigned AS = getAddressSpaceOperand(LA);
+ unsigned Align = Size / 2;
+ Type *Ty = PointerType::get(VectorType::get(Int8Ty, Size), AS);
+ Instruction *Ptr = cast<Instruction>(LA->getPointerOperand());
+ if (isa<IntToPtrInst>(Ptr))
+ Ptr = new IntToPtrInst(Ptr->getOperand(0), Ty, "", InsertPos);
+ else
+ Ptr = new BitCastInst(Ptr, Ty, "", InsertPos);
+ Instruction *NewLI = new LoadInst(Ptr, "", true, Align, InsertPos);
+ MF->setGuestMemory(NewLI);
+
+ Ty = PointerType::getUnqual(VectorType::get(Int8Ty, Size));
+ Value *Offset = ConstantInt::get(Ty->getContext(), SI.Offset);
+ Ptr = GetElementPtrInst::CreateInBounds(CPU, Offset, "", InitLastInst);
+ Ptr = new BitCastInst(Ptr, Ty, "", InitLastInst);
+ new StoreInst(NewLI, Ptr, false, InsertPos);
+
+ States.erase(CSI);
+ toErase.push_back(SA);
+ toErase.push_back(SB);
+ return true;
+}
+
+bool CombineGuestMemory::tryCombineStore(Value *A, Value *B, CSMap &States)
+{
+    /* First, check that the CPU state loads are 'only' used by the guest
+     * stores, and that no other loads/stores occur between the queried
+     * operations. */
+ StoreInst *SA = cast<StoreInst>(A);
+ StoreInst *SB = cast<StoreInst>(B);
+ Instruction *LA = dyn_cast<Instruction>(SA->getOperand(0));
+ Instruction *LB = dyn_cast<Instruction>(SB->getOperand(0));
+
+ if (!LA || !LB)
+ return false;
+ if (getNumUsers(LA) != 1 || getNumUsers(LB) != 1)
+ return false;
+
+ CSMap::iterator CSI = States.find(ValuePair(LA, LB));
+ if (CSI == States.end())
+ return false;
+
+ if (hasMemoryViolation(LA, LB, SA, SB))
+ return false;
+
+    /* At this point, the CPU states are loaded and immediately stored to the
+     * guest memory, so the operations are safe to combine. */
+ Instruction *InsertPos = SA;
+ StateInfo &SI = CSI->second;
+ uint64_t Size = SI.Size.getZExtValue();
+ Type *Ty = PointerType::getUnqual(VectorType::get(Int8Ty, Size));
+ Value *Offset = ConstantInt::get(Ty->getContext(), SI.Offset);
+ Instruction *Ptr = GetElementPtrInst::CreateInBounds(CPU, Offset, "", InitLastInst);
+ Ptr = new BitCastInst(Ptr, Ty, "", InitLastInst);
+ Value *V = new LoadInst(Ptr, "", false, InsertPos);
+
+ unsigned AS = getAddressSpaceOperand(SA);
+ unsigned Align = Size / 2;
+ Ty = PointerType::get(VectorType::get(Int8Ty, Size), AS);
+ Ptr = cast<Instruction>(SA->getPointerOperand());
+ if (isa<IntToPtrInst>(Ptr))
+ Ptr = new IntToPtrInst(Ptr->getOperand(0), Ty, "", InsertPos);
+ else
+ Ptr = new BitCastInst(Ptr, Ty, "", InsertPos);
+ Instruction *NewSI = new StoreInst(V, Ptr, true, Align, InsertPos);
+ MF->setGuestMemory(NewSI);
+
+ States.erase(CSI);
+ toErase.push_back(SA);
+ toErase.push_back(SB);
+ return true;
+}
+
+bool CombineGuestMemory::combineMemory(SmallVector<Value *, 8> &Memory,
+ SmallVector<Value *, 8> &States)
+{
+ bool Changed = false;
+ SmallPtrSet<Value *, 4> Used;
+ CSMap ConsecutiveStates;
+ Value *Ptr;
+ APInt Offset, Size;
+
+ /* Find consecutive CPU states. */
+ for (unsigned i = 1, e = States.size(); i != e; i++) {
+ if (!isConsecutiveAccess(States[i-1], States[i], Ptr, Offset, Size))
+ continue;
+
+ if (!isLegalState(Ptr, Offset, Size))
+ continue;
+
+ ConsecutiveStates[ValuePair(States[i-1], States[i])] =
+ StateInfo(Ptr, Offset, Size);
+ }
+
+    if (ConsecutiveStates.empty())
+        return false;
+
+    /* Find and combine consecutive guest memory accesses if their referenced
+     * CPU states are also consecutive. */
+ for (unsigned i = 1, e = Memory.size(); i != e; i++) {
+ if (Used.count(Memory[i-1]) || Used.count(Memory[i]))
+ continue;
+ if (!isConsecutiveAccess(Memory[i-1], Memory[i], Ptr, Offset, Size))
+ continue;
+
+ bool ret = false;
+ if (isa<LoadInst>(Memory[i-1]) && isa<LoadInst>(Memory[i])) {
+ ret = tryCombineLoad(Memory[i-1], Memory[i], ConsecutiveStates);
+ } else if (isa<StoreInst>(Memory[i-1]) && isa<StoreInst>(Memory[i])) {
+ ret = tryCombineStore(Memory[i-1], Memory[i], ConsecutiveStates);
+ }
+ if (ret) {
+ Used.insert(Memory[i-1]);
+ Used.insert(Memory[i]);
+ Changed = true;
+ }
+ }
+ return Changed;
+}
+
+bool CombineGuestMemory::runOnFunction(Function &F)
+{
+ bool Changed = false;
+
+#if defined(CONFIG_SOFTMMU)
+ return Changed;
+#endif
+
+ /* Skip if no state is allowed to be combined. */
+ if (LegalStates.empty())
+ return Changed;
+
+ CPU = IF->getDefaultCPU(F);
+ if (!CPU) {
+ dbg() << DEBUG_PASS << "CombineGuestMemory: Cannot find CPU pointer.\n";
+ return false;
+ }
+
+ InitLastInst = F.getEntryBlock().getTerminator();
+
+ for (auto FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+ SmallVector<Value *, 8> Memory;
+ SmallVector<Value *, 8> States;
+ for (auto BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) {
+ Instruction *I = &*BI;
+ if (MF->isGuestMemory(I)) {
+ Memory.push_back(I);
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (!LI->isVolatile())
+ States.push_back(LI);
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (!SI->isVolatile())
+ States.push_back(SI);
+ }
+ }
+ if (Memory.size() >= 2 && States.size() >= 2)
+ Changed |= combineMemory(Memory, States);
+ }
+
+ if (!toErase.empty())
+ ProcessErase(toErase);
+
+ return Changed;
+}
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
+
diff --git a/src/llvm/pass/CombineZExtTrunc.cpp b/src/llvm/pass/CombineZExtTrunc.cpp
new file mode 100644
index 0000000..de9a87f
--- /dev/null
+++ b/src/llvm/pass/CombineZExtTrunc.cpp
@@ -0,0 +1,70 @@
+/*
+ * (C) 2015 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm-target.h"
+#include "llvm-opc.h"
+#include "llvm-pass.h"
+#include "utils.h"
+
+#define PASS_NAME "CombineZExtTrunc"
+
+/*
+ * CombineZExtTrunc Pass
+ */
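+/* A minimal sketch of the pattern this pass folds (illustrative IR):
+ *
+ *   %1 = zext i32 %0 to i64
+ *   %2 = trunc i64 %1 to i32   =>   all uses of %2 are replaced with %0
+ */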
+class CombineZExtTrunc : public FunctionPass {
+public:
+ static char ID;
+ explicit CombineZExtTrunc() : FunctionPass(ID) {}
+ bool runOnFunction(Function &F);
+};
+
+char CombineZExtTrunc::ID = 0;
+INITIALIZE_PASS(CombineZExtTrunc, "combinezet",
+ "Combine ZExt followed by Trunc", false, false)
+
+FunctionPass *llvm::createCombineZExtTrunc()
+{
+ return new CombineZExtTrunc;
+}
+
+bool CombineZExtTrunc::runOnFunction(Function &F)
+{
+ bool Changed = false;
+ IVec toErase;
+
+ SmallVector<Instruction*, 4> Worklist;
+ for (auto II = inst_begin(F), EE = inst_end(F); II != EE; II++) {
+ Instruction *I = &*II;
+ if (isa<TruncInst>(I))
+ Worklist.push_back(I);
+ }
+
+ for (auto I : Worklist) {
+ TruncInst *TI = cast<TruncInst>(I);
+ ZExtInst *ZI = dyn_cast<ZExtInst>(TI->getOperand(0));
+ if (!ZI)
+ continue;
+
+ Type *SrcTy = ZI->getOperand(0)->getType();
+ Type *DstTy = TI->getType();
+ if (SrcTy == DstTy) {
+ I->replaceAllUsesWith(ZI->getOperand(0));
+ if (TI->use_empty())
+ toErase.push_back(TI);
+ Changed = true;
+ }
+ }
+
+    if (!toErase.empty())
+ ProcessErase(toErase);
+
+ return Changed;
+}
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
+
diff --git a/src/llvm/pass/FastMathPass.cpp b/src/llvm/pass/FastMathPass.cpp
new file mode 100644
index 0000000..2b6a592
--- /dev/null
+++ b/src/llvm/pass/FastMathPass.cpp
@@ -0,0 +1,87 @@
+/*
+ * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm-target.h"
+#include "llvm-pass.h"
+#include "fpu/softfloat-native-def.h"
+
+#define PASS_NAME "FastMathPass"
+
+class FastMathPass : public FunctionPass {
+public:
+ static char ID;
+ std::map<std::string, std::string> FPUNameMap;
+
+ explicit FastMathPass() : FunctionPass(ID)
+ {
+ TCGHelperInfo *FPUHelper = (TCGHelperInfo *)get_native_fpu_helpers();
+ for (int i = 0, e = num_native_fpu_helpers(); i != e; ++i) {
+ /* ex: llvm_int32_to_float32 --> int32_to_float32 */
+ TCGHelperInfo &fpu = FPUHelper[i];
+ const char *native = fpu.name;
+ const char *soft = native + 5;
+ FPUNameMap[soft] = native;
+ }
+ }
+ bool runOnFunction(Function &F);
+};
+
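+/* For example (illustrative): a call to the softfloat helper
+ *
+ *   %r = call float @int32_to_float32(i32 %x, i8* %fpst)
+ *
+ * is redirected to its native counterpart @llvm_int32_to_float32, which is
+ * then inlined so it can be lowered to native FP instructions. Arguments
+ * beyond the native prototype are dropped. */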
+bool FastMathPass::runOnFunction(Function &F)
+{
+ IVec toErase;
+ SmallVector<CallInst *, 16> InlineCalls;
+ Module *Mod = F.getParent();
+
+ for (auto I = inst_begin(F), E = inst_end(F); I != E; ++I) {
+ if (CallInst *CI = dyn_cast<CallInst>(&*I)) {
+ if (CI->isInlineAsm() ||
+ CI->getCalledFunction() == nullptr ||
+ CI->getCalledFunction()->isIntrinsic())
+ continue;
+
+ std::string Fname = CI->getCalledFunction()->getName();
+ if (FPUNameMap.count(Fname) == 0)
+ continue;
+
+ Function *Fn = Mod->getFunction(FPUNameMap[Fname]);
+ FunctionType *FTy = cast<FunctionType>(
+ cast<PointerType>(Fn->getType())->getElementType());
+
+ unsigned NumArgs = FTy->getNumParams();
+ assert(NumArgs <= CI->getNumArgOperands());
+
+ SmallVector<Value *, 4> Params;
+ for (unsigned i = 0; i != NumArgs; ++i)
+ Params.push_back(CI->getArgOperand(i));
+
+ CallInst *NewCI = CallInst::Create(Fn, Params, "", CI);
+ CI->replaceAllUsesWith(NewCI);
+ InlineCalls.push_back(NewCI);
+ toErase.push_back(CI);
+ }
+ }
+
+ ProcessErase(toErase);
+
+ while (!InlineCalls.empty())
+ InlineFunc(InlineCalls.pop_back_val());
+
+ return false;
+}
+
+char FastMathPass::ID = 0;
+INITIALIZE_PASS(FastMathPass, "fastmath",
+ "Transform softfloat subroutines to native FP operations", false, false)
+
+FunctionPass *llvm::createFastMathPass()
+{
+ return new FastMathPass();
+}
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
+
diff --git a/src/llvm/pass/ProfileExec.cpp b/src/llvm/pass/ProfileExec.cpp
new file mode 100644
index 0000000..56a68e1
--- /dev/null
+++ b/src/llvm/pass/ProfileExec.cpp
@@ -0,0 +1,172 @@
+/*
+ * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm-debug.h"
+#include "llvm-soft-perfmon.h"
+#include "llvm-pass.h"
+#include "llvm-opc.h"
+#include "llvm.h"
+#include "utils.h"
+
+#define PASS_NAME "ProfileExec"
+
+extern LLVMEnv *LLEnv;
+
+/*
+ * Profile Pass
+ */
+class ProfileExec : public FunctionPass {
+ enum {
+ IDX_LOOP = 0,
+ IDX_EXIT,
+ IDX_INBR,
+ };
+
+ IRFactory *IF;
+ const DataLayout *DL;
+ MDFactory *MF;
+ IntegerType *Int8Ty;
+ IntegerType *Int32Ty;
+ IntegerType *Int64Ty;
+ IntegerType *IntPtrTy;
+ PointerType *Int8PtrTy;
+ PointerType *Int32PtrTy;
+ PointerType *Int64PtrTy;
+
+public:
+ static char ID;
+ explicit ProfileExec() : FunctionPass(ID) {}
+ explicit ProfileExec(IRFactory *IF)
+ : FunctionPass(ID), IF(IF), DL(IF->getDL()), MF(IF->getMDFactory())
+ {
+        LLVMContext &Context = IF->getContext();
+ Int8Ty = IntegerType::get(Context, 8);
+ Int32Ty = IntegerType::get(Context, 32);
+ Int64Ty = IntegerType::get(Context, 64);
+ IntPtrTy = DL->getIntPtrType(Context);
+ Int8PtrTy = Type::getInt8PtrTy(Context, 0);
+ Int32PtrTy = Type::getInt32PtrTy(Context, 0);
+ Int64PtrTy = Type::getInt64PtrTy(Context, 0);
+ }
+ bool runOnFunction(Function &F);
+
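+    /* Return the profiling-code insertion point of BB: in the entry block,
+     * skip the first instruction (presumably the CPU pointer definition);
+     * otherwise, insert after any leading PHI nodes. */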
+ Instruction *getInsertPos(BasicBlock *BB) {
+ if (BB == &BB->getParent()->getEntryBlock())
+ return &*++BB->begin();
+ return BB->getFirstNonPHI();
+ }
+};
+
+char ProfileExec::ID = 0;
+INITIALIZE_PASS(ProfileExec, "profile", "Profile trace execution", false, false)
+
+FunctionPass *llvm::createProfileExec(IRFactory *IF)
+{
+ return new ProfileExec(IF);
+}
+
+bool ProfileExec::runOnFunction(Function &F)
+{
+ if (!LLEnv->isTraceMode())
+ return false;
+ if (!SP->isEnabled())
+ return false;
+
+ Instruction *CPU = IF->getDefaultCPU(F);
+ if (!CPU) {
+ dbg() << DEBUG_PASS << PASS_NAME << ": Cannot find CPU pointer.\n";
+ return false;
+ }
+
+ TraceInfo *Trace = IF->getTrace();
+
+ for (auto FI = F.begin(), FE = F.end(); FI != FE; FI++) {
+ BasicBlock *BB = &*FI;
+ if (distance(succ_begin(BB), succ_end(BB)) != 0)
+ continue;
+
+ /* Find exit points and indirect branches. */
+ Trace->NumExit++;
+ if (isa<IndirectBrInst>(BB->getTerminator()))
+ Trace->NumIndirectBr++;
+ }
+
+ /* Insert code to profile trace exit counts. */
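+    /* The inserted code below is equivalent to "env->num_trace_exits++"
+     * (illustrative IR):
+     *   %p = getelementptr inbounds i8* %cpu, i64 <offset>
+     *   %q = bitcast i8* %p to i64*
+     *   %n = load volatile i64* %q
+     *   %m = add i64 %n, 1
+     *   store volatile i64 %m, i64* %q
+     */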
+ if (SP->Mode & SPM_EXIT) {
+ Instruction *InsertPos = &*++BasicBlock::iterator(CPU);
+ Value *NumExitPtr = GetElementPtrInst::CreateInBounds(CPU,
+ CONSTPtr(offsetof(CPUArchState, num_trace_exits)),
+ "", InsertPos);
+ NumExitPtr = new BitCastInst(NumExitPtr, Int64PtrTy, "", InsertPos);
+ Instruction *NumExits = new LoadInst(NumExitPtr, "", true, InsertPos);
+ NumExits = BinaryOperator::Create(Instruction::Add, NumExits,
+ CONST64(1), "", InsertPos);
+ new StoreInst(NumExits, NumExitPtr, true, InsertPos);
+ }
+
+ if (!(SP->Mode & SPM_TRACE))
+ return false;
+
+ SmallVector<CallInst*, 16> InlineCalls;
+ Function *Helper = IF->ResolveFunction("helper_profile_exec");
+
+ /* Prepare counter structures. */
+ if (!Trace->ExecCount) {
+ Trace->ExecCount = new uint64_t *[MAX_SPM_THREADS];
+ for (int i = 0; i < MAX_SPM_THREADS; i++)
+ Trace->ExecCount[i] = new uint64_t[3] {0, 0, 0};
+ }
+
+    /* Find all profiling points. */
+ std::vector<std::pair<Instruction *, int> > ProfilePoint;
+
+ SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> BackEdges;
+ FindFunctionBackedges(F, BackEdges);
+ for (unsigned i = 0, e = BackEdges.size(); i != e; ++i) {
+ auto BackEdgeBB = const_cast<BasicBlock*>(BackEdges[i].first);
+ ProfilePoint.push_back(std::make_pair(BackEdgeBB->getTerminator(), IDX_LOOP));
+ }
+
+ for (auto FI = F.begin(), FE = F.end(); FI != FE; FI++) {
+ BasicBlock *BB = &*FI;
+ if (distance(succ_begin(BB), succ_end(BB)) != 0)
+ continue;
+ bool isIndirectBr = isa<IndirectBrInst>(BB->getTerminator());
+ ProfilePoint.push_back(std::make_pair(getInsertPos(BB),
+ isIndirectBr ? IDX_INBR : IDX_EXIT));
+ }
+
+ /* Insert profiling routines. */
+ for (unsigned i = 0, e = ProfilePoint.size(); i != e; ++i) {
+ Instruction *InsertPos = ProfilePoint[i].first;
+ Value *Ty = CONST32(ProfilePoint[i].second);
+
+ Value *Counter = ConstantExpr::getIntToPtr(
+ CONSTPtr((uintptr_t)Trace->ExecCount),
+ PointerType::getUnqual(Int8Ty));
+
+ SmallVector<Value *, 4> Params;
+ Type *ParamTy = Helper->getFunctionType()->getParamType(0);
+ Value *Env = new BitCastInst(CPU, ParamTy, "", InsertPos);
+ Params.push_back(Env);
+ Params.push_back(Counter);
+ Params.push_back(Ty);
+
+ CallInst *CI = CallInst::Create(Helper, Params, "", InsertPos);
+ MF->setConst(CI);
+ InlineCalls.push_back(CI);
+ }
+
+ while (!InlineCalls.empty())
+ InlineFunc(InlineCalls.pop_back_val());
+
+ return true;
+}
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
+
diff --git a/src/llvm/pass/RedundantStateElimination.cpp b/src/llvm/pass/RedundantStateElimination.cpp
new file mode 100644
index 0000000..2e5f715
--- /dev/null
+++ b/src/llvm/pass/RedundantStateElimination.cpp
@@ -0,0 +1,179 @@
+/*
+ * (C) 2017 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#include "llvm-debug.h"
+#include "llvm-opc.h"
+#include "llvm-target.h"
+#include "llvm-pass.h"
+#include "utils.h"
+
+#define PASS_NAME "RedundantStateElimination"
+
+/*
+ * The RedundantStateElimination pass aims to remove
+ * (1) redundant stores to PC, and
+ * (2) redundant loads and stores enclosed by two helper function calls.
+ */
+class RedundantStateElimination : public FunctionPass {
+ IRFactory *IF;
+ MDFactory *MF;
+ const DataLayout *DL;
+ Value *CPU;
+ IVec toErase;
+
+public:
+ static char ID;
+ explicit RedundantStateElimination() : FunctionPass(ID) {}
+ explicit RedundantStateElimination(IRFactory *IF)
+ : FunctionPass(ID), IF(IF), MF(IF->getMDFactory()), DL(IF->getDL()) {}
+
+ int getNumUsers(Instruction *I) {
+ return distance(I->user_begin(), I->user_end());
+ }
+
+ bool isStateOfPC(Value *Ptr) {
+ intptr_t Off = 0;
+ Value *Base = getBaseWithConstantOffset(DL, Ptr, Off);
+ if (Base == CPU && IRFactory::isStateOfPC(Off))
+ return true;
+ return false;
+ }
+
+ bool isDirectDominator(LoadInst *LI, StoreInst *SI) {
+ Instruction *A = LI, *B = SI;
+ if (A->getParent() != B->getParent())
+ return false;
+ for (auto II = BasicBlock::iterator(A), EE = A->getParent()->end();
+ II != EE; ++II) {
+ if (&*II == B)
+ return true;
+            /* If a non-const helper function sits between the two
+             * instructions, this is not a direct domination because the
+             * helper function could cause side effects. */
+ auto CI = dyn_cast<CallInst>(II);
+ if (CI && !MDFactory::isConst(CI))
+ return false;
+ }
+ return false;
+ }
+
+ bool removePCState(Function &F);
+ bool removeHelperState(Function &F);
+ bool runOnFunction(Function &F);
+};
+
+char RedundantStateElimination::ID = 0;
+INITIALIZE_PASS(RedundantStateElimination, "rse",
+ "Eliminate redundant CPU state loads/stores", false, false)
+
+FunctionPass *llvm::createRedundantStateElimination(IRFactory *IF)
+{
+ return new RedundantStateElimination(IF);
+}
+
+/* Eliminate redundant stores to PC for each basic block. */
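+/* For example (illustrative): within a basic block, only the last store to
+ * PC is kept, unless an intervening load of PC makes an earlier store live:
+ *
+ *   store i64 4096, i64* %pc    ; removed
+ *   ...                         ; no load of PC in between
+ *   store i64 4104, i64* %pc    ; kept
+ */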
+bool RedundantStateElimination::removePCState(Function &F)
+{
+ for (auto FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+ bool Found = false;
+
+ for (auto BI = FI->rbegin(), BE = FI->rend(); BI != BE; ++BI) {
+ Instruction *I = &*BI;
+ if (MF->isGuestMemory(I))
+ continue;
+
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (isStateOfPC(getPointerOperand(SI))) {
+ if (!Found)
+ Found = true;
+ else
+ toErase.push_back(SI);
+ }
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (isStateOfPC(getPointerOperand(LI)))
+ Found = false;
+ }
+ }
+ }
+
+ if (toErase.empty())
+ return false;
+
+ ProcessErase(toErase);
+ return true;
+}
+
+/* Eliminate redundant loads/stores enclosed by two helper function calls.
+ * The redundant loads and stores are generated by StateMappingPass for
+ * handling synchronization of CPU states around helper function calls.
+ * A load and store can be removed if a state value is loaded and immediately
+ * stored back to the same state. For example:
+ *
+ * Before optimization: After optimization:
+ * instructions to sync states instructions to sync states
+ * call void @helper_function1() call void @helper_function1()
+ *
+ * %v0 = load i32, i32* %state0
+ * %v1 = load i32, i32* %state1
+ * store i32 %v0, i32* %state0
+ * store i32 %v1, i32* %state1
+ *
+ * call void @helper_function2() call void @helper_function2()
+ * instructions to reload states instructions to reload states
+ */
+bool RedundantStateElimination::removeHelperState(Function &F)
+{
+ for (auto FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+ for (auto BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) {
+ Instruction *I = &*BI;
+ if (MF->isGuestMemory(I))
+ continue;
+
+ StoreInst *SI = dyn_cast<StoreInst>(I);
+ if (!SI || SI->isVolatile())
+ continue;
+
+ LoadInst *LI = dyn_cast<LoadInst>(getValueOperand(SI));
+ if (LI && isDirectDominator(LI, SI)) {
+ /* We can try removing the store instruction if LI is a direct
+ * dominator of SI. */
+ Value *PtrA = getPointerOperand(LI);
+ Value *PtrB = getPointerOperand(SI);
+ if (StripPointer(PtrA) == CPU && PtrA == PtrB)
+ toErase.push_back(SI);
+ }
+ }
+ }
+
+ if (toErase.empty())
+ return false;
+
+ ProcessErase(toErase);
+ return true;
+}
+
+bool RedundantStateElimination::runOnFunction(Function &F)
+{
+ bool Changed = false;
+
+ CPU = IF->getDefaultCPU(F);
+ if (!CPU) {
+ dbg() << DEBUG_PASS << "RedundantStateElimination: Cannot find CPU pointer.\n";
+ return false;
+ }
+
+ Changed |= removePCState(F);
+#if 0
+ Changed |= removeHelperState(F);
+#endif
+
+ return Changed;
+}
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
+
diff --git a/src/llvm/pass/ReplaceIntrinsic.cpp b/src/llvm/pass/ReplaceIntrinsic.cpp
new file mode 100644
index 0000000..62505f4
--- /dev/null
+++ b/src/llvm/pass/ReplaceIntrinsic.cpp
@@ -0,0 +1,137 @@
+/*
+ * (C) 2016 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#include "llvm-types.h"
+#include "llvm-debug.h"
+#include "llvm-target.h"
+#include "llvm-pass.h"
+
+
+#define PASS_NAME "ReplaceIntrinsic"
+
+/*
+ * HQEMU does not allow helpers to contain any memory or debug intrinsics.
+ * This pass replaces memory intrinsics with load/store instructions and
+ * removes debug intrinsics (generated by Clang with the -g flag).
+ */
+class ReplaceIntrinsic : public FunctionPass {
+ IVec toErase;
+public:
+ static char ID;
+ explicit ReplaceIntrinsic() : FunctionPass(ID) {}
+
+ Value *ConvertType(Value *V, Type *T, Instruction *InsertPos) {
+ if (likely(V->getType() == T))
+ return V;
+ return new BitCastInst(V, T, "", InsertPos);
+ }
+
+ bool replaceMemoryIntrinsic(IntrinsicInst *I);
+ bool runOnFunction(Function &F);
+};
+
+char ReplaceIntrinsic::ID = 0;
+INITIALIZE_PASS(ReplaceIntrinsic, "replaceintrinsic",
+ "Replace memory and debug intrinsics generated by clang",
+ false, false)
+
+FunctionPass *llvm::createReplaceIntrinsic()
+{
+ return new ReplaceIntrinsic();
+}
+
+
+/*
+ * Transform memcpy/memmove/memset into load/store instructions.
+ * Clang attempts to move memory data using LLVM memory intrinsics, which
+ * causes the state mapping pass to miss some guest states (state mapping
+ * only considers guest states accessed by general load/store instructions).
+ * So, we simply rewrite the memory intrinsics as load/store instructions.
+ */
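+/* For example (illustrative, assuming a constant 4-byte memcpy):
+ *
+ *   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 4, ...)
+ *
+ * is rewritten as
+ *
+ *   %s = bitcast i8* %src to i32*
+ *   %d = bitcast i8* %dst to i32*
+ *   %v = load i32* %s
+ *   store i32 %v, i32* %d
+ */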
+bool ReplaceIntrinsic::replaceMemoryIntrinsic(IntrinsicInst *I)
+{
+ switch (I->getIntrinsicID()) {
+ case Intrinsic::memset:
+ case Intrinsic::memcpy:
+ case Intrinsic::memmove:
+ break;
+ default:
+ return false;
+ }
+
+ LLVMContext &Context = I->getContext();
+ Type *Int8PtrTy = Type::getInt8PtrTy(Context);
+ CallInst *CI = cast<CallInst>(I);
+
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) {
+ /* memcpy/memmove */
+ Value *Src = MTI->getSource();
+ Value *Dst = MTI->getDest();
+ Value *NumBytes = MTI->getLength();
+
+ if (CI->getArgOperand(0)->getType() != Int8PtrTy ||
+ CI->getArgOperand(1)->getType() != Int8PtrTy ||
+ !isa<ConstantInt>(NumBytes) ||
+ MTI->isVolatile())
+ return false;
+
+ /* Remove this instruction if the access size is zero. */
+ size_t Len = cast<ConstantInt>(NumBytes)->getZExtValue();
+ if (Len == 0)
+ goto done;
+
+ Type *Ty = Type::getIntNPtrTy(Context, Len * 8);
+ Src = ConvertType(Src, Ty, I);
+ Dst = ConvertType(Dst, Ty, I);
+ Src = new LoadInst(Src, "", false, I);
+ new StoreInst(Src, Dst, false, I);
+ } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(I)) {
+ /* memset */
+ Value *Src = MSI->getValue();
+ Value *Dst = MSI->getDest();
+ Value *NumBytes = MSI->getLength();
+
+ if (CI->getArgOperand(0)->getType() != Int8PtrTy ||
+ !isa<ConstantInt>(Src) ||
+ !isa<ConstantInt>(NumBytes) ||
+ MSI->isVolatile())
+ return false;
+
+ size_t Val = cast<ConstantInt>(Src)->getZExtValue();
+ size_t Len = cast<ConstantInt>(NumBytes)->getZExtValue();
+ if (Val != 0)
+ return false;
+ if (Len == 0)
+ goto done;
+
+ Type *Ty = Type::getIntNPtrTy(Context, Len * 8);
+ Src = ConstantInt::get(Type::getIntNTy(Context, Len * 8), 0);
+ Dst = ConvertType(Dst, Ty, I);
+ new StoreInst(Src, Dst, false, I);
+ }
+
+done:
+ toErase.push_back(I);
+ return true;
+}
+
+bool ReplaceIntrinsic::runOnFunction(Function &F)
+{
+ for (auto I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
+ Instruction *Inst = &*I;
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ if (replaceMemoryIntrinsic(II))
+ continue;
+ if (isa<DbgInfoIntrinsic>(II))
+ toErase.push_back(II);
+ }
+ }
+    bool Changed = !toErase.empty();
+    ProcessErase(toErase);
+    return Changed;
+}
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */
diff --git a/src/llvm/pass/SimplifyPointer.cpp b/src/llvm/pass/SimplifyPointer.cpp
new file mode 100644
index 0000000..87afbdd
--- /dev/null
+++ b/src/llvm/pass/SimplifyPointer.cpp
@@ -0,0 +1,334 @@
+//===- SimplifyPointer.cpp - Reassociate guest pointer arithmetic ---------===//
+//
+// The HQEMU Dynamic Binary Translator Infrastructure
+//
+// (C) 2016 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+// COVART Laboratory, CSIE Department, National Taiwan University, Taiwan.
+// See COPYRIGHT in top-level directory.
+//
+//===----------------------------------------------------------------------===//
+// This pass implements a simple pointer arithmetic reassociator that makes
+// pointer stripping easier. It obtains the scalar evolution results of all
+// guest pointers, which are in simplest form. Next, it inserts new
+// instructions that evaluate the simplified expressions to construct new
+// pointers, and rewrites the corresponding guest loads/stores with the new
+// pointers.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/InstIterator.h"
+
+#include "llvm-opc.h"
+#include "llvm-pass.h"
+#include "llvm-target.h"
+#include "utils.h"
+
+#define PASS_NAME "SIMPTR"
+#define DEBUG_TYPE "SIMPTR"
+
+//#define VERBOSE
+
+/// \brief Dump pass debug message with pass name mark.
+static inline llvm::raw_ostream &pout() {
+ return dbg() << DEBUG_PASS << PASS_NAME ": ";
+}
+
+/// \returns True if \p A dominates \p B.
+static bool dominates(Value *A, Value *B, DominatorTree *DT) {
+ auto *AI = dyn_cast<Instruction>(A);
+ auto *BI = dyn_cast<Instruction>(B);
+ if (AI && BI)
+ return DT->dominates(AI, BI);
+ return false;
+}
+
+class SimplifyPointer : public FunctionPass {
+public:
+ using ValueList = SmallVector<Value *, 32>;
+ using InstrList = SmallVector<Instruction *, 32>;
+ using ExprValMap = DenseMap<const SCEV *, Value *>;
+
+ // Pass identification, replacement for type id.
+ static char ID;
+
+ // LLVM pass constructor and destructor.
+  explicit SimplifyPointer() : FunctionPass(ID) {}
+ explicit SimplifyPointer(IRFactory *IF)
+ : FunctionPass(ID), IF(IF), MF(IF->getMDFactory()), DL(IF->getDL()) {
+ // Initialize all.
+ initializeSimplifyPointerPass(*PassRegistry::getPassRegistry());
+ }
+
+ // LLVM pass public interfaces.
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnFunction(Function &F) override;
+
+private:
+ /// \return The evaluation result of expression \p S or null if not cached.
+ Value *lookupBaseExpressionCache(const SCEV *S) const {
+ auto V = BaseExprVal.find(S);
+ if (V != BaseExprVal.end())
+ return V->second;
+ return nullptr;
+ }
+
+ /// \returns True if spread constants in the expression tree of \p S can be
+ /// collected by reassociation and reduced to \p FoldVal.
+ ///
+ /// It traverses the expression tree of \p S and propagates constant nodes
+ /// from add, multiply and recurrent add nodes, i.e., (%1 + %2 + 5) * (%3 - 7)
+ /// should return 5 * -7 = -35.
+ bool foldConstantExpression(const SCEV *S, int64_t &FoldVal) const;
+
+ /// \returns The first non-pointer value traced along the use-define chain of
+ /// casting which starts from \p V and ends with a IntToPtrInst, or null if
+ /// the length of searching chain exceeds \p MaxLookup.
+ ///
+ /// In the context of DBT, pointer type is represented and manipulated as
+ /// integer data until used as a pointer. Therefore, it follows:
+ ///
+ /// [Expression Tree]
+ /// + + + +
+ /// \ / \ /
+ /// - ... -
+ /// \ /
+ /// +
+ /// [Scalar Root]
+ /// |
+ /// [Casting]
+ /// |
+ /// [Load/Store]
+ ///
+ /// This method targets the scalar root value.
+ Value *findPointerScalarRoot(Value *V, unsigned MaxLookup = 4);
+
+ /// \brief Simplify the pointer arithmetic of \p LSI based on scalar evolution
+ /// results which folds constants into simplest form. After extracting the
+ /// folded constant from the expression, the rest nodes can form a base
+ /// expression which is likely a common sub-expression of other \p LSI.
+ ///
+ /// It assumes \p LSI has the following use-define chain starting from its
+ /// pointer and containing only add, multiply and recurrent add nodes.
+ ///
+ /// [Expression Tree] [Expression Tree] [Expression Tree]
+ /// + A B + + + B A + +
+ /// \ / \ / \ / \ / \ /
+ /// - ... - - ... - - (B-A)
+ /// \ / \ / \ /
+ /// + + +
+ /// [Scalar Root] >> [Scalar Root] >> [Scalar Root]
+ /// | | |
+ /// [Casting] [Casting] [Casting]
+ /// | | |
+ /// [LSI] [LSI] [LSI]
+ ///
+ /// Suppose A and B are constants, they can be folded into (B-A) with scalar
+ /// evolution results. Need to insert instructions for other operations in
+ /// tree (e.g., the new sub in the right-most figure).
+ ///
+ /// First it tries to find the folded constant and substract it from root
+ /// expression to form the base expression. Then it generates instructions to
+ /// evaluate the base expression.
+ bool tryToSimplifyPointer(Instruction *I);
+
+ // HQEMU internal infrastructure.
+ IRFactory *IF = nullptr;
+ MDFactory *MF = nullptr;
+ // LLVM analysis and data type info.
+ const DataLayout *DL = nullptr;
+ DominatorTree *DT = nullptr;
+ ScalarEvolution *SE = nullptr;
+
+ /// The cache of base expression to corresponding evaluated value map.
+ ExprValMap BaseExprVal;
+};
+
+bool SimplifyPointer::foldConstantExpression(const SCEV *S,
+ int64_t &FoldVal) const {
+ // Handle expression tree of scalar root containing only add, multiply and
+ // recurrent add nodes.
+ if (auto *AddSE = dyn_cast<SCEVAddExpr>(S)) {
+ FoldVal = 0;
+ for (auto Op : AddSE->operands()) {
+ int64_t Val;
+ if (foldConstantExpression(Op, Val))
+ FoldVal += Val;
+ }
+ return true;
+ } else if (auto *MulSE = dyn_cast<SCEVMulExpr>(S)) {
+ FoldVal = 1;
+ for (auto Op : MulSE->operands()) {
+ int64_t Val;
+      // If one operand of the multiplication fails to report a constant, the
+      // entire expression becomes non-constant as well.
+ if (foldConstantExpression(Op, Val))
+ FoldVal *= Val;
+ else
+ return false;
+ }
+ return true;
+ } else if (auto *RecSE = dyn_cast<SCEVAddRecExpr>(S)) {
+ // Trace only the start expression, because the step expression must be
+ // multiplied by the loop trip count which is unlikely constant.
+ return foldConstantExpression(RecSE->getStart(), FoldVal);
+ } else if (auto *ConstSE = dyn_cast<SCEVConstant>(S)) {
+ FoldVal = ConstSE->getValue()->getValue().getSExtValue();
+ return true;
+ }
+ return false;
+}
+
+Value *SimplifyPointer::findPointerScalarRoot(Value *V, unsigned MaxLookup) {
+ if (!V || !V->getType()->isPointerTy())
+ return V;
+
+ for (unsigned i = 0; i < MaxLookup; ++i) {
+ if (BitCastInst *Cast = dyn_cast<BitCastInst>(V)) {
+ V = Cast->getOperand(0);
+ } else if (IntToPtrInst *Cast = dyn_cast<IntToPtrInst>(V)) {
+ // Found first scalar, return it.
+ V = Cast->getOperand(0);
+ return V;
+ }
+ }
+ return nullptr;
+}
+
+bool SimplifyPointer::tryToSimplifyPointer(Instruction *LSI) {
+ Value *Ptr = getPointerOperand(LSI);
+ Value *Root = findPointerScalarRoot(Ptr);
+  // Check for null before dereferencing: findPointerScalarRoot may fail.
+  if (!Ptr || !Root || !Root->getType()->isIntegerTy())
+    return false;
+  Type *RootTy = Root->getType();
+  Type *PtrTy = Ptr->getType();
+
+#ifdef VERBOSE
+ if (DM.getDebugMode() & DEBUG_PASS) {
+ pout() << "Visiting memory instruction.\n";
+ pout() << "- " << *LSI << ".\n";
+ }
+#endif
+
+ // Traverse the simplest form expression tree and collect folded constants.
+ // Note the folded constant can be zero (base = root) if no folded constant
+ // is found.
+ auto *RootSE = SE->getSCEV(Root);
+ int64_t FoldConst = 0;
+ foldConstantExpression(RootSE, FoldConst);
+
+  // Subtract the offset constant from the root expression to get the base
+  // expression, then query the base expression cache to find whether it has
+  // been evaluated.
+ auto *BaseSE = SE->getMinusSCEV(RootSE,
+ SE->getConstant(RootTy, FoldConst, true));
+ Value *Base = lookupBaseExpressionCache(BaseSE);
+
+  // Create instructions to evaluate the base expression on a cache miss, or
+  // if the previously computed value doesn't dominate the load/store
+  // instruction.
+ if (!Base || !dominates(Base, LSI, DT)) {
+#ifdef VERBOSE
+ pout() << " Need to build base expression.\n";
+ pout() << " - Base " << *BaseSE << ".\n";
+ pout() << " - Offset " << FoldConst << ".\n";
+#endif
+ // Expand the base expression if it is safe.
+ if (isSafeToExpand(BaseSE, *SE)) {
+#if defined(LLVM_V35)
+ SCEVExpander Expander(*SE, "");
+#else
+ SCEVExpander Expander(*SE, *DL, "");
+#endif
+ Base = Expander.expandCodeFor(BaseSE, RootTy, LSI);
+ }
+ } else {
+#ifdef VERBOSE
+ pout() << " Use cached base expression value.\n";
+ pout() << " - Base " << *BaseSE << ".\n";
+ pout() << " - Offset " << FoldConst << ".\n";
+#endif
+ }
+
+  // Neither the cached value nor re-computation worked, abort.
+ if (!Base)
+ return false;
+
+  // Add the folded constant (offset) back to the new root value and feed the
+  // result to the load/store instruction as its new pointer.
+ IRBuilder<> Builder(IF->getContext());
+
+ bool FoldZero = (FoldConst == 0);
+ Value *Offset = ConstantInt::get(RootTy, FoldConst);
+
+ Builder.SetInsertPoint(LSI);
+ Value *NewRoot = FoldZero ? Base : Builder.CreateAdd(Base, Offset);
+ Value *NewPtr = Builder.CreateIntToPtr(NewRoot, PtrTy);
+ LSI->replaceUsesOfWith(Ptr, NewPtr);
+
+ // Cache base expression value.
+ BaseExprVal[BaseSE] = Base;
+
+ return true;
+}
+
+void SimplifyPointer::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<DominatorTreeWrapperPass>();
+#if defined(LLVM_V35)
+ AU.addRequired<ScalarEvolution>();
+#else
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+#endif
+}
+
+bool SimplifyPointer::runOnFunction(Function &F) {
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+#if defined(LLVM_V35)
+ SE = &getAnalysis<ScalarEvolution>();
+#else
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+#endif
+
+ bool Changed = false;
+
+ InstrList MemoryInstrs;
+ for (auto FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+ BasicBlock *BB = &*FI;
+
+ // Skip dead basic blocks.
+ if (!DT->isReachableFromEntry(BB))
+ continue;
+
+ // Collect all guest memory instructions.
+ for (auto BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) {
+ Instruction *I = &*BI;
+ if (MDFactory::isGuestMemory(I))
+ MemoryInstrs.push_back(I);
+ }
+ }
+
+ // Try to simplify pointers of collected load/store instructions.
+ for (Instruction *I : MemoryInstrs)
+ Changed |= tryToSimplifyPointer(I);
+
+ return Changed;
+}
+
+char SimplifyPointer::ID = 0;
+INITIALIZE_PASS_BEGIN(SimplifyPointer, "simplifypointer",
+                      "Reassociate pointer arithmetic", false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+#if defined(LLVM_V35)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
+#else
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+#endif
+INITIALIZE_PASS_END(SimplifyPointer, "simplifypointer",
+                    "Reassociate pointer arithmetic", false, false)
+
+FunctionPass *llvm::createSimplifyPointer(IRFactory *IF) {
+ return new SimplifyPointer(IF);
+}
+
+/*
+ * vim: ts=2 sts=2 sw=2 expandtab
+ */
diff --git a/src/llvm/pass/StateMappingPass.cpp b/src/llvm/pass/StateMappingPass.cpp
new file mode 100644
index 0000000..0d9dd9b
--- /dev/null
+++ b/src/llvm/pass/StateMappingPass.cpp
@@ -0,0 +1,885 @@
+/*
+ * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan.
+ * See COPYRIGHT in top-level directory.
+ */
+
+#include "llvm-debug.h"
+#include "llvm-opc.h"
+#include "llvm-target.h"
+#include "llvm-pass.h"
+
+#define PASS_NAME "StateMapping"
+
+
+/*
+ * StateMappingPass is used to eliminate the redundant loads and stores to the
+ * CPUArchState. The loads and stores of the guest memory operations are not
+ * removed in order not to violate the memory model of the guest architecture.
+ *
+ * The state mapping rules are:
+ * - A guest state does not overlap other states (i.e., all accesses have the
+ *   same size):
+ *   - Same type: map to this type.
+ *   - Different types: select the type in the order vector, float, integer;
+ *     use bitcast to convert between the types.
+ * - A guest state overlaps other state(s):
+ *   - Query StateType to find the state size (i.e., boundary) and type:
+ *     - Vector type: use insert/extract to manipulate a vector element.
+ *     - Other types: use shifts to manipulate the sub-word element.
+ */
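+/* A sketch of the promotion (illustrative IR): repeated accesses to the same
+ * guest state are redirected to a stack slot, which is written back to the
+ * CPUArchState at the exit blocks:
+ *
+ *   %v0 = load i32* %reg0              %a = alloca i32          ; entry
+ *   store i32 %v1, i32* %reg0     =>   %v0 = load i32* %a
+ *   %v2 = load i32* %reg0              store i32 %v1, i32* %a
+ *                                      %v2 = load i32* %a
+ *                                      ; dirty %a copied back at trace exits
+ */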
+class StateMappingPass : public FunctionPass {
+ IRFactory *IF; /* Uplink to the IRFactory */
+
+public:
+ static char ID;
+ explicit StateMappingPass() : FunctionPass(ID) {}
+ explicit StateMappingPass(IRFactory *IF) : FunctionPass(ID), IF(IF) {}
+
+ bool runOnFunction(Function &F);
+};
+
+struct StateMapping {
+ StateMapping()
+ : State(nullptr), Addr(nullptr), Ty(nullptr), AI(nullptr),
+ hasLoad(false), hasStore(false) {}
+
+ StateData *State;
+ Value *Addr;
+ Type *Ty;
+ AllocaInst *AI;
+ bool hasLoad;
+ bool hasStore;
+
+ intptr_t getSize() { return State->End - State->Start; }
+ intptr_t getStart() { return State->Start; }
+ intptr_t getEnd() { return State->End; }
+ Value *getAddr() { return Addr; }
+ Type *getType() { return Ty; }
+ bool isVector() { return Ty->isVectorTy(); }
+
+ bool overlap(StateRange &Range) {
+ if (Range.empty())
+ return false;
+ intptr_t Start = getStart();
+ intptr_t End = getEnd();
+        /* Start from the last range beginning at or before Start; guard the
+         * decrement to avoid stepping before begin(). */
+        auto I = Range.upper_bound(Start);
+        if (I != Range.begin())
+            --I;
+ for (; I != Range.end() && I->first < End; ++I) {
+ if (I->second > Start)
+ return true;
+ }
+ return false;
+ }
+};
+
+struct ElementInfo {
+ ElementInfo() : Shift(0), NumElts(0), EltTy(nullptr), StateTy(nullptr) {}
+
+ intptr_t Shift;
+ unsigned NumElts;
+ Type *EltTy;
+ Type *StateTy;
+};
+
+class StateMapper {
+ typedef std::vector<StateMapping> StateMapList;
+
+ IRFactory *IF;
+ const DataLayout *DL;
+ Instruction *CPU; /* The CPU pointer */
+ Instruction *PreCastPos; /* The position to cast CPU states */
+ Instruction *PreLoadPos; /* The position to preload CPU states */
+ IVec toErase; /* The instructions to be removed */
+
+ FlatType &StateType;
+ StateAnalyzer Analyzer;
+ StateMapList StateMaps;
+
+public:
+ StateMapper(IRFactory *IF)
+ : IF(IF), DL(IF->getDL()), StateType(IF->getTranslator().getStateType()),
+ Analyzer(DL) {}
+
+ bool run(Function &F) {
+ if (!init(F))
+ return false;
+
+ AnalyzeState(F);
+ if (!StateMaps.empty())
+ PromoteState(F);
+
+ ProcessErase(toErase);
+ return true;
+ }
+
+ /* Rearrange instructions in the 'init' block. */
+ bool init(Function &F);
+
+ /* Analyze instructions in a Function that access CPU states. */
+ void AnalyzeState(Function &F);
+
+ /* Compute state mapping information. */
+ void ComputeStateMap(StateMapping &StateMap, StateData &State);
+
+ /* Determine if the state can be operated as a vector. */
+ Type *TryVectorState(StateData &State, Type *Ty);
+
+ /* Map state references to the virtual states. */
+ void PromoteState(Function &F);
+
+ /* Rewrite state loads and stores. */
+ void RewriteLoad(StateMapping &StateMap, StateRef &Ref);
+ void RewriteStore(StateMapping &StateMap, StateRef &Ref);
+ void RewriteLoadVector(StateMapping &StateMap, StateRef &Ref);
+ void RewriteStoreVector(StateMapping &StateMap, StateRef &Ref);
+
+ /* Compute state and element types for element insertion and extraction. */
+ void getElementInfo(StateMapping &StateMap, StateRef &Ref, ElementInfo &Info);
+
+ /* Sync CPU states around helper calls. */
+ void SyncHelperState();
+
+ /* Store dirty states at the leaf blocks. */
+ void ProcessExitBB(BasicBlock *BB);
+
+ /* Get the pointer without GEP and BitCast. */
+ void StripPointer(Value *V, IVec &IV);
+
+ /* Move the pointer before InsertPos. */
+ void MoveStatePointer(Value *V);
+
+ /* Load state from Src and store it to Dest. */
+ void CopyState(Value *Dest, Value *Src, Instruction *InsertPos);
+
+ bool isLegalState(Value *Ptr, intptr_t &Off);
+
+    /* Return true if the input is an alias of a state pointer. */
+ bool isStatePointer(Value *V) {
+ if (auto BCI = dyn_cast<BitCastInst>(V)) {
+ if (BCI->getOperand(0) == CPU)
+ return true;
+ return isStatePointer(BCI->getOperand(0));
+ } else if (auto GEP = dyn_cast<GetElementPtrInst>(V))
+ return GEP->getOperand(0) == CPU;
+ return false;
+ }
+
+ bool isSimpleFunction(Function *F) {
+ HelperMap &Helpers = IF->getHelpers();
+ if (Helpers.find(F->getName()) == Helpers.end() ||
+ Helpers[F->getName()]->hasNestedCall)
+ return false;
+ return true;
+ }
+
+ Value *ConvertType(Value *V, Type *Ty, Instruction *InsertPos) {
+ return V->getType() == Ty ? V : new BitCastInst(V, Ty, "", InsertPos);
+ }
+};
+
+/* Return a pre-defined state name. */
+static std::string getStateName(intptr_t Off)
+{
+#if defined(TARGET_I386)
+ if (Off == offsetof(CPUArchState,xmm_regs[0])) return "xmm0";
+ if (Off == offsetof(CPUArchState,xmm_regs[1])) return "xmm1";
+ if (Off == offsetof(CPUArchState,xmm_regs[2])) return "xmm2";
+ if (Off == offsetof(CPUArchState,xmm_regs[3])) return "xmm3";
+ if (Off == offsetof(CPUArchState,xmm_regs[4])) return "xmm4";
+ if (Off == offsetof(CPUArchState,xmm_regs[5])) return "xmm5";
+ if (Off == offsetof(CPUArchState,xmm_regs[6])) return "xmm6";
+ if (Off == offsetof(CPUArchState,xmm_regs[7])) return "xmm7";
+ if (Off == offsetof(CPUArchState,xmm_t0)) return "xmm_t0";
+#endif
+ return "";
+}
+
+/* Determine if the offset is to access the temporary state. */
+static inline bool isLocalState(intptr_t Off)
+{
+#if defined(TARGET_I386)
+ if (Off == offsetof(CPUArchState, xmm_t0))
+ return true;
+#endif
+ return false;
+}
+
+/* Return true for states that should be ignored during state mapping. */
+static bool isSkipState(intptr_t Off)
+{
+ if (Off == (intptr_t)(offsetof(CPUState, tcg_exit_req) - ENV_OFFSET))
+ return true;
+
+#define stateof(X) \
+ (Off >= (intptr_t)offsetof(CPUArchState,X) && \
+ Off < (intptr_t)(offsetof(CPUArchState,X) + sizeof(((CPUArchState*)0)->X)))
+#define is_fpstatus(X) \
+ (stateof(X.float_detect_tininess) || \
+ stateof(X.float_rounding_mode) || \
+ stateof(X.float_exception_flags) || \
+ stateof(X.floatx80_rounding_precision) || \
+ stateof(X.flush_to_zero) || \
+ stateof(X.flush_inputs_to_zero) || \
+ stateof(X.default_nan_mode))
+
+#if defined(TARGET_ARM)
+ if (is_fpstatus(vfp.fp_status) || is_fpstatus(vfp.standard_fp_status))
+ return true;
+#elif defined(TARGET_I386)
+ if (is_fpstatus(fp_status))
+ return true;
+#endif
+ return false;
+
+#undef stateof
+#undef is_fpstatus
+}
+
+/* Check if the state is legal for state mapping. A legal state must have CPU
+ * as the base pointer, plus a non-negative constant offset. */
+bool StateMapper::isLegalState(Value *Ptr, intptr_t &Off)
+{
+ Value *Base = getBaseWithConstantOffset(DL, Ptr, Off);
+ if (Off < 0)
+ return false;
+ if (Base == CPU && !isSkipState(Off) && !IRFactory::isStateOfPC(Off))
+ return true;
+ return false;
+}
+
+/* Strip GEP and BitCast operations from a pointer. The stripped GEP and
+ * BitCast instructions are returned to the caller in IV. */
+void StateMapper::StripPointer(Value *V, IVec &IV)
+{
+ std::set<Value *> Visited;
+ Visited.insert(V);
+ do {
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
+ IV.push_back(GEP);
+ V = GEP->getOperand(0);
+ } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) {
+ IV.push_back(BCI);
+ V = BCI->getOperand(0);
+ } else
+ return;
+ if (Visited.find(V) != Visited.end())
+ break;
+ Visited.insert(V);
+ } while (true);
+}
+
+/* Move the instructions that compute the pointer before PreCastPos. */
+void StateMapper::MoveStatePointer(Value *V)
+{
+ IVec toMove;
+ StripPointer(V, toMove);
+ while (!toMove.empty()) {
+ Instruction *I = toMove.back();
+ toMove.pop_back();
+ if (I->getParent() == CPU->getParent())
+ continue;
+ I->moveBefore(PreCastPos);
+ }
+}
+
+/* Copy state data from the source address to the destination address. */
+void StateMapper::CopyState(Value *Dest, Value *Src, Instruction *InsertPos)
+{
+ if (!isa<AllocaInst>(Src)) {
+ MoveStatePointer(Src);
+ LoadInst *LI = new LoadInst(Src, "", false, InsertPos);
+ new StoreInst(LI, Dest, false, InsertPos);
+
+ if (Src->getType()->getPointerElementType()->isVectorTy())
+ LI->setAlignment(4);
+ } else {
+ MoveStatePointer(Dest);
+ LoadInst *LI = new LoadInst(Src, "", false, InsertPos);
+ StoreInst *SI = new StoreInst(LI, Dest, false, InsertPos);
+
+ if (Dest->getType()->getPointerElementType()->isVectorTy())
+ SI->setAlignment(4);
+ }
+}
+
+/* Store dirty states at the leaf blocks. */
+void StateMapper::ProcessExitBB(BasicBlock *BB)
+{
+ Instruction *InsertPos = nullptr;
+ for (auto BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) {
+ if (MDFactory::isExit(&*BI)) {
+ InsertPos = &*BI;
+ break;
+ }
+ }
+ if (!InsertPos)
+ InsertPos = BB->getTerminator();
+
+ for (auto &StateMap : StateMaps) {
+ if (!StateMap.hasStore || isLocalState(StateMap.getStart()))
+ continue;
+ CopyState(StateMap.Addr, StateMap.AI, InsertPos);
+ }
+}
+
+/* Sync CPU states around helper calls. */
+void StateMapper::SyncHelperState()
+{
+ CallList &Calls = Analyzer.getCalls();
+ if (Calls.empty())
+ return;
+
+ /*
+ * Rules of syncing states around calls:
+ * 1. Dirty states (i.e., stores) are written back before calls.
+     * 2. All mapped states, loaded or stored, are read back after calls.
+ *
+ * If the helper is a simple function, only dependent states are synced.
+ * If the helper is a complicated function, all states are synced.
+ */
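+    /* An illustrative sketch (assumed value names): for a state mapped to
+     * alloca %xmm0.a whose CPU address is %xmm0.addr, syncing around a call
+     * would look like:
+     *   %d = load <16 x i8>, <16 x i8>* %xmm0.a     ; write dirty copy back
+     *   store <16 x i8> %d, <16 x i8>* %xmm0.addr
+     *   call void @helper_foo(i8* %cpu)
+     *   %r = load <16 x i8>, <16 x i8>* %xmm0.addr  ; re-read state afterwards
+     *   store <16 x i8> %r, <16 x i8>* %xmm0.a
+     */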
+ HelperMap &Helpers = IF->getHelpers();
+ DenseMap<CallInst*, std::set<unsigned> > StoreBeforeCall;
+ DenseMap<CallInst*, std::set<unsigned> > LoadAfterCall;
+
+ for (auto CI : Calls) {
+ Function *Func = CI->getCalledFunction();
+ std::string Name = Func->getName();
+
+ if (isSimpleFunction(Func)) {
+            /* A pre-defined helper without nested calls. */
+ HelperInfo *Helper = Helpers[Name];
+ for (unsigned i = 0, e = StateMaps.size(); i != e; ++i) {
+ auto &StateMap = StateMaps[i];
+ if (StateMap.hasStore && StateMap.overlap(Helper->StateUse))
+ StoreBeforeCall[CI].insert(i);
+
+ if (StateMap.overlap(Helper->StateDef))
+ LoadAfterCall[CI].insert(i);
+
+ if (Helper->mayConflictArg) {
+ unsigned NumArgs = CI->getNumArgOperands();
+ for (unsigned j = 1; j < NumArgs; ++j) {
+ intptr_t Off = 0;
+ Value *Arg = CI->getArgOperand(j);
+ if (!isLegalState(Arg, Off))
+ continue;
+ if (Off + Helper->ConflictSize <= StateMap.getStart() ||
+ Off >= StateMap.getEnd())
+ continue;
+ if (StateMap.hasStore)
+ StoreBeforeCall[CI].insert(i);
+ LoadAfterCall[CI].insert(i);
+ }
+ }
+ }
+ } else {
+ /* Sync states for a complicated function (an unknown helper or a
+ * helper with nested calls). */
+ for (unsigned i = 0, e = StateMaps.size(); i != e; ++i) {
+ auto &StateMap = StateMaps[i];
+ if (StateMap.hasStore)
+ StoreBeforeCall[CI].insert(i);
+ LoadAfterCall[CI].insert(i);
+ }
+ }
+ }
+
+ /* Perform state syncing. */
+ for (auto CI : Calls) {
+ Instruction *InsertPos = CI;
+
+ if (!StoreBeforeCall.empty()) {
+ for (auto i : StoreBeforeCall[CI]) {
+ auto &StateMap = StateMaps[i];
+ CopyState(StateMap.Addr, StateMap.AI, InsertPos);
+ }
+ }
+
+ InsertPos = &*std::next(BasicBlock::iterator(InsertPos));
+ if (isa<UnreachableInst>(InsertPos)) {
+            /* No read-back is required after a tail call. */
+ continue;
+ }
+
+ if (!LoadAfterCall.empty()) {
+ for (auto i : LoadAfterCall[CI]) {
+ auto &StateMap = StateMaps[i];
+ CopyState(StateMap.AI, StateMap.Addr, InsertPos);
+ }
+ }
+ }
+}
+
+static inline bool isSameSize(StateMapping &StateMap, StateRef &Ref)
+{
+ return StateMap.getSize() == Ref.getSize();
+}
+
+/* Compute state and element types for element insertion and extraction. */
+void StateMapper::getElementInfo(StateMapping &StateMap, StateRef &Ref,
+ ElementInfo &Info)
+{
+ intptr_t StateSize = StateMap.getSize();
+ intptr_t Size = Ref.getSize();
+ intptr_t Shift = Ref.Start - StateMap.getStart();
+ Type *StateTy = StateMap.getType();
+ LLVMContext &Context = StateTy->getContext();
+
+ if (!StateMap.isVector()) {
+ /* Use int-N to emulate the state. */
+ Info.NumElts = 1;
+ Info.EltTy = Type::getIntNTy(Context, Size * 8);
+ Info.StateTy = Type::getIntNTy(Context, StateSize * 8);
+ Info.Shift = Shift;
+ return;
+ }
+
+ /* The state is emulated as a vector. */
+ if (StateSize % Size == 0 && Shift % Size == 0) {
+ Type *EltTy = Type::getIntNTy(Context, Size * 8);
+
+ Info.NumElts = 1;
+ Info.EltTy = EltTy;
+ Info.StateTy = VectorType::get(EltTy, StateSize / Size);
+ Info.Shift = Shift / Size;
+ } else {
+ VectorType *VecTy = cast<VectorType>(StateTy);
+ Type *EltTy = VecTy->getScalarType();
+ intptr_t EltSize = DL->getTypeSizeInBits(EltTy) / 8;
+
+ Info.NumElts = Size / EltSize;
+ Info.EltTy = VectorType::get(EltTy, Info.NumElts);
+ Info.StateTy = StateTy;
+ Info.Shift = Shift / EltSize;
+ }
+}
+
+void StateMapper::RewriteLoad(StateMapping &StateMap, StateRef &Ref)
+{
+ LoadInst *LI = cast<LoadInst>(Ref.I);
+ Type *Ty = LI->getType();
+ Instruction *InsertPos = LI;
+
+    /* The reference size matches the state size. */
+ if (isSameSize(StateMap, Ref)) {
+ Value *V = new LoadInst(StateMap.AI, "", false, InsertPos);
+ V = ConvertType(V, Ty, InsertPos);
+ LI->replaceAllUsesWith(V);
+ toErase.push_back(LI);
+ return;
+ }
+
+ if (StateMap.isVector()) {
+ RewriteLoadVector(StateMap, Ref);
+ return;
+ }
+
+    /* This is a non-vector state. Emulate the state as an int-N value and
+     * use logical shifts to extract/insert the element data. */
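+    /* Illustrative example: a 2-byte load at byte offset 2 of an 8-byte
+     * state becomes trunc(lshr(%state, 16)) to i16. */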
+ ElementInfo Info;
+ getElementInfo(StateMap, Ref, Info);
+
+ Value *V = new LoadInst(StateMap.AI, "", false, InsertPos);
+ V = ConvertType(V, Info.StateTy, InsertPos);
+
+ /* Extract the element. */
+ if (Info.Shift) {
+ Value *Shift = ConstantInt::get(V->getType(), Info.Shift * 8);
+ V = BinaryOperator::Create(Instruction::LShr, V, Shift, "", InsertPos);
+ }
+ V = new TruncInst(V, Info.EltTy, "", InsertPos);
+ V = ConvertType(V, Ty, InsertPos);
+
+ LI->replaceAllUsesWith(V);
+ toErase.push_back(LI);
+}
+
+void StateMapper::RewriteStore(StateMapping &StateMap, StateRef &Ref)
+{
+ StoreInst *SI = cast<StoreInst>(Ref.I);
+ Value *Data = SI->getValueOperand();
+ Instruction *InsertPos = SI;
+
+    /* The reference size matches the state size. */
+ if (isSameSize(StateMap, Ref)) {
+ Value *V = ConvertType(Data, StateMap.getType(), InsertPos);
+ new StoreInst(V, StateMap.AI, false, InsertPos);
+ toErase.push_back(SI);
+ return;
+ }
+
+ if (StateMap.isVector()) {
+ RewriteStoreVector(StateMap, Ref);
+ return;
+ }
+
+    /* This is a non-vector state. Emulate the state as an int-N value and
+     * use logical shifts to extract/insert the element data. */
+ ElementInfo Info;
+ getElementInfo(StateMap, Ref, Info);
+
+ Value *V = new LoadInst(StateMap.AI, "", false, InsertPos);
+ V = ConvertType(V, Info.StateTy, InsertPos);
+
+ /* Insert the element. */
+ Data = ConvertType(Data, Info.EltTy, InsertPos);
+ Data = new ZExtInst(Data, Info.StateTy, "", InsertPos);
+
+ if (Info.Shift) {
+ Value *Shift = ConstantInt::get(Data->getType(), Info.Shift * 8);
+ Data = BinaryOperator::Create(Instruction::Shl, Data, Shift, "", InsertPos);
+ }
+
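+    /* Build a mask that clears the bits [loBit, hiBit) occupied by the
+     * element, so the new data can be OR'ed in. E.g., an i16 store at byte
+     * offset 2 of an i64 state uses the mask 0xFFFFFFFF0000FFFF. */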
+ unsigned numBits = StateMap.getSize() * 8;
+ unsigned loBit = Info.Shift * 8, hiBit = loBit + Ref.getSize() * 8;
+ APInt mask = ~APInt::getBitsSet(numBits, loBit, hiBit);
+ Value *Mask = ConstantInt::get(Data->getContext(), mask);
+
+ V = BinaryOperator::Create(Instruction::And, V, Mask, "", InsertPos);
+ V = BinaryOperator::Create(Instruction::Or, V, Data, "", InsertPos);
+ V = ConvertType(V, StateMap.getType(), InsertPos);
+
+ new StoreInst(V, StateMap.AI, false, InsertPos);
+ toErase.push_back(SI);
+}
+
+void StateMapper::RewriteLoadVector(StateMapping &StateMap, StateRef &Ref)
+{
+ LoadInst *LI = cast<LoadInst>(Ref.I);
+ Type *Ty = LI->getType();
+ Instruction *InsertPos = LI;
+
+ /* Compute offset, size and element type of this vector operation. */
+ ElementInfo Info;
+ getElementInfo(StateMap, Ref, Info);
+
+ Value *V = new LoadInst(StateMap.AI, "", false, InsertPos);
+ V = ConvertType(V, Info.StateTy, InsertPos);
+
+ /* Extract the element(s) from the vector value. */
+ IntegerType *I32 = IntegerType::get(V->getContext(), 32);
+
+ if (Info.EltTy->isVectorTy()) {
+ /* Multiple elements to load. Use shufflevector. */
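+        /* Illustrative example: an 8-byte load at byte offset 8 of a
+         * <4 x i32> state yields NumElts = 2 and Shift = 2, i.e. the
+         * shuffle mask <2, 3>. */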
+ Value *UndefVal = UndefValue::get(Info.StateTy);
+ SmallVector<Constant*, 8> Indices;
+        for (unsigned i = 0, e = Info.NumElts; i != e; ++i)
+ Indices.push_back(ConstantInt::get(I32, Info.Shift + i));
+ Value *CV = ConstantVector::get(Indices);
+ V = new ShuffleVectorInst(V, UndefVal, CV, "", InsertPos);
+ } else {
+ /* Only one element. Use extractelement. */
+ V = ExtractElementInst::Create(V,
+ ConstantInt::get(I32, Info.Shift), "", InsertPos);
+ }
+
+ V = ConvertType(V, Ty, InsertPos);
+
+ LI->replaceAllUsesWith(V);
+ toErase.push_back(LI);
+}
+
+void StateMapper::RewriteStoreVector(StateMapping &StateMap, StateRef &Ref)
+{
+ StoreInst *SI = cast<StoreInst>(Ref.I);
+ Value *Data = SI->getValueOperand();
+ Instruction *InsertPos = SI;
+
+ /* Compute offset, size and element type of this vector operation. */
+ ElementInfo Info;
+ getElementInfo(StateMap, Ref, Info);
+
+ Value *V = new LoadInst(StateMap.AI, "", false, InsertPos);
+ V = ConvertType(V, Info.StateTy, InsertPos);
+ Data = ConvertType(Data, Info.EltTy, InsertPos);
+
+    /* Extract the element(s) from the data and insert them into the vector
+     * value. */
+ IntegerType *I32 = IntegerType::get(V->getContext(), 32);
+
+ if (Info.EltTy->isVectorTy()) {
+ SmallVector<Value *, 8> Partial;
+ for (unsigned i = 0, e = Info.NumElts; i != e; ++i) {
+ Partial.push_back(ExtractElementInst::Create(Data,
+ ConstantInt::get(I32, i), "", InsertPos));
+ }
+ for (unsigned i = 0, e = Info.NumElts; i != e; ++i) {
+ V = InsertElementInst::Create(V, Partial[i],
+ ConstantInt::get(I32, Info.Shift + i), "", InsertPos);
+ }
+ } else {
+ /* Only one element. Use insertelement. */
+ V = InsertElementInst::Create(V, Data,
+ ConstantInt::get(I32, Info.Shift), "", InsertPos);
+ }
+
+ V = ConvertType(V, StateMap.getType(), InsertPos);
+
+ new StoreInst(V, StateMap.AI, false, InsertPos);
+ toErase.push_back(SI);
+}
+
+/* Map state references to the virtual states. */
+void StateMapper::PromoteState(Function &F)
+{
+ /* Pre-load CPU states. */
+ Type *IntPtrTy = DL->getIntPtrType(CPU->getContext());
+ for (auto &StateMap : StateMaps) {
+ if (!StateMap.Addr) {
+ Value *Off = ConstantInt::get(IntPtrTy, StateMap.getStart());
+ Value *GEP = GetElementPtrInst::CreateInBounds(CPU, Off, "",
+ PreCastPos);
+ StateMap.Addr = new BitCastInst(GEP,
+ PointerType::getUnqual(StateMap.getType()), "",
+ PreCastPos);
+ }
+
+ std::string StateName = StateMap.Addr->getName();
+ if (StateName == "")
+ StateName = getStateName(StateMap.getStart());
+ if (StateName == "")
+ StateName = "state";
+ StateName.append(".a");
+
+ StateMap.AI = CreateAlloca(StateMap.getType(), 0, StateName, PreCastPos);
+ CopyState(StateMap.AI, StateMap.Addr, PreLoadPos);
+ }
+
+ /* Rewrite loads and stores. */
+ for (auto &StateMap : StateMaps) {
+ for (auto Ref : StateMap.State->Refs) {
+ if (isa<LoadInst>(Ref->I))
+ RewriteLoad(StateMap, *Ref);
+ else
+ RewriteStore(StateMap, *Ref);
+ }
+ }
+
+ /* Sync CPU states around helper calls. */
+ SyncHelperState();
+
+ /* Post-store dirty values back to CPU states for each exiting block. */
+ for (auto BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
+ BasicBlock *BB = &*BI;
+ if (distance(succ_begin(BB), succ_end(BB)) == 0) /* leaf node */
+ ProcessExitBB(BB);
+ }
+}
+
+/* Determine if the state can be operated on as a vector. */
+Type *StateMapper::TryVectorState(StateData &State, Type *Ty)
+{
+ intptr_t StateStart = State.Start;
+ intptr_t StateEnd = State.End;
+ intptr_t StateSize = StateEnd - StateStart;
+
+ /* If the reference type (from the IR) is already a vector type, use it.
+ * Otherwise, query StateType to determine if it is a vector state. */
+ VectorType *VecTy = dyn_cast<VectorType>(Ty);
+ if (!VecTy) {
+ auto TI = --StateType.upper_bound(StateStart);
+ for (; TI != StateType.end() && TI->first < StateEnd; ++TI) {
+ if (TI->second->isVectorTy()) {
+ VecTy = cast<VectorType>(TI->second);
+ break;
+ }
+ }
+ }
+
+ if (!VecTy)
+ return nullptr;
+
+    /* This is a vector state. Now check whether all state refs can be
+     * composed from whole vector elements: (a) the state size is a multiple
+     * of the vector element size, and (b) the size and shift of each state
+     * ref are both multiples of the vector element size. */
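+    /* E.g., for a 16-byte state typed <4 x i32> (4-byte elements), refs of
+     * size 4, 8 or 16 at 4-byte-aligned offsets qualify. */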
+ Type *ElementTy = VecTy->getScalarType();
+ intptr_t ElementSize = DL->getTypeSizeInBits(ElementTy) / 8;
+ if (StateSize % ElementSize != 0)
+ return nullptr;
+
+ for (auto Ref : State.Refs) {
+ if (Ref->getSize() % ElementSize != 0 ||
+ (Ref->Start - StateStart) % ElementSize != 0)
+ return nullptr;
+ }
+ return VectorType::get(ElementTy, StateSize / ElementSize);
+}
+
+/* Compute state mapping information based on the state mapping rules. */
+void StateMapper::ComputeStateMap(StateMapping &StateMap, StateData &State)
+{
+    /* State mapping rules:
+     * - A guest state is not overlapped (i.e., all refs have the same
+     *   access size):
+     *   - Same type: map to this type.
+     *   - Different types: select the type in the order: vector, float and
+     *     integer; use bitcast to convert between the types.
+     * - A guest state overlaps with other state(s):
+     *   - Query StateType to find the state size (i.e., boundary) and type:
+     *     - Vector type: use insert/extract to manipulate a vector element.
+     *     - Other types: use shifts to manipulate a sub-register element. */
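+    /* E.g. (illustrative): if a 16-byte state is accessed both as <4 x i32>
+     * and as i32, the refs overlap with different sizes, so the state is
+     * mapped to a <4 x i32> vector and each i32 ref is rewritten with
+     * extractelement/insertelement. */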
+ bool sameSize = true;
+ bool hasLoad = false;
+ bool hasStore = false;
+
+ for (auto Ref : State.Refs) {
+ hasLoad |= isa<LoadInst>(Ref->I);
+ hasStore |= isa<StoreInst>(Ref->I);
+ }
+
+ StateRef *Ref = State.Refs.front();
+ Type *Ty = Ref->getType();
+ Value *Addr = getPointerOperand(Ref->I);
+ intptr_t Size = Ref->getSize();
+
+ for (unsigned i = 1, e = State.Refs.size(); i != e; ++i) {
+ StateRef *NextRef = State.Refs[i];
+ Type *NextTy = NextRef->getType();
+ Value *NextAddr = getPointerOperand(NextRef->I);
+
+ /* Check type. */
+ if (Ty != NextTy) {
+ /* Select type in the order: vector, float and integer. */
+ bool Swap = false;
+ if (Ty->isVectorTy() && NextTy->isVectorTy()) {
+                /* We prefer the vector type with the smaller element type. */
+ Type *ATy = cast<VectorType>(Ty)->getScalarType();
+ Type *BTy = cast<VectorType>(NextTy)->getScalarType();
+ if (DL->getTypeSizeInBits(BTy) < DL->getTypeSizeInBits(ATy))
+ Swap = true;
+ } else if (!Ty->isVectorTy() && NextTy->isVectorTy()) {
+ Swap = true;
+ } else if (Ty->isIntegerTy() && NextTy->isFloatTy()) {
+ Swap = true;
+ }
+
+ if (Swap) {
+ std::swap(Ty, NextTy);
+ std::swap(Addr, NextAddr);
+ }
+ }
+
+ /* Check size. */
+ if (Size != NextRef->getSize())
+ sameSize = false;
+ }
+
+ if (sameSize) {
+        /* All references have the same size as the state. */
+ StateMap.Ty = Ty;
+ StateMap.Addr = Addr;
+ } else {
+ /* Different reference sizes. */
+ intptr_t StateSize = State.End - State.Start;
+ Type *VecTy = TryVectorState(State, Ty);
+ StateMap.Ty = VecTy ? VecTy
+ : Type::getIntNTy(Ty->getContext(), StateSize * 8);
+ StateMap.Addr = nullptr;
+ }
+ StateMap.State = &State;
+ StateMap.hasLoad = hasLoad;
+ StateMap.hasStore = hasStore;
+}
+
+/* Analyze instructions in a Function that access CPU states. */
+void StateMapper::AnalyzeState(Function &F)
+{
+    /* Collect the instructions (load/store/call) that access CPU states.
+     * Loads/stores that access guest memory, or that are marked volatile
+     * (e.g., accesses to the %pc and %tcg_exit_req states), are ignored. */
+
+ for (auto II = inst_begin(F), EE = inst_end(F); II != EE; ++II) {
+ Instruction *I = &*II;
+ intptr_t Off = 0;
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (MDFactory::isGuestMemory(I) || LI->isVolatile())
+ continue;
+
+ if (isLegalState(LI->getPointerOperand(), Off))
+ Analyzer.addStateRef(I, Off);
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (MDFactory::isGuestMemory(I) || SI->isVolatile())
+ continue;
+
+ if (isLegalState(SI->getPointerOperand(), Off))
+ Analyzer.addStateRef(I, Off);
+ } else if (CallInst *CI = dyn_cast<CallInst>(I)) {
+            /* Skip const helpers, inline asm and intrinsic function calls. */
+ if (MDFactory::isConst(CI))
+ continue;
+ if (CI->isInlineAsm() || isa<IntrinsicInst>(CI))
+ continue;
+
+ Analyzer.addCall(CI);
+ }
+ }
+
+ /* Ask Analyzer to put state references into groups. */
+ Analyzer.computeState();
+
+ StateList &States = Analyzer.getStateList();
+ if (States.empty())
+ return;
+
+ /* Compute state mapping info. */
+ StateMaps.resize(States.size());
+ for (unsigned i = 0, e = States.size(); i != e; ++i)
+ ComputeStateMap(StateMaps[i], States[i]);
+}
+
+/* Rearrange instructions in the 'init' block. */
+bool StateMapper::init(Function &F)
+{
+ /*
+     * Rearrange the instructions in the 'init' block so that the gep/cast
+     * instructions come before all the other instructions in the block.
+ * For example:
+ * %0 = getelementptr i8* %cpu, i64 0
+ * %1 = bitcast i8* %0 to i32* # gep/cast insns
+ * -------------------------------------- # precast_pos
+ * -------------------------------------- # preload_pos
+ * %2 = load i32, i32* %1 # the other insns
+ * br label %entry
+ */
+ CPU = IF->getDefaultCPU(F);
+ if (!CPU || CPU->getParent() != &F.getEntryBlock())
+ return false;
+
+ Instruction *InsertPos = &*std::next(BasicBlock::iterator(CPU));
+ PreLoadPos = new UnreachableInst(CPU->getContext(), InsertPos);
+ PreCastPos = new UnreachableInst(CPU->getContext(), PreLoadPos);
+
+ toErase.push_back(PreLoadPos);
+ toErase.push_back(PreCastPos);
+
+ /* Move gep/cast instructions. */
+ IVec toMove;
+ BasicBlock *BB = CPU->getParent();
+ for (auto BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) {
+ Instruction *I = &*BI;
+ if (isStatePointer(I))
+ toMove.push_back(I);
+ }
+ for (auto I : toMove)
+ I->moveBefore(PreCastPos);
+
+ return true;
+}
+
+/*
+ * StateMappingPass
+ */
+bool StateMappingPass::runOnFunction(Function &F)
+{
+ return StateMapper(IF).run(F);
+}
+
+char StateMappingPass::ID = 0;
+INITIALIZE_PASS(StateMappingPass, "statemap",
+ "Eliminate redundant loads/stores by mapping CPU states", false, false)
+
+FunctionPass *llvm::createStateMappingPass(IRFactory *IF)
+{
+ return new StateMappingPass(IF);
+}
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */