author | Timothy Pearson <tpearson@raptorengineering.com> | 2019-11-29 19:00:14 -0600
---|---|---
committer | Timothy Pearson <tpearson@raptorengineering.com> | 2019-11-29 19:02:28 -0600
commit | 4b3250c5073149c59c5c11e06c2c0d93b6a9f5ff (patch) |
tree | dce73321255f834f7b2d4c16fa49760edb534f27 /llvm/pass |
parent | a58047f7fbb055677e45c9a7d65ba40fbfad4b92 (diff) |
download | hqemu-2.5.1_overlay.zip hqemu-2.5.1_overlay.tar.gz |
Initial overlay of HQEMU 2.5.2 changes onto underlying 2.5.1 QEMU GIT tree (ref: 2.5.1_overlay)
Diffstat (limited to 'llvm/pass')
-rw-r--r-- | llvm/pass/CombineCasts.cpp | 321
-rw-r--r-- | llvm/pass/CombineGuestMemory.cpp | 389
-rw-r--r-- | llvm/pass/CombineZExtTrunc.cpp | 70
-rw-r--r-- | llvm/pass/FastMathPass.cpp | 87
-rw-r--r-- | llvm/pass/ProfileExec.cpp | 172
-rw-r--r-- | llvm/pass/RedundantStateElimination.cpp | 179
-rw-r--r-- | llvm/pass/ReplaceIntrinsic.cpp | 137
-rw-r--r-- | llvm/pass/SimplifyPointer.cpp | 334
-rw-r--r-- | llvm/pass/StateMappingPass.cpp | 885
9 files changed, 2574 insertions, 0 deletions
diff --git a/llvm/pass/CombineCasts.cpp b/llvm/pass/CombineCasts.cpp new file mode 100644 index 0000000..71a74ff --- /dev/null +++ b/llvm/pass/CombineCasts.cpp @@ -0,0 +1,321 @@ +/* + * (C) 2015 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#include "llvm/Transforms/Utils/Local.h" +#include "llvm-target.h" +#include "llvm-opc.h" +#include "llvm-pass.h" +#include "utils.h" + +#define PASS_NAME "CombineCasts" + +/* + * CombineCasts Pass + */ +class CombineCasts : public FunctionPass { + IRFactory *IF; + const DataLayout *DL; + MDFactory *MF; + IntegerType *Int8Ty; + IntegerType *Int32Ty; + IntegerType *Int64Ty; + IntegerType *IntPtrTy; + PointerType *Int8PtrTy; + PointerType *Int32PtrTy; + PointerType *Int64PtrTy; + Type *FloatTy; + Type *DoubleTy; + IVec toErase; + +public: + static char ID; + explicit CombineCasts() : FunctionPass(ID) {} + explicit CombineCasts(IRFactory *IF) + : FunctionPass(ID), IF(IF), DL(IF->getDL()), MF(IF->getMDFactory()) + { + LLVMContext &Context = IF->getContext();; + Int8Ty = IntegerType::get(Context, 8); + Int32Ty = IntegerType::get(Context, 32); + Int64Ty = IntegerType::get(Context, 64); + IntPtrTy = DL->getIntPtrType(Context); + Int8PtrTy = Type::getInt8PtrTy(Context, 0); + Int32PtrTy = Type::getInt32PtrTy(Context, 0); + Int64PtrTy = Type::getInt64PtrTy(Context, 0); + FloatTy = Type::getFloatTy(Context); + DoubleTy = Type::getDoubleTy(Context); + } + + Instruction *getUniqueUser(Instruction *I) { + if (I->hasOneUse()) + return I->user_back(); + return nullptr; + }; + + bool combineLoadCast(LoadInst *LI); + bool combineStoreCast(StoreInst *SI); + bool combineCastCast(Function &F); + bool simplifySignChange(Function &F); + bool runOnFunction(Function &F); +}; + +char CombineCasts::ID = 0; +INITIALIZE_PASS(CombineCasts, "combinecast", + "Combine bitcast with guest memory loads/stores", false, false) + +FunctionPass *llvm::createCombineCasts(IRFactory *IF) +{ + return new CombineCasts(IF); +} + +static bool hasSameCastingTy(ArrayRef<BitCastInst *> IL) { + Type *SrcTy = IL[0]->getSrcTy(); + Type *DstTy = IL[0]->getDestTy(); + for (BitCastInst *I : IL) { + if (I->getSrcTy() != SrcTy) + return false; + if (I->getDestTy() != DstTy) + return false; + } + return true; +} + +/* This function aims to change the load type if (1) the type of loaded data is + * casted to another type, (2) only one user of the load instruction is bitcast, + * and (3) all other users of the load instruction are stores. + * + * For example: + * %0 = load <typeA>* %0 = load <typeB>* + * %1 = bitcast %0, <typeB> %1 = bitcast %0, <typeA> + * + * %2 = op <typeB> %1, ... => %2 = op <typeB> %0, ... + * + * store %0, <typeA>* store %1, <typeA>* + * store %1, <typeB>* store %0, <typeB>* + */ +bool CombineCasts::combineLoadCast(LoadInst *LI) +{ + Instruction *Ptr = dyn_cast<Instruction>(LI->getPointerOperand()); + + if (!Ptr) + return false; + + /* Find all bitcast users of this load. 
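+ * Users other than bitcast, phi, load and store abort the transformation,
+ * and all collected bitcasts must share the same source and destination
+ * types.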
*/ + SmallVector<BitCastInst *, 4> BCIs; + for (User *U : LI->users()) { + Instruction *UI = cast<Instruction>(U); + switch (UI->getOpcode()) { + default: + return false; + case Instruction::PHI: + case Instruction::Load: + case Instruction::Store: + break; + case Instruction::BitCast: + BCIs.push_back(cast<BitCastInst>(UI)); + break; + } + } + + if (BCIs.empty() || !hasSameCastingTy(BCIs)) + return false; + + Instruction *InsertPos = LI; + unsigned Alignment = LI->getAlignment(); + unsigned Volatile = LI->isVolatile(); + Type *SrcTy = LI->getType(); + Type *DstTy = BCIs[0]->getDestTy(); + + Type *PtrTy = PointerType::get(DstTy, LI->getPointerAddressSpace()); + if (isa<IntToPtrInst>(Ptr)) + Ptr = new IntToPtrInst(Ptr->getOperand(0), PtrTy, "", InsertPos); + else + Ptr = new BitCastInst(Ptr, PtrTy, "", InsertPos); + + Instruction *NewLI = new LoadInst(Ptr, "", Volatile, Alignment, InsertPos); + Instruction *NewBCI = new BitCastInst(NewLI, SrcTy, "", InsertPos); + + if (MF->isGuestMemory(LI)) + MF->setGuestMemory(NewLI); + for (BitCastInst *BCI : BCIs) + BCI->replaceAllUsesWith(NewLI); + LI->replaceAllUsesWith(NewBCI); + + toErase.push_back(LI); + for (BitCastInst *BCI : BCIs) + toErase.push_back(BCI); + + return true; +} + +/* This function aims to change the store type if stored data is casted from + * another type. + * + * For example: + * %0 = <typeA> + * %1 = bitcast %0, <typeB> => store %0, <typeA>* + * store %1, <typeB>* + */ +bool CombineCasts::combineStoreCast(StoreInst *SI) +{ + Instruction *Ptr = dyn_cast<Instruction>(SI->getPointerOperand()); + Instruction *Data = dyn_cast<Instruction>(SI->getValueOperand()); + + if (!Ptr || !Data || !isa<BitCastInst>(Data)) + return false; + + Instruction *InsertPos = SI; + unsigned Alignment = SI->getAlignment(); + unsigned Volatile = SI->isVolatile(); + BitCastInst *BCI = cast<BitCastInst>(Data); + Value *V = BCI->getOperand(0); + Type *SrcTy = V->getType(); + + Type *PtrTy = PointerType::get(SrcTy, SI->getPointerAddressSpace()); + if (isa<IntToPtrInst>(Ptr)) + Ptr = new IntToPtrInst(Ptr->getOperand(0), PtrTy, "", InsertPos); + else + Ptr = new BitCastInst(Ptr, PtrTy, "", InsertPos); + + Instruction *NewSI = new StoreInst(V, Ptr, Volatile, Alignment, InsertPos); + + if (MF->isGuestMemory(SI)) + MF->setGuestMemory(NewSI); + + toErase.push_back(SI); + return true; +} + +/* This function aims to eliminate redundant casts. + * For example: + * %0 = <typeA> %0 = <typeA> + * %1 = bitcast %0, <typeB> => + * %2 = bitcast %1, <typeC> %2 = bitcast %0, <typeC> + * = op <typeC> %2, ... = op <typeC> %2, ... + * + * And if <typeA> is the same as <typeC>, the code is further optimized to + * %0 = <typeA> %0 = <typeA> + * %1 = bitcast %0, <typeB> => + * %2 = bitcast %1, <typeC> + * = op <typeC> %2, ... = op <typeA> %0, ... + */ +bool CombineCasts::combineCastCast(Function &F) +{ + SmallVector<Instruction*, 4> Worklist; + for (auto II = inst_begin(F), EE = inst_end(F); II != EE; II++) { + Instruction *I = &*II; + if (isa<BitCastInst>(I)) + Worklist.push_back(I); + } + + for (auto I : Worklist) { + BitCastInst *CI = cast<BitCastInst>(I); + BitCastInst *CSrc = dyn_cast<BitCastInst>(CI->getOperand(0)); + if (!CSrc) + continue; + + Type *SrcTy = CSrc->getOperand(0)->getType(); + Type *DstTy = CI->getType(); + Value *Result = (SrcTy == DstTy) ? 
CSrc->getOperand(0) : + new BitCastInst(CSrc->getOperand(0), CI->getType(), "", CI); + I->replaceAllUsesWith(Result); + toErase.push_back(I); + } + + if (toErase.empty()) + return false; + + ProcessErase(toErase); + return true; +} + +/* This function converts sign change of float/double data (i.e., -num), + * which is implemented with integer operations, to use float/double ops. + * For example: + * %0 = bitcast float %num to i32 + * %1 = xor i32 %0, 0x80000000 => %0 = fsub float 0, %num + * %2 = bitcast %1, float + */ +bool CombineCasts::simplifySignChange(Function &F) +{ + SmallVector<BitCastInst*, 16> Worklist; + + for (auto II = inst_begin(F), EE = inst_end(F); II != EE; II++) { + Instruction *I = &*II; + if (BitCastInst *BCI = dyn_cast<BitCastInst>(I)) { + Type *SrcTy = BCI->getSrcTy(); + Type *DstTy = BCI->getDestTy(); + if (SrcTy == FloatTy && DstTy == Int32Ty) + Worklist.push_back(BCI); + else if (SrcTy == DoubleTy && DstTy == Int64Ty) + Worklist.push_back(BCI); + } + } + + for (auto I : Worklist) { + Type *Ty = I->getSrcTy(); + Value *C = (Ty == FloatTy) ? CONST32(0x80000000) + : CONST64(0x8000000000000000LL); + + /* Check whether the single user of this bitcast is Xor. */ + Instruction *UI = getUniqueUser(I); + if (UI && UI->getOpcode() == Instruction::Xor && UI->getOperand(1) == C) { + /* Check whether the single user of this Xor is a bitcast + * instruction that casts the type back to the src type. */ + Instruction *UUI = getUniqueUser(UI); + if (UUI && UUI->getOpcode() == Instruction::BitCast && + cast<BitCastInst>(UUI)->getDestTy() == Ty) { + Value *V = BinaryOperator::Create(Instruction::FSub, + ConstantFP::get(Ty, 0), + I->getOperand(0), "", I); + UUI->replaceAllUsesWith(V); + toErase.push_back(UUI); + } + } + } + + if (toErase.empty()) + return false; + + ProcessErase(toErase); + return true; +} + +bool CombineCasts::runOnFunction(Function &F) +{ + bool Changed = false; + SmallVector<LoadInst *, 16> Loads; + SmallVector<StoreInst *, 16> Stores; + + /* Collect all guest memory and non-volatile cpu state loads/stores. */ + for (auto II = inst_begin(F), EE = inst_end(F); II != EE; II++) { + Instruction *I = &*II; + + if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + if (MF->isGuestMemory(LI) || !LI->isVolatile()) + Loads.push_back(LI); + } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { + if (MF->isGuestMemory(SI) || !SI->isVolatile()) + Stores.push_back(SI); + } + } + + for (auto LI : Loads) + Changed |= combineLoadCast(LI); + for (auto SI : Stores) + Changed |= combineStoreCast(SI); + + if (toErase.size()) + ProcessErase(toErase); + + Changed |= combineCastCast(F); + Changed |= simplifySignChange(F); + + return Changed; +} + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/llvm/pass/CombineGuestMemory.cpp b/llvm/pass/CombineGuestMemory.cpp new file mode 100644 index 0000000..0740a8b --- /dev/null +++ b/llvm/pass/CombineGuestMemory.cpp @@ -0,0 +1,389 @@ +/* + * (C) 2015 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. 
+ */ + +#include "llvm-debug.h" +#include "llvm-opc.h" +#include "llvm-target.h" +#include "llvm-pass.h" +#include "utils.h" + +#define PASS_NAME "CombineGuestMemory" + +/* + * CombineGuestMemory Pass + */ +class CombineGuestMemory : public FunctionPass { + + struct StateInfo { + StateInfo() : Ptr(nullptr) {} + StateInfo(Value *ptr, APInt &offset, APInt &size) + : Ptr(ptr), Offset(offset), Size(size) {} + Value *Ptr; + APInt Offset; + APInt Size; + }; + + typedef std::pair<Value *, Value *> ValuePair; + typedef std::map<size_t, size_t> StateMap; + typedef DenseMap<ValuePair, StateInfo> CSMap; + + IRFactory *IF; + const DataLayout *DL; + MDFactory *MF; + IntegerType *Int8Ty; + IntegerType *Int32Ty; + IntegerType *Int64Ty; + IntegerType *IntPtrTy; + PointerType *Int8PtrTy; + PointerType *Int32PtrTy; + PointerType *Int64PtrTy; + Value *CPU; + Value *GuestBase; + Instruction *InitLastInst; + StateMap LegalStates; + IVec toErase; + +public: + static char ID; + explicit CombineGuestMemory() : FunctionPass(ID) {} + explicit CombineGuestMemory(IRFactory *IF) + : FunctionPass(ID), IF(IF), DL(IF->getDL()), MF(IF->getMDFactory()) + { + LLVMContext &Context = IF->getContext();; + Int8Ty = IntegerType::get(Context, 8); + Int32Ty = IntegerType::get(Context, 32); + Int64Ty = IntegerType::get(Context, 64); + IntPtrTy = DL->getIntPtrType(Context); + Int8PtrTy = Type::getInt8PtrTy(Context, 0); + Int32PtrTy = Type::getInt32PtrTy(Context, 0); + Int64PtrTy = Type::getInt64PtrTy(Context, 0); + + GuestBase = IF->getGuestBase(); + + addLegalStates(); + } + + unsigned getAddressSpaceOperand(Value *I) { + if (LoadInst *LI = dyn_cast<LoadInst>(I)) + return LI->getPointerAddressSpace(); + if (StoreInst *SI = dyn_cast<StoreInst>(I)) + return SI->getPointerAddressSpace(); + return -1U; + } + + int getNumUsers(Instruction *I) { + return distance(I->user_begin(), I->user_end()); + } + + void addLegalStates(); + bool isLegalState(Value *Ptr, APInt &Offset, APInt &Size); + bool isConsecutiveAccess(Value *A, Value *B, Value *&Ptr, APInt &Offset, APInt &Size); + bool tryCombineLoad(Value *A, Value *B, CSMap &States); + bool tryCombineStore(Value *A, Value *B, CSMap &States); + bool combineMemory(SmallVector<Value *, 8> &Memory, SmallVector<Value *, 8> &States); + bool runOnFunction(Function &F); +}; + +char CombineGuestMemory::ID = 0; +INITIALIZE_PASS(CombineGuestMemory, "combinegm", + "Combine guest memory loads and stores", false, false) + +FunctionPass *llvm::createCombineGuestMemory(IRFactory *IF) +{ + return new CombineGuestMemory(IF); +} + + +void CombineGuestMemory::addLegalStates() +{ +#if defined(TARGET_I386) + size_t Start = offsetof(CPUArchState, xmm_regs[0]); + size_t Size = sizeof(XMMReg); + for (int i = 0; i < CPU_NB_REGS; ++i) + LegalStates[Start + Size * i] = Size; +#elif defined(TARGET_ARM) + size_t Start = offsetof(CPUArchState, vfp.regs[0]); + size_t Size = sizeof(float64) * 2; + for (int i = 0; i < 32; ++i) + LegalStates[Start + Size * i] = Size; +#endif +} + +bool CombineGuestMemory::isConsecutiveAccess(Value *A, Value *B, Value *&Ptr, + APInt &Offset, APInt &Size) +{ + Value *PtrA = getPointerOperand(A); + Value *PtrB = getPointerOperand(B); + unsigned ASA = getAddressSpaceOperand(A); + unsigned ASB = getAddressSpaceOperand(B); + + if (!PtrA || !PtrB || (ASA != ASB)) + return false; + + Type *TyA = cast<PointerType>(PtrA->getType())->getElementType(); + Type *TyB = cast<PointerType>(PtrB->getType())->getElementType(); + if (DL->getTypeStoreSize(TyA) != DL->getTypeStoreSize(TyB)) + return false; + + 
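/* Two accesses are consecutive when they strip to the same base pointer
+ * and their constant offsets differ by exactly the access size, e.g.
+ * offsets 0x10 and 0x18 for two 8-byte accesses. */
+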
unsigned PtrBitWidth = DL->getTypeSizeInBits(TyA); + APInt Sz(PtrBitWidth, DL->getTypeStoreSize(TyA)); + + APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0); + PtrA = StripPointerWithConstantOffset(DL, PtrA, OffsetA, GuestBase); + PtrB = StripPointerWithConstantOffset(DL, PtrB, OffsetB, GuestBase); + + APInt OffsetDelta = OffsetB - OffsetA; + if (PtrA == PtrB && OffsetDelta == Sz) { + Ptr = PtrA; + Offset = OffsetA; + Size = Sz + Sz; + return true; + } + + return false; +} + +bool CombineGuestMemory::isLegalState(Value *Ptr, APInt &Offset, APInt &Size) +{ + if (Ptr != CPU) + return false; + uint64_t Start = Offset.getZExtValue(); + if (LegalStates.find(Start) == LegalStates.end() || + Size.getZExtValue() > LegalStates[Start]) + return false; + return true; +} + +static bool hasMemoryViolation(Instruction *SA, Instruction *SB, + Instruction *EA, Instruction *EB) +{ + std::set<Value*> Insts; + Insts.insert(SA); + Insts.insert(SB); + Insts.insert(EA); + Insts.insert(EB); + + BasicBlock::iterator BI = BasicBlock::iterator(SA); + BasicBlock::iterator BE = BasicBlock::iterator(EA); + for (; BI != BE; ++BI) { + Instruction *I = &*BI; + if (isa<CallInst>(I)) + return true; + if (!isa<LoadInst>(I) && !isa<StoreInst>(I)) + continue; + if (Insts.find(I) == Insts.end()) + return true; + } + + BI = BasicBlock::iterator(SB); + BE = BasicBlock::iterator(EB); + for (; BI != BE; ++BI) { + Instruction *I = &*BI; + if (isa<CallInst>(I)) + return true; + if (!isa<LoadInst>(I) && !isa<StoreInst>(I)) + continue; + if (Insts.find(I) == Insts.end()) + return true; + } + return false; +} + +bool CombineGuestMemory::tryCombineLoad(Value *A, Value *B, CSMap &States) +{ + /* First, check if the guest loads are 'only' used by the store instructions + * to consecutive CPU states, and if any other loads/stores occurs between + * the queried operation. */ + LoadInst *LA = cast<LoadInst>(A); + LoadInst *LB = cast<LoadInst>(B); + if (getNumUsers(LA) != 1 || getNumUsers(LB) != 1) + return false; + + Value *VA = *LA->user_begin(); + Value *VB = *LB->user_begin(); + CSMap::iterator CSI = States.find(ValuePair(VA, VB)); + if (CSI == States.end()) + return false; + + Instruction *SA = cast<Instruction>(VA); + Instruction *SB = cast<Instruction>(VB); + + if (hasMemoryViolation(LA, LB, SA, SB)) + return false; + + /* Here we found the guest memory operations are loaded and stored to the + * CPU states immediately. The operations are safe to combine. 
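+ * E.g. (a sketch): two adjacent 8-byte guest loads whose values are stored
+ * into one 16-byte XMM state become a single <16 x i8> guest load followed
+ * by one <16 x i8> store into that state.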
*/ + Instruction *InsertPos = SA; + StateInfo &SI = CSI->second; + uint64_t Size = SI.Size.getZExtValue(); + unsigned AS = getAddressSpaceOperand(LA); + unsigned Align = Size / 2; + Type *Ty = PointerType::get(VectorType::get(Int8Ty, Size), AS); + Instruction *Ptr = cast<Instruction>(LA->getPointerOperand()); + if (isa<IntToPtrInst>(Ptr)) + Ptr = new IntToPtrInst(Ptr->getOperand(0), Ty, "", InsertPos); + else + Ptr = new BitCastInst(Ptr, Ty, "", InsertPos); + Instruction *NewLI = new LoadInst(Ptr, "", true, Align, InsertPos); + MF->setGuestMemory(NewLI); + + Ty = PointerType::getUnqual(VectorType::get(Int8Ty, Size)); + Value *Offset = ConstantInt::get(Ty->getContext(), SI.Offset); + Ptr = GetElementPtrInst::CreateInBounds(CPU, Offset, "", InitLastInst); + Ptr = new BitCastInst(Ptr, Ty, "", InitLastInst); + new StoreInst(NewLI, Ptr, false, InsertPos); + + States.erase(CSI); + toErase.push_back(SA); + toErase.push_back(SB); + return true; +} + +bool CombineGuestMemory::tryCombineStore(Value *A, Value *B, CSMap &States) +{ + /* First, check if the CPU state loads are 'only' used by the guest store + * instructions, and if any other loads/stores occurs between the + * queried operation. */ + StoreInst *SA = cast<StoreInst>(A); + StoreInst *SB = cast<StoreInst>(B); + Instruction *LA = dyn_cast<Instruction>(SA->getOperand(0)); + Instruction *LB = dyn_cast<Instruction>(SB->getOperand(0)); + + if (!LA || !LB) + return false; + if (getNumUsers(LA) != 1 || getNumUsers(LB) != 1) + return false; + + CSMap::iterator CSI = States.find(ValuePair(LA, LB)); + if (CSI == States.end()) + return false; + + if (hasMemoryViolation(LA, LB, SA, SB)) + return false; + + /* Here we found the CPU states are loaded and stored to the guest memory + * immediately. The operations are safe to combine. */ + Instruction *InsertPos = SA; + StateInfo &SI = CSI->second; + uint64_t Size = SI.Size.getZExtValue(); + Type *Ty = PointerType::getUnqual(VectorType::get(Int8Ty, Size)); + Value *Offset = ConstantInt::get(Ty->getContext(), SI.Offset); + Instruction *Ptr = GetElementPtrInst::CreateInBounds(CPU, Offset, "", InitLastInst); + Ptr = new BitCastInst(Ptr, Ty, "", InitLastInst); + Value *V = new LoadInst(Ptr, "", false, InsertPos); + + unsigned AS = getAddressSpaceOperand(SA); + unsigned Align = Size / 2; + Ty = PointerType::get(VectorType::get(Int8Ty, Size), AS); + Ptr = cast<Instruction>(SA->getPointerOperand()); + if (isa<IntToPtrInst>(Ptr)) + Ptr = new IntToPtrInst(Ptr->getOperand(0), Ty, "", InsertPos); + else + Ptr = new BitCastInst(Ptr, Ty, "", InsertPos); + Instruction *NewSI = new StoreInst(V, Ptr, true, Align, InsertPos); + MF->setGuestMemory(NewSI); + + States.erase(CSI); + toErase.push_back(SA); + toErase.push_back(SB); + return true; +} + +bool CombineGuestMemory::combineMemory(SmallVector<Value *, 8> &Memory, + SmallVector<Value *, 8> &States) +{ + bool Changed = false; + SmallPtrSet<Value *, 4> Used; + CSMap ConsecutiveStates; + Value *Ptr; + APInt Offset, Size; + + /* Find consecutive CPU states. */ + for (unsigned i = 1, e = States.size(); i != e; i++) { + if (!isConsecutiveAccess(States[i-1], States[i], Ptr, Offset, Size)) + continue; + + if (!isLegalState(Ptr, Offset, Size)) + continue; + + ConsecutiveStates[ValuePair(States[i-1], States[i])] = + StateInfo(Ptr, Offset, Size); + } + + if (ConsecutiveStates.size() == 0) + return false; + + /* Find and combine consecutive guest memory accesses if their referrenced + * CPU states are also consecutive. 
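+ * Each access joins at most one combined pair; the Used set below keeps
+ * an already-combined access from being paired again.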
*/ + for (unsigned i = 1, e = Memory.size(); i != e; i++) { + if (Used.count(Memory[i-1]) || Used.count(Memory[i])) + continue; + if (!isConsecutiveAccess(Memory[i-1], Memory[i], Ptr, Offset, Size)) + continue; + + bool ret = false; + if (isa<LoadInst>(Memory[i-1]) && isa<LoadInst>(Memory[i])) { + ret = tryCombineLoad(Memory[i-1], Memory[i], ConsecutiveStates); + } else if (isa<StoreInst>(Memory[i-1]) && isa<StoreInst>(Memory[i])) { + ret = tryCombineStore(Memory[i-1], Memory[i], ConsecutiveStates); + } + if (ret) { + Used.insert(Memory[i-1]); + Used.insert(Memory[i]); + Changed = true; + } + } + return Changed; +} + +bool CombineGuestMemory::runOnFunction(Function &F) +{ + bool Changed = false; + +#if defined(CONFIG_SOFTMMU) + return Changed; +#endif + + /* Skip if no state is allowed to be combined. */ + if (LegalStates.empty()) + return Changed; + + CPU = IF->getDefaultCPU(F); + if (!CPU) { + dbg() << DEBUG_PASS << "CombineGuestMemory: Cannot find CPU pointer.\n"; + return false; + } + + InitLastInst = F.getEntryBlock().getTerminator(); + + for (auto FI = F.begin(), FE = F.end(); FI != FE; ++FI) { + SmallVector<Value *, 8> Memory; + SmallVector<Value *, 8> States; + for (auto BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) { + Instruction *I = &*BI; + if (MF->isGuestMemory(I)) { + Memory.push_back(I); + } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + if (!LI->isVolatile()) + States.push_back(LI); + } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { + if (!SI->isVolatile()) + States.push_back(SI); + } + } + if (Memory.size() >= 2 && States.size() >= 2) + Changed |= combineMemory(Memory, States); + } + + if (!toErase.empty()) + ProcessErase(toErase); + + return Changed; +} + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/llvm/pass/CombineZExtTrunc.cpp b/llvm/pass/CombineZExtTrunc.cpp new file mode 100644 index 0000000..de9a87f --- /dev/null +++ b/llvm/pass/CombineZExtTrunc.cpp @@ -0,0 +1,70 @@ +/* + * (C) 2015 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. 
+ */ + +#include "llvm/Transforms/Utils/Local.h" +#include "llvm-target.h" +#include "llvm-opc.h" +#include "llvm-pass.h" +#include "utils.h" + +#define PASS_NAME "CombineZExtTrunc" + +/* + * CombineZExtTrunc Pass + */ +class CombineZExtTrunc : public FunctionPass { +public: + static char ID; + explicit CombineZExtTrunc() : FunctionPass(ID) {} + bool runOnFunction(Function &F); +}; + +char CombineZExtTrunc::ID = 0; +INITIALIZE_PASS(CombineZExtTrunc, "combinezet", + "Combine ZExt followed by Trunc", false, false) + +FunctionPass *llvm::createCombineZExtTrunc() +{ + return new CombineZExtTrunc; +} + +bool CombineZExtTrunc::runOnFunction(Function &F) +{ + bool Changed = false; + IVec toErase; + + SmallVector<Instruction*, 4> Worklist; + for (auto II = inst_begin(F), EE = inst_end(F); II != EE; II++) { + Instruction *I = &*II; + if (isa<TruncInst>(I)) + Worklist.push_back(I); + } + + for (auto I : Worklist) { + TruncInst *TI = cast<TruncInst>(I); + ZExtInst *ZI = dyn_cast<ZExtInst>(TI->getOperand(0)); + if (!ZI) + continue; + + Type *SrcTy = ZI->getOperand(0)->getType(); + Type *DstTy = TI->getType(); + if (SrcTy == DstTy) { + I->replaceAllUsesWith(ZI->getOperand(0)); + if (TI->use_empty()) + toErase.push_back(TI); + Changed = true; + } + } + + if (toErase.size()) + ProcessErase(toErase); + + return Changed; +} + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/llvm/pass/FastMathPass.cpp b/llvm/pass/FastMathPass.cpp new file mode 100644 index 0000000..2b6a592 --- /dev/null +++ b/llvm/pass/FastMathPass.cpp @@ -0,0 +1,87 @@ +/* + * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm-target.h" +#include "llvm-pass.h" +#include "fpu/softfloat-native-def.h" + +#define PASS_DEBUG "FastMathPass" + +class FastMathPass : public FunctionPass { +public: + static char ID; + std::map<std::string, std::string> FPUNameMap; + + explicit FastMathPass() : FunctionPass(ID) + { + TCGHelperInfo *FPUHelper = (TCGHelperInfo *)get_native_fpu_helpers(); + for (int i = 0, e = num_native_fpu_helpers(); i != e; ++i) { + /* ex: llvm_int32_to_float32 --> int32_to_float32 */ + TCGHelperInfo &fpu = FPUHelper[i]; + const char *native = fpu.name; + const char *soft = native + 5; + FPUNameMap[soft] = native; + } + } + bool runOnFunction(Function &F); +}; + +bool FastMathPass::runOnFunction(Function &F) +{ + IVec toErase; + SmallVector<CallInst *, 16> InlineCalls; + Module *Mod = F.getParent(); + + for (auto I = inst_begin(F), E = inst_end(F); I != E; ++I) { + if (CallInst *CI = dyn_cast<CallInst>(&*I)) { + if (CI->isInlineAsm() || + CI->getCalledFunction() == nullptr || + CI->getCalledFunction()->isIntrinsic()) + continue; + + std::string Fname = CI->getCalledFunction()->getName(); + if (FPUNameMap.count(Fname) == 0) + continue; + + Function *Fn = Mod->getFunction(FPUNameMap[Fname]); + FunctionType *FTy = cast<FunctionType>( + cast<PointerType>(Fn->getType())->getElementType()); + + unsigned NumArgs = FTy->getNumParams(); + assert(NumArgs <= CI->getNumArgOperands()); + + SmallVector<Value *, 4> Params; + for (unsigned i = 0; i != NumArgs; ++i) + Params.push_back(CI->getArgOperand(i)); + + CallInst *NewCI = CallInst::Create(Fn, Params, "", CI); + CI->replaceAllUsesWith(NewCI); + InlineCalls.push_back(NewCI); + toErase.push_back(CI); + } + } + + ProcessErase(toErase); + + while (!InlineCalls.empty()) + InlineFunc(InlineCalls.pop_back_val()); + + return false; +} + +char FastMathPass::ID = 0; 
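+/* For example, a helper call to int32_to_float32() is rewritten to call
+ * llvm_int32_to_float32(), its native-FP counterpart, which is then
+ * inlined into the caller. */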
+INITIALIZE_PASS(FastMathPass, "fastmath", + "Transform softfloat subroutines to native FP operations", false, false) + +FunctionPass *llvm::createFastMathPass() +{ + return new FastMathPass(); +} + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/llvm/pass/ProfileExec.cpp b/llvm/pass/ProfileExec.cpp new file mode 100644 index 0000000..56a68e1 --- /dev/null +++ b/llvm/pass/ProfileExec.cpp @@ -0,0 +1,172 @@ +/* + * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm-debug.h" +#include "llvm-soft-perfmon.h" +#include "llvm-pass.h" +#include "llvm-opc.h" +#include "llvm.h" +#include "utils.h" + +#define PASS_NAME "ProfileExec" + +extern LLVMEnv *LLEnv; + +/* + * Profile Pass + */ +class ProfileExec : public FunctionPass { + enum { + IDX_LOOP = 0, + IDX_EXIT, + IDX_INBR, + }; + + IRFactory *IF; + const DataLayout *DL; + MDFactory *MF; + IntegerType *Int8Ty; + IntegerType *Int32Ty; + IntegerType *Int64Ty; + IntegerType *IntPtrTy; + PointerType *Int8PtrTy; + PointerType *Int32PtrTy; + PointerType *Int64PtrTy; + +public: + static char ID; + explicit ProfileExec() : FunctionPass(ID) {} + explicit ProfileExec(IRFactory *IF) + : FunctionPass(ID), IF(IF), DL(IF->getDL()), MF(IF->getMDFactory()) + { + LLVMContext &Context = IF->getContext();; + Int8Ty = IntegerType::get(Context, 8); + Int32Ty = IntegerType::get(Context, 32); + Int64Ty = IntegerType::get(Context, 64); + IntPtrTy = DL->getIntPtrType(Context); + Int8PtrTy = Type::getInt8PtrTy(Context, 0); + Int32PtrTy = Type::getInt32PtrTy(Context, 0); + Int64PtrTy = Type::getInt64PtrTy(Context, 0); + } + bool runOnFunction(Function &F); + + Instruction *getInsertPos(BasicBlock *BB) { + if (BB == &BB->getParent()->getEntryBlock()) + return &*++BB->begin(); + return BB->getFirstNonPHI(); + } +}; + +char ProfileExec::ID = 0; +INITIALIZE_PASS(ProfileExec, "profile", "Profile trace execution", false, false) + +FunctionPass *llvm::createProfileExec(IRFactory *IF) +{ + return new ProfileExec(IF); +} + +bool ProfileExec::runOnFunction(Function &F) +{ + if (!LLEnv->isTraceMode()) + return false; + if (!SP->isEnabled()) + return false; + + Instruction *CPU = IF->getDefaultCPU(F); + if (!CPU) { + dbg() << DEBUG_PASS << PASS_NAME << ": Cannot find CPU pointer.\n"; + return false; + } + + TraceInfo *Trace = IF->getTrace(); + + for (auto FI = F.begin(), FE = F.end(); FI != FE; FI++) { + BasicBlock *BB = &*FI; + if (distance(succ_begin(BB), succ_end(BB)) != 0) + continue; + + /* Find exit points and indirect branches. */ + Trace->NumExit++; + if (isa<IndirectBrInst>(BB->getTerminator())) + Trace->NumIndirectBr++; + } + + /* Insert code to profile trace exit counts. */ + if (SP->Mode & SPM_EXIT) { + Instruction *InsertPos = &*++BasicBlock::iterator(CPU); + Value *NumExitPtr = GetElementPtrInst::CreateInBounds(CPU, + CONSTPtr(offsetof(CPUArchState, num_trace_exits)), + "", InsertPos); + NumExitPtr = new BitCastInst(NumExitPtr, Int64PtrTy, "", InsertPos); + Instruction *NumExits = new LoadInst(NumExitPtr, "", true, InsertPos); + NumExits = BinaryOperator::Create(Instruction::Add, NumExits, + CONST64(1), "", InsertPos); + new StoreInst(NumExits, NumExitPtr, true, InsertPos); + } + + if (!(SP->Mode & SPM_TRACE)) + return false; + + SmallVector<CallInst*, 16> InlineCalls; + Function *Helper = IF->ResolveFunction("helper_profile_exec"); + + /* Prepare counter structures. 
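+ * ExecCount is a per-thread array of three counters, indexed by IDX_LOOP,
+ * IDX_EXIT and IDX_INBR.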
*/ + if (!Trace->ExecCount) { + Trace->ExecCount = new uint64_t *[MAX_SPM_THREADS]; + for (int i = 0; i < MAX_SPM_THREADS; i++) + Trace->ExecCount[i] = new uint64_t[3] {0, 0, 0}; + } + + /* Find all profiling point. */ + std::vector<std::pair<Instruction *, int> > ProfilePoint; + + SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> BackEdges; + FindFunctionBackedges(F, BackEdges); + for (unsigned i = 0, e = BackEdges.size(); i != e; ++i) { + auto BackEdgeBB = const_cast<BasicBlock*>(BackEdges[i].first); + ProfilePoint.push_back(std::make_pair(BackEdgeBB->getTerminator(), IDX_LOOP)); + } + + for (auto FI = F.begin(), FE = F.end(); FI != FE; FI++) { + BasicBlock *BB = &*FI; + if (distance(succ_begin(BB), succ_end(BB)) != 0) + continue; + bool isIndirectBr = isa<IndirectBrInst>(BB->getTerminator()); + ProfilePoint.push_back(std::make_pair(getInsertPos(BB), + isIndirectBr ? IDX_INBR : IDX_EXIT)); + } + + /* Insert profiling routines. */ + for (unsigned i = 0, e = ProfilePoint.size(); i != e; ++i) { + Instruction *InsertPos = ProfilePoint[i].first; + Value *Ty = CONST32(ProfilePoint[i].second); + + Value *Counter = ConstantExpr::getIntToPtr( + CONSTPtr((uintptr_t)Trace->ExecCount), + PointerType::getUnqual(Int8Ty)); + + SmallVector<Value *, 4> Params; + Type *ParamTy = Helper->getFunctionType()->getParamType(0); + Value *Env = new BitCastInst(CPU, ParamTy, "", InsertPos); + Params.push_back(Env); + Params.push_back(Counter); + Params.push_back(Ty); + + CallInst *CI = CallInst::Create(Helper, Params, "", InsertPos); + MF->setConst(CI); + InlineCalls.push_back(CI); + } + + while (!InlineCalls.empty()) + InlineFunc(InlineCalls.pop_back_val()); + + return true; +} + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/llvm/pass/RedundantStateElimination.cpp b/llvm/pass/RedundantStateElimination.cpp new file mode 100644 index 0000000..2e5f715 --- /dev/null +++ b/llvm/pass/RedundantStateElimination.cpp @@ -0,0 +1,179 @@ +/* + * (C) 2017 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#include "llvm-debug.h" +#include "llvm-opc.h" +#include "llvm-target.h" +#include "llvm-pass.h" +#include "utils.h" + +#define PASS_NAME "RedundantStateElimination" + +/* + * The RedundantStateElimination pass aims to remove + * (1) redundant stores to PC, and + * (2) redundant loads and stores enclosed by two helper function calls. + */ +class RedundantStateElimination : public FunctionPass { + IRFactory *IF; + MDFactory *MF; + const DataLayout *DL; + Value *CPU; + IVec toErase; + +public: + static char ID; + explicit RedundantStateElimination() : FunctionPass(ID) {} + explicit RedundantStateElimination(IRFactory *IF) + : FunctionPass(ID), IF(IF), MF(IF->getMDFactory()), DL(IF->getDL()) {} + + int getNumUsers(Instruction *I) { + return distance(I->user_begin(), I->user_end()); + } + + bool isStateOfPC(Value *Ptr) { + intptr_t Off = 0; + Value *Base = getBaseWithConstantOffset(DL, Ptr, Off); + if (Base == CPU && IRFactory::isStateOfPC(Off)) + return true; + return false; + } + + bool isDirectDominator(LoadInst *LI, StoreInst *SI) { + Instruction *A = LI, *B = SI; + if (A->getParent() != B->getParent()) + return false; + for (auto II = BasicBlock::iterator(A), EE = A->getParent()->end(); + II != EE; ++II) { + if (&*II == B) + return true; + /* If a non-const helper function is between the two instructions, + * this is not a direct domination because the helper function could + * cause side effect. 
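+ * Calls marked const (MDFactory::isConst) are assumed to be free of side
+ * effects and do not break the domination.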
*/ + auto CI = dyn_cast<CallInst>(II); + if (CI && !MDFactory::isConst(CI)) + return false; + } + return false; + } + + bool removePCState(Function &F); + bool removeHelperState(Function &F); + bool runOnFunction(Function &F); +}; + +char RedundantStateElimination::ID = 0; +INITIALIZE_PASS(RedundantStateElimination, "rse", + "Eliminate redundant CPU state loads/stores", false, false) + +FunctionPass *llvm::createRedundantStateElimination(IRFactory *IF) +{ + return new RedundantStateElimination(IF); +} + +/* Eliminate redundant stores to PC for each basic block. */ +bool RedundantStateElimination::removePCState(Function &F) +{ + for (auto FI = F.begin(), FE = F.end(); FI != FE; ++FI) { + bool Found = false; + + for (auto BI = FI->rbegin(), BE = FI->rend(); BI != BE; ++BI) { + Instruction *I = &*BI; + if (MF->isGuestMemory(I)) + continue; + + if (StoreInst *SI = dyn_cast<StoreInst>(I)) { + if (isStateOfPC(getPointerOperand(SI))) { + if (!Found) + Found = true; + else + toErase.push_back(SI); + } + } else if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + if (isStateOfPC(getPointerOperand(LI))) + Found = false; + } + } + } + + if (toErase.empty()) + return false; + + ProcessErase(toErase); + return true; + +} + +/* Eliminate redundant loads/stores enclosed by two helper function calls. + * The redundant loads and stores are generated by StateMappingPass for + * handling synchronization of CPU states around helper function calls. + * A load and store can be removed if a state value is loaded and immediately + * stored back to the same state. For example: + * + * Before optimization: After optimization: + * instructions to sync states instructions to sync states + * call void @helper_function1() call void @helper_function1() + * + * %v0 = load i32, i32* %state0 + * %v1 = load i32, i32* %state1 + * store i32 %v0, i32* %state0 + * store i32 %v1, i32* %state1 + * + * call void @helper_function2() call void @helper_function2() + * instructions to reload states instructions to reload states + */ +bool RedundantStateElimination::removeHelperState(Function &F) +{ + for (auto FI = F.begin(), FE = F.end(); FI != FE; ++FI) { + for (auto BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) { + Instruction *I = &*BI; + if (MF->isGuestMemory(I)) + continue; + + StoreInst *SI = dyn_cast<StoreInst>(I); + if (!SI || SI->isVolatile()) + continue; + + LoadInst *LI = dyn_cast<LoadInst>(getValueOperand(SI)); + if (LI && isDirectDominator(LI, SI)) { + /* We can try removing the store instruction if LI is a direct + * dominator of SI. */ + Value *PtrA = getPointerOperand(LI); + Value *PtrB = getPointerOperand(SI); + if (StripPointer(PtrA) == CPU && PtrA == PtrB) + toErase.push_back(SI); + } + } + } + + if (toErase.empty()) + return false; + + ProcessErase(toErase); + return true; +} + +bool RedundantStateElimination::runOnFunction(Function &F) +{ + bool Changed = false; + + CPU = IF->getDefaultCPU(F); + if (!CPU) { + dbg() << DEBUG_PASS << "RedundantStateElimination: Cannot find CPU pointer.\n"; + return false; + } + + Changed |= removePCState(F); +#if 0 + Changed |= removeHelperState(F); +#endif + + return Changed; +} + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ + diff --git a/llvm/pass/ReplaceIntrinsic.cpp b/llvm/pass/ReplaceIntrinsic.cpp new file mode 100644 index 0000000..62505f4 --- /dev/null +++ b/llvm/pass/ReplaceIntrinsic.cpp @@ -0,0 +1,137 @@ +/* + * (C) 2016 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. 
+ */ + +#include "llvm-types.h" +#include "llvm-debug.h" +#include "llvm-target.h" +#include "llvm-pass.h" + + +#define PASS_NAME "ReplaceIntrinsic" + +/* + * HQEMU does not allow helpers to contain any memory or debug intrinsics. + * This pass substitutes memory intrinsics to load/store instuctions and + * removes debug intrinsics (generated by Clang with -g flag). + */ +class ReplaceIntrinsic : public FunctionPass { + IVec toErase; +public: + static char ID; + explicit ReplaceIntrinsic() : FunctionPass(ID) {} + + Value *ConvertType(Value *V, Type *T, Instruction *InsertPos) { + if (likely(V->getType() == T)) + return V; + return new BitCastInst(V, T, "", InsertPos); + } + + bool replaceMemoryIntrinsic(IntrinsicInst *I); + bool runOnFunction(Function &F); +}; + +char ReplaceIntrinsic::ID = 0; +INITIALIZE_PASS(ReplaceIntrinsic, "replaceintrinsic", + "Replace memory and debug intrinsics generated by clang", + false, false) + +FunctionPass *llvm::createReplaceIntrinsic() +{ + return new ReplaceIntrinsic(); +} + + +/* + * Transform memcpy/memmove/memset to load/store instruction. + * Clang attempts to move memory data using LLVM memory intrinsic instructions. + * This causes the statemapping pass to miss some guest states. (Statemapping + * only considers guest states accessed by general load/store insts). + * So, we simply rewrite the memory intrinsics to load/store instuctions. + */ +bool ReplaceIntrinsic::replaceMemoryIntrinsic(IntrinsicInst *I) +{ + switch (I->getIntrinsicID()) { + case Intrinsic::memset: + case Intrinsic::memcpy: + case Intrinsic::memmove: + break; + default: + return false; + } + + LLVMContext &Context = I->getContext(); + Type *Int8PtrTy = Type::getInt8PtrTy(Context); + CallInst *CI = cast<CallInst>(I); + + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) { + /* memcpy/memmove */ + Value *Src = MTI->getSource(); + Value *Dst = MTI->getDest(); + Value *NumBytes = MTI->getLength(); + + if (CI->getArgOperand(0)->getType() != Int8PtrTy || + CI->getArgOperand(1)->getType() != Int8PtrTy || + !isa<ConstantInt>(NumBytes) || + MTI->isVolatile()) + return false; + + /* Remove this instruction if the access size is zero. 
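+ * A zero-length memcpy/memmove has no effect, so it is simply erased.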
*/ + size_t Len = cast<ConstantInt>(NumBytes)->getZExtValue(); + if (Len == 0) + goto done; + + Type *Ty = Type::getIntNPtrTy(Context, Len * 8); + Src = ConvertType(Src, Ty, I); + Dst = ConvertType(Dst, Ty, I); + Src = new LoadInst(Src, "", false, I); + new StoreInst(Src, Dst, false, I); + } else if (MemSetInst *MSI = dyn_cast<MemSetInst>(I)) { + /* memset */ + Value *Src = MSI->getValue(); + Value *Dst = MSI->getDest(); + Value *NumBytes = MSI->getLength(); + + if (CI->getArgOperand(0)->getType() != Int8PtrTy || + !isa<ConstantInt>(Src) || + !isa<ConstantInt>(NumBytes) || + MSI->isVolatile()) + return false; + + size_t Val = cast<ConstantInt>(Src)->getZExtValue(); + size_t Len = cast<ConstantInt>(NumBytes)->getZExtValue(); + if (Val != 0) + return false; + if (Len == 0) + goto done; + + Type *Ty = Type::getIntNPtrTy(Context, Len * 8); + Src = ConstantInt::get(Type::getIntNTy(Context, Len * 8), 0); + Dst = ConvertType(Dst, Ty, I); + new StoreInst(Src, Dst, false, I); + } + +done: + toErase.push_back(I); + return true; +} + +bool ReplaceIntrinsic::runOnFunction(Function &F) +{ + for (auto I = inst_begin(&F), E = inst_end(&F); I != E; ++I) { + Instruction *Inst = &*I; + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { + if (replaceMemoryIntrinsic(II)) + continue; + if (isa<DbgInfoIntrinsic>(II)) + toErase.push_back(II); + } + } + ProcessErase(toErase); + return true; +} + +/* + * vim: ts=8 sts=4 sw=4 expandtab + */ diff --git a/llvm/pass/SimplifyPointer.cpp b/llvm/pass/SimplifyPointer.cpp new file mode 100644 index 0000000..87afbdd --- /dev/null +++ b/llvm/pass/SimplifyPointer.cpp @@ -0,0 +1,334 @@ +//===- SimplifyPointer.cpp - Reassociate guest pointer arithmetic ---------===// +// +// The HQEMU Dynamic Binary Translator Infrastructure +// +// (C) 2016 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. +// COVART Laboratory, CSIE Department, National Taiwan University, Taiwan. +// See COPYRIGHT in top-level directory. +// +//===----------------------------------------------------------------------===// +// This pass implements a simple pointer arithmetic reassociator for easier +// pointer stripping. It gets scalar evolution results of all guest pointers +// which are in simplest form. Next, it inserts new instructions to evaluate the +// simplified expressions to construct new pointers, and rewrites corresponding +// guest load/store with new pointers. +// +//===----------------------------------------------------------------------===// +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/InstIterator.h" + +#include "llvm-opc.h" +#include "llvm-pass.h" +#include "llvm-target.h" +#include "utils.h" + +#define PASS_NAME "SIMPTR" +#define DEBUG_TYPE "SIMPTR" + +//#define VERBOSE + +/// \brief Dump pass debug message with pass name mark. +static inline llvm::raw_ostream &pout() { + return dbg() << DEBUG_PASS << PASS_NAME ": "; +} + +/// \returns True if \p A dominates \p B. 
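+/// Non-instruction values conservatively report non-domination.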
+static bool dominates(Value *A, Value *B, DominatorTree *DT) { + auto *AI = dyn_cast<Instruction>(A); + auto *BI = dyn_cast<Instruction>(B); + if (AI && BI) + return DT->dominates(AI, BI); + return false; +} + +class SimplifyPointer : public FunctionPass { +public: + using ValueList = SmallVector<Value *, 32>; + using InstrList = SmallVector<Instruction *, 32>; + using ExprValMap = DenseMap<const SCEV *, Value *>; + + // Pass identification, replacement for type id. + static char ID; + + // LLVM pass constructor and destructor. + explicit SimplifyPointer() : FunctionPass(ID){}; + explicit SimplifyPointer(IRFactory *IF) + : FunctionPass(ID), IF(IF), MF(IF->getMDFactory()), DL(IF->getDL()) { + // Initialize all. + initializeSimplifyPointerPass(*PassRegistry::getPassRegistry()); + } + + // LLVM pass public interfaces. + void getAnalysisUsage(AnalysisUsage &AU) const override; + bool runOnFunction(Function &F) override; + +private: + /// \return The evaluation result of expression \p S or null if not cached. + Value *lookupBaseExpressionCache(const SCEV *S) const { + auto V = BaseExprVal.find(S); + if (V != BaseExprVal.end()) + return V->second; + return nullptr; + } + + /// \returns True if spread constants in the expression tree of \p S can be + /// collected by reassociation and reduced to \p FoldVal. + /// + /// It traverses the expression tree of \p S and propagates constant nodes + /// from add, multiply and recurrent add nodes, i.e., (%1 + %2 + 5) * (%3 - 7) + /// should return 5 * -7 = -35. + bool foldConstantExpression(const SCEV *S, int64_t &FoldVal) const; + + /// \returns The first non-pointer value traced along the use-define chain of + /// casting which starts from \p V and ends with a IntToPtrInst, or null if + /// the length of searching chain exceeds \p MaxLookup. + /// + /// In the context of DBT, pointer type is represented and manipulated as + /// integer data until used as a pointer. Therefore, it follows: + /// + /// [Expression Tree] + /// + + + + + /// \ / \ / + /// - ... - + /// \ / + /// + + /// [Scalar Root] + /// | + /// [Casting] + /// | + /// [Load/Store] + /// + /// This method targets the scalar root value. + Value *findPointerScalarRoot(Value *V, unsigned MaxLookup = 4); + + /// \brief Simplify the pointer arithmetic of \p LSI based on scalar evolution + /// results which folds constants into simplest form. After extracting the + /// folded constant from the expression, the rest nodes can form a base + /// expression which is likely a common sub-expression of other \p LSI. + /// + /// It assumes \p LSI has the following use-define chain starting from its + /// pointer and containing only add, multiply and recurrent add nodes. + /// + /// [Expression Tree] [Expression Tree] [Expression Tree] + /// + A B + + + B A + + + /// \ / \ / \ / \ / \ / + /// - ... - - ... - - (B-A) + /// \ / \ / \ / + /// + + + + /// [Scalar Root] >> [Scalar Root] >> [Scalar Root] + /// | | | + /// [Casting] [Casting] [Casting] + /// | | | + /// [LSI] [LSI] [LSI] + /// + /// Suppose A and B are constants, they can be folded into (B-A) with scalar + /// evolution results. Need to insert instructions for other operations in + /// tree (e.g., the new sub in the right-most figure). + /// + /// First it tries to find the folded constant and substract it from root + /// expression to form the base expression. Then it generates instructions to + /// evaluate the base expression. + bool tryToSimplifyPointer(Instruction *I); + + // HQEMU internal infrastructure. 
+ IRFactory *IF = nullptr; + MDFactory *MF = nullptr; + // LLVM analysis and data type info. + const DataLayout *DL = nullptr; + DominatorTree *DT = nullptr; + ScalarEvolution *SE = nullptr; + + /// The cache of base expression to corresponding evaluated value map. + ExprValMap BaseExprVal; +}; + +bool SimplifyPointer::foldConstantExpression(const SCEV *S, + int64_t &FoldVal) const { + // Handle expression tree of scalar root containing only add, multiply and + // recurrent add nodes. + if (auto *AddSE = dyn_cast<SCEVAddExpr>(S)) { + FoldVal = 0; + for (auto Op : AddSE->operands()) { + int64_t Val; + if (foldConstantExpression(Op, Val)) + FoldVal += Val; + } + return true; + } else if (auto *MulSE = dyn_cast<SCEVMulExpr>(S)) { + FoldVal = 1; + for (auto Op : MulSE->operands()) { + int64_t Val; + // If one operand of multiplication fails to report a constant, entire + // expression becomes non-constant as well. + if (foldConstantExpression(Op, Val)) + FoldVal *= Val; + else + return false; + } + return true; + } else if (auto *RecSE = dyn_cast<SCEVAddRecExpr>(S)) { + // Trace only the start expression, because the step expression must be + // multiplied by the loop trip count which is unlikely constant. + return foldConstantExpression(RecSE->getStart(), FoldVal); + } else if (auto *ConstSE = dyn_cast<SCEVConstant>(S)) { + FoldVal = ConstSE->getValue()->getValue().getSExtValue(); + return true; + } + return false; +} + +Value *SimplifyPointer::findPointerScalarRoot(Value *V, unsigned MaxLookup) { + if (!V || !V->getType()->isPointerTy()) + return V; + + for (unsigned i = 0; i < MaxLookup; ++i) { + if (BitCastInst *Cast = dyn_cast<BitCastInst>(V)) { + V = Cast->getOperand(0); + } else if (IntToPtrInst *Cast = dyn_cast<IntToPtrInst>(V)) { + // Found first scalar, return it. + V = Cast->getOperand(0); + return V; + } + } + return nullptr; +} + +bool SimplifyPointer::tryToSimplifyPointer(Instruction *LSI) { + Value *Ptr = getPointerOperand(LSI); + Value *Root = findPointerScalarRoot(Ptr); + Type *RootTy = Root->getType(); + Type *PtrTy = Ptr->getType(); + if (!Ptr || !Root || !RootTy->isIntegerTy()) + return false; + +#ifdef VERBOSE + if (DM.getDebugMode() & DEBUG_PASS) { + pout() << "Visiting memory instruction.\n"; + pout() << "- " << *LSI << ".\n"; + } +#endif + + // Traverse the simplest form expression tree and collect folded constants. + // Note the folded constant can be zero (base = root) if no folded constant + // is found. + auto *RootSE = SE->getSCEV(Root); + int64_t FoldConst = 0; + foldConstantExpression(RootSE, FoldConst); + + // Substract offset constant from root expression to get the base expression, + // then query base expression cache to find whether it has been evaluated. + auto *BaseSE = SE->getMinusSCEV(RootSE, + SE->getConstant(RootTy, FoldConst, true)); + Value *Base = lookupBaseExpressionCache(BaseSE); + + // Create instructions to evaluate base expression if cache miss or previously + // computed value doesn't dominate load/store instruction. + if (!Base || !dominates(Base, LSI, DT)) { +#ifdef VERBOSE + pout() << " Need to build base expression.\n"; + pout() << " - Base " << *BaseSE << ".\n"; + pout() << " - Offset " << FoldConst << ".\n"; +#endif + // Expand the base expression if it is safe. 
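+ // SCEVExpander materializes the base expression as IR right before LSI.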
+ if (isSafeToExpand(BaseSE, *SE)) { +#if defined(LLVM_V35) + SCEVExpander Expander(*SE, ""); +#else + SCEVExpander Expander(*SE, *DL, ""); +#endif + Base = Expander.expandCodeFor(BaseSE, RootTy, LSI); + } + } else { +#ifdef VERBOSE + pout() << " Use cached base expression value.\n"; + pout() << " - Base " << *BaseSE << ".\n"; + pout() << " - Offset " << FoldConst << ".\n"; +#endif + } + + // Neither using cached value nor re-computing works, abort. + if (!Base) + return false; + + // Add back folded constant (offset) to new root value and feed the result as + // new pointer to load/store instruction. + IRBuilder<> Builder(IF->getContext()); + + bool FoldZero = (FoldConst == 0); + Value *Offset = ConstantInt::get(RootTy, FoldConst); + + Builder.SetInsertPoint(LSI); + Value *NewRoot = FoldZero ? Base : Builder.CreateAdd(Base, Offset); + Value *NewPtr = Builder.CreateIntToPtr(NewRoot, PtrTy); + LSI->replaceUsesOfWith(Ptr, NewPtr); + + // Cache base expression value. + BaseExprVal[BaseSE] = Base; + + return true; +} + +void SimplifyPointer::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<DominatorTreeWrapperPass>(); +#if defined(LLVM_V35) + AU.addRequired<ScalarEvolution>(); +#else + AU.addRequired<ScalarEvolutionWrapperPass>(); +#endif +} + +bool SimplifyPointer::runOnFunction(Function &F) { + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); +#if defined(LLVM_V35) + SE = &getAnalysis<ScalarEvolution>(); +#else + SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); +#endif + + bool Changed = false; + + InstrList MemoryInstrs; + for (auto FI = F.begin(), FE = F.end(); FI != FE; ++FI) { + BasicBlock *BB = &*FI; + + // Skip dead basic blocks. + if (!DT->isReachableFromEntry(BB)) + continue; + + // Collect all guest memory instructions. + for (auto BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) { + Instruction *I = &*BI; + if (MDFactory::isGuestMemory(I)) + MemoryInstrs.push_back(I); + } + } + + // Try to simplify pointers of collected load/store instructions. + for (Instruction *I : MemoryInstrs) + Changed |= tryToSimplifyPointer(I); + + return Changed; +} + +char SimplifyPointer::ID = 0; +INITIALIZE_PASS_BEGIN(SimplifyPointer, "simplifypointer", + "Reassiciate pointer arithmetic", false, false) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +#if defined(LLVM_V35) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +#else +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) +#endif +INITIALIZE_PASS_END(SimplifyPointer, "simplifypointer", + "Reassiciate pointer arithmetic", false, false) + +FunctionPass *llvm::createSimplifyPointer(IRFactory *IF) { + return new SimplifyPointer(IF); +} + +/* + * vim: ts=2 sts=2 sw=2 expandtab + */ diff --git a/llvm/pass/StateMappingPass.cpp b/llvm/pass/StateMappingPass.cpp new file mode 100644 index 0000000..0d9dd9b --- /dev/null +++ b/llvm/pass/StateMappingPass.cpp @@ -0,0 +1,885 @@ +/* + * (C) 2010 by Computer System Laboratory, IIS, Academia Sinica, Taiwan. + * See COPYRIGHT in top-level directory. + */ + +#include "llvm-debug.h" +#include "llvm-opc.h" +#include "llvm-target.h" +#include "llvm-pass.h" + +#define PASS_NAME "StateMapping" + + +/* + * StateMappingPass is used to eliminate the redundant loads and stores to the + * CPUArchState. The loads and stores of the guest memory operations are not + * removed in order not to violate the memory model of the guest architecture. + * + * The state mapping rules are: + * - A guest state is not overlapped: (i.e., same access size) + * - Same type: map to this type. 
+ * - Different type: select type in the order: vector, float and integer; + * use bitcast to convert between different types. + * - A guest state is overlapped with other state(s): + * - Query StateType to find state size (i.e., boundary) and type: + * - Vector type: use insert/extract to manipulate a vector element. + * - Other types: use shift to manipulate a vector element. + */ +class StateMappingPass : public FunctionPass { + IRFactory *IF; /* Uplink to the IRFactory */ + +public: + static char ID; + explicit StateMappingPass() : FunctionPass(ID) {} + explicit StateMappingPass(IRFactory *IF) : FunctionPass(ID), IF(IF) {} + + bool runOnFunction(Function &F); +}; + +struct StateMapping { + StateMapping() + : State(nullptr), Addr(nullptr), Ty(nullptr), AI(nullptr), + hasLoad(false), hasStore(false) {} + + StateData *State; + Value *Addr; + Type *Ty; + AllocaInst *AI; + bool hasLoad; + bool hasStore; + + intptr_t getSize() { return State->End - State->Start; } + intptr_t getStart() { return State->Start; } + intptr_t getEnd() { return State->End; } + Value *getAddr() { return Addr; } + Type *getType() { return Ty; } + bool isVector() { return Ty->isVectorTy(); } + + bool overlap(StateRange &Range) { + if (Range.empty()) + return false; + intptr_t Start = getStart(); + intptr_t End = getEnd(); + auto I = --Range.upper_bound(Start); + for (; I != Range.end() && I->first < End; ++I) { + if (I->second > Start) + return true; + } + return false; + } +}; + +struct ElementInfo { + ElementInfo() : Shift(0), NumElts(0), EltTy(nullptr), StateTy(nullptr) {} + + intptr_t Shift; + unsigned NumElts; + Type *EltTy; + Type *StateTy; +}; + +class StateMapper { + typedef std::vector<StateMapping> StateMapList; + + IRFactory *IF; + const DataLayout *DL; + Instruction *CPU; /* The CPU pointer */ + Instruction *PreCastPos; /* The position to cast CPU states */ + Instruction *PreLoadPos; /* The position to preload CPU states */ + IVec toErase; /* The instructions to be removed */ + + FlatType &StateType; + StateAnalyzer Analyzer; + StateMapList StateMaps; + +public: + StateMapper(IRFactory *IF) + : IF(IF), DL(IF->getDL()), StateType(IF->getTranslator().getStateType()), + Analyzer(DL) {} + + bool run(Function &F) { + if (!init(F)) + return false; + + AnalyzeState(F); + if (!StateMaps.empty()) + PromoteState(F); + + ProcessErase(toErase); + return true; + } + + /* Rearrange instructions in the 'init' block. */ + bool init(Function &F); + + /* Analyze instructions in a Function that access CPU states. */ + void AnalyzeState(Function &F); + + /* Compute state mapping information. */ + void ComputeStateMap(StateMapping &StateMap, StateData &State); + + /* Determine if the state can be operated as a vector. */ + Type *TryVectorState(StateData &State, Type *Ty); + + /* Map state references to the virtual states. */ + void PromoteState(Function &F); + + /* Rewrite state loads and stores. */ + void RewriteLoad(StateMapping &StateMap, StateRef &Ref); + void RewriteStore(StateMapping &StateMap, StateRef &Ref); + void RewriteLoadVector(StateMapping &StateMap, StateRef &Ref); + void RewriteStoreVector(StateMapping &StateMap, StateRef &Ref); + + /* Compute state and element types for element insertion and extraction. */ + void getElementInfo(StateMapping &StateMap, StateRef &Ref, ElementInfo &Info); + + /* Sync CPU states around helper calls. */ + void SyncHelperState(); + + /* Store dirty states at the leaf blocks. */ + void ProcessExitBB(BasicBlock *BB); + + /* Get the pointer without GEP and BitCast. 
*/ + void StripPointer(Value *V, IVec &IV); + + /* Move the pointer before InsertPos. */ + void MoveStatePointer(Value *V); + + /* Load state from Src and store it to Dest. */ + void CopyState(Value *Dest, Value *Src, Instruction *InsertPos); + + bool isLegalState(Value *Ptr, intptr_t &Off); + + /* Return true if the input is alias of a state pointer. */ + bool isStatePointer(Value *V) { + if (auto BCI = dyn_cast<BitCastInst>(V)) { + if (BCI->getOperand(0) == CPU) + return true; + return isStatePointer(BCI->getOperand(0)); + } else if (auto GEP = dyn_cast<GetElementPtrInst>(V)) + return GEP->getOperand(0) == CPU; + return false; + } + + bool isSimpleFunction(Function *F) { + HelperMap &Helpers = IF->getHelpers(); + if (Helpers.find(F->getName()) == Helpers.end() || + Helpers[F->getName()]->hasNestedCall) + return false; + return true; + } + + Value *ConvertType(Value *V, Type *Ty, Instruction *InsertPos) { + return V->getType() == Ty ? V : new BitCastInst(V, Ty, "", InsertPos); + } +}; + +/* Return a pre-defined state name. */ +static std::string getStateName(intptr_t Off) +{ +#if defined(TARGET_I386) + if (Off == offsetof(CPUArchState,xmm_regs[0])) return "xmm0"; + if (Off == offsetof(CPUArchState,xmm_regs[1])) return "xmm1"; + if (Off == offsetof(CPUArchState,xmm_regs[2])) return "xmm2"; + if (Off == offsetof(CPUArchState,xmm_regs[3])) return "xmm3"; + if (Off == offsetof(CPUArchState,xmm_regs[4])) return "xmm4"; + if (Off == offsetof(CPUArchState,xmm_regs[5])) return "xmm5"; + if (Off == offsetof(CPUArchState,xmm_regs[6])) return "xmm6"; + if (Off == offsetof(CPUArchState,xmm_regs[7])) return "xmm7"; + if (Off == offsetof(CPUArchState,xmm_t0)) return "xmm_t0"; +#endif + return ""; +} + +/* Determine if the offset is to access the temporary state. */ +static inline bool isLocalState(intptr_t Off) +{ +#if defined(TARGET_I386) + if (Off == offsetof(CPUArchState, xmm_t0)) + return true; +#endif + return false; +} + +/* Return states that should be ignored during state mapping. */ +static bool isSkipState(intptr_t Off) +{ + if (Off == (intptr_t)(offsetof(CPUState, tcg_exit_req) - ENV_OFFSET)) + return true; + +#define stateof(X) \ + (Off >= (intptr_t)offsetof(CPUArchState,X) && \ + Off < (intptr_t)(offsetof(CPUArchState,X) + sizeof(((CPUArchState*)0)->X))) +#define is_fpstatus(X) \ + (stateof(X.float_detect_tininess) || \ + stateof(X.float_rounding_mode) || \ + stateof(X.float_exception_flags) || \ + stateof(X.floatx80_rounding_precision) || \ + stateof(X.flush_to_zero) || \ + stateof(X.flush_inputs_to_zero) || \ + stateof(X.default_nan_mode)) + +#if defined(TARGET_ARM) + if (is_fpstatus(vfp.fp_status) || is_fpstatus(vfp.standard_fp_status)) + return true; +#elif defined(TARGET_I386) + if (is_fpstatus(fp_status)) + return true; +#endif + return false; + +#undef stateof +#undef is_fpstatus +} + +/* Check if the state is legal for state mapping. A legal state must have CPU + * as the base pointer, plus a positive constant offset. */ +bool StateMapper::isLegalState(Value *Ptr, intptr_t &Off) +{ + Value *Base = getBaseWithConstantOffset(DL, Ptr, Off); + if (Off < 0) + return false; + if (Base == CPU && !isSkipState(Off) && !IRFactory::isStateOfPC(Off)) + return true; + return false; +} + +/* Get the pointer without GEP and BitCast. The stripped GEP and BitCast + * instructions are returned to the caller. 
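+ * Traversal stops when a cycle is detected via the Visited set.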
*/ +void StateMapper::StripPointer(Value *V, IVec &IV) +{ + std::set<Value *> Visited; + Visited.insert(V); + do { + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) { + IV.push_back(GEP); + V = GEP->getOperand(0); + } else if (BitCastInst *BCI = dyn_cast<BitCastInst>(V)) { + IV.push_back(BCI); + V = BCI->getOperand(0); + } else + return; + if (Visited.find(V) != Visited.end()) + break; + Visited.insert(V); + } while (true); +} + +/* Move the pointer before InsertPos. */ +void StateMapper::MoveStatePointer(Value *V) +{ + IVec toMove; + StripPointer(V, toMove); + while (!toMove.empty()) { + Instruction *I = toMove.back(); + toMove.pop_back(); + if (I->getParent() == CPU->getParent()) + continue; + I->moveBefore(PreCastPos); + } +} + +/* Copy state data from src address to destination address. */ +void StateMapper::CopyState(Value *Dest, Value *Src, Instruction *InsertPos) +{ + if (!isa<AllocaInst>(Src)) { + MoveStatePointer(Src); + LoadInst *LI = new LoadInst(Src, "", false, InsertPos); + new StoreInst(LI, Dest, false, InsertPos); + + if (Src->getType()->getPointerElementType()->isVectorTy()) + LI->setAlignment(4); + } else { + MoveStatePointer(Dest); + LoadInst *LI = new LoadInst(Src, "", false, InsertPos); + StoreInst *SI = new StoreInst(LI, Dest, false, InsertPos); + + if (Dest->getType()->getPointerElementType()->isVectorTy()) + SI->setAlignment(4); + } +} + +/* Store dirty states at the leaf blocks. */ +void StateMapper::ProcessExitBB(BasicBlock *BB) +{ + Instruction *InsertPos = nullptr; + for (auto BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) { + if (MDFactory::isExit(&*BI)) { + InsertPos = &*BI; + break; + } + } + if (!InsertPos) + InsertPos = BB->getTerminator(); + + for (auto &StateMap : StateMaps) { + if (!StateMap.hasStore || isLocalState(StateMap.getStart())) + continue; + CopyState(StateMap.Addr, StateMap.AI, InsertPos); + } +} + +/* Sync CPU states around helper calls. */ +void StateMapper::SyncHelperState() +{ + CallList &Calls = Analyzer.getCalls(); + if (Calls.empty()) + return; + + /* + * Rules of syncing states around calls: + * 1. Dirty states (i.e., stores) are written back before calls. + * 2. All states, including loads and stores, are read back after calls. + * + * If the helper is a simple function, only dependent states are synced. + * If the helper is a complicated function, all states are synced. + */ + HelperMap &Helpers = IF->getHelpers(); + DenseMap<CallInst*, std::set<unsigned> > StoreBeforeCall; + DenseMap<CallInst*, std::set<unsigned> > LoadAfterCall; + + for (auto CI : Calls) { + Function *Func = CI->getCalledFunction(); + std::string Name = Func->getName(); + + if (isSimpleFunction(Func)) { + /* A pre-defined helper without nested call. 
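+ * Only the states that overlap the helper's use/def sets (or that may
+ * be aliased by its pointer arguments, cf. mayConflictArg) need to be
+ * synced here.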
*/ + HelperInfo *Helper = Helpers[Name]; + for (unsigned i = 0, e = StateMaps.size(); i != e; ++i) { + auto &StateMap = StateMaps[i]; + if (StateMap.hasStore && StateMap.overlap(Helper->StateUse)) + StoreBeforeCall[CI].insert(i); + + if (StateMap.overlap(Helper->StateDef)) + LoadAfterCall[CI].insert(i); + + if (Helper->mayConflictArg) { + unsigned NumArgs = CI->getNumArgOperands(); + for (unsigned j = 1; j < NumArgs; ++j) { + intptr_t Off = 0; + Value *Arg = CI->getArgOperand(j); + if (!isLegalState(Arg, Off)) + continue; + if (Off + Helper->ConflictSize <= StateMap.getStart() || + Off >= StateMap.getEnd()) + continue; + if (StateMap.hasStore) + StoreBeforeCall[CI].insert(i); + LoadAfterCall[CI].insert(i); + } + } + } + } else { + /* Sync states for a complicated function (an unknown helper or a + * helper with nested calls). */ + for (unsigned i = 0, e = StateMaps.size(); i != e; ++i) { + auto &StateMap = StateMaps[i]; + if (StateMap.hasStore) + StoreBeforeCall[CI].insert(i); + LoadAfterCall[CI].insert(i); + } + } + } + + /* Perform state syncing. */ + for (auto CI : Calls) { + Instruction *InsertPos = CI; + + if (!StoreBeforeCall.empty()) { + for (auto i : StoreBeforeCall[CI]) { + auto &StateMap = StateMaps[i]; + CopyState(StateMap.Addr, StateMap.AI, InsertPos); + } + } + + InsertPos = &*std::next(BasicBlock::iterator(InsertPos)); + if (isa<UnreachableInst>(InsertPos)) { + /* No read back is required after tail call. */ + continue; + } + + if (!LoadAfterCall.empty()) { + for (auto i : LoadAfterCall[CI]) { + auto &StateMap = StateMaps[i]; + CopyState(StateMap.AI, StateMap.Addr, InsertPos); + } + } + } +} + +static inline bool isSameSize(StateMapping &StateMap, StateRef &Ref) +{ + return StateMap.getSize() == Ref.getSize(); +} + +/* Compute state and element types for element insertion and extraction. */ +void StateMapper::getElementInfo(StateMapping &StateMap, StateRef &Ref, + ElementInfo &Info) +{ + intptr_t StateSize = StateMap.getSize(); + intptr_t Size = Ref.getSize(); + intptr_t Shift = Ref.Start - StateMap.getStart(); + Type *StateTy = StateMap.getType(); + LLVMContext &Context = StateTy->getContext(); + + if (!StateMap.isVector()) { + /* Use int-N to emulate the state. */ + Info.NumElts = 1; + Info.EltTy = Type::getIntNTy(Context, Size * 8); + Info.StateTy = Type::getIntNTy(Context, StateSize * 8); + Info.Shift = Shift; + return; + } + + /* The state is emulated as a vector. */ + if (StateSize % Size == 0 && Shift % Size == 0) { + Type *EltTy = Type::getIntNTy(Context, Size * 8); + + Info.NumElts = 1; + Info.EltTy = EltTy; + Info.StateTy = VectorType::get(EltTy, StateSize / Size); + Info.Shift = Shift / Size; + } else { + VectorType *VecTy = cast<VectorType>(StateTy); + Type *EltTy = VecTy->getScalarType(); + intptr_t EltSize = DL->getTypeSizeInBits(EltTy) / 8; + + Info.NumElts = Size / EltSize; + Info.EltTy = VectorType::get(EltTy, Info.NumElts); + Info.StateTy = StateTy; + Info.Shift = Shift / EltSize; + } +} + +void StateMapper::RewriteLoad(StateMapping &StateMap, StateRef &Ref) +{ + LoadInst *LI = cast<LoadInst>(Ref.I); + Type *Ty = LI->getType(); + Instruction *InsertPos = LI; + + /* The same reference size as the state size. */ + if (isSameSize(StateMap, Ref)) { + Value *V = new LoadInst(StateMap.AI, "", false, InsertPos); + V = ConvertType(V, Ty, InsertPos); + LI->replaceAllUsesWith(V); + toErase.push_back(LI); + return; + } + + if (StateMap.isVector()) { + RewriteLoadVector(StateMap, Ref); + return; + } + + /* This is a non-vector state. 
Transform the state to the type of Int-N
+ * and use logical shift to extract/insert element data. */
+ ElementInfo Info;
+ getElementInfo(StateMap, Ref, Info);
+
+ Value *V = new LoadInst(StateMap.AI, "", false, InsertPos);
+ V = ConvertType(V, Info.StateTy, InsertPos);
+
+ /* Extract the element. */
+ if (Info.Shift) {
+ Value *Shift = ConstantInt::get(V->getType(), Info.Shift * 8);
+ V = BinaryOperator::Create(Instruction::LShr, V, Shift, "", InsertPos);
+ }
+ V = new TruncInst(V, Info.EltTy, "", InsertPos);
+ V = ConvertType(V, Ty, InsertPos);
+
+ LI->replaceAllUsesWith(V);
+ toErase.push_back(LI);
+}
+
+void StateMapper::RewriteStore(StateMapping &StateMap, StateRef &Ref)
+{
+ StoreInst *SI = cast<StoreInst>(Ref.I);
+ Value *Data = SI->getValueOperand();
+ Instruction *InsertPos = SI;
+
+ /* The same reference size as the state size. */
+ if (isSameSize(StateMap, Ref)) {
+ Value *V = ConvertType(Data, StateMap.getType(), InsertPos);
+ new StoreInst(V, StateMap.AI, false, InsertPos);
+ toErase.push_back(SI);
+ return;
+ }
+
+ if (StateMap.isVector()) {
+ RewriteStoreVector(StateMap, Ref);
+ return;
+ }
+
+ /* This is a non-vector state. Transform the state to the type of Int-N
+ * and use logical shift to extract/insert element data. */
+ ElementInfo Info;
+ getElementInfo(StateMap, Ref, Info);
+
+ Value *V = new LoadInst(StateMap.AI, "", false, InsertPos);
+ V = ConvertType(V, Info.StateTy, InsertPos);
+
+ /* Insert the element. */
+ Data = ConvertType(Data, Info.EltTy, InsertPos);
+ Data = new ZExtInst(Data, Info.StateTy, "", InsertPos);
+
+ if (Info.Shift) {
+ Value *Shift = ConstantInt::get(Data->getType(), Info.Shift * 8);
+ Data = BinaryOperator::Create(Instruction::Shl, Data, Shift, "", InsertPos);
+ }
+
+ unsigned numBits = StateMap.getSize() * 8;
+ unsigned loBit = Info.Shift * 8, hiBit = loBit + Ref.getSize() * 8;
+ APInt mask = ~APInt::getBitsSet(numBits, loBit, hiBit);
+ Value *Mask = ConstantInt::get(Data->getContext(), mask);
+
+ V = BinaryOperator::Create(Instruction::And, V, Mask, "", InsertPos);
+ V = BinaryOperator::Create(Instruction::Or, V, Data, "", InsertPos);
+ V = ConvertType(V, StateMap.getType(), InsertPos);
+
+ new StoreInst(V, StateMap.AI, false, InsertPos);
+ toErase.push_back(SI);
+}
+
+void StateMapper::RewriteLoadVector(StateMapping &StateMap, StateRef &Ref)
+{
+ LoadInst *LI = cast<LoadInst>(Ref.I);
+ Type *Ty = LI->getType();
+ Instruction *InsertPos = LI;
+
+ /* Compute offset, size and element type of this vector operation. */
+ ElementInfo Info;
+ getElementInfo(StateMap, Ref, Info);
+
+ Value *V = new LoadInst(StateMap.AI, "", false, InsertPos);
+ V = ConvertType(V, Info.StateTy, InsertPos);
+
+ /* Extract the element(s) from the vector value. */
+ IntegerType *I32 = IntegerType::get(V->getContext(), 32);
+
+ if (Info.EltTy->isVectorTy()) {
+ /* Multiple elements to load. Use shufflevector, with one mask index
+ * per extracted element. */
+ Value *UndefVal = UndefValue::get(Info.StateTy);
+ SmallVector<Constant*, 8> Indices;
+ for (unsigned i = 0, e = Info.NumElts; i != e; ++i)
+ Indices.push_back(ConstantInt::get(I32, Info.Shift + i));
+ Value *CV = ConstantVector::get(Indices);
+ V = new ShuffleVectorInst(V, UndefVal, CV, "", InsertPos);
+ } else {
+ /* Only one element. Use extractelement. 
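+ *
+ * For example (illustrative; %state names the preloaded value): a 4-byte
+ * load at byte offset 4 of a <4 x i32> state becomes
+ * %elt = extractelement <4 x i32> %state, i32 1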
*/ + V = ExtractElementInst::Create(V, + ConstantInt::get(I32, Info.Shift), "", InsertPos); + } + + V = ConvertType(V, Ty, InsertPos); + + LI->replaceAllUsesWith(V); + toErase.push_back(LI); +} + +void StateMapper::RewriteStoreVector(StateMapping &StateMap, StateRef &Ref) +{ + StoreInst *SI = cast<StoreInst>(Ref.I); + Value *Data = SI->getValueOperand(); + Instruction *InsertPos = SI; + + /* Compute offset, size and element type of this vector operation. */ + ElementInfo Info; + getElementInfo(StateMap, Ref, Info); + + Value *V = new LoadInst(StateMap.AI, "", false, InsertPos); + V = ConvertType(V, Info.StateTy, InsertPos); + Data = ConvertType(Data, Info.EltTy, InsertPos); + + /* Extract element(s) from data and insert it into the vector value. */ + IntegerType *I32 = IntegerType::get(V->getContext(), 32); + + if (Info.EltTy->isVectorTy()) { + SmallVector<Value *, 8> Partial; + for (unsigned i = 0, e = Info.NumElts; i != e; ++i) { + Partial.push_back(ExtractElementInst::Create(Data, + ConstantInt::get(I32, i), "", InsertPos)); + } + for (unsigned i = 0, e = Info.NumElts; i != e; ++i) { + V = InsertElementInst::Create(V, Partial[i], + ConstantInt::get(I32, Info.Shift + i), "", InsertPos); + } + } else { + /* Only one element. Use insertelement. */ + V = InsertElementInst::Create(V, Data, + ConstantInt::get(I32, Info.Shift), "", InsertPos); + } + + V = ConvertType(V, StateMap.getType(), InsertPos); + + new StoreInst(V, StateMap.AI, false, InsertPos); + toErase.push_back(SI); +} + +/* Map state references to the virtual states. */ +void StateMapper::PromoteState(Function &F) +{ + /* Pre-load CPU states. */ + Type *IntPtrTy = DL->getIntPtrType(CPU->getContext()); + for (auto &StateMap : StateMaps) { + if (!StateMap.Addr) { + Value *Off = ConstantInt::get(IntPtrTy, StateMap.getStart()); + Value *GEP = GetElementPtrInst::CreateInBounds(CPU, Off, "", + PreCastPos); + StateMap.Addr = new BitCastInst(GEP, + PointerType::getUnqual(StateMap.getType()), "", + PreCastPos); + } + + std::string StateName = StateMap.Addr->getName(); + if (StateName == "") + StateName = getStateName(StateMap.getStart()); + if (StateName == "") + StateName = "state"; + StateName.append(".a"); + + StateMap.AI = CreateAlloca(StateMap.getType(), 0, StateName, PreCastPos); + CopyState(StateMap.AI, StateMap.Addr, PreLoadPos); + } + + /* Rewrite loads and stores. */ + for (auto &StateMap : StateMaps) { + for (auto Ref : StateMap.State->Refs) { + if (isa<LoadInst>(Ref->I)) + RewriteLoad(StateMap, *Ref); + else + RewriteStore(StateMap, *Ref); + } + } + + /* Sync CPU states around helper calls. */ + SyncHelperState(); + + /* Post-store dirty values back to CPU states for each exiting block. */ + for (auto BI = F.begin(), BE = F.end(); BI != BE; ++BI) { + BasicBlock *BB = &*BI; + if (distance(succ_begin(BB), succ_end(BB)) == 0) /* leaf node */ + ProcessExitBB(BB); + } +} + +/* Determine if the state can be operated as a vector. */ +Type *StateMapper::TryVectorState(StateData &State, Type *Ty) +{ + intptr_t StateStart = State.Start; + intptr_t StateEnd = State.End; + intptr_t StateSize = StateEnd - StateStart; + + /* If the reference type (from the IR) is already a vector type, use it. + * Otherwise, query StateType to determine if it is a vector state. 
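+ * For example (illustrative): if StateType records a vector type, say
+ * <2 x i64>, anywhere within [StateStart, StateEnd), that type seeds
+ * VecTy below.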
*/
+ VectorType *VecTy = dyn_cast<VectorType>(Ty);
+ if (!VecTy) {
+ auto TI = --StateType.upper_bound(StateStart);
+ for (; TI != StateType.end() && TI->first < StateEnd; ++TI) {
+ if (TI->second->isVectorTy()) {
+ VecTy = cast<VectorType>(TI->second);
+ break;
+ }
+ }
+ }
+
+ if (!VecTy)
+ return nullptr;
+
+ /* This is a vector state. Now, we need to check whether all state refs can
+ * be composed from the vector element type: (a) the state size is a multiple
+ * of the vector element size, and (b) the size and shift of each state ref
+ * are both a multiple of the vector element size. */
+ Type *ElementTy = VecTy->getScalarType();
+ intptr_t ElementSize = DL->getTypeSizeInBits(ElementTy) / 8;
+ if (StateSize % ElementSize != 0)
+ return nullptr;
+
+ for (auto Ref : State.Refs) {
+ if (Ref->getSize() % ElementSize != 0 ||
+ (Ref->Start - StateStart) % ElementSize != 0)
+ return nullptr;
+ }
+ return VectorType::get(ElementTy, StateSize / ElementSize);
+}
+
+/* Compute state mapping information based on the state mapping rules. */
+void StateMapper::ComputeStateMap(StateMapping &StateMap, StateData &State)
+{
+ /* State mapping rule:
+ * - A guest state is not overlapped (i.e., same access size):
+ * - Same type: map to this type.
+ * - Different type: select type in the order: vector, float and integer;
+ * use bitcast to convert between different types.
+ * - A guest state is overlapped with other state(s):
+ * - Query StateType to find state size (i.e., boundary) and type:
+ * - Vector type: use insert/extract to manipulate a vector element.
+ * - Other types: use shift to manipulate a sub-register element. */
+ bool sameSize = true;
+ bool hasLoad = false;
+ bool hasStore = false;
+
+ for (auto Ref : State.Refs) {
+ hasLoad |= isa<LoadInst>(Ref->I);
+ hasStore |= isa<StoreInst>(Ref->I);
+ }
+
+ StateRef *Ref = State.Refs.front();
+ Type *Ty = Ref->getType();
+ Value *Addr = getPointerOperand(Ref->I);
+ intptr_t Size = Ref->getSize();
+
+ for (unsigned i = 1, e = State.Refs.size(); i != e; ++i) {
+ StateRef *NextRef = State.Refs[i];
+ Type *NextTy = NextRef->getType();
+ Value *NextAddr = getPointerOperand(NextRef->I);
+
+ /* Check type. */
+ if (Ty != NextTy) {
+ /* Select type in the order: vector, float and integer. */
+ bool Swap = false;
+ if (Ty->isVectorTy() && NextTy->isVectorTy()) {
+ /* We prefer the vector type with the smaller element type. */
+ Type *ATy = cast<VectorType>(Ty)->getScalarType();
+ Type *BTy = cast<VectorType>(NextTy)->getScalarType();
+ if (DL->getTypeSizeInBits(BTy) < DL->getTypeSizeInBits(ATy))
+ Swap = true;
+ } else if (!Ty->isVectorTy() && NextTy->isVectorTy()) {
+ Swap = true;
+ } else if (Ty->isIntegerTy() && NextTy->isFloatTy()) {
+ Swap = true;
+ }
+
+ if (Swap) {
+ std::swap(Ty, NextTy);
+ std::swap(Addr, NextAddr);
+ }
+ }
+
+ /* Check size. */
+ if (Size != NextRef->getSize())
+ sameSize = false;
+ }
+
+ if (sameSize) {
+ /* The same reference size as the state size. */
+ StateMap.Ty = Ty;
+ StateMap.Addr = Addr;
+ } else {
+ /* Different reference sizes. */
+ intptr_t StateSize = State.End - State.Start;
+ Type *VecTy = TryVectorState(State, Ty);
+ StateMap.Ty = VecTy ? VecTy
+ : Type::getIntNTy(Ty->getContext(), StateSize * 8);
+ StateMap.Addr = nullptr;
+ }
+ StateMap.State = &State;
+ StateMap.hasLoad = hasLoad;
+ StateMap.hasStore = hasStore;
+}
+
+/* Analyze instructions in a Function that access CPU states. */
+void StateMapper::AnalyzeState(Function &F)
+{
+ /* Collect instructions (load/store/call) that access CPU states. 
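+ * Each legal state reference is recorded together with its constant
+ * offset from the CPU pointer (see isLegalState).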
+ * Loads/stores that access guest memory or are tagged with volatile
+ * (e.g., accessing the states: %pc and %tcg_exit_req) are ignored. */
+
+ for (auto II = inst_begin(F), EE = inst_end(F); II != EE; ++II) {
+ Instruction *I = &*II;
+ intptr_t Off = 0;
+ if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (MDFactory::isGuestMemory(I) || LI->isVolatile())
+ continue;
+
+ if (isLegalState(LI->getPointerOperand(), Off))
+ Analyzer.addStateRef(I, Off);
+ } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (MDFactory::isGuestMemory(I) || SI->isVolatile())
+ continue;
+
+ if (isLegalState(SI->getPointerOperand(), Off))
+ Analyzer.addStateRef(I, Off);
+ } else if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ /* Skip const helpers, inline asm, and intrinsic function calls. */
+ if (MDFactory::isConst(CI))
+ continue;
+ if (CI->isInlineAsm() || isa<IntrinsicInst>(CI))
+ continue;
+
+ Analyzer.addCall(CI);
+ }
+ }
+
+ /* Ask Analyzer to put state references into groups. */
+ Analyzer.computeState();
+
+ StateList &States = Analyzer.getStateList();
+ if (States.empty())
+ return;
+
+ /* Compute state mapping info. */
+ StateMaps.resize(States.size());
+ for (unsigned i = 0, e = States.size(); i != e; ++i)
+ ComputeStateMap(StateMaps[i], States[i]);
+}
+
+/* Rearrange instructions in the 'init' block. */
+bool StateMapper::init(Function &F)
+{
+ /*
+ * We rearrange the instructions in the 'init' block so that gep/cast
+ * instructions come before all other instructions in the block.
+ * For example:
+ * %0 = getelementptr i8* %cpu, i64 0
+ * %1 = bitcast i8* %0 to i32* # gep/cast insns
+ * -------------------------------------- # precast_pos
+ * -------------------------------------- # preload_pos
+ * %2 = load i32, i32* %1 # the other insns
+ * br label %entry
+ */
+ CPU = IF->getDefaultCPU(F);
+ if (!CPU || CPU->getParent() != &F.getEntryBlock())
+ return false;
+
+ Instruction *InsertPos = &*std::next(BasicBlock::iterator(CPU));
+ PreLoadPos = new UnreachableInst(CPU->getContext(), InsertPos);
+ PreCastPos = new UnreachableInst(CPU->getContext(), PreLoadPos);
+
+ toErase.push_back(PreLoadPos);
+ toErase.push_back(PreCastPos);
+
+ /* Move gep/cast instructions. */
+ IVec toMove;
+ BasicBlock *BB = CPU->getParent();
+ for (auto BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) {
+ Instruction *I = &*BI;
+ if (isStatePointer(I))
+ toMove.push_back(I);
+ }
+ for (auto I : toMove)
+ I->moveBefore(PreCastPos);
+
+ return true;
+}
+
+/*
+ * StateMappingPass
+ */
+bool StateMappingPass::runOnFunction(Function &F)
+{
+ return StateMapper(IF).run(F);
+}
+
+char StateMappingPass::ID = 0;
+INITIALIZE_PASS(StateMappingPass, "statemap",
+ "Eliminate redundant loads/stores by mapping CPU states", false, false)
+
+FunctionPass *llvm::createStateMappingPass(IRFactory *IF)
+{
+ return new StateMappingPass(IF);
+}
+
+/*
+ * vim: ts=8 sts=4 sw=4 expandtab
+ */